mirror of
https://github.com/hedge-dev/XenonRecomp.git
synced 2025-04-19 19:01:17 +00:00
Further inaccuracy fixes.
This commit is contained in:
parent
0e59052324
commit
fb97a569f9
@ -275,8 +275,8 @@ int main(int argc, char* argv[])
|
||||
fseek(f, 0, SEEK_SET);
|
||||
tempData.resize(fileSize);
|
||||
fread(tempData.data(), 1, fileSize, f);
|
||||
|
||||
shouldWrite = XXH3_64bits(tempData.data(), tempData.size()) != XXH3_64bits(out.data(), out.size());
|
||||
|
||||
shouldWrite = !XXH128_isEqual(XXH3_128bits(tempData.data(), tempData.size()), XXH3_128bits(out.data(), out.size()));
|
||||
}
|
||||
fclose(f);
|
||||
}
|
||||
@ -364,6 +364,7 @@ int main(int argc, char* argv[])
|
||||
if (insn.opcode == nullptr)
|
||||
{
|
||||
println("\t// {}", insn.op_str);
|
||||
std::println("Unable to decode instruction at 0x{:X}", base - 4);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -399,7 +400,13 @@ int main(int argc, char* argv[])
|
||||
}
|
||||
};
|
||||
|
||||
switch (insn.opcode->id)
|
||||
int id = insn.opcode->id;
|
||||
|
||||
// Handling instructions that don't disassemble correctly for some reason here
|
||||
if (id == PPC_INST_VUPKHSB128 && insn.operands[2] == 0x60) id = PPC_INST_VUPKHSH128;
|
||||
else if (id == PPC_INST_VUPKLSB128 && insn.operands[2] == 0x60) id = PPC_INST_VUPKLSH128;
|
||||
|
||||
switch (id)
|
||||
{
|
||||
case PPC_INST_ADD:
|
||||
println("\tctx.r{}.u64 = ctx.r{}.u64 + ctx.r{}.u64;", insn.operands[0], insn.operands[1], insn.operands[2]);
|
||||
@ -758,11 +765,11 @@ int main(int argc, char* argv[])
|
||||
break;
|
||||
|
||||
case PPC_INST_FCTIDZ:
|
||||
println("\tctx.f{}.s64 = ctx.f{}.f64;", insn.operands[0], insn.operands[1]);
|
||||
println("\tctx.f{}.s64 = trunc(ctx.f{}.f64);", insn.operands[0], insn.operands[1]);
|
||||
break;
|
||||
|
||||
case PPC_INST_FCTIWZ:
|
||||
println("\tctx.f{}.s32 = ctx.f{}.f64;", insn.operands[0], insn.operands[1]);
|
||||
println("\tctx.f{}.s32 = trunc(ctx.f{}.f64);", insn.operands[0], insn.operands[1]);
|
||||
break;
|
||||
|
||||
case PPC_INST_FDIV:
|
||||
@ -822,7 +829,7 @@ int main(int argc, char* argv[])
|
||||
break;
|
||||
|
||||
case PPC_INST_FRES:
|
||||
println("\tctx.f{}.f64 = 1.0 / ctx.f{}.f64;", insn.operands[0], insn.operands[1]);
|
||||
println("\tctx.f{}.f64 = 1.0f / float(ctx.f{}.f64);", insn.operands[0], insn.operands[1]);
|
||||
break;
|
||||
|
||||
case PPC_INST_FRSP:
|
||||
@ -1260,6 +1267,8 @@ int main(int argc, char* argv[])
|
||||
println("\tif (temp.u32 > 0x1F) temp.u32 = 0x1F;");
|
||||
println("\tctx.xer.ca = (ctx.r{}.s32 < 0) & (((ctx.r{}.s32 >> temp.u32) << temp.u32) != ctx.r{}.s32);", insn.operands[1], insn.operands[1], insn.operands[1]);
|
||||
println("\tctx.r{}.s64 = ctx.r{}.s32 >> {};", insn.operands[0], insn.operands[1], insn.operands[2]);
|
||||
if (strchr(insn.opcode->name, '.'))
|
||||
println("\tctx.cr0.compare<int32_t>(ctx.r{}.s32, 0, ctx.xer);", insn.operands[0]);
|
||||
break;
|
||||
|
||||
case PPC_INST_SRAWI:
|
||||
@ -1570,7 +1579,7 @@ int main(int argc, char* argv[])
|
||||
break;
|
||||
|
||||
case PPC_INST_VANDC128:
|
||||
println("\t_mm_store_ps(ctx.v{}.f32, _mm_andnot_ps(_mm_load_ps(ctx.v{}.f32), _mm_load_ps(ctx.v{}.f32)));", insn.operands[0], insn.operands[1], insn.operands[2]);
|
||||
println("\t_mm_store_ps(ctx.v{}.f32, _mm_andnot_ps(_mm_load_ps(ctx.v{}.f32), _mm_load_ps(ctx.v{}.f32)));", insn.operands[0], insn.operands[2], insn.operands[1]);
|
||||
break;
|
||||
|
||||
case PPC_INST_VAVGSB:
|
||||
@ -1587,8 +1596,7 @@ int main(int argc, char* argv[])
|
||||
|
||||
case PPC_INST_VCTSXS:
|
||||
case PPC_INST_VCFPSXWS128:
|
||||
// TODO: saturate
|
||||
println("\t_mm_store_si128((__m128i*)ctx.v{}.s32, _mm_cvttps_epi32(_mm_mul_ps(_mm_load_ps(ctx.v{}.f32), _mm_set1_ps(exp2f({})))));", insn.operands[0], insn.operands[1], insn.operands[2]);
|
||||
println("\t_mm_store_si128((__m128i*)ctx.v{}.s32, _mm_vctsxs(_mm_mul_ps(_mm_load_ps(ctx.v{}.f32), _mm_set1_ps(exp2f({})))));", insn.operands[0], insn.operands[1], insn.operands[2]);
|
||||
break;
|
||||
|
||||
case PPC_INST_VCFSX:
|
||||
@ -1602,6 +1610,7 @@ int main(int argc, char* argv[])
|
||||
break;
|
||||
|
||||
case PPC_INST_VCMPBFP128:
|
||||
println("\t__debugbreak();");
|
||||
break;
|
||||
|
||||
case PPC_INST_VCMPEQFP:
|
||||
@ -1717,7 +1726,6 @@ int main(int argc, char* argv[])
|
||||
|
||||
case PPC_INST_VNMSUBFP:
|
||||
case PPC_INST_VNMSUBFP128:
|
||||
// TODO: wrong argument order
|
||||
println("\t_mm_store_ps(ctx.v{}.f32, _mm_fnmadd_ps(_mm_load_ps(ctx.v{}.f32), _mm_load_ps(ctx.v{}.f32), _mm_load_ps(ctx.v{}.f32)));", insn.operands[0], insn.operands[1], insn.operands[2], insn.operands[3]);
|
||||
break;
|
||||
|
||||
@ -1744,6 +1752,26 @@ int main(int argc, char* argv[])
|
||||
}
|
||||
|
||||
case PPC_INST_VPKD3D128:
|
||||
// TODO: vectorize somehow?
|
||||
// NOTE: handling vector reversal here too
|
||||
switch (insn.operands[2])
|
||||
{
|
||||
case 0: // D3D color
|
||||
if (insn.operands[3] != 1 || insn.operands[4] != 3)
|
||||
std::println("Unexpected D3D color pack instruction at {:X}", base - 4);
|
||||
|
||||
for (size_t i = 0; i < 4; i++)
|
||||
{
|
||||
constexpr size_t indices[] = { 3, 0, 1, 2 };
|
||||
println("\ttemp.u32 {}= ctx.v{}.u8[{}] << {};", i == 0 ? "" : "|", insn.operands[1], i * 4, indices[i] * 8);
|
||||
}
|
||||
println("\tctx.v{}.u32[3] = temp.u32;", insn.operands[0]);
|
||||
break;
|
||||
|
||||
default:
|
||||
println("\t__debugbreak();");
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case PPC_INST_VPKSHUS:
|
||||
@ -1770,7 +1798,7 @@ int main(int argc, char* argv[])
|
||||
|
||||
case PPC_INST_VRLIMI128:
|
||||
{
|
||||
constexpr size_t imm[] = { _MM_SHUFFLE(0, 1, 2, 3), _MM_SHUFFLE(1, 2, 3, 0), _MM_SHUFFLE(2, 3, 0, 1), _MM_SHUFFLE(3, 0, 1, 2) };
|
||||
constexpr size_t imm[] = { _MM_SHUFFLE(3, 2, 1, 0), _MM_SHUFFLE(2, 1, 0, 3), _MM_SHUFFLE(1, 0, 3, 2), _MM_SHUFFLE(0, 3, 2, 1) };
|
||||
println("\t_mm_store_ps(ctx.v{}.f32, _mm_blend_ps(_mm_load_ps(ctx.v{}.f32), _mm_permute_ps(_mm_load_ps(ctx.v{}.f32), {}), {}));", insn.operands[0], insn.operands[0], insn.operands[1], imm[insn.operands[3]], insn.operands[2]);
|
||||
break;
|
||||
}
|
||||
@ -1781,7 +1809,7 @@ int main(int argc, char* argv[])
|
||||
break;
|
||||
|
||||
case PPC_INST_VSEL:
|
||||
println("\t_mm_store_ps(ctx.v{}.f32, _mm_or_ps(_mm_and_ps(_mm_load_ps(ctx.v{}.f32), _mm_load_ps(ctx.v{}.f32)), _mm_andnot_ps(_mm_load_ps(ctx.v{}.f32), _mm_load_ps(ctx.v{}.f32))));", insn.operands[0], insn.operands[3], insn.operands[1], insn.operands[3], insn.operands[2]);
|
||||
println("\t_mm_store_ps(ctx.v{}.f32, _mm_or_ps(_mm_andnot_ps(_mm_load_ps(ctx.v{}.f32), _mm_load_ps(ctx.v{}.f32)), _mm_and_ps(_mm_load_ps(ctx.v{}.f32), _mm_load_ps(ctx.v{}.f32))));", insn.operands[0], insn.operands[3], insn.operands[1], insn.operands[3], insn.operands[2]);
|
||||
break;
|
||||
|
||||
case PPC_INST_VSLB:
|
||||
@ -1805,7 +1833,7 @@ int main(int argc, char* argv[])
|
||||
{
|
||||
// NOTE: accounting for full vector reversal here
|
||||
uint32_t perm = 15 - insn.operands[2];
|
||||
println("\t_mm_store_si128((__m128i*)ctx.v{}.u8, _mm_shuffle_epi8(_mm_load_si128((__m128i*)ctx.v{}.u8), _mm_set1_epi8({})));", insn.operands[0], insn.operands[1], perm);
|
||||
println("\t_mm_store_si128((__m128i*)ctx.v{}.u8, _mm_shuffle_epi8(_mm_load_si128((__m128i*)ctx.v{}.u8), _mm_set1_epi8(char(0x{:X}))));", insn.operands[0], insn.operands[1], perm);
|
||||
break;
|
||||
}
|
||||
|
||||
@ -1814,20 +1842,20 @@ int main(int argc, char* argv[])
|
||||
// NOTE: accounting for full vector reversal here
|
||||
uint32_t perm = 7 - insn.operands[2];
|
||||
perm = (perm * 2) | ((perm * 2 + 1) << 8);
|
||||
println("\t_mm_store_si128((__m128i*)ctx.v{}.u16, _mm_shuffle_epi8(_mm_load_si128((__m128i*)ctx.v{}.u16), _mm_set1_epi16(0x{:X})));", insn.operands[0], insn.operands[1], perm);
|
||||
println("\t_mm_store_si128((__m128i*)ctx.v{}.u16, _mm_shuffle_epi8(_mm_load_si128((__m128i*)ctx.v{}.u16), _mm_set1_epi16(short(0x{:X}))));", insn.operands[0], insn.operands[1], perm);
|
||||
break;
|
||||
}
|
||||
|
||||
case PPC_INST_VSPLTISB:
|
||||
println("\t_mm_store_si128((__m128i*)ctx.v{}.u8, _mm_set1_epi8(0x{:X}));", insn.operands[0], insn.operands[1]);
|
||||
println("\t_mm_store_si128((__m128i*)ctx.v{}.u8, _mm_set1_epi8(char(0x{:X})));", insn.operands[0], insn.operands[1]);
|
||||
break;
|
||||
|
||||
case PPC_INST_VSPLTISW:
|
||||
println("\t_mm_store_si128((__m128i*)ctx.v{}.u32, _mm_set1_epi32(0x{:X}));", insn.operands[0], insn.operands[1]);
|
||||
println("\t_mm_store_si128((__m128i*)ctx.v{}.u32, _mm_set1_epi32(int(0x{:X})));", insn.operands[0], insn.operands[1]);
|
||||
break;
|
||||
|
||||
case PPC_INST_VSPLTISW128:
|
||||
println("\t_mm_store_si128((__m128i*)ctx.v{}.u32, _mm_set1_epi32(0x{:X}));", insn.operands[0], insn.operands[2]);
|
||||
println("\t_mm_store_si128((__m128i*)ctx.v{}.u32, _mm_set1_epi32(int(0x{:X})));", insn.operands[0], insn.operands[2]);
|
||||
break;
|
||||
|
||||
case PPC_INST_VSPLTW:
|
||||
@ -1884,20 +1912,33 @@ int main(int argc, char* argv[])
|
||||
|
||||
case PPC_INST_VUPKD3D128:
|
||||
// TODO: vectorize somehow?
|
||||
// NOTE: for some reason with binutils 2nd operand is multiplied by 4
|
||||
// NOTE: handling vector reversal here too
|
||||
switch (insn.operands[2])
|
||||
switch (insn.operands[2] >> 2)
|
||||
{
|
||||
case 4: // 2 shorts
|
||||
case 0: // D3D color
|
||||
for (size_t i = 0; i < 4; i++)
|
||||
{
|
||||
constexpr size_t indices[] = { 3, 0, 1, 2 };
|
||||
println("\ttemp.f32 = 1.0f;");
|
||||
println("\ttemp.u32 |= ctx.v{}.u8[{}];", insn.operands[1], indices[i]);
|
||||
println("\tctx.v{}.f32[{}] = temp.f32;", insn.operands[0], i);
|
||||
}
|
||||
break;
|
||||
|
||||
case 1: // 2 shorts
|
||||
for (size_t i = 0; i < 2; i++)
|
||||
{
|
||||
println("\ttemp.f32 = 3.0f;");
|
||||
println("\ttemp.s32 += ctx.v{}.s16[{}];", insn.operands[1], 7 - i); // TODO: not sure about the indexing here
|
||||
println("\ttemp.s32 += ctx.v{}.s16[{}];", insn.operands[1], i); // TODO: not sure about the indexing here
|
||||
println("\tctx.v{}.f32[{}] = temp.f32;", insn.operands[0], 3 - i);
|
||||
}
|
||||
println("\tctx.v{}.f32[1] = 0.0f;", insn.operands[0]);
|
||||
println("\tctx.v{}.f32[0] = 1.0f;", insn.operands[0]);
|
||||
break;
|
||||
|
||||
default:
|
||||
println("\t__debugbreak();");
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
@ -1942,6 +1983,13 @@ int main(int argc, char* argv[])
|
||||
std::println("Unrecognized instruction at 0x{:X}: {}", base - 4, insn.opcode->name);
|
||||
break;
|
||||
}
|
||||
|
||||
if (strchr(insn.opcode->name, '.'))
|
||||
{
|
||||
int lastLine = out.find_last_of('\n', out.size() - 2);
|
||||
if (out.find("ctx.cr", lastLine + 1) == std::string::npos)
|
||||
std::println("Instruction at {:X} has RC bit enabled but no comparison was generated", base - 4);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -459,13 +459,13 @@ inline __m128i _mm_adds_epu32(__m128i a, __m128i b)
|
||||
// Rounding average of sixteen packed signed 8-bit lanes.
// SSE2 only offers an unsigned byte average, so each lane has its sign bit
// flipped to move it into the unsigned range, is averaged there with
// _mm_avg_epu8, and then has the sign bit flipped back.
inline __m128i _mm_avg_epi8(__m128i a, __m128i b)
{
    const __m128i sign_bit = _mm_set1_epi8(char(128));
    const __m128i biased_avg = _mm_avg_epu8(_mm_xor_si128(sign_bit, a), _mm_xor_si128(sign_bit, b));
    return _mm_xor_si128(sign_bit, biased_avg);
}
|
||||
|
||||
// Rounding average of eight packed signed 16-bit lanes.
// SSE2 only offers an unsigned word average, so each lane has its sign bit
// flipped to move it into the unsigned range, is averaged there with
// _mm_avg_epu16, and then has the sign bit flipped back.
inline __m128i _mm_avg_epi16(__m128i a, __m128i b)
{
    const __m128i sign_bit = _mm_set1_epi16(short(32768));
    const __m128i biased_avg = _mm_avg_epu16(_mm_xor_si128(sign_bit, a), _mm_xor_si128(sign_bit, b));
    return _mm_xor_si128(sign_bit, biased_avg);
}
|
||||
|
||||
inline __m128 _mm_cvtepu32_ps_(__m128i v)
|
||||
@ -486,13 +486,27 @@ inline __m128i _mm_perm_epi8_(__m128i a, __m128i b, __m128i c)
|
||||
|
||||
// Unsigned greater-than compare of sixteen packed 8-bit lanes.
// SSE2 only has a signed byte compare, so both operands have their sign bit
// flipped first; that maps unsigned ordering onto signed ordering.
// Each result lane is all ones where a > b, otherwise zero.
inline __m128i _mm_cmpgt_epu8(__m128i a, __m128i b)
{
    // Explicit cast: 128 does not fit in a signed char without narrowing.
    const __m128i sign_bit = _mm_set1_epi8(char(128));
    return _mm_cmpgt_epi8(_mm_xor_si128(a, sign_bit), _mm_xor_si128(b, sign_bit));
}
|
||||
|
||||
// Unsigned greater-than compare of eight packed 16-bit lanes.
// SSE2 only has a signed word compare, so both operands have their sign bit
// flipped first; that maps unsigned ordering onto signed ordering.
// Each result lane is all ones where a > b, otherwise zero.
inline __m128i _mm_cmpgt_epu16(__m128i a, __m128i b)
{
    // Explicit cast: 32768 does not fit in a signed short without narrowing.
    const __m128i sign_bit = _mm_set1_epi16(short(32768));
    return _mm_cmpgt_epi16(_mm_xor_si128(a, sign_bit), _mm_xor_si128(b, sign_bit));
}
|
||||
|
||||
// Converts four packed floats to signed 32-bit integers, truncating toward
// zero. Lanes at or above 2^31 become INT32_MAX, lanes below -2^31 become
// INT32_MIN, and NaN lanes become 0.
inline __m128i _mm_vctsxs(__m128 a)
{
    // The truncating conversion yields 0x80000000 for any lane that is out of
    // range or NaN. That is already the right answer for negative overflow.
    __m128i result = _mm_cvttps_epi32(a);

    // Positive overflow: 2^31 itself is not representable in int32, so the
    // comparison must be >= rather than >. XOR with the all-ones mask turns
    // 0x80000000 into 0x7FFFFFFF in exactly those lanes.
    const __m128 overflow_mask = _mm_cmpge_ps(a, _mm_set1_ps(2147483648.0f));
    result = _mm_xor_si128(result, _mm_castps_si128(overflow_mask));

    // NaN lanes compare unordered with themselves; clear them to zero.
    const __m128 ord_mask = _mm_cmpord_ps(a, a);
    return _mm_and_si128(result, _mm_castps_si128(ord_mask));
}
|
||||
|
17
thirdparty/disasm/ppc-dis.c
vendored
17
thirdparty/disasm/ppc-dis.c
vendored
@ -843,10 +843,10 @@ const struct powerpc_operand powerpc_operands[] =
|
||||
{ 3, 18, NULL, NULL, 0 },
|
||||
|
||||
#define VD3D1 VD3D0 + 1
|
||||
{ 2, 16, NULL, NULL, 0 },
|
||||
{ 3, 16, NULL, NULL, 0 },
|
||||
|
||||
#define VD3D2 VD3D1 + 1
|
||||
{ 2, 6, NULL, NULL, 0 },
|
||||
{ 3, 6, NULL, NULL, 0 },
|
||||
|
||||
/* The SIMM field in a VX form instruction. */
|
||||
#define SIMM VD3D2 + 1
|
||||
@ -1825,10 +1825,10 @@ extract_vperm (unsigned long insn,
|
||||
#define VXR_MASK VXR(0x3f, 0x3ff, 1)
|
||||
|
||||
/* An VX128 form instruction. */
|
||||
#define VX128(op, xop) (OP(op) | (((unsigned long)(xop)) & 0x7d0))
|
||||
#define VX128(op, xop) (OP(op) | (((unsigned long)(xop)) & 0x3d0))
|
||||
|
||||
/* The mask for an VX form instruction. */
|
||||
#define VX128_MASK VX(0x3f, 0x7d0)
|
||||
#define VX128_MASK VX(0x3f, 0x3d0)
|
||||
|
||||
/* An VX128 form instruction. */
|
||||
#define VX128_1(op, xop) (OP(op) | (((unsigned long)(xop)) & 0x7f3))
|
||||
@ -2541,11 +2541,10 @@ const struct powerpc_opcode powerpc_opcodes[] = {
|
||||
{ "vminfp128", VX128(6, 704), VX128_MASK, PPCVEC128, { VD128, VA128, VB128 }, PPC_INST_VMINFP128 },
|
||||
{ "vmrghw128", VX128(6, 768), VX128_MASK, PPCVEC128, { VD128, VA128, VB128 }, PPC_INST_VMRGHW128 },
|
||||
{ "vmrglw128", VX128(6, 832), VX128_MASK, PPCVEC128, { VD128, VA128, VB128 }, PPC_INST_VMRGLW128 },
|
||||
{ "vupkhsb128", VX128(6, 896), VX128_MASK, PPCVEC128, { VD128, VB128 }, PPC_INST_VUPKHSB128 },
|
||||
{ "vupklsb128", VX128(6, 960), VX128_MASK, PPCVEC128, { VD128, VB128 }, PPC_INST_VUPKLSB128 },
|
||||
{ "vupkhsh128", VX128(6, 1952), VX128_MASK, PPCVEC128, { VD128, VB128 }, PPC_INST_VUPKHSH128 },
|
||||
{ "vupklsh128", VX128(6, 2016), VX128_MASK, PPCVEC128, { VD128, VB128 }, PPC_INST_VUPKLSH128 },
|
||||
|
||||
{ "vupkhsb128", VX128(6, 896), VX128_MASK, PPCVEC128, { VD128, VB128, VA128 }, PPC_INST_VUPKHSB128 },
|
||||
{ "vupklsb128", VX128(6, 960), VX128_MASK, PPCVEC128, { VD128, VB128, VA128 }, PPC_INST_VUPKLSB128 },
|
||||
//{ "vupkhsh128", VX128(6, 1952), VX128_MASK, PPCVEC128, { VD128, VB128 }, PPC_INST_VUPKHSH128 },
|
||||
//{ "vupklsh128", VX128(6, 2016), VX128_MASK, PPCVEC128, { VD128, VB128 }, PPC_INST_VUPKLSH128 },
|
||||
|
||||
{ "evaddw", VX(4, 512), VX_MASK, PPCSPE, { RS, RA, RB }, PPC_INST_EVADDW },
|
||||
{ "evaddiw", VX(4, 514), VX_MASK, PPCSPE, { RS, RB, UIMM }, PPC_INST_EVADDIW },
|
||||
|
Loading…
x
Reference in New Issue
Block a user