Fix tests.

This commit is contained in:
Skyth 2024-09-22 19:58:22 +03:00
parent 510cff56f2
commit 1914b941a2
6 changed files with 271 additions and 281 deletions

View File

@ -168,7 +168,7 @@ bool Recompiler::Recompile(const Function& fn, uint32_t base, const ppc_insn& in
} }
println("\tdefault:"); println("\tdefault:");
println("\t\t__unreachable();"); println("\t\t__builtin_unreachable();");
println("\t}}"); println("\t}}");
switchTable = switchTables.end(); switchTable = switchTables.end();
@ -738,7 +738,7 @@ bool Recompiler::Recompile(const Function& fn, uint32_t base, const ppc_insn& in
break; break;
case PPC_INST_LWBRX: case PPC_INST_LWBRX:
print("\tctx.r{}.u64 = _byteswap_ulong(PPC_LOAD_U32(", insn.operands[0]); print("\tctx.r{}.u64 = __builtin_bswap32(PPC_LOAD_U32(", insn.operands[0]);
if (insn.operands[1] != 0) if (insn.operands[1] != 0)
print("ctx.r{}.u32 + ", insn.operands[1]); print("ctx.r{}.u32 + ", insn.operands[1]);
println("ctx.r{}.u32));", insn.operands[2]); println("ctx.r{}.u32));", insn.operands[2]);
@ -1022,7 +1022,7 @@ bool Recompiler::Recompile(const Function& fn, uint32_t base, const ppc_insn& in
print("\tctx.cr0.eq = _InterlockedCompareExchange64(reinterpret_cast<__int64*>(base + "); print("\tctx.cr0.eq = _InterlockedCompareExchange64(reinterpret_cast<__int64*>(base + ");
if (insn.operands[1] != 0) if (insn.operands[1] != 0)
print("ctx.r{}.u32 + ", insn.operands[1]); print("ctx.r{}.u32 + ", insn.operands[1]);
println("ctx.r{}.u32), _byteswap_uint64(ctx.r{}.s64), _byteswap_uint64(ctx.reserved.s64)) == _byteswap_uint64(ctx.reserved.s64);", println("ctx.r{}.u32), __builtin_bswap64(ctx.r{}.s64), __builtin_bswap64(ctx.reserved.s64)) == __builtin_bswap64(ctx.reserved.s64);",
insn.operands[2], insn.operands[0]); insn.operands[2], insn.operands[0]);
println("\tctx.cr0.so = ctx.xer.so;"); println("\tctx.cr0.so = ctx.xer.so;");
break; break;
@ -1093,7 +1093,7 @@ bool Recompiler::Recompile(const Function& fn, uint32_t base, const ppc_insn& in
print("\tPPC_STORE_U16("); print("\tPPC_STORE_U16(");
if (insn.operands[1] != 0) if (insn.operands[1] != 0)
print("ctx.r{}.u32 + ", insn.operands[1]); print("ctx.r{}.u32 + ", insn.operands[1]);
println("ctx.r{}.u32, _byteswap_ushort(ctx.r{}.u16));", insn.operands[2], insn.operands[0]); println("ctx.r{}.u32, __builtin_bswap16(ctx.r{}.u16));", insn.operands[2], insn.operands[0]);
break; break;
case PPC_INST_STHX: case PPC_INST_STHX:
@ -1169,7 +1169,7 @@ bool Recompiler::Recompile(const Function& fn, uint32_t base, const ppc_insn& in
print("\tPPC_STORE_U32("); print("\tPPC_STORE_U32(");
if (insn.operands[1] != 0) if (insn.operands[1] != 0)
print("ctx.r{}.u32 + ", insn.operands[1]); print("ctx.r{}.u32 + ", insn.operands[1]);
println("ctx.r{}.u32, _byteswap_ulong(ctx.r{}.u32));", insn.operands[2], insn.operands[0]); println("ctx.r{}.u32, __builtin_bswap32(ctx.r{}.u32));", insn.operands[2], insn.operands[0]);
break; break;
case PPC_INST_STWCX: case PPC_INST_STWCX:
@ -1178,7 +1178,7 @@ bool Recompiler::Recompile(const Function& fn, uint32_t base, const ppc_insn& in
print("\tctx.cr0.eq = _InterlockedCompareExchange(reinterpret_cast<long*>(base + "); print("\tctx.cr0.eq = _InterlockedCompareExchange(reinterpret_cast<long*>(base + ");
if (insn.operands[1] != 0) if (insn.operands[1] != 0)
print("ctx.r{}.u32 + ", insn.operands[1]); print("ctx.r{}.u32 + ", insn.operands[1]);
println("ctx.r{}.u32), _byteswap_ulong(ctx.r{}.s32), _byteswap_ulong(ctx.reserved.s32)) == _byteswap_ulong(ctx.reserved.s32);", println("ctx.r{}.u32), __builtin_bswap32(ctx.r{}.s32), __builtin_bswap32(ctx.reserved.s32)) == __builtin_bswap32(ctx.reserved.s32);",
insn.operands[2], insn.operands[0]); insn.operands[2], insn.operands[0]);
println("\tctx.cr0.so = ctx.xer.so;"); println("\tctx.cr0.so = ctx.xer.so;");
break; break;
@ -1228,7 +1228,7 @@ bool Recompiler::Recompile(const Function& fn, uint32_t base, const ppc_insn& in
break; break;
case PPC_INST_SYNC: case PPC_INST_SYNC:
println("\t__faststorefence();"); // no op
break; break;
case PPC_INST_TDLGEI: case PPC_INST_TDLGEI:
@ -1328,6 +1328,7 @@ bool Recompiler::Recompile(const Function& fn, uint32_t base, const ppc_insn& in
break; break;
} }
case PPC_INST_VCMPBFP:
case PPC_INST_VCMPBFP128: case PPC_INST_VCMPBFP128:
println("\t__debugbreak();"); println("\t__debugbreak();");
break; break;
@ -1377,6 +1378,7 @@ bool Recompiler::Recompile(const Function& fn, uint32_t base, const ppc_insn& in
println("\t_mm_store_si128((__m128i*)ctx.v{}.u8, _mm_cmpgt_epu16(_mm_load_si128((__m128i*)ctx.v{}.u16), _mm_load_si128((__m128i*)ctx.v{}.u16)));", insn.operands[0], insn.operands[1], insn.operands[2]); println("\t_mm_store_si128((__m128i*)ctx.v{}.u8, _mm_cmpgt_epu16(_mm_load_si128((__m128i*)ctx.v{}.u16), _mm_load_si128((__m128i*)ctx.v{}.u16)));", insn.operands[0], insn.operands[1], insn.operands[2]);
break; break;
case PPC_INST_VEXPTEFP:
case PPC_INST_VEXPTEFP128: case PPC_INST_VEXPTEFP128:
// TODO: vectorize // TODO: vectorize
println("\tctx.fpscr.setFlushMode(true);"); println("\tctx.fpscr.setFlushMode(true);");
@ -1384,6 +1386,7 @@ bool Recompiler::Recompile(const Function& fn, uint32_t base, const ppc_insn& in
println("\tctx.v{}.f32[{}] = exp2f(ctx.v{}.f32[{}]);", insn.operands[0], i, insn.operands[1], i); println("\tctx.v{}.f32[{}] = exp2f(ctx.v{}.f32[{}]);", insn.operands[0], i, insn.operands[1], i);
break; break;
case PPC_INST_VLOGEFP:
case PPC_INST_VLOGEFP128: case PPC_INST_VLOGEFP128:
// TODO: vectorize // TODO: vectorize
println("\tctx.fpscr.setFlushMode(true);"); println("\tctx.fpscr.setFlushMode(true);");
@ -1509,6 +1512,7 @@ bool Recompiler::Recompile(const Function& fn, uint32_t base, const ppc_insn& in
break; break;
case PPC_INST_VPKSHUS: case PPC_INST_VPKSHUS:
case PPC_INST_VPKSHUS128:
println("\t_mm_store_si128((__m128i*)ctx.v{}.u8, _mm_packus_epi16(_mm_load_si128((__m128i*)ctx.v{}.s16), _mm_load_si128((__m128i*)ctx.v{}.s16)));", insn.operands[0], insn.operands[2], insn.operands[1]); println("\t_mm_store_si128((__m128i*)ctx.v{}.u8, _mm_packus_epi16(_mm_load_si128((__m128i*)ctx.v{}.s16), _mm_load_si128((__m128i*)ctx.v{}.s16)));", insn.operands[0], insn.operands[2], insn.operands[1]);
break; break;
@ -1518,6 +1522,7 @@ bool Recompiler::Recompile(const Function& fn, uint32_t base, const ppc_insn& in
println("\t_mm_store_ps(ctx.v{}.f32, _mm_rcp_ps(_mm_load_ps(ctx.v{}.f32)));", insn.operands[0], insn.operands[1]); println("\t_mm_store_ps(ctx.v{}.f32, _mm_rcp_ps(_mm_load_ps(ctx.v{}.f32)));", insn.operands[0], insn.operands[1]);
break; break;
case PPC_INST_VRFIM:
case PPC_INST_VRFIM128: case PPC_INST_VRFIM128:
println("\tctx.fpscr.setFlushMode(true);"); println("\tctx.fpscr.setFlushMode(true);");
println("\t_mm_store_ps(ctx.v{}.f32, _mm_round_ps(_mm_load_ps(ctx.v{}.f32), _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC));", insn.operands[0], insn.operands[1]); println("\t_mm_store_ps(ctx.v{}.f32, _mm_round_ps(_mm_load_ps(ctx.v{}.f32), _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC));", insn.operands[0], insn.operands[1]);
@ -1529,6 +1534,7 @@ bool Recompiler::Recompile(const Function& fn, uint32_t base, const ppc_insn& in
println("\t_mm_store_ps(ctx.v{}.f32, _mm_round_ps(_mm_load_ps(ctx.v{}.f32), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC));", insn.operands[0], insn.operands[1]); println("\t_mm_store_ps(ctx.v{}.f32, _mm_round_ps(_mm_load_ps(ctx.v{}.f32), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC));", insn.operands[0], insn.operands[1]);
break; break;
case PPC_INST_VRFIZ:
case PPC_INST_VRFIZ128: case PPC_INST_VRFIZ128:
println("\tctx.fpscr.setFlushMode(true);"); println("\tctx.fpscr.setFlushMode(true);");
println("\t_mm_store_ps(ctx.v{}.f32, _mm_round_ps(_mm_load_ps(ctx.v{}.f32), _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC));", insn.operands[0], insn.operands[1]); println("\t_mm_store_ps(ctx.v{}.f32, _mm_round_ps(_mm_load_ps(ctx.v{}.f32), _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC));", insn.operands[0], insn.operands[1]);
@ -1562,6 +1568,7 @@ bool Recompiler::Recompile(const Function& fn, uint32_t base, const ppc_insn& in
println("\t_mm_store_si128((__m128i*)ctx.v{}.u8, _mm_alignr_epi8(_mm_load_si128((__m128i*)ctx.v{}.u8), _mm_load_si128((__m128i*)ctx.v{}.u8), {}));", insn.operands[0], insn.operands[1], insn.operands[2], 16 - insn.operands[3]); println("\t_mm_store_si128((__m128i*)ctx.v{}.u8, _mm_alignr_epi8(_mm_load_si128((__m128i*)ctx.v{}.u8), _mm_load_si128((__m128i*)ctx.v{}.u8), {}));", insn.operands[0], insn.operands[1], insn.operands[2], 16 - insn.operands[3]);
break; break;
case PPC_INST_VSLW:
case PPC_INST_VSLW128: case PPC_INST_VSLW128:
// TODO: vectorize, ensure endianness is correct // TODO: vectorize, ensure endianness is correct
for (size_t i = 0; i < 4; i++) for (size_t i = 0; i < 4; i++)
@ -1608,6 +1615,7 @@ bool Recompiler::Recompile(const Function& fn, uint32_t base, const ppc_insn& in
println("\t_mm_store_si128((__m128i*)ctx.v{}.u8, _mm_vsr(_mm_load_si128((__m128i*)ctx.v{}.u8), _mm_load_si128((__m128i*)ctx.v{}.u8)));", insn.operands[0], insn.operands[1], insn.operands[2]); println("\t_mm_store_si128((__m128i*)ctx.v{}.u8, _mm_vsr(_mm_load_si128((__m128i*)ctx.v{}.u8), _mm_load_si128((__m128i*)ctx.v{}.u8)));", insn.operands[0], insn.operands[1], insn.operands[2]);
break; break;
case PPC_INST_VSRAW:
case PPC_INST_VSRAW128: case PPC_INST_VSRAW128:
// TODO: vectorize, ensure endianness is correct // TODO: vectorize, ensure endianness is correct
for (size_t i = 0; i < 4; i++) for (size_t i = 0; i < 4; i++)
@ -1676,6 +1684,7 @@ bool Recompiler::Recompile(const Function& fn, uint32_t base, const ppc_insn& in
} }
break; break;
case PPC_INST_VUPKHSB:
case PPC_INST_VUPKHSB128: case PPC_INST_VUPKHSB128:
println("\t_mm_store_si128((__m128i*)ctx.v{}.s16, _mm_cvtepi8_epi16(_mm_unpackhi_epi64(_mm_load_si128((__m128i*)ctx.v{}.s8), _mm_load_si128((__m128i*)ctx.v{}.s8))));", insn.operands[0], insn.operands[1], insn.operands[1]); println("\t_mm_store_si128((__m128i*)ctx.v{}.s16, _mm_cvtepi8_epi16(_mm_unpackhi_epi64(_mm_load_si128((__m128i*)ctx.v{}.s8), _mm_load_si128((__m128i*)ctx.v{}.s8))));", insn.operands[0], insn.operands[1], insn.operands[1]);
break; break;
@ -1685,6 +1694,7 @@ bool Recompiler::Recompile(const Function& fn, uint32_t base, const ppc_insn& in
println("\t_mm_store_si128((__m128i*)ctx.v{}.s32, _mm_cvtepi16_epi32(_mm_unpackhi_epi64(_mm_load_si128((__m128i*)ctx.v{}.s16), _mm_load_si128((__m128i*)ctx.v{}.s16))));", insn.operands[0], insn.operands[1], insn.operands[1]); println("\t_mm_store_si128((__m128i*)ctx.v{}.s32, _mm_cvtepi16_epi32(_mm_unpackhi_epi64(_mm_load_si128((__m128i*)ctx.v{}.s16), _mm_load_si128((__m128i*)ctx.v{}.s16))));", insn.operands[0], insn.operands[1], insn.operands[1]);
break; break;
case PPC_INST_VUPKLSB:
case PPC_INST_VUPKLSB128: case PPC_INST_VUPKLSB128:
println("\t_mm_store_si128((__m128i*)ctx.v{}.s32, _mm_cvtepi8_epi16(_mm_load_si128((__m128i*)ctx.v{}.s16)));", insn.operands[0], insn.operands[1]); println("\t_mm_store_si128((__m128i*)ctx.v{}.s32, _mm_cvtepi8_epi16(_mm_load_si128((__m128i*)ctx.v{}.s16)));", insn.operands[0], insn.operands[1]);
break; break;
@ -1780,6 +1790,11 @@ bool Recompiler::Recompile(const Function& fn)
} }
} }
#if 0
if (insn.opcode == nullptr || (insn.opcode->id != PPC_INST_B && insn.opcode->id != PPC_INST_BCTR && insn.opcode->id != PPC_INST_BLR))
std::println("Function at {:X} ends prematurely with instruction {} at {:X}", fn.base, insn.opcode != nullptr ? insn.opcode->name : "INVALID", base - 4);
#endif
println("}}\n"); println("}}\n");
return allRecompiled; return allRecompiled;

View File

@ -72,9 +72,10 @@ void SWARecompiler::Analyse()
} }
} }
auto hardcodedFuncCheck = [&](Function& f) auto hardcodedFuncCheck = [&](uint8_t* data, Function& f)
{ {
if (f.base == 0x824E7EF0) f.size = 0x98; if (*(uint32_t*)(data + 4) == 0x04000048) f.size = 0x8; // shifted ptr tail call
else if (f.base == 0x824E7EF0) f.size = 0x98;
else if (f.base == 0x824E7F28) f.size = 0x60; else if (f.base == 0x824E7F28) f.size = 0x60;
else if (f.base == 0x82C980E8) f.size = 0x110; else if (f.base == 0x82C980E8) f.size = 0x110;
else if (f.base == 0x82CF7080) f.size = 0x80; else if (f.base == 0x82CF7080) f.size = 0x80;
@ -137,8 +138,9 @@ void SWARecompiler::Analyse()
if (address >= section.base && address < section.base + section.size && image.symbols.find(address) == image.symbols.end()) if (address >= section.base && address < section.base + section.size && image.symbols.find(address) == image.symbols.end())
{ {
auto& fn = functions.emplace_back(Function::Analyze(section.data + address - section.base, section.base + section.size - address, address)); auto data = section.data + address - section.base;
hardcodedFuncCheck(fn); auto& fn = functions.emplace_back(Function::Analyze(data, section.base + section.size - address, address));
hardcodedFuncCheck(data, fn);
image.symbols.emplace(std::format("sub_{:X}", fn.base), fn.base, fn.size, Symbol_Function); image.symbols.emplace(std::format("sub_{:X}", fn.base), fn.base, fn.size, Symbol_Function);
} }
} }
@ -179,7 +181,7 @@ void SWARecompiler::Analyse()
else else
{ {
auto& fn = functions.emplace_back(Function::Analyze(data, dataEnd - data, base)); auto& fn = functions.emplace_back(Function::Analyze(data, dataEnd - data, base));
hardcodedFuncCheck(fn); hardcodedFuncCheck(data, fn);
image.symbols.emplace(std::format("sub_{:X}", fn.base), fn.base, fn.size, Symbol_Function); image.symbols.emplace(std::format("sub_{:X}", fn.base), fn.base, fn.size, Symbol_Function);
base += fn.size; base += fn.size;

View File

@ -98,8 +98,8 @@ void TestRecompiler::RecompileTests(const char* srcDirectoryPath, const char* ds
std::println(file, "#include <ppc_context.h>"); std::println(file, "#include <ppc_context.h>");
std::println(file, "#include <Windows.h>"); std::println(file, "#include <Windows.h>");
std::println(file, "#include <print>\n"); std::println(file, "#include <print>\n");
std::println(file, "#define PPC_CHECK_VALUE_U(lhs, rhs) if (lhs != rhs) std::println(__FUNCTION__ \" \" #lhs \" EXPECTED \" #rhs \" ACTUAL {{:X}}\", lhs)\n"); std::println(file, "#define PPC_CHECK_VALUE_U(f, lhs, rhs) if (lhs != rhs) std::println(#f \" \" #lhs \" EXPECTED \" #rhs \" ACTUAL {{:X}}\", lhs)\n");
std::println(file, "#define PPC_CHECK_VALUE_F(lhs, rhs) if (lhs != rhs) std::println(__FUNCTION__ \" \" #lhs \" EXPECTED \" #rhs \" ACTUAL {{}}\", lhs)\n"); std::println(file, "#define PPC_CHECK_VALUE_F(f, lhs, rhs) if (lhs != rhs) std::println(#f \" \" #lhs \" EXPECTED \" #rhs \" ACTUAL {{}}\", lhs)\n");
for (auto& [fn, addr] : functions) for (auto& [fn, addr] : functions)
{ {
@ -212,15 +212,16 @@ void TestRecompiler::RecompileTests(const char* srcDirectoryPath, const char* ds
int commaIndex2 = str.find(',', commaIndex1 + 1); int commaIndex2 = str.find(',', commaIndex1 + 1);
int closingBracketIndex = str.find(']', commaIndex2 + 1); int closingBracketIndex = str.find(']', commaIndex2 + 1);
std::println(file, "\tPPC_CHECK_VALUE_U(ctx.{}.u32[3], 0x{});", reg, str.substr(openingBracketIndex + 1, commaIndex0 - openingBracketIndex - 1)); std::println(file, "\tPPC_CHECK_VALUE_U({}, ctx.{}.u32[3], 0x{});", name, reg, str.substr(openingBracketIndex + 1, commaIndex0 - openingBracketIndex - 1));
std::println(file, "\tPPC_CHECK_VALUE_U(ctx.{}.u32[2], 0x{});", reg, str.substr(commaIndex0 + 2, commaIndex1 - commaIndex0 - 2)); std::println(file, "\tPPC_CHECK_VALUE_U({}, ctx.{}.u32[2], 0x{});", name, reg, str.substr(commaIndex0 + 2, commaIndex1 - commaIndex0 - 2));
std::println(file, "\tPPC_CHECK_VALUE_U(ctx.{}.u32[1], 0x{});", reg, str.substr(commaIndex1 + 2, commaIndex2 - commaIndex1 - 2)); std::println(file, "\tPPC_CHECK_VALUE_U({}, ctx.{}.u32[1], 0x{});", name, reg, str.substr(commaIndex1 + 2, commaIndex2 - commaIndex1 - 2));
std::println(file, "\tPPC_CHECK_VALUE_U(ctx.{}.u32[0], 0x{});", reg, str.substr(commaIndex2 + 2, closingBracketIndex - commaIndex2 - 2)); std::println(file, "\tPPC_CHECK_VALUE_U({}, ctx.{}.u32[0], 0x{});", name, reg, str.substr(commaIndex2 + 2, closingBracketIndex - commaIndex2 - 2));
} }
else else
{ {
std::println(file, "\tPPC_CHECK_VALUE_{}(ctx.{}.{}64, {});", std::println(file, "\tPPC_CHECK_VALUE_{}({}, ctx.{}.{}64, {});",
str.find('.', secondSpaceIndex) != std::string::npos ? 'F' : 'U', str.find('.', secondSpaceIndex) != std::string::npos ? 'F' : 'U',
name,
reg, reg,
str.find('.', secondSpaceIndex) != std::string::npos ? 'f' : 'u', str.find('.', secondSpaceIndex) != std::string::npos ? 'f' : 'u',
str.substr(secondSpaceIndex + 1)); str.substr(secondSpaceIndex + 1));
@ -238,7 +239,7 @@ void TestRecompiler::RecompileTests(const char* srcDirectoryPath, const char* ds
{ {
if (str[i] != ' ') if (str[i] != ' ')
{ {
std::println(file, "\tPPC_CHECK_VALUE_U(base[0x{} + 0x{:X}], 0x{}{});", address, j, str[i], str[i + 1]); std::println(file, "\tPPC_CHECK_VALUE_U({}, base[0x{} + 0x{:X}], 0x{}{});", name, address, j, str[i], str[i + 1]);
++i; // the loop adds another ++i; // the loop adds another
++j; ++j;
} }
@ -266,9 +267,10 @@ void TestRecompiler::RecompileTests(const char* srcDirectoryPath, const char* ds
} }
} }
std::println(file, "void main() {{"); std::println(file, "int main() {{");
std::println(file, "\tuint8_t* base = reinterpret_cast<uint8_t*>(VirtualAlloc(nullptr, 0x100000000, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE));"); std::println(file, "\tuint8_t* base = reinterpret_cast<uint8_t*>(VirtualAlloc(nullptr, 0x100000000, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE));");
fwrite(main.data(), 1, main.size(), file); fwrite(main.data(), 1, main.size(), file);
std::println(file, "\treturn 0;");
std::println(file, "}}"); std::println(file, "}}");
fclose(file); fclose(file);

View File

@ -1,6 +1,10 @@
project("PowerTests") project("PowerTests")
add_compile_options(
"-march=x86-64-v3"
"-Wno-unused-label"
"-Wno-unused-variable")
file(GLOB TestFiles *.cpp) file(GLOB TestFiles *.cpp)
add_executable(PowerTests ${TestFiles}) add_executable(PowerTests ${TestFiles})
target_link_libraries(PowerTests PUBLIC PowerUtils) target_link_libraries(PowerTests PUBLIC PowerUtils)

View File

@ -11,12 +11,7 @@ void Image::Map(const std::string_view& name, size_t base, uint32_t size, uint8_
const void* Image::Find(size_t address) const const void* Image::Find(size_t address) const
{ {
const auto section = sections.lower_bound(address); const auto section = std::prev(sections.upper_bound(address));
if (section == sections.end())
{
return nullptr;
}
return section->data + (address - section->base); return section->data + (address - section->base);
} }

View File

@ -6,37 +6,36 @@
#ifdef __clang__ #ifdef __clang__
#include <x86intrin.h> #include <x86intrin.h>
#define __restrict __restrict__
#define _byteswap_ushort __builtin_bswap16
#define _byteswap_ulong __builtin_bswap32
#define _byteswap_uint64 __builtin_bswap64
#define isnan __builtin_isnan
#define __assume __builtin_assume
#define __unreachable() __builtin_unreachable()
#else #else
#include <intrin.h> #include <intrin.h>
#define __unreachable() __assume(0) #define __restrict__ __restrict
#define __builtin_bswap16 _byteswap_ushort
#define __builtin_bswap32 _byteswap_ulong
#define __builtin_bswap64 _byteswap_uint64
#define __builtin_isnan isnan
#define __builtin_assume __assume
#define __builtin_unreachable() __assume(0)
#endif #endif
#define PPC_FUNC(x) extern "C" void x(PPCContext& __restrict ctx, uint8_t* base) noexcept #define PPC_FUNC(x) extern "C" void x(PPCContext& __restrict__ ctx, uint8_t* base) noexcept
#define PPC_FUNC_PROLOGUE() \ #define PPC_FUNC_PROLOGUE() \
__assume((reinterpret_cast<size_t>(base) & 0xFFFFFFFF) == 0); \ __builtin_assume((reinterpret_cast<size_t>(base) & 0xFFFFFFFF) == 0); \
PPCRegister temp; \ PPCRegister temp; \
PPCVRegister vtemp; \ PPCVRegister vtemp; \
uint32_t ea uint32_t ea
#define PPC_LOAD_U8(x) *(uint8_t*)(base + (x)) #define PPC_LOAD_U8(x) *(uint8_t*)(base + (x))
#define PPC_LOAD_U16(x) _byteswap_ushort(*(uint16_t*)(base + (x))) #define PPC_LOAD_U16(x) __builtin_bswap16(*(uint16_t*)(base + (x)))
#define PPC_LOAD_U32(x) _byteswap_ulong(*(uint32_t*)(base + (x))) #define PPC_LOAD_U32(x) __builtin_bswap32(*(uint32_t*)(base + (x)))
#define PPC_LOAD_U64(x) _byteswap_uint64(*(uint64_t*)(base + (x))) #define PPC_LOAD_U64(x) __builtin_bswap64(*(uint64_t*)(base + (x)))
#define PPC_STORE_U8(x, y) *(uint8_t*)(base + (x)) = (y) #define PPC_STORE_U8(x, y) *(uint8_t*)(base + (x)) = (y)
#define PPC_STORE_U16(x, y) *(uint16_t*)(base + (x)) = _byteswap_ushort(y) #define PPC_STORE_U16(x, y) *(uint16_t*)(base + (x)) = __builtin_bswap16(y)
#define PPC_STORE_U32(x, y) *(uint32_t*)(base + (x)) = _byteswap_ulong(y) #define PPC_STORE_U32(x, y) *(uint32_t*)(base + (x)) = __builtin_bswap32(y)
#define PPC_STORE_U64(x, y) *(uint64_t*)(base + (x)) = _byteswap_uint64(y) #define PPC_STORE_U64(x, y) *(uint64_t*)(base + (x)) = __builtin_bswap64(y)
typedef void PPCFunc(struct PPCContext& __restrict ctx, uint8_t* base); typedef void PPCFunc(struct PPCContext& __restrict__ ctx, uint8_t* base);
struct PPCFuncMapping struct PPCFuncMapping
{ {
@ -92,10 +91,10 @@ struct PPCCRRegister
void compare(double left, double right) void compare(double left, double right)
{ {
lt = left < right; un = __builtin_isnan(left) || __builtin_isnan(right);
gt = left > right; lt = !un && (left < right);
eq = left == right; gt = !un && (left > right);
un = isnan(left) || isnan(right); eq = !un && (left == right);
} }
void setFromMask(__m128 mask, int imm) void setFromMask(__m128 mask, int imm)
@ -171,240 +170,213 @@ struct PPCContext
PPCRegister ctr; PPCRegister ctr;
PPCXERRegister xer; PPCXERRegister xer;
PPCRegister reserved; PPCRegister reserved;
uint32_t msr; uint32_t msr = 0x200A000;
PPCCRRegister cr0;
PPCCRRegister cr1;
PPCCRRegister cr2;
PPCCRRegister cr3;
PPCCRRegister cr4;
PPCCRRegister cr5;
PPCCRRegister cr6;
PPCCRRegister cr7;
PPCRegister r0;
PPCRegister r1;
PPCRegister r2;
PPCRegister r3;
PPCRegister r4;
PPCRegister r5;
PPCRegister r6;
PPCRegister r7;
PPCRegister r8;
PPCRegister r9;
PPCRegister r10;
PPCRegister r11;
PPCRegister r12;
PPCRegister r13;
PPCRegister r14;
PPCRegister r15;
PPCRegister r16;
PPCRegister r17;
PPCRegister r18;
PPCRegister r19;
PPCRegister r20;
PPCRegister r21;
PPCRegister r22;
PPCRegister r23;
PPCRegister r24;
PPCRegister r25;
PPCRegister r26;
PPCRegister r27;
PPCRegister r28;
PPCRegister r29;
PPCRegister r30;
PPCRegister r31;
PPCFPSCRRegister fpscr; PPCFPSCRRegister fpscr;
union PPCRegister f0;
{ PPCRegister f1;
struct PPCRegister f2;
{ PPCRegister f3;
PPCCRRegister cr0; PPCRegister f4;
PPCCRRegister cr1; PPCRegister f5;
PPCCRRegister cr2; PPCRegister f6;
PPCCRRegister cr3; PPCRegister f7;
PPCCRRegister cr4; PPCRegister f8;
PPCCRRegister cr5; PPCRegister f9;
PPCCRRegister cr6; PPCRegister f10;
PPCCRRegister cr7; PPCRegister f11;
}; PPCRegister f12;
PPCCRRegister cr[8]; PPCRegister f13;
}; PPCRegister f14;
PPCRegister f15;
PPCRegister f16;
PPCRegister f17;
PPCRegister f18;
PPCRegister f19;
PPCRegister f20;
PPCRegister f21;
PPCRegister f22;
PPCRegister f23;
PPCRegister f24;
PPCRegister f25;
PPCRegister f26;
PPCRegister f27;
PPCRegister f28;
PPCRegister f29;
PPCRegister f30;
PPCRegister f31;
union PPCVRegister v0;
{ PPCVRegister v1;
struct PPCVRegister v2;
{ PPCVRegister v3;
PPCRegister r0; PPCVRegister v4;
PPCRegister r1; PPCVRegister v5;
PPCRegister r2; PPCVRegister v6;
PPCRegister r3; PPCVRegister v7;
PPCRegister r4; PPCVRegister v8;
PPCRegister r5; PPCVRegister v9;
PPCRegister r6; PPCVRegister v10;
PPCRegister r7; PPCVRegister v11;
PPCRegister r8; PPCVRegister v12;
PPCRegister r9; PPCVRegister v13;
PPCRegister r10; PPCVRegister v14;
PPCRegister r11; PPCVRegister v15;
PPCRegister r12; PPCVRegister v16;
PPCRegister r13; PPCVRegister v17;
PPCRegister r14; PPCVRegister v18;
PPCRegister r15; PPCVRegister v19;
PPCRegister r16; PPCVRegister v20;
PPCRegister r17; PPCVRegister v21;
PPCRegister r18; PPCVRegister v22;
PPCRegister r19; PPCVRegister v23;
PPCRegister r20; PPCVRegister v24;
PPCRegister r21; PPCVRegister v25;
PPCRegister r22; PPCVRegister v26;
PPCRegister r23; PPCVRegister v27;
PPCRegister r24; PPCVRegister v28;
PPCRegister r25; PPCVRegister v29;
PPCRegister r26; PPCVRegister v30;
PPCRegister r27; PPCVRegister v31;
PPCRegister r28; PPCVRegister v32;
PPCRegister r29; PPCVRegister v33;
PPCRegister r30; PPCVRegister v34;
PPCRegister r31; PPCVRegister v35;
}; PPCVRegister v36;
PPCRegister r[32]; PPCVRegister v37;
}; PPCVRegister v38;
PPCVRegister v39;
union PPCVRegister v40;
{ PPCVRegister v41;
struct PPCVRegister v42;
{ PPCVRegister v43;
PPCRegister f0; PPCVRegister v44;
PPCRegister f1; PPCVRegister v45;
PPCRegister f2; PPCVRegister v46;
PPCRegister f3; PPCVRegister v47;
PPCRegister f4; PPCVRegister v48;
PPCRegister f5; PPCVRegister v49;
PPCRegister f6; PPCVRegister v50;
PPCRegister f7; PPCVRegister v51;
PPCRegister f8; PPCVRegister v52;
PPCRegister f9; PPCVRegister v53;
PPCRegister f10; PPCVRegister v54;
PPCRegister f11; PPCVRegister v55;
PPCRegister f12; PPCVRegister v56;
PPCRegister f13; PPCVRegister v57;
PPCRegister f14; PPCVRegister v58;
PPCRegister f15; PPCVRegister v59;
PPCRegister f16; PPCVRegister v60;
PPCRegister f17; PPCVRegister v61;
PPCRegister f18; PPCVRegister v62;
PPCRegister f19; PPCVRegister v63;
PPCRegister f20; PPCVRegister v64;
PPCRegister f21; PPCVRegister v65;
PPCRegister f22; PPCVRegister v66;
PPCRegister f23; PPCVRegister v67;
PPCRegister f24; PPCVRegister v68;
PPCRegister f25; PPCVRegister v69;
PPCRegister f26; PPCVRegister v70;
PPCRegister f27; PPCVRegister v71;
PPCRegister f28; PPCVRegister v72;
PPCRegister f29; PPCVRegister v73;
PPCRegister f30; PPCVRegister v74;
PPCRegister f31; PPCVRegister v75;
}; PPCVRegister v76;
PPCRegister f[32]; PPCVRegister v77;
}; PPCVRegister v78;
PPCVRegister v79;
union PPCVRegister v80;
{ PPCVRegister v81;
struct PPCVRegister v82;
{ PPCVRegister v83;
PPCVRegister v0; PPCVRegister v84;
PPCVRegister v1; PPCVRegister v85;
PPCVRegister v2; PPCVRegister v86;
PPCVRegister v3; PPCVRegister v87;
PPCVRegister v4; PPCVRegister v88;
PPCVRegister v5; PPCVRegister v89;
PPCVRegister v6; PPCVRegister v90;
PPCVRegister v7; PPCVRegister v91;
PPCVRegister v8; PPCVRegister v92;
PPCVRegister v9; PPCVRegister v93;
PPCVRegister v10; PPCVRegister v94;
PPCVRegister v11; PPCVRegister v95;
PPCVRegister v12; PPCVRegister v96;
PPCVRegister v13; PPCVRegister v97;
PPCVRegister v14; PPCVRegister v98;
PPCVRegister v15; PPCVRegister v99;
PPCVRegister v16; PPCVRegister v100;
PPCVRegister v17; PPCVRegister v101;
PPCVRegister v18; PPCVRegister v102;
PPCVRegister v19; PPCVRegister v103;
PPCVRegister v20; PPCVRegister v104;
PPCVRegister v21; PPCVRegister v105;
PPCVRegister v22; PPCVRegister v106;
PPCVRegister v23; PPCVRegister v107;
PPCVRegister v24; PPCVRegister v108;
PPCVRegister v25; PPCVRegister v109;
PPCVRegister v26; PPCVRegister v110;
PPCVRegister v27; PPCVRegister v111;
PPCVRegister v28; PPCVRegister v112;
PPCVRegister v29; PPCVRegister v113;
PPCVRegister v30; PPCVRegister v114;
PPCVRegister v31; PPCVRegister v115;
PPCVRegister v32; PPCVRegister v116;
PPCVRegister v33; PPCVRegister v117;
PPCVRegister v34; PPCVRegister v118;
PPCVRegister v35; PPCVRegister v119;
PPCVRegister v36; PPCVRegister v120;
PPCVRegister v37; PPCVRegister v121;
PPCVRegister v38; PPCVRegister v122;
PPCVRegister v39; PPCVRegister v123;
PPCVRegister v40; PPCVRegister v124;
PPCVRegister v41; PPCVRegister v125;
PPCVRegister v42; PPCVRegister v126;
PPCVRegister v43; PPCVRegister v127;
PPCVRegister v44;
PPCVRegister v45;
PPCVRegister v46;
PPCVRegister v47;
PPCVRegister v48;
PPCVRegister v49;
PPCVRegister v50;
PPCVRegister v51;
PPCVRegister v52;
PPCVRegister v53;
PPCVRegister v54;
PPCVRegister v55;
PPCVRegister v56;
PPCVRegister v57;
PPCVRegister v58;
PPCVRegister v59;
PPCVRegister v60;
PPCVRegister v61;
PPCVRegister v62;
PPCVRegister v63;
PPCVRegister v64;
PPCVRegister v65;
PPCVRegister v66;
PPCVRegister v67;
PPCVRegister v68;
PPCVRegister v69;
PPCVRegister v70;
PPCVRegister v71;
PPCVRegister v72;
PPCVRegister v73;
PPCVRegister v74;
PPCVRegister v75;
PPCVRegister v76;
PPCVRegister v77;
PPCVRegister v78;
PPCVRegister v79;
PPCVRegister v80;
PPCVRegister v81;
PPCVRegister v82;
PPCVRegister v83;
PPCVRegister v84;
PPCVRegister v85;
PPCVRegister v86;
PPCVRegister v87;
PPCVRegister v88;
PPCVRegister v89;
PPCVRegister v90;
PPCVRegister v91;
PPCVRegister v92;
PPCVRegister v93;
PPCVRegister v94;
PPCVRegister v95;
PPCVRegister v96;
PPCVRegister v97;
PPCVRegister v98;
PPCVRegister v99;
PPCVRegister v100;
PPCVRegister v101;
PPCVRegister v102;
PPCVRegister v103;
PPCVRegister v104;
PPCVRegister v105;
PPCVRegister v106;
PPCVRegister v107;
PPCVRegister v108;
PPCVRegister v109;
PPCVRegister v110;
PPCVRegister v111;
PPCVRegister v112;
PPCVRegister v113;
PPCVRegister v114;
PPCVRegister v115;
PPCVRegister v116;
PPCVRegister v117;
PPCVRegister v118;
PPCVRegister v119;
PPCVRegister v120;
PPCVRegister v121;
PPCVRegister v122;
PPCVRegister v123;
PPCVRegister v124;
PPCVRegister v125;
PPCVRegister v126;
PPCVRegister v127;
};
PPCVRegister v[128];
};
}; };
inline uint8_t VectorMaskL[] = inline uint8_t VectorMaskL[] =