Added more instructions & added changes

This commit is contained in:
dennis 2025-03-05 09:10:24 +02:00
parent 914b61d986
commit 3f02123e7e
6 changed files with 582 additions and 87 deletions

View File

@ -98,37 +98,55 @@ void Recompiler::Analyse()
for (size_t i = 14; i < 128; i++) for (size_t i = 14; i < 128; i++)
{ {
if (i < 32) if (i < 32)
{
if (config.restGpr14Address != 0)
{ {
auto& restgpr = functions.emplace_back(); auto& restgpr = functions.emplace_back();
restgpr.base = config.restGpr14Address + (i - 14) * 4; restgpr.base = config.restGpr14Address + (i - 14) * 4;
restgpr.size = (32 - i) * 4 + 12; restgpr.size = (32 - i) * 4 + 12;
image.symbols.emplace(Symbol{ fmt::format("__restgprlr_{}", i), restgpr.base, restgpr.size, Symbol_Function }); image.symbols.emplace(Symbol{ fmt::format("__restgprlr_{}", i), restgpr.base, restgpr.size, Symbol_Function });
}
if (config.saveGpr14Address != 0)
{
auto& savegpr = functions.emplace_back(); auto& savegpr = functions.emplace_back();
savegpr.base = config.saveGpr14Address + (i - 14) * 4; savegpr.base = config.saveGpr14Address + (i - 14) * 4;
savegpr.size = (32 - i) * 4 + 8; savegpr.size = (32 - i) * 4 + 8;
image.symbols.emplace(fmt::format("__savegprlr_{}", i), savegpr.base, savegpr.size, Symbol_Function); image.symbols.emplace(fmt::format("__savegprlr_{}", i), savegpr.base, savegpr.size, Symbol_Function);
}
if (config.restFpr14Address != 0)
{
auto& restfpr = functions.emplace_back(); auto& restfpr = functions.emplace_back();
restfpr.base = config.restFpr14Address + (i - 14) * 4; restfpr.base = config.restFpr14Address + (i - 14) * 4;
restfpr.size = (32 - i) * 4 + 4; restfpr.size = (32 - i) * 4 + 4;
image.symbols.emplace(fmt::format("__restfpr_{}", i), restfpr.base, restfpr.size, Symbol_Function); image.symbols.emplace(fmt::format("__restfpr_{}", i), restfpr.base, restfpr.size, Symbol_Function);
}
if (config.saveFpr14Address != 0)
{
auto& savefpr = functions.emplace_back(); auto& savefpr = functions.emplace_back();
savefpr.base = config.saveFpr14Address + (i - 14) * 4; savefpr.base = config.saveFpr14Address + (i - 14) * 4;
savefpr.size = (32 - i) * 4 + 4; savefpr.size = (32 - i) * 4 + 4;
image.symbols.emplace(fmt::format("__savefpr_{}", i), savefpr.base, savefpr.size, Symbol_Function); image.symbols.emplace(fmt::format("__savefpr_{}", i), savefpr.base, savefpr.size, Symbol_Function);
}
if (config.restVmx14Address != 0)
{
auto& restvmx = functions.emplace_back(); auto& restvmx = functions.emplace_back();
restvmx.base = config.restVmx14Address + (i - 14) * 8; restvmx.base = config.restVmx14Address + (i - 14) * 8;
restvmx.size = (32 - i) * 8 + 4; restvmx.size = (32 - i) * 8 + 4;
image.symbols.emplace(fmt::format("__restvmx_{}", i), restvmx.base, restvmx.size, Symbol_Function); image.symbols.emplace(fmt::format("__restvmx_{}", i), restvmx.base, restvmx.size, Symbol_Function);
}
if (config.saveVmx14Address != 0)
{
auto& savevmx = functions.emplace_back(); auto& savevmx = functions.emplace_back();
savevmx.base = config.saveVmx14Address + (i - 14) * 8; savevmx.base = config.saveVmx14Address + (i - 14) * 8;
savevmx.size = (32 - i) * 8 + 4; savevmx.size = (32 - i) * 8 + 4;
image.symbols.emplace(fmt::format("__savevmx_{}", i), savevmx.base, savevmx.size, Symbol_Function); image.symbols.emplace(fmt::format("__savevmx_{}", i), savevmx.base, savevmx.size, Symbol_Function);
} }
}
if (i >= 64) if (i >= 64)
{ {
@ -506,6 +524,13 @@ bool Recompiler::Recompile(
println("\t{}.compare<int32_t>({}.s32, 0, {});", cr(0), r(insn.operands[0]), xer()); println("\t{}.compare<int32_t>({}.s32, 0, {});", cr(0), r(insn.operands[0]), xer());
break; break;
case PPC_INST_ADDC:
// Emits a carrying add of operands 1 and 2 into operand 0.
// XER.ca is emitted first and is computed from the still-unmodified sources
// (operand 2 > ~operand 1 on the 32-bit views, i.e. the unsigned 32-bit sum wraps),
// so the flag does not depend on the value the add line writes afterwards.
println("\t{}.ca = {}.u32 > ~{}.u32;", xer(), r(insn.operands[2]), r(insn.operands[1]));
println("\t{}.u64 = {}.u64 + {}.u64;", r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[2]));
// Mnemonics containing '.' also emit a signed 32-bit compare of the result against 0 into cr0.
if (strchr(insn.opcode->name, '.'))
println("\t{}.compare<int32_t>({}.s32, 0, {});", cr(0), r(insn.operands[0]), xer());
break;
break;
case PPC_INST_ADDE: case PPC_INST_ADDE:
println("\t{}.u8 = ({}.u32 + {}.u32 < {}.u32) | ({}.u32 + {}.u32 + {}.ca < {}.ca);", temp(), r(insn.operands[1]), r(insn.operands[2]), r(insn.operands[1]), r(insn.operands[1]), r(insn.operands[2]), xer(), xer()); println("\t{}.u8 = ({}.u32 + {}.u32 < {}.u32) | ({}.u32 + {}.u32 + {}.ca < {}.ca);", temp(), r(insn.operands[1]), r(insn.operands[2]), r(insn.operands[1]), r(insn.operands[1]), r(insn.operands[2]), xer(), xer());
println("\t{}.u64 = {}.u64 + {}.u64 + {}.ca;", r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[2]), xer()); println("\t{}.u64 = {}.u64 + {}.u64 + {}.ca;", r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[2]), xer());
@ -514,6 +539,14 @@ bool Recompiler::Recompile(
println("\t{}.compare<int32_t>({}.s32, 0, {});", cr(0), r(insn.operands[0]), xer()); println("\t{}.compare<int32_t>({}.s32, 0, {});", cr(0), r(insn.operands[0]), xer());
break; break;
case PPC_INST_ADDME:
// Emits "operand 1 - 1 + XER.ca" into operand 0.
// The carry-out is staged in temp().u8 and only copied into XER.ca after the
// result line, because the result expression still reads the incoming XER.ca.
println("\t{}.u8 = ({}.u32 - 1 < {}.u32) | ({}.u32 - 1 + {}.ca < {}.ca);", temp(), r(insn.operands[1]), r(insn.operands[1]), r(insn.operands[1]), xer(), xer());
println("\t{}.u64 = {}.u64 - 1 + {}.ca;", r(insn.operands[0]), r(insn.operands[1]), xer());
println("\t{}.ca = {}.u8;", xer(), temp());
// Mnemonics containing '.' also emit a signed 32-bit compare of the result against 0 into cr0.
if (strchr(insn.opcode->name, '.'))
println("\t{}.compare<int32_t>({}.s32, 0, {});", cr(0), r(insn.operands[0]), xer());
break;
break;
case PPC_INST_ADDI: case PPC_INST_ADDI:
print("\t{}.s64 = ", r(insn.operands[0])); print("\t{}.s64 = ", r(insn.operands[0]));
if (insn.operands[1] != 0) if (insn.operands[1] != 0)
@ -627,6 +660,14 @@ bool Recompiler::Recompile(
println("\tif ({}.u32 == 0) goto loc_{:X};", ctr(), insn.operands[0]); println("\tif ({}.u32 == 0) goto loc_{:X};", ctr(), insn.operands[0]);
break; break;
case PPC_INST_BDZF:
{
// Member names of the four bits of a CR field. Operand 0 is a flat CR bit index:
// operand 0 / 4 picks the CR field, operand 0 % 4 picks the bit within it.
constexpr std::string_view fields[] = { "lt", "gt", "eq", "so" };
// Emitted code decrements CTR, then jumps to loc_<operand 1> when the low
// 32 bits of CTR are zero and the selected CR bit is clear.
println("\t--{}.u64;", ctr());
println("\tif ({}.u32 == 0 && !{}.{}) goto loc_{:X};", ctr(), cr(insn.operands[0] / 4), fields[insn.operands[0] % 4], insn.operands[1]);
break;
}
case PPC_INST_BDZLR: case PPC_INST_BDZLR:
println("\t--{}.u64;", ctr()); println("\t--{}.u64;", ctr());
println("\tif ({}.u32 == 0) return;", ctr(), insn.operands[0]); println("\tif ({}.u32 == 0) return;", ctr(), insn.operands[0]);
@ -638,10 +679,20 @@ bool Recompiler::Recompile(
break; break;
case PPC_INST_BDNZF: case PPC_INST_BDNZF:
// NOTE: assuming eq here as a shortcut because all the instructions in the game do that {
constexpr std::string_view fields[] = { "lt", "gt", "eq", "so" };
println("\t--{}.u64;", ctr()); println("\t--{}.u64;", ctr());
println("\tif ({}.u32 != 0 && !{}.eq) goto loc_{:X};", ctr(), cr(insn.operands[0] / 4), insn.operands[1]); println("\tif ({}.u32 != 0 && !{}.{}) goto loc_{:X};", ctr(), cr(insn.operands[0] / 4), fields[insn.operands[0] % 4], insn.operands[1]);
break; break;
}
case PPC_INST_BDNZT:
{
// Member names of the four bits of a CR field. Operand 0 is a flat CR bit index:
// operand 0 / 4 picks the CR field, operand 0 % 4 picks the bit within it.
constexpr std::string_view fields[] = { "lt", "gt", "eq", "so" };
// Emitted code decrements CTR, then jumps to loc_<operand 1> when the low
// 32 bits of CTR are non-zero and the selected CR bit is set.
println("\t--{}.u64;", ctr());
println("\tif ({}.u32 != 0 && {}.{}) goto loc_{:X};", ctr(), cr(insn.operands[0] / 4), fields[insn.operands[0] % 4], insn.operands[1]);
break;
}
case PPC_INST_BEQ: case PPC_INST_BEQ:
printConditionalBranch(false, "eq"); printConditionalBranch(false, "eq");
@ -687,7 +738,7 @@ bool Recompiler::Recompile(
break; break;
case PPC_INST_BLRL: case PPC_INST_BLRL:
println("__builtin_debugtrap();"); println("__debugbreak();");
break; break;
case PPC_INST_BLT: case PPC_INST_BLT:
@ -764,13 +815,27 @@ bool Recompiler::Recompile(
break; break;
case PPC_INST_CNTLZD: case PPC_INST_CNTLZD:
println("\t{0}.u64 = {1}.u64 == 0 ? 64 : __builtin_clzll({1}.u64);", r(insn.operands[0]), r(insn.operands[1])); println("\t{}.u64 = __lzcnt64({}.u64);", r(insn.operands[0]), r(insn.operands[1]));
break; break;
case PPC_INST_CNTLZW: case PPC_INST_CNTLZW:
println("\t{0}.u64 = {1}.u32 == 0 ? 32 : __builtin_clz({1}.u32);", r(insn.operands[0]), r(insn.operands[1])); println("\t{}.u64 = __lzcnt({}.u32);", r(insn.operands[0]), r(insn.operands[1]));
break; break;
case PPC_INST_CROR:
{
// Each operand is a flat CR bit index: index / 4 selects the CR field,
// index % 4 selects one of its four bits by member name.
constexpr std::string_view fields[] = { "lt", "gt", "eq", "so" };
// Emits: bit(operand 0) = bit(operand 1) | bit(operand 2).
println("\t{}.{} = {}.{} | {}.{};", cr(insn.operands[0] / 4), fields[insn.operands[0] % 4], cr(insn.operands[1] / 4), fields[insn.operands[1] % 4], cr(insn.operands[2] / 4), fields[insn.operands[2] % 4]);
break;
}
case PPC_INST_CRORC:
{
// Each operand is a flat CR bit index: index / 4 selects the CR field,
// index % 4 selects one of its four bits by member name.
constexpr std::string_view fields[] = { "lt", "gt", "eq", "so" };
// Emits: bit(operand 0) = bit(operand 1) | complement of bit(operand 2).
// The "& 1" keeps only the lowest bit of the complemented value.
println("\t{}.{} = {}.{} | (~{}.{} & 1);", cr(insn.operands[0] / 4), fields[insn.operands[0] % 4], cr(insn.operands[1] / 4), fields[insn.operands[1] % 4], cr(insn.operands[2] / 4), fields[insn.operands[2] % 4]);
break;
}
case PPC_INST_DB16CYC: case PPC_INST_DB16CYC:
// no op // no op
break; break;
@ -783,6 +848,10 @@ bool Recompiler::Recompile(
// no op // no op
break; break;
case PPC_INST_DCBST:
// no op: nothing is emitted for this opcode, same as the DCBTST case that follows it.
break;
case PPC_INST_DCBTST: case PPC_INST_DCBTST:
// no op // no op
break; break;
@ -827,6 +896,12 @@ bool Recompiler::Recompile(
// no op // no op
break; break;
case PPC_INST_EQV:
// Emits operand 0 = ~(operand 1 ^ operand 2) on the 64-bit views (bitwise XNOR).
println("\t{}.u64 = ~({}.u64 ^ {}.u64);", r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[2]));
// Mnemonics containing '.' also emit a signed 32-bit compare of the result against 0 into cr0.
if (strchr(insn.opcode->name, '.'))
println("\t{}.compare<int32_t>({}.s32, 0, {});", cr(0), r(insn.operands[0]), xer());
break;
break;
case PPC_INST_EXTSB: case PPC_INST_EXTSB:
println("\t{}.s64 = {}.s8;", r(insn.operands[0]), r(insn.operands[1])); println("\t{}.s64 = {}.s8;", r(insn.operands[0]), r(insn.operands[1]));
if (strchr(insn.opcode->name, '.')) if (strchr(insn.opcode->name, '.'))
@ -1010,6 +1085,12 @@ bool Recompiler::Recompile(
println("{}.u32);", r(insn.operands[2])); println("{}.u32);", r(insn.operands[2]));
break; break;
case PPC_INST_LBZUX:
// Indexed byte load with address write-back. Emitted code:
//   ea = operand1.u32 + operand2.u32
//   operand0.u64 = PPC_LOAD_U8(ea)
//   operand1.u32 = ea   (only the .u32 view is written, as in the STWUX case)
println("\t{} = {}.u32 + {}.u32;", ea(), r(insn.operands[1]), r(insn.operands[2]));
println("\t{}.u64 = PPC_LOAD_U8({});", r(insn.operands[0]), ea());
println("\t{}.u32 = {};", r(insn.operands[1]), ea());
break;
break;
case PPC_INST_LD: case PPC_INST_LD:
print("\t{}.u64 = PPC_LOAD_U64(", r(insn.operands[0])); print("\t{}.u64 = PPC_LOAD_U64(", r(insn.operands[0]));
if (insn.operands[2] != 0) if (insn.operands[2] != 0)
@ -1038,6 +1119,12 @@ bool Recompiler::Recompile(
println("{}.u32);", r(insn.operands[2])); println("{}.u32);", r(insn.operands[2]));
break; break;
case PPC_INST_LDUX:
// Indexed 64-bit load with address write-back. Emitted code:
//   ea = operand1.u32 + operand2.u32
//   operand0.u64 = PPC_LOAD_U64(ea)
//   operand1.u32 = ea   (only the .u32 view is written, as in the STWUX case)
println("\t{} = {}.u32 + {}.u32;", ea(), r(insn.operands[1]), r(insn.operands[2]));
println("\t{}.u64 = PPC_LOAD_U64({});", r(insn.operands[0]), ea());
println("\t{}.u32 = {};", r(insn.operands[1]), ea());
break;
break;
case PPC_INST_LFD: case PPC_INST_LFD:
printSetFlushMode(false); printSetFlushMode(false);
print("\t{}.u64 = PPC_LOAD_U64(", f(insn.operands[0])); print("\t{}.u64 = PPC_LOAD_U64(", f(insn.operands[0]));
@ -1046,6 +1133,13 @@ bool Recompiler::Recompile(
println("{});", int32_t(insn.operands[1])); println("{});", int32_t(insn.operands[1]));
break; break;
case PPC_INST_LFDU:
printSetFlushMode(false);
println("\t{} = {} + {}.u32;", ea(), int32_t(insn.operands[1]), r(insn.operands[2]));
println("\t{}.u64 = PPC_LOAD_U64({});", r(insn.operands[0]), ea());
println("\t{}.u32 = {};", r(insn.operands[2]), ea());
break;
case PPC_INST_LFDX: case PPC_INST_LFDX:
printSetFlushMode(false); printSetFlushMode(false);
print("\t{}.u64 = PPC_LOAD_U64(", f(insn.operands[0])); print("\t{}.u64 = PPC_LOAD_U64(", f(insn.operands[0]));
@ -1054,6 +1148,13 @@ bool Recompiler::Recompile(
println("{}.u32);", r(insn.operands[2])); println("{}.u32);", r(insn.operands[2]));
break; break;
case PPC_INST_LFDUX:
printSetFlushMode(false);
println("\t{} = {}.u32 + {}.u32;", ea(), r(insn.operands[1]), r(insn.operands[2]));
println("\t{}.u64 = PPC_LOAD_U64({});", r(insn.operands[0]), ea());
println("\t{}.u32 = {};", r(insn.operands[1]), ea());
break;
case PPC_INST_LFS: case PPC_INST_LFS:
printSetFlushMode(false); printSetFlushMode(false);
print("\t{}.u32 = PPC_LOAD_U32(", temp()); print("\t{}.u32 = PPC_LOAD_U32(", temp());
@ -1063,6 +1164,14 @@ bool Recompiler::Recompile(
println("\t{}.f64 = double({}.f32);", f(insn.operands[0]), temp()); println("\t{}.f64 = double({}.f32);", f(insn.operands[0]), temp());
break; break;
case PPC_INST_LFSU:
// Displacement-form single-precision load with address write-back.
// Operand 0 is the destination FPR, operand 1 the signed displacement, operand 2 the base register.
printSetFlushMode(false);
println("\t{} = {} + {}.u32;", ea(), int32_t(insn.operands[1]), r(insn.operands[2]));
// The raw 32 bits are loaded into temp() first, then widened to double below (same scheme as LFS).
println("\t{}.u32 = PPC_LOAD_U32({});", temp(), ea());
// Write the effective address back to the base register.
println("\t{}.u32 = {};", r(insn.operands[2]), ea());
println("\t{}.f64 = double({}.f32);", f(insn.operands[0]), temp());
break;
break;
case PPC_INST_LFSX: case PPC_INST_LFSX:
printSetFlushMode(false); printSetFlushMode(false);
print("\t{}.u32 = PPC_LOAD_U32(", temp()); print("\t{}.u32 = PPC_LOAD_U32(", temp());
@ -1072,6 +1181,14 @@ bool Recompiler::Recompile(
println("\t{}.f64 = double({}.f32);", f(insn.operands[0]), temp()); println("\t{}.f64 = double({}.f32);", f(insn.operands[0]), temp());
break; break;
case PPC_INST_LFSUX:
// Indexed single-precision load with address write-back.
// Operand 0 is the destination FPR; operands 1 and 2 are summed to form the address.
printSetFlushMode(false);
println("\t{} = {}.u32 + {}.u32;", ea(), r(insn.operands[1]), r(insn.operands[2]));
// The raw 32 bits are loaded into temp() first, then widened to double below (same scheme as LFSX).
println("\t{}.u32 = PPC_LOAD_U32({});", temp(), ea());
// Write the effective address back to the first address register.
println("\t{}.u32 = {};", r(insn.operands[1]), ea());
println("\t{}.f64 = double({}.f32);", f(insn.operands[0]), temp());
break;
break;
case PPC_INST_LHA: case PPC_INST_LHA:
print("\t{}.s64 = int16_t(PPC_LOAD_U16(", r(insn.operands[0])); print("\t{}.s64 = int16_t(PPC_LOAD_U16(", r(insn.operands[0]));
if (insn.operands[2] != 0) if (insn.operands[2] != 0)
@ -1079,6 +1196,12 @@ bool Recompiler::Recompile(
println("{}));", int32_t(insn.operands[1])); println("{}));", int32_t(insn.operands[1]));
break; break;
case PPC_INST_LHAU:
print("\t{} = {} + {}.u32;", ea(), int32_t(insn.operands[1]), r(insn.operands[2]));
print("\t{}.s64 = int16_t(PPC_LOAD_U16({}));", r(insn.operands[0]), ea());
print("\t{}.u32 = {};", r(insn.operands[2]), ea());
break;
case PPC_INST_LHAX: case PPC_INST_LHAX:
print("\t{}.s64 = int16_t(PPC_LOAD_U16(", r(insn.operands[0])); print("\t{}.s64 = int16_t(PPC_LOAD_U16(", r(insn.operands[0]));
if (insn.operands[1] != 0) if (insn.operands[1] != 0)
@ -1093,6 +1216,12 @@ bool Recompiler::Recompile(
println("{});", int32_t(insn.operands[1])); println("{});", int32_t(insn.operands[1]));
break; break;
case PPC_INST_LHZU:
// Displacement-form halfword load with address write-back. Emitted code:
//   ea = <operand 1 as int32_t> + operand2.u32
//   operand0.u64 = PPC_LOAD_U16(ea)
//   operand2.u32 = ea
println("\t{} = {} + {}.u32;", ea(), int32_t(insn.operands[1]), r(insn.operands[2]));
println("\t{}.u64 = PPC_LOAD_U16({});", r(insn.operands[0]), ea());
println("\t{}.u32 = {};", r(insn.operands[2]), ea());
break;
break;
case PPC_INST_LHZX: case PPC_INST_LHZX:
print("\t{}.u64 = PPC_LOAD_U16(", r(insn.operands[0])); print("\t{}.u64 = PPC_LOAD_U16(", r(insn.operands[0]));
if (insn.operands[1] != 0) if (insn.operands[1] != 0)
@ -1100,6 +1229,12 @@ bool Recompiler::Recompile(
println("{}.u32);", r(insn.operands[2])); println("{}.u32);", r(insn.operands[2]));
break; break;
case PPC_INST_LHZUX:
// Indexed halfword load with address write-back. Emitted code:
//   ea = operand1.u32 + operand2.u32
//   operand0.u64 = PPC_LOAD_U16(ea)
//   operand1.u32 = ea
println("\t{} = {}.u32 + {}.u32;", ea(), r(insn.operands[1]), r(insn.operands[2]));
println("\t{}.u64 = PPC_LOAD_U16({});", r(insn.operands[0]), ea());
println("\t{}.u32 = {};", r(insn.operands[1]), ea());
break;
break;
case PPC_INST_LI: case PPC_INST_LI:
println("\t{}.s64 = {};", r(insn.operands[0]), int32_t(insn.operands[1])); println("\t{}.s64 = {};", r(insn.operands[0]), int32_t(insn.operands[1]));
break; break;
@ -1112,6 +1247,7 @@ bool Recompiler::Recompile(
case PPC_INST_LVEWX128: case PPC_INST_LVEWX128:
case PPC_INST_LVX: case PPC_INST_LVX:
case PPC_INST_LVX128: case PPC_INST_LVX128:
case PPC_INST_LVEHX:
// NOTE: for endian swapping, we reverse the whole vector instead of individual elements. // NOTE: for endian swapping, we reverse the whole vector instead of individual elements.
// this is accounted for in every instruction (eg. dp3 sums yzw instead of xyz) // this is accounted for in every instruction (eg. dp3 sums yzw instead of xyz)
print("\t_mm_store_si128((__m128i*){}.u8, _mm_shuffle_epi8(_mm_load_si128((__m128i*)(base + ((", v(insn.operands[0])); print("\t_mm_store_si128((__m128i*){}.u8, _mm_shuffle_epi8(_mm_load_si128((__m128i*)(base + ((", v(insn.operands[0]));
@ -1207,6 +1343,12 @@ bool Recompiler::Recompile(
println("{}.u32);", r(insn.operands[2])); println("{}.u32);", r(insn.operands[2]));
break; break;
case PPC_INST_LWZUX:
// Indexed word load with address write-back. Emitted code:
//   ea = operand1.u32 + operand2.u32
//   operand0.u64 = PPC_LOAD_U32(ea)
//   operand1.u32 = ea
println("\t{} = {}.u32 + {}.u32;", ea(), r(insn.operands[1]), r(insn.operands[2]));
println("\t{}.u64 = PPC_LOAD_U32({});", r(insn.operands[0]), ea());
println("\t{}.u32 = {};", r(insn.operands[1]), ea());
break;
break;
case PPC_INST_MFCR: case PPC_INST_MFCR:
for (size_t i = 0; i < 32; i++) for (size_t i = 0; i < 32; i++)
{ {
@ -1343,43 +1485,43 @@ bool Recompiler::Recompile(
break; break;
case PPC_INST_RLDICL: case PPC_INST_RLDICL:
println("\t{}.u64 = __builtin_rotateleft64({}.u64, {}) & 0x{:X};", r(insn.operands[0]), r(insn.operands[1]), insn.operands[2], ComputeMask(insn.operands[3], 63)); println("\t{}.u64 = _rotl64({}.u64, {}) & 0x{:X};", r(insn.operands[0]), r(insn.operands[1]), insn.operands[2], ComputeMask(insn.operands[3], 63));
break; break;
case PPC_INST_RLDICR: case PPC_INST_RLDICR:
println("\t{}.u64 = __builtin_rotateleft64({}.u64, {}) & 0x{:X};", r(insn.operands[0]), r(insn.operands[1]), insn.operands[2], ComputeMask(0, insn.operands[3])); println("\t{}.u64 = _rotl64({}.u64, {}) & 0x{:X};", r(insn.operands[0]), r(insn.operands[1]), insn.operands[2], ComputeMask(0, insn.operands[3]));
break; break;
case PPC_INST_RLDIMI: case PPC_INST_RLDIMI:
{ {
const uint64_t mask = ComputeMask(insn.operands[3], ~insn.operands[2]); const uint64_t mask = ComputeMask(insn.operands[3], ~insn.operands[2]);
println("\t{}.u64 = (__builtin_rotateleft64({}.u64, {}) & 0x{:X}) | ({}.u64 & 0x{:X});", r(insn.operands[0]), r(insn.operands[1]), insn.operands[2], mask, r(insn.operands[0]), ~mask); println("\t{}.u64 = (_rotl64({}.u64, {}) & 0x{:X}) | ({}.u64 & 0x{:X});", r(insn.operands[0]), r(insn.operands[1]), insn.operands[2], mask, r(insn.operands[0]), ~mask);
break; break;
} }
case PPC_INST_RLWIMI: case PPC_INST_RLWIMI:
{ {
const uint64_t mask = ComputeMask(insn.operands[3] + 32, insn.operands[4] + 32); const uint64_t mask = ComputeMask(insn.operands[3] + 32, insn.operands[4] + 32);
println("\t{}.u64 = (__builtin_rotateleft32({}.u32, {}) & 0x{:X}) | ({}.u64 & 0x{:X});", r(insn.operands[0]), r(insn.operands[1]), insn.operands[2], mask, r(insn.operands[0]), ~mask); println("\t{}.u64 = (_rotl({}.u32, {}) & 0x{:X}) | ({}.u64 & 0x{:X});", r(insn.operands[0]), r(insn.operands[1]), insn.operands[2], mask, r(insn.operands[0]), ~mask);
break; break;
} }
case PPC_INST_RLWINM: case PPC_INST_RLWINM:
println("\t{}.u64 = __builtin_rotateleft64({}.u32 | ({}.u64 << 32), {}) & 0x{:X};", r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[1]), insn.operands[2], ComputeMask(insn.operands[3] + 32, insn.operands[4] + 32)); println("\t{}.u64 = _rotl64({}.u32 | ({}.u64 << 32), {}) & 0x{:X};", r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[1]), insn.operands[2], ComputeMask(insn.operands[3] + 32, insn.operands[4] + 32));
if (strchr(insn.opcode->name, '.')) if (strchr(insn.opcode->name, '.'))
println("\t{}.compare<int32_t>({}.s32, 0, {});", cr(0), r(insn.operands[0]), xer()); println("\t{}.compare<int32_t>({}.s32, 0, {});", cr(0), r(insn.operands[0]), xer());
break; break;
case PPC_INST_ROTLDI: case PPC_INST_ROTLDI:
println("\t{}.u64 = __builtin_rotateleft64({}.u64, {});", r(insn.operands[0]), r(insn.operands[1]), insn.operands[2]); println("\t{}.u64 = _rotl64({}.u64, {});", r(insn.operands[0]), r(insn.operands[1]), insn.operands[2]);
break; break;
case PPC_INST_ROTLW: case PPC_INST_ROTLW:
println("\t{}.u64 = __builtin_rotateleft32({}.u32, {}.u8 & 0x1F);", r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[2])); println("\t{}.u64 = _rotl({}.u32, {}.u8 & 0x1F);", r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[2]));
break; break;
case PPC_INST_ROTLWI: case PPC_INST_ROTLWI:
println("\t{}.u64 = __builtin_rotateleft32({}.u32, {});", r(insn.operands[0]), r(insn.operands[1]), insn.operands[2]); println("\t{}.u64 = _rotl({}.u32, {});", r(insn.operands[0]), r(insn.operands[1]), insn.operands[2]);
if (strchr(insn.opcode->name, '.')) if (strchr(insn.opcode->name, '.'))
println("\t{}.compare<int32_t>({}.s32, 0, {});", cr(0), r(insn.operands[0]), xer()); println("\t{}.compare<int32_t>({}.s32, 0, {});", cr(0), r(insn.operands[0]), xer());
break; break;
@ -1457,7 +1599,7 @@ bool Recompiler::Recompile(
case PPC_INST_STBU: case PPC_INST_STBU:
println("\t{} = {} + {}.u32;", ea(), int32_t(insn.operands[1]), r(insn.operands[2])); println("\t{} = {} + {}.u32;", ea(), int32_t(insn.operands[1]), r(insn.operands[2]));
println("\tPPC_STORE_U8({}, {}.u8);", ea(), r(insn.operands[0])); println("\t{}{}, {}.u8);", mmioStore() ? "PPC_MM_STORE_U8(" : "PPC_STORE_U8(", ea(), r(insn.operands[0]));
println("\t{}.u32 = {};", r(insn.operands[2]), ea()); println("\t{}.u32 = {};", r(insn.operands[2]), ea());
break; break;
@ -1468,6 +1610,12 @@ bool Recompiler::Recompile(
println("{}.u32, {}.u8);", r(insn.operands[2]), r(insn.operands[0])); println("{}.u32, {}.u8);", r(insn.operands[2]), r(insn.operands[0]));
break; break;
case PPC_INST_STBUX:
// Indexed byte store with address write-back: ea = operand1.u32 + operand2.u32.
println("\t{} = {}.u32 + {}.u32;", ea(), r(insn.operands[1]), r(insn.operands[2]));
// mmioStore() selects the PPC_MM_STORE_U8 macro instead of PPC_STORE_U8; the stored value is operand0.u8.
println("\t{}{}, {}.u8);", mmioStore() ? "PPC_MM_STORE_U8(" : "PPC_STORE_U8(", ea(), r(insn.operands[0]));
// Write the effective address back to the first address register.
println("\t{}.u32 = {};", r(insn.operands[1]), ea());
break;
break;
case PPC_INST_STD: case PPC_INST_STD:
print("{}", mmioStore() ? "\tPPC_MM_STORE_U64(" : "\tPPC_STORE_U64("); print("{}", mmioStore() ? "\tPPC_MM_STORE_U64(" : "\tPPC_STORE_U64(");
if (insn.operands[2] != 0) if (insn.operands[2] != 0)
@ -1478,16 +1626,16 @@ bool Recompiler::Recompile(
case PPC_INST_STDCX: case PPC_INST_STDCX:
println("\t{}.lt = 0;", cr(0)); println("\t{}.lt = 0;", cr(0));
println("\t{}.gt = 0;", cr(0)); println("\t{}.gt = 0;", cr(0));
print("\t{}.eq = __sync_bool_compare_and_swap(reinterpret_cast<uint64_t*>(base + ", cr(0)); print("\t{}.eq = PPC_InterlockedCompareExchange64(reinterpret_cast<int64_t*>(base + ", cr(0));
if (insn.operands[1] != 0) if (insn.operands[1] != 0)
print("{}.u32 + ", r(insn.operands[1])); print("{}.u32 + ", r(insn.operands[1]));
println("{}.u32), {}.s64, __builtin_bswap64({}.s64));", r(insn.operands[2]), reserved(), r(insn.operands[0])); println("{}.u32), __builtin_bswap64({}.s64), {}.s64) == {}.s64;", r(insn.operands[2]), r(insn.operands[0]), reserved(), reserved());
println("\t{}.so = {}.so;", cr(0), xer()); println("\t{}.so = {}.so;", cr(0), xer());
break; break;
case PPC_INST_STDU: case PPC_INST_STDU:
println("\t{} = {} + {}.u32;", ea(), int32_t(insn.operands[1]), r(insn.operands[2])); println("\t{} = {} + {}.u32;", ea(), int32_t(insn.operands[1]), r(insn.operands[2]));
println("\tPPC_STORE_U64({}, {}.u64);", ea(), r(insn.operands[0])); println("\t{}{}, {}.u64);", mmioStore() ? "PPC_MM_STORE_U64(" : "PPC_STORE_U64(", ea(), r(insn.operands[0]));
println("\t{}.u32 = {};", r(insn.operands[2]), ea()); println("\t{}.u32 = {};", r(insn.operands[2]), ea());
break; break;
@ -1498,6 +1646,12 @@ bool Recompiler::Recompile(
println("{}.u32, {}.u64);", r(insn.operands[2]), r(insn.operands[0])); println("{}.u32, {}.u64);", r(insn.operands[2]), r(insn.operands[0]));
break; break;
case PPC_INST_STDUX:
// Indexed 64-bit store with address write-back: ea = operand1.u32 + operand2.u32.
println("\t{} = {}.u32 + {}.u32;", ea(), r(insn.operands[1]), r(insn.operands[2]));
// mmioStore() selects the PPC_MM_STORE_U64 macro instead of PPC_STORE_U64; the stored value is operand0.u64.
println("\t{}{}, {}.u64);", mmioStore() ? "PPC_MM_STORE_U64(" : "PPC_STORE_U64(", ea(), r(insn.operands[0]));
// Write the effective address back to the first address register.
println("\t{}.u32 = {};", r(insn.operands[1]), ea());
break;
break;
case PPC_INST_STFD: case PPC_INST_STFD:
printSetFlushMode(false); printSetFlushMode(false);
print("{}", mmioStore() ? "\tPPC_MM_STORE_U64(" : "\tPPC_STORE_U64("); print("{}", mmioStore() ? "\tPPC_MM_STORE_U64(" : "\tPPC_STORE_U64(");
@ -1506,6 +1660,13 @@ bool Recompiler::Recompile(
println("{}, {}.u64);", int32_t(insn.operands[1]), f(insn.operands[0])); println("{}, {}.u64);", int32_t(insn.operands[1]), f(insn.operands[0]));
break; break;
case PPC_INST_STFDU:
printSetFlushMode(false);
println("\t{} = {} + {}.u32;", ea(), int32_t(insn.operands[1]), r(insn.operands[2]));
println("\t{}{}, {}.u64);", mmioStore() ? "PPC_MM_STORE_U64(" : "PPC_STORE_U64(", ea(), r(insn.operands[0]));
println("\t{}.u32 = {};", r(insn.operands[2]), ea());
break;
case PPC_INST_STFDX: case PPC_INST_STFDX:
printSetFlushMode(false); printSetFlushMode(false);
print("{}", mmioStore() ? "\tPPC_MM_STORE_U64(" : "\tPPC_STORE_U64("); print("{}", mmioStore() ? "\tPPC_MM_STORE_U64(" : "\tPPC_STORE_U64(");
@ -1531,6 +1692,14 @@ bool Recompiler::Recompile(
println("{}, {}.u32);", int32_t(insn.operands[1]), temp()); println("{}, {}.u32);", int32_t(insn.operands[1]), temp());
break; break;
case PPC_INST_STFSU:
// Displacement-form single-precision store with address write-back.
// Operand 0 is the source FPR, operand 1 the signed displacement, operand 2 the base register.
printSetFlushMode(false);
// Narrow the double held in the FPR to float in temp(); its .u32 view is what gets stored.
println("\t{}.f32 = float({}.f64);", temp(), f(insn.operands[0]));
println("\t{} = {} + {}.u32;", ea(), int32_t(insn.operands[1]), r(insn.operands[2]));
// mmioStore() selects the PPC_MM_STORE_U32 macro instead of PPC_STORE_U32.
println("\t{}{}, {}.u32);", mmioStore() ? "PPC_MM_STORE_U32(" : "PPC_STORE_U32(", ea(), temp());
// Write the effective address back to the base register.
println("\t{}.u32 = {};", r(insn.operands[2]), ea());
break;
break;
case PPC_INST_STFSX: case PPC_INST_STFSX:
printSetFlushMode(false); printSetFlushMode(false);
println("\t{}.f32 = float({}.f64);", temp(), f(insn.operands[0])); println("\t{}.f32 = float({}.f64);", temp(), f(insn.operands[0]));
@ -1540,6 +1709,14 @@ bool Recompiler::Recompile(
println("{}.u32, {}.u32);", r(insn.operands[2]), temp()); println("{}.u32, {}.u32);", r(insn.operands[2]), temp());
break; break;
case PPC_INST_STFSUX:
// Indexed single-precision store with address write-back.
// Operand 0 is the source FPR; operands 1 and 2 are summed to form the address.
printSetFlushMode(false);
// Narrow the double held in the FPR to float in temp(); its .u32 view is what gets stored.
println("\t{}.f32 = float({}.f64);", temp(), f(insn.operands[0]));
println("\t{} = {}.u32 + {}.u32;", ea(), r(insn.operands[1]), r(insn.operands[2]));
// mmioStore() selects the PPC_MM_STORE_U32 macro instead of PPC_STORE_U32.
println("\t{}{}, {}.u32);", mmioStore() ? "PPC_MM_STORE_U32(" : "PPC_STORE_U32(", ea(), temp());
// Write the effective address back to the first address register.
println("\t{}.u32 = {};", r(insn.operands[1]), ea());
break;
break;
case PPC_INST_STH: case PPC_INST_STH:
print("{}", mmioStore() ? "\tPPC_MM_STORE_U16(" : "\tPPC_STORE_U16("); print("{}", mmioStore() ? "\tPPC_MM_STORE_U16(" : "\tPPC_STORE_U16(");
if (insn.operands[2] != 0) if (insn.operands[2] != 0)
@ -1547,6 +1724,18 @@ bool Recompiler::Recompile(
println("{}, {}.u16);", int32_t(insn.operands[1]), r(insn.operands[0])); println("{}, {}.u16);", int32_t(insn.operands[1]), r(insn.operands[0]));
break; break;
case PPC_INST_STHU:
// Displacement-form halfword store with address write-back: ea = <operand 1 as int32_t> + operand2.u32.
println("\t{} = {} + {}.u32;", ea(), int32_t(insn.operands[1]), r(insn.operands[2]));
// mmioStore() selects the PPC_MM_STORE_U16 macro instead of PPC_STORE_U16; the stored value is operand0.u16.
println("\t{}{}, {}.u16);", mmioStore() ? "PPC_MM_STORE_U16(" : "PPC_STORE_U16(", ea(), r(insn.operands[0]));
// Write the effective address back to the base register.
println("\t{}.u32 = {};", r(insn.operands[2]), ea());
break;
break;
case PPC_INST_STHUX:
// Indexed halfword store with address write-back: ea = operand1.u32 + operand2.u32.
println("\t{} = {}.u32 + {}.u32;", ea(), r(insn.operands[1]), r(insn.operands[2]));
// mmioStore() selects the PPC_MM_STORE_U16 macro instead of PPC_STORE_U16; the stored value is operand0.u16.
println("\t{}{}, {}.u16);", mmioStore() ? "PPC_MM_STORE_U16(" : "PPC_STORE_U16(", ea(), r(insn.operands[0]));
// Write the effective address back to the first address register.
println("\t{}.u32 = {};", r(insn.operands[1]), ea());
break;
break;
case PPC_INST_STHBRX: case PPC_INST_STHBRX:
print("{}", mmioStore() ? "\tPPC_MM_STORE_U16(" : "\tPPC_STORE_U16("); print("{}", mmioStore() ? "\tPPC_MM_STORE_U16(" : "\tPPC_STORE_U16(");
if (insn.operands[1] != 0) if (insn.operands[1] != 0)
@ -1633,22 +1822,22 @@ bool Recompiler::Recompile(
case PPC_INST_STWCX: case PPC_INST_STWCX:
println("\t{}.lt = 0;", cr(0)); println("\t{}.lt = 0;", cr(0));
println("\t{}.gt = 0;", cr(0)); println("\t{}.gt = 0;", cr(0));
print("\t{}.eq = __sync_bool_compare_and_swap(reinterpret_cast<uint32_t*>(base + ", cr(0)); print("\t{}.eq = PPC_InterlockedCompareExchange(reinterpret_cast<long*>(base + ", cr(0));
if (insn.operands[1] != 0) if (insn.operands[1] != 0)
print("{}.u32 + ", r(insn.operands[1])); print("{}.u32 + ", r(insn.operands[1]));
println("{}.u32), {}.s32, __builtin_bswap32({}.s32));", r(insn.operands[2]), reserved(), r(insn.operands[0])); println("{}.u32), __builtin_bswap32({}.s32), {}.s32) == {}.s32;", r(insn.operands[2]), r(insn.operands[0]), reserved(), reserved());
println("\t{}.so = {}.so;", cr(0), xer()); println("\t{}.so = {}.so;", cr(0), xer());
break; break;
case PPC_INST_STWU: case PPC_INST_STWU:
println("\t{} = {} + {}.u32;", ea(), int32_t(insn.operands[1]), r(insn.operands[2])); println("\t{} = {} + {}.u32;", ea(), int32_t(insn.operands[1]), r(insn.operands[2]));
println("\tPPC_STORE_U32({}, {}.u32);", ea(), r(insn.operands[0])); println("\t{}{}, {}.u32);", mmioStore() ? "PPC_MM_STORE_U32(" : "PPC_STORE_U32(", ea(), r(insn.operands[0]));
println("\t{}.u32 = {};", r(insn.operands[2]), ea()); println("\t{}.u32 = {};", r(insn.operands[2]), ea());
break; break;
case PPC_INST_STWUX: case PPC_INST_STWUX:
println("\t{} = {}.u32 + {}.u32;", ea(), r(insn.operands[1]), r(insn.operands[2])); println("\t{} = {}.u32 + {}.u32;", ea(), r(insn.operands[1]), r(insn.operands[2]));
println("\tPPC_STORE_U32({}, {}.u32);", ea(), r(insn.operands[0])); println("\t{}{}, {}.u32);", mmioStore() ? "PPC_MM_STORE_U32(" : "PPC_STORE_U32(", ea(), r(insn.operands[0]));
println("\t{}.u32 = {};", r(insn.operands[1]), ea()); println("\t{}.u32 = {};", r(insn.operands[1]), ea());
break; break;
@ -1680,6 +1869,14 @@ bool Recompiler::Recompile(
println("\t{}.compare<int32_t>({}.s32, 0, {});", cr(0), r(insn.operands[0]), xer()); println("\t{}.compare<int32_t>({}.s32, 0, {});", cr(0), r(insn.operands[0]), xer());
break; break;
case PPC_INST_SUBFZE:
println("\t{}.u8 = (~{}.u32 < ~{}.u32) | (~{}.u32 + {}.ca < {}.ca);", temp(), r(insn.operands[1]), r(insn.operands[1]), r(insn.operands[1]), xer(), xer());
println("\t{}.u64 = ~{}.u64 + {}.ca;", r(insn.operands[0]), r(insn.operands[1]), xer());
println("\t{}.ca = {}.u8;", xer(), temp());
if (strchr(insn.opcode->name, '.'))
println("\t{}.compare<int32_t>({}.s32, 0, {});", cr(0), r(insn.operands[0]), xer());
break;
case PPC_INST_SUBFIC: case PPC_INST_SUBFIC:
println("\t{}.ca = {}.u32 <= {};", xer(), r(insn.operands[1]), insn.operands[2]); println("\t{}.ca = {}.u32 <= {};", xer(), r(insn.operands[1]), insn.operands[2]);
println("\t{}.s64 = {} - {}.s64;", r(insn.operands[0]), int32_t(insn.operands[2]), r(insn.operands[1])); println("\t{}.s64 = {} - {}.s64;", r(insn.operands[0]), int32_t(insn.operands[2]), r(insn.operands[1]));
@ -1715,10 +1912,23 @@ bool Recompiler::Recompile(
println("\t_mm_store_ps({}.f32, _mm_add_ps(_mm_load_ps({}.f32), _mm_load_ps({}.f32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); println("\t_mm_store_ps({}.f32, _mm_add_ps(_mm_load_ps({}.f32), _mm_load_ps({}.f32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
break; break;
case PPC_INST_VADDSBS:
// Emits _mm_adds_epi8 (saturating signed 8-bit add) over operands 1 and 2, stored into operand 0 through the .s8 view.
println("\t_mm_store_si128((__m128i*){}.s8, _mm_adds_epi8(_mm_load_si128((__m128i*){}.s8), _mm_load_si128((__m128i*){}.s8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
break;
case PPC_INST_VADDSHS: case PPC_INST_VADDSHS:
println("\t_mm_store_si128((__m128i*){}.s16, _mm_adds_epi16(_mm_load_si128((__m128i*){}.s16), _mm_load_si128((__m128i*){}.s16)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); println("\t_mm_store_si128((__m128i*){}.s16, _mm_adds_epi16(_mm_load_si128((__m128i*){}.s16), _mm_load_si128((__m128i*){}.s16)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
break; break;
case PPC_INST_VADDSWS:
// Saturating signed 32-bit add, emitted as scalar code, one pair of statements per lane (4 lanes).
// TODO: vectorize
for (size_t i = 0; i < 4; i++)
{
// Widen both lanes to int64_t so the sum cannot overflow, keeping it in temp().s64 ...
println("\t{}.s64 = int64_t({}.s32[{}]) + int64_t({}.s32[{}]);", temp(), v(insn.operands[1]), i, v(insn.operands[2]), i);
// ... then clamp it to [INT_MIN, INT_MAX] when writing the destination lane.
println("\t{}.s32[{}] = {}.s64 > INT_MAX ? INT_MAX : {}.s64 < INT_MIN ? INT_MIN : {}.s64;", v(insn.operands[0]), i, temp(), temp(), temp());
}
break;
case PPC_INST_VADDUBM: case PPC_INST_VADDUBM:
println("\t_mm_store_si128((__m128i*){}.u8, _mm_add_epi8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); println("\t_mm_store_si128((__m128i*){}.u8, _mm_add_epi8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
break; break;
@ -1760,6 +1970,10 @@ bool Recompiler::Recompile(
println("\t_mm_store_si128((__m128i*){}.u8, _mm_avg_epu8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); println("\t_mm_store_si128((__m128i*){}.u8, _mm_avg_epu8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
break; break;
case PPC_INST_VAVGUH:
println("\t_mm_store_si128((__m128i*){}.u8, _mm_avg_epu16(_mm_load_si128((__m128i*){}.u16), _mm_load_si128((__m128i*){}.u16)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
break;
case PPC_INST_VCTSXS: case PPC_INST_VCTSXS:
case PPC_INST_VCFPSXWS128: case PPC_INST_VCFPSXWS128:
printSetFlushMode(true); printSetFlushMode(true);
@ -1770,6 +1984,16 @@ bool Recompiler::Recompile(
println("_mm_load_ps({}.f32)));", v(insn.operands[1])); println("_mm_load_ps({}.f32)));", v(insn.operands[1]));
break; break;
case PPC_INST_VCTUXS:
case PPC_INST_VCFPUXWS128:
printSetFlushMode(true);
print("\t_mm_store_si128((__m128i*){}.u32, _mm_vctuxs(", v(insn.operands[0]));
if (insn.operands[2] != 0)
println("_mm_mul_ps(_mm_load_ps({}.f32), _mm_set1_ps({}))));", v(insn.operands[1]), 1u << insn.operands[2]);
else
println("_mm_load_ps({}.f32)));", v(insn.operands[1]));
break;
case PPC_INST_VCFSX: case PPC_INST_VCFSX:
case PPC_INST_VCSXWFP128: case PPC_INST_VCSXWFP128:
{ {
@ -1806,7 +2030,7 @@ bool Recompiler::Recompile(
case PPC_INST_VCMPBFP: case PPC_INST_VCMPBFP:
case PPC_INST_VCMPBFP128: case PPC_INST_VCMPBFP128:
println("\t__builtin_debugtrap();"); println("\t__debugbreak();");
break; break;
case PPC_INST_VCMPEQFP: case PPC_INST_VCMPEQFP:
@ -1823,6 +2047,12 @@ bool Recompiler::Recompile(
println("\t{}.setFromMask(_mm_load_si128((__m128i*){}.u8), 0xFFFF);", cr(6), v(insn.operands[0])); println("\t{}.setFromMask(_mm_load_si128((__m128i*){}.u8), 0xFFFF);", cr(6), v(insn.operands[0]));
break; break;
case PPC_INST_VCMPEQUH:
println("\t_mm_store_si128((__m128i*){}.u8, _mm_cmpeq_epi16(_mm_load_si128((__m128i*){}.u16), _mm_load_si128((__m128i*){}.u16)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
if (strchr(insn.opcode->name, '.'))
println("\t{}.setFromMask(_mm_load_si128((__m128i*){}.u16), 0xFFFF);", cr(6), v(insn.operands[0]));
break;
case PPC_INST_VCMPEQUW: case PPC_INST_VCMPEQUW:
case PPC_INST_VCMPEQUW128: case PPC_INST_VCMPEQUW128:
println("\t_mm_store_si128((__m128i*){}.u8, _mm_cmpeq_epi32(_mm_load_si128((__m128i*){}.u32), _mm_load_si128((__m128i*){}.u32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); println("\t_mm_store_si128((__m128i*){}.u8, _mm_cmpeq_epi32(_mm_load_si128((__m128i*){}.u32), _mm_load_si128((__m128i*){}.u32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
@ -1848,10 +2078,26 @@ bool Recompiler::Recompile(
case PPC_INST_VCMPGTUB: case PPC_INST_VCMPGTUB:
println("\t_mm_store_si128((__m128i*){}.u8, _mm_cmpgt_epu8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); println("\t_mm_store_si128((__m128i*){}.u8, _mm_cmpgt_epu8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
if (strchr(insn.opcode->name, '.'))
println("\t{}.setFromMask(_mm_load_si128((__m128i*){}.u8), 0xFFFF);", cr(6), v(insn.operands[0]));
break; break;
case PPC_INST_VCMPGTUH: case PPC_INST_VCMPGTUH:
println("\t_mm_store_si128((__m128i*){}.u8, _mm_cmpgt_epu16(_mm_load_si128((__m128i*){}.u16), _mm_load_si128((__m128i*){}.u16)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); println("\t_mm_store_si128((__m128i*){}.u8, _mm_cmpgt_epu16(_mm_load_si128((__m128i*){}.u16), _mm_load_si128((__m128i*){}.u16)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
if (strchr(insn.opcode->name, '.'))
println("\t{}.setFromMask(_mm_load_si128((__m128i*){}.u16), 0xFFFF);", cr(6), v(insn.operands[0]));
break;
case PPC_INST_VCMPGTSH:
println("\t_mm_store_si128((__m128i*){}.s8, _mm_cmpgt_epi16(_mm_load_si128((__m128i*){}.u16), _mm_load_si128((__m128i*){}.u16)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
if (strchr(insn.opcode->name, '.'))
println("\t{}.setFromMask(_mm_load_si128((__m128i*){}.s16), 0xFFFF);", cr(6), v(insn.operands[0]));
break;
case PPC_INST_VCMPGTSW:
println("\t_mm_store_si128((__m128i*){}.s8, _mm_cmpgt_epi32(_mm_load_si128((__m128i*){}.u32), _mm_load_si128((__m128i*){}.u32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
if (strchr(insn.opcode->name, '.'))
println("\t{}.setFromMask(_mm_load_si128((__m128i*){}.s32), 0xFFFF);", cr(6), v(insn.operands[0]));
break; break;
case PPC_INST_VEXPTEFP: case PPC_INST_VEXPTEFP:
@ -1883,10 +2129,18 @@ bool Recompiler::Recompile(
println("\t_mm_store_ps({}.f32, _mm_max_ps(_mm_load_ps({}.f32), _mm_load_ps({}.f32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); println("\t_mm_store_ps({}.f32, _mm_max_ps(_mm_load_ps({}.f32), _mm_load_ps({}.f32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
break; break;
case PPC_INST_VMAXSH:
println("\t_mm_store_si128((__m128i*){}.u16, _mm_max_epi16(_mm_load_si128((__m128i*){}.u16), _mm_load_si128((__m128i*){}.u16)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
break;
case PPC_INST_VMAXSW: case PPC_INST_VMAXSW:
println("\t_mm_store_si128((__m128i*){}.u32, _mm_max_epi32(_mm_load_si128((__m128i*){}.u32), _mm_load_si128((__m128i*){}.u32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); println("\t_mm_store_si128((__m128i*){}.u32, _mm_max_epi32(_mm_load_si128((__m128i*){}.u32), _mm_load_si128((__m128i*){}.u32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
break; break;
case PPC_INST_VMINSH:
println("\t_mm_store_si128((__m128i*){}.u16, _mm_min_epi16(_mm_load_si128((__m128i*){}.u16), _mm_load_si128((__m128i*){}.u16)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
break;
case PPC_INST_VMINFP: case PPC_INST_VMINFP:
case PPC_INST_VMINFP128: case PPC_INST_VMINFP128:
printSetFlushMode(true); printSetFlushMode(true);
@ -1990,16 +2244,41 @@ bool Recompiler::Recompile(
break; break;
default: default:
println("\t__builtin_debugtrap();"); println("\t__debugbreak();");
break; break;
} }
break; break;
case PPC_INST_VPKSHSS:
case PPC_INST_VPKSHSS128:
println("\t_mm_store_si128((__m128i*){}.u8, _mm_packs_epi16(_mm_load_si128((__m128i*){}.s16), _mm_load_si128((__m128i*){}.s16)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1]));
break;
case PPC_INST_VPKSWSS:
case PPC_INST_VPKSWSS128:
println("\t_mm_store_si128((__m128i*){}.u8, _mm_packs_epi32(_mm_load_si128((__m128i*){}.s32), _mm_load_si128((__m128i*){}.s32)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1]));
break;
case PPC_INST_VPKSHUS: case PPC_INST_VPKSHUS:
case PPC_INST_VPKSHUS128: case PPC_INST_VPKSHUS128:
println("\t_mm_store_si128((__m128i*){}.u8, _mm_packus_epi16(_mm_load_si128((__m128i*){}.s16), _mm_load_si128((__m128i*){}.s16)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1])); println("\t_mm_store_si128((__m128i*){}.u8, _mm_packus_epi16(_mm_load_si128((__m128i*){}.s16), _mm_load_si128((__m128i*){}.s16)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1]));
break; break;
case PPC_INST_VPKSWUS:
case PPC_INST_VPKSWUS128:
println("\t_mm_store_si128((__m128i*){}.u8, _mm_packus_epi32(_mm_load_si128((__m128i*){}.s32), _mm_load_si128((__m128i*){}.s32)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1]));
break;
case PPC_INST_VPKUHUS:
case PPC_INST_VPKUHUS128:
for (size_t i = 0; i < 8; i++)
{
println("\t{0}.u8[{1}] = {2}.u16[{1}] > UCHAR_MAX ? UCHAR_MAX : {2}.u16[{1}];", vTemp(), i, v(insn.operands[2]));
println("\t{0}.u8[{1}] = {2}.u16[{3}] > UCHAR_MAX ? UCHAR_MAX : {2}.u16[{3}];", vTemp(), i + 8, v(insn.operands[1]), i);
}
println("{} = {};", v(insn.operands[0]), vTemp());
break;
case PPC_INST_VREFP: case PPC_INST_VREFP:
case PPC_INST_VREFP128: case PPC_INST_VREFP128:
// TODO: see if we can use rcp safely // TODO: see if we can use rcp safely
@ -2032,6 +2311,14 @@ bool Recompiler::Recompile(
break; break;
} }
case PPC_INST_VRLH:
for (size_t i = 0; i < 8; i++)
{
println("\t{0}.u16[{1}] = ({2}.u16[{1}] << ({3}.u16[{1}] & 0xF)) | ({2}.u16[{1}] >> (16 - ({3}.u16[{1}] & 0xF)));", vTemp(), i, v(insn.operands[1]), v(insn.operands[2]));
}
println("{} = {};", v(insn.operands[0]), vTemp());
break;
case PPC_INST_VRSQRTEFP: case PPC_INST_VRSQRTEFP:
case PPC_INST_VRSQRTEFP128: case PPC_INST_VRSQRTEFP128:
// TODO: see if we can use rsqrt safely // TODO: see if we can use rsqrt safely
@ -2041,6 +2328,7 @@ bool Recompiler::Recompile(
break; break;
case PPC_INST_VSEL: case PPC_INST_VSEL:
case PPC_INST_VSEL128:
println("\t_mm_store_si128((__m128i*){}.u8, _mm_or_si128(_mm_andnot_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)), _mm_and_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8))));", v(insn.operands[0]), v(insn.operands[3]), v(insn.operands[1]), v(insn.operands[3]), v(insn.operands[2])); println("\t_mm_store_si128((__m128i*){}.u8, _mm_or_si128(_mm_andnot_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)), _mm_and_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8))));", v(insn.operands[0]), v(insn.operands[3]), v(insn.operands[1]), v(insn.operands[3]), v(insn.operands[2]));
break; break;
@ -2050,6 +2338,12 @@ bool Recompiler::Recompile(
println("\t{}.u8[{}] = {}.u8[{}] << ({}.u8[{}] & 0x7);", v(insn.operands[0]), i, v(insn.operands[1]), i, v(insn.operands[2]), i); println("\t{}.u8[{}] = {}.u8[{}] << ({}.u8[{}] & 0x7);", v(insn.operands[0]), i, v(insn.operands[1]), i, v(insn.operands[2]), i);
break; break;
case PPC_INST_VSLH:
// TODO: vectorize
for (size_t i = 0; i < 8; i++)
println("\t{}.u16[{}] = {}.u16[{}] << ({}.u8[{}] & 0xF);", v(insn.operands[0]), i, v(insn.operands[1]), i, v(insn.operands[2]), i * 2);
break;
case PPC_INST_VSLDOI: case PPC_INST_VSLDOI:
case PPC_INST_VSLDOI128: case PPC_INST_VSLDOI128:
println("\t_mm_store_si128((__m128i*){}.u8, _mm_alignr_epi8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8), {}));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]), 16 - insn.operands[3]); println("\t_mm_store_si128((__m128i*){}.u8, _mm_alignr_epi8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8), {}));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]), 16 - insn.operands[3]);
@ -2083,6 +2377,10 @@ bool Recompiler::Recompile(
println("\t_mm_store_si128((__m128i*){}.u8, _mm_set1_epi8(char(0x{:X})));", v(insn.operands[0]), insn.operands[1]); println("\t_mm_store_si128((__m128i*){}.u8, _mm_set1_epi8(char(0x{:X})));", v(insn.operands[0]), insn.operands[1]);
break; break;
case PPC_INST_VSPLTISH:
println("\t_mm_store_si128((__m128i*){}.u16, _mm_set1_epi16(int(0x{:X})));", v(insn.operands[0]), insn.operands[1]);
break;
case PPC_INST_VSPLTISW: case PPC_INST_VSPLTISW:
case PPC_INST_VSPLTISW128: case PPC_INST_VSPLTISW128:
println("\t_mm_store_si128((__m128i*){}.u32, _mm_set1_epi32(int(0x{:X})));", v(insn.operands[0]), insn.operands[1]); println("\t_mm_store_si128((__m128i*){}.u32, _mm_set1_epi32(int(0x{:X})));", v(insn.operands[0]), insn.operands[1]);
@ -2102,6 +2400,18 @@ bool Recompiler::Recompile(
println("\t_mm_store_si128((__m128i*){}.u8, _mm_vsr(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); println("\t_mm_store_si128((__m128i*){}.u8, _mm_vsr(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
break; break;
case PPC_INST_VSRAB:
// TODO: vectorize, ensure endianness is correct
for (size_t i = 0; i < 16; i++)
println("\t{}.s8[{}] = {}.s8[{}] >> ({}.u8[{}] & 0x7);", v(insn.operands[0]), i, v(insn.operands[1]), i, v(insn.operands[2]), i);
break;
case PPC_INST_VSRAH:
// TODO: vectorize, ensure endianness is correct
for (size_t i = 0; i < 8; i++)
println("\t{}.s16[{}] = {}.s16[{}] >> ({}.u8[{}] & 0xF);", v(insn.operands[0]), i, v(insn.operands[1]), i, v(insn.operands[2]), i * 2);
break;
case PPC_INST_VSRAW: case PPC_INST_VSRAW:
case PPC_INST_VSRAW128: case PPC_INST_VSRAW128:
// TODO: vectorize, ensure endianness is correct // TODO: vectorize, ensure endianness is correct
@ -2109,6 +2419,12 @@ bool Recompiler::Recompile(
println("\t{}.s32[{}] = {}.s32[{}] >> ({}.u8[{}] & 0x1F);", v(insn.operands[0]), i, v(insn.operands[1]), i, v(insn.operands[2]), i * 4); println("\t{}.s32[{}] = {}.s32[{}] >> ({}.u8[{}] & 0x1F);", v(insn.operands[0]), i, v(insn.operands[1]), i, v(insn.operands[2]), i * 4);
break; break;
case PPC_INST_VSRH:
// TODO: vectorize, ensure endianness is correct
for (size_t i = 0; i < 8; i++)
println("\t{}.u16[{}] = {}.u16[{}] >> ({}.u8[{}] & 0xF);", v(insn.operands[0]), i, v(insn.operands[1]), i, v(insn.operands[2]), i * 2);
break;
case PPC_INST_VSRW: case PPC_INST_VSRW:
case PPC_INST_VSRW128: case PPC_INST_VSRW128:
// TODO: vectorize, ensure endianness is correct // TODO: vectorize, ensure endianness is correct
@ -2122,6 +2438,15 @@ bool Recompiler::Recompile(
println("\t_mm_store_ps({}.f32, _mm_sub_ps(_mm_load_ps({}.f32), _mm_load_ps({}.f32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); println("\t_mm_store_ps({}.f32, _mm_sub_ps(_mm_load_ps({}.f32), _mm_load_ps({}.f32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
break; break;
case PPC_INST_VSUBSHS:
// TODO: vectorize
for (size_t i = 0; i < 8; i++)
{
println("\t{}.s64 = int64_t({}.s16[{}]) - int64_t({}.s16[{}]);", temp(), v(insn.operands[1]), i, v(insn.operands[2]), i);
println("\t{}.s16[{}] = {}.s64 > SHRT_MAX ? SHRT_MAX : {}.s64 < SHRT_MIN ? SHRT_MIN : {}.s64;", v(insn.operands[0]), i, temp(), temp(), temp());
}
break;
case PPC_INST_VSUBSWS: case PPC_INST_VSUBSWS:
// TODO: vectorize // TODO: vectorize
for (size_t i = 0; i < 4; i++) for (size_t i = 0; i < 4; i++)
@ -2135,8 +2460,12 @@ bool Recompiler::Recompile(
println("\t_mm_store_si128((__m128i*){}.u8, _mm_subs_epu8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); println("\t_mm_store_si128((__m128i*){}.u8, _mm_subs_epu8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
break; break;
case PPC_INST_VSUBUBM:
println("\t_mm_store_si128((__m128i*){}.u8, _mm_sub_epi8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
break;
case PPC_INST_VSUBUHM: case PPC_INST_VSUBUHM:
println("\t_mm_store_si128((__m128i*){}.u8, _mm_sub_epi16(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); println("\t_mm_store_si128((__m128i*){}.u8, _mm_sub_epi16(_mm_load_si128((__m128i*){}.u16), _mm_load_si128((__m128i*){}.u16)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
break; break;
case PPC_INST_VUPKD3D128: case PPC_INST_VUPKD3D128:
@ -2166,7 +2495,7 @@ bool Recompiler::Recompile(
break; break;
default: default:
println("\t__builtin_debugtrap();"); println("\t__debugbreak();");
break; break;
} }
break; break;
@ -2216,6 +2545,74 @@ bool Recompiler::Recompile(
println("\t{}.u64 = {}.u64 ^ {};", r(insn.operands[0]), r(insn.operands[1]), insn.operands[2] << 16); println("\t{}.u64 = {}.u64 ^ {};", r(insn.operands[0]), r(insn.operands[1]), insn.operands[2] << 16);
break; break;
case PPC_INST_MULHD:
println("\t{}.s64 = (int64_t({}.s32) * int64_t({}.s32)) >> 32;", r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[2]));
break;
case PPC_INST_MULHDU:
println("\t{}.u64 = (uint64_t({}.u32) * uint64_t({}.u32)) >> 32;", r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[2]));
break;
case PPC_INST_VCMPGTUW:
println("\t_mm_store_si128((__m128i*){}.u8, _mm_cmpgt_epu32(_mm_load_si128((__m128i*){}.u32), _mm_load_si128((__m128i*){}.u32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
if (strchr(insn.opcode->name, '.')) // For vcmpgtuw.
println("\t{}.setFromMask(_mm_load_ps({}.f32), 0xF);", cr(6), v(insn.operands[0]));
break;
case PPC_INST_VANDC:
println("\t_mm_store_si128((__m128i*){}.u8, _mm_andnot_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1]));
break;
case PPC_INST_VNOR:
println("\t_mm_store_si128((__m128i*){}.u8, _mm_xor_si128(_mm_or_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)), _mm_set1_epi32(0xFFFFFFFF)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
break;
case PPC_INST_VNOR128:
println("\t_mm_store_si128((__m128i*){}.u8, _mm_xor_si128(_mm_or_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)), _mm_set1_epi32(0xFFFFFFFF)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
break;
case PPC_INST_VSL:
// Assuming byte-wise shift for generality (could be word/halfword depending on context)
for (size_t i = 0; i < 16; i++)
println("\t{}.u8[{}] = {}.u8[{}] << ({}.u8[{}] & 0x7);", v(insn.operands[0]), i, v(insn.operands[1]), i, v(insn.operands[2]), i);
break;
case PPC_INST_VMAXUB:
println("\t_mm_store_si128((__m128i*){}.u8, _mm_max_epu8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
break;
case PPC_INST_VMINUB:
println("\t_mm_store_si128((__m128i*){}.u8, _mm_min_epu8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
break;
case PPC_INST_VPKUHUM:
println("\t_mm_store_si128((__m128i*){}.u8, _mm_packus_epi16(_mm_load_si128((__m128i*){}.s16), _mm_load_si128((__m128i*){}.s16)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1]));
break;
case PPC_INST_VSUBUWS:
println("\t_mm_store_si128((__m128i*){}.u32, _mm_subs_epu32(_mm_load_si128((__m128i*){}.u32), _mm_load_si128((__m128i*){}.u32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
break;
case PPC_INST_VRLW128:
for (size_t i = 0; i < 4; i++)
println("\t{}.u32[{}] = ({}.u32[{}] << ({}.u8[{}] & 0x1F)) | ({}.u32[{}] >> (32 - ({}.u8[{}] & 0x1F)));", v(insn.operands[0]), i, v(insn.operands[1]), i, v(insn.operands[2]), i * 4, v(insn.operands[1]), i, v(insn.operands[2]), i * 4);
break;
case PPC_INST_MACLHWU:
println("\t{}.u64 = (({}.u32 & 0xFFFF) * ({}.u32 & 0xFFFF) + {}.u32) & 0xFFFFFFFF;",
r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[2]), r(insn.operands[0]));
break;
case PPC_INST_VSUBUWM:
println("\t_mm_store_si128((__m128i*){}.u32, _mm_sub_epi32(_mm_load_si128((__m128i*){}.u32), _mm_load_si128((__m128i*){}.u32)));",
v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
break;
case PPC_INST_MACCHWU:
println("\t{}.u64 = ((({}.u32 >> 16) * ({}.u32 & 0xFFFF)) + {}.u32) & 0xFFFFFFFF;",
r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[2]), r(insn.operands[0]));
break;
default: default:
return false; return false;
} }

View File

@ -38,6 +38,15 @@ void RecompilerConfig::Load(const std::string_view& configFilePath)
longJmpAddress = main["longjmp_address"].value_or(0u); longJmpAddress = main["longjmp_address"].value_or(0u);
setJmpAddress = main["setjmp_address"].value_or(0u); setJmpAddress = main["setjmp_address"].value_or(0u);
if (restGpr14Address == 0) fmt::println("ERROR: __restgprlr_14 address is unspecified");
if (saveGpr14Address == 0) fmt::println("ERROR: __savegprlr_14 address is unspecified");
if (restFpr14Address == 0) fmt::println("ERROR: __restfpr_14 address is unspecified");
if (saveFpr14Address == 0) fmt::println("ERROR: __savefpr_14 address is unspecified");
if (restVmx14Address == 0) fmt::println("ERROR: __restvmx_14 address is unspecified");
if (saveVmx14Address == 0) fmt::println("ERROR: __savevmx_14 address is unspecified");
if (restVmx64Address == 0) fmt::println("ERROR: __restvmx_64 address is unspecified");
if (saveVmx64Address == 0) fmt::println("ERROR: __savevmx_64 address is unspecified");
if (auto functionsArray = main["functions"].as_array()) if (auto functionsArray = main["functions"].as_array())
{ {
for (auto& func : *functionsArray) for (auto& func : *functionsArray)

View File

@ -2,24 +2,32 @@
#include <cassert> #include <cassert>
template<typename T> // https://github.com/hedge-dev/XenonRecomp/pull/35
inline T ByteSwap(T value) #ifdef __clang__
#define _byte_swap16(value) __builtin_bswap16(static_cast<uint16_t>(value))
#define _byte_swap32(value) __builtin_bswap32(static_cast<uint32_t>(value))
#define _byte_swap64(value) __builtin_bswap64(static_cast<uint64_t>(value))
#elif defined(_MSC_VER)
#define _byte_swap16(value) _byteswap_ushort(static_cast<uint16_t>(value))
#define _byte_swap32(value) _byteswap_ulong(static_cast<uint32_t>(value))
#define _byte_swap64(value) _byteswap_uint64(static_cast<uint64_t>(value))
#endif
template<typename T> T ByteSwap(T value)
{ {
if constexpr (sizeof(T) == 1) if constexpr (sizeof(T) == 1)
return value; return value;
else if constexpr (sizeof(T) == 2) if constexpr (sizeof(T) == 2)
return static_cast<T>(__builtin_bswap16(static_cast<uint16_t>(value))); return static_cast<T>(_byte_swap16(value));
else if constexpr (sizeof(T) == 4) if constexpr (sizeof(T) == 4)
return static_cast<T>(__builtin_bswap32(static_cast<uint32_t>(value))); return static_cast<T>(_byte_swap32(value));
else if constexpr (sizeof(T) == 8) if constexpr (sizeof(T) == 8)
return static_cast<T>(__builtin_bswap64(static_cast<uint64_t>(value))); return static_cast<T>(_byte_swap64(value));
assert(false && "Unexpected byte size."); assert(false && "Unexpected byte size.");
return value;
} }
template<typename T> template<typename T> void ByteSwapInplace(T& value)
inline void ByteSwapInplace(T& value)
{ {
value = ByteSwap(value); value = ByteSwap(value);
} }

View File

@ -651,10 +651,91 @@ inline __m128i _mm_vctsxs(__m128 src1)
return _mm_andnot_si128(_mm_castps_si128(xmm2), _mm_castps_si128(dest)); return _mm_andnot_si128(_mm_castps_si128(xmm2), _mm_castps_si128(dest));
} }
// Converts four packed floats to unsigned 32-bit integers with saturation:
// negative inputs and NaN produce 0, inputs >= 2^32 produce 0xFFFFFFFF, and
// everything in between is truncated toward zero.
//
// SSE only offers a *signed* float->int32 conversion, so lanes >= 2^31 are
// rebased by -2^31 before converting and get 0x80000000 added back afterwards.
// All float<->integer vector reinterpretations are spelled out with
// _mm_cast*, so the function does not depend on lax vector conversions.
inline __m128i _mm_vctuxs(__m128 src1)
{
    const __m128 two31 = _mm_set1_ps(2147483648.0f); // 2^31, exactly representable
    const __m128i signBit = _mm_set1_epi32(INT_MIN); // 0x80000000 in every lane

    // Clamp to >= 0. The zero is the SECOND operand on purpose: max returns
    // the second operand when either input is NaN, so NaN lanes become 0.
    const __m128 clamped = _mm_max_ps(src1, _mm_setzero_ps());

    // All-ones mask for lanes that do not fit into the signed range.
    const __m128 isHigh = _mm_cmpge_ps(clamped, two31);

    // Subtract 2^31 from the high lanes only (the mask turns the subtrahend
    // into +0.0f for the remaining lanes, which leaves them unchanged).
    const __m128 rebased = _mm_sub_ps(clamped, _mm_and_ps(isHigh, two31));

    __m128i dest = _mm_cvttps_epi32(rebased);

    // The truncating conversion yields 0x80000000 for out-of-range values,
    // which after rebasing can only happen when the input was >= 2^32.
    const __m128i overflow = _mm_cmpeq_epi32(dest, signBit);

    // Undo the rebase on the high lanes, then saturate overflowed lanes.
    dest = _mm_add_epi32(dest, _mm_and_si128(_mm_castps_si128(isHigh), signBit));
    return _mm_or_si128(dest, overflow);
}
inline __m128i _mm_vsr(__m128i a, __m128i b) inline __m128i _mm_vsr(__m128i a, __m128i b)
{ {
b = _mm_srli_epi64(_mm_slli_epi64(b, 61), 61); b = _mm_srli_epi64(_mm_slli_epi64(b, 61), 61);
return _mm_castps_si128(_mm_insert_ps(_mm_castsi128_ps(_mm_srl_epi64(a, b)), _mm_castsi128_ps(_mm_srl_epi64(_mm_srli_si128(a, 4), b)), 0x10)); return _mm_castps_si128(_mm_insert_ps(_mm_castsi128_ps(_mm_srl_epi64(a, b)), _mm_castsi128_ps(_mm_srl_epi64(_mm_srli_si128(a, 4), b)), 0x10));
} }
// Rotates a 64-bit value left by `shift` bits; the count is taken modulo 64.
inline uint64_t _rotl64(uint64_t value, int shift) {
    shift &= 63; // Normalize shift to 0-63
    // The complementary count is masked as well: for shift == 0 an unmasked
    // (64 - shift) would shift a 64-bit value by 64, which is undefined.
    return (value << shift) | (value >> ((64 - shift) & 63));
}
// Counts the zero bits above the most significant set bit of a 32-bit value.
// Returns 32 when no bit is set.
inline uint32_t __lzcnt(uint32_t value) {
    uint32_t zeros = 0;
    // Walk a single-bit probe down from bit 31 until it meets a set bit or
    // falls off the low end (the latter only happens for value == 0).
    for (uint32_t probe = 0x80000000u; probe != 0 && (value & probe) == 0; probe >>= 1)
        ++zeros;
    return zeros;
}
#ifdef _WIN32
#pragma intrinsic(_InterlockedCompareExchange)
#define PPC_InterlockedCompareExchange _InterlockedCompareExchange
#else
// Fallback for GCC/Clang
// Sequentially consistent strong compare-and-swap on the 32-bit value that
// Destination points at: stores Exchange if the current value equals
// Comparand (both narrowed to 32 bits). Returns Comparand when the swap
// happened, otherwise the value that was observed instead.
inline long PPC_InterlockedCompareExchange(long volatile* Destination, long Exchange, long Comparand) {
    // The operation is always performed on 32 bits, whatever the width of
    // `long` is on this platform; the volatile qualifier is kept.
    volatile int32_t* const target = reinterpret_cast<volatile int32_t*>(Destination);
    int32_t observed = Comparand;
    if (__atomic_compare_exchange_n(target, &observed, Exchange, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
        return Comparand;
    return observed;
}
#endif
#ifdef _WIN32
#pragma intrinsic(_InterlockedCompareExchange64)
#define PPC_InterlockedCompareExchange64 _InterlockedCompareExchange64
#else
// Fallback for GCC/Clang
// Sequentially consistent strong compare-and-swap on a 64-bit value: stores
// Exchange if *Destination equals Comparand. Returns Comparand when the swap
// happened, otherwise the value that was observed instead.
inline int64_t PPC_InterlockedCompareExchange64(int64_t volatile* Destination, int64_t Exchange, int64_t Comparand) {
    int64_t observed = Comparand;
    if (__atomic_compare_exchange_n(Destination, &observed, Exchange, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
        return Comparand;
    return observed;
}
#endif
// Provides a __debugbreak fallback unless a macro of that name is already
// defined. (#ifndef only sees macros, so this section is also entered when
// __debugbreak exists as a compiler intrinsic rather than a macro.)
#ifndef __debugbreak
#ifdef _WIN32
// Windows: request the intrinsic form; the self-referential #define only
// marks the name as defined and expands to the same identifier.
#pragma intrinsic(__debugbreak)
#define __debugbreak __debugbreak
#else
// GCC/Clang/Linux fallback
#ifdef __x86_64__
// x86-64: emit the one-byte breakpoint instruction inline.
#define __debugbreak() asm volatile("int $0x3")
#else
// Other targets: deliver SIGTRAP to the current process.
// NOTE(review): raise/SIGTRAP need <signal.h> (or <csignal>) to be visible
// wherever the macro is expanded; no such include is visible in this part of
// the header, so confirm.
#define __debugbreak() raise(SIGTRAP)
#endif
#endif
#endif
#endif #endif