Add more instructions regarding Bakugan Battle Brawlers

This commit is contained in:
DeaTh-G 2024-10-13 18:12:23 +02:00 committed by DeaTh-G
parent 7dd4f91ac6
commit cdfa0907fd

View File

@ -703,6 +703,10 @@ bool Recompiler::Recompile(
// no op
break;
// Both instructions are treated as no-ops by the recompiler: nothing is emitted for them.
case PPC_INST_DCBST:
case PPC_INST_DCBTST:
    break;
@ -930,6 +934,12 @@ bool Recompiler::Recompile(
println("{}.u32);", r(insn.operands[2]));
break;
case PPC_INST_LBZUX:
{
    // Indexed byte load with update: the emitted code computes
    // ea = operand1 + operand2, assigns the loaded byte to operand0's .u64
    // view, then writes ea back into operand1.
    const auto dst = insn.operands[0];
    const auto base = insn.operands[1];
    const auto index = insn.operands[2];

    println("\t{} = {}.u32 + {}.u32;", ea(), r(base), r(index));
    println("\t{}.u64 = PPC_LOAD_U8({});", r(dst), ea());
    println("\t{}.u32 = {};", r(base), ea());
    break;
}
case PPC_INST_LD:
print("\t{}.u64 = PPC_LOAD_U64(", r(insn.operands[0]));
if (insn.operands[2] != 0)
@ -958,6 +968,12 @@ bool Recompiler::Recompile(
println("{}.u32);", r(insn.operands[2]));
break;
case PPC_INST_LDUX:
{
    // Indexed 64-bit load with update: ea = operand1 + operand2, the loaded
    // doubleword goes to operand0, and ea is stored back into operand1.
    const auto dst = insn.operands[0];
    const auto base = insn.operands[1];
    const auto index = insn.operands[2];

    println("\t{} = {}.u32 + {}.u32;", ea(), r(base), r(index));
    println("\t{}.u64 = PPC_LOAD_U64({});", r(dst), ea());
    println("\t{}.u32 = {};", r(base), ea());
    break;
}
case PPC_INST_LFD:
printSetFlushMode(false);
print("\t{}.u64 = PPC_LOAD_U64(", f(insn.operands[0]));
@ -966,6 +982,13 @@ bool Recompiler::Recompile(
println("{});", int32_t(insn.operands[1]));
break;
case PPC_INST_LFDU:
    // Floating-point doubleword load with update (displacement form).
    // operands[0] is the destination, operands[1] the signed displacement and
    // operands[2] the base register, which receives the effective address.
    printSetFlushMode(false);
    println("\t{} = {} + {}.u32;", ea(), int32_t(insn.operands[1]), r(insn.operands[2]));
    // The destination is a floating-point register, so it is named with f()
    // (as the non-update LFD arm does) rather than r().
    println("\t{}.u64 = PPC_LOAD_U64({});", f(insn.operands[0]), ea());
    println("\t{}.u32 = {};", r(insn.operands[2]), ea());
    break;
case PPC_INST_LFDX:
printSetFlushMode(false);
print("\t{}.u64 = PPC_LOAD_U64(", f(insn.operands[0]));
@ -974,6 +997,13 @@ bool Recompiler::Recompile(
println("{}.u32);", r(insn.operands[2]));
break;
case PPC_INST_LFDUX:
    // Floating-point doubleword load with update (indexed form).
    // ea = operands[1] + operands[2]; the loaded value goes to operands[0] and
    // ea is written back to operands[1].
    printSetFlushMode(false);
    println("\t{} = {}.u32 + {}.u32;", ea(), r(insn.operands[1]), r(insn.operands[2]));
    // The destination is a floating-point register, so it is named with f()
    // (as the non-update LFDX arm does) rather than r().
    println("\t{}.u64 = PPC_LOAD_U64({});", f(insn.operands[0]), ea());
    println("\t{}.u32 = {};", r(insn.operands[1]), ea());
    break;
case PPC_INST_LFS:
printSetFlushMode(false);
print("\t{}.u32 = PPC_LOAD_U32(", temp());
@ -983,6 +1013,14 @@ bool Recompiler::Recompile(
println("\t{}.f64 = double({}.f32);", f(insn.operands[0]), temp());
break;
case PPC_INST_LFSU:
{
    // Single-precision load with update (displacement form): the raw 32 bits
    // are loaded into the temp, the base register receives ea, and the temp's
    // float view is widened to double for the destination FPR.
    const auto dst = insn.operands[0];
    const auto displacement = int32_t(insn.operands[1]);
    const auto base = insn.operands[2];

    printSetFlushMode(false);
    println("\t{} = {} + {}.u32;", ea(), displacement, r(base));
    println("\t{}.u32 = PPC_LOAD_U32({});", temp(), ea());
    println("\t{}.u32 = {};", r(base), ea());
    println("\t{}.f64 = double({}.f32);", f(dst), temp());
    break;
}
case PPC_INST_LFSX:
printSetFlushMode(false);
print("\t{}.u32 = PPC_LOAD_U32(", temp());
@ -992,6 +1030,14 @@ bool Recompiler::Recompile(
println("\t{}.f64 = double({}.f32);", f(insn.operands[0]), temp());
break;
case PPC_INST_LFSUX:
{
    // Single-precision load with update (indexed form): ea = operand1 +
    // operand2, the raw 32 bits go through the temp, operand1 receives ea, and
    // the value is widened to double for the destination FPR.
    const auto dst = insn.operands[0];
    const auto base = insn.operands[1];
    const auto index = insn.operands[2];

    printSetFlushMode(false);
    println("\t{} = {}.u32 + {}.u32;", ea(), r(base), r(index));
    println("\t{}.u32 = PPC_LOAD_U32({});", temp(), ea());
    println("\t{}.u32 = {};", r(base), ea());
    println("\t{}.f64 = double({}.f32);", f(dst), temp());
    break;
}
case PPC_INST_LHA:
print("\t{}.s64 = int16_t(PPC_LOAD_U16(", r(insn.operands[0]));
if (insn.operands[2] != 0)
@ -999,6 +1045,12 @@ bool Recompiler::Recompile(
println("{}));", int32_t(insn.operands[1]));
break;
case PPC_INST_LHAU:
    // Sign-extending halfword load with update (displacement form).
    // operands[0] is the destination, operands[1] the signed displacement and
    // operands[2] the base register, which receives the effective address.
    // Each statement is emitted on its own line with println, like the other
    // update-form arms; plain print would glue all three statements (and
    // whatever is emitted next) onto a single output line.
    println("\t{} = {} + {}.u32;", ea(), int32_t(insn.operands[1]), r(insn.operands[2]));
    println("\t{}.s64 = int16_t(PPC_LOAD_U16({}));", r(insn.operands[0]), ea());
    println("\t{}.u32 = {};", r(insn.operands[2]), ea());
    break;
case PPC_INST_LHAX:
print("\t{}.s64 = int16_t(PPC_LOAD_U16(", r(insn.operands[0]));
if (insn.operands[1] != 0)
@ -1013,6 +1065,12 @@ bool Recompiler::Recompile(
println("{});", int32_t(insn.operands[1]));
break;
case PPC_INST_LHZU:
{
    // Halfword load with update (displacement form): ea = displacement +
    // base, the loaded halfword goes to the destination's .u64 view, and the
    // base register receives ea.
    const auto dst = insn.operands[0];
    const auto displacement = int32_t(insn.operands[1]);
    const auto base = insn.operands[2];

    println("\t{} = {} + {}.u32;", ea(), displacement, r(base));
    println("\t{}.u64 = PPC_LOAD_U16({});", r(dst), ea());
    println("\t{}.u32 = {};", r(base), ea());
    break;
}
case PPC_INST_LHZX:
print("\t{}.u64 = PPC_LOAD_U16(", r(insn.operands[0]));
if (insn.operands[1] != 0)
@ -1020,6 +1078,12 @@ bool Recompiler::Recompile(
println("{}.u32);", r(insn.operands[2]));
break;
case PPC_INST_LHZUX:
{
    // Halfword load with update (indexed form): ea = operand1 + operand2,
    // the loaded halfword goes to operand0, and operand1 receives ea.
    const auto dst = insn.operands[0];
    const auto base = insn.operands[1];
    const auto index = insn.operands[2];

    println("\t{} = {}.u32 + {}.u32;", ea(), r(base), r(index));
    println("\t{}.u64 = PPC_LOAD_U16({});", r(dst), ea());
    println("\t{}.u32 = {};", r(base), ea());
    break;
}

case PPC_INST_LI:
{
    // Load immediate: the operand is converted to a signed 32-bit value and
    // assigned to the destination's .s64 view.
    const auto immediate = int32_t(insn.operands[1]);
    println("\t{}.s64 = {};", r(insn.operands[0]), immediate);
    break;
}
@ -1032,6 +1096,7 @@ bool Recompiler::Recompile(
case PPC_INST_LVEWX128:
case PPC_INST_LVX:
case PPC_INST_LVX128:
case PPC_INST_LVEHX:
// NOTE: for endian swapping, we reverse the whole vector instead of individual elements.
// this is accounted for in every instruction (eg. dp3 sums yzw instead of xyz)
print("\t_mm_store_si128((__m128i*){}.u8, _mm_shuffle_epi8(_mm_load_si128((__m128i*)(base + ((", v(insn.operands[0]));
@ -1127,6 +1192,12 @@ bool Recompiler::Recompile(
println("{}.u32);", r(insn.operands[2]));
break;
case PPC_INST_LWZUX:
{
    // Word load with update (indexed form): ea = operand1 + operand2, the
    // loaded word goes to operand0's .u64 view, and operand1 receives ea.
    const auto dst = insn.operands[0];
    const auto base = insn.operands[1];
    const auto index = insn.operands[2];

    println("\t{} = {}.u32 + {}.u32;", ea(), r(base), r(index));
    println("\t{}.u64 = PPC_LOAD_U32({});", r(dst), ea());
    println("\t{}.u32 = {};", r(base), ea());
    break;
}
case PPC_INST_MFCR:
for (size_t i = 0; i < 32; i++)
{
@ -1388,6 +1459,12 @@ bool Recompiler::Recompile(
println("{}.u32, {}.u8);", r(insn.operands[2]), r(insn.operands[0]));
break;
case PPC_INST_STBUX:
{
    // Byte store with update (indexed form): ea = operand1 + operand2, the
    // low byte of operand0 is stored at ea, and operand1 receives ea.
    const auto src = insn.operands[0];
    const auto base = insn.operands[1];
    const auto index = insn.operands[2];

    println("\t{} = {}.u32 + {}.u32;", ea(), r(base), r(index));
    println("\tPPC_STORE_U8({}, {}.u8);", ea(), r(src));
    println("\t{}.u32 = {};", r(base), ea());
    break;
}
case PPC_INST_STD:
print("{}", mmioStore() ? "\tPPC_MM_STORE_U64(" : "\tPPC_STORE_U64(");
if (insn.operands[2] != 0)
@ -1418,6 +1495,12 @@ bool Recompiler::Recompile(
println("{}.u32, {}.u64);", r(insn.operands[2]), r(insn.operands[0]));
break;
case PPC_INST_STDUX:
{
    // Doubleword store with update (indexed form): ea = operand1 + operand2,
    // operand0's .u64 is stored at ea, and operand1 receives ea.
    const auto src = insn.operands[0];
    const auto base = insn.operands[1];
    const auto index = insn.operands[2];

    println("\t{} = {}.u32 + {}.u32;", ea(), r(base), r(index));
    println("\tPPC_STORE_U64({}, {}.u64);", ea(), r(src));
    println("\t{}.u32 = {};", r(base), ea());
    break;
}
case PPC_INST_STFD:
printSetFlushMode(false);
print("{}", mmioStore() ? "\tPPC_MM_STORE_U64(" : "\tPPC_STORE_U64(");
@ -1426,6 +1509,13 @@ bool Recompiler::Recompile(
println("{}, {}.u64);", int32_t(insn.operands[1]), f(insn.operands[0]));
break;
case PPC_INST_STFDU:
    // Floating-point doubleword store with update (displacement form).
    // operands[0] is the source, operands[1] the signed displacement and
    // operands[2] the base register, which receives the effective address.
    printSetFlushMode(false);
    println("\t{} = {} + {}.u32;", ea(), int32_t(insn.operands[1]), r(insn.operands[2]));
    // The value being stored lives in a floating-point register, so it is
    // named with f() (as the non-update STFD arm does) rather than r().
    println("\tPPC_STORE_U64({}, {}.u64);", ea(), f(insn.operands[0]));
    println("\t{}.u32 = {};", r(insn.operands[2]), ea());
    break;
case PPC_INST_STFDX:
printSetFlushMode(false);
print("{}", mmioStore() ? "\tPPC_MM_STORE_U64(" : "\tPPC_STORE_U64(");
@ -1451,6 +1541,14 @@ bool Recompiler::Recompile(
println("{}, {}.u32);", int32_t(insn.operands[1]), temp());
break;
case PPC_INST_STFSU:
{
    // Single-precision store with update (displacement form): the source FPR
    // is narrowed to float in the temp, the temp's 32-bit pattern is stored
    // at ea = displacement + base, and the base register receives ea.
    const auto src = insn.operands[0];
    const auto displacement = int32_t(insn.operands[1]);
    const auto base = insn.operands[2];

    printSetFlushMode(false);
    println("\t{}.f32 = float({}.f64);", temp(), f(src));
    println("\t{} = {} + {}.u32;", ea(), displacement, r(base));
    println("\tPPC_STORE_U32({}, {}.u32);", ea(), temp());
    println("\t{}.u32 = {};", r(base), ea());
    break;
}
case PPC_INST_STFSX:
printSetFlushMode(false);
println("\t{}.f32 = float({}.f64);", temp(), f(insn.operands[0]));
@ -1460,6 +1558,14 @@ bool Recompiler::Recompile(
println("{}.u32, {}.u32);", r(insn.operands[2]), temp());
break;
case PPC_INST_STFSUX:
{
    // Single-precision store with update (indexed form): the source FPR is
    // narrowed to float in the temp, the temp's 32-bit pattern is stored at
    // ea = operand1 + operand2, and operand1 receives ea.
    const auto src = insn.operands[0];
    const auto base = insn.operands[1];
    const auto index = insn.operands[2];

    printSetFlushMode(false);
    println("\t{}.f32 = float({}.f64);", temp(), f(src));
    println("\t{} = {}.u32 + {}.u32;", ea(), r(base), r(index));
    println("\tPPC_STORE_U32({}, {}.u32);", ea(), temp());
    println("\t{}.u32 = {};", r(base), ea());
    break;
}
case PPC_INST_STH:
print("{}", mmioStore() ? "\tPPC_MM_STORE_U16(" : "\tPPC_STORE_U16(");
if (insn.operands[2] != 0)
@ -1467,6 +1573,18 @@ bool Recompiler::Recompile(
println("{}, {}.u16);", int32_t(insn.operands[1]), r(insn.operands[0]));
break;
case PPC_INST_STHU:
{
    // Halfword store with update (displacement form): ea = displacement +
    // base, operand0's .u16 is stored at ea, and the base receives ea.
    const auto src = insn.operands[0];
    const auto displacement = int32_t(insn.operands[1]);
    const auto base = insn.operands[2];

    println("\t{} = {} + {}.u32;", ea(), displacement, r(base));
    println("\tPPC_STORE_U16({}, {}.u16);", ea(), r(src));
    println("\t{}.u32 = {};", r(base), ea());
    break;
}

case PPC_INST_STHUX:
{
    // Halfword store with update (indexed form): ea = operand1 + operand2,
    // operand0's .u16 is stored at ea, and operand1 receives ea.
    const auto src = insn.operands[0];
    const auto base = insn.operands[1];
    const auto index = insn.operands[2];

    println("\t{} = {}.u32 + {}.u32;", ea(), r(base), r(index));
    println("\tPPC_STORE_U16({}, {}.u16);", ea(), r(src));
    println("\t{}.u32 = {};", r(base), ea());
    break;
}
case PPC_INST_STHBRX:
print("{}", mmioStore() ? "\tPPC_MM_STORE_U16(" : "\tPPC_STORE_U16(");
if (insn.operands[1] != 0)
@ -1635,10 +1753,18 @@ bool Recompiler::Recompile(
println("\t_mm_store_ps({}.f32, _mm_add_ps(_mm_load_ps({}.f32), _mm_load_ps({}.f32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
break;
case PPC_INST_VADDSBS:
{
    // Signed saturating byte add, emitted as a single _mm_adds_epi8.
    const auto dst = insn.operands[0];
    const auto lhs = insn.operands[1];
    const auto rhs = insn.operands[2];
    println("\t_mm_store_si128((__m128i*){}.s8, _mm_adds_epi8(_mm_load_si128((__m128i*){}.s8), _mm_load_si128((__m128i*){}.s8)));",
        v(dst), v(lhs), v(rhs));
    break;
}

case PPC_INST_VADDSHS:
{
    // Signed saturating halfword add, emitted as a single _mm_adds_epi16.
    const auto dst = insn.operands[0];
    const auto lhs = insn.operands[1];
    const auto rhs = insn.operands[2];
    println("\t_mm_store_si128((__m128i*){}.s16, _mm_adds_epi16(_mm_load_si128((__m128i*){}.s16), _mm_load_si128((__m128i*){}.s16)));",
        v(dst), v(lhs), v(rhs));
    break;
}
case PPC_INST_VADDSWS:
println("\t_mm_store_si128((__m128i*){}.s32, _mm_adds_epi32(_mm_load_si128((__m128i*){}.s32), _mm_load_si128((__m128i*){}.s32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
break;
case PPC_INST_VADDUBM:
{
    // Byte-wise wrapping add, emitted as a single _mm_add_epi8.
    const auto dst = insn.operands[0];
    const auto lhs = insn.operands[1];
    const auto rhs = insn.operands[2];
    println("\t_mm_store_si128((__m128i*){}.u8, _mm_add_epi8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));",
        v(dst), v(lhs), v(rhs));
    break;
}
@ -1680,6 +1806,10 @@ bool Recompiler::Recompile(
println("\t_mm_store_si128((__m128i*){}.u8, _mm_avg_epu8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
break;
case PPC_INST_VAVGUH:
{
    // Unsigned halfword average, emitted as a single _mm_avg_epu16.
    const auto dst = insn.operands[0];
    const auto lhs = insn.operands[1];
    const auto rhs = insn.operands[2];
    println("\t_mm_store_si128((__m128i*){}.u8, _mm_avg_epu16(_mm_load_si128((__m128i*){}.u16), _mm_load_si128((__m128i*){}.u16)));",
        v(dst), v(lhs), v(rhs));
    break;
}
case PPC_INST_VCTSXS:
case PPC_INST_VCFPSXWS128:
printSetFlushMode(true);
@ -1743,6 +1873,12 @@ bool Recompiler::Recompile(
println("\t{}.setFromMask(_mm_load_si128((__m128i*){}.u8), 0xFFFF);", cr(6), v(insn.operands[0]));
break;
case PPC_INST_VCMPEQUH:
{
    // Halfword equality compare; every lane becomes all-ones or all-zeros.
    const auto dst = insn.operands[0];
    println("\t_mm_store_si128((__m128i*){}.u8, _mm_cmpeq_epi16(_mm_load_si128((__m128i*){}.u16), _mm_load_si128((__m128i*){}.u16)));",
        v(dst), v(insn.operands[1]), v(insn.operands[2]));

    // Opcode names containing '.' additionally set cr6 from the result mask.
    const bool updatesCr6 = strchr(insn.opcode->name, '.') != nullptr;
    if (updatesCr6)
        println("\t{}.setFromMask(_mm_load_si128((__m128i*){}.u16), 0xFFFF);", cr(6), v(dst));
    break;
}
case PPC_INST_VCMPEQUW:
case PPC_INST_VCMPEQUW128:
println("\t_mm_store_si128((__m128i*){}.u8, _mm_cmpeq_epi32(_mm_load_si128((__m128i*){}.u32), _mm_load_si128((__m128i*){}.u32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
@ -1768,10 +1904,26 @@ bool Recompiler::Recompile(
case PPC_INST_VCMPGTUB:
{
    // Unsigned byte greater-than compare.
    const auto dst = insn.operands[0];
    println("\t_mm_store_si128((__m128i*){}.u8, _mm_cmpgt_epu8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));",
        v(dst), v(insn.operands[1]), v(insn.operands[2]));

    // Opcode names containing '.' additionally set cr6 from the result mask.
    const bool updatesCr6 = strchr(insn.opcode->name, '.') != nullptr;
    if (updatesCr6)
        println("\t{}.setFromMask(_mm_load_si128((__m128i*){}.u8), 0xFFFF);", cr(6), v(dst));
    break;
}

case PPC_INST_VCMPGTUH:
{
    // Unsigned halfword greater-than compare.
    // NOTE(review): _mm_cmpgt_epu16 is not a standard SSE intrinsic; presumably
    // a project-provided helper like _mm_cmpgt_epu8 - confirm it exists.
    const auto dst = insn.operands[0];
    println("\t_mm_store_si128((__m128i*){}.u8, _mm_cmpgt_epu16(_mm_load_si128((__m128i*){}.u16), _mm_load_si128((__m128i*){}.u16)));",
        v(dst), v(insn.operands[1]), v(insn.operands[2]));

    // Opcode names containing '.' additionally set cr6 from the result mask.
    const bool updatesCr6 = strchr(insn.opcode->name, '.') != nullptr;
    if (updatesCr6)
        println("\t{}.setFromMask(_mm_load_si128((__m128i*){}.u16), 0xFFFF);", cr(6), v(dst));
    break;
}
case PPC_INST_VCMPGTSH:
{
    // Signed halfword greater-than compare via _mm_cmpgt_epi16.
    const auto dst = insn.operands[0];
    println("\t_mm_store_si128((__m128i*){}.s8, _mm_cmpgt_epi16(_mm_load_si128((__m128i*){}.u16), _mm_load_si128((__m128i*){}.u16)));",
        v(dst), v(insn.operands[1]), v(insn.operands[2]));

    // Opcode names containing '.' additionally set cr6 from the result mask.
    const bool updatesCr6 = strchr(insn.opcode->name, '.') != nullptr;
    if (updatesCr6)
        println("\t{}.setFromMask(_mm_load_si128((__m128i*){}.s16), 0xFFFF);", cr(6), v(dst));
    break;
}

case PPC_INST_VCMPGTSW:
{
    // Signed word greater-than compare via _mm_cmpgt_epi32.
    const auto dst = insn.operands[0];
    println("\t_mm_store_si128((__m128i*){}.s8, _mm_cmpgt_epi32(_mm_load_si128((__m128i*){}.u32), _mm_load_si128((__m128i*){}.u32)));",
        v(dst), v(insn.operands[1]), v(insn.operands[2]));

    // Opcode names containing '.' additionally set cr6 from the result mask.
    const bool updatesCr6 = strchr(insn.opcode->name, '.') != nullptr;
    if (updatesCr6)
        println("\t{}.setFromMask(_mm_load_si128((__m128i*){}.s32), 0xFFFF);", cr(6), v(dst));
    break;
}
case PPC_INST_VEXPTEFP:
@ -1803,10 +1955,18 @@ bool Recompiler::Recompile(
println("\t_mm_store_ps({}.f32, _mm_max_ps(_mm_load_ps({}.f32), _mm_load_ps({}.f32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
break;
case PPC_INST_VMAXSH:
{
    // Signed halfword maximum, emitted as a single _mm_max_epi16.
    const auto dst = insn.operands[0];
    const auto lhs = insn.operands[1];
    const auto rhs = insn.operands[2];
    println("\t_mm_store_si128((__m128i*){}.u16, _mm_max_epi16(_mm_load_si128((__m128i*){}.u16), _mm_load_si128((__m128i*){}.u16)));",
        v(dst), v(lhs), v(rhs));
    break;
}

case PPC_INST_VMAXSW:
{
    // Signed word maximum, emitted as a single _mm_max_epi32.
    const auto dst = insn.operands[0];
    const auto lhs = insn.operands[1];
    const auto rhs = insn.operands[2];
    println("\t_mm_store_si128((__m128i*){}.u32, _mm_max_epi32(_mm_load_si128((__m128i*){}.u32), _mm_load_si128((__m128i*){}.u32)));",
        v(dst), v(lhs), v(rhs));
    break;
}
case PPC_INST_VMINSH:
    // Signed halfword minimum. This must emit _mm_min_epi16; emitting
    // _mm_max_epi16 would make the instruction behave exactly like VMAXSH.
    println("\t_mm_store_si128((__m128i*){}.u16, _mm_min_epi16(_mm_load_si128((__m128i*){}.u16), _mm_load_si128((__m128i*){}.u16)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
    break;
case PPC_INST_VMINFP:
case PPC_INST_VMINFP128:
printSetFlushMode(true);
@ -1915,11 +2075,26 @@ bool Recompiler::Recompile(
}
break;
// Pack instructions: each narrows two source vectors into one with saturation.
// The sources are passed to the SSE pack intrinsic as (operand2, operand1).
case PPC_INST_VPKSHSS:
case PPC_INST_VPKSHSS128:
{
    // Signed 16-bit -> signed 8-bit, signed saturation.
    const auto dst = insn.operands[0];
    const auto first = insn.operands[2];
    const auto second = insn.operands[1];
    println("\t_mm_store_si128((__m128i*){}.u8, _mm_packs_epi16(_mm_load_si128((__m128i*){}.s16), _mm_load_si128((__m128i*){}.s16)));",
        v(dst), v(first), v(second));
    break;
}

case PPC_INST_VPKSWSS:
case PPC_INST_VPKSWSS128:
{
    // Signed 32-bit -> signed 16-bit, signed saturation.
    const auto dst = insn.operands[0];
    const auto first = insn.operands[2];
    const auto second = insn.operands[1];
    println("\t_mm_store_si128((__m128i*){}.u8, _mm_packs_epi32(_mm_load_si128((__m128i*){}.s32), _mm_load_si128((__m128i*){}.s32)));",
        v(dst), v(first), v(second));
    break;
}

case PPC_INST_VPKSHUS:
case PPC_INST_VPKSHUS128:
{
    // Signed 16-bit -> unsigned 8-bit, unsigned saturation.
    const auto dst = insn.operands[0];
    const auto first = insn.operands[2];
    const auto second = insn.operands[1];
    println("\t_mm_store_si128((__m128i*){}.u8, _mm_packus_epi16(_mm_load_si128((__m128i*){}.s16), _mm_load_si128((__m128i*){}.s16)));",
        v(dst), v(first), v(second));
    break;
}

case PPC_INST_VPKSWUS:
case PPC_INST_VPKSWUS128:
{
    // Signed 32-bit -> unsigned 16-bit, unsigned saturation.
    const auto dst = insn.operands[0];
    const auto first = insn.operands[2];
    const auto second = insn.operands[1];
    println("\t_mm_store_si128((__m128i*){}.u8, _mm_packus_epi32(_mm_load_si128((__m128i*){}.s32), _mm_load_si128((__m128i*){}.s32)));",
        v(dst), v(first), v(second));
    break;
}
case PPC_INST_VREFP:
case PPC_INST_VREFP128:
// TODO: see if we can use rcp safely
@ -1961,6 +2136,7 @@ bool Recompiler::Recompile(
break;
case PPC_INST_VSEL:
case PPC_INST_VSEL128:
{
    // Bitwise select: result = (~mask & operand1) | (mask & operand2),
    // where the mask is operand3.
    const auto dst = insn.operands[0];
    const auto whenClear = insn.operands[1];
    const auto whenSet = insn.operands[2];
    const auto mask = insn.operands[3];
    println("\t_mm_store_si128((__m128i*){}.u8, _mm_or_si128(_mm_andnot_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)), _mm_and_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8))));",
        v(dst), v(mask), v(whenClear), v(mask), v(whenSet));
    break;
}
@ -1970,6 +2146,12 @@ bool Recompiler::Recompile(
println("\t{}.u8[{}] = {}.u8[{}] << ({}.u8[{}] & 0x7);", v(insn.operands[0]), i, v(insn.operands[1]), i, v(insn.operands[2]), i);
break;
case PPC_INST_VSLH:
    // Per-halfword left shift: each 16-bit lane of operand1 is shifted by the
    // low four bits of the matching lane of operand2.
    // Halfword lane i overlaps bytes 2*i and 2*i+1, so the shift count is read
    // from u8[i * 2] (the word-sized shifts use i * 4 in the same way); plain
    // u8[i] would take the count from the wrong lane for every i > 0.
    // NOTE(review): assumes a little-endian host, where u8[i * 2] is the low
    // byte of u16[i] - confirm.
    // TODO: vectorize
    for (size_t i = 0; i < 8; i++)
        println("\t{}.u16[{}] = {}.u16[{}] << ({}.u8[{}] & 0xF);", v(insn.operands[0]), i, v(insn.operands[1]), i, v(insn.operands[2]), i * 2);
    break;
case PPC_INST_VSLDOI:
case PPC_INST_VSLDOI128:
println("\t_mm_store_si128((__m128i*){}.u8, _mm_alignr_epi8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8), {}));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]), 16 - insn.operands[3]);
@ -2003,6 +2185,10 @@ bool Recompiler::Recompile(
println("\t_mm_store_si128((__m128i*){}.u8, _mm_set1_epi8(char(0x{:X})));", v(insn.operands[0]), insn.operands[1]);
break;
case PPC_INST_VSPLTISH:
{
    // Splat immediate halfword: the immediate is printed in hex and broadcast
    // to every 16-bit lane with _mm_set1_epi16.
    const auto dst = insn.operands[0];
    const auto immediate = insn.operands[1];
    println("\t_mm_store_si128((__m128i*){}.u16, _mm_set1_epi16(int(0x{:X})));", v(dst), immediate);
    break;
}
case PPC_INST_VSPLTISW:
case PPC_INST_VSPLTISW128:
println("\t_mm_store_si128((__m128i*){}.u32, _mm_set1_epi32(int(0x{:X})));", v(insn.operands[0]), insn.operands[1]);
@ -2022,6 +2208,18 @@ bool Recompiler::Recompile(
println("\t_mm_store_si128((__m128i*){}.u8, _mm_vsr(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
break;
case PPC_INST_VSRAB:
    // Per-byte arithmetic right shift: each signed byte of operand1 is shifted
    // by the low three bits of the matching byte of operand2.
    // Lanes are one byte wide, so the shift count for lane i is u8[i]; an
    // i * 4 stride would index up to u8[60], past the end of the 16-byte vector.
    // TODO: vectorize, ensure endianness is correct
    for (size_t i = 0; i < 16; i++)
        println("\t{}.s8[{}] = {}.s8[{}] >> ({}.u8[{}] & 0x7);", v(insn.operands[0]), i, v(insn.operands[1]), i, v(insn.operands[2]), i);
    break;
case PPC_INST_VSRAH:
    // Per-halfword arithmetic right shift: each signed 16-bit lane of operand1
    // is shifted by the low four bits of the matching lane of operand2.
    // Halfword lane i overlaps bytes 2*i and 2*i+1, so the count is read from
    // u8[i * 2]; an i * 4 stride would index up to u8[28], past the end of the
    // 16-byte vector.
    // NOTE(review): assumes a little-endian host, where u8[i * 2] is the low
    // byte of s16[i] - confirm.
    // TODO: vectorize, ensure endianness is correct
    for (size_t i = 0; i < 8; i++)
        println("\t{}.s16[{}] = {}.s16[{}] >> ({}.u8[{}] & 0xF);", v(insn.operands[0]), i, v(insn.operands[1]), i, v(insn.operands[2]), i * 2);
    break;
case PPC_INST_VSRAW:
case PPC_INST_VSRAW128:
// TODO: vectorize, ensure endianness is correct
@ -2029,6 +2227,12 @@ bool Recompiler::Recompile(
println("\t{}.s32[{}] = {}.s32[{}] >> ({}.u8[{}] & 0x1F);", v(insn.operands[0]), i, v(insn.operands[1]), i, v(insn.operands[2]), i * 4);
break;
case PPC_INST_VSRH:
    // Per-halfword logical right shift: each unsigned 16-bit lane of operand1
    // is shifted by the low four bits of the matching lane of operand2.
    // Halfword lane i overlaps bytes 2*i and 2*i+1, so the count is read from
    // u8[i * 2]; an i * 4 stride would index up to u8[28], past the end of the
    // 16-byte vector.
    // NOTE(review): assumes a little-endian host, where u8[i * 2] is the low
    // byte of u16[i] - confirm.
    // TODO: vectorize, ensure endianness is correct
    for (size_t i = 0; i < 8; i++)
        println("\t{}.u16[{}] = {}.u16[{}] >> ({}.u8[{}] & 0xF);", v(insn.operands[0]), i, v(insn.operands[1]), i, v(insn.operands[2]), i * 2);
    break;
case PPC_INST_VSRW:
case PPC_INST_VSRW128:
// TODO: vectorize, ensure endianness is correct
@ -2042,6 +2246,15 @@ bool Recompiler::Recompile(
println("\t_mm_store_ps({}.f32, _mm_sub_ps(_mm_load_ps({}.f32), _mm_load_ps({}.f32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
break;
case PPC_INST_VSUBSHS:
    // Signed saturating halfword subtract (operand1 - operand2 per lane).
    // _mm_subs_epi16 performs exactly this clamp to [SHRT_MIN, SHRT_MAX], so a
    // single vector statement replaces the eight scalar widen/clamp pairs and
    // matches how VADDSHS is emitted with _mm_adds_epi16.
    println("\t_mm_store_si128((__m128i*){}.s16, _mm_subs_epi16(_mm_load_si128((__m128i*){}.s16), _mm_load_si128((__m128i*){}.s16)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
    break;
case PPC_INST_VSUBSWS:
// TODO: vectorize
for (size_t i = 0; i < 4; i++)
@ -2055,8 +2268,12 @@ bool Recompiler::Recompile(
println("\t_mm_store_si128((__m128i*){}.u8, _mm_subs_epu8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
break;
case PPC_INST_VSUBUBM:
{
    // Byte-wise wrapping subtract (operand1 - operand2), emitted as a single
    // _mm_sub_epi8.
    const auto dst = insn.operands[0];
    const auto lhs = insn.operands[1];
    const auto rhs = insn.operands[2];
    println("\t_mm_store_si128((__m128i*){}.u8, _mm_sub_epi8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));",
        v(dst), v(lhs), v(rhs));
    break;
}
case PPC_INST_VSUBUHM:
    // Halfword-wise wrapping subtract (operand1 - operand2), emitted as a
    // single _mm_sub_epi16.
    // The statement is emitted exactly once: emitting it twice would subtract
    // operand2 a second time whenever the destination register is also the
    // first source.
    println("\t_mm_store_si128((__m128i*){}.u8, _mm_sub_epi16(_mm_load_si128((__m128i*){}.u16), _mm_load_si128((__m128i*){}.u16)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
    break;
case PPC_INST_VUPKD3D128: