From cdfa0907fdd95833d1d55c23dac480226c4ba600 Mon Sep 17 00:00:00 2001
From: DeaTh-G <55578911+DeaTh-G@users.noreply.github.com>
Date: Sun, 13 Oct 2024 18:12:23 +0200
Subject: [PATCH] Add more instructions regarding Bakugan Battle Brawlers

---
Review notes (this section is ignored when the patch is applied):
 - LFDU/LFDUX/STFDU now address the floating-point register file via f(),
   matching the existing LFD/LFDX/STFD cases.
 - LHAU emits its statements with println(), matching LHZU.
 - VMINSH emits _mm_min_epi16 instead of _mm_max_epi16.
 - VSLH/VSRAB/VSRAH/VSRH read the per-lane shift count from a byte index
   that stays inside the 16-byte vector (i for bytes, i * 2 for halfwords).
 - VADDSWS is left as submitted but flagged: _mm_adds_epi32 is not a
   standard SSE/AVX intrinsic.

 PowerRecomp/recompiler.cpp | 219 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 218 insertions(+), 1 deletion(-)

diff --git a/PowerRecomp/recompiler.cpp b/PowerRecomp/recompiler.cpp
index e330293..94379f2 100644
--- a/PowerRecomp/recompiler.cpp
+++ b/PowerRecomp/recompiler.cpp
@@ -703,6 +703,10 @@ bool Recompiler::Recompile(
         // no op
         break;
 
+    case PPC_INST_DCBST:
+        // no op
+        break;
+
     case PPC_INST_DCBTST:
         // no op
         break;
@@ -930,6 +934,12 @@ bool Recompiler::Recompile(
         println("{}.u32);", r(insn.operands[2]));
         break;
 
+    case PPC_INST_LBZUX:
+        println("\t{} = {}.u32 + {}.u32;", ea(), r(insn.operands[1]), r(insn.operands[2]));
+        println("\t{}.u64 = PPC_LOAD_U8({});", r(insn.operands[0]), ea());
+        println("\t{}.u32 = {};", r(insn.operands[1]), ea());
+        break;
+
     case PPC_INST_LD:
         print("\t{}.u64 = PPC_LOAD_U64(", r(insn.operands[0]));
         if (insn.operands[2] != 0)
@@ -958,6 +968,12 @@ bool Recompiler::Recompile(
         println("{}.u32);", r(insn.operands[2]));
         break;
 
+    case PPC_INST_LDUX:
+        println("\t{} = {}.u32 + {}.u32;", ea(), r(insn.operands[1]), r(insn.operands[2]));
+        println("\t{}.u64 = PPC_LOAD_U64({});", r(insn.operands[0]), ea());
+        println("\t{}.u32 = {};", r(insn.operands[1]), ea());
+        break;
+
     case PPC_INST_LFD:
         printSetFlushMode(false);
         print("\t{}.u64 = PPC_LOAD_U64(", f(insn.operands[0]));
@@ -966,6 +982,13 @@ bool Recompiler::Recompile(
         println("{});", int32_t(insn.operands[1]));
         break;
 
+    case PPC_INST_LFDU:
+        printSetFlushMode(false);
+        println("\t{} = {} + {}.u32;", ea(), int32_t(insn.operands[1]), r(insn.operands[2]));
+        println("\t{}.u64 = PPC_LOAD_U64({});", f(insn.operands[0]), ea()); // destination is a floating-point register, as in LFD
+        println("\t{}.u32 = {};", r(insn.operands[2]), ea());
+        break;
+
     case PPC_INST_LFDX:
         printSetFlushMode(false);
         print("\t{}.u64 = PPC_LOAD_U64(", f(insn.operands[0]));
@@ -974,6 +997,13 @@ bool Recompiler::Recompile(
         println("{}.u32);", r(insn.operands[2]));
         break;
 
+    case PPC_INST_LFDUX:
+        printSetFlushMode(false);
+        println("\t{} = {}.u32 + {}.u32;", ea(), r(insn.operands[1]), r(insn.operands[2]));
+        println("\t{}.u64 = PPC_LOAD_U64({});", f(insn.operands[0]), ea()); // destination is a floating-point register, as in LFDX
+        println("\t{}.u32 = {};", r(insn.operands[1]), ea());
+        break;
+
     case PPC_INST_LFS:
         printSetFlushMode(false);
         print("\t{}.u32 = PPC_LOAD_U32(", temp());
@@ -983,6 +1013,14 @@ bool Recompiler::Recompile(
         println("\t{}.f64 = double({}.f32);", f(insn.operands[0]), temp());
         break;
 
+    case PPC_INST_LFSU:
+        printSetFlushMode(false);
+        println("\t{} = {} + {}.u32;", ea(), int32_t(insn.operands[1]), r(insn.operands[2]));
+        println("\t{}.u32 = PPC_LOAD_U32({});", temp(), ea());
+        println("\t{}.u32 = {};", r(insn.operands[2]), ea());
+        println("\t{}.f64 = double({}.f32);", f(insn.operands[0]), temp());
+        break;
+
     case PPC_INST_LFSX:
         printSetFlushMode(false);
         print("\t{}.u32 = PPC_LOAD_U32(", temp());
@@ -992,6 +1030,14 @@ bool Recompiler::Recompile(
         println("\t{}.f64 = double({}.f32);", f(insn.operands[0]), temp());
         break;
 
+    case PPC_INST_LFSUX:
+        printSetFlushMode(false);
+        println("\t{} = {}.u32 + {}.u32;", ea(), r(insn.operands[1]), r(insn.operands[2]));
+        println("\t{}.u32 = PPC_LOAD_U32({});", temp(), ea());
+        println("\t{}.u32 = {};", r(insn.operands[1]), ea());
+        println("\t{}.f64 = double({}.f32);", f(insn.operands[0]), temp());
+        break;
+
     case PPC_INST_LHA:
         print("\t{}.s64 = int16_t(PPC_LOAD_U16(", r(insn.operands[0]));
         if (insn.operands[2] != 0)
@@ -999,6 +1045,12 @@ bool Recompiler::Recompile(
         println("{}));", int32_t(insn.operands[1]));
         break;
 
+    case PPC_INST_LHAU:
+        println("\t{} = {} + {}.u32;", ea(), int32_t(insn.operands[1]), r(insn.operands[2]));
+        println("\t{}.s64 = int16_t(PPC_LOAD_U16({}));", r(insn.operands[0]), ea());
+        println("\t{}.u32 = {};", r(insn.operands[2]), ea());
+        break;
+
     case PPC_INST_LHAX:
         print("\t{}.s64 = int16_t(PPC_LOAD_U16(", r(insn.operands[0]));
         if (insn.operands[1] != 0)
@@ -1013,6 +1065,12 @@ bool Recompiler::Recompile(
         println("{});", int32_t(insn.operands[1]));
         break;
 
+    case PPC_INST_LHZU:
+        println("\t{} = {} + {}.u32;", ea(), int32_t(insn.operands[1]), r(insn.operands[2]));
+        println("\t{}.u64 = PPC_LOAD_U16({});", r(insn.operands[0]), ea());
+        println("\t{}.u32 = {};", r(insn.operands[2]), ea());
+        break;
+
     case PPC_INST_LHZX:
         print("\t{}.u64 = PPC_LOAD_U16(", r(insn.operands[0]));
         if (insn.operands[1] != 0)
@@ -1020,6 +1078,12 @@ bool Recompiler::Recompile(
         println("{}.u32);", r(insn.operands[2]));
         break;
 
+    case PPC_INST_LHZUX:
+        println("\t{} = {}.u32 + {}.u32;", ea(), r(insn.operands[1]), r(insn.operands[2]));
+        println("\t{}.u64 = PPC_LOAD_U16({});", r(insn.operands[0]), ea());
+        println("\t{}.u32 = {};", r(insn.operands[1]), ea());
+        break;
+
     case PPC_INST_LI:
         println("\t{}.s64 = {};", r(insn.operands[0]), int32_t(insn.operands[1]));
         break;
@@ -1032,6 +1096,7 @@ bool Recompiler::Recompile(
     case PPC_INST_LVEWX128:
     case PPC_INST_LVX:
     case PPC_INST_LVX128:
+    case PPC_INST_LVEHX:
         // NOTE: for endian swapping, we reverse the whole vector instead of individual elements.
         // this is accounted for in every instruction (eg. dp3 sums yzw instead of xyz)
         print("\t_mm_store_si128((__m128i*){}.u8, _mm_shuffle_epi8(_mm_load_si128((__m128i*)(base + ((", v(insn.operands[0]));
@@ -1127,6 +1192,12 @@ bool Recompiler::Recompile(
         println("{}.u32);", r(insn.operands[2]));
         break;
 
+    case PPC_INST_LWZUX:
+        println("\t{} = {}.u32 + {}.u32;", ea(), r(insn.operands[1]), r(insn.operands[2]));
+        println("\t{}.u64 = PPC_LOAD_U32({});", r(insn.operands[0]), ea());
+        println("\t{}.u32 = {};", r(insn.operands[1]), ea());
+        break;
+
     case PPC_INST_MFCR:
         for (size_t i = 0; i < 32; i++)
         {
@@ -1388,6 +1459,12 @@ bool Recompiler::Recompile(
         println("{}.u32, {}.u8);", r(insn.operands[2]), r(insn.operands[0]));
         break;
 
+    case PPC_INST_STBUX:
+        println("\t{} = {}.u32 + {}.u32;", ea(), r(insn.operands[1]), r(insn.operands[2]));
+        println("\tPPC_STORE_U8({}, {}.u8);", ea(), r(insn.operands[0]));
+        println("\t{}.u32 = {};", r(insn.operands[1]), ea());
+        break;
+
     case PPC_INST_STD:
         print("{}", mmioStore() ? "\tPPC_MM_STORE_U64(" : "\tPPC_STORE_U64(");
         if (insn.operands[2] != 0)
@@ -1418,6 +1495,12 @@ bool Recompiler::Recompile(
         println("{}.u32, {}.u64);", r(insn.operands[2]), r(insn.operands[0]));
         break;
 
+    case PPC_INST_STDUX:
+        println("\t{} = {}.u32 + {}.u32;", ea(), r(insn.operands[1]), r(insn.operands[2]));
+        println("\tPPC_STORE_U64({}, {}.u64);", ea(), r(insn.operands[0]));
+        println("\t{}.u32 = {};", r(insn.operands[1]), ea());
+        break;
+
     case PPC_INST_STFD:
         printSetFlushMode(false);
         print("{}", mmioStore() ? "\tPPC_MM_STORE_U64(" : "\tPPC_STORE_U64(");
@@ -1426,6 +1509,13 @@ bool Recompiler::Recompile(
         println("{}, {}.u64);", int32_t(insn.operands[1]), f(insn.operands[0]));
         break;
 
+    case PPC_INST_STFDU:
+        printSetFlushMode(false);
+        println("\t{} = {} + {}.u32;", ea(), int32_t(insn.operands[1]), r(insn.operands[2]));
+        println("\tPPC_STORE_U64({}, {}.u64);", ea(), f(insn.operands[0])); // source is a floating-point register, as in STFD
+        println("\t{}.u32 = {};", r(insn.operands[2]), ea());
+        break;
+
     case PPC_INST_STFDX:
         printSetFlushMode(false);
         print("{}", mmioStore() ? "\tPPC_MM_STORE_U64(" : "\tPPC_STORE_U64(");
@@ -1451,6 +1541,14 @@ bool Recompiler::Recompile(
         println("{}, {}.u32);", int32_t(insn.operands[1]), temp());
         break;
 
+    case PPC_INST_STFSU:
+        printSetFlushMode(false);
+        println("\t{}.f32 = float({}.f64);", temp(), f(insn.operands[0]));
+        println("\t{} = {} + {}.u32;", ea(), int32_t(insn.operands[1]), r(insn.operands[2]));
+        println("\tPPC_STORE_U32({}, {}.u32);", ea(), temp());
+        println("\t{}.u32 = {};", r(insn.operands[2]), ea());
+        break;
+
     case PPC_INST_STFSX:
         printSetFlushMode(false);
         println("\t{}.f32 = float({}.f64);", temp(), f(insn.operands[0]));
@@ -1460,6 +1558,14 @@ bool Recompiler::Recompile(
         println("{}.u32, {}.u32);", r(insn.operands[2]), temp());
         break;
 
+    case PPC_INST_STFSUX:
+        printSetFlushMode(false);
+        println("\t{}.f32 = float({}.f64);", temp(), f(insn.operands[0]));
+        println("\t{} = {}.u32 + {}.u32;", ea(), r(insn.operands[1]), r(insn.operands[2]));
+        println("\tPPC_STORE_U32({}, {}.u32);", ea(), temp());
+        println("\t{}.u32 = {};", r(insn.operands[1]), ea());
+        break;
+
     case PPC_INST_STH:
         print("{}", mmioStore() ? "\tPPC_MM_STORE_U16(" : "\tPPC_STORE_U16(");
         if (insn.operands[2] != 0)
@@ -1467,6 +1573,18 @@ bool Recompiler::Recompile(
         println("{}, {}.u16);", int32_t(insn.operands[1]), r(insn.operands[0]));
         break;
 
+    case PPC_INST_STHU:
+        println("\t{} = {} + {}.u32;", ea(), int32_t(insn.operands[1]), r(insn.operands[2]));
+        println("\tPPC_STORE_U16({}, {}.u16);", ea(), r(insn.operands[0]));
+        println("\t{}.u32 = {};", r(insn.operands[2]), ea());
+        break;
+
+    case PPC_INST_STHUX:
+        println("\t{} = {}.u32 + {}.u32;", ea(), r(insn.operands[1]), r(insn.operands[2]));
+        println("\tPPC_STORE_U16({}, {}.u16);", ea(), r(insn.operands[0]));
+        println("\t{}.u32 = {};", r(insn.operands[1]), ea());
+        break;
+
     case PPC_INST_STHBRX:
         print("{}", mmioStore() ? "\tPPC_MM_STORE_U16(" : "\tPPC_STORE_U16(");
         if (insn.operands[1] != 0)
@@ -1635,10 +1753,18 @@ bool Recompiler::Recompile(
         println("\t_mm_store_ps({}.f32, _mm_add_ps(_mm_load_ps({}.f32), _mm_load_ps({}.f32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
         break;
 
+    case PPC_INST_VADDSBS:
+        println("\t_mm_store_si128((__m128i*){}.s8, _mm_adds_epi8(_mm_load_si128((__m128i*){}.s8), _mm_load_si128((__m128i*){}.s8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
+        break;
+
     case PPC_INST_VADDSHS:
         println("\t_mm_store_si128((__m128i*){}.s16, _mm_adds_epi16(_mm_load_si128((__m128i*){}.s16), _mm_load_si128((__m128i*){}.s16)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
         break;
 
+    case PPC_INST_VADDSWS: // NOTE(review): _mm_adds_epi32 is not a standard SSE/AVX intrinsic; confirm the runtime headers define it, otherwise emit a scalar saturating loop like VSUBSWS
+        println("\t_mm_store_si128((__m128i*){}.s32, _mm_adds_epi32(_mm_load_si128((__m128i*){}.s32), _mm_load_si128((__m128i*){}.s32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
+        break;
+
     case PPC_INST_VADDUBM:
         println("\t_mm_store_si128((__m128i*){}.u8, _mm_add_epi8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
         break;
@@ -1680,6 +1806,10 @@ bool Recompiler::Recompile(
         println("\t_mm_store_si128((__m128i*){}.u8, _mm_avg_epu8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
         break;
 
+    case PPC_INST_VAVGUH:
+        println("\t_mm_store_si128((__m128i*){}.u8, _mm_avg_epu16(_mm_load_si128((__m128i*){}.u16), _mm_load_si128((__m128i*){}.u16)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
+        break;
+
     case PPC_INST_VCTSXS:
     case PPC_INST_VCFPSXWS128:
         printSetFlushMode(true);
@@ -1743,6 +1873,12 @@ bool Recompiler::Recompile(
             println("\t{}.setFromMask(_mm_load_si128((__m128i*){}.u8), 0xFFFF);", cr(6), v(insn.operands[0]));
         break;
 
+    case PPC_INST_VCMPEQUH:
+        println("\t_mm_store_si128((__m128i*){}.u8, _mm_cmpeq_epi16(_mm_load_si128((__m128i*){}.u16), _mm_load_si128((__m128i*){}.u16)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
+        if (strchr(insn.opcode->name, '.'))
+            println("\t{}.setFromMask(_mm_load_si128((__m128i*){}.u16), 0xFFFF);", cr(6), v(insn.operands[0]));
+        break;
+
     case PPC_INST_VCMPEQUW:
     case PPC_INST_VCMPEQUW128:
         println("\t_mm_store_si128((__m128i*){}.u8, _mm_cmpeq_epi32(_mm_load_si128((__m128i*){}.u32), _mm_load_si128((__m128i*){}.u32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
@@ -1768,10 +1904,26 @@ bool Recompiler::Recompile(
 
     case PPC_INST_VCMPGTUB:
         println("\t_mm_store_si128((__m128i*){}.u8, _mm_cmpgt_epu8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
+        if (strchr(insn.opcode->name, '.'))
+            println("\t{}.setFromMask(_mm_load_si128((__m128i*){}.u8), 0xFFFF);", cr(6), v(insn.operands[0]));
         break;
 
     case PPC_INST_VCMPGTUH:
         println("\t_mm_store_si128((__m128i*){}.u8, _mm_cmpgt_epu16(_mm_load_si128((__m128i*){}.u16), _mm_load_si128((__m128i*){}.u16)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
+        if (strchr(insn.opcode->name, '.'))
+            println("\t{}.setFromMask(_mm_load_si128((__m128i*){}.u16), 0xFFFF);", cr(6), v(insn.operands[0]));
+        break;
+
+    case PPC_INST_VCMPGTSH:
+        println("\t_mm_store_si128((__m128i*){}.s8, _mm_cmpgt_epi16(_mm_load_si128((__m128i*){}.u16), _mm_load_si128((__m128i*){}.u16)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
+        if (strchr(insn.opcode->name, '.'))
+            println("\t{}.setFromMask(_mm_load_si128((__m128i*){}.s16), 0xFFFF);", cr(6), v(insn.operands[0]));
+        break;
+
+    case PPC_INST_VCMPGTSW:
+        println("\t_mm_store_si128((__m128i*){}.s8, _mm_cmpgt_epi32(_mm_load_si128((__m128i*){}.u32), _mm_load_si128((__m128i*){}.u32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
+        if (strchr(insn.opcode->name, '.'))
+            println("\t{}.setFromMask(_mm_load_si128((__m128i*){}.s32), 0xFFFF);", cr(6), v(insn.operands[0]));
         break;
 
     case PPC_INST_VEXPTEFP:
@@ -1803,10 +1955,18 @@ bool Recompiler::Recompile(
         println("\t_mm_store_ps({}.f32, _mm_max_ps(_mm_load_ps({}.f32), _mm_load_ps({}.f32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
         break;
 
+    case PPC_INST_VMAXSH:
+        println("\t_mm_store_si128((__m128i*){}.u16, _mm_max_epi16(_mm_load_si128((__m128i*){}.u16), _mm_load_si128((__m128i*){}.u16)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
+        break;
+
     case PPC_INST_VMAXSW:
         println("\t_mm_store_si128((__m128i*){}.u32, _mm_max_epi32(_mm_load_si128((__m128i*){}.u32), _mm_load_si128((__m128i*){}.u32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
         break;
 
+    case PPC_INST_VMINSH:
+        println("\t_mm_store_si128((__m128i*){}.u16, _mm_min_epi16(_mm_load_si128((__m128i*){}.u16), _mm_load_si128((__m128i*){}.u16)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
+        break;
+
     case PPC_INST_VMINFP:
     case PPC_INST_VMINFP128:
         printSetFlushMode(true);
@@ -1915,11 +2075,26 @@ bool Recompiler::Recompile(
         }
         break;
 
+    case PPC_INST_VPKSHSS:
+    case PPC_INST_VPKSHSS128:
+        println("\t_mm_store_si128((__m128i*){}.u8, _mm_packs_epi16(_mm_load_si128((__m128i*){}.s16), _mm_load_si128((__m128i*){}.s16)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1]));
+        break;
+
+    case PPC_INST_VPKSWSS:
+    case PPC_INST_VPKSWSS128:
+        println("\t_mm_store_si128((__m128i*){}.u8, _mm_packs_epi32(_mm_load_si128((__m128i*){}.s32), _mm_load_si128((__m128i*){}.s32)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1]));
+        break;
+
     case PPC_INST_VPKSHUS:
     case PPC_INST_VPKSHUS128:
         println("\t_mm_store_si128((__m128i*){}.u8, _mm_packus_epi16(_mm_load_si128((__m128i*){}.s16), _mm_load_si128((__m128i*){}.s16)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1]));
         break;
 
+    case PPC_INST_VPKSWUS:
+    case PPC_INST_VPKSWUS128:
+        println("\t_mm_store_si128((__m128i*){}.u8, _mm_packus_epi32(_mm_load_si128((__m128i*){}.s32), _mm_load_si128((__m128i*){}.s32)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1]));
+        break;
+
     case PPC_INST_VREFP:
     case PPC_INST_VREFP128:
         // TODO: see if we can use rcp safely
@@ -1961,6 +2136,7 @@ bool Recompiler::Recompile(
         break;
 
     case PPC_INST_VSEL:
+    case PPC_INST_VSEL128:
         println("\t_mm_store_si128((__m128i*){}.u8, _mm_or_si128(_mm_andnot_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)), _mm_and_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8))));", v(insn.operands[0]), v(insn.operands[3]), v(insn.operands[1]), v(insn.operands[3]), v(insn.operands[2]));
         break;
 
@@ -1970,6 +2146,12 @@ bool Recompiler::Recompile(
             println("\t{}.u8[{}] = {}.u8[{}] << ({}.u8[{}] & 0x7);", v(insn.operands[0]), i, v(insn.operands[1]), i, v(insn.operands[2]), i);
         break;
 
+    case PPC_INST_VSLH:
+        // TODO: vectorize; the shift count for halfword lane i is read from byte i * 2 (assumes u8/u16 alias the same 16 bytes, as VSRAW does with i * 4)
+        for (size_t i = 0; i < 8; i++)
+            println("\t{}.u16[{}] = {}.u16[{}] << ({}.u8[{}] & 0xF);", v(insn.operands[0]), i, v(insn.operands[1]), i, v(insn.operands[2]), i * 2);
+        break;
+
     case PPC_INST_VSLDOI:
     case PPC_INST_VSLDOI128:
         println("\t_mm_store_si128((__m128i*){}.u8, _mm_alignr_epi8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8), {}));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]), 16 - insn.operands[3]);
@@ -2003,6 +2185,10 @@ bool Recompiler::Recompile(
         println("\t_mm_store_si128((__m128i*){}.u8, _mm_set1_epi8(char(0x{:X})));", v(insn.operands[0]), insn.operands[1]);
         break;
 
+    case PPC_INST_VSPLTISH:
+        println("\t_mm_store_si128((__m128i*){}.u16, _mm_set1_epi16(int(0x{:X})));", v(insn.operands[0]), insn.operands[1]);
+        break;
+
     case PPC_INST_VSPLTISW:
     case PPC_INST_VSPLTISW128:
         println("\t_mm_store_si128((__m128i*){}.u32, _mm_set1_epi32(int(0x{:X})));", v(insn.operands[0]), insn.operands[1]);
@@ -2022,6 +2208,18 @@ bool Recompiler::Recompile(
         println("\t_mm_store_si128((__m128i*){}.u8, _mm_vsr(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
         break;
 
+    case PPC_INST_VSRAB:
+        // TODO: vectorize, ensure endianness is correct; byte lane i takes its shift count from byte i (16 lanes, index must stay below 16)
+        for (size_t i = 0; i < 16; i++)
+            println("\t{}.s8[{}] = {}.s8[{}] >> ({}.u8[{}] & 0x7);", v(insn.operands[0]), i, v(insn.operands[1]), i, v(insn.operands[2]), i);
+        break;
+
+    case PPC_INST_VSRAH:
+        // TODO: vectorize, ensure endianness is correct; halfword lane i takes its shift count from byte i * 2
+        for (size_t i = 0; i < 8; i++)
+            println("\t{}.s16[{}] = {}.s16[{}] >> ({}.u8[{}] & 0xF);", v(insn.operands[0]), i, v(insn.operands[1]), i, v(insn.operands[2]), i * 2);
+        break;
+
     case PPC_INST_VSRAW:
     case PPC_INST_VSRAW128:
         // TODO: vectorize, ensure endianness is correct
@@ -2029,6 +2227,12 @@ bool Recompiler::Recompile(
             println("\t{}.s32[{}] = {}.s32[{}] >> ({}.u8[{}] & 0x1F);", v(insn.operands[0]), i, v(insn.operands[1]), i, v(insn.operands[2]), i * 4);
         break;
 
+    case PPC_INST_VSRH:
+        // TODO: vectorize, ensure endianness is correct; halfword lane i takes its shift count from byte i * 2
+        for (size_t i = 0; i < 8; i++)
+            println("\t{}.u16[{}] = {}.u16[{}] >> ({}.u8[{}] & 0xF);", v(insn.operands[0]), i, v(insn.operands[1]), i, v(insn.operands[2]), i * 2);
+        break;
+
     case PPC_INST_VSRW:
     case PPC_INST_VSRW128:
         // TODO: vectorize, ensure endianness is correct
@@ -2042,6 +2246,15 @@ bool Recompiler::Recompile(
         println("\t_mm_store_ps({}.f32, _mm_sub_ps(_mm_load_ps({}.f32), _mm_load_ps({}.f32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
         break;
 
+    case PPC_INST_VSUBSHS:
+        // TODO: vectorize
+        for (size_t i = 0; i < 8; i++)
+        {
+            println("\t{}.s64 = int64_t({}.s16[{}]) - int64_t({}.s16[{}]);", temp(), v(insn.operands[1]), i, v(insn.operands[2]), i);
+            println("\t{}.s16[{}] = {}.s64 > SHRT_MAX ? SHRT_MAX : {}.s64 < SHRT_MIN ? SHRT_MIN : {}.s64;", v(insn.operands[0]), i, temp(), temp(), temp());
+        }
+        break;
+
     case PPC_INST_VSUBSWS:
         // TODO: vectorize
         for (size_t i = 0; i < 4; i++)
@@ -2055,8 +2268,12 @@ bool Recompiler::Recompile(
         println("\t_mm_store_si128((__m128i*){}.u8, _mm_subs_epu8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
         break;
 
+    case PPC_INST_VSUBUBM:
+        println("\t_mm_store_si128((__m128i*){}.u8, _mm_sub_epi8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
+        break;
+
     case PPC_INST_VSUBUHM:
-        println("\t_mm_store_si128((__m128i*){}.u8, _mm_sub_epi16(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
+        println("\t_mm_store_si128((__m128i*){}.u8, _mm_sub_epi16(_mm_load_si128((__m128i*){}.u16), _mm_load_si128((__m128i*){}.u16)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
         break;
 
     case PPC_INST_VUPKD3D128: