From ee4cab12b8195249e0aa43288df9fa1913d7f76c Mon Sep 17 00:00:00 2001
From: Skyth <19259897+blueskythlikesclouds@users.noreply.github.com>
Date: Mon, 16 Sep 2024 16:03:23 +0300
Subject: [PATCH] Implement even more vector instructions & add missing ones.

---
Review notes (this region is ignored when the patch is applied):
 - _mm_cvtepu32_ps converts each lane through its 16-bit halves so lanes with
   bit 31 set (e.g. 0xFFFFFFFF) convert to the expected positive value.
 - The vupklsb128 emitter names the s16/s8 views, matching vupkhsb128; the
   generated code is otherwise unchanged.
 - Context indentation and blank lines were reconstructed from a
   whitespace-collapsed copy; apply with `git apply --ignore-whitespace` if a
   hunk's context does not match.

 PowerRecomp/main.cpp         | 53 ++++++++++++++++++++++++++++++++----
 PowerUtils/ppc_context.h     | 21 ++++++++++++++
 thirdparty/disasm/ppc-dis.c  |  6 ++--
 thirdparty/disasm/ppc-inst.h |  2 ++
 4 files changed, 74 insertions(+), 8 deletions(-)

diff --git a/PowerRecomp/main.cpp b/PowerRecomp/main.cpp
index 9f85259..e49da1e 100644
--- a/PowerRecomp/main.cpp
+++ b/PowerRecomp/main.cpp
@@ -1246,11 +1246,11 @@ int main()
                 break;
 
             case PPC_INST_VAVGSB:
-                // TODO: no _mm_avg_epi8
+                println("\t_mm_store_si128((__m128i*)ctx.v{}.u8, _mm_avg_epi8(_mm_load_si128((__m128i*)ctx.v{}.u8), _mm_load_si128((__m128i*)ctx.v{}.u8)));", insn.operands[0], insn.operands[1], insn.operands[2]);
                 break;
 
             case PPC_INST_VAVGSH:
-                // TODO: no _mm_avg_epi16
+                println("\t_mm_store_si128((__m128i*)ctx.v{}.u8, _mm_avg_epi16(_mm_load_si128((__m128i*)ctx.v{}.u8), _mm_load_si128((__m128i*)ctx.v{}.u8)));", insn.operands[0], insn.operands[1], insn.operands[2]);
                 break;
 
             case PPC_INST_VAVGUB:
@@ -1266,6 +1266,8 @@ int main()
                 break;
 
             case PPC_INST_VCFUX:
+                // NOTE: ignoring the immediate since it's always 0 in the game code
+                println("\t_mm_store_ps(ctx.v{}.f32, _mm_cvtepu32_ps(_mm_load_si128((__m128i*)ctx.v{}.u32)));", insn.operands[0], insn.operands[1]);
                 break;
 
             case PPC_INST_VCMPBFP128:
@@ -1279,10 +1281,20 @@ int main()
             case PPC_INST_VCMPEQUB:
             case PPC_INST_VCMPEQUW:
             case PPC_INST_VCMPEQUW128:
+                break;
+
             case PPC_INST_VCMPGEFP:
             case PPC_INST_VCMPGEFP128:
+                // TODO: . variant
+                println("\t_mm_store_ps(ctx.v{}.f32, _mm_cmpge_ps(_mm_load_ps(ctx.v{}.f32), _mm_load_ps(ctx.v{}.f32)));", insn.operands[0], insn.operands[1], insn.operands[2]);
+                break;
+
             case PPC_INST_VCMPGTFP:
             case PPC_INST_VCMPGTFP128:
+                // TODO: . variant
+                println("\t_mm_store_ps(ctx.v{}.f32, _mm_cmpgt_ps(_mm_load_ps(ctx.v{}.f32), _mm_load_ps(ctx.v{}.f32)));", insn.operands[0], insn.operands[1], insn.operands[2]);
+                break;
+
             case PPC_INST_VCMPGTUB:
             case PPC_INST_VCMPGTUH:
             case PPC_INST_VCSXWFP128:
@@ -1291,13 +1303,15 @@ int main()
                 break;
 
             case PPC_INST_VEXPTEFP128:
-                // TODO: this doesn't exist despite being documented?
-                //println("\t_mm_store_ps(ctx.v{}.f32, _mm_exp2_ps(_mm_load_ps(ctx.v{}.f32)));", insn.operands[0], insn.operands[1]);
+                // TODO: vectorize
+                for (size_t i = 0; i < 4; i++)
+                    println("\tctx.v{}.f32[{}] = exp2f(ctx.v{}.f32[{}]);", insn.operands[0], i, insn.operands[1], i);
                 break;
 
             case PPC_INST_VLOGEFP128:
-                // TODO: this doesn't exist despite being documented?
-                //println("\t_mm_store_ps(ctx.v{}.f32, _mm_log2_ps(_mm_load_ps(ctx.v{}.f32)));", insn.operands[0], insn.operands[1]);
+                // TODO: vectorize
+                for (size_t i = 0; i < 4; i++)
+                    println("\tctx.v{}.f32[{}] = log2f(ctx.v{}.f32[{}]);", insn.operands[0], i, insn.operands[1], i);
                 break;
 
             case PPC_INST_VMADDCFP128:
@@ -1415,8 +1429,13 @@ int main()
                 break;
 
             case PPC_INST_VSLB:
+                break;
+
             case PPC_INST_VSLDOI:
             case PPC_INST_VSLDOI128:
+                println("\t_mm_store_si128((__m128i*)ctx.v{}.u8, _mm_alignr_epi8(_mm_load_si128((__m128i*)ctx.v{}.u8), _mm_load_si128((__m128i*)ctx.v{}.u8), {}));", insn.operands[0], insn.operands[1], insn.operands[2], 16 - insn.operands[3]);
+                break;
+
             case PPC_INST_VSLW128:
             case PPC_INST_VSPLTH:
             case PPC_INST_VSPLTISB:
@@ -1436,13 +1455,35 @@ int main()
                 break;
 
             case PPC_INST_VSUBSWS:
+                break;
+
             case PPC_INST_VSUBUBS:
+                println("\t_mm_store_si128((__m128i*)ctx.v{}.u8, _mm_subs_epu8(_mm_load_si128((__m128i*)ctx.v{}.u8), _mm_load_si128((__m128i*)ctx.v{}.u8)));", insn.operands[0], insn.operands[1], insn.operands[2]);
+                break;
+
             case PPC_INST_VSUBUHM:
+                println("\t_mm_store_si128((__m128i*)ctx.v{}.u8, _mm_sub_epi16(_mm_load_si128((__m128i*)ctx.v{}.u8), _mm_load_si128((__m128i*)ctx.v{}.u8)));", insn.operands[0], insn.operands[1], insn.operands[2]);
+                break;
+
             case PPC_INST_VUPKD3D128:
+                break;
+
             case PPC_INST_VUPKHSB128:
+                println("\t_mm_store_si128((__m128i*)ctx.v{}.s16, _mm_cvtepi8_epi16(_mm_unpackhi_epi64(_mm_load_si128((__m128i*)ctx.v{}.s8), _mm_load_si128((__m128i*)ctx.v{}.s8))));", insn.operands[0], insn.operands[1], insn.operands[1]);
+                break;
+
             case PPC_INST_VUPKHSH:
+            case PPC_INST_VUPKHSH128:
+                println("\t_mm_store_si128((__m128i*)ctx.v{}.s32, _mm_cvtepi16_epi32(_mm_unpackhi_epi64(_mm_load_si128((__m128i*)ctx.v{}.s16), _mm_load_si128((__m128i*)ctx.v{}.s16))));", insn.operands[0], insn.operands[1], insn.operands[1]);
+                break;
+
             case PPC_INST_VUPKLSB128:
+                println("\t_mm_store_si128((__m128i*)ctx.v{}.s16, _mm_cvtepi8_epi16(_mm_load_si128((__m128i*)ctx.v{}.s8)));", insn.operands[0], insn.operands[1]);
+                break;
+
             case PPC_INST_VUPKLSH:
+            case PPC_INST_VUPKLSH128:
+                println("\t_mm_store_si128((__m128i*)ctx.v{}.s32, _mm_cvtepi16_epi32(_mm_load_si128((__m128i*)ctx.v{}.s16)));", insn.operands[0], insn.operands[1]);
                 break;
 
             case PPC_INST_VXOR:
diff --git a/PowerUtils/ppc_context.h b/PowerUtils/ppc_context.h
index f8f5b63..d102533 100644
--- a/PowerUtils/ppc_context.h
+++ b/PowerUtils/ppc_context.h
@@ -346,3 +346,24 @@ inline __m128i _mm_adds_epu32(__m128i a, __m128i b)
 {
     return _mm_add_epi32(_mm_min_epu32(a, _mm_xor_si128(b, _mm_cmpeq_epi32(b, b))), b);
 }
+
+inline __m128i _mm_avg_epi8(__m128i a, __m128i b)
+{
+    __m128i c = _mm_set1_epi8(char(128));
+    return _mm_add_epi8(c, _mm_avg_epu8(_mm_add_epi8(c, a), _mm_add_epi8(c, b)));
+}
+
+inline __m128i _mm_avg_epi16(__m128i a, __m128i b)
+{
+    __m128i c = _mm_set1_epi16(short(32768));
+    return _mm_add_epi16(c, _mm_avg_epu16(_mm_add_epi16(c, a), _mm_add_epi16(c, b)));
+}
+
+inline __m128 _mm_cvtepu32_ps(__m128i v)
+{
+    // Convert the upper and lower 16-bit halves separately: each half is non-negative and
+    // exactly representable, and hi * 65536 is exact, so lanes >= 0x80000000 round only once.
+    __m128 hi = _mm_cvtepi32_ps(_mm_srli_epi32(v, 16));
+    __m128 lo = _mm_cvtepi32_ps(_mm_and_si128(v, _mm_set1_epi32(0xFFFF)));
+    return _mm_add_ps(_mm_mul_ps(hi, _mm_set1_ps(65536.0f)), lo);
+}
diff --git a/thirdparty/disasm/ppc-dis.c b/thirdparty/disasm/ppc-dis.c
index bb2f5aa..7b1facb 100644
--- a/thirdparty/disasm/ppc-dis.c
+++ b/thirdparty/disasm/ppc-dis.c
@@ -1825,10 +1825,10 @@ extract_vperm (unsigned long insn,
 #define VXR_MASK VXR(0x3f, 0x3ff, 1)
 
 /* An VX128 form instruction. */
-#define VX128(op, xop) (OP(op) | (((unsigned long)(xop)) & 0x3d0))
+#define VX128(op, xop) (OP(op) | (((unsigned long)(xop)) & 0x7d0))
 
 /* The mask for an VX form instruction. */
-#define VX128_MASK VX(0x3f, 0x3d0)
+#define VX128_MASK VX(0x3f, 0x7d0)
 
 /* An VX128 form instruction. */
 #define VX128_1(op, xop) (OP(op) | (((unsigned long)(xop)) & 0x7f3))
@@ -2543,6 +2543,8 @@ const struct powerpc_opcode powerpc_opcodes[] = {
 { "vmrglw128", VX128(6, 832), VX128_MASK, PPCVEC128, { VD128, VA128, VB128 }, PPC_INST_VMRGLW128 },
 { "vupkhsb128", VX128(6, 896), VX128_MASK, PPCVEC128, { VD128, VB128 }, PPC_INST_VUPKHSB128 },
 { "vupklsb128", VX128(6, 960), VX128_MASK, PPCVEC128, { VD128, VB128 }, PPC_INST_VUPKLSB128 },
+{ "vupkhsh128", VX128(6, 1952), VX128_MASK, PPCVEC128, { VD128, VB128 }, PPC_INST_VUPKHSH128 },
+{ "vupklsh128", VX128(6, 2016), VX128_MASK, PPCVEC128, { VD128, VB128 }, PPC_INST_VUPKLSH128 },
 
 
 { "evaddw", VX(4, 512), VX_MASK, PPCSPE, { RS, RA, RB }, PPC_INST_EVADDW },
diff --git a/thirdparty/disasm/ppc-inst.h b/thirdparty/disasm/ppc-inst.h
index 197ff41..89df9ee 100644
--- a/thirdparty/disasm/ppc-inst.h
+++ b/thirdparty/disasm/ppc-inst.h
@@ -1844,3 +1844,5 @@
 #define PPC_INST_DENBCDQ 1842
 #define PPC_INST_FCFID 1843
 #define PPC_INST_DIEXQ 1844
+#define PPC_INST_VUPKHSH128 1845
+#define PPC_INST_VUPKLSH128 1846