Implement even more vector instructions & add missing ones.

This commit is contained in:
Skyth 2024-09-16 16:03:23 +03:00
parent 3a887e6e2c
commit ee4cab12b8
4 changed files with 74 additions and 8 deletions

View File

@ -1246,11 +1246,11 @@ int main()
break;
case PPC_INST_VAVGSB:
// TODO: no _mm_avg_epi8
println("\t_mm_store_si128((__m128i*)ctx.v{}.u8, _mm_avg_epi8(_mm_load_si128((__m128i*)ctx.v{}.u8), _mm_load_si128((__m128i*)ctx.v{}.u8)));", insn.operands[0], insn.operands[1], insn.operands[2]);
break;
case PPC_INST_VAVGSH:
// TODO: no _mm_avg_epi16
println("\t_mm_store_si128((__m128i*)ctx.v{}.u8, _mm_avg_epi16(_mm_load_si128((__m128i*)ctx.v{}.u8), _mm_load_si128((__m128i*)ctx.v{}.u8)));", insn.operands[0], insn.operands[1], insn.operands[2]);
break;
case PPC_INST_VAVGUB:
@ -1266,6 +1266,8 @@ int main()
break;
case PPC_INST_VCFUX:
// NOTE: ignoring the immediate since it's always 0 in the game code
println("\t_mm_store_ps(ctx.v{}.f32, _mm_cvtepu32_ps(_mm_load_si128((__m128i*)ctx.v{}.u32)));", insn.operands[0], insn.operands[1]);
break;
case PPC_INST_VCMPBFP128:
@ -1279,10 +1281,20 @@ int main()
case PPC_INST_VCMPEQUB:
case PPC_INST_VCMPEQUW:
case PPC_INST_VCMPEQUW128:
break;
case PPC_INST_VCMPGEFP:
case PPC_INST_VCMPGEFP128:
// TODO: . variant
println("\t_mm_store_ps(ctx.v{}.f32, _mm_cmpge_ps(_mm_load_ps(ctx.v{}.f32), _mm_load_ps(ctx.v{}.f32)));", insn.operands[0], insn.operands[1], insn.operands[2]);
break;
case PPC_INST_VCMPGTFP:
case PPC_INST_VCMPGTFP128:
// TODO: . variant
println("\t_mm_store_ps(ctx.v{}.f32, _mm_cmpgt_ps(_mm_load_ps(ctx.v{}.f32), _mm_load_ps(ctx.v{}.f32)));", insn.operands[0], insn.operands[1], insn.operands[2]);
break;
case PPC_INST_VCMPGTUB:
case PPC_INST_VCMPGTUH:
case PPC_INST_VCSXWFP128:
@ -1291,13 +1303,15 @@ int main()
break;
case PPC_INST_VEXPTEFP128:
// TODO: this doesn't exist despite being documented?
//println("\t_mm_store_ps(ctx.v{}.f32, _mm_exp2_ps(_mm_load_ps(ctx.v{}.f32)));", insn.operands[0], insn.operands[1]);
// TODO: vectorize
for (size_t i = 0; i < 4; i++)
println("\tctx.v{}.f32[{}] = exp2f(ctx.v{}.f32[{}]);", insn.operands[0], i, insn.operands[1], i);
break;
case PPC_INST_VLOGEFP128:
// TODO: this doesn't exist despite being documented?
//println("\t_mm_store_ps(ctx.v{}.f32, _mm_log2_ps(_mm_load_ps(ctx.v{}.f32)));", insn.operands[0], insn.operands[1]);
// TODO: vectorize
for (size_t i = 0; i < 4; i++)
println("\tctx.v{}.f32[{}] = log2f(ctx.v{}.f32[{}]);", insn.operands[0], i, insn.operands[1], i);
break;
case PPC_INST_VMADDCFP128:
@ -1415,8 +1429,13 @@ int main()
break;
case PPC_INST_VSLB:
break;
case PPC_INST_VSLDOI:
case PPC_INST_VSLDOI128:
println("\t_mm_store_si128((__m128i*)ctx.v{}.u8, _mm_alignr_epi8(_mm_load_si128((__m128i*)ctx.v{}.u8), _mm_load_si128((__m128i*)ctx.v{}.u8), {}));", insn.operands[0], insn.operands[1], insn.operands[2], 16 - insn.operands[3]);
break;
case PPC_INST_VSLW128:
case PPC_INST_VSPLTH:
case PPC_INST_VSPLTISB:
@ -1436,13 +1455,35 @@ int main()
break;
case PPC_INST_VSUBSWS:
break;
case PPC_INST_VSUBUBS:
println("\t_mm_store_si128((__m128i*)ctx.v{}.u8, _mm_subs_epu8(_mm_load_si128((__m128i*)ctx.v{}.u8), _mm_load_si128((__m128i*)ctx.v{}.u8)));", insn.operands[0], insn.operands[1], insn.operands[2]);
break;
case PPC_INST_VSUBUHM:
println("\t_mm_store_si128((__m128i*)ctx.v{}.u8, _mm_sub_epi16(_mm_load_si128((__m128i*)ctx.v{}.u8), _mm_load_si128((__m128i*)ctx.v{}.u8)));", insn.operands[0], insn.operands[1], insn.operands[2]);
break;
case PPC_INST_VUPKD3D128:
break;
case PPC_INST_VUPKHSB128:
println("\t_mm_store_si128((__m128i*)ctx.v{}.s16, _mm_cvtepi8_epi16(_mm_unpackhi_epi64(_mm_load_si128((__m128i*)ctx.v{}.s8), _mm_load_si128((__m128i*)ctx.v{}.s8))));", insn.operands[0], insn.operands[1], insn.operands[1]);
break;
case PPC_INST_VUPKHSH:
case PPC_INST_VUPKHSH128:
println("\t_mm_store_si128((__m128i*)ctx.v{}.s32, _mm_cvtepi16_epi32(_mm_unpackhi_epi64(_mm_load_si128((__m128i*)ctx.v{}.s16), _mm_load_si128((__m128i*)ctx.v{}.s16))));", insn.operands[0], insn.operands[1], insn.operands[1]);
break;
case PPC_INST_VUPKLSB128:
println("\t_mm_store_si128((__m128i*)ctx.v{}.s32, _mm_cvtepi8_epi16(_mm_load_si128((__m128i*)ctx.v{}.s16)));", insn.operands[0], insn.operands[1]);
break;
case PPC_INST_VUPKLSH:
case PPC_INST_VUPKLSH128:
println("\t_mm_store_si128((__m128i*)ctx.v{}.s32, _mm_cvtepi16_epi32(_mm_load_si128((__m128i*)ctx.v{}.s16)));", insn.operands[0], insn.operands[1]);
break;
case PPC_INST_VXOR:

View File

@ -346,3 +346,24 @@ inline __m128i _mm_adds_epu32(__m128i a, __m128i b)
{
return _mm_add_epi32(_mm_min_epu32(a, _mm_xor_si128(b, _mm_cmpeq_epi32(b, b))), b);
}
// Per-lane average of signed 8-bit integers, built on top of the unsigned
// _mm_avg_epu8 since this helper has no direct signed counterpart to call.
// Adding 0x80 (with wrap-around) to every byte maps the signed range onto the
// unsigned range while preserving ordering; the same wrapping add afterwards
// maps the averaged value back to the signed range.
inline __m128i _mm_avg_epi8(__m128i a, __m128i b)
{
    const __m128i bias = _mm_set1_epi8(char(128));
    const __m128i biasedA = _mm_add_epi8(bias, a);
    const __m128i biasedB = _mm_add_epi8(bias, b);
    const __m128i biasedAvg = _mm_avg_epu8(biasedA, biasedB);
    return _mm_add_epi8(bias, biasedAvg);
}
// Per-lane average of signed 16-bit integers, built on top of the unsigned
// _mm_avg_epu16. Adding 0x8000 (with wrap-around) to every lane maps the
// signed range onto the unsigned range while preserving ordering; the same
// wrapping add afterwards maps the averaged value back to the signed range.
inline __m128i _mm_avg_epi16(__m128i a, __m128i b)
{
    const __m128i bias = _mm_set1_epi16(short(32768));
    const __m128i biasedA = _mm_add_epi16(bias, a);
    const __m128i biasedB = _mm_add_epi16(bias, b);
    const __m128i biasedAvg = _mm_avg_epu16(biasedA, biasedB);
    return _mm_add_epi16(bias, biasedAvg);
}
// Converts four unsigned 32-bit integers to single-precision floats.
//
// _mm_cvtepi32_ps only understands signed inputs, so each lane is split into
// its upper and lower 16-bit halves. Both halves are below 65536 and therefore
// convert exactly; scaling the upper half by 65536 is also exact (power of
// two), so the final addition is the only operation that rounds. This gives a
// correctly rounded result for every input under the current rounding mode.
//
// The previous "v - (v >> 1)" split produced 0x80000000 for the input
// 0xFFFFFFFF, which the signed conversion read as INT_MIN and turned the
// result into 0.0f; it also rounded twice for large inputs.
inline __m128 _mm_cvtepu32_ps(__m128i v)
{
    const __m128i upper = _mm_srli_epi32(v, 16);
    const __m128i lower = _mm_and_si128(v, _mm_set1_epi32(0xFFFF));
    const __m128 upperScaled = _mm_mul_ps(_mm_cvtepi32_ps(upper), _mm_set1_ps(65536.0f));
    return _mm_add_ps(upperScaled, _mm_cvtepi32_ps(lower));
}

View File

@ -1825,10 +1825,10 @@ extract_vperm (unsigned long insn,
#define VXR_MASK VXR(0x3f, 0x3ff, 1)
/* An VX128 form instruction. */
#define VX128(op, xop) (OP(op) | (((unsigned long)(xop)) & 0x3d0))
#define VX128(op, xop) (OP(op) | (((unsigned long)(xop)) & 0x7d0))
/* The mask for a VX128 form instruction. */
#define VX128_MASK VX(0x3f, 0x3d0)
#define VX128_MASK VX(0x3f, 0x7d0)
/* A VX128_1 form instruction. */
#define VX128_1(op, xop) (OP(op) | (((unsigned long)(xop)) & 0x7f3))
@ -2543,6 +2543,8 @@ const struct powerpc_opcode powerpc_opcodes[] = {
{ "vmrglw128", VX128(6, 832), VX128_MASK, PPCVEC128, { VD128, VA128, VB128 }, PPC_INST_VMRGLW128 },
{ "vupkhsb128", VX128(6, 896), VX128_MASK, PPCVEC128, { VD128, VB128 }, PPC_INST_VUPKHSB128 },
{ "vupklsb128", VX128(6, 960), VX128_MASK, PPCVEC128, { VD128, VB128 }, PPC_INST_VUPKLSB128 },
{ "vupkhsh128", VX128(6, 1952), VX128_MASK, PPCVEC128, { VD128, VB128 }, PPC_INST_VUPKHSH128 },
{ "vupklsh128", VX128(6, 2016), VX128_MASK, PPCVEC128, { VD128, VB128 }, PPC_INST_VUPKLSH128 },
{ "evaddw", VX(4, 512), VX_MASK, PPCSPE, { RS, RA, RB }, PPC_INST_EVADDW },

View File

@ -1844,3 +1844,5 @@
#define PPC_INST_DENBCDQ 1842
#define PPC_INST_FCFID 1843
#define PPC_INST_DIEXQ 1844
#define PPC_INST_VUPKHSH128 1845
#define PPC_INST_VUPKLSH128 1846