mirror of
https://github.com/hedge-dev/XenonRecomp.git
synced 2025-06-03 15:52:05 +00:00
Implement even more vector instructions & add missing ones.
This commit is contained in:
parent
3a887e6e2c
commit
ee4cab12b8
@ -1246,11 +1246,11 @@ int main()
|
||||
break;
|
||||
|
||||
case PPC_INST_VAVGSB:
|
||||
// TODO: no _mm_avg_epi8
|
||||
println("\t_mm_store_si128((__m128i*)ctx.v{}.u8, _mm_avg_epi8(_mm_load_si128((__m128i*)ctx.v{}.u8), _mm_load_si128((__m128i*)ctx.v{}.u8)));", insn.operands[0], insn.operands[1], insn.operands[2]);
|
||||
break;
|
||||
|
||||
case PPC_INST_VAVGSH:
|
||||
// TODO: no _mm_avg_epi16
|
||||
println("\t_mm_store_si128((__m128i*)ctx.v{}.u8, _mm_avg_epi16(_mm_load_si128((__m128i*)ctx.v{}.u8), _mm_load_si128((__m128i*)ctx.v{}.u8)));", insn.operands[0], insn.operands[1], insn.operands[2]);
|
||||
break;
|
||||
|
||||
case PPC_INST_VAVGUB:
|
||||
@ -1266,6 +1266,8 @@ int main()
|
||||
break;
|
||||
|
||||
case PPC_INST_VCFUX:
|
||||
// NOTE: ignoring the immediate since it's always 0 in the game code
|
||||
println("\t_mm_store_ps(ctx.v{}.f32, _mm_cvtepu32_ps(_mm_load_si128((__m128i*)ctx.v{}.u32)));", insn.operands[0], insn.operands[1]);
|
||||
break;
|
||||
|
||||
case PPC_INST_VCMPBFP128:
|
||||
@ -1279,10 +1281,20 @@ int main()
|
||||
case PPC_INST_VCMPEQUB:
|
||||
case PPC_INST_VCMPEQUW:
|
||||
case PPC_INST_VCMPEQUW128:
|
||||
break;
|
||||
|
||||
case PPC_INST_VCMPGEFP:
|
||||
case PPC_INST_VCMPGEFP128:
|
||||
// TODO: . variant
|
||||
println("\t_mm_store_ps(ctx.v{}.f32, _mm_cmpge_ps(_mm_load_ps(ctx.v{}.f32), _mm_load_ps(ctx.v{}.f32)));", insn.operands[0], insn.operands[1], insn.operands[2]);
|
||||
break;
|
||||
|
||||
case PPC_INST_VCMPGTFP:
|
||||
case PPC_INST_VCMPGTFP128:
|
||||
// TODO: . variant
|
||||
println("\t_mm_store_ps(ctx.v{}.f32, _mm_cmpgt_ps(_mm_load_ps(ctx.v{}.f32), _mm_load_ps(ctx.v{}.f32)));", insn.operands[0], insn.operands[1], insn.operands[2]);
|
||||
break;
|
||||
|
||||
case PPC_INST_VCMPGTUB:
|
||||
case PPC_INST_VCMPGTUH:
|
||||
case PPC_INST_VCSXWFP128:
|
||||
@ -1291,13 +1303,15 @@ int main()
|
||||
break;
|
||||
|
||||
case PPC_INST_VEXPTEFP128:
|
||||
// TODO: this doesn't exist despite being documented?
|
||||
//println("\t_mm_store_ps(ctx.v{}.f32, _mm_exp2_ps(_mm_load_ps(ctx.v{}.f32)));", insn.operands[0], insn.operands[1]);
|
||||
// TODO: vectorize
|
||||
for (size_t i = 0; i < 4; i++)
|
||||
println("\tctx.v{}.f32[{}] = exp2f(ctx.v{}.f32[{}]);", insn.operands[0], i, insn.operands[1], i);
|
||||
break;
|
||||
|
||||
case PPC_INST_VLOGEFP128:
|
||||
// TODO: this doesn't exist despite being documented?
|
||||
//println("\t_mm_store_ps(ctx.v{}.f32, _mm_log2_ps(_mm_load_ps(ctx.v{}.f32)));", insn.operands[0], insn.operands[1]);
|
||||
// TODO: vectorize
|
||||
for (size_t i = 0; i < 4; i++)
|
||||
println("\tctx.v{}.f32[{}] = log2f(ctx.v{}.f32[{}]);", insn.operands[0], i, insn.operands[1], i);
|
||||
break;
|
||||
|
||||
case PPC_INST_VMADDCFP128:
|
||||
@ -1415,8 +1429,13 @@ int main()
|
||||
break;
|
||||
|
||||
case PPC_INST_VSLB:
|
||||
break;
|
||||
|
||||
case PPC_INST_VSLDOI:
|
||||
case PPC_INST_VSLDOI128:
|
||||
println("\t_mm_store_si128((__m128i*)ctx.v{}.u8, _mm_alignr_epi8(_mm_load_si128((__m128i*)ctx.v{}.u8), _mm_load_si128((__m128i*)ctx.v{}.u8), {}));", insn.operands[0], insn.operands[1], insn.operands[2], 16 - insn.operands[3]);
|
||||
break;
|
||||
|
||||
case PPC_INST_VSLW128:
|
||||
case PPC_INST_VSPLTH:
|
||||
case PPC_INST_VSPLTISB:
|
||||
@ -1436,13 +1455,35 @@ int main()
|
||||
break;
|
||||
|
||||
case PPC_INST_VSUBSWS:
|
||||
break;
|
||||
|
||||
case PPC_INST_VSUBUBS:
|
||||
println("\t_mm_store_si128((__m128i*)ctx.v{}.u8, _mm_subs_epu8(_mm_load_si128((__m128i*)ctx.v{}.u8), _mm_load_si128((__m128i*)ctx.v{}.u8)));", insn.operands[0], insn.operands[1], insn.operands[2]);
|
||||
break;
|
||||
|
||||
case PPC_INST_VSUBUHM:
|
||||
println("\t_mm_store_si128((__m128i*)ctx.v{}.u8, _mm_sub_epi16(_mm_load_si128((__m128i*)ctx.v{}.u8), _mm_load_si128((__m128i*)ctx.v{}.u8)));", insn.operands[0], insn.operands[1], insn.operands[2]);
|
||||
break;
|
||||
|
||||
case PPC_INST_VUPKD3D128:
|
||||
break;
|
||||
|
||||
case PPC_INST_VUPKHSB128:
|
||||
println("\t_mm_store_si128((__m128i*)ctx.v{}.s16, _mm_cvtepi8_epi16(_mm_unpackhi_epi64(_mm_load_si128((__m128i*)ctx.v{}.s8), _mm_load_si128((__m128i*)ctx.v{}.s8))));", insn.operands[0], insn.operands[1], insn.operands[1]);
|
||||
break;
|
||||
|
||||
case PPC_INST_VUPKHSH:
|
||||
case PPC_INST_VUPKHSH128:
|
||||
println("\t_mm_store_si128((__m128i*)ctx.v{}.s32, _mm_cvtepi16_epi32(_mm_unpackhi_epi64(_mm_load_si128((__m128i*)ctx.v{}.s16), _mm_load_si128((__m128i*)ctx.v{}.s16))));", insn.operands[0], insn.operands[1], insn.operands[1]);
|
||||
break;
|
||||
|
||||
case PPC_INST_VUPKLSB128:
|
||||
println("\t_mm_store_si128((__m128i*)ctx.v{}.s32, _mm_cvtepi8_epi16(_mm_load_si128((__m128i*)ctx.v{}.s16)));", insn.operands[0], insn.operands[1]);
|
||||
break;
|
||||
|
||||
case PPC_INST_VUPKLSH:
|
||||
case PPC_INST_VUPKLSH128:
|
||||
println("\t_mm_store_si128((__m128i*)ctx.v{}.s32, _mm_cvtepi16_epi32(_mm_load_si128((__m128i*)ctx.v{}.s16)));", insn.operands[0], insn.operands[1]);
|
||||
break;
|
||||
|
||||
case PPC_INST_VXOR:
|
||||
|
@ -346,3 +346,24 @@ inline __m128i _mm_adds_epu32(__m128i a, __m128i b)
|
||||
{
|
||||
return _mm_add_epi32(_mm_min_epu32(a, _mm_xor_si128(b, _mm_cmpeq_epi32(b, b))), b);
|
||||
}
|
||||
|
||||
// Rounding average of sixteen signed 8-bit lanes: each result lane is
// (a + b + 1) >> 1 computed without intermediate overflow.
//
// SSE2 only provides the unsigned form (_mm_avg_epu8), so both inputs are
// moved into the unsigned domain by flipping their sign bit (equivalent to
// adding 128 modulo 256), averaged there, and the result is moved back by
// flipping the sign bit again.
inline __m128i _mm_avg_epi8(__m128i a, __m128i b)
{
    const __m128i signBit = _mm_set1_epi8((char)0x80);
    const __m128i biasedA = _mm_xor_si128(a, signBit);
    const __m128i biasedB = _mm_xor_si128(b, signBit);
    const __m128i biasedAvg = _mm_avg_epu8(biasedA, biasedB);
    return _mm_xor_si128(biasedAvg, signBit);
}
|
||||
|
||||
// Rounding average of eight signed 16-bit lanes: each result lane is
// (a + b + 1) >> 1 computed without intermediate overflow.
//
// SSE2 only provides the unsigned form (_mm_avg_epu16), so both inputs are
// moved into the unsigned domain by flipping their sign bit (equivalent to
// adding 32768 modulo 65536), averaged there, and the result is moved back
// by flipping the sign bit again.
inline __m128i _mm_avg_epi16(__m128i a, __m128i b)
{
    const __m128i signBit = _mm_set1_epi16((short)0x8000);
    const __m128i biasedA = _mm_xor_si128(a, signBit);
    const __m128i biasedB = _mm_xor_si128(b, signBit);
    const __m128i biasedAvg = _mm_avg_epu16(biasedA, biasedB);
    return _mm_xor_si128(biasedAvg, signBit);
}
|
||||
|
||||
// Converts four unsigned 32-bit integer lanes to single-precision floats.
//
// SSE2 only has a signed conversion (_mm_cvtepi32_ps), so each lane is split
// into its upper and lower 16-bit halves. Both halves are non-negative and
// small enough to convert exactly, and scaling the upper half by 65536 is
// exact as well, so the only rounding happens in the final addition. The
// result is therefore correctly rounded for every input.
//
// The previous "v/2 + (v - v/2)" formulation overflowed the signed range for
// v == 0xFFFFFFFF (v - v/2 == 0x80000000 converts to -2^31), producing 0.0f
// instead of 4294967296.0f, and could round twice for other large values.
inline __m128 _mm_cvtepu32_ps(__m128i v)
{
    const __m128i lowMask = _mm_set1_epi32(0xFFFF);
    const __m128 highScale = _mm_set1_ps(65536.0f);

    const __m128i lowHalf = _mm_and_si128(v, lowMask);  // bits 0..15
    const __m128i highHalf = _mm_srli_epi32(v, 16);     // bits 16..31

    const __m128 lowF = _mm_cvtepi32_ps(lowHalf);
    const __m128 highF = _mm_mul_ps(_mm_cvtepi32_ps(highHalf), highScale);

    return _mm_add_ps(highF, lowF);
}
|
||||
|
6
thirdparty/disasm/ppc-dis.c
vendored
6
thirdparty/disasm/ppc-dis.c
vendored
@ -1825,10 +1825,10 @@ extract_vperm (unsigned long insn,
|
||||
#define VXR_MASK VXR(0x3f, 0x3ff, 1)
|
||||
|
||||
/* A VX128 form instruction. */
|
||||
#define VX128(op, xop) (OP(op) | (((unsigned long)(xop)) & 0x3d0))
|
||||
#define VX128(op, xop) (OP(op) | (((unsigned long)(xop)) & 0x7d0))
|
||||
|
||||
/* The mask for a VX128 form instruction. */
|
||||
#define VX128_MASK VX(0x3f, 0x3d0)
|
||||
#define VX128_MASK VX(0x3f, 0x7d0)
|
||||
|
||||
/* A VX128_1 form instruction. */
|
||||
#define VX128_1(op, xop) (OP(op) | (((unsigned long)(xop)) & 0x7f3))
|
||||
@ -2543,6 +2543,8 @@ const struct powerpc_opcode powerpc_opcodes[] = {
|
||||
{ "vmrglw128", VX128(6, 832), VX128_MASK, PPCVEC128, { VD128, VA128, VB128 }, PPC_INST_VMRGLW128 },
|
||||
{ "vupkhsb128", VX128(6, 896), VX128_MASK, PPCVEC128, { VD128, VB128 }, PPC_INST_VUPKHSB128 },
|
||||
{ "vupklsb128", VX128(6, 960), VX128_MASK, PPCVEC128, { VD128, VB128 }, PPC_INST_VUPKLSB128 },
|
||||
{ "vupkhsh128", VX128(6, 1952), VX128_MASK, PPCVEC128, { VD128, VB128 }, PPC_INST_VUPKHSH128 },
|
||||
{ "vupklsh128", VX128(6, 2016), VX128_MASK, PPCVEC128, { VD128, VB128 }, PPC_INST_VUPKLSH128 },
|
||||
|
||||
|
||||
{ "evaddw", VX(4, 512), VX_MASK, PPCSPE, { RS, RA, RB }, PPC_INST_EVADDW },
|
||||
|
2
thirdparty/disasm/ppc-inst.h
vendored
2
thirdparty/disasm/ppc-inst.h
vendored
@ -1844,3 +1844,5 @@
|
||||
#define PPC_INST_DENBCDQ 1842
|
||||
#define PPC_INST_FCFID 1843
|
||||
#define PPC_INST_DIEXQ 1844
|
||||
#define PPC_INST_VUPKHSH128 1845
|
||||
#define PPC_INST_VUPKLSH128 1846
|
||||
|
Loading…
x
Reference in New Issue
Block a user