Vector comparison instructions.

This commit is contained in:
Skyth 2024-09-17 21:32:13 +03:00
parent 34d44c3c0a
commit 7d7ab43bbc
2 changed files with 55 additions and 9 deletions

View File

@ -1241,6 +1241,7 @@ int main()
// no op
break;
// TODO: vector instructions require denormal flushing checks
case PPC_INST_VADDFP:
case PPC_INST_VADDFP128:
println("\t_mm_store_ps(ctx.v{}.f32, _mm_add_ps(_mm_load_ps(ctx.v{}.f32), _mm_load_ps(ctx.v{}.f32)));", insn.operands[0], insn.operands[1], insn.operands[2]);
@ -1291,17 +1292,20 @@ int main()
println("\t_mm_store_si128((__m128i*)ctx.v{}.u8, _mm_avg_epu8(_mm_load_si128((__m128i*)ctx.v{}.u8), _mm_load_si128((__m128i*)ctx.v{}.u8)));", insn.operands[0], insn.operands[1], insn.operands[2]);
break;
case PPC_INST_VCTSXS:
case PPC_INST_VCFPSXWS128:
// TODO: saturate
println("\t_mm_store_si128((__m128i*)ctx.v{}.s32, _mm_cvttps_epi32(_mm_mul_ps(_mm_load_ps(ctx.v{}.f32), _mm_set1_ps(exp2f({})))));", insn.operands[0], insn.operands[1], insn.operands[2]);
break;
case PPC_INST_VCFSX:
// NOTE: ignoring the immediate since it's always 0 in the game code
println("\t_mm_store_ps(ctx.v{}.f32, _mm_cvtepi32_ps(_mm_load_si128((__m128i*)ctx.v{}.u32)));", insn.operands[0], insn.operands[1]);
case PPC_INST_VCSXWFP128:
println("\t_mm_store_ps(ctx.v{}.f32, _mm_mul_ps(_mm_cvtepi32_ps(_mm_load_si128((__m128i*)ctx.v{}.u32)), _mm_set1_ps(ldexpf(1.0f, {}))));", insn.operands[0], insn.operands[1], -int32_t(insn.operands[2]));
break;
case PPC_INST_VCFUX:
// NOTE: ignoring the immediate since it's always 0 in the game code
println("\t_mm_store_ps(ctx.v{}.f32, _mm_cvtepu32_ps(_mm_load_si128((__m128i*)ctx.v{}.u32)));", insn.operands[0], insn.operands[1]);
case PPC_INST_VCUXWFP128:
println("\t_mm_store_ps(ctx.v{}.f32, _mm_mul_ps(_mm_cvtepu32_ps(_mm_load_si128((__m128i*)ctx.v{}.u32)), _mm_set1_ps(ldexpf(1.0f, {}))));", insn.operands[0], insn.operands[1], -int32_t(insn.operands[2]));
break;
case PPC_INST_VCMPBFP128:
@ -1313,27 +1317,38 @@ int main()
break;
case PPC_INST_VCMPEQUB:
println("\t_mm_store_si128((__m128i*)ctx.v{}.u8, _mm_cmpeq_epi8(_mm_load_si128((__m128i*)ctx.v{}.u8), _mm_load_si128((__m128i*)ctx.v{}.u8)));", insn.operands[0], insn.operands[1], insn.operands[2]);
if (insn.opcode->opcode & 0x1)
println("\tctx.cr6.setFromMask(_mm_load_si128((__m128i*)ctx.v{}.u8), 0xFFFF);", insn.operands[0]);
break;
case PPC_INST_VCMPEQUW:
case PPC_INST_VCMPEQUW128:
println("\t_mm_store_si128((__m128i*)ctx.v{}.u8, _mm_cmpeq_epi32(_mm_load_si128((__m128i*)ctx.v{}.u32), _mm_load_si128((__m128i*)ctx.v{}.u32)));", insn.operands[0], insn.operands[1], insn.operands[2]);
if ((insn.opcode->id == PPC_INST_VCMPEQUW && (insn.opcode->opcode & 0x1)) || (insn.opcode->id == PPC_INST_VCMPEQUW128 && (insn.opcode->opcode & 0x40)))
println("\tctx.cr6.setFromMask(_mm_load_ps(ctx.v{}.f32), 0xF);", insn.operands[0]);
break;
case PPC_INST_VCMPGEFP:
case PPC_INST_VCMPGEFP128:
// TODO: . variant
println("\t_mm_store_ps(ctx.v{}.f32, _mm_cmpge_ps(_mm_load_ps(ctx.v{}.f32), _mm_load_ps(ctx.v{}.f32)));", insn.operands[0], insn.operands[1], insn.operands[2]);
if (insn.opcode->id == PPC_INST_VCMPGEFP128 && (insn.opcode->opcode & 0x40))
println("\tctx.cr6.setFromMask(_mm_load_ps(ctx.v{}.f32), 0xF);", insn.operands[0]);
break;
case PPC_INST_VCMPGTFP:
case PPC_INST_VCMPGTFP128:
// TODO: . variant
println("\t_mm_store_ps(ctx.v{}.f32, _mm_cmpgt_ps(_mm_load_ps(ctx.v{}.f32), _mm_load_ps(ctx.v{}.f32)));", insn.operands[0], insn.operands[1], insn.operands[2]);
if (insn.opcode->id == PPC_INST_VCMPGTFP128 && (insn.opcode->opcode & 0x40))
println("\tctx.cr6.setFromMask(_mm_load_ps(ctx.v{}.f32), 0xF);", insn.operands[0]);
break;
case PPC_INST_VCMPGTUB:
println("\t_mm_store_si128((__m128i*)ctx.v{}.u8, _mm_cmpgt_epu8(_mm_load_si128((__m128i*)ctx.v{}.u8), _mm_load_si128((__m128i*)ctx.v{}.u8)));", insn.operands[0], insn.operands[1], insn.operands[2]);
break;
case PPC_INST_VCMPGTUH:
case PPC_INST_VCSXWFP128:
case PPC_INST_VCTSXS:
case PPC_INST_VCUXWFP128:
println("\t_mm_store_si128((__m128i*)ctx.v{}.u8, _mm_cmpgt_epu16(_mm_load_si128((__m128i*)ctx.v{}.u16), _mm_load_si128((__m128i*)ctx.v{}.u16)));", insn.operands[0], insn.operands[1], insn.operands[2]);
break;
case PPC_INST_VEXPTEFP128:

View File

@ -81,6 +81,24 @@ struct PPCCRRegister
eq = left == right;
un = isnan(left) || isnan(right);
}
// Derives this CR field from a packed-float comparison mask.
//   mask - comparison result; only the top bit of each 32-bit lane is examined.
//   imm  - the movemask value meaning "every lane matched" (callers pass 0xF).
// lt is set when the extracted bits equal imm, eq when they are all zero;
// gt and so are always cleared.
void setFromMask(__m128 mask, int imm)
{
    // Collapse the four lane sign bits into the low four bits of an int.
    const int laneBits = _mm_movemask_ps(mask);
    const bool everyLaneSet = (laneBits == imm);
    const bool noLaneSet = (laneBits == 0);

    lt = everyLaneSet;
    gt = 0;
    eq = noLaneSet;
    so = 0;
}
// Derives this CR field from a packed-integer comparison mask.
//   mask - comparison result; only the top bit of each byte is examined.
//   imm  - the movemask value meaning "every byte matched" (callers pass 0xFFFF).
// lt is set when the extracted bits equal imm, eq when they are all zero;
// gt and so are always cleared.
void setFromMask(__m128i mask, int imm)
{
    // Collapse the sixteen byte sign bits into the low sixteen bits of an int.
    const int byteBits = _mm_movemask_epi8(mask);
    const bool everyByteSet = (byteBits == imm);
    const bool noByteSet = (byteBits == 0);

    lt = everyByteSet;
    gt = 0;
    eq = noByteSet;
    so = 0;
}
};
struct alignas(0x10) PPCVRegister
@ -455,3 +473,16 @@ inline __m128i _mm_perm_epi8(__m128i a, __m128i b, __m128i c)
__m128i e = _mm_sub_epi8(d, _mm_and_si128(c, d));
return _mm_blendv_epi8(_mm_shuffle_epi8(a, e), _mm_shuffle_epi8(b, e), _mm_slli_epi32(c, 3));
}
// Unsigned per-byte "greater than": each of the 16 result bytes is all ones
// where the byte of a exceeds the byte of b as unsigned values, and zero otherwise.
inline __m128i _mm_cmpgt_epu8(__m128i a, __m128i b)
{
    // Only a signed byte compare is used here; flipping the top bit of every
    // byte maps unsigned order onto signed order so the signed compare works.
    const __m128i signBias = _mm_set1_epi8(0x80);
    const __m128i lhs = _mm_xor_si128(a, signBias);
    const __m128i rhs = _mm_xor_si128(b, signBias);
    return _mm_cmpgt_epi8(lhs, rhs);
}
// Unsigned per-word "greater than": each of the 8 result words is all ones
// where the 16-bit lane of a exceeds that of b as unsigned values, and zero otherwise.
inline __m128i _mm_cmpgt_epu16(__m128i a, __m128i b)
{
    // Only a signed 16-bit compare is used here; flipping the top bit of every
    // lane maps unsigned order onto signed order so the signed compare works.
    const __m128i signBias = _mm_set1_epi16(0x8000);
    const __m128i lhs = _mm_xor_si128(a, signBias);
    const __m128i rhs = _mm_xor_si128(b, signBias);
    return _mm_cmpgt_epi16(lhs, rhs);
}