diff --git a/PowerRecomp/recompiler.cpp b/PowerRecomp/recompiler.cpp index 8593e30..b1dbd8b 100644 --- a/PowerRecomp/recompiler.cpp +++ b/PowerRecomp/recompiler.cpp @@ -1881,6 +1881,16 @@ bool Recompiler::Recompile( println("_mm_load_ps({}.f32)));", v(insn.operands[1])); break; + case PPC_INST_VCTUXS: + case PPC_INST_VCFPUXWS128: + printSetFlushMode(true); + print("\t_mm_store_si128((__m128i*){}.u32, _mm_vctuxs(", v(insn.operands[0])); + if (insn.operands[2] != 0) + println("_mm_mul_ps(_mm_load_ps({}.f32), _mm_set1_ps({}))));", v(insn.operands[1]), 1u << insn.operands[2]); + else + println("_mm_load_ps({}.f32)));", v(insn.operands[1])); + break; + case PPC_INST_VCFSX: case PPC_INST_VCSXWFP128: { @@ -2198,6 +2208,14 @@ bool Recompiler::Recompile( break; } + case PPC_INST_VRLH: + for (size_t i = 0; i < 8; i++) + { + println("\t{0}.u16[{1}] = ({2}.u16[{1}] << ({3}.u16[{1}] & 0xF)) | ({2}.u16[{1}] >> (16 - ({3}.u16[{1}] & 0xF)));", vTemp(), i, v(insn.operands[1]), v(insn.operands[2])); + } + println("{} = {};", v(insn.operands[0]), vTemp()); + break; + case PPC_INST_VRSQRTEFP: case PPC_INST_VRSQRTEFP128: // TODO: see if we can use rsqrt safely @@ -2752,7 +2770,7 @@ void Recompiler::SaveCurrentOutData(const std::string_view& name) bool shouldWrite = true; // Check if an identical file already exists first to not trigger recompilation - std::string filePath = std::format("{}{}/{}", config.directoryPath, config.outDirectoryPath, name.empty() ? cppName : name); + std::string filePath = std::format("{}/{}/{}", config.directoryPath, config.outDirectoryPath, name.empty() ? 
// Converts four packed floats to unsigned 32-bit integers, truncating toward
// zero and saturating to the range [0, 0xFFFFFFFF].
//
//   - Negative lanes and NaN lanes produce 0 (_mm_max_ps returns its second
//     operand when the first one is NaN).
//   - Lanes >= 2^32, including +infinity, produce 0xFFFFFFFF.
//
// The only float -> int32 conversion used here is the signed one, so lanes
// >= 2^31 are biased down by 2^31 before the conversion and the bias is added
// back afterwards in the integer domain.
//
// Every __m128 <-> __m128i reinterpretation is spelled out with an explicit
// cast so the function does not rely on implicit (lax) vector conversions.
// The bias is applied with a masked subtract rather than a blend, so only
// SSE2 instructions are required.
inline __m128i _mm_vctuxs(__m128 src1)
{
    const __m128 bias = _mm_set1_ps(2147483648.0f);  // 2^31 as a float
    const __m128i signBit = _mm_set1_epi32(INT_MIN); // 0x80000000 in every lane

    // Clamp negative values (and NaN) to +0.0f.
    const __m128 clamped = _mm_max_ps(src1, _mm_setzero_ps());

    // All-ones in lanes that are too large for the signed conversion.
    const __m128 isHigh = _mm_cmpge_ps(clamped, bias);

    // Subtract 2^31 in the high lanes and +0.0f elsewhere; subtracting zero
    // leaves the remaining lanes untouched.
    const __m128 reduced = _mm_sub_ps(clamped, _mm_and_ps(isHigh, bias));

    __m128i dest = _mm_cvttps_epi32(reduced);

    // The signed conversion reports out-of-range inputs as 0x80000000. Since
    // `reduced` is never negative, that value can only mean the original lane
    // was >= 2^32 and has to saturate.
    const __m128i overflow = _mm_cmpeq_epi32(dest, signBit);

    // Add the 2^31 bias back to the lanes it was removed from.
    dest = _mm_add_epi32(dest, _mm_and_si128(_mm_castps_si128(isHigh), signBit));

    // Saturated lanes become 0xFFFFFFFF.
    return _mm_or_si128(dest, overflow);
}