Make some FPU operations more explicit.

This commit is contained in:
Skyth 2024-09-27 12:37:12 +03:00
parent edbb86913b
commit 8639a690e5
2 changed files with 29 additions and 21 deletions

View File

@ -560,7 +560,7 @@ bool Recompiler::Recompile(
case PPC_INST_FADDS:
printSetFlushMode(false);
println("\t{}.f64 = float({}.f64 + {}.f64);", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[2]));
println("\t{}.f64 = double(float({}.f64 + {}.f64));", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[2]));
break;
case PPC_INST_FCFID:
@ -575,17 +575,17 @@ bool Recompiler::Recompile(
case PPC_INST_FCTID:
printSetFlushMode(false);
println("\t{}.s64 = ({}.f64 > double(LLONG_MAX)) ? LLONG_MAX : _mm_cvtsd_si64(_mm_load1_pd(&{}.f64));", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[1]));
println("\t{}.s64 = ({}.f64 > double(LLONG_MAX)) ? LLONG_MAX : _mm_cvtsd_si64(_mm_load_sd(&{}.f64));", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[1]));
break;
case PPC_INST_FCTIDZ:
printSetFlushMode(false);
println("\t{}.s64 = ({}.f64 > double(LLONG_MAX)) ? LLONG_MAX : _mm_cvttsd_si64(_mm_load1_pd(&{}.f64));", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[1]));
println("\t{}.s64 = ({}.f64 > double(LLONG_MAX)) ? LLONG_MAX : _mm_cvttsd_si64(_mm_load_sd(&{}.f64));", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[1]));
break;
case PPC_INST_FCTIWZ:
printSetFlushMode(false);
println("\t{}.s64 = ({}.f64 > double(INT_MAX)) ? INT_MAX : _mm_cvttsd_si32(_mm_load1_pd(&{}.f64));", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[1]));
println("\t{}.s64 = ({}.f64 > double(INT_MAX)) ? INT_MAX : _mm_cvttsd_si32(_mm_load_sd(&{}.f64));", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[1]));
break;
case PPC_INST_FDIV:
@ -595,7 +595,7 @@ bool Recompiler::Recompile(
case PPC_INST_FDIVS:
printSetFlushMode(false);
println("\t{}.f64 = float({}.f64 / {}.f64);", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[2]));
println("\t{}.f64 = double(float({}.f64 / {}.f64));", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[2]));
break;
case PPC_INST_FMADD:
@ -605,7 +605,7 @@ bool Recompiler::Recompile(
case PPC_INST_FMADDS:
printSetFlushMode(false);
println("\t{}.f64 = float({}.f64 * {}.f64 + {}.f64);", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[2]), f(insn.operands[3]));
println("\t{}.f64 = double(float({}.f64 * {}.f64 + {}.f64));", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[2]), f(insn.operands[3]));
break;
case PPC_INST_FMR:
@ -620,7 +620,7 @@ bool Recompiler::Recompile(
case PPC_INST_FMSUBS:
printSetFlushMode(false);
println("\t{}.f64 = float({}.f64 * {}.f64 - {}.f64);", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[2]), f(insn.operands[3]));
println("\t{}.f64 = double(float({}.f64 * {}.f64 - {}.f64));", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[2]), f(insn.operands[3]));
break;
case PPC_INST_FMUL:
@ -630,7 +630,7 @@ bool Recompiler::Recompile(
case PPC_INST_FMULS:
printSetFlushMode(false);
println("\t{}.f64 = float({}.f64 * {}.f64);", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[2]));
println("\t{}.f64 = double(float({}.f64 * {}.f64));", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[2]));
break;
case PPC_INST_FNABS:
@ -645,7 +645,7 @@ bool Recompiler::Recompile(
case PPC_INST_FNMADDS:
printSetFlushMode(false);
println("\t{}.f64 = float(-({}.f64 * {}.f64 + {}.f64));", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[2]), f(insn.operands[3]));
println("\t{}.f64 = double(float(-({}.f64 * {}.f64 + {}.f64)));", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[2]), f(insn.operands[3]));
break;
case PPC_INST_FNMSUB:
@ -655,7 +655,7 @@ bool Recompiler::Recompile(
case PPC_INST_FNMSUBS:
printSetFlushMode(false);
println("\t{}.f64 = float(-({}.f64 * {}.f64 - {}.f64));", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[2]), f(insn.operands[3]));
println("\t{}.f64 = double(float(-({}.f64 * {}.f64 - {}.f64)));", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[2]), f(insn.operands[3]));
break;
case PPC_INST_FRES:
@ -665,7 +665,7 @@ bool Recompiler::Recompile(
case PPC_INST_FRSP:
printSetFlushMode(false);
println("\t{}.f64 = float({}.f64);", f(insn.operands[0]), f(insn.operands[1]));
println("\t{}.f64 = double(float({}.f64));", f(insn.operands[0]), f(insn.operands[1]));
break;
case PPC_INST_FSEL:
@ -680,7 +680,7 @@ bool Recompiler::Recompile(
case PPC_INST_FSQRTS:
printSetFlushMode(false);
println("\t{}.f64 = float(sqrt({}.f64));", f(insn.operands[0]), f(insn.operands[1]));
println("\t{}.f64 = double(float(sqrt({}.f64)));", f(insn.operands[0]), f(insn.operands[1]));
break;
case PPC_INST_FSUB:
@ -690,7 +690,7 @@ bool Recompiler::Recompile(
case PPC_INST_FSUBS:
printSetFlushMode(false);
println("\t{}.f64 = float({}.f64 - {}.f64);", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[2]));
println("\t{}.f64 = double(float({}.f64 - {}.f64));", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[2]));
break;
case PPC_INST_LBZ:
@ -763,7 +763,7 @@ bool Recompiler::Recompile(
if (insn.operands[2] != 0)
print("{}.u32 + ", r(insn.operands[2]));
println("{});", int32_t(insn.operands[1]));
println("\t{}.f64 = {}.f32;", f(insn.operands[0]), temp());
println("\t{}.f64 = double({}.f32);", f(insn.operands[0]), temp());
break;
case PPC_INST_LFSX:
@ -772,7 +772,7 @@ bool Recompiler::Recompile(
if (insn.operands[1] != 0)
print("{}.u32 + ", r(insn.operands[1]));
println("{}.u32);", r(insn.operands[2]));
println("\t{}.f64 = {}.f32;", f(insn.operands[0]), temp());
println("\t{}.f64 = double({}.f32);", f(insn.operands[0]), temp());
break;
case PPC_INST_LHA:
@ -1227,7 +1227,7 @@ bool Recompiler::Recompile(
case PPC_INST_STFS:
printSetFlushMode(false);
println("\t{}.f32 = {}.f64;", temp(), f(insn.operands[0]));
println("\t{}.f32 = float({}.f64);", temp(), f(insn.operands[0]));
print("\tPPC_STORE_U32(");
if (insn.operands[2] != 0)
print("{}.u32 + ", r(insn.operands[2]));
@ -1236,7 +1236,7 @@ bool Recompiler::Recompile(
case PPC_INST_STFSX:
printSetFlushMode(false);
println("\t{}.f32 = {}.f64;", temp(), f(insn.operands[0]));
println("\t{}.f32 = float({}.f64);", temp(), f(insn.operands[0]));
print("\tPPC_STORE_U32(");
if (insn.operands[1] != 0)
print("{}.u32 + ", r(insn.operands[1]));

View File

@ -27,8 +27,7 @@
#define PPC_FUNC(x) extern "C" PPC_NOINLINE void x(PPCContext& __restrict__ ctx, uint8_t* base)
#define PPC_FUNC_PROLOGUE() \
__builtin_assume(((size_t)base & 0xFFFFFFFF) == 0); \
#define PPC_FUNC_PROLOGUE() __builtin_assume(((size_t)base & 0xFFFFFFFF) == 0)
#define PPC_LOAD_U8(x) *(uint8_t*)(base + (x))
#define PPC_LOAD_U16(x) __builtin_bswap16(*(uint16_t*)(base + (x)))
@ -141,20 +140,29 @@ struct alignas(0x10) PPCVRegister
};
};
// Guest-side (PPC_-prefixed) rounding-mode codes. PPCFPSCRRegister uses them
// as the index into its GuestToHost table and as the values held in its
// HostToGuest table.
#define PPC_ROUND_NEAREST 0x00
#define PPC_ROUND_TOWARD_ZERO 0x01
#define PPC_ROUND_UP 0x02
#define PPC_ROUND_DOWN 0x03
// Keeps only the two low bits, i.e. exactly the range of the four codes above;
// applied to the guest value before it is used as a table index.
#define PPC_ROUND_MASK 0x03
struct PPCFPSCRRegister
{
uint32_t csr;
static constexpr size_t GuestToHost[] = { _MM_ROUND_NEAREST, _MM_ROUND_TOWARD_ZERO, _MM_ROUND_UP, _MM_ROUND_DOWN };
static constexpr size_t HostToGuest[] = { PPC_ROUND_NEAREST, PPC_ROUND_DOWN, PPC_ROUND_UP, PPC_ROUND_TOWARD_ZERO };
inline uint32_t loadFromHost() noexcept
{
csr = _mm_getcsr();
return (0x6C >> ((csr & _MM_ROUND_MASK) >> 12)) & 3;
return HostToGuest[(csr & _MM_ROUND_MASK) >> 13];
}
inline void storeFromGuest(uint32_t value) noexcept
{
csr &= ~_MM_ROUND_MASK;
csr |= ((0x6C >> (2 * (value & 3))) & 3) << 13;
csr |= GuestToHost[value & PPC_ROUND_MASK];
_mm_setcsr(csr);
}