Merge 775ad31136b8be0690bf9895a26862e1117a2bef into 865319a39cec873370500d26ce775959a4c5e784

This commit is contained in:
MadLadMikael 2025-04-17 13:58:29 +02:00 committed by GitHub
commit 843ee91fcc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 180 additions and 6 deletions

View File

@ -89,7 +89,7 @@ Additionally, mid-asm hooks can be inserted directly into the translated C++ cod
XenonAnalyse, when used as a command-line application, allows an XEX file to be passed as an input argument to output a TOML file containing all the detected jump tables in the executable:
```
XenonAnalyse [input XEX file path] [output jump table TOML file path]
XenonAnalyse [input XEX file path] [name of toml output file]
```
However, as explained in the earlier sections, due to variations between games, additional support may be needed to handle different patterns.
@ -100,6 +100,7 @@ However, as explained in the earlier sections, due to variations between games,
XenonRecomp accepts a TOML file with recompiler configurations and the path to the `ppc_context.h` file located in the XenonUtils directory:
```
XenonRecomp [input TOML file path] [input PPC context header file path]
```
@ -110,7 +111,7 @@ XenonRecomp [input TOML file path] [input PPC context header file path]
```toml
[main]
file_path = "../private/default.xex"
file_path = "./private/default.xex"
patch_file_path = "../private/default.xexp"
patched_file_path = "../private/default_patched.xex"
out_directory_path = "../ppc"
@ -249,11 +250,15 @@ Once the files are generated, refresh XenonTests' CMake cache to make them appea
## Building
The project requires CMake 3.20 or later and Clang 18 or later to build. Since the repository includes submodules, ensure you clone it recursively.
Compilers other than Clang have not been tested and are not recommended, including for recompilation output. The project relies on compiler-specific intrinsics and techniques that may not function correctly on other compilers, and many optimization methods depend on Clang's code generation.
### Windows (MSYS2)
- Install [MSYS2](https://www.msys2.org/) and use the "MSYS2 CLANG64" environment to build the project.
On Windows, you can use the clang-cl toolset and open the project in Visual Studio's CMake integration.
- First, install the necessary packages (`mingw-w64-clang-x86_64-cmake`, `mingw-w64-clang-x86_64-libc++`, `mingw-w64-clang-x86_64-clang` and `mingw-w64-x86_64-ninja`) with `pacman -S <package_name>`.
- Then, change into the cloned repository's directory (your C drive is available as the `/c` folder inside MSYS2) and run `cmake -DCMAKE_BUILD_TYPE=Debug .`, which generates a `build.ninja` file for the project.
- Finally, run the `ninja` command to produce the compiled executables. Launching them will report a missing `libc++.dll` file, which you can copy into your current folder with the `cp /clang64/bin/libc++.dll .` command.
## Special Thanks

View File

@ -2,6 +2,11 @@ cmake_minimum_required (VERSION 3.8)
project("XenonRecomp")
# Find required packages
find_package(fmt REQUIRED)
find_package(tomlplusplus REQUIRED)
find_package(xxHash REQUIRED)
add_executable(XenonRecomp
"main.cpp"
"recompiler.cpp"

View File

@ -800,6 +800,13 @@ bool Recompiler::Recompile(
// no op
break;
case PPC_INST_EQV:
println("\t{}.u64 = ~({}.u64 ^ {}.u64);", r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[2]));
if (strchr(insn.opcode->name, '.'))
println("\t{}.compare<int64_t>({}.s64, 0, {});", cr(0), r(insn.operands[0]), xer()); // Check if CR0 comparison uses s64
break;
case PPC_INST_DCBF:
// no op
break;
@ -808,6 +815,10 @@ bool Recompiler::Recompile(
// no op
break;
case PPC_INST_DCBST:
// no op
break;
case PPC_INST_DCBTST:
// no op
break;
@ -990,6 +1001,19 @@ bool Recompiler::Recompile(
println("\t{}.f64 = double(float({}.f64));", f(insn.operands[0]), f(insn.operands[1]));
break;
case PPC_INST_FRSQRTE:
    // Emits a reciprocal-square-root estimate: operand 1 (double) is narrowed to
    // single precision, passed through the SSE estimate instruction
    // (_mm_rsqrt_ss), widened back to double and stored into operand 0.
    // Flush mode is switched off first, as in the neighbouring scalar FSEL case.
    printSetFlushMode(false);
    println("\t{{");
    // _mm_load_sd and _mm_cvtps_pd yield __m128d, and _mm_cvtpd_ps and
    // _mm_store_sd take __m128d, so the double-precision temporaries must be
    // declared __m128d; declaring them __m128 leaves the generated code with
    // mismatched vector types.
    println("\t\t__m128d val_pd = _mm_load_sd(&{}.f64);", f(insn.operands[1])); // load the double source
    println("\t\t__m128 val_ss = _mm_cvtpd_ps(val_pd);");                       // narrow to single
    println("\t\t__m128 rsqrt_est_ss = _mm_rsqrt_ss(val_ss);");                 // estimate on the low lane
    println("\t\t__m128d result_pd = _mm_cvtps_pd(rsqrt_est_ss);");             // widen back to double
    println("\t\t_mm_store_sd(&{}.f64, result_pd);", f(insn.operands[0]));      // write the low double
    println("\t}}");
    // NOTE(review): no FPSCR status bits are updated by the emitted code — confirm
    // that is acceptable for the titles being recompiled.
    break;
case PPC_INST_FSEL:
printSetFlushMode(false);
println("\t{}.f64 = {}.f64 >= 0.0 ? {}.f64 : {}.f64;", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[2]), f(insn.operands[3]));
@ -1315,10 +1339,28 @@ bool Recompiler::Recompile(
println("\t{}.s64 = {}.s64 * {}.s64;", r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[2]));
break;
case PPC_INST_MULHD:
println("\t{}.s64 = ((__int128_t){}.s64 * (__int128_t){}.s64) >> 64;", r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[2]));
if (strchr(insn.opcode->name, '.'))
println("\t{}.compare<int64_t>({}.s64, 0, {});", cr(0), r(insn.operands[0]), xer()); // Check if CR0 comparison uses s64
break;
case PPC_INST_MULHDU:
println("\t{}.u64 = ((__uint128_t){}.u64 * (__uint128_t){}.u64) >> 64;", r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[2]));
if (strchr(insn.opcode->name, '.'))
println("\t{}.compare<int64_t>({}.s64, 0, {});", cr(0), r(insn.operands[0]), xer()); // Check if CR0 comparison uses s64 or u64
break;
case PPC_INST_MULLI:
println("\t{}.s64 = {}.s64 * {};", r(insn.operands[0]), r(insn.operands[1]), int32_t(insn.operands[2]));
break;
case PPC_INST_MULLHWU: // Verify this ID exists
println("\t{}.u64 = (uint32_t)(({}.u64 & 0xFFFF) * ({}.u64 & 0xFFFF));", r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[2]));
if (strchr(insn.opcode->name, '.'))
println("\t{}.compare<int32_t>({}.s32, 0, {});", cr(0), r(insn.operands[0]), xer());
break;
case PPC_INST_MULLW:
println("\t{}.s64 = int64_t({}.s32) * int64_t({}.s32);", r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[2]));
if (strchr(insn.opcode->name, '.'))
@ -1339,6 +1381,19 @@ bool Recompiler::Recompile(
// no op
break;
case PPC_INST_VNOR128:
printSetFlushMode(true);
println("\t{{");
println("\t\t__m128i vra = _mm_load_si128((__m128i*){}.u8);", v(insn.operands[1])); // Load VRA
println("\t\t__m128i vrb = _mm_load_si128((__m128i*){}.u8);", v(insn.operands[2])); // Load VRB
println("\t\t__m128i or_result = _mm_or_si128(vra, vrb);"); // VRA | VRB
// Invert bits using XOR with all ones (~(A|B))
println("\t\t__m128i all_ones = _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128());");
println("\t\t__m128i nor_result = _mm_xor_si128(or_result, all_ones);");
println("\t\t_mm_store_si128((__m128i*){}.u8, nor_result);", v(insn.operands[0])); // Store VRT
println("\t}}");
break;
case PPC_INST_NOR:
println("\t{}.u64 = ~({}.u64 | {}.u64);", r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[2]));
break;
@ -1539,6 +1594,14 @@ bool Recompiler::Recompile(
println("{}.u32, {}.u64);", r(insn.operands[2]), f(insn.operands[0]));
break;
case PPC_INST_STFSU:
printSetFlushMode(false);
println("\t{}.f32 = float({}.f64);", temp(), f(insn.operands[0])); // Convert FRS (double) to float in temp
println("\t{} = {} + {}.u32;", ea(), int32_t(insn.operands[1]), r(insn.operands[2])); // Calculate EA = RA + D
println("\tPPC_STORE_U32({}, {}.u32);", ea(), temp()); // Store float bits
println("\t{}.u32 = {};", r(insn.operands[2]), ea()); // Update RA with EA
break;
case PPC_INST_STFIWX:
printSetFlushMode(false);
print("{}", mmioStore() ? "\tPPC_MM_STORE_U32(" : "\tPPC_STORE_U32(");
@ -1769,6 +1832,12 @@ bool Recompiler::Recompile(
println("\t_mm_store_si128((__m128i*){}.u8, _mm_and_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
break;
case PPC_INST_VANDC:
printSetFlushMode(true);
// Computes VRA & ~VRB using _mm_andnot_si128(VRB, VRA)
println("\t_mm_store_si128((__m128i*){}.u8, _mm_andnot_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1])); // VRT, VRB, VRA
break;
case PPC_INST_VANDC128:
println("\t_mm_store_si128((__m128i*){}.u8, _mm_andnot_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1]));
break;
@ -1795,6 +1864,22 @@ bool Recompiler::Recompile(
println("_mm_load_ps({}.f32)));", v(insn.operands[1]));
break;
case PPC_INST_VCFPUXWS128: // Or PPC_INST_VCTUXS if that's the ID used
printSetFlushMode(true);
println("\t{{");
println("\t\t__m128 vrbf = _mm_load_ps({}.f32);", v(insn.operands[1])); // Load VRB floats
if (insn.operands[2] != 0) { // Check UIMM (operand 2)
// Scale VRB by 2^UIMM before converting
println("\t\tfloat scale = ldexpf(1.0f, {});", (int32_t)insn.operands[2]); // Calculate 2^UIMM
println("\t\t__m128 scale_ps = _mm_set1_ps(scale);");
println("\t\tvrbf = _mm_mul_ps(vrbf, scale_ps);");
}
// Use the helper function from ppc_context.h which handles conversion and saturation
println("\t\t__m128i result = _mm_vctuxs(vrbf);");
println("\t\t_mm_store_si128((__m128i*){}.u32, result);", v(insn.operands[0])); // Store VRT
println("\t}}");
break;
case PPC_INST_VCFSX:
case PPC_INST_VCSXWFP128:
{
@ -1918,6 +2003,28 @@ bool Recompiler::Recompile(
println("\t_mm_store_ps({}.f32, _mm_min_ps(_mm_load_ps({}.f32), _mm_load_ps({}.f32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
break;
case PPC_INST_VPKSWSS:
case PPC_INST_VPKSWSS128: // Or PPC_INST_VPKSWSS
printSetFlushMode(true);
println("\t_mm_store_si128((__m128i*){}.s16, _mm_packs_epi32(_mm_load_si128((__m128i*){}.s32), _mm_load_si128((__m128i*){}.s32)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1])); // VRT, VRA, VRB
break;
case PPC_INST_VPKUWUS128:
    // Packs the unsigned 32-bit words of two source vectors into unsigned
    // 16-bit halfwords with unsigned saturation (values above 0xFFFF become
    // 0xFFFF). Sources are read from operands 2 and 1, in that order, and the
    // packed result is written to operand 0.
    printSetFlushMode(true);
    println("\t{{");
    println("\t\t__m128i max_val = _mm_set1_epi32(0xFFFF);"); // largest unsigned 16-bit value
    println("\t\t__m128i vra = _mm_load_si128((__m128i*){}.u32);", v(insn.operands[2])); // first pack input (operand 2)
    println("\t\t__m128i vrb = _mm_load_si128((__m128i*){}.u32);", v(insn.operands[1])); // second pack input (operand 1)
    // Clamp every word to [0, 0xFFFF] using an unsigned minimum.
    println("\t\tvra = _mm_min_epu32(vra, max_val);");
    println("\t\tvrb = _mm_min_epu32(vrb, max_val);");
    // The pack must saturate to the *unsigned* 16-bit range: _mm_packs_epi32
    // saturates to [-32768, 32767] and would turn every clamped value in
    // 0x8000..0xFFFF into 0x7FFF. _mm_packus_epi32 keeps 0..0xFFFF intact.
    println("\t\t__m128i result = _mm_packus_epi32(vra, vrb);");
    println("\t\t_mm_store_si128((__m128i*){}.u16, result);", v(insn.operands[0])); // store to operand 0
    println("\t}}");
    break;
case PPC_INST_VMRGHB:
println("\t_mm_store_si128((__m128i*){}.u8, _mm_unpackhi_epi8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1]));
break;
@ -2047,6 +2154,14 @@ bool Recompiler::Recompile(
println("\t_mm_store_si128((__m128i*){}.u8, _mm_packus_epi16(_mm_load_si128((__m128i*){}.s16), _mm_load_si128((__m128i*){}.s16)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1]));
break;
case PPC_INST_VPKUHUS:
case PPC_INST_VPKUHUS128:
    // Packs the unsigned 16-bit halfwords of two source vectors into unsigned
    // bytes with unsigned saturation (values above 0xFF become 0xFF). Sources
    // are read from operands 2 and 1, in that order; the result goes to
    // operand 0.
    printSetFlushMode(true);
    // _mm_packus_epi16 interprets its inputs as *signed* 16-bit values, so an
    // unsigned halfword >= 0x8000 would be seen as negative and packed to 0
    // instead of 0xFF. Clamping each input to 0x00FF with an unsigned minimum
    // first makes the subsequent pack exact for every unsigned input.
    println("\t_mm_store_si128((__m128i*){}.u8, _mm_packus_epi16(_mm_min_epu16(_mm_load_si128((__m128i*){}.u16), _mm_set1_epi16(0xFF)), _mm_min_epu16(_mm_load_si128((__m128i*){}.u16), _mm_set1_epi16(0xFF))));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1]));
    break;
case PPC_INST_VREFP:
case PPC_INST_VREFP128:
// TODO: see if we can use rcp safely
@ -2088,7 +2203,11 @@ bool Recompiler::Recompile(
break;
case PPC_INST_VSEL:
case PPC_INST_VSEL128:
    // Bitwise select: each result bit is taken from operand 2 where the
    // corresponding bit of the mask (operand 3) is set, and from operand 1
    // where it is clear, i.e. (op1 & ~mask) | (op2 & mask).
    //
    // Both labels share one emission. Emitting the store twice (by falling
    // through from VSEL into VSEL128) would overwrite the result, and
    // _mm_blendv_epi8 is not a substitute here: it picks whole bytes based only
    // on the top bit of each mask byte, so masks that are not all-ones or
    // all-zeros per byte would select the wrong bits.
    printSetFlushMode(true);
    println("\t_mm_store_si128((__m128i*){}.u8, _mm_or_si128(_mm_andnot_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)), _mm_and_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8))));", v(insn.operands[0]), v(insn.operands[3]), v(insn.operands[1]), v(insn.operands[3]), v(insn.operands[2]));
    break;
case PPC_INST_VSLB:

Binary file not shown.

Binary file not shown.

45
thirdparty/disasm/cmake_install.cmake vendored Normal file
View File

@ -0,0 +1,45 @@
# Install script for directory: E:/recomp/XenonRecomp/thirdparty/disasm
# NOTE(review): this looks like a CMake-generated cmake_install.cmake. The
# absolute E:/ and C:/ paths below belong to one particular machine — confirm
# whether the file should be kept under version control at all.

# Fall back to a default install prefix when none was supplied, then strip a
# single trailing slash from whatever prefix is in effect.
if(NOT DEFINED CMAKE_INSTALL_PREFIX)
  set(CMAKE_INSTALL_PREFIX "C:/Program Files (x86)/disasm")
endif()
string(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}")

# Pick the install configuration name: BUILD_TYPE with any leading
# non-identifier characters removed, or "Debug" when BUILD_TYPE is not set.
if(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME)
  if(BUILD_TYPE)
    string(REGEX REPLACE "^[^A-Za-z0-9_]+" "" CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}")
  else()
    set(CMAKE_INSTALL_CONFIG_NAME "Debug")
  endif()
  message(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"")
endif()

# Select the component to install: take it from COMPONENT when given,
# otherwise leave CMAKE_INSTALL_COMPONENT without a value.
if(NOT CMAKE_INSTALL_COMPONENT)
  if(COMPONENT)
    message(STATUS "Install component: \"${COMPONENT}\"")
    set(CMAKE_INSTALL_COMPONENT "${COMPONENT}")
  else()
    set(CMAKE_INSTALL_COMPONENT)
  endif()
endif()

# Record that this is not a cross-compiled install unless told otherwise.
if(NOT DEFINED CMAKE_CROSSCOMPILING)
  set(CMAKE_CROSSCOMPILING "FALSE")
endif()

# Default location of the objdump tool used as a dependency-resolution fallback.
if(NOT DEFINED CMAKE_OBJDUMP)
  set(CMAKE_OBJDUMP "C:/msys64/clang64/bin/llvm-objdump.exe")
endif()

# Turn the ;-separated manifest list into one path per line and, for
# local-only installs, write it next to this script's recorded directory.
string(REPLACE ";" "\n" CMAKE_INSTALL_MANIFEST_CONTENT "${CMAKE_INSTALL_MANIFEST_FILES}")
if(CMAKE_INSTALL_LOCAL_ONLY)
  file(WRITE "E:/recomp/XenonRecomp/thirdparty/disasm/install_local_manifest.txt" "${CMAKE_INSTALL_MANIFEST_CONTENT}")
endif()

BIN
thirdparty/disasm/libdisasm.a vendored Normal file

Binary file not shown.