diff --git a/README.md b/README.md index 0aa90e5..c6840c6 100644 --- a/README.md +++ b/README.md @@ -89,7 +89,7 @@ Additionally, mid-asm hooks can be inserted directly into the translated C++ cod XenonAnalyse, when used as a command-line application, allows an XEX file to be passed as an input argument to output a TOML file containing all the detected jump tables in the executable: ``` -XenonAnalyse [input XEX file path] [output jump table TOML file path] +XenonAnalyse [input XEX file path] [name of TOML output file] ``` However, as explained in the earlier sections, due to variations between games, additional support may be needed to handle different patterns. @@ -100,6 +100,7 @@ However, as explained in the earlier sections, due to variations between games, XenonRecomp accepts a TOML file with recompiler configurations and the path to the `ppc_context.h` file located in the XenonUtils directory: + ``` XenonRecomp [input TOML file path] [input PPC context header file path] ``` @@ -110,7 +111,7 @@ XenonRecomp [input TOML file path] [input PPC context header file path] ```toml [main] -file_path = "../private/default.xex" +file_path = "./private/default.xex" patch_file_path = "../private/default.xexp" patched_file_path = "../private/default_patched.xex" out_directory_path = "../ppc" @@ -249,11 +250,15 @@ Once the files are generated, refresh XenonTests' CMake cache to make them appea ## Building -The project requires CMake 3.20 or later and Clang 18 or later to build. Since the repository includes submodules, ensure you clone it recursively. -Compilers other than Clang have not been tested and are not recommended, including for recompilation output. The project relies on compiler-specific intrinsics and techniques that may not function correctly on other compilers, and many optimization methods depend on Clang's code generation. +### Windows (MSYS2) +- Install [MSYS2](https://www.msys2.org/) and use the "MSYS2 CLANG64" environment to build the project. 
-On Windows, you can use the clang-cl toolset and open the project in Visual Studio's CMake integration. +- First, you need to install the necessary packages (`mingw-w64-clang-x86_64-cmake`, `mingw-w64-clang-x86_64-libc++`, `mingw-w64-clang-x86_64-clang` and `mingw-w64-clang-x86_64-ninja`) with `pacman -S`. + +- Then, you can head into the cloned repo's directory (you can access your C drive by going into the `/c` folder inside of MSYS2), and execute the command `cmake -DCMAKE_BUILD_TYPE=Debug .`, which will generate a `build.ninja` file for the project. + +- Finally, run the `ninja` command, and you should end up with compiled executables. Attempting to launch them will tell you about a missing `libc++.dll` file, which you can copy to your current folder with the `cp /clang64/bin/libc++.dll .` command. ## Special Thanks diff --git a/XenonRecomp/CMakeLists.txt b/XenonRecomp/CMakeLists.txt index f5db6d1..895c18c 100644 --- a/XenonRecomp/CMakeLists.txt +++ b/XenonRecomp/CMakeLists.txt @@ -2,6 +2,11 @@ cmake_minimum_required (VERSION 3.8) project("XenonRecomp") +# Find required packages +find_package(fmt REQUIRED) +find_package(tomlplusplus REQUIRED) +find_package(xxHash REQUIRED) + add_executable(XenonRecomp "main.cpp" "recompiler.cpp" diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index f860817..ea77e8e 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -800,6 +800,13 @@ bool Recompiler::Recompile( // no op break; + + case PPC_INST_EQV: + println("\t{}.u64 = ~({}.u64 ^ {}.u64);", r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[2])); + if (strchr(insn.opcode->name, '.')) + println("\t{}.compare({}.s64, 0, {});", cr(0), r(insn.operands[0]), xer()); // Check if CR0 comparison uses s64 + break; + case PPC_INST_DCBF: // no op break; @@ -808,6 +815,10 @@ bool Recompiler::Recompile( // no op break; + case PPC_INST_DCBST: + // no op + break; + case PPC_INST_DCBTST: // no op break; @@ -990,6 +1001,19 @@ bool 
Recompiler::Recompile( println("\t{}.f64 = double(float({}.f64));", f(insn.operands[0]), f(insn.operands[1])); break; + case PPC_INST_FRSQRTE: + printSetFlushMode(false); // Ensure standard FPU mode + // Lowered through the single-precision SSE estimate _mm_rsqrt_ss; double-typed values use __m128d so no lax vector conversion is needed + println("\t{{"); + println("\t\t__m128d val_pd = _mm_load_sd(&{}.f64);", f(insn.operands[1])); // Load double + println("\t\t__m128 val_ss = _mm_cvtpd_ps(val_pd);"); // Convert to single + println("\t\t__m128 rsqrt_est_ss = _mm_rsqrt_ss(val_ss);"); // Estimate (single) + println("\t\t__m128d result_pd = _mm_cvtps_pd(rsqrt_est_ss);"); // Convert back to double + println("\t\t_mm_store_sd(&{}.f64, result_pd);", f(insn.operands[0])); // Store result + println("\t}}"); + // NOTE(review): FPSCR side effects of FRSQRTE are not modelled here. + break; + case PPC_INST_FSEL: printSetFlushMode(false); println("\t{}.f64 = {}.f64 >= 0.0 ? {}.f64 : {}.f64;", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[2]), f(insn.operands[3])); @@ -1315,10 +1339,28 @@ bool Recompiler::Recompile( println("\t{}.s64 = {}.s64 * {}.s64;", r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[2])); break; + case PPC_INST_MULHD: + println("\t{}.s64 = ((__int128_t){}.s64 * (__int128_t){}.s64) >> 64;", r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[2])); + if (strchr(insn.opcode->name, '.')) + println("\t{}.compare({}.s64, 0, {});", cr(0), r(insn.operands[0]), xer()); // Record form: CR0 is set from the signed 64-bit result + break; + + case PPC_INST_MULHDU: + println("\t{}.u64 = ((__uint128_t){}.u64 * (__uint128_t){}.u64) >> 64;", r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[2])); + if (strchr(insn.opcode->name, '.')) + println("\t{}.compare({}.s64, 0, {});", cr(0), r(insn.operands[0]), xer()); // Record form: CR0 compares the result as signed even for the unsigned multiply + break; + case PPC_INST_MULLI: println("\t{}.s64 = {}.s64 * {};", r(insn.operands[0]), r(insn.operands[1]), int32_t(insn.operands[2])); break; + case PPC_INST_MULLHWU: // Verify 
this ID exists + println("\t{}.u64 = (uint32_t)(({}.u64 & 0xFFFF) * ({}.u64 & 0xFFFF));", r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[2])); + if (strchr(insn.opcode->name, '.')) + println("\t{}.compare({}.s32, 0, {});", cr(0), r(insn.operands[0]), xer()); // Record form: CR0 is derived from the low 32 bits of the result, compared as signed + break; + case PPC_INST_MULLW: println("\t{}.s64 = int64_t({}.s32) * int64_t({}.s32);", r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[2])); if (strchr(insn.opcode->name, '.')) @@ -1339,6 +1381,19 @@ bool Recompiler::Recompile( // no op break; + case PPC_INST_VNOR128: + printSetFlushMode(true); + println("\t{{"); + println("\t\t__m128i vra = _mm_load_si128((__m128i*){}.u8);", v(insn.operands[1])); // Load VRA + println("\t\t__m128i vrb = _mm_load_si128((__m128i*){}.u8);", v(insn.operands[2])); // Load VRB + println("\t\t__m128i or_result = _mm_or_si128(vra, vrb);"); // VRA | VRB + // Invert bits using XOR with all ones (~(A|B)); all_ones is built by comparing zero with itself + println("\t\t__m128i all_ones = _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128());"); + println("\t\t__m128i nor_result = _mm_xor_si128(or_result, all_ones);"); + println("\t\t_mm_store_si128((__m128i*){}.u8, nor_result);", v(insn.operands[0])); // Store VRT + println("\t}}"); + break; + case PPC_INST_NOR: println("\t{}.u64 = ~({}.u64 | {}.u64);", r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[2])); break; @@ -1539,6 +1594,14 @@ bool Recompiler::Recompile( println("{}.u32, {}.u64);", r(insn.operands[2]), f(insn.operands[0])); break; + case PPC_INST_STFSU: + printSetFlushMode(false); + println("\t{}.f32 = float({}.f64);", temp(), f(insn.operands[0])); // Convert FRS (double) to float in temp + println("\t{} = {} + {}.u32;", ea(), int32_t(insn.operands[1]), r(insn.operands[2])); // Calculate EA = RA + D + println("\tPPC_STORE_U32({}, {}.u32);", ea(), temp()); // Store the 32-bit pattern of the converted float + println("\t{}.u32 = {};", r(insn.operands[2]), ea()); // Update form: write EA back to RA + break; + case PPC_INST_STFIWX: printSetFlushMode(false); print("{}", mmioStore() ? 
"\tPPC_MM_STORE_U32(" : "\tPPC_STORE_U32("); @@ -1769,6 +1832,12 @@ bool Recompiler::Recompile( println("\t_mm_store_si128((__m128i*){}.u8, _mm_and_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); break; + case PPC_INST_VANDC: + printSetFlushMode(true); + // Computes VRA & ~VRB using _mm_andnot_si128(VRB, VRA) + println("\t_mm_store_si128((__m128i*){}.u8, _mm_andnot_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1])); // VRT, VRB, VRA (same emitted statement as the VANDC128 case below) + break; + case PPC_INST_VANDC128: println("\t_mm_store_si128((__m128i*){}.u8, _mm_andnot_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1])); break; @@ -1795,6 +1864,22 @@ bool Recompiler::Recompile( println("_mm_load_ps({}.f32)));", v(insn.operands[1])); break; + case PPC_INST_VCFPUXWS128: // NOTE(review): confirm this enum ID; PPC_INST_VCTUXS may be the intended one + printSetFlushMode(true); + println("\t{{"); + println("\t\t__m128 vrbf = _mm_load_ps({}.f32);", v(insn.operands[1])); // Load VRB floats + if (insn.operands[2] != 0) { // Only emit the scaling when UIMM (operand 2) is non-zero + // Scale VRB by 2^UIMM before converting + println("\t\tfloat scale = ldexpf(1.0f, {});", (int32_t)insn.operands[2]); // Calculate 2^UIMM + println("\t\t__m128 scale_ps = _mm_set1_ps(scale);"); + println("\t\tvrbf = _mm_mul_ps(vrbf, scale_ps);"); + } + // _mm_vctuxs is not an SSE intrinsic; presumably a ppc_context.h helper doing the saturating unsigned conversion - confirm + println("\t\t__m128i result = _mm_vctuxs(vrbf);"); + println("\t\t_mm_store_si128((__m128i*){}.u32, result);", v(insn.operands[0])); // Store VRT + println("\t}}"); + break; + case PPC_INST_VCFSX: case PPC_INST_VCSXWFP128: { @@ -1918,6 +2003,28 @@ bool Recompiler::Recompile( println("\t_mm_store_ps({}.f32, _mm_min_ps(_mm_load_ps({}.f32), _mm_load_ps({}.f32)));", v(insn.operands[0]), v(insn.operands[1]), 
v(insn.operands[2])); break; + case PPC_INST_VPKSWSS: + case PPC_INST_VPKSWSS128: // shares the VPKSWSS lowering + printSetFlushMode(true); + println("\t_mm_store_si128((__m128i*){}.s16, _mm_packs_epi32(_mm_load_si128((__m128i*){}.s32), _mm_load_si128((__m128i*){}.s32)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1])); // VRT, VRB, VRA + break; + + case PPC_INST_VPKUWUS128: + printSetFlushMode(true); + println("\t{{"); + println("\t\t__m128i max_val = _mm_set1_epi32(0xFFFF);"); // Max value for unsigned 16-bit + println("\t\t__m128i vrb = _mm_load_si128((__m128i*){}.u32);", v(insn.operands[2])); // Load VRB (operand 2) + println("\t\t__m128i vra = _mm_load_si128((__m128i*){}.u32);", v(insn.operands[1])); // Load VRA (operand 1) + // Saturate VRB words (unsigned) to [0, 65535] + println("\t\tvrb = _mm_min_epu32(vrb, max_val);"); + // Saturate VRA words (unsigned) to [0, 65535] + println("\t\tvra = _mm_min_epu32(vra, max_val);"); + // Pack with unsigned saturation: _mm_packs_epi32 saturates to int16 and would turn 0x8000..0xFFFF into 0x7FFF. + println("\t\t__m128i result = _mm_packus_epi32(vrb, vra);"); + println("\t\t_mm_store_si128((__m128i*){}.u16, result);", v(insn.operands[0])); // Store VRT (operand 0) + println("\t}}"); + break; + case PPC_INST_VMRGHB: println("\t_mm_store_si128((__m128i*){}.u8, _mm_unpackhi_epi8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1])); break; @@ -2047,6 +2154,14 @@ bool Recompiler::Recompile( println("\t_mm_store_si128((__m128i*){}.u8, _mm_packus_epi16(_mm_load_si128((__m128i*){}.s16), _mm_load_si128((__m128i*){}.s16)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1])); break; + case PPC_INST_VPKUHUS: + case PPC_INST_VPKUHUS128: // shares the VPKUHUS lowering + printSetFlushMode(true); + // NOTE(review): _mm_packus_epi16 saturates *signed* 16-bit inputs to unsigned 8-bit, whereas VPKUHUS saturates unsigned halfwords, + // so inputs >= 0x8000 must be clamped as unsigned before they are packed. 
+ println("\t_mm_store_si128((__m128i*){}.u8, _mm_packus_epi16(_mm_min_epu16(_mm_load_si128((__m128i*){}.s16), _mm_set1_epi16(0xFF)), _mm_min_epu16(_mm_load_si128((__m128i*){}.s16), _mm_set1_epi16(0xFF))));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1])); // VRT, VRB, VRA; clamp as unsigned first because _mm_packus_epi16 reads its inputs as signed + break; + case PPC_INST_VREFP: case PPC_INST_VREFP128: // TODO: see if we can use rcp safely @@ -2088,7 +2203,11 @@ bool Recompiler::Recompile( break; case PPC_INST_VSEL: + case PPC_INST_VSEL128: // shares the VSEL lowering + printSetFlushMode(true); + // VRT = (VRA & ~VRC) | (VRB & VRC): vsel selects bit by bit, not byte by byte. + // _mm_blendv_epi8 only tests the top bit of each mask byte, so it is not a valid replacement for this and/andnot/or form. println("\t_mm_store_si128((__m128i*){}.u8, _mm_or_si128(_mm_andnot_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)), _mm_and_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8))));", v(insn.operands[0]), v(insn.operands[3]), v(insn.operands[1]), v(insn.operands[3]), v(insn.operands[2])); break; case PPC_INST_VSLB: diff --git a/thirdparty/disasm/CMakeFiles/disasm.dir/disasm.c.obj b/thirdparty/disasm/CMakeFiles/disasm.dir/disasm.c.obj new file mode 100644 index 0000000..a3d0f68 Binary files /dev/null and b/thirdparty/disasm/CMakeFiles/disasm.dir/disasm.c.obj differ diff --git a/thirdparty/disasm/CMakeFiles/disasm.dir/ppc-dis.c.obj b/thirdparty/disasm/CMakeFiles/disasm.dir/ppc-dis.c.obj new file mode 100644 index 0000000..4e244f7 Binary files /dev/null and b/thirdparty/disasm/CMakeFiles/disasm.dir/ppc-dis.c.obj differ diff --git a/thirdparty/disasm/cmake_install.cmake b/thirdparty/disasm/cmake_install.cmake new file mode 100644 index 0000000..1238808 --- /dev/null +++ b/thirdparty/disasm/cmake_install.cmake @@ -0,0 +1,45 @@ +# Install script for 
directory: E:/recomp/XenonRecomp/thirdparty/disasm + +# Set the install prefix +if(NOT DEFINED CMAKE_INSTALL_PREFIX) + set(CMAKE_INSTALL_PREFIX "C:/Program Files (x86)/disasm") +endif() +string(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") + +# Set the install configuration name. +if(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME) + if(BUILD_TYPE) + string(REGEX REPLACE "^[^A-Za-z0-9_]+" "" + CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}") + else() + set(CMAKE_INSTALL_CONFIG_NAME "Debug") + endif() + message(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"") +endif() + +# Set the component getting installed. +if(NOT CMAKE_INSTALL_COMPONENT) + if(COMPONENT) + message(STATUS "Install component: \"${COMPONENT}\"") + set(CMAKE_INSTALL_COMPONENT "${COMPONENT}") + else() + set(CMAKE_INSTALL_COMPONENT) + endif() +endif() + +# Is this installation the result of a crosscompile? +if(NOT DEFINED CMAKE_CROSSCOMPILING) + set(CMAKE_CROSSCOMPILING "FALSE") +endif() + +# Set path to fallback-tool for dependency-resolution. +if(NOT DEFINED CMAKE_OBJDUMP) + set(CMAKE_OBJDUMP "C:/msys64/clang64/bin/llvm-objdump.exe") +endif() + +string(REPLACE ";" "\n" CMAKE_INSTALL_MANIFEST_CONTENT + "${CMAKE_INSTALL_MANIFEST_FILES}") +if(CMAKE_INSTALL_LOCAL_ONLY) + file(WRITE "E:/recomp/XenonRecomp/thirdparty/disasm/install_local_manifest.txt" + "${CMAKE_INSTALL_MANIFEST_CONTENT}") +endif() diff --git a/thirdparty/disasm/libdisasm.a b/thirdparty/disasm/libdisasm.a new file mode 100644 index 0000000..9e9a18c Binary files /dev/null and b/thirdparty/disasm/libdisasm.a differ