mirror of
https://github.com/hedge-dev/XenonRecomp.git
synced 2025-06-06 18:31:03 +00:00
missing 3 functions, 0x82F08730 starts at wrong address
This commit is contained in:
parent
6dbbc6ea14
commit
21f1a81aa3
@ -188,6 +188,12 @@ functions = [
|
|||||||
|
|
||||||
You can define function boundaries explicitly using the `functions` property if XenonAnalyse fails to analyze them correctly, for example, with functions containing jump tables.
|
You can define function boundaries explicitly using the `functions` property if XenonAnalyse fails to analyze them correctly, for example, with functions containing jump tables.
|
||||||
|
|
||||||
|
You can automatically generate these using the FunctionParser.py script. You will need to create a HTML of your decompiled XEX with IDA using `File -> Produce File -> Create HTML File...` and save the terminal output from running XenonRecomp by appending `> [output log file path]` to the command.
|
||||||
|
|
||||||
|
```
|
||||||
|
python3 FunctionParser.py [input IDA HTML file path] [input XenonRecomp log file path] [output function list file path]
|
||||||
|
```
|
||||||
|
|
||||||
#### Invalid Instruction Skips
|
#### Invalid Instruction Skips
|
||||||
|
|
||||||
```toml
|
```toml
|
||||||
|
118
parser.py
118
parser.py
@ -21,6 +21,9 @@ ida_html = sys.argv[1]
|
|||||||
xenonrecomp_log = sys.argv[2]
|
xenonrecomp_log = sys.argv[2]
|
||||||
output_file = sys.argv[3]
|
output_file = sys.argv[3]
|
||||||
|
|
||||||
|
# Disable extra debug output
|
||||||
|
debug = False
|
||||||
|
|
||||||
##
|
##
|
||||||
## Parse XenonRecomp log
|
## Parse XenonRecomp log
|
||||||
##
|
##
|
||||||
@ -37,7 +40,7 @@ with open(xenonrecomp_log, 'r') as file:
|
|||||||
# Read each line in the file
|
# Read each line in the file
|
||||||
for line in file:
|
for line in file:
|
||||||
# If this line describes an error, it has the address of a problematic switch statement
|
# If this line describes an error, it has the address of a problematic switch statement
|
||||||
if re.search('ERROR', line) != None:
|
if re.search('ERROR: Switch case at ', line) != None:
|
||||||
# Save the address as integer
|
# Save the address as integer
|
||||||
switch_addrs.append(line[switch_idx:switch_idx+8])
|
switch_addrs.append(line[switch_idx:switch_idx+8])
|
||||||
|
|
||||||
@ -55,14 +58,14 @@ functs = []
|
|||||||
num_functs = 0
|
num_functs = 0
|
||||||
|
|
||||||
# Function for adding to function list and incrementing count
|
# Function for adding to function list and incrementing count
|
||||||
def add_function(new_start_addr, prev_end_addr):
|
def add_function(new_start_addr, prev_end_addr, start_type):
|
||||||
global num_functs
|
global num_functs
|
||||||
# If an end address for the last added function was specified
|
# If an end address for the last added function was specified
|
||||||
if prev_end_addr != None:
|
if prev_end_addr != None:
|
||||||
# Set end address for last added function
|
# Set end address for last added function
|
||||||
functs[num_functs-1][1] = prev_end_addr
|
functs[num_functs-1][1] = prev_end_addr
|
||||||
# Add a new function to the list with the specified starting address
|
# Add a new function to the list with the specified starting address
|
||||||
functs.append([new_start_addr, 0, []])
|
functs.append([new_start_addr, 0, [], start_type])
|
||||||
# Increment the number of functions
|
# Increment the number of functions
|
||||||
num_functs = num_functs+1
|
num_functs = num_functs+1
|
||||||
|
|
||||||
@ -75,8 +78,11 @@ end_parse = False
|
|||||||
# Initialize address of last blr instruction to 0
|
# Initialize address of last blr instruction to 0
|
||||||
blr_addr = '00000000'
|
blr_addr = '00000000'
|
||||||
|
|
||||||
|
# Initialize address of last bctr instruction to 0
|
||||||
|
bctr_addr = '00000000'
|
||||||
|
|
||||||
# Initialize address of last padding to 0
|
# Initialize address of last padding to 0
|
||||||
pad_addr = '00000000'
|
pad_addr = 0
|
||||||
|
|
||||||
# Import each line of decompiled code
|
# Import each line of decompiled code
|
||||||
print("Parsing IDA HTML...")
|
print("Parsing IDA HTML...")
|
||||||
@ -95,32 +101,80 @@ with open(ida_html, 'r') as file:
|
|||||||
# Save current address as integer
|
# Save current address as integer
|
||||||
curr_addr_int = int(curr_addr, 16)
|
curr_addr_int = int(curr_addr, 16)
|
||||||
|
|
||||||
|
# If this is not the first function being added
|
||||||
if num_functs > 0:
|
if num_functs > 0:
|
||||||
# If last address had padding, then this function was already added
|
# If last address had padding, then this function was already added
|
||||||
if not curr_addr_int-4 == int(pad_addr, 16):
|
if curr_addr_int-4 == pad_addr:
|
||||||
|
# Set function type for start address
|
||||||
|
functs[num_functs-1][3] = 'sub'
|
||||||
|
else:
|
||||||
# Check if this function is part of latest added function
|
# Check if this function is part of latest added function
|
||||||
is_nested_funct = False
|
is_nested_funct = False
|
||||||
nested_functs = functs[num_functs-1][2]
|
nested_functs = functs[num_functs-1][2]
|
||||||
for nested_funct in nested_functs:
|
for nested_funct in nested_functs:
|
||||||
is_nested_funct = nested_funct==curr_addr
|
if nested_funct == curr_addr:
|
||||||
|
is_nested_funct = True
|
||||||
|
break
|
||||||
|
|
||||||
# If last address was not padding and not nested in latest function
|
# If last address was not padding and not nested in latest function
|
||||||
if not is_nested_funct:
|
if not is_nested_funct:
|
||||||
# If this is not the first function being added
|
# Add new function and last function's end address
|
||||||
if num_functs > 0:
|
add_function(curr_addr_int, curr_addr_int, 'sub')
|
||||||
# Add new function and last function's end address
|
|
||||||
add_function(curr_addr_int, curr_addr_int)
|
|
||||||
else:
|
else:
|
||||||
# Add new function
|
# Add new function
|
||||||
add_function(curr_addr_int, None)
|
add_function(curr_addr_int, None, 'sub')
|
||||||
|
|
||||||
# If this is a location
|
# If this is a location
|
||||||
elif re.search('^\.text:'+curr_addr+' </span><span class="c[0-9]*">loc_'+curr_addr, line):
|
elif re.search('^\.text:'+curr_addr+' </span><span class="c[0-9]*">loc_'+curr_addr, line):
|
||||||
curr_addr_int = int(curr_addr, 16)
|
curr_addr_int = int(curr_addr, 16)
|
||||||
|
curr_funct = functs[num_functs-1]
|
||||||
# If previous address was a blr instruction
|
# If previous address was a blr instruction
|
||||||
if curr_addr_int-4 == blr_addr:
|
if curr_addr_int-4 == int(blr_addr, 16):
|
||||||
print(curr_addr)
|
# If last added function is a subroutine and has no nested functions
|
||||||
add_function(curr_addr_int, curr_addr_int)
|
if curr_funct[3] == 'sub' and not curr_funct[2]:
|
||||||
|
xref_idx = line.find('XREF: sub_')
|
||||||
|
# If XREF is a subroutine
|
||||||
|
if xref_idx > -1:
|
||||||
|
xref = line[xref_idx+10:xref_idx+18]
|
||||||
|
# If the XREF is equivalent to the last function's start address
|
||||||
|
if int(xref, 16) == curr_funct[0]:
|
||||||
|
# Store as nested function in latest function
|
||||||
|
functs[num_functs-1][2].append(xref)
|
||||||
|
# If not, add this address as a new function
|
||||||
|
else:
|
||||||
|
add_function(curr_addr_int, curr_addr_int, 'loc')
|
||||||
|
# If not, add this address as new function
|
||||||
|
else:
|
||||||
|
add_function(curr_addr_int, curr_addr_int, 'loc')
|
||||||
|
|
||||||
|
# If last added function is not a subroutine or has nested functions:
|
||||||
|
else:
|
||||||
|
# Check for XREF to subroutine
|
||||||
|
xref_idx = line.find('XREF: sub_')
|
||||||
|
if xref_idx > -1:
|
||||||
|
xref = line[xref_idx+10:xref_idx+18]
|
||||||
|
# If not found, check for XREF to .text address
|
||||||
|
else:
|
||||||
|
xref_idx = line.find('XREF: .text:')
|
||||||
|
if xref_idx > -1:
|
||||||
|
underscore_idx = line.find('_', xref_idx)
|
||||||
|
# If referencing sub_, loc_, etc.
|
||||||
|
if underscore_idx > -1:
|
||||||
|
xref = line[underscore_idx+1:underscore_idx+9]
|
||||||
|
# Else, there's only the address after .text
|
||||||
|
else:
|
||||||
|
xref = line[xref_idx+12:xref_idx+20]
|
||||||
|
else:
|
||||||
|
xref = '-1'
|
||||||
|
|
||||||
|
# If XREF points to subroutine or .text address before current address
|
||||||
|
if int(xref, 16) < curr_addr_int:
|
||||||
|
# Store as nested function
|
||||||
|
functs[num_functs-1][2].append(xref)
|
||||||
|
# If not, add this address as new funciton
|
||||||
|
else:
|
||||||
|
add_function(curr_addr_int, curr_addr_int, 'loc')
|
||||||
|
|
||||||
# If not, store as nested function in latest function
|
# If not, store as nested function in latest function
|
||||||
else:
|
else:
|
||||||
# Find address of function that references this
|
# Find address of function that references this
|
||||||
@ -132,9 +186,14 @@ with open(ida_html, 'r') as file:
|
|||||||
|
|
||||||
# Check if this line is padding
|
# Check if this line is padding
|
||||||
elif num_functs > 0 and re.search('<span class="c[0-9]*">\.long </span><span class="c[0-9]*">0$', line):
|
elif num_functs > 0 and re.search('<span class="c[0-9]*">\.long </span><span class="c[0-9]*">0$', line):
|
||||||
|
# Convert current address to integer
|
||||||
curr_addr_int = int(curr_addr, 16)
|
curr_addr_int = int(curr_addr, 16)
|
||||||
|
|
||||||
# Add a new function at the line after padding, and end the current function at this padding address
|
# Add a new function at the line after padding, and end the current function at this padding address
|
||||||
add_function(curr_addr_int+4, curr_addr_int)
|
add_function(curr_addr_int+4, curr_addr_int, None)
|
||||||
|
|
||||||
|
# Save padding address
|
||||||
|
pad_addr = curr_addr_int
|
||||||
|
|
||||||
# Check for blr instruction
|
# Check for blr instruction
|
||||||
elif re.search('<span class="c[0-9]*">blr', line):
|
elif re.search('<span class="c[0-9]*">blr', line):
|
||||||
@ -175,7 +234,11 @@ for switch_addr in switch_addrs:
|
|||||||
curr_funct_end = curr_funct[1]
|
curr_funct_end = curr_funct[1]
|
||||||
if(switch_addr_int <= curr_funct_end):
|
if(switch_addr_int <= curr_funct_end):
|
||||||
# Save current function's start address and the function's length
|
# Save current function's start address and the function's length
|
||||||
output_functs.append([hex(curr_funct_start), hex(curr_funct_end-curr_funct_start), switch_addr])
|
if debug:
|
||||||
|
output_functs.append([hex(curr_funct_start), hex(curr_funct_end-curr_funct_start), switch_addr])
|
||||||
|
else:
|
||||||
|
output_functs.append([hex(curr_funct_start), hex(curr_funct_end-curr_funct_start)])
|
||||||
|
|
||||||
# Don't need to continue search for this switch statement
|
# Don't need to continue search for this switch statement
|
||||||
search_for_funct = False
|
search_for_funct = False
|
||||||
|
|
||||||
@ -184,23 +247,42 @@ for switch_addr in switch_addrs:
|
|||||||
|
|
||||||
# Related function was not found
|
# Related function was not found
|
||||||
else:
|
else:
|
||||||
print(f"WARNING: Function relating to {switch_addr} not found")
|
print(f"WARNING: Function relating to {switch_addr} not found! Skipping.")
|
||||||
# Don't need to continue search for this switch statement
|
# Don't need to continue search for this switch statement
|
||||||
search_for_funct = False
|
search_for_funct = False
|
||||||
|
|
||||||
|
# Remove duplicates
|
||||||
|
if not debug:
|
||||||
|
output_functs = list(set(tuple(funct) for funct in output_functs))
|
||||||
|
|
||||||
|
# Make sure there are no functions with the same starting address but different lengths
|
||||||
|
for i in range(len(output_functs)):
|
||||||
|
for j in range(i+1, len(output_functs)):
|
||||||
|
curr_funct_start = output_functs[i][0]
|
||||||
|
if curr_funct_start == output_functs[j][0]:
|
||||||
|
print(f"WARNING: {curr_funct_start} has multiple entries of different lengths, manually find correct one.")
|
||||||
|
|
||||||
print(f"{len(output_functs)} functions found!")
|
print(f"{len(output_functs)} functions found!")
|
||||||
|
|
||||||
|
##
|
||||||
|
## Output all found functions to TOML in correct format
|
||||||
|
##
|
||||||
|
|
||||||
# Create formatted string to export to TOML
|
# Create formatted string to export to TOML
|
||||||
output_str = "functions = ["
|
output_str = "functions = ["
|
||||||
|
|
||||||
# Append all function addresses and lengths to formatted string
|
# Append all function addresses and lengths to formatted string
|
||||||
|
print("Outputting to formatted file...")
|
||||||
for funct in output_functs:
|
for funct in output_functs:
|
||||||
# Format hex to uppercase
|
# Format hex to uppercase
|
||||||
curr_funct_start = '0x'+funct[0][2:].upper()
|
curr_funct_start = '0x'+funct[0][2:].upper()
|
||||||
curr_funct_end = '0x'+funct[1][2:].upper()
|
curr_funct_end = '0x'+funct[1][2:].upper()
|
||||||
|
|
||||||
# Format function
|
# Format function
|
||||||
curr_funct = "\n { address = "+curr_funct_start+", size = "+curr_funct_end+" src = "+funct[2]+" },"
|
curr_funct = "\n { address = "+curr_funct_start+", size = "+curr_funct_end
|
||||||
|
if debug:
|
||||||
|
curr_funct = curr_funct+", src = "+funct[2]
|
||||||
|
curr_funct = curr_funct+" },"
|
||||||
|
|
||||||
# Add to complete output string
|
# Add to complete output string
|
||||||
output_str = output_str+curr_funct
|
output_str = output_str+curr_funct
|
||||||
|
Loading…
x
Reference in New Issue
Block a user