## ## Searches for functions in .text that are referenced by functions in .pdata ## ## Input: ## Decompiled code - Created in IDA Pro 9.0SP1 with File -> Produce File -> Create HTML File... ## CLI output from a XenonRecomp run - When trying to compile with XenonRecomp, use > to save the output from the terminal ## ## Output: ## XenonRecomp config - Function block for TOML to be inputted into XenonRecomp ## import sys import re # Check if correct number of input arguments were given if len(sys.argv) != 4: sys.exit("parser.py [IDA HTML] [XenonRecomp log] [Output TOML]") # Filepath input arguments ida_html = sys.argv[1] xenonrecomp_log = sys.argv[2] output_file = sys.argv[3] # Disable extra debug output debug = False ## ## Parse XenonRecomp log ## # The starting index of the erroneous switch statement address in the XenonRecomp log switch_idx = 22 # Initialize list to store erroneous switch statement addresses switch_addrs = [] print("Parsing XenonRecomp log...") # Import each line of XenonRecomp log with open(xenonrecomp_log, 'r') as file: # Read each line in the file for line in file: # If this line describes an error, it has the address of a problematic switch statement if re.search('ERROR: Switch case at ', line) != None: # Save the address as integer switch_addrs.append(line[switch_idx:switch_idx+8]) # Save only unique addresses and sort switch_addrs = set(switch_addrs) ## ## Parse IDA HTML ## # Initialize list to store start and end of functions functs = [] # Count how many functions have been added num_functs = 0 # Function for adding to function list and incrementing count def add_function(new_start_addr, prev_end_addr, start_type): global num_functs # If an end address for the last added function was specified if prev_end_addr != None: # Set end address for last added function functs[num_functs-1][1] = prev_end_addr # Add a new function to the list with the specified starting address functs.append([new_start_addr, 0, [], start_type]) # Increment the number of functions num_functs = num_functs+1 # Mark if we are in .text section in_text = False # Mark if we should end parsing end_parse = False # Initialize address of last bctr instruction to 0 bctr_addr = '00000000' # Initialize address of last blr instruction to 0 blr_addr = '00000000' # Initialize address of last 'End of function' comment to 0 eof_addr = '00000000' # Initialize address of last restgprlr instruction to 0 restgprlr_addr = '00000000' # Initialize address of last padding to 0 pad_addr = 0 # Import each line of decompiled code print("Parsing IDA HTML...") with open(ida_html, 'r') as file: # Read each line in the file for line in file: if not end_parse: # If in .text if in_text: # Get the current address colon_idx = line.find(':') curr_addr = line[colon_idx+1:colon_idx+9] # Check if this is the start of a function if re.search('^\.text:'+curr_addr+' sub_'+curr_addr+': *# [A-Z][A-Z][A-Z][A-Z] XREF:.*', line): # Save current address as integer curr_addr_int = int(curr_addr, 16) # If this is not the first function being added if num_functs > 0: # If last address had padding or restgprlr instruction, then this function was already added if curr_addr_int-4 == pad_addr or curr_addr_int-4 == restgprlr_addr: # Set function type for start address functs[num_functs-1][3] = 'sub' else: # Check if this function is part of latest added function is_nested_funct = False nested_functs = functs[num_functs-1][2] for nested_funct in nested_functs: if nested_funct == curr_addr: is_nested_funct = True break # If last address was not padding and not nested in latest function if not is_nested_funct: # Add new function and last function's end address add_function(curr_addr_int, curr_addr_int, 'sub') else: # Add new function add_function(curr_addr_int, None, 'sub') # If this is a location elif re.search('^\.text:'+curr_addr+' loc_'+curr_addr, line): curr_addr_int = int(curr_addr, 16) curr_funct = functs[num_functs-1] # If previous address was a blr instruction if curr_addr_int-4 == int(blr_addr, 16): # If previous address had an 'End of function' comment or if there was a bctr with the comment if blr_addr == eof_addr or bctr_addr == eof_addr: # Find a XREF pointing to a .text address xref_idx = line.find('XREF: .text:') if xref_idx > -1: underscore_idx = line.find('_', xref_idx) if underscore_idx > -1: xref = line[underscore_idx+1:underscore_idx+9] else: xref = line[xref_idx+12:xref_idx+20] else: xref = None # Couldn't find XREF pointing to .text address or the XREF is after this address if xref == None or int(xref, 16) > curr_addr_int: # Add as new function add_function(curr_addr_int, curr_addr_int, 'loc') else: # Find address of function that references this xref_idx = line.find('CODE XREF: sub_') # If it was found if xref_idx > -1: # Store as nested function in latest function functs[num_functs-1][2].append(line[xref_idx+15:xref_idx+23]) # Check if this line is padding elif num_functs > 0 and re.search('\.long 0$', line): # Convert current address to integer curr_addr_int = int(curr_addr, 16) # Add a new function at the line after padding, and end the current function at this padding address add_function(curr_addr_int+4, curr_addr_int, None) # Save padding address pad_addr = curr_addr_int # Check for blr instruction elif re.search('blr$', line): blr_addr = curr_addr # Check for 'End of function' comment elif re.search('End of function ', line): eof_addr = curr_addr # Check for bctr instruction elif re.search('bctr$', line): bctr_addr = curr_addr # Check for restgprlr instruction elif re.search('b __restgprlr_[0-9][0-9]$', line): # Convert current address to integer curr_addr_int = int(curr_addr, 16) # Add a new function at the line after restgprlr instruction, and end the current function at this address add_function(curr_addr_int+4, curr_addr_int, None) restgprlr_addr = curr_addr_int # If not in .text else: # If .text section header found if re.search('\.section "\.text"', line) != None: in_text = True ## ## Find .text functions that are referenced by .pdata functions ## # Initialize list for functions that need to be added to toml output_functs = [] # Look for related functions for every unique errored switch statement print("Searching for needed functions...") for switch_addr in switch_addrs: # Start looking at first subroutine curr_funct_idx = 0 # Save current switch statement address as integer switch_addr_int = int(switch_addr, 16) # The related function for this switch statement has not been found yet search_for_funct = True # Start search for function relating to switch statement while(search_for_funct): curr_funct = functs[curr_funct_idx] # If switch address is after this function's start curr_funct_start = curr_funct[0] if(switch_addr_int > curr_funct_start): # If switch address is before this function's end curr_funct_end = curr_funct[1] if(switch_addr_int <= curr_funct_end): # Save current function's start address and the function's length if debug: output_functs.append([hex(curr_funct_start), hex(curr_funct_end-curr_funct_start), switch_addr]) else: output_functs.append([hex(curr_funct_start), hex(curr_funct_end-curr_funct_start)]) # Don't need to continue search for this switch statement search_for_funct = False # Look in next function curr_funct_idx = curr_funct_idx + 1 # Related function was not found else: print(f"WARNING: Function relating to {switch_addr} not found! Skipping.") # Don't need to continue search for this switch statement search_for_funct = False # Remove duplicates if not debug: output_functs = list(set(tuple(funct) for funct in output_functs)) # Make sure there are no functions with the same starting address but different lengths for i in range(len(output_functs)): for j in range(i+1, len(output_functs)): curr_funct_start = output_functs[i][0] if curr_funct_start == output_functs[j][0]: print(f"WARNING: {curr_funct_start} has multiple entries of different lengths, manually find correct one.") print(f"{len(output_functs)} functions found!") ## ## Output all found functions to TOML in correct format ## # Create formatted string to export to TOML output_str = "functions = [" # Append all function addresses and lengths to formatted string print("Outputting to formatted file...") for funct in output_functs: # Format hex to uppercase curr_funct_start = '0x'+funct[0][2:].upper() curr_funct_end = '0x'+funct[1][2:].upper() # Format function curr_funct = "\n { address = "+curr_funct_start+", size = "+curr_funct_end if debug: curr_funct = curr_funct+", src = "+funct[2] curr_funct = curr_funct+" }," # Add to complete output string output_str = output_str+curr_funct # Delete last comma output_str = output_str[:len(output_str)-1] # Add last bracket output_str = output_str+"\n]" # Output to file with open(output_file, "w") as file: file.write(output_str)