#!/usr/bin/env python3

"""
This script reads the input file, extracts all lines starting with
"# FDATA: " (or a given prefix instead of "FDATA"), parses the directives,
replaces symbol names ("#name#") with either symbol values or with offsets from
respective anchor symbols, and writes the resulting profile to the output file.

Symbol values are obtained by running nm (see --nmtool) on the object file.
"""

import argparse
import subprocess
import sys
import re

# FDATA records:
# <is symbol?> <closest elf symbol or DSO name> <relative FROM address>
# <is symbol?> <closest elf symbol or DSO name> <relative TO address>
# <number of mispredictions> <number of branches>
# The two trailing counters are passed through to the output in input order.
fdata_pat = re.compile(r"([01].*) (?P<mispred>\d+) (?P<exec>\d+)")

# Pre-aggregated profile:
# {B|F|f} [<start_id>:]<start_offset> [<end_id>:]<end_offset> <count>
# [<mispred_count>]
preagg_pat = re.compile(r"(?P<type>[BFf]) (?P<offsets_count>.*)")

# Replacement symbol: #symname#
replace_pat = re.compile(r"#(?P<symname>[^#]+)#")

# Symbol map (name -> hexadecimal value string exactly as printed by nm).
# Populated in place by main(); read by the symbol evaluation helpers below.
symbols = {}


def parse_args(argv=None):
    """Parse command-line arguments (from sys.argv when argv is None)."""
    parser = argparse.ArgumentParser()
    parser.add_argument("input")
    parser.add_argument("objfile", help="Object file to extract symbol values from")
    parser.add_argument("output")
    parser.add_argument("prefix", nargs="?", default="FDATA", help="Custom FDATA prefix")
    parser.add_argument("--nmtool", default="nm", help="Path to nm tool")
    return parser.parse_args(argv)


def parse_profile_line(profile_line, line=None):
    """Parse one profile directive (the text following "# <prefix>: ").

    Returns ('FDATA', (issym1, anchor1, offsym1, issym2, anchor2, offsym2,
    counter1, counter2)) for an FDATA record, or
    ('PREAGG', (type, offsets_and_counts)) for a pre-aggregated record.

    `line` is the complete input line, used only in error messages; it
    defaults to `profile_line`. Exits with an error message on malformed input.
    """
    if line is None:
        line = profile_line

    fdata_match = fdata_pat.match(profile_line)
    if fdata_match:
        src_dst, mispred, branches = fdata_match.groups()
        # Split by whitespaces not preceded by a backslash (negative lookbehind)
        chunks = re.split(r"(?<!\\) +", src_dst)
        # Check if the number of records separated by non-escaped whitespace
        # exactly matches the format. This is an explicit check rather than an
        # assert so that it is still enforced under `python -O`.
        if len(chunks) != 6:
            sys.exit(f"ERROR: wrong format/whitespaces must be escaped:\n{line}")
        return ("FDATA", (*chunks, mispred, branches))

    preagg_match = preagg_pat.match(profile_line)
    if preagg_match:
        return ("PREAGG", preagg_match.groups())

    sys.exit("ERROR: unexpected input:\n%s" % line)


def read_exprs(path, prefix):
    """Collect the parsed profile directives found in the file at `path`.

    Only lines starting with "# <prefix>: " are considered; all other lines
    are ignored. Returns a list of (kind, fields) tuples as produced by
    parse_profile_line().
    """
    # NOTE: the prefix is interpolated into the pattern unescaped, so regex
    # metacharacters in a custom prefix are interpreted as regex syntax.
    prefix_pat = re.compile(f"^# {prefix}: (.*)")
    exprs = []
    with open(path, "r") as f:
        for line in f:
            prefix_match = prefix_pat.match(line)
            if prefix_match:
                exprs.append(parse_profile_line(prefix_match.group(1), line))
    return exprs


def parse_nm_output(nm_output):
    """Build a {symbol name: symbol value} dict from nm output text.

    Each symbol line has the form "<symbol value> <symbol type> <symbol name>".
    Lines that do not have all three fields (blank lines, archive member
    headers) are skipped instead of aborting the script.
    """
    parsed = {}
    for symline in nm_output.splitlines():
        fields = symline.split(maxsplit=2)
        if len(fields) != 3:
            continue
        symval, _, symname = fields
        parsed[symname] = symval
    return parsed


def read_symbols(nmtool, objfile):
    """Run `nmtool --defined-only objfile` and return the parsed symbol map.

    Exits with an error if the tool cannot be launched. A non-zero exit status
    from the tool is reported as a warning on stderr; whatever was printed to
    stdout is still parsed.
    """
    try:
        result = subprocess.run(
            [nmtool, "--defined-only", objfile], text=True, capture_output=True
        )
    except OSError as err:
        sys.exit(f"ERROR: failed to run {nmtool}: {err}")
    if result.returncode != 0:
        print(
            f"WARNING: {nmtool} exited with code {result.returncode}:\n"
            f"{result.stderr}",
            file=sys.stderr,
        )
    return parse_nm_output(result.stdout)


def lookup_symbol(symname):
    """Return the value of `symname` from the symbol map, or exit with an error."""
    try:
        return symbols[symname]
    except KeyError:
        sys.exit(f"ERROR: symbol {symname} is not defined in binary")


def evaluate_symbol(issym, anchor, offsym):
    """Resolve one "<is symbol?> <anchor> <offset>" location triple.

    If `offsym` does not start with a "#name#" reference, the triple is
    returned unchanged. Otherwise the reference is replaced by the symbol's
    value as printed by nm when `issym` is '0', or by the hexadecimal offset
    of the symbol from `anchor` in all other cases.
    """
    sym_match = replace_pat.match(offsym)
    if not sym_match:
        # No need to evaluate symbol value, return as is
        return f"{issym} {anchor} {offsym}"
    symbol_value = lookup_symbol(sym_match.group("symname"))
    # Evaluate to an absolute offset if issym is false
    if issym == "0":
        return f"{issym} {anchor} {symbol_value}"
    # Evaluate symbol against its anchor if issym is true
    anchor_value = lookup_symbol(anchor)
    sym_offset = int(symbol_value, 16) - int(anchor_value, 16)
    return f'{issym} {anchor} {format(sym_offset, "x")}'


def replace_symbol(matchobj):
    """
    Expects matchobj to only capture one group which contains the symbol name.
    Returns the value of that symbol as printed by nm.
    """
    return lookup_symbol(matchobj.group("symname"))


def render_expr(etype, expr):
    """Return the output line (without trailing newline) for a parsed record."""
    if etype == "FDATA":
        issym1, anchor1, offsym1, issym2, anchor2, offsym2, mispred, branches = expr
        return " ".join(
            (
                evaluate_symbol(issym1, anchor1, offsym1),
                evaluate_symbol(issym2, anchor2, offsym2),
                mispred,
                branches,
            )
        )
    if etype == "PREAGG":
        # Replace all symbols enclosed in ##
        return f"{expr[0]} {replace_pat.sub(replace_symbol, expr[1])}"
    sys.exit("ERROR: unhandled expression type:\n%s" % etype)


def main(argv=None):
    """Script entry point: parse the input, resolve symbols, write the output."""
    args = parse_args(argv)

    # Read input and construct the representation of profile expressions
    # as (kind, fields) tuples; see parse_profile_line() for the field layout.
    exprs = read_exprs(args.input, args.prefix)

    # Populate the symbol map in place so the helpers see the new contents.
    symbols.clear()
    symbols.update(read_symbols(args.nmtool, args.objfile))

    # Resolve every record before touching the output file so that an error
    # does not leave a truncated, partially written profile behind.
    out_lines = [render_expr(etype, expr) for etype, expr in exprs]

    with open(args.output, "w", newline="\n") as f:
        for out_line in out_lines:
            print(out_line, file=f)


if __name__ == "__main__":
    main()