#!/usr/bin/env python3
import argparse
import subprocess
from typing import *
import tempfile
import copy
import os
import shutil
import sys
import re
import configparser
from types import SimpleNamespace
from textwrap import dedent

# USAGE:
# 0. Prepare two BOLT build versions: base and compare.
# 1. Create the config by invoking this script with required options.
#    Save the config as `llvm-bolt-wrapper.ini` next to the script or
#    in the testing directory.
# In the base BOLT build directory:
# 2. Rename `llvm-bolt` to `llvm-bolt.real`
# 3. Create a symlink from this script to `llvm-bolt`
# 4. Create `llvm-bolt-wrapper.ini` and fill it using the example below.
#
# This script will compare binaries produced by base and compare BOLT, and
# report elapsed processing time and max RSS.

# Options are read from the config file llvm-bolt-wrapper.ini, looked up in
# the CWD first and then in the directory of this script.
#
# [config]
# # mandatory
# base_bolt = /full/path/to/llvm-bolt.real
# cmp_bolt = /full/path/to/other/llvm-bolt
# # optional, default to False
# verbose
# keep_tmp
# no_minimize
# run_sequentially
# compare_output
# skip_binary_cmp
# # optional, defaults to timing.log in CWD
# timing_file = timing1.log


def read_cfg():
    '''
    Reads llvm-bolt-wrapper.ini from the CWD, falling back to the directory
    of this script, and returns the options as a SimpleNamespace with
    upper-case attribute names (BASE_BOLT, CMP_BOLT, VERBOSE, ...).
    Raises FileNotFoundError if the config is found in neither location.
    '''
    src_dir = os.path.dirname(os.path.abspath(__file__))
    cfg = configparser.ConfigParser(allow_no_value=True)
    cfgs = cfg.read("llvm-bolt-wrapper.ini")
    if not cfgs:
        cfgs = cfg.read(os.path.join(src_dir, "llvm-bolt-wrapper.ini"))
    # explicit check instead of `assert`, which is stripped under `python -O`
    if not cfgs:
        raise FileNotFoundError(
            f"llvm-bolt-wrapper.ini is not found in {os.getcwd()} "
            f"or {src_dir}")

    section = cfg['config']

    def get_cfg(key):
        # if key is not present in config, assume False
        if key not in section:
            return False
        # if key is present, but has no value, assume True
        if not section[key]:
            return True
        # if key has associated value, interpret the value
        return section.getboolean(key)

    d = {
        # BOLT binary locations
        'BASE_BOLT': section['base_bolt'],
        'CMP_BOLT': section['cmp_bolt'],
        # optional
        'VERBOSE': get_cfg('verbose'),
        'KEEP_TMP': get_cfg('keep_tmp'),
        'NO_MINIMIZE': get_cfg('no_minimize'),
        'RUN_SEQUENTIALLY': get_cfg('run_sequentially'),
        'COMPARE_OUTPUT': get_cfg('compare_output'),
        'SKIP_BINARY_CMP': get_cfg('skip_binary_cmp'),
        # a bare `timing_file` key (no value) parses as None because of
        # allow_no_value; fall back to the default in that case as well
        'TIMING_FILE': section.get('timing_file') or 'timing.log',
    }
    if d['VERBOSE']:
        print(f"Using config {os.path.abspath(cfgs[0])}")
    return SimpleNamespace(**d)


# perf2bolt mode
PERF2BOLT_MODE = ['-aggregate-only', '-ignore-build-id']

# boltdiff mode
BOLTDIFF_MODE = ['-diff-only', '-o', '/dev/null']

# options to suppress binary differences as much as possible
MINIMIZE_DIFFS = ['-bolt-info=0']

# bolt output options that need to be intercepted
BOLT_OUTPUT_OPTS = {
    '-o': 'BOLT output binary',
    '-w': 'BOLT recorded profile',
}

# regex patterns to exclude the line from log comparison
SKIP_MATCH = [
    'BOLT-INFO: BOLT version',
    r'^Args: ',
    r'^BOLT-DEBUG:',
    r'BOLT-INFO:.*data.*output data',
    'WARNING: reading perf data directly',
]


def run_cmd(cmd, out_f, cfg):
    '''
    Starts cmd (a list of arguments) with stdout and stderr both redirected
    to out_f and returns the Popen object without waiting for completion.
    Echoes the command line first if cfg.VERBOSE is set.
    '''
    if cfg.VERBOSE:
        print(' '.join(cmd))
    return subprocess.Popen(cmd, stdout=out_f, stderr=subprocess.STDOUT)


def run_bolt(bolt_path, bolt_args, out_f, cfg):
    '''
    Launches bolt_path with bolt_args under `/usr/bin/time -f '%e %M'`,
    adding mode-specific options depending on the name this script was
    invoked as (perf2bolt, llvm-boltdiff) or on the heatmap subcommand.
    Returns the Popen object of the started process.
    '''
    tool = os.path.basename(sys.argv[0])
    p2b = tool == 'perf2bolt'  # perf2bolt mode
    bd = tool == 'llvm-boltdiff'  # boltdiff mode
    # heatmap mode; guard against an invocation without any arguments
    hm = len(sys.argv) > 1 and sys.argv[1] == 'heatmap'
    cmd = ['/usr/bin/time', '-f', '%e %M', bolt_path] + bolt_args
    if p2b:
        # -ignore-build-id can occur at most once, hence remove it from cmd
        if '-ignore-build-id' in cmd:
            cmd.remove('-ignore-build-id')
        cmd += PERF2BOLT_MODE
    elif bd:
        cmd += BOLTDIFF_MODE
    elif not cfg.NO_MINIMIZE and not hm:
        cmd += MINIMIZE_DIFFS
    return run_cmd(cmd, out_f, cfg)


def prepend_dash(args: Mapping[AnyStr, AnyStr]) -> Sequence[AnyStr]:
    '''
    Accepts parsed arguments and returns flat list with dash prepended to
    the option.
    Example: Namespace(o='test.tmp') -> ['-o', 'test.tmp']
    '''
    return [item
            for key, value in args.items()
            for item in ('-' + key, value)]


def replace_cmp_path(tmp: AnyStr, args: Mapping[AnyStr, AnyStr]) -> Sequence[AnyStr]:
    '''
    Keeps file names, but replaces the path to a temp folder, and returns
    the result as a flat dash-prefixed argument list.
    Example: {'o': 'abc/test.tmp'} -> ['-o', '/tmp/tmpf9un/test.tmp']
    Except preserve /dev/null.
    '''
    def replace_path(path):
        if path == '/dev/null':
            return '/dev/null'
        return os.path.join(tmp, os.path.basename(path))

    new_args = {key: replace_path(value) for key, value in args.items()}
    return prepend_dash(new_args)


def preprocess_args(args: argparse.Namespace) -> Mapping[AnyStr, AnyStr]:
    '''
    Drop options that weren't parsed (e.g. -w), convert to a dict
    '''
    return {key: value for key, value in vars(args).items() if value}


def write_to(txt, filename, mode='w'):
    '''
    Writes txt to filename, opened with the given mode ('w' by default).
    '''
    with open(filename, mode) as f:
        f.write(txt)


def wait(proc, fdesc):
    '''
    Waits for proc to finish, closes the file it was writing to and returns
    that same file reopened for reading.
    '''
    proc.wait()
    fdesc.close()
    return open(fdesc.name)


def compare_logs(main, cmp, skip_begin=0, skip_end=0, str_input=True):
    '''
    Compares logs but allows for certain lines to be excluded from comparison.
    If str_input is True (default), the input is assumed to be a string,
    which is split into lines. Otherwise the input is assumed to be a file.
    The first skip_begin and the last skip_end lines of each log are ignored.
    Returns None on success, otherwise a tuple (main_line, cmp_line) with the
    first mismatching lines. If one log has extra lines after the common
    part, the missing side is reported as an empty string.
    '''
    main_inp = main.splitlines() if str_input else main.readlines()
    cmp_inp = cmp.splitlines() if str_input else cmp.readlines()
    # rewind logs after consumption
    if not str_input:
        main.seek(0)
        cmp.seek(0)
    stop = -skip_end or None
    main_lines = main_inp[skip_begin:stop]
    cmp_lines = cmp_inp[skip_begin:stop]
    for lhs, rhs in zip(main_lines, cmp_lines):
        if lhs == rhs:
            continue
        # a differing pair is tolerated only if both lines contain the same
        # skip pattern
        if not any(re.search(skip, lhs) and re.search(skip, rhs)
                   for skip in SKIP_MATCH):
            return (lhs, rhs)
    # zip() stops at the shorter log; extra lines on either side are a
    # mismatch too
    if len(main_lines) != len(cmp_lines):
        common = min(len(main_lines), len(cmp_lines))
        lhs = main_lines[common] if common < len(main_lines) else ''
        rhs = cmp_lines[common] if common < len(cmp_lines) else ''
        return (lhs, rhs)
    return None


def fmt_cmp(cmp_tuple):
    '''
    Formats a (main_line, cmp_line) mismatch tuple for printing.
    Returns an empty string for a falsy input (no mismatch).
    '''
    if not cmp_tuple:
        return ''
    return f'main:\n{cmp_tuple[0]}\ncmp:\n{cmp_tuple[1]}\n'


def compare_with(lhs, rhs, cmd, skip_begin=0, skip_end=0):
    '''
    Runs cmd on both lhs and rhs and compares stdout.
    Returns tuple (mismatch, lhs_stdout):
        - if stdout matches between two files, mismatch is None,
        - otherwise mismatch is a tuple of mismatching lines.
    Raises subprocess.CalledProcessError if cmd exits with non-zero status.
    '''
    def run(binary):
        return subprocess.run(cmd.split() + [binary],
                              text=True, check=True,
                              capture_output=True).stdout

    run_lhs = run(lhs)
    run_rhs = run(rhs)
    cmp = compare_logs(run_lhs, run_rhs, skip_begin, skip_end)
    return cmp, run_lhs


def parse_cmp_offset(cmp_out):
    '''
    Extracts byte number from cmp output:
    file1 file2 differ: byte X, line Y
    Raises ValueError if cmp_out does not contain such a byte number.
    '''
    match = re.search(r'byte (\d+),', cmp_out)
    if match is None:
        raise ValueError(f"unexpected cmp output: {cmp_out!r}")
    return int(match.group(1))


def report_real_time(binary, main_err, cmp_err, cfg):
    '''
    Takes the last line of each log (the `/usr/bin/time -f '%e %M'` output)
    and appends it to cfg.TIMING_FILE as csv:
    "output binary; base bolt; cmp bolt"
    An empty log contributes an empty field. Both logs are rewound afterwards.
    '''
    def get_real_from_stderr(log):
        # keep only the last line; stays empty if the log has no lines
        last = ''
        for last in log:
            pass
        return '; '.join(last.split())

    main = get_real_from_stderr(main_err)
    cmp = get_real_from_stderr(cmp_err)
    write_to(f"{binary}; {main}; {cmp}\n", cfg.TIMING_FILE, 'a')
    # rewind logs after consumption
    main_err.seek(0)
    cmp_err.seek(0)


def clean_exit(tmp, out, exitcode, cfg):
    '''
    Removes the tmp directory (unless cfg.KEEP_TMP is set), copies the log
    `out` to stdout and exits with exitcode.
    '''
    # temp files are only cleaned on success
    if not cfg.KEEP_TMP:
        shutil.rmtree(tmp)

    # report stdout and stderr from the main process
    shutil.copyfileobj(out, sys.stdout)
    sys.exit(exitcode)


def find_section(offset, readelf_hdr):
    '''
    Prints the section table, highlighting every section whose file range
    contains offset, and returns the name of the last such section (None if
    no section matches). Despite the parameter name, readelf_hdr is expected
    to be the output of `objdump -hw`.
    '''
    hdr = readelf_hdr.split('\n')
    section = None
    use_colors = sys.stdout.isatty()  # terminal supports colors
    # extract sections table (parse objdump -hw output)
    for line in hdr[5:-1]:
        cols = line.strip().split()
        # extract section offset
        file_offset = int(cols[5], 16)
        # section size
        size = int(cols[2], 16)
        # NOTE(review): the upper bound is inclusive, so an offset on the
        # boundary of two adjacent sections highlights both; cmp reports
        # 1-based byte numbers - confirm the intended offset convention.
        if file_offset <= offset <= file_offset + size:
            if use_colors:
                print(f"\033[1m{line}\033[0m")
            else:
                print(f">{line}")
            section = cols[1]
        else:
            print(line)
    return section


def main_config_generator():
    '''
    Config generator mode: parses command line options and prints the
    corresponding llvm-bolt-wrapper.ini contents to stdout.
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument('base_bolt', help='Full path to base llvm-bolt binary')
    parser.add_argument('cmp_bolt', help='Full path to cmp llvm-bolt binary')
    parser.add_argument('--verbose', action='store_true',
                        help='Print subprocess invocation cmdline (default False)')
    parser.add_argument('--keep_tmp', action='store_true',
                        help='Preserve tmp folder on a clean exit '
                        '(tmp directory is preserved on crash by default)')
    parser.add_argument('--no_minimize', action='store_true',
                        help=f'Do not add `{MINIMIZE_DIFFS}` that is used '
                        'by default to reduce binary differences')
    parser.add_argument('--run_sequentially', action='store_true',
                        help='Run both binaries sequentially (default '
                        'in parallel). Use for timing comparison')
    parser.add_argument('--compare_output', action='store_true',
                        help='Compare bolt stdout/stderr (disabled by default)')
    parser.add_argument('--skip_binary_cmp', action='store_true',
                        help='Disable output comparison')
    parser.add_argument('--timing_file', help='Override path to timing log '
                        'file (default `timing.log` in CWD)')
    args = parser.parse_args()

    print(dedent(f'''\
        [config]
        # mandatory
        base_bolt = {args.base_bolt}
        cmp_bolt = {args.cmp_bolt}'''))
    del args.base_bolt
    del args.cmp_bolt
    d = vars(args)
    if any(d.values()):
        print("# optional")
        for key, value in d.items():
            if not value:
                continue
            # boolean flags are written as bare keys; options carrying a
            # value (timing_file) must keep it, or the value would be lost
            print(key if value is True else f"{key} = {value}")


def main():
    '''
    Interceptor mode: runs base and cmp BOLT with the same arguments (cmp
    outputs redirected to a temp directory), then compares exit codes,
    optionally logs, and the produced binaries. Exits with the base BOLT
    exit code on success or with an error message on the first mismatch.
    '''
    cfg = read_cfg()
    # intercept output arguments
    parser = argparse.ArgumentParser(add_help=False)
    for option, description in BOLT_OUTPUT_OPTS.items():
        parser.add_argument(option, help=description)
    args, unknownargs = parser.parse_known_args()
    args = preprocess_args(args)
    cmp_args = copy.deepcopy(args)
    tmp = tempfile.mkdtemp()
    cmp_args = replace_cmp_path(tmp, cmp_args)

    # reconstruct output arguments: prepend dash
    args = prepend_dash(args)

    # run both BOLT binaries
    main_f = open(os.path.join(tmp, 'main_bolt.stdout'), 'w')
    cmp_f = open(os.path.join(tmp, 'cmp_bolt.stdout'), 'w')
    main_bolt = run_bolt(cfg.BASE_BOLT, unknownargs + args, main_f, cfg)
    if cfg.RUN_SEQUENTIALLY:
        main_out = wait(main_bolt, main_f)
        cmp_bolt = run_bolt(cfg.CMP_BOLT, unknownargs + cmp_args, cmp_f, cfg)
    else:
        cmp_bolt = run_bolt(cfg.CMP_BOLT, unknownargs + cmp_args, cmp_f, cfg)
        main_out = wait(main_bolt, main_f)
    cmp_out = wait(cmp_bolt, cmp_f)

    # check exit code
    if main_bolt.returncode != cmp_bolt.returncode:
        print(tmp)
        sys.exit("exitcode mismatch")

    # compare logs, skip_end=1 skips the line with time
    out = None
    if cfg.COMPARE_OUTPUT:
        out = compare_logs(main_out, cmp_out, skip_end=1, str_input=False)
    if out:
        print(tmp)
        print(fmt_cmp(out))
        write_to(fmt_cmp(out), os.path.join(tmp, 'summary.txt'))
        sys.exit("logs mismatch")

    if os.path.basename(sys.argv[0]) == 'llvm-boltdiff':  # boltdiff mode
        # no output binary to compare, so just exit
        clean_exit(tmp, main_out, main_bolt.returncode, cfg)

    if '-o' not in args:
        # no output binary was requested, so there is nothing to compare
        clean_exit(tmp, main_out, main_bolt.returncode, cfg)

    # compare binaries (using cmp)
    main_binary = args[args.index('-o') + 1]
    cmp_binary = cmp_args[cmp_args.index('-o') + 1]
    if main_binary == '/dev/null':
        assert cmp_binary == '/dev/null'
        cfg.SKIP_BINARY_CMP = True

    # report binary timing as csv: output binary; base bolt real; cmp bolt real
    report_real_time(main_binary, main_out, cmp_out, cfg)

    # check if files exist
    main_exists = os.path.exists(main_binary)
    cmp_exists = os.path.exists(cmp_binary)
    if not main_exists and not cmp_exists:
        # both don't exist, assume it's intended, skip comparison
        clean_exit(tmp, main_out, main_bolt.returncode, cfg)
    elif not cmp_exists:
        sys.exit(f"{cmp_binary} doesn't exist")
    elif not main_exists:
        sys.exit(f"{main_binary} doesn't exist")

    if not cfg.SKIP_BINARY_CMP:
        cmp_proc = subprocess.run(['cmp', '-b', main_binary, cmp_binary],
                                  capture_output=True, text=True)
        if cmp_proc.returncode:
            # check if output is an ELF file (magic bytes)
            with open(main_binary, 'rb') as f:
                magic = f.read(4)
                if magic != b'\x7fELF':
                    sys.exit("output mismatch")
            # check if ELF headers match
            mismatch, _ = compare_with(main_binary, cmp_binary, 'readelf -We')
            if mismatch:
                print(fmt_cmp(mismatch))
                write_to(fmt_cmp(mismatch), os.path.join(tmp, 'headers.txt'))
                sys.exit("headers mismatch")
            # if headers match, compare sections (skip line with filename)
            mismatch, hdr = compare_with(main_binary, cmp_binary, 'objdump -hw',
                                         skip_begin=2)
            assert not mismatch
            # check which section has the first mismatch
            mismatch_offset = parse_cmp_offset(cmp_proc.stdout)
            section = find_section(mismatch_offset, hdr)
            sys.exit(f"binary mismatch @{hex(mismatch_offset)} ({section})")

    clean_exit(tmp, main_out, main_bolt.returncode, cfg)


if __name__ == "__main__":
    # config generator mode if the script is launched as is
    if os.path.basename(__file__) == "llvm-bolt-wrapper.py":
        main_config_generator()
    else:
        # llvm-bolt interceptor mode otherwise
        main()