#!/usr/bin/env python2.7

"""A script to generate FileCheck statements for regression tests.

This script is a utility to update LLVM opt or llc test cases with new
FileCheck patterns. It can either update all of the tests in the file or
a single test function.

Example usage:
$ update_test_checks.py --tool-binary=../bin/opt test/foo.ll

Workflow:
1. Make a compiler patch that requires updating some number of FileCheck lines
   in regression test files.
2. Save the patch and revert it from your local work area.
3. Update the RUN-lines in the affected regression tests to look canonical.
   Example: "; RUN: opt < %s -instcombine -S | FileCheck %s"
4. Refresh the FileCheck lines for either the entire file or select functions by
   running this script.
5. Commit the fresh baseline of checks.
6. Apply your patch from step 1 and rebuild your local binaries.
7. Re-run this script on affected regression tests.
8. Check the diffs to ensure the script has done something reasonable.
9. Submit a patch including the regression test diffs for review.

A common pattern is to have the script insert complete checking of every
instruction. Then, edit it down to only check the relevant instructions.
The script is designed to make adding checks to a test case fast, it is *not*
designed to be authoritative about what constitutes a good test!
"""

import argparse
import itertools
import os  # Used to advertise this file's name ("autogenerated_note").
import string
import subprocess
import sys
import tempfile
import re

ADVERT = '; NOTE: Assertions have been autogenerated by '

# RegEx: this is where the magic happens.

SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
# The lookahead rejects any match that begins at the start of a line, so the
# first character of a line's indentation is never collapsed.
SCRUB_WHITESPACE_RE = re.compile(r'(?!^(| \w))[ \t]+', flags=re.M)
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
SCRUB_X86_SHUFFLES_RE = (
    re.compile(
        r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem)( \{%k\d+\}( \{z\})?)? = .*)$',
        flags=re.M))
SCRUB_X86_SP_RE = re.compile(r'\d+\(%(esp|rsp)\)')
SCRUB_X86_RIP_RE = re.compile(r'[.\w]+\(%rip\)')
SCRUB_X86_LCP_RE = re.compile(r'\.LCPI[0-9]+_[0-9]+')
# re.M is required so that '^' matches at every line of a function body;
# without it only a kill comment on the very first line would be removed.
SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n', flags=re.M)
SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')

RUN_LINE_RE = re.compile(r'^\s*;\s*RUN:\s*(.*)$')
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@([\w-]+)\s*\(')
LLC_FUNCTION_RE = re.compile(
    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n[^:]*?'
    r'(?P<body>^##?[ \t]+[^:]+:.*?)\s*'
    r'^\s*(?:[^:\n]+?:\s*\n\s*\.size|\.cfi_endproc|\.globl|\.comm|\.(?:sub)?section)',
    flags=(re.M | re.S))
OPT_FUNCTION_RE = re.compile(
    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w-]+?)\s*\('
    r'(\s+)?[^)]*[^{]*\{\n(?P<body>.*?)^\}$',
    flags=(re.M | re.S))
CHECK_PREFIX_RE = re.compile(r'--check-prefix=(\S+)')
CHECK_RE = re.compile(r'^\s*;\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:')
# Match things that look like identifiers, but only if they are followed by
# spaces, commas, paren, or end of the string
IR_VALUE_RE = re.compile(r'(\s+)%(.+?)([,\s\(\)]|\Z)')


def invoke_tool(args, cmd_args, ir):
    """Run the tool under test on a test file and return its output as text.

    Args:
      args: parsed command-line namespace; only `args.tool_binary` is read.
      cmd_args: tool arguments extracted from a RUN line.
      ir: path of the test file, fed to the tool on stdin.

    Returns:
      The tool's stdout with '\\r\\n' line endings converted to '\\n'.

    Raises:
      subprocess.CalledProcessError: if the tool exits with a non-zero status.
    """
    # NOTE: the command is deliberately run through the shell; cmd_args comes
    # verbatim from the test's RUN line, so only run this on trusted tests.
    with open(ir) as ir_file:
        stdout = subprocess.check_output(args.tool_binary + ' ' + cmd_args,
                                         shell=True, stdin=ir_file)
    # check_output() returns bytes on Python 3; the scrubbing regexes are text.
    if not isinstance(stdout, str):
        stdout = stdout.decode('utf-8')
    # Fix line endings to unix LF style.
    return stdout.replace('\r\n', '\n')


# FIXME: Separate the x86-specific scrubbers, so this can be used for other targets.
def scrub_asm(asm):
    """Replace volatile x86 asm details in `asm` with FileCheck regexes."""
    # Detect shuffle asm comments and hide the operands in favor of the comments.
    asm = SCRUB_X86_SHUFFLES_RE.sub(r'\1 {{.*#+}} \2', asm)
    # Generically match the stack offset of a memory operand.
    asm = SCRUB_X86_SP_RE.sub(r'{{[0-9]+}}(%\1)', asm)
    # Generically match a RIP-relative memory operand.
    asm = SCRUB_X86_RIP_RE.sub(r'{{.*}}(%rip)', asm)
    # Generically match a LCP symbol.
    asm = SCRUB_X86_LCP_RE.sub(r'{{\.LCPI.*}}', asm)
    # Strip kill operands inserted into the asm.
    asm = SCRUB_KILL_COMMENT_RE.sub('', asm)
    return asm


def scrub_body(body, tool_basename):
    """Normalize whitespace in a function body; also scrub asm for llc output.

    Args:
      body: raw text of one function body from the tool output.
      tool_basename: "llc" enables the asm scrubbers; anything else does not.

    Returns:
      The scrubbed body text.
    """
    # Scrub runs of whitespace out of the assembly, but leave the leading
    # whitespace in place.
    body = SCRUB_WHITESPACE_RE.sub(r' ', body)
    # Expand the tabs used for indentation. (str.expandtabs is used because
    # the string.expandtabs() module function does not exist in Python 3.)
    body = body.expandtabs(2)
    # Strip trailing whitespace.
    body = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', body)
    if tool_basename == "llc":
        body = scrub_asm(body)
    return body


def build_function_body_dictionary(raw_tool_output, prefixes, func_dict, verbose, tool_basename):
    """Record the scrubbed body of every function found in the tool output.

    Args:
      raw_tool_output: complete stdout text of one tool invocation.
      prefixes: check prefixes of the RUN line that produced the output.
      func_dict: mapping prefix -> {function name -> body}; updated in place.
        Every prefix in `prefixes` must already be a key.
      verbose: when true, echo each function and its scrubbed body to stderr.
      tool_basename: "llc" selects the asm function regex, otherwise the IR one.
    """
    func_regex = LLC_FUNCTION_RE if tool_basename == "llc" else OPT_FUNCTION_RE
    for m in func_regex.finditer(raw_tool_output):
        func = m.group('func')
        scrubbed_body = scrub_body(m.group('body'), tool_basename)
        if func.startswith('stress'):
            # We only use the last line of the function body for stress tests.
            scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
        if verbose:
            sys.stderr.write('Processing function: ' + func + '\n')
            for l in scrubbed_body.splitlines():
                sys.stderr.write(' ' + l + '\n')
        for prefix in prefixes:
            if func in func_dict[prefix] and func_dict[prefix][func] != scrubbed_body:
                if prefix == prefixes[-1]:
                    # The last prefix has nothing to fall back to: warn, then
                    # let the newest body overwrite the old one below.
                    sys.stderr.write('WARNING: Found conflicting asm under the '
                                     'same prefix: %r!' % (prefix,) + '\n')
                else:
                    # A shared prefix saw two different bodies; mark it
                    # unusable for this function.
                    func_dict[prefix][func] = None
                    continue

            func_dict[prefix][func] = scrubbed_body


def get_value_name(var):
    """Create a FileCheck variable name based on an IR name."""
    if var.isdigit():
        # A purely numeric IR name gets a 'TMP' prefix.
        var = 'TMP' + var
    var = var.replace('.', '_')
    return var.upper()


# Create a FileCheck variable from regex.
def get_value_definition(var):
    """Return the FileCheck pattern that defines the variable for IR value `var`."""
    return '[[' + get_value_name(var) + ':%.*]]'


def get_value_use(var):
    """Return the FileCheck pattern that uses the variable for IR value `var`."""
    return '[[' + get_value_name(var) + ']]'


def genericize_check_lines(lines):
    """Replace IR value defs and uses with FileCheck variables.

    The first occurrence of a value becomes a variable definition and every
    later occurrence a use. IR comments are removed from each line.

    Args:
      lines: list of function body lines; it is rewritten in place.

    Returns:
      The same list object, after rewriting.
    """
    vars_seen = set()

    # This gets called for each match that occurs in a line. We transform
    # variables we haven't seen into defs, and variables we have seen into uses.
    def transform_line_vars(match):
        var = match.group(2)
        if var in vars_seen:
            rv = get_value_use(var)
        else:
            vars_seen.add(var)
            rv = get_value_definition(var)
        # re.sub replaces the entire regex match with whatever you return, so
        # we have to make sure to hand it back everything including the commas
        # and spaces.
        return match.group(1) + rv + match.group(3)

    for i, line in enumerate(lines):
        # An IR variable named '%.' matches the FileCheck regex string.
        line = line.replace('%.', '%dot')
        # Ignore any comments, since the check lines will too.
        scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r'', line)
        lines[i] = IR_VALUE_RE.sub(transform_line_vars, scrubbed_line)
    return lines


def add_checks(output_lines, prefix_list, func_dict, func_name, tool_basename):
    """Append the generated check lines for one function to `output_lines`.

    For each RUN line, the first of its prefixes that has a recorded body for
    `func_name` (and has not been printed already) is emitted.

    Args:
      output_lines: list of output lines; extended in place.
      prefix_list: list of (check prefixes, tool args) pairs, one per RUN line.
      func_dict: mapping prefix -> {function name -> scrubbed body or None}.
      func_name: name of the function to emit checks for.
      tool_basename: "llc" or "opt"; selects label format and IR genericizing.

    Returns:
      `output_lines`, for convenience.
    """
    # Select a label format based on the whether we're checking asm or IR.
    if tool_basename == "llc":
        check_label_format = "; %s-LABEL: %s:"
    else:
        check_label_format = "; %s-LABEL: @%s("

    printed_prefixes = []
    for checkprefixes, _ in prefix_list:
        for checkprefix in checkprefixes:
            if checkprefix in printed_prefixes:
                break
            # A function present in the test file may be absent from the tool
            # output (or marked None on conflict); skip it instead of raising
            # KeyError.
            body_text = func_dict.get(checkprefix, {}).get(func_name)
            if not body_text:
                continue
            printed_prefixes.append(checkprefix)
            output_lines.append(check_label_format % (checkprefix, func_name))
            func_body = body_text.splitlines()

            # For IR output, change all defs to FileCheck variables, so we're
            # immune to variable naming fashions.
            if tool_basename == "opt":
                func_body = genericize_check_lines(func_body)

            # For llc tests, there may be asm directives between the label and
            # the first checked line (most likely that first checked line is
            # "# BB#0"), so the first line must not be a -NEXT check.
            is_blank_line = tool_basename != "opt"

            for func_line in func_body:
                if func_line.strip() == '':
                    is_blank_line = True
                    continue
                # Do not waste time checking IR comments.
                if tool_basename == "opt":
                    func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)

                # Blank lines are skipped rather than checked, so the line
                # after one cannot use -NEXT.
                if is_blank_line:
                    output_lines.append('; %s: %s' % (checkprefix, func_line))
                else:
                    output_lines.append('; %s-NEXT: %s' % (checkprefix, func_line))
                is_blank_line = False

            # Add space between different check prefixes and also before the
            # first line of code in the test function.
            output_lines.append(';')
            break
    return output_lines


def should_add_line_to_output(input_line, prefix_set):
    """Return whether an existing function-body line is kept in the output.

    Blank comment lines (';') and check lines whose prefix is in `prefix_set`
    are dropped; everything else is kept.
    """
    # Skip any blank comment lines in the IR.
    if input_line.strip() == ';':
        return False
    # And skip any CHECK lines. We're building our own.
    m = CHECK_RE.match(input_line)
    if m and m.group(1) in prefix_set:
        return False

    return True


def _log(message):
    """Write one line to stderr (works on both Python 2 and 3)."""
    sys.stderr.write(message + '\n')


def _join_run_lines(raw_lines):
    """Merge each RUN line ending in a backslash with the RUN line after it."""
    run_lines = []
    for l in raw_lines:
        if run_lines and run_lines[-1].endswith("\\"):
            run_lines[-1] = run_lines[-1].rstrip("\\") + " " + l
        else:
            run_lines.append(l)
    return run_lines


def _parse_run_lines(run_lines, tool_basename):
    """Turn RUN lines into a list of (check prefixes, tool args) pairs.

    RUN lines that are not of the form '<tool> ... | FileCheck ...' are
    reported on stderr and skipped.
    """
    prefix_list = []
    for l in run_lines:
        # Without a pipe there is no FileCheck command to split off.
        if '|' not in l:
            _log('WARNING: Skipping unparseable RUN line: ' + l)
            continue
        tool_cmd, filecheck_cmd = [cmd.strip() for cmd in l.split('|', 1)]

        if not tool_cmd.startswith(tool_basename + ' '):
            _log('WARNING: Skipping non-%s RUN line: %s' % (tool_basename, l))
            continue

        if not filecheck_cmd.startswith('FileCheck '):
            _log('WARNING: Skipping non-FileChecked RUN line: ' + l)
            continue

        # Drop the tool name and the input-file placeholders; the test file is
        # supplied on stdin instead.
        tool_cmd_args = tool_cmd[len(tool_basename):].strip()
        tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip()

        check_prefixes = [m.group(1)
                          for m in CHECK_PREFIX_RE.finditer(filecheck_cmd)]
        if not check_prefixes:
            check_prefixes = ['CHECK']

        # FIXME: We should use multiple check prefixes to common check lines.
        # For now, we just ignore all but the last.
        prefix_list.append((check_prefixes, tool_cmd_args))
    return prefix_list


def _rewrite_test_lines(input_lines, prefix_list, func_dict, prefix_set,
                        tool_basename, only_function, autogenerated_note):
    """Return the new contents of a test file as a list of lines.

    Lines outside functions are copied through (minus any old advertising
    note); inside each selected function the old check lines are replaced by
    freshly generated ones.
    """
    is_in_function = False
    is_in_function_start = False
    name = None
    output_lines = [autogenerated_note]

    for input_line in input_lines:
        if is_in_function_start:
            if input_line == '':
                continue
            if input_line.lstrip().startswith(';'):
                m = CHECK_RE.match(input_line)
                if not m or m.group(1) not in prefix_set:
                    # An ordinary comment after the 'define' line stays put.
                    output_lines.append(input_line)
                    continue

            # Print out the various check lines here.
            output_lines = add_checks(output_lines, prefix_list, func_dict,
                                      name, tool_basename)
            is_in_function_start = False

        if is_in_function:
            if not should_add_line_to_output(input_line, prefix_set):
                continue
            # This input line of the function body will go as-is into the
            # output. Except make leading whitespace uniform: 2 spaces.
            input_line = SCRUB_LEADING_WHITESPACE_RE.sub(r'  ', input_line)
            output_lines.append(input_line)
            if input_line.strip() == '}':
                is_in_function = False
            continue

        # Discard any previous script advertising.
        if input_line.startswith(ADVERT):
            continue

        # If it's outside a function, it just gets copied to the output.
        output_lines.append(input_line)

        m = IR_FUNCTION_RE.match(input_line)
        if not m:
            continue
        name = m.group(1)
        if only_function is not None and name != only_function:
            # When filtering on a specific function, skip all others.
            continue
        is_in_function = is_in_function_start = True

    return output_lines


def main():
    """Parse the command line and regenerate the checks of every listed test.

    Each test file is rewritten in place. Exits with status 1 if the tool
    binary is neither llc nor opt.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='Show verbose output')
    parser.add_argument('--tool-binary', default='llc',
                        help='The tool used to generate the test case')
    parser.add_argument(
        '--function', help='The function in the test file to update')
    parser.add_argument('tests', nargs='+')
    args = parser.parse_args()

    autogenerated_note = ADVERT + 'utils/' + os.path.basename(__file__)

    tool_basename = os.path.basename(args.tool_binary)
    if tool_basename not in ('llc', 'opt'):
        _log('ERROR: Unexpected tool name: ' + tool_basename)
        sys.exit(1)

    for test in args.tests:
        if args.verbose:
            _log('Scanning for RUN lines in test file: %s' % (test,))
        with open(test) as f:
            input_lines = [l.rstrip() for l in f]

        raw_lines = [m.group(1)
                     for m in (RUN_LINE_RE.match(l) for l in input_lines) if m]
        run_lines = _join_run_lines(raw_lines)

        if args.verbose:
            _log('Found %d RUN lines:' % (len(run_lines),))
            for l in run_lines:
                _log(' RUN: ' + l)

        prefix_list = _parse_run_lines(run_lines, tool_basename)

        # Every prefix needs an entry before any tool output is recorded.
        func_dict = {}
        for prefixes, _ in prefix_list:
            for prefix in prefixes:
                func_dict[prefix] = {}
        for prefixes, tool_args in prefix_list:
            if args.verbose:
                _log('Extracted tool cmd: ' + tool_basename + ' ' + tool_args)
                _log('Extracted FileCheck prefixes: ' + str(prefixes))

            raw_tool_output = invoke_tool(args, tool_args, test)
            build_function_body_dictionary(raw_tool_output, prefixes, func_dict,
                                           args.verbose, tool_basename)

        prefix_set = set(prefix for prefixes, _ in prefix_list
                         for prefix in prefixes)
        if args.verbose:
            _log('Rewriting FileCheck prefixes: %s' % (prefix_set,))

        output_lines = _rewrite_test_lines(input_lines, prefix_list, func_dict,
                                           prefix_set, tool_basename,
                                           args.function, autogenerated_note)

        if args.verbose:
            _log('Writing %d lines to %s...' % (len(output_lines), test))

        # The file is opened in binary mode so '\n' is written verbatim; on
        # Python 3 that requires encoding the text first.
        data = ''.join(l + '\n' for l in output_lines)
        if not isinstance(data, bytes):
            data = data.encode('utf-8')
        with open(test, 'wb') as f:
            f.write(data)


if __name__ == '__main__':
    main()