from __future__ import print_function

import copy
import glob
import re
import subprocess
import sys

if sys.version_info[0] > 2:
    # Python 3's string module has no expandtabs(); expose the str method
    # under the same name so string.expandtabs(s, n) works on both versions.
    class string:
        expandtabs = str.expandtabs
else:
    import string

##### Common utilities for update_*test_checks.py


# Module-wide verbosity flag; set by parse_commandline_args() and read by
# debug().
_verbose = False


def parse_commandline_args(parser):
    """Add the options shared by the update scripts to `parser` and parse.

    Registers -v/--verbose, -u/--update-only, --force-update and the
    --enable/--disable pair (both stored in `enabled`, default True), parses
    the process command line, records the verbosity in the module-level
    `_verbose` flag and returns the parsed namespace.
    """
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='Show verbose output')
    parser.add_argument('-u', '--update-only', action='store_true',
                        help='Only update test if it was already autogened')
    parser.add_argument('--force-update', action='store_true',
                        help='Update test even if it was autogened by a different script')
    parser.add_argument('--enable', action='store_true', dest='enabled', default=True,
                        help='Activate CHECK line generation from this point forward')
    parser.add_argument('--disable', action='store_false', dest='enabled',
                        help='Deactivate CHECK line generation from this point forward')
    args = parser.parse_args()
    global _verbose
    _verbose = args.verbose
    return args


class InputLineInfo(object):
    """One input line together with the arguments in effect at that line."""

    def __init__(self, line, line_number, args, argv):
        self.line = line
        self.line_number = line_number
        self.args = args
        self.argv = argv


class TestInfo(object):
    """State for a single test file that is being (re)generated."""

    def __init__(self, test, parser, script_name, input_lines, args, argv,
                 comment_prefix, argparse_callback):
        """Record the test's lines, RUN lines and autogeneration note.

        When `comment_prefix` is None it defaults to '#' for paths ending in
        '.mir' and to ';' for everything else.
        """
        self.parser = parser
        self.argparse_callback = argparse_callback
        self.path = test
        self.args = args
        self.argv = argv
        self.input_lines = input_lines
        self.run_lines = find_run_lines(test, self.input_lines)
        self.comment_prefix = comment_prefix
        if self.comment_prefix is None:
            self.comment_prefix = '#' if self.path.endswith('.mir') else ';'
        self.autogenerated_note_prefix = self.comment_prefix + ' ' + UTC_ADVERT
        self.test_autogenerated_note = self.autogenerated_note_prefix + script_name
        self.test_autogenerated_note += get_autogennote_suffix(parser, self.args)

    def iterlines(self, output_lines):
        """Yield an InputLineInfo for every line that should be processed.

        The autogeneration note is appended to `output_lines` first. Lines
        that start with a previous note are dropped. Every other line is
        scanned for a UTC_ARGS command, which may update `self.args` and
        `self.argv`. While generation is disabled (`args.enabled` is false)
        lines are copied to `output_lines` verbatim instead of being yielded.
        """
        output_lines.append(self.test_autogenerated_note)
        for line_num, input_line in enumerate(self.input_lines):
            # Discard any previous script advertising.
            if input_line.startswith(self.autogenerated_note_prefix):
                continue
            self.args, self.argv = check_for_command(
                input_line, self.parser, self.args, self.argv,
                self.argparse_callback)
            if not self.args.enabled:
                output_lines.append(input_line)
                continue
            yield InputLineInfo(input_line, line_num, self.args, self.argv)


def itertests(test_patterns, parser, script_name, comment_prefix=None, argparse_callback=None):
    """Yield a TestInfo for every test file matched by `test_patterns`.

    Each pattern is expanded with glob; patterns without a match produce a
    warning. The command line is re-parsed for every test so that UTC_ARGS
    found on the first line of one test do not leak into the next one.
    Tests whose first line advertises a different script are skipped unless
    --force-update was given, and tests without any advertisement are
    skipped when --update-only was given.
    """
    for pattern in test_patterns:
        # On Windows we must expand the patterns ourselves.
        tests_list = glob.glob(pattern)
        if not tests_list:
            warn("Test file pattern '%s' was not found. Ignoring it." % (pattern,))
            continue
        for test in tests_list:
            with open(test) as f:
                input_lines = [line.rstrip() for line in f]
            args = parser.parse_args()
            if argparse_callback is not None:
                argparse_callback(args)
            argv = sys.argv[:]
            first_line = input_lines[0] if input_lines else ""
            if UTC_ADVERT in first_line:
                if script_name not in first_line and not args.force_update:
                    warn("Skipping test which wasn't autogenerated by " + script_name, test)
                    continue
                args, argv = check_for_command(first_line, parser, args, argv,
                                               argparse_callback)
            elif args.update_only:
                warn("Skipping test which isn't autogenerated: " + test)
                continue
            yield TestInfo(test, parser, script_name, input_lines, args, argv,
                           comment_prefix, argparse_callback)


def should_add_line_to_output(input_line, prefix_set):
    """Return False for lines that must not be copied to the new test.

    Dropped lines are blank IR comments (a lone ';') and existing check
    lines whose prefix is in `prefix_set`. Blank lines are kept.
    """
    # Skip any blank comment lines in the IR.
    if input_line.strip() == ';':
        return False
    # And skip any CHECK lines. We're building our own.
    m = CHECK_RE.match(input_line)
    if m and m.group(1) in prefix_set:
        return False

    return True


# Invoke the tool that is being tested.
def invoke_tool(exe, cmd_args, ir):
    """Run `exe` with the file `ir` on stdin and return its stdout as text.

    `cmd_args` is either a list of arguments (run without a shell) or a
    single string that is appended to `exe` and run through the shell.
    Raises subprocess.CalledProcessError when the tool exits non-zero.
    """
    with open(ir) as ir_file:
        # TODO Remove the str form which is used by update_test_checks.py and
        # update_llc_test_checks.py
        # The safer list form is used by update_cc_test_checks.py
        if isinstance(cmd_args, list):
            stdout = subprocess.check_output([exe] + cmd_args, stdin=ir_file)
        else:
            # NOTE(review): shell=True with a concatenated string; the
            # command comes from the test's RUN line.
            stdout = subprocess.check_output(exe + ' ' + cmd_args,
                                             shell=True, stdin=ir_file)
    if sys.version_info[0] > 2:
        stdout = stdout.decode()
    # Normalise Windows CRLF line endings to Unix LF.
    return stdout.replace('\r\n', '\n')


##### LLVM IR parser
RUN_LINE_RE = re.compile(r'^\s*(?://|[;#])\s*RUN:\s*(.*)$')
CHECK_PREFIX_RE = re.compile(r'--?check-prefix(?:es)?[= ](\S+)')
PREFIX_RE = re.compile(r'^[a-zA-Z0-9_-]+$')
CHECK_RE = re.compile(r'^\s*(?://|[;#])\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL|-SAME|-EMPTY)?:')

UTC_ARGS_KEY = 'UTC_ARGS:'
# Raw string: '\s' in a plain literal is an invalid escape sequence.
UTC_ARGS_CMD = re.compile(r'.*' + UTC_ARGS_KEY + r'\s*(?P<cmd>.*)\s*$')
UTC_ADVERT = 'NOTE: Assertions have been autogenerated by '

OPT_FUNCTION_RE = re.compile(
    r'^(\s*;\s*Function\sAttrs:\s(?P<attrs>[\w\s]+?))?\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w.-]+?)\s*'
    r'(?P<args_and_sig>\((\)|(.*?[\w.-]+?)\))[^{]*)\{\n(?P<body>.*?)^\}$',
    flags=(re.M | re.S))

ANALYZE_FUNCTION_RE = re.compile(
    r'^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w.-]+?)\':'
    r'\s*\n(?P<body>.*)$',
    flags=(re.X | re.S))

IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@([\w.-]+)\s*\(')
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
TRIPLE_ARG_RE = re.compile(r'-mtriple[= ]([^ ]+)')
MARCH_ARG_RE = re.compile(r'-march[= ]([^ ]+)')

SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
SCRUB_WHITESPACE_RE = re.compile(r'(?!^(| \w))[ \t]+', flags=re.M)
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
SCRUB_TRAILING_WHITESPACE_TEST_RE = SCRUB_TRAILING_WHITESPACE_RE
SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile(r'([ \t]|(#[0-9]+))+$', flags=re.M)
SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
SCRUB_LOOP_COMMENT_RE = re.compile(
    r'# =>This Inner Loop Header:.*|# in Loop:.*', flags=re.M)
SCRUB_TAILING_COMMENT_TOKEN_RE = re.compile(r'(?<=\S)+[ \t]*#$', flags=re.M)


def error(msg, test_file=None):
    """Print 'ERROR: <msg>' to stderr, suffixed with ': <test_file>' if given."""
    if test_file:
        msg = '{}: {}'.format(msg, test_file)
    print('ERROR: {}'.format(msg), file=sys.stderr)


def warn(msg, test_file=None):
    """Print 'WARNING: <msg>' to stderr, suffixed with ': <test_file>' if given."""
    if test_file:
        msg = '{}: {}'.format(msg, test_file)
    print('WARNING: {}'.format(msg), file=sys.stderr)


def debug(*args, **kwargs):
    """print() the arguments when the module-level `_verbose` flag is set.

    Output goes to stderr unless the caller passes an explicit `file`.
    """
    # Python2 does not allow def debug(*args, file=sys.stderr, **kwargs):
    kwargs.setdefault('file', sys.stderr)
    if _verbose:
        print(*args, **kwargs)


def find_run_lines(test, lines):
    """Return the RUN commands found in `lines`.

    A RUN line ending in a backslash is joined, with a single space, to the
    next RUN line. `test` is only used for debug output.
    """
    debug('Scanning for RUN lines in test file:', test)
    run_lines = []
    for line in lines:
        m = RUN_LINE_RE.match(line)
        if not m:
            continue
        if run_lines and run_lines[-1].endswith('\\'):
            # Continuation of the previous RUN line.
            run_lines[-1] = run_lines[-1].rstrip('\\') + ' ' + m.group(1)
        else:
            run_lines.append(m.group(1))
    debug('Found {} RUN lines in {}:'.format(len(run_lines), test))
    for run_line in run_lines:
        debug(' RUN: {}'.format(run_line))
    return run_lines


def scrub_body(body):
    """Normalise the whitespace of `body` and return the result."""
    # Scrub runs of whitespace out of the assembly, but leave the leading
    # whitespace in place.
    body = SCRUB_WHITESPACE_RE.sub(r' ', body)
    # Expand the tabs used for indentation. The str method exists on both
    # Python 2 and 3, so no string-module shim is needed here.
    body = body.expandtabs(2)
    # Strip trailing whitespace.
    body = SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r'', body)
    return body


def do_scrub(body, scrubber, scrubber_args, extra):
    """Call `scrubber(body, *scrubber_args)` with `extra_scrub` set to `extra`.

    When `scrubber_args` is non-empty it is deep-copied first, so that
    setting `extra_scrub` on its first element does not modify the caller's
    object.
    """
    if scrubber_args:
        local_args = copy.deepcopy(scrubber_args)
        local_args[0].extra_scrub = extra
        return scrubber(body, *local_args)
    return scrubber(body, *scrubber_args)


# Build up a dictionary of all the function bodies.
class function_body(object):
    """Scrubbed output recorded for one function under one check prefix."""

    def __init__(self, string, extra, args_and_sig, attrs):
        self.scrub = string           # body after the normal scrub
        self.extrascrub = extra       # body after the scrub with extra_scrub set
        self.args_and_sig = args_and_sig
        self.attrs = attrs

    def is_same_except_arg_names(self, extrascrub, args_and_sig, attrs):
        """Return True if the other function differs only in argument names.

        The attributes must be equal, the signatures must be equal once
        every IR value name is removed, and the extra-scrubbed bodies must
        be equal once the collected argument names and IR comments are
        removed.
        """
        arg_names = set()

        def drop_arg_names(match):
            # Remember the name and drop it from the signature.
            arg_names.add(match.group(2))
            return match.group(1) + match.group(3)

        def repl_arg_names(match):
            # Drop only the names that were collected from the signatures.
            if match.group(2) in arg_names:
                return match.group(1) + match.group(3)
            return match.group(1) + match.group(2) + match.group(3)

        if self.attrs != attrs:
            return False
        ans0 = IR_VALUE_RE.sub(drop_arg_names, self.args_and_sig)
        ans1 = IR_VALUE_RE.sub(drop_arg_names, args_and_sig)
        if ans0 != ans1:
            return False
        es0 = IR_VALUE_RE.sub(repl_arg_names, self.extrascrub)
        es1 = IR_VALUE_RE.sub(repl_arg_names, extrascrub)
        es0 = SCRUB_IR_COMMENT_RE.sub(r'', es0)
        es1 = SCRUB_IR_COMMENT_RE.sub(r'', es1)
        return es0 == es1

    def __str__(self):
        return self.scrub


def build_function_body_dictionary(function_re, scrubber, scrubber_args, raw_tool_output, prefixes, func_dict, verbose, record_args, check_attributes):
    """Record every function found in `raw_tool_output` into `func_dict`.

    `function_re` must define the named groups 'func' and 'body'; the
    groups 'args_and_sig' and 'analysis' are optional, and 'attrs' is read
    only when `check_attributes` is true. For each match and each prefix in
    `prefixes`, `func_dict[prefix][func]` is set to a function_body. If an
    entry already exists and differs:

    * if it differs only in argument names, the entry is kept and switched
      to the extra-scrubbed body;
    * otherwise the entry becomes None, except for the last prefix, where a
      warning is printed and the entry is overwritten.
    """
    for m in function_re.finditer(raw_tool_output):
        groups = m.groupdict()
        func = m.group('func')
        body = m.group('body')
        attrs = m.group('attrs') if check_attributes else ''
        # Determine if we print arguments, the opening brace, or nothing after
        # the function name
        if 'args_and_sig' in groups:
            if record_args:
                args_and_sig = scrub_body(m.group('args_and_sig').strip())
            else:
                args_and_sig = '('
        else:
            args_and_sig = ''
        scrubbed_body = do_scrub(body, scrubber, scrubber_args, extra=False)
        scrubbed_extra = do_scrub(body, scrubber, scrubber_args, extra=True)
        if 'analysis' in groups:
            analysis = m.group('analysis')
            if analysis.lower() != 'cost model analysis':
                warn('Unsupported analysis mode: %r!' % (analysis,))
        if func.startswith('stress'):
            # We only use the last line of the function body for stress tests.
            scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
        if verbose:
            print('Processing function: ' + func, file=sys.stderr)
            for line in scrubbed_body.splitlines():
                print(' ' + line, file=sys.stderr)
        for prefix in prefixes:
            if func in func_dict[prefix]:
                existing = func_dict[prefix][func]
                # `existing` is None once a conflict was recorded earlier.
                differs = (str(existing) != scrubbed_body or
                           (existing and
                            (existing.args_and_sig != args_and_sig or
                             existing.attrs != attrs)))
                if differs:
                    if existing and existing.is_same_except_arg_names(
                            scrubbed_extra, args_and_sig, attrs):
                        existing.scrub = scrubbed_extra
                        existing.args_and_sig = args_and_sig
                        continue
                    if prefix == prefixes[-1]:
                        warn('Found conflicting asm under the same prefix: %r!' % (prefix,))
                    else:
                        func_dict[prefix][func] = None
                        continue

            func_dict[prefix][func] = function_body(
                scrubbed_body, scrubbed_extra, args_and_sig, attrs)


##### Generator of LLVM IR CHECK lines

SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')

# Match things that look at identifiers, but only if they are followed by
# spaces, commas, paren, or end of the string
IR_VALUE_RE = re.compile(r'(\s+)%([\w.-]+?)([,\s\(\)]|\Z)')

NAMELESS_PREFIX = "TMP"

# Create a FileCheck variable name based on an IR name.
def get_value_name(var):
    """Return the upper-case FileCheck variable name for the IR name `var`.

    Purely numeric names get NAMELESS_PREFIX prepended; '.' and '-' are
    replaced with '_'.
    """
    if var.isdigit():
        var = NAMELESS_PREFIX + var
    var = var.replace('.', '_')
    var = var.replace('-', '_')
    return var.upper()


# Create a FileCheck variable from regex.
def get_value_definition(var):
    """Return the FileCheck expression that defines the variable for `var`."""
    return '[[' + get_value_name(var) + ':%.*]]'


# Use a FileCheck variable.
def get_value_use(var):
    """Return the FileCheck expression that uses the variable for `var`."""
    return '[[' + get_value_name(var) + ']]'


# Replace IR value defs and uses with FileCheck variables.
def genericize_check_lines(lines, is_analyze, vars_seen):
    """Rewrite `lines` in place for use as check lines and return the list.

    IR comments are stripped from every line. Unless `is_analyze` is true,
    every IR value name is replaced by a FileCheck variable: a definition
    the first time the name is seen and a use afterwards. `vars_seen` is
    the set of names already defined; it is updated in place.
    """
    # This gets called for each match that occurs in
    # a line. We transform variables we haven't seen
    # into defs, and variables we have seen into uses.
    def transform_line_vars(match):
        var = match.group(2)
        if NAMELESS_PREFIX.lower() in var.lower():
            warn("Change IR value name '%s' to prevent possible conflict with scripted FileCheck name." % (var,))
        if var in vars_seen:
            rv = get_value_use(var)
        else:
            vars_seen.add(var)
            rv = get_value_definition(var)
        # re.sub replaces the entire regex match
        # with whatever you return, so we have
        # to make sure to hand it back everything
        # including the commas and spaces.
        return match.group(1) + rv + match.group(3)

    for i, line in enumerate(lines):
        # An IR variable named '%.' matches the FileCheck regex string.
        line = line.replace('%.', '%dot')
        # Ignore any comments, since the check lines will too.
        scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r'', line)
        if is_analyze:
            lines[i] = scrubbed_line
        else:
            lines[i] = IR_VALUE_RE.sub(transform_line_vars, scrubbed_line)
    return lines


def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, is_asm, is_analyze):
    """Append the check lines for `func_name` to `output_lines`.

    `prefix_list` is a sequence whose items carry their check prefixes in
    element 0. `func_dict[prefix][func_name]` is the recorded function
    body, or a false value when there is nothing to check for that prefix.
    For each item, checks are emitted for at most one prefix, and a prefix
    is never printed twice. `check_label_format` is a %-format taking
    (prefix, function name, signature text). `is_analyze` is forwarded to
    genericize_check_lines, where it disables value-name replacement.
    """
    # prefix_exclusions are prefixes we cannot use to print the function
    # because it doesn't exist in run lines that use these prefixes as well.
    prefix_exclusions = set()
    printed_prefixes = []
    for p in prefix_list:
        checkprefixes = p[0]
        # If not all checkprefixes of this run line produced the function we
        # cannot check for it as it does not exist for this run line. A subset
        # of the check prefixes might know about the function but only because
        # other run lines created it.
        if any(func_name not in func_dict[checkprefix]
               for checkprefix in checkprefixes):
            prefix_exclusions |= set(checkprefixes)

    # prefix_exclusions is constructed, we can now emit the output
    for p in prefix_list:
        checkprefixes = p[0]
        for checkprefix in checkprefixes:
            if checkprefix in printed_prefixes:
                break

            # Check if the prefix is excluded.
            if checkprefix in prefix_exclusions:
                continue

            # If we do not have output for this prefix we skip it.
            if not func_dict[checkprefix][func_name]:
                continue

            entry = func_dict[checkprefix][func_name]

            # Add some space between different check prefixes, but not after
            # the last check line (before the test code).
            if is_asm and len(printed_prefixes) != 0:
                output_lines.append(comment_marker)

            vars_seen = set()
            printed_prefixes.append(checkprefix)
            attrs = str(entry.attrs)
            attrs = '' if attrs == 'None' else attrs
            if attrs:
                output_lines.append('%s %s: Function Attrs: %s' % (comment_marker, checkprefix, attrs))
            args_and_sig = str(entry.args_and_sig)
            args_and_sig = genericize_check_lines([args_and_sig], is_analyze, vars_seen)[0]
            if '[[' in args_and_sig:
                # The signature contains FileCheck variables, so it goes on a
                # separate -SAME line instead of the -LABEL line.
                output_lines.append(check_label_format % (checkprefix, func_name, ''))
                output_lines.append('%s %s-SAME: %s' % (comment_marker, checkprefix, args_and_sig))
            else:
                output_lines.append(check_label_format % (checkprefix, func_name, args_and_sig))
            func_body = str(entry).splitlines()

            # For ASM output, just emit the check lines.
            if is_asm:
                output_lines.append('%s %s: %s' % (comment_marker, checkprefix, func_body[0]))
                for func_line in func_body[1:]:
                    if func_line.strip() == '':
                        output_lines.append('%s %s-EMPTY:' % (comment_marker, checkprefix))
                    else:
                        output_lines.append('%s %s-NEXT: %s' % (comment_marker, checkprefix, func_line))
                break

            # For IR output, change all defs to FileCheck variables, so we're
            # immune to variable naming fashions.
            func_body = genericize_check_lines(func_body, is_analyze, vars_seen)

            # Every line of the body is checked, including the first one.
            is_blank_line = False

            for func_line in func_body:
                if func_line.strip() == '':
                    is_blank_line = True
                    continue
                # Do not waste time checking IR comments.
                func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)

                # Skip blank lines instead of checking them: the line after a
                # blank gets a plain check rather than a -NEXT check.
                if is_blank_line:
                    output_lines.append('{} {}: {}'.format(
                        comment_marker, checkprefix, func_line))
                else:
                    output_lines.append('{} {}-NEXT: {}'.format(
                        comment_marker, checkprefix, func_line))
                is_blank_line = False

            # Add space between different check prefixes and also before the
            # first line of code in the test function.
            output_lines.append(comment_marker)
            break


def add_ir_checks(output_lines, comment_marker, prefix_list, func_dict,
                  func_name, preserve_names, function_sig):
    """Append IR check lines for `func_name` to `output_lines`.

    With `function_sig` set, the label line also matches the text between
    'define' and the function name. `preserve_names` is passed to
    add_checks as its `is_analyze` argument, which turns off the
    replacement of IR value names by FileCheck variables.
    """
    # Label format is based on IR string.
    function_def_regex = 'define {{[^@]+}}' if function_sig else ''
    check_label_format = '{} %s-LABEL: {}@%s%s'.format(comment_marker, function_def_regex)
    add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name,
               check_label_format, False, preserve_names)


def add_analyze_checks(output_lines, comment_marker, prefix_list, func_dict, func_name):
    """Append analysis check lines for `func_name` to `output_lines`."""
    check_label_format = '{} %s-LABEL: \'%s%s\''.format(comment_marker)
    add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name,
               check_label_format, False, True)


def check_prefix(prefix):
    """Warn when `prefix` does not match PREFIX_RE."""
    if not PREFIX_RE.match(prefix):
        hint = ""
        if ',' in prefix:
            hint = " Did you mean '--check-prefixes=" + prefix + "'?"
        # Format the message before appending the hint: the hint embeds the
        # raw prefix, which must not be interpreted as a format string.
        warn(("Supplied prefix '%s' is invalid. Prefix must contain only alphanumeric characters, hyphens and underscores." % (prefix,)) + hint)


def verify_filecheck_prefixes(fc_cmd):
    """Warn about invalid or repeated prefixes in the command `fc_cmd`."""
    for part in fc_cmd.split():
        if "check-prefix=" in part:
            prefix = part.split('=', 1)[1]
            check_prefix(prefix)
        elif "check-prefixes=" in part:
            prefixes = part.split('=', 1)[1].split(',')
            for prefix in prefixes:
                check_prefix(prefix)
                if prefixes.count(prefix) > 1:
                    warn("Supplied prefix '%s' is not unique in the prefix list." % (prefix,))


def get_autogennote_suffix(parser, args):
    """Return the ' UTC_ARGS: ...' suffix for the autogeneration note.

    The suffix lists every option in `args` whose value differs from the
    parser default, except those naming tests, tool binaries, verbosity or
    --update-only. Returns '' when there is nothing to list.
    """
    note_parts = []
    for action in parser._actions:
        if not hasattr(args, action.dest):
            continue  # Ignore options such as --help that aren't included in args
        # Ignore parameters such as paths to the binary or the list of tests
        if action.dest in ('tests', 'update_only', 'opt_binary', 'llc_binary',
                           'clang', 'opt', 'llvm_bin', 'verbose'):
            continue
        value = getattr(args, action.dest)
        if action.const is not None:  # action stores a constant (usually True/False)
            # Skip actions with different constant values (this happens with
            # boolean --foo/--no-foo options)
            if value != action.const:
                continue
        if parser.get_default(action.dest) == value:
            continue  # Don't add default values
        note_parts.append(action.option_strings[0])
        if action.const is None:  # action takes a parameter
            note_parts.append('%s' % value)
    if not note_parts:
        return ''
    return ' %s %s' % (UTC_ARGS_KEY, ' '.join(note_parts))


def check_for_command(line, parser, args, argv, argparse_callback):
    """Apply a UTC_ARGS command found in `line`, if there is one.

    The command's options are appended to a copy of `argv`, and the result,
    minus the entries that are in `args.tests`, is parsed again with
    `parser`. `argparse_callback`, when not None, is called with the new
    namespace. Returns the (possibly updated) `(args, argv)` pair; both are
    returned unchanged when the line holds no command.
    """
    cmd_m = UTC_ARGS_CMD.match(line)
    if cmd_m:
        # split() without an argument ignores repeated whitespace, so no
        # empty-string options reach argparse as bogus positional arguments.
        argv = argv + cmd_m.group('cmd').split()
        args = parser.parse_args([arg for arg in argv if arg not in args.tests])
        if argparse_callback is not None:
            argparse_callback(args)
    return args, argv