1from __future__ import print_function
2
3import copy
4import glob
5import re
6import subprocess
7import sys
8
# Compatibility shim: on Python 3 a minimal stand-in named `string` exposes
# str.expandtabs, so callers can write string.expandtabs(text, tabsize) on
# either major version. On Python 2 the real `string` module is imported.
if sys.version_info[0] > 2:
  class string:
    expandtabs = str.expandtabs
else:
  import string
14
15##### Common utilities for update_*test_checks.py
16
17
# Module-wide verbosity flag: assigned by parse_commandline_args() from the
# -v/--verbose option and consulted by debug() before printing.
_verbose = False
19
def parse_commandline_args(parser):
  """Register the options shared by all update scripts and parse sys.argv.

  The options are added to `parser` in a fixed order, the command line is
  parsed with parser.parse_args(), and the module-level verbosity flag is
  updated from the parsed -v/--verbose value.

  Returns the namespace produced by parser.parse_args().
  """
  global _verbose
  shared_options = (
      (('-v', '--verbose'),
       dict(action='store_true', help='Show verbose output')),
      (('-u', '--update-only'),
       dict(action='store_true',
            help='Only update test if it was already autogened')),
      (('--force-update',),
       dict(action='store_true',
            help='Update test even if it was autogened by a different script')),
      (('--enable',),
       dict(action='store_true', dest='enabled', default=True,
            help='Activate CHECK line generation from this point forward')),
      (('--disable',),
       dict(action='store_false', dest='enabled',
            help='Deactivate CHECK line generation from this point forward')),
  )
  for flags, settings in shared_options:
    parser.add_argument(*flags, **settings)
  parsed = parser.parse_args()
  _verbose = parsed.verbose
  return parsed
35
36
class InputLineInfo(object):
  """Plain record describing one input line of a test file.

  Stores the line text, its index, and the args/argv values that were
  handed to the constructor for that line.
  """

  def __init__(self, line, line_number, args, argv):
    self.line, self.line_number = line, line_number
    self.args, self.argv = args, argv
43
44
class TestInfo(object):
  """State for one test file that is being (re)generated.

  Holds the path, its lines, the RUN lines found in them, the current
  args/argv pair, and the autogeneration note that will head the output.
  """

  def __init__(self, test, parser, script_name, input_lines, args, argv,
               comment_prefix, argparse_callback):
    self.path = test
    self.parser = parser
    self.argparse_callback = argparse_callback
    self.args = args
    self.argv = argv
    self.input_lines = input_lines
    self.run_lines = find_run_lines(test, self.input_lines)
    # Without an explicit prefix, .mir files use '#' and everything else ';'.
    if comment_prefix is None:
      comment_prefix = '#' if self.path.endswith('.mir') else ';'
    self.comment_prefix = comment_prefix
    self.autogenerated_note_prefix = self.comment_prefix + ' ' + UTC_ADVERT
    note = self.autogenerated_note_prefix + script_name
    note += get_autogennote_suffix(parser, self.args)
    self.test_autogenerated_note = note

  def iterlines(self, output_lines):
    """Yield an InputLineInfo for every input line that should be processed.

    The autogeneration note is appended to `output_lines` first. Lines that
    start with the note prefix are dropped. Every other line is passed to
    check_for_command(), which may replace self.args/self.argv; while
    generation is disabled the line is copied to `output_lines` unchanged
    instead of being yielded.
    """
    output_lines.append(self.test_autogenerated_note)
    for line_num, input_line in enumerate(self.input_lines):
      if not input_line.startswith(self.autogenerated_note_prefix):
        self.args, self.argv = check_for_command(
            input_line, self.parser, self.args, self.argv,
            self.argparse_callback)
        if self.args.enabled:
          yield InputLineInfo(input_line, line_num, self.args, self.argv)
        else:
          output_lines.append(input_line)
77
78
def itertests(test_patterns, parser, script_name, comment_prefix=None, argparse_callback=None):
  """Yield a TestInfo for every test file matched by `test_patterns`.

  Each pattern is expanded with glob.glob; patterns with no match only
  produce a warning. For every file the command line is re-parsed, and if
  the first line carries the autogeneration advert its UTC_ARGS are applied.
  Files are skipped (with a warning) when they were generated by a different
  script and --force-update is off, or when --update-only is set and the
  file has no advert.
  """
  for pattern in test_patterns:
    # Shells on Windows do not expand wildcards, so do it here.
    matched_paths = glob.glob(pattern)
    if not matched_paths:
      warn("Test file pattern '%s' was not found. Ignoring it." % (pattern,))
      continue
    for test in matched_paths:
      with open(test) as handle:
        input_lines = [text.rstrip() for text in handle]
      args = parser.parse_args()
      if argparse_callback is not None:
        argparse_callback(args)
      argv = sys.argv[:]
      first_line = input_lines[0] if input_lines else ""
      if UTC_ADVERT in first_line:
        if not (script_name in first_line or args.force_update):
          warn("Skipping test which wasn't autogenerated by " + script_name, test)
          continue
        args, argv = check_for_command(first_line, parser, args, argv, argparse_callback)
      elif args.update_only:
        # No advert on the first line here, so the test was never autogenerated.
        warn("Skipping test which isn't autogenerated: " + test)
        continue
      yield TestInfo(test, parser, script_name, input_lines, args, argv,
                     comment_prefix, argparse_callback)
105
106
def should_add_line_to_output(input_line, prefix_set):
  """Return False for lines that must not be copied to the regenerated test.

  Dropped lines are: a comment consisting of a lone ';', and any existing
  check line whose prefix is in `prefix_set` (those get regenerated).
  Blank lines are deliberately kept.
  """
  if input_line.strip() == ';':
    return False
  found = CHECK_RE.match(input_line)
  return not (found and found.group(1) in prefix_set)
120
def invoke_tool(exe, cmd_args, ir):
  """Run the tool under test with the file `ir` on stdin; return its stdout.

  `cmd_args` may be a list (executed directly as [exe] + cmd_args) or a
  string (appended to `exe` and run through the shell). On Python 3 the
  captured bytes are decoded. Windows '\\r\\n' line endings are converted
  to '\\n' in the returned text.
  """
  with open(ir) as ir_file:
    # TODO Remove the str form which is used by update_test_checks.py and
    # update_llc_test_checks.py
    # The safer list form is used by update_cc_test_checks.py
    via_shell = not isinstance(cmd_args, list)
    if via_shell:
      command = exe + ' ' + cmd_args
    else:
      command = [exe] + cmd_args
    stdout = subprocess.check_output(command, shell=via_shell, stdin=ir_file)
    if sys.version_info[0] > 2:
      stdout = stdout.decode()
  # Normalize line endings to unix LF style.
  return stdout.replace('\r\n', '\n')
136
137##### LLVM IR parser
# A RUN line behind a '//', ';' or '#' comment marker; group 1 is the command.
RUN_LINE_RE = re.compile(r'^\s*(?://|[;#])\s*RUN:\s*(.*)$')
# -check-prefix / --check-prefix(es) option; group 1 is the option's value.
CHECK_PREFIX_RE = re.compile(r'--?check-prefix(?:es)?[= ](\S+)')
# A valid prefix: letters, digits, underscores and hyphens only.
PREFIX_RE = re.compile('^[a-zA-Z0-9_-]+$')
# An existing check line; group 1 is the prefix without its
# -NEXT/-NOT/-DAG/-LABEL/-SAME/-EMPTY suffix.
CHECK_RE = re.compile(r'^\s*(?://|[;#])\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL|-SAME|-EMPTY)?:')

UTC_ARGS_KEY = 'UTC_ARGS:'
# Everything after the UTC_ARGS key is captured as 'cmd'. The tail has to be
# a raw string: '\s' in a plain literal is an invalid escape sequence.
UTC_ARGS_CMD = re.compile(r'.*' + UTC_ARGS_KEY + r'\s*(?P<cmd>.*)\s*$')
UTC_ADVERT = 'NOTE: Assertions have been autogenerated by '

# One IR function definition with optional 'Function Attrs' comment; named
# groups: attrs, func, args_and_sig, body.
OPT_FUNCTION_RE = re.compile(
    r'^(\s*;\s*Function\sAttrs:\s(?P<attrs>[\w\s]+?))?\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w.-]+?)\s*'
    r'(?P<args_and_sig>\((\)|(.*?[\w.-]+?)\))[^{]*)\{\n(?P<body>.*?)^\}$',
    flags=(re.M | re.S))

# Analysis printer output of the form "'<analysis>' for function '<func>':"
# followed by the body; named groups: analysis, func, body.
ANALYZE_FUNCTION_RE = re.compile(
    r'^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w.-]+?)\':'
    r'\s*\n(?P<body>.*)$',
    flags=(re.X | re.S))

# Start of an IR function definition; group 1 is the function name.
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@([\w.-]+)\s*\(')
# 'target triple = "..."' line in IR; group 1 is the triple.
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
# -mtriple / -march command-line options; group 1 is the value.
TRIPLE_ARG_RE = re.compile(r'-mtriple[= ]([^ ]+)')
MARCH_ARG_RE = re.compile(r'-march[= ]([^ ]+)')

# Whitespace scrubbing helpers.
SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
SCRUB_WHITESPACE_RE = re.compile(r'(?!^(|  \w))[ \t]+', flags=re.M)
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
SCRUB_TRAILING_WHITESPACE_TEST_RE = SCRUB_TRAILING_WHITESPACE_RE
# Trailing whitespace together with trailing '#<number>' attribute references.
SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile(r'([ \t]|(#[0-9]+))+$', flags=re.M)
# Assembly comment scrubbing helpers.
SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
SCRUB_LOOP_COMMENT_RE = re.compile(
    r'# =>This Inner Loop Header:.*|# in Loop:.*', flags=re.M)
SCRUB_TAILING_COMMENT_TOKEN_RE = re.compile(r'(?<=\S)+[ \t]*#$', flags=re.M)
171
172
def error(msg, test_file=None):
  """Print 'ERROR: <msg>' to stderr, with ': <test_file>' appended if given."""
  text = '{}: {}'.format(msg, test_file) if test_file else msg
  print('ERROR: {}'.format(text), file=sys.stderr)
177
def warn(msg, test_file=None):
  """Print 'WARNING: <msg>' to stderr, with ': <test_file>' appended if given."""
  text = '{}: {}'.format(msg, test_file) if test_file else msg
  print('WARNING: {}'.format(text), file=sys.stderr)
182
def debug(*args, **kwargs):
  """Forward to print() when verbose output is on; default stream is stderr.

  A caller-supplied `file` keyword is respected.
  """
  # Python2 does not allow def debug(*args, file=sys.stderr, **kwargs):
  kwargs.setdefault('file', sys.stderr)
  if not _verbose:
    return
  print(*args, **kwargs)
189
def find_run_lines(test, lines):
  """Collect the RUN commands found in `lines`.

  A RUN line ending in a backslash is joined with the following RUN line,
  separated by a single space. `test` is only used in debug output.

  Returns the list of joined RUN command strings.
  """
  debug('Scanning for RUN lines in test file:', test)
  run_lines = []
  for text in lines:
    found = RUN_LINE_RE.match(text)
    if not found:
      continue
    command = found.group(1)
    if run_lines and run_lines[-1].endswith('\\'):
      # Continuation of the previous RUN line.
      run_lines[-1] = run_lines[-1].rstrip('\\') + ' ' + command
    else:
      run_lines.append(command)
  debug('Found {} RUN lines in {}:'.format(len(run_lines), test))
  for command in run_lines:
    debug('  RUN: {}'.format(command))
  return run_lines
204
def scrub_body(body):
  """Normalize whitespace in `body` and return the result.

  Runs of spaces/tabs are collapsed to one space (leading indentation is
  left alone), remaining tabs are expanded with a tab size of 2, and
  trailing whitespace is removed from every line.
  """
  collapsed = SCRUB_WHITESPACE_RE.sub(r' ', body)
  expanded = string.expandtabs(collapsed, 2)
  return SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r'', expanded)
214
def do_scrub(body, scrubber, scrubber_args, extra):
  """Call `scrubber(body, *scrubber_args)` with the extra-scrub flag applied.

  When `scrubber_args` is non-empty it is deep-copied and `extra` is stored
  as `extra_scrub` on the copy's first element, so the caller's arguments
  are never modified. With empty arguments the scrubber is called as is.
  """
  if not scrubber_args:
    return scrubber(body, *scrubber_args)
  args_copy = copy.deepcopy(scrubber_args)
  args_copy[0].extra_scrub = extra
  return scrubber(body, *args_copy)
221
# Record for one function's tool output, stored in the function dictionary.
class function_body(object):
  """Scrubbed output of one function plus its signature and attributes.

  `scrub` is the text returned by str(); `extrascrub` is the variant kept
  for comparisons in is_same_except_arg_names().
  """

  def __init__(self, string, extra, args_and_sig, attrs):
    self.scrub = string
    self.extrascrub = extra
    self.args_and_sig = args_and_sig
    self.attrs = attrs

  def is_same_except_arg_names(self, extrascrub, args_and_sig, attrs):
    """Return True if the given function differs from this one only in argument names.

    Attributes must be equal, the signatures must be equal once value names
    are removed, and the extra-scrubbed bodies must be equal once those
    argument names and any IR comments are removed.
    """
    if self.attrs != attrs:
      return False

    seen_names = set()

    def strip_and_record(match):
      # Remember the name and keep only the text around it.
      seen_names.add(match.group(2))
      return match.group(1) + match.group(3)

    def strip_if_recorded(match):
      lead, name, tail = match.group(1), match.group(2), match.group(3)
      if name in seen_names:
        return lead + tail
      return lead + name + tail

    own_sig = IR_VALUE_RE.sub(strip_and_record, self.args_and_sig)
    other_sig = IR_VALUE_RE.sub(strip_and_record, args_and_sig)
    if own_sig != other_sig:
      return False

    own_body, other_body = [
        SCRUB_IR_COMMENT_RE.sub(r'', IR_VALUE_RE.sub(strip_if_recorded, text))
        for text in (self.extrascrub, extrascrub)]
    return own_body == other_body

  def __str__(self):
    return self.scrub
252
def build_function_body_dictionary(function_re, scrubber, scrubber_args, raw_tool_output, prefixes, func_dict, verbose, record_args, check_attributes):
  """Record the scrubbed body of every function in `raw_tool_output`.

  Each match of `function_re` is scrubbed twice (with and without the extra
  scrub flag) and stored in func_dict[prefix][func] for every prefix. If a
  prefix already holds a different body for the function, the entry is
  updated in place when only argument names differ; otherwise it is set to
  None, except for the last prefix, where a warning is printed and the entry
  is overwritten.
  """
  for m in function_re.finditer(raw_tool_output):
    if not m:
      continue
    groups = m.groupdict()
    func = m.group('func')
    body = m.group('body')
    attrs = m.group('attrs') if check_attributes else ''
    # Determine if we print arguments, the opening brace, or nothing after the function name
    if 'args_and_sig' not in groups:
      args_and_sig = ''
    elif record_args:
      args_and_sig = scrub_body(m.group('args_and_sig').strip())
    else:
      args_and_sig = '('
    scrubbed_body = do_scrub(body, scrubber, scrubber_args, extra = False)
    scrubbed_extra = do_scrub(body, scrubber, scrubber_args, extra = True)
    if 'analysis' in groups:
      analysis = m.group('analysis')
      if analysis.lower() != 'cost model analysis':
        warn('Unsupported analysis mode: %r!' % (analysis,))
    if func.startswith('stress'):
      # We only use the last line of the function body for stress tests.
      scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
    if verbose:
      print('Processing function: ' + func, file=sys.stderr)
      for body_line in scrubbed_body.splitlines():
        print('  ' + body_line, file=sys.stderr)
    for prefix in prefixes:
      per_prefix = func_dict[prefix]
      if func in per_prefix:
        previous = per_prefix[func]
        conflicting = str(previous) != scrubbed_body or (
            previous and (previous.args_and_sig != args_and_sig or
                          previous.attrs != attrs))
        if conflicting:
          if previous and previous.is_same_except_arg_names(scrubbed_extra, args_and_sig, attrs):
            # Only argument names differ: keep the entry, use the extra-scrubbed text.
            previous.scrub = scrubbed_extra
            previous.args_and_sig = args_and_sig
            continue
          if prefix != prefixes[-1]:
            # This prefix cannot describe both outputs; mark it unusable.
            per_prefix[func] = None
            continue
          warn('Found conflicting asm under the same prefix: %r!' % (prefix,))

      per_prefix[func] = function_body(scrubbed_body, scrubbed_extra, args_and_sig, attrs)
295
296##### Generator of LLVM IR CHECK lines
297
# Matches an IR comment: ';' through end of line, plus any whitespace
# immediately before the ';'.
SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')

# Match things that look like identifiers, but only if they are followed by
# spaces, commas, paren, or end of the string.
# Groups: 1 = leading whitespace, 2 = name without '%', 3 = trailing delimiter.
IR_VALUE_RE = re.compile(r'(\s+)%([\w.-]+?)([,\s\(\)]|\Z)')

# Prepended by get_value_name() to all-digit IR value names when building
# FileCheck variable names.
NAMELESS_PREFIX = "TMP"
305
# Create a FileCheck variable name based on an IR name.
def get_value_name(var):
  """Return the upper-case FileCheck variable name for IR value name `var`.

  All-digit names get NAMELESS_PREFIX prepended; '.' and '-' become '_'.
  """
  name = NAMELESS_PREFIX + var if var.isdigit() else var
  return name.replace('.', '_').replace('-', '_').upper()
313
314
# Create a FileCheck variable from regex.
def get_value_definition(var):
  """Return the FileCheck text that defines the variable for `var`."""
  return '[[{}:%.*]]'.format(get_value_name(var))
318
319
# Use a FileCheck variable.
def get_value_use(var):
  """Return the FileCheck text that refers to the variable for `var`."""
  return '[[%s]]' % get_value_name(var)
323
# Replace IR value defs and uses with FileCheck variables.
def genericize_check_lines(lines, is_analyze, vars_seen):
  """Rewrite `lines` in place for use as check lines and return the list.

  In every line '%.' is replaced by '%dot' and IR comments are removed.
  Unless `is_analyze` is set, each IR value name is then replaced by a
  FileCheck variable: a definition on first sight, a use afterwards.
  `vars_seen` is the set of names already defined; it is updated here.
  """
  def to_filecheck_var(match):
    # Called by re.sub for each value name found in a line.
    name = match.group(2)
    if NAMELESS_PREFIX.lower() in name.lower():
      warn("Change IR value name '%s' to prevent possible conflict with scripted FileCheck name." % (name,))
    if name not in vars_seen:
      vars_seen.add(name)
      replacement = get_value_definition(name)
    else:
      replacement = get_value_use(name)
    # re.sub swaps out the whole match, so the surrounding whitespace and
    # delimiter have to be handed back as well.
    return match.group(1) + replacement + match.group(3)

  for index, text in enumerate(lines):
    # An IR variable named '%.' matches the FileCheck regex string.
    text = text.replace('%.', '%dot')
    # Ignore any comments, since the check lines will too.
    without_comment = SCRUB_IR_COMMENT_RE.sub(r'', text)
    lines[index] = (without_comment if is_analyze
                    else IR_VALUE_RE.sub(to_filecheck_var, without_comment))
  return lines
356
357
def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, is_asm, is_analyze):
  """Append the check lines for `func_name` to `output_lines`.

  Each entry of `prefix_list` is indexed with [0] to get the list of check
  prefixes of one run line. For every run line, the first prefix that is not
  excluded, has recorded output in `func_dict` and has not been printed yet
  is used to emit a label line (built with `check_label_format`) followed by
  the body check lines.

  With `is_asm` set the body lines are emitted verbatim. Otherwise they go
  through genericize_check_lines() (which receives `is_analyze`), blank
  lines are skipped, and a bare `comment_marker` line is appended after the
  body. Nothing is returned; `output_lines` is modified in place.
  """
  # prefix_exclusions are prefixes we cannot use to print the function because it doesn't exist in run lines that use these prefixes as well.
  prefix_exclusions = set()
  printed_prefixes = []
  for p in prefix_list:
    checkprefixes = p[0]
    # If not all checkprefixes of this run line produced the function we cannot check for it as it does not
    # exist for this run line. A subset of the check prefixes might know about the function but only because
    # other run lines created it.
    if any(map(lambda checkprefix: func_name not in func_dict[checkprefix], checkprefixes)):
        prefix_exclusions |= set(checkprefixes)
        continue

  # prefix_exclusions is constructed, we can now emit the output
  for p in prefix_list:
    checkprefixes = p[0]
    for checkprefix in checkprefixes:
      # A prefix that was already printed ends the search for this run line.
      if checkprefix in printed_prefixes:
        break

      # Check if the prefix is excluded.
      if checkprefix in prefix_exclusions:
        continue

      # If we do not have output for this prefix we skip it.
      if not func_dict[checkprefix][func_name]:
        continue

      # Add some space between different check prefixes, but not after the last
      # check line (before the test code).
      if is_asm:
        if len(printed_prefixes) != 0:
          output_lines.append(comment_marker)

      # FileCheck variables are tracked per emitted function/prefix pair.
      vars_seen = set()
      printed_prefixes.append(checkprefix)
      attrs = str(func_dict[checkprefix][func_name].attrs)
      # str(None) is 'None'; treat a missing attribute list as empty.
      attrs = '' if attrs == 'None' else attrs
      if attrs:
        output_lines.append('%s %s: Function Attrs: %s' % (comment_marker, checkprefix, attrs))
      args_and_sig = str(func_dict[checkprefix][func_name].args_and_sig)
      args_and_sig = genericize_check_lines([args_and_sig], is_analyze, vars_seen)[0]
      # If the signature now contains FileCheck variables, emit it on a
      # separate -SAME line instead of inside the label line.
      if '[[' in args_and_sig:
        output_lines.append(check_label_format % (checkprefix, func_name, ''))
        output_lines.append('%s %s-SAME: %s' % (comment_marker, checkprefix, args_and_sig))
      else:
        output_lines.append(check_label_format % (checkprefix, func_name, args_and_sig))
      func_body = str(func_dict[checkprefix][func_name]).splitlines()

      # For ASM output, just emit the check lines.
      if is_asm:
        output_lines.append('%s %s:       %s' % (comment_marker, checkprefix, func_body[0]))
        for func_line in func_body[1:]:
          if func_line.strip() == '':
            output_lines.append('%s %s-EMPTY:' % (comment_marker, checkprefix))
          else:
            output_lines.append('%s %s-NEXT:  %s' % (comment_marker, checkprefix, func_line))
        break

      # For IR output, change all defs to FileCheck variables, so we're immune
      # to variable naming fashions.
      func_body = genericize_check_lines(func_body, is_analyze, vars_seen)

      # This could be selectively enabled with an optional invocation argument.
      # Disabled for now: better to check everything. Be safe rather than sorry.

      # Handle the first line of the function body as a special case because
      # it's often just noise (a useless asm comment or entry label).
      #if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
      #  is_blank_line = True
      #else:
      #  output_lines.append('%s %s:       %s' % (comment_marker, checkprefix, func_body[0]))
      #  is_blank_line = False

      is_blank_line = False

      for func_line in func_body:
        if func_line.strip() == '':
          is_blank_line = True
          continue
        # Do not waste time checking IR comments.
        func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)

        # Skip blank lines instead of checking them: the line after a blank
        # line gets a plain check instead of a -NEXT check.
        if is_blank_line:
          output_lines.append('{} {}:       {}'.format(
              comment_marker, checkprefix, func_line))
        else:
          output_lines.append('{} {}-NEXT:  {}'.format(
              comment_marker, checkprefix, func_line))
        is_blank_line = False

      # Add space between different check prefixes and also before the first
      # line of code in the test function.
      output_lines.append(comment_marker)
      break
454
def add_ir_checks(output_lines, comment_marker, prefix_list, func_dict,
                  func_name, preserve_names, function_sig):
  """Append IR check lines for `func_name` via add_checks().

  The label line matches '@<name>', preceded by a 'define {{[^@]+}}'
  pattern when `function_sig` is set. `preserve_names` is forwarded as
  add_checks' `is_analyze` argument.
  """
  # Label format is based on IR string.
  if function_sig:
    function_def_regex = 'define {{[^@]+}}'
  else:
    function_def_regex = ''
  check_label_format = '{} %s-LABEL: {}@%s%s'.format(comment_marker, function_def_regex)
  add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name,
             check_label_format, is_asm=False, is_analyze=preserve_names)
462
def add_analyze_checks(output_lines, comment_marker, prefix_list, func_dict, func_name):
  """Append analysis check lines for `func_name` via add_checks().

  The label line matches the function name in single quotes.
  """
  check_label_format = "{} %s-LABEL: '%s%s'".format(comment_marker)
  add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name,
             check_label_format, is_asm=False, is_analyze=True)
466
467
def check_prefix(prefix):
  """Warn if `prefix` is not a valid check prefix.

  A valid prefix matches PREFIX_RE. When the rejected prefix contains a
  comma, the warning also suggests the '--check-prefixes=' spelling.
  Returns None.
  """
  if PREFIX_RE.match(prefix):
    return
  # Format the fixed part first and append the hint afterwards, so '%'
  # characters in the user-supplied prefix never reach the format string.
  message = ("Supplied prefix '%s' is invalid. Prefix must contain only alphanumeric characters, hyphens and underscores." %
             (prefix,))
  if ',' in prefix:
    message += " Did you mean '--check-prefixes=" + prefix + "'?"
  warn(message)
475
476
def verify_filecheck_prefixes(fc_cmd):
  """Validate the check prefixes given on a FileCheck command line.

  Every whitespace-separated token containing 'check-prefix=' or
  'check-prefixes=' has its value(s) passed to check_prefix(). For the
  plural form, a warning is printed for each occurrence of a prefix that
  appears more than once in the list.
  """
  for token in fc_cmd.split():
    if "check-prefix=" in token:
      names = [token.split('=', 1)[1]]
      report_duplicates = False
    elif "check-prefixes=" in token:
      names = token.split('=', 1)[1].split(',')
      report_duplicates = True
    else:
      continue
    for name in names:
      check_prefix(name)
      if report_duplicates and names.count(name) > 1:
        warn("Supplied prefix '%s' is not unique in the prefix list." % (name,))
489
490
def get_autogennote_suffix(parser, args):
  """Return the ' UTC_ARGS: ...' suffix recording non-default options.

  Walks the parser's actions and lists every option whose value in `args`
  differs from its default, skipping destinations that name tests, tool
  paths, update-only or verbosity. Options that take a parameter are
  followed by their value. Returns '' when nothing needs recording.
  """
  ignored_dests = ('tests', 'update_only', 'opt_binary', 'llc_binary',
                   'clang', 'opt', 'llvm_bin', 'verbose')
  pieces = []
  for action in parser._actions:
    dest = action.dest
    # Options such as --help never show up in args.
    if not hasattr(args, dest):
      continue
    # Paths to binaries and the list of tests are not recorded.
    if dest in ignored_dests:
      continue
    value = getattr(args, dest)
    stores_constant = action.const is not None
    # With paired --foo/--no-foo options only the action whose constant
    # matches the current value is relevant.
    if stores_constant and value != action.const:
      continue
    # Default values are not recorded.
    if parser.get_default(dest) == value:
      continue
    pieces.append(action.option_strings[0])
    if not stores_constant:  # action takes a parameter
      pieces.append('%s' % value)
  if not pieces:
    return ''
  return ' %s %s' % (UTC_ARGS_KEY, ' '.join(pieces))
514
515
def check_for_command(line, parser, args, argv, argparse_callback):
  """Apply a 'UTC_ARGS:' directive found in `line`, if there is one.

  The arguments after the key are appended to a copy of `argv`, the result
  (minus anything listed in args.tests) is re-parsed with `parser`, and
  `argparse_callback` is called with the new namespace when it is not None.

  Returns the (args, argv) pair: updated when a directive was found,
  otherwise the values that were passed in.
  """
  cmd_m = UTC_ARGS_CMD.match(line)
  if not cmd_m:
    return args, argv
  # Split on any run of whitespace: splitting on a single space produced
  # empty-string arguments for repeated spaces or an empty directive, and
  # argparse treated those as positional arguments.
  cmd = cmd_m.group('cmd').split()
  argv = argv + cmd
  args = parser.parse_args([arg for arg in argv if arg not in args.tests])
  if argparse_callback is not None:
    argparse_callback(args)
  return args, argv
525