1#!/usr/bin/env python2.7
2
3"""A script to generate FileCheck statements for regression tests.
4
5This script is a utility to update LLVM opt or llc test cases with new
6FileCheck patterns. It can either update all of the tests in the file or
7a single test function.
8
9Example usage:
10$ update_test_checks.py --tool=../bin/opt test/foo.ll
11
12Workflow:
131. Make a compiler patch that requires updating some number of FileCheck lines
14   in regression test files.
152. Save the patch and revert it from your local work area.
163. Update the RUN-lines in the affected regression tests to look canonical.
17   Example: "; RUN: opt < %s -instcombine -S | FileCheck %s"
184. Refresh the FileCheck lines for either the entire file or select functions by
19   running this script.
205. Commit the fresh baseline of checks.
216. Apply your patch from step 1 and rebuild your local binaries.
227. Re-run this script on affected regression tests.
238. Check the diffs to ensure the script has done something reasonable.
249. Submit a patch including the regression test diffs for review.
25
26A common pattern is to have the script insert complete checking of every
27instruction. Then, edit it down to only check the relevant instructions.
The script is designed to make adding checks to a test case fast; it is *not*
designed to be authoritative about what constitutes a good test!
30"""
31
32import argparse
33import itertools
34import os         # Used to advertise this file's name ("autogenerated_note").
35import string
36import subprocess
37import sys
38import tempfile
39import re
40
# Banner placed at the top of every regenerated test file. main() appends the
# script path to it and drops any input line that already starts with it.
ADVERT = '; NOTE: Assertions have been autogenerated by '

# RegEx: this is where the magic happens.
# Every pattern below is written as a raw string so that regex escapes such
# as \s and \w never go through Python's own string-escape processing.

# Scrubbers applied to tool output / test lines.
SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
SCRUB_WHITESPACE_RE = re.compile(r'(?!^(|  \w))[ \t]+', flags=re.M)
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
SCRUB_X86_SHUFFLES_RE = (
    re.compile(
        r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem)( \{%k\d+\}( \{z\})?)? = .*)$',
        flags=re.M))
SCRUB_X86_SP_RE = re.compile(r'\d+\(%(esp|rsp)\)')
SCRUB_X86_RIP_RE = re.compile(r'[.\w]+\(%rip\)')
SCRUB_X86_LCP_RE = re.compile(r'\.LCPI[0-9]+_[0-9]+')
SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')

# Group 1 is the command text that follows "; RUN:".
RUN_LINE_RE = re.compile(r'^\s*;\s*RUN:\s*(.*)$')
# Group 1 is the function name on a "define ... @name(" line of the test file.
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@([\w-]+)\s*\(')
# Named groups 'func' and 'body' pick one function out of llc output.
LLC_FUNCTION_RE = re.compile(
    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n[^:]*?'
    r'(?P<body>^##?[ \t]+[^:]+:.*?)\s*'
    r'^\s*(?:[^:\n]+?:\s*\n\s*\.size|\.cfi_endproc|\.globl|\.comm|\.(?:sub)?section)',
    flags=(re.M | re.S))
# Named groups 'func' and 'body' pick one function out of opt output.
OPT_FUNCTION_RE = re.compile(
    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w-]+?)\s*\('
    r'(\s+)?[^)]*[^{]*\{\n(?P<body>.*?)^\}$',
    flags=(re.M | re.S))
# Group 1 is the prefix named by a --check-prefix=... FileCheck argument.
CHECK_PREFIX_RE = re.compile(r'--check-prefix=(\S+)')
# Group 1 is the prefix of an existing check line, without any
# -NEXT/-NOT/-DAG/-LABEL suffix.
CHECK_RE = re.compile(r'^\s*;\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:')
# Match things that look like identifiers, but only if they are followed by
# spaces, commas, paren, or end of the string
IR_VALUE_RE = re.compile(r'(\s+)%(.+?)([,\s\(\)]|\Z)')
74
75
def invoke_tool(args, cmd_args, ir):
  """Run the tool being tested on one IR file and return what it printed.

  args:     object whose tool_binary attribute is the tool's path.
  cmd_args: string of arguments appended to the tool path.
  ir:       path of the file that is opened and fed to the tool on stdin.

  The command is a single string executed through the shell, so cmd_args is
  interpreted by the shell as-is. subprocess.check_output raises
  CalledProcessError when the tool exits with a non-zero status.
  """
  with open(ir) as ir_file:
    command = ' '.join((args.tool_binary, cmd_args))
    raw_output = subprocess.check_output(command, shell=True, stdin=ir_file)
  # Normalize Windows (CRLF) line endings to Unix (LF) line endings.
  return raw_output.replace('\r\n', '\n')
84
85
# FIXME: Separate the x86-specific scrubbers, so this can be used for other targets.
def scrub_asm(asm):
  """Return asm with volatile x86 details replaced by FileCheck regexes.

  The substitutions run in the order listed; each one is applied to the
  result of the previous one.
  """
  substitutions = (
      # Detect shuffle asm comments and hide the operands in favor of the
      # comments.
      (SCRUB_X86_SHUFFLES_RE, r'\1 {{.*#+}} \2'),
      # Generically match the stack offset of a memory operand.
      (SCRUB_X86_SP_RE, r'{{[0-9]+}}(%\1)'),
      # Generically match a RIP-relative memory operand.
      (SCRUB_X86_RIP_RE, r'{{.*}}(%rip)'),
      # Generically match a LCP symbol.
      (SCRUB_X86_LCP_RE, r'{{\.LCPI.*}}'),
      # Strip kill operands inserted into the asm.
      # NOTE(review): this pattern is compiled without re.M, so it looks like
      # it can only match at the very start of the text - confirm intent.
      (SCRUB_KILL_COMMENT_RE, ''),
  )
  for pattern, replacement in substitutions:
    asm = pattern.sub(replacement, asm)
  return asm
99
100
def scrub_body(body, tool_basename):
  """Return a function body normalized for use in check lines.

  body:          text of one function body taken from the tool output.
  tool_basename: "llc" additionally runs the asm scrubbers (scrub_asm);
                 any other value skips them.
  """
  # Scrub runs of whitespace out of the assembly, but leave the leading
  # whitespace in place.
  body = SCRUB_WHITESPACE_RE.sub(r' ', body)
  # Expand the tabs used for indentation. This uses the str method rather
  # than the deprecated string.expandtabs() module function, which no longer
  # exists in Python 3.
  body = body.expandtabs(2)
  # Strip trailing whitespace.
  body = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', body)
  if tool_basename == "llc":
    body = scrub_asm(body)
  return body
112
113
def build_function_body_dictionary(raw_tool_output, prefixes, func_dict, verbose, tool_basename):
  """Record the scrubbed body of every function found in raw_tool_output.

  raw_tool_output: full text printed by one tool invocation.
  prefixes:        check prefixes associated with that invocation.
  func_dict:       dict mapping prefix -> {function name -> body}; updated in
                   place. Every prefix must already have an entry.
  verbose:         when true, echo each function and its body to stderr.
  tool_basename:   "llc" selects the asm function pattern; anything else
                   selects the IR function pattern.

  When a prefix already holds a different body for the same function, the
  entry is set to None, unless the prefix is the last one in prefixes: then
  a warning is printed and the new body overwrites the old one.
  """
  func_regex = LLC_FUNCTION_RE if tool_basename == "llc" else OPT_FUNCTION_RE

  # finditer() only ever yields match objects, so no per-match None check is
  # needed here.
  for m in func_regex.finditer(raw_tool_output):
    func = m.group('func')
    scrubbed_body = scrub_body(m.group('body'), tool_basename)
    if func.startswith('stress'):
      # We only use the last line of the function body for stress tests.
      scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
    if verbose:
      sys.stderr.write('Processing function: ' + func + '\n')
      for l in scrubbed_body.splitlines():
        sys.stderr.write('  ' + l + '\n')
    for prefix in prefixes:
      bodies = func_dict[prefix]
      if func in bodies and bodies[func] != scrubbed_body:
        if prefix == prefixes[-1]:
          sys.stderr.write('WARNING: Found conflicting asm under the '
                           'same prefix: %r!' % (prefix,) + '\n')
        else:
          # Mark the function as unusable under this prefix and leave it so.
          bodies[func] = None
          continue

      bodies[func] = scrubbed_body
142
143
def get_value_name(var):
  """Create a FileCheck variable name based on an IR value name.

  var is the IR name without its leading '%'. All-digit names get a 'TMP'
  prefix so the result does not start with a digit. Every character that is
  not an ASCII letter, digit or underscore (for example '.', '-' or '$') is
  replaced with '_', because FileCheck variable names may only contain
  letters, digits and underscores. The result is upper-cased.
  """
  if var.isdigit():
    var = 'TMP' + var
  var = re.sub(r'[^0-9A-Za-z_]', '_', var)
  return var.upper()
150
151
def get_value_definition(var):
  """Return the FileCheck text that defines a variable for IR value var.

  The variable is bound to the regex '%.*'.
  """
  return ''.join(('[[', get_value_name(var), ':%.*]]'))
155
156
def get_value_use(var):
  """Return the FileCheck text that refers to the variable for IR value var."""
  return ''.join(('[[', get_value_name(var), ']]'))
160
def genericize_check_lines(lines):
  """Replace IR value defs and uses with FileCheck variables.

  lines is a list of IR lines belonging to one function. It is rewritten in
  place and the same list is returned. Comments are stripped from every line.
  """
  # Names already turned into a FileCheck definition within this call.
  vars_seen = set()

  # This gets called for each match that occurs in
  # a line. We transform variables we haven't seen
  # into defs, and variables we have seen into uses.
  def transform_line_vars(match):
    var = match.group(2)
    if var in vars_seen:
      rv = get_value_use(var)
    else:
      vars_seen.add(var)
      rv = get_value_definition(var)
    # re.sub replaces the entire regex match
    # with whatever you return, so we have
    # to make sure to hand it back everything
    # including the commas and spaces.
    return match.group(1) + rv + match.group(3)

  for i, line in enumerate(lines):
    # An IR variable named '%.' matches the FileCheck regex string.
    line = line.replace('%.', '%dot')
    # Ignore any comments, since the check lines will too.
    scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r'', line)
    lines[i] = IR_VALUE_RE.sub(transform_line_vars, scrubbed_line)
  return lines
189
190
def add_checks(output_lines, prefix_list, func_dict, func_name, tool_basename):
  """Append the check lines for func_name to output_lines and return it.

  output_lines:  list of output file lines; extended in place.
  prefix_list:   list of (check_prefixes, tool_args) pairs, one per RUN line.
  func_dict:     dict mapping prefix -> {function name -> scrubbed body}.
  func_name:     function whose checks are being generated.
  tool_basename: "llc" emits asm-style labels; "opt" emits IR-style labels
                 and turns IR values into FileCheck variables.

  For each pair, the prefixes are tried in order. A prefix that was already
  printed ends the search for that pair. A prefix with no usable body for
  func_name (missing, None or empty) is skipped. The first usable prefix is
  emitted as a -LABEL line, the body lines, and a lone ';' separator.
  """
  # Select a label format based on the whether we're checking asm or IR.
  if tool_basename == "llc":
    check_label_format = "; %s-LABEL: %s:"
  else:
    check_label_format = "; %s-LABEL: @%s("

  printed_prefixes = []
  for checkprefixes, _ in prefix_list:
    for checkprefix in checkprefixes:
      if checkprefix in printed_prefixes:
        break
      # The tool output may not contain this function at all, so look it up
      # with .get() instead of indexing, which would raise KeyError.
      recorded_body = func_dict[checkprefix].get(func_name)
      if not recorded_body:
        continue
      printed_prefixes.append(checkprefix)
      output_lines.append(check_label_format % (checkprefix, func_name))
      func_body = recorded_body.splitlines()

      # For IR output, change all defs to FileCheck variables, so we're immune
      # to variable naming fashions.
      if tool_basename == "opt":
        func_body = genericize_check_lines(func_body)

      # For llc tests, there may be asm directives between the label and the
      # first checked line (most likely that first checked line is "# BB#0"),
      # so the first line must not be a -NEXT check.
      is_blank_line = tool_basename != "opt"

      for func_line in func_body:
        # Skip blank lines instead of checking them; the line that follows a
        # blank one gets a plain check rather than a -NEXT check.
        if func_line.strip() == '':
          is_blank_line = True
          continue
        # Do not waste time checking IR comments.
        if tool_basename == "opt":
          func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)

        if is_blank_line:
          output_lines.append('; %s:       %s' % (checkprefix, func_line))
        else:
          output_lines.append('; %s-NEXT:  %s' % (checkprefix, func_line))
        is_blank_line = False

      # Add space between different check prefixes and also before the first
      # line of code in the test function.
      output_lines.append(';')
      break
  return output_lines
256
257
def should_add_line_to_output(input_line, prefix_set):
  """Return True when a function-body line is copied to the output.

  A line is dropped (False) when it is a blank comment line (only ';'), or
  when it is a check line whose prefix is in prefix_set, since those checks
  are regenerated by this script. Blank lines are kept.
  """
  if input_line.strip() == ';':
    return False
  check_match = CHECK_RE.match(input_line)
  if check_match is None:
    return True
  return check_match.group(1) not in prefix_set
271
272
def main():
  """Regenerate the FileCheck lines of every test file given on the command line.

  For each test file: collect its RUN lines, run the tool once per usable RUN
  line, record the function bodies it prints, then rewrite the file in place
  with fresh check lines at the start of each function (or only of the
  function named by --function). Exits with status 1 when the tool's basename
  is neither "llc" nor "opt".
  """
  parser = argparse.ArgumentParser(
      description=__doc__, formatter_class=argparse.RawTextHelpFormatter)
  parser.add_argument('-v', '--verbose', action='store_true',
                      help='Show verbose output')
  parser.add_argument('--tool-binary', default='llc',
                      help='The tool used to generate the test case')
  parser.add_argument(
      '--function', help='The function in the test file to update')
  parser.add_argument('tests', nargs='+')
  args = parser.parse_args()

  autogenerated_note = (ADVERT + 'utils/' + os.path.basename(__file__))

  tool_basename = os.path.basename(args.tool_binary)
  if tool_basename not in ('llc', 'opt'):
    sys.stderr.write('ERROR: Unexpected tool name: ' + tool_basename + '\n')
    sys.exit(1)

  for test in args.tests:
    if args.verbose:
      sys.stderr.write('Scanning for RUN lines in test file: %s\n' % (test,))
    with open(test) as f:
      input_lines = [l.rstrip() for l in f]

    raw_lines = [m.group(1)
                 for m in [RUN_LINE_RE.match(l) for l in input_lines] if m]
    # Join a RUN line that ends in a backslash with the RUN line after it.
    run_lines = raw_lines[:1]
    for l in raw_lines[1:]:
      if run_lines[-1].endswith("\\"):
        run_lines[-1] = run_lines[-1].rstrip("\\") + " " + l
      else:
        run_lines.append(l)

    if args.verbose:
      sys.stderr.write('Found %d RUN lines:\n' % (len(run_lines),))
      for l in run_lines:
        sys.stderr.write('  RUN: ' + l + '\n')

    prefix_list = []
    for l in run_lines:
      # A RUN line without a pipe has no FileCheck half; skip it instead of
      # failing to unpack the split below.
      if '|' not in l:
        sys.stderr.write('WARNING: Skipping unparseable RUN line: ' + l + '\n')
        continue

      tool_cmd, filecheck_cmd = [cmd.strip() for cmd in l.split('|', 1)]

      if not tool_cmd.startswith(tool_basename + ' '):
        sys.stderr.write('WARNING: Skipping non-%s RUN line: %s\n'
                         % (tool_basename, l))
        continue

      if not filecheck_cmd.startswith('FileCheck '):
        sys.stderr.write('WARNING: Skipping non-FileChecked RUN line: ' + l + '\n')
        continue

      # Drop the tool name and the input-file placeholders; the test file is
      # fed to the tool on stdin by invoke_tool.
      tool_cmd_args = tool_cmd[len(tool_basename):].strip()
      tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip()

      check_prefixes = [m.group(1)
                        for m in CHECK_PREFIX_RE.finditer(filecheck_cmd)]
      if not check_prefixes:
        check_prefixes = ['CHECK']

      # FIXME: We should use multiple check prefixes to common check lines. For
      # now, we just ignore all but the last.
      prefix_list.append((check_prefixes, tool_cmd_args))

    func_dict = {}
    for prefixes, _ in prefix_list:
      for prefix in prefixes:
        func_dict[prefix] = {}
    for prefixes, tool_args in prefix_list:
      if args.verbose:
        sys.stderr.write('Extracted tool cmd: ' + tool_basename + ' ' + tool_args + '\n')
        sys.stderr.write('Extracted FileCheck prefixes: ' + str(prefixes) + '\n')

      raw_tool_output = invoke_tool(args, tool_args, test)
      build_function_body_dictionary(raw_tool_output, prefixes, func_dict, args.verbose, tool_basename)

    is_in_function = False
    is_in_function_start = False
    prefix_set = set([prefix for prefixes, _ in prefix_list for prefix in prefixes])
    if args.verbose:
      sys.stderr.write('Rewriting FileCheck prefixes: %s\n' % (prefix_set,))
    output_lines = []
    output_lines.append(autogenerated_note)

    for input_line in input_lines:
      if is_in_function_start:
        # Between the 'define' line and the first real body line: drop blank
        # lines, keep comments that are not check lines for our prefixes.
        if input_line == '':
          continue
        if input_line.lstrip().startswith(';'):
          m = CHECK_RE.match(input_line)
          if not m or m.group(1) not in prefix_set:
            output_lines.append(input_line)
            continue

        # Print out the various check lines here.
        output_lines = add_checks(output_lines, prefix_list, func_dict, name, tool_basename)
        is_in_function_start = False

      if is_in_function:
        if not should_add_line_to_output(input_line, prefix_set):
          continue
        # This input line of the function body will go as-is into the output.
        # Except make leading whitespace uniform: 2 spaces.
        input_line = SCRUB_LEADING_WHITESPACE_RE.sub(r'  ', input_line)
        output_lines.append(input_line)
        if input_line.strip() == '}':
          is_in_function = False
        continue

      # Discard any previous script advertising.
      if input_line.startswith(ADVERT):
        continue

      # If it's outside a function, it just gets copied to the output.
      output_lines.append(input_line)

      m = IR_FUNCTION_RE.match(input_line)
      if not m:
        continue
      name = m.group(1)
      if args.function is not None and name != args.function:
        # When filtering on a specific function, skip all others.
        continue
      is_in_function = is_in_function_start = True

    if args.verbose:
      sys.stderr.write('Writing %d lines to %s...\n' % (len(output_lines), test))

    with open(test, 'wb') as f:
      f.writelines([l + '\n' for l in output_lines])
403
404
# Run the updater only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
  main()
407
408