1from __future__ import absolute_import
2import errno
3import io
4import itertools
5import getopt
6import os, signal, subprocess, sys
7import re
8import stat
9import pathlib
10import platform
11import shutil
12import tempfile
13import threading
14
15import io
16try:
17    from StringIO import StringIO
18except ImportError:
19    from io import StringIO
20
21from lit.ShCommands import GlobItem, Command
22import lit.ShUtil as ShUtil
23import lit.Test as Test
24import lit.util
25from lit.util import to_bytes, to_string, to_unicode
26from lit.BooleanExpression import BooleanExpression
27
class InternalShellError(Exception):
    """Failure reported by the internal shell while handling a command.

    Instances carry the command object that triggered the failure and a
    human-readable message describing it.
    """

    def __init__(self, command, message):
        self.command, self.message = command, message
32
# True when the host platform reports itself as Windows.
kIsWindows = 'Windows' == platform.system()

# close_fds must not be used on Windows.
kUseCloseFDs = not kIsWindows

# On Windows, temporary files stand in for /dev/null.
kDevNull = "/dev/null"
kAvoidDevNull = kIsWindows
41
# A regex that matches %dbg(ARG), which lit inserts at the beginning of each
# run command pipeline such that ARG specifies the pipeline's source line
# number.  lit later expands each %dbg(ARG) to a command that behaves as a null
# command in the target shell so that the line number is seen in lit's verbose
# mode.
#
# The first group captures ARG.  ARG must not contain a right parenthesis,
# which terminates %dbg.  ARG must not contain quotes, in which ARG might be
# enclosed during expansion.
#
# The second group captures COMMAND, the text that follows %dbg(ARG).  COMMAND
# can be empty as a result of conditional substitution.
kPdbgRegex = '%dbg\\(([^)\'"]*)\\)(.*)'
55
class ShellEnvironment(object):

    """Mutable state of the internal shell.

    Tracks the current working directory, a private copy of the environment
    variables, and the directory stack used by pushd/popd.
    """

    def __init__(self, cwd, env):
        self.dirStack = []
        # Copy the mapping so later edits never reach the caller's dict.
        self.env = dict(env)
        self.cwd = cwd

    def change_dir(self, newdir):
        """Make newdir the current directory.

        An absolute newdir is stored exactly as given.  A relative one is
        joined onto the current directory and passed through
        os.path.realpath.
        """
        target = newdir
        if not os.path.isabs(target):
            target = os.path.realpath(os.path.join(self.cwd, target))
        self.cwd = target
74
class TimeoutHelper(object):
    """
        Helper that enforces a timeout for _executeShCmd(). It is
        handed down through the recursive calls so that every launched
        process can be registered and then killed once the timeout
        fires.
    """
    def __init__(self, timeout):
        self.timeout = timeout
        self._procs = []
        self._timeoutReached = False
        self._doneKillPass = False
        # Both are created by startTimer(), and only when a timeout is set.
        # The lock protects concurrent access to _procs and _doneKillPass.
        self._lock = None
        self._timer = None

    def cancel(self):
        """Cancel the pending timer; a no-op when no timeout is set."""
        if self.active():
            self._timer.cancel()

    def active(self):
        """Return True when a positive timeout was requested."""
        return self.timeout > 0

    def addProcess(self, proc):
        """Register proc so it is killed when the timeout is reached."""
        if not self.active():
            return
        with self._lock:
            self._procs.append(proc)
            # Only record here whether a kill pass already happened; the
            # actual _kill() call is made after the lock is released so the
            # (non-reentrant) lock is never taken twice by this thread.
            killPassAlreadyDone = self._doneKillPass

        # The timer thread has already run _kill(), so it never saw this
        # process.  Run another pass from this thread, otherwise the process
        # would keep running although the timeout was hit.
        if killPassAlreadyDone:
            assert self.timeoutReached()
            self._kill()

    def startTimer(self):
        """Create the lock and start the timer; a no-op without a timeout."""
        if self.active():
            # Late initialisation that is only needed if a timeout is set.
            self._lock = threading.Lock()
            self._timer = threading.Timer(self.timeout,
                                          self._handleTimeoutReached)
            self._timer.start()

    def _handleTimeoutReached(self):
        self._timeoutReached = True
        self._kill()

    def timeoutReached(self):
        """Return True once the timer callback has run."""
        return self._timeoutReached

    def _kill(self):
        """
            Kill every registered process and forget it.

            May run more than once: a process that is being created in
            _executeShCmd() while a pass runs is not yet in
            ``self._procs``.  Because both this method and addProcess()
            take the lock, such late processes still get killed by the
            extra pass that addProcess() triggers.
        """
        with self._lock:
            for proc in self._procs:
                lit.util.killProcessAndChildren(proc.pid)
            # Rebind instead of list.clear(), which Python2 lacks, and note
            # that a pass over the list has been made.
            self._procs = []
            self._doneKillPass = True
150
class ShellCommandResult(object):
    """Record of one executed command: its output and exit status."""

    def __init__(self, command, stdout, stderr, exitCode, timeoutReached,
                 outputFiles = []):
        # outputFiles is copied, so the shared default list is only ever read
        # and the result never aliases the caller's sequence.
        self.outputFiles = list(outputFiles)
        self.timeoutReached = timeoutReached
        self.exitCode = exitCode
        self.stderr = stderr
        self.stdout = stdout
        self.command = command
162
def executeShCmd(cmd, shenv, results, timeout=0):
    """
        Wrapper around _executeShCmd that handles
        timeout

        Returns a tuple ``(finalExitCode, timeoutInfo)``. ``timeoutInfo``
        is None unless the timeout was reached, in which case it is a
        message naming the timeout value. Exceptions raised by
        _executeShCmd propagate unchanged.
    """
    # Use the helper even when no timeout is required to make
    # other code simpler (i.e. avoid bunch of ``!= None`` checks)
    timeoutHelper = TimeoutHelper(timeout)
    if timeout > 0:
        timeoutHelper.startTimer()
    try:
        finalExitCode = _executeShCmd(cmd, shenv, results, timeoutHelper)
    finally:
        # Always disarm the timer. If _executeShCmd raised, an armed timer
        # would otherwise stay alive and later run a kill pass over the
        # processes registered for this (already abandoned) command.
        timeoutHelper.cancel()
    timeoutInfo = None
    if timeoutHelper.timeoutReached():
        timeoutInfo = 'Reached timeout of {} seconds'.format(timeout)

    return (finalExitCode, timeoutInfo)
180
def expand_glob(arg, cwd):
    """Return the list of words that arg stands for.

    A GlobItem is resolved against cwd and its matches are returned sorted;
    anything else is returned unchanged as a one-element list.
    """
    if not isinstance(arg, GlobItem):
        return [arg]
    matches = arg.resolve(cwd)
    return sorted(matches)
185
def expand_glob_expressions(args, cwd):
    """Expand every argument after the first with expand_glob().

    The first element (the command name) is copied through untouched.
    """
    command = args[0]
    expanded_tail = [word
                     for item in args[1:]
                     for word in expand_glob(item, cwd)]
    return [command] + expanded_tail
191
def quote_windows_command(seq):
    r"""
    Join the arguments in seq into one Windows command line string.

    This reimplements Python's private subprocess.list2cmdline for MSys
    compatibility, based on the CPython implementation here:
      https://hg.python.org/cpython/file/849826a900d2/Lib/subprocess.py#l422

    Some core util distributions (MSys) don't tokenize command line arguments
    the same way that the MSVC CRT does, so lit uses its own quoting rules to
    paper over the differences. The algorithm is the MSDN one that CPython
    uses (http://msdn.microsoft.com/en-us/library/17w5ykft.aspx), except that
    more characters force quoting: double quotes themselves, square brackets
    and semicolons, in addition to spaces, tabs and the empty argument.

    For MSys based tools this remains brittle, because quoting an argument
    makes the tool unescape backslashes where it shouldn't (e.g. "a\b\\c\\\\d"
    becomes "a\b\c\\d" where regular win32 parsing would leave it unchanged).
    """
    quoted_args = []
    for arg in seq:
        # This rule differs from upstream list2cmdline.
        must_quote = any(ch in arg for ch in ' \t"[;') or not arg

        pieces = []
        # Backslashes seen but not yet emitted: whether they need doubling
        # depends on what follows them.
        pending = 0
        for ch in arg:
            if ch == '\\':
                pending += 1
                continue
            if ch == '"':
                # Backslashes before a double quote are doubled, and the
                # quote itself is escaped.
                pieces.append('\\' * (pending * 2) + '\\"')
            else:
                # Backslashes before a normal character are kept as they are.
                pieces.append('\\' * pending + ch)
            pending = 0

        # Trailing backslashes are doubled only when a closing quote follows.
        trailing = '\\' * (pending * 2 if must_quote else pending)
        body = ''.join(pieces) + trailing
        quoted_args.append('"' + body + '"' if must_quote else body)

    return ' '.join(quoted_args)
253
def updateEnv(env, args):
    """Apply the arguments of an 'export' or 'env' command to env.

    args[0] is the command itself and is skipped.  Each following KEY=VALUE
    word is stored in env.env, and each '-u NAME' pair removes NAME from it.
    Processing stops at the first word that is neither; the return value is
    the slice of args starting at that word (empty if there is none), i.e.
    args without the command and its own arguments.
    """
    drop_next = False
    for position, token in enumerate(args[1:], 1):
        # Support for the -u flag (unsetting) of the env command, e.g.
        # `env -u FOO -u BAR` removes both FOO and BAR from the environment.
        if token == '-u':
            drop_next = True
        elif drop_next:
            drop_next = False
            env.env.pop(token, None)
        else:
            # Split the word into KEY=VALUE; a word without '=' ends the
            # list of assignments.
            name, separator, value = token.partition('=')
            if not separator:
                return args[position:]
            env.env[name] = value
    return args[len(args):]
282
def executeBuiltinCd(cmd, shenv):
    """executeBuiltinCd - Change the current directory."""
    if len(cmd.args) == 2:
        # Only the shell environment's notion of the cwd is updated here.
        # cd itself always reports success: a missing directory shows up
        # later, when a following Popen call fails.
        target = cmd.args[1]
        shenv.change_dir(target)
        return ShellCommandResult(cmd, "", "", 0, False)
    raise InternalShellError(cmd, "'cd' supports only one argument")
292
def executeBuiltinPushd(cmd, shenv):
    """executeBuiltinPushd - Change the current dir and save the old."""
    argc = len(cmd.args)
    if argc != 2:
        raise InternalShellError(cmd, "'pushd' supports only one argument")
    # Remember where we were before moving, so popd can come back.
    previous = shenv.cwd
    shenv.dirStack.append(previous)
    shenv.change_dir(cmd.args[1])
    return ShellCommandResult(cmd, "", "", 0, False)
300
def executeBuiltinPopd(cmd, shenv):
    """executeBuiltinPopd - Restore a previously saved working directory."""
    if len(cmd.args) != 1:
        raise InternalShellError(cmd, "'popd' does not support arguments")
    saved_dirs = shenv.dirStack
    if not saved_dirs:
        raise InternalShellError(cmd, "popd: directory stack empty")
    # The most recently pushed directory becomes the cwd again.
    shenv.cwd = saved_dirs.pop()
    return ShellCommandResult(cmd, "", "", 0, False)
309
def executeBuiltinExport(cmd, shenv):
    """executeBuiltinExport - Set an environment variable.

    Expects exactly one argument after 'export'; it is applied to shenv by
    updateEnv().  Raises InternalShellError for any other argument count.
    """
    if len(cmd.args) != 2:
        # InternalShellError takes (command, message); pass cmd like the
        # other builtins do so the error path does not raise TypeError.
        raise InternalShellError(cmd, "'export' supports only one argument")
    updateEnv(shenv, cmd.args)
    return ShellCommandResult(cmd, "", "", 0, False)
316
def executeBuiltinEcho(cmd, shenv):
    """Run echo in-process, honouring a stdout redirect if there is one.

    Without a stdout redirect the text is buffered and returned as the
    result's stdout.  Redirects of stdin or stderr are rejected with
    InternalShellError.
    """
    opened_files = []
    stdin, stdout, stderr = processRedirects(cmd, subprocess.PIPE, shenv,
                                             opened_files)
    if not (stdin == subprocess.PIPE and stderr == subprocess.PIPE):
        raise InternalShellError(
                cmd, "stdin and stderr redirects not supported for echo")

    # Some tests have un-redirected echo commands to help debug test failures.
    # In that case buffer the output and hand it back to the caller.
    is_redirected = stdout != subprocess.PIPE
    encode = lambda x : x
    if not is_redirected:
        stdout = StringIO()
    elif kIsWindows:
        # Reopen stdout in binary mode to avoid CRLF translation. The versions
        # of echo being replaced on Windows all emit plain LF, and the LLVM
        # tests now depend on this.
        # A binary stream in turn needs 'bytes' objects rather than 'str'
        # objects, hence the encoder.
        encode = lit.util.to_bytes
        stdout = open(stdout.name, stdout.mode + 'b')
        opened_files.append((None, None, stdout, None))

    # Implement echo flags. Only leading -e and -n are recognised. Unknown
    # flags have to be treated as text, because `echo "-D FOO"` prints the
    # dash.
    words = cmd.args[1:]
    interpret_escapes = False
    write_newline = True
    while words and words[0] in ('-e', '-n'):
        if words[0] == '-e':
            interpret_escapes = True
        else:
            write_newline = False
        words = words[1:]

    def maybeUnescape(arg):
        if interpret_escapes:
            if sys.version_info < (3,0):
                codec = 'string_escape'
            else:
                codec = 'unicode_escape'
            return lit.util.to_bytes(arg).decode(codec)
        return arg

    # Words are separated by single spaces, with no trailing space.
    for position, word in enumerate(words):
        if position:
            stdout.write(encode(' '))
        stdout.write(encode(maybeUnescape(word)))
    if write_newline:
        stdout.write(encode('\n'))

    for (_name, _mode, handle, _path) in opened_files:
        handle.close()

    if is_redirected:
        output = ""
    else:
        output = stdout.getvalue()
    return ShellCommandResult(cmd, output, "", 0, False)
378
def executeBuiltinMkdir(cmd, cmd_shenv):
    """executeBuiltinMkdir - Create new directories.

    Supports the -p option.  Relative operands are resolved against the
    shell environment's cwd.  Without -p, an OSError from a creation is
    written to the result's stderr and makes the exit code 1.
    """
    operands = expand_glob_expressions(cmd.args, cmd_shenv.cwd)[1:]
    try:
        opts, operands = getopt.gnu_getopt(operands, 'p')
    except getopt.GetoptError as err:
        raise InternalShellError(cmd, "Unsupported: 'mkdir':  %s" % str(err))

    parent = False
    for flag, _value in opts:
        if flag == "-p":
            parent = True
        else:
            assert False, "unhandled option"

    if not operands:
        raise InternalShellError(cmd, "Error: 'mkdir' is missing an operand")

    # Paths are handled as unicode on Windows and as bytes elsewhere.
    convert = to_unicode if kIsWindows else to_bytes
    errors = StringIO()
    status = 0
    for operand in operands:
        target = convert(operand)
        base = convert(cmd_shenv.cwd)
        if not os.path.isabs(target):
            target = os.path.realpath(os.path.join(base, target))
        if parent:
            lit.util.mkdir_p(target)
            continue
        try:
            lit.util.mkdir(target)
        except OSError as err:
            errors.write("Error: 'mkdir' command failed, %s\n" % str(err))
            status = 1
    return ShellCommandResult(cmd, "", errors.getvalue(), status, False)
414
def _remove_tree_windows(path):
    """Delete the directory tree at path through shell32's SHFileOperationW.

    Raises the WinError built from the call's result when it is nonzero.
    """
    # NOTE: use ctypes to access `SHFileOperationsW` on Windows to
    # use the NT style path to get access to long file paths which
    # cannot be removed otherwise.
    from ctypes.wintypes import BOOL, HWND, LPCWSTR, UINT, WORD
    from ctypes import addressof, byref, c_void_p, create_unicode_buffer
    from ctypes import Structure
    from ctypes import windll, WinError, POINTER

    class SHFILEOPSTRUCTW(Structure):
        _fields_ = [
                ('hWnd', HWND),
                ('wFunc', UINT),
                ('pFrom', LPCWSTR),
                ('pTo', LPCWSTR),
                ('fFlags', WORD),
                ('fAnyOperationsAborted', BOOL),
                ('hNameMappings', c_void_p),
                ('lpszProgressTitle', LPCWSTR),
        ]

    FO_MOVE, FO_COPY, FO_DELETE, FO_RENAME = range(1, 5)

    FOF_SILENT = 4
    FOF_NOCONFIRMATION = 16
    FOF_NOCONFIRMMKDIR = 512
    FOF_NOERRORUI = 1024

    FOF_NO_UI = FOF_SILENT | FOF_NOCONFIRMATION | FOF_NOERRORUI | FOF_NOCONFIRMMKDIR

    SHFileOperationW = windll.shell32.SHFileOperationW
    SHFileOperationW.argtypes = [POINTER(SHFILEOPSTRUCTW)]

    path = os.path.abspath(path)

    # pFrom is a list of paths terminated by two NUL characters.
    pFrom = create_unicode_buffer(path, len(path) + 2)
    pFrom[len(path)] = pFrom[len(path) + 1] = '\0'
    operation = SHFILEOPSTRUCTW(wFunc=UINT(FO_DELETE),
                                pFrom=LPCWSTR(addressof(pFrom)),
                                fFlags=FOF_NO_UI)
    result = SHFileOperationW(byref(operation))
    if result:
        raise WinError(result)


def executeBuiltinRm(cmd, cmd_shenv):
    """executeBuiltinRm - Removes (deletes) files or directories.

    Supports -f, and -r / -R / --recursive.  Relative operands are resolved
    against the shell environment's cwd.  With -f, operands that do not exist
    are skipped silently.  A directory operand without a recursive option is
    reported on stderr and left in place.  Every failure is written to the
    result's stderr and makes the exit code 1; remaining operands are still
    processed.

    Raises InternalShellError for unsupported options or a missing operand.
    """
    args = expand_glob_expressions(cmd.args, cmd_shenv.cwd)[1:]
    try:
        # getopt expects long option names without the leading '--'; the
        # options it reports back do carry the '--' prefix.
        opts, args = getopt.gnu_getopt(args, "frR", ["recursive"])
    except getopt.GetoptError as err:
        raise InternalShellError(cmd, "Unsupported: 'rm':  %s" % str(err))

    force = False
    recursive = False
    for o, a in opts:
        if o == "-f":
            force = True
        elif o in ("-r", "-R", "--recursive"):
            recursive = True
        else:
            assert False, "unhandled option"

    if len(args) == 0:
        raise InternalShellError(cmd, "Error: 'rm' is missing an operand")

    def on_rm_error(func, path, exc_info):
        # path contains the path of the file that couldn't be removed
        # let's just assume that it's read-only and remove it.
        os.chmod(path, stat.S_IMODE( os.stat(path).st_mode) | stat.S_IWRITE)
        os.remove(path)

    stderr = StringIO()
    exitCode = 0
    for path in args:
        cwd = cmd_shenv.cwd
        # Paths are handled as unicode on Windows and as bytes elsewhere.
        path = to_unicode(path) if kIsWindows else to_bytes(path)
        cwd = to_unicode(cwd) if kIsWindows else to_bytes(cwd)
        if not os.path.isabs(path):
            path = os.path.realpath(os.path.join(cwd, path))
        if force and not os.path.exists(path):
            continue
        try:
            if os.path.isdir(path):
                if not recursive:
                    stderr.write("Error: %s is a directory\n" % path)
                    exitCode = 1
                    # Refuse to delete the directory, as the message says.
                    continue
                if platform.system() == 'Windows':
                    _remove_tree_windows(path)
                else:
                    shutil.rmtree(path, onerror = on_rm_error if force else None)
            else:
                # With -f, make a read-only file writable so it can go away.
                if force and not os.access(path, os.W_OK):
                    os.chmod(path,
                             stat.S_IMODE(os.stat(path).st_mode) | stat.S_IWRITE)
                os.remove(path)
        except OSError as err:
            # Newline-terminated, like the mkdir builtin, so that several
            # failures do not run together on one line.
            stderr.write("Error: 'rm' command failed, %s\n" % str(err))
            exitCode = 1
    return ShellCommandResult(cmd, "", stderr.getvalue(), exitCode, False)
511
def executeBuiltinColon(cmd, cmd_shenv):
    """executeBuiltinColon - Discard arguments and exit with status 0."""
    # Nothing is written to either stream.
    stdout = stderr = ""
    return ShellCommandResult(cmd, stdout, stderr, 0, False)
515
def processRedirects(cmd, stdin_source, cmd_shenv, opened_files):
    """Return the standard fds for cmd after applying redirects

    Returns a list with the three standard file descriptors (stdin, stdout,
    stderr) for the new child process.  Each entry is either an open file
    object or a sentinel value from the subprocess module; an un-redirected
    stdin is stdin_source.

    Every file opened here is also appended to opened_files as a
    (filename, mode, file-object, resolved-path) tuple; resolved-path is None
    for the /dev/null and /dev/tty substitutes.

    Raises InternalShellError for unsupported redirects and for a glob in a
    redirect target that does not expand to exactly one name.
    """

    # Apply the redirections, we use (N,) as a sentinel to indicate stdin,
    # stdout, stderr for N equal to 0, 1, or 2 respectively. Redirects to or
    # from a file are represented with a list [file, mode, file-object]
    # where file-object is initially None.
    redirects = [(0,), (1,), (2,)]
    for (op, filename) in cmd.redirects:
        if op == ('>',2):
            redirects[2] = [filename, 'w', None]
        elif op == ('>>',2):
            redirects[2] = [filename, 'a', None]
        elif op == ('>&',2) and filename in ('0', '1', '2'):
            # Membership in a tuple, not in the string '012': the latter is a
            # substring test that also lets '', '01', '12' and '012' through.
            redirects[2] = redirects[int(filename)]
        elif op == ('>&',) or op == ('&>',):
            # One shared list, so that the file is opened only once below.
            redirects[1] = redirects[2] = [filename, 'w', None]
        elif op == ('>',):
            redirects[1] = [filename, 'w', None]
        elif op == ('>>',):
            redirects[1] = [filename, 'a', None]
        elif op == ('<',):
            redirects[0] = [filename, 'r', None]
        else:
            raise InternalShellError(cmd, "Unsupported redirect: %r" % ((op, filename),))

    # Open file descriptors in a second pass.
    std_fds = [None, None, None]
    for (index, r) in enumerate(redirects):
        # Handle the sentinel values for defaults up front.
        if isinstance(r, tuple):
            if r == (0,):
                fd = stdin_source
            elif r == (1,):
                if index == 0:
                    raise InternalShellError(cmd, "Unsupported redirect for stdin")
                elif index == 1:
                    fd = subprocess.PIPE
                else:
                    # stderr was pointed at stdout.
                    fd = subprocess.STDOUT
            elif r == (2,):
                if index != 2:
                    raise InternalShellError(cmd, "Unsupported redirect on stdout")
                fd = subprocess.PIPE
            else:
                raise InternalShellError(cmd, "Bad redirect")
            std_fds[index] = fd
            continue

        (filename, mode, fd) = r

        # Check if we already have an open fd. This can happen if stdout and
        # stderr go to the same place.
        if fd is not None:
            std_fds[index] = fd
            continue

        redir_filename = None
        name = expand_glob(filename, cmd_shenv.cwd)
        if len(name) != 1:
            raise InternalShellError(cmd, "Unsupported: glob in "
                                     "redirect expanded to multiple files")
        name = name[0]
        if kAvoidDevNull and name == kDevNull:
            fd = tempfile.TemporaryFile(mode=mode)
        elif kIsWindows and name == '/dev/tty':
            # Simulate /dev/tty on Windows.
            # "CON" is a special filename for the console.
            fd = open("CON", mode)
        else:
            # Make sure relative paths are relative to the cwd.
            redir_filename = os.path.join(cmd_shenv.cwd, name)
            redir_filename = to_unicode(redir_filename) \
                    if kIsWindows else to_bytes(redir_filename)
            fd = open(redir_filename, mode)
        # Workaround a Win32 and/or subprocess bug when appending.
        #
        # FIXME: Actually, this is probably an instance of PR6753.
        if mode == 'a':
            fd.seek(0, 2)
        # Mutate the underlying redirect list so that we can redirect stdout
        # and stderr to the same place without opening the file twice.
        r[2] = fd
        opened_files.append((filename, mode, fd) + (redir_filename,))
        std_fds[index] = fd

    return std_fds
608
609def _executeShCmd(cmd, shenv, results, timeoutHelper):
610    if timeoutHelper.timeoutReached():
611        # Prevent further recursion if the timeout has been hit
612        # as we should try avoid launching more processes.
613        return None
614
615    if isinstance(cmd, ShUtil.Seq):
616        if cmd.op == ';':
617            res = _executeShCmd(cmd.lhs, shenv, results, timeoutHelper)
618            return _executeShCmd(cmd.rhs, shenv, results, timeoutHelper)
619
620        if cmd.op == '&':
621            raise InternalShellError(cmd,"unsupported shell operator: '&'")
622
623        if cmd.op == '||':
624            res = _executeShCmd(cmd.lhs, shenv, results, timeoutHelper)
625            if res != 0:
626                res = _executeShCmd(cmd.rhs, shenv, results, timeoutHelper)
627            return res
628
629        if cmd.op == '&&':
630            res = _executeShCmd(cmd.lhs, shenv, results, timeoutHelper)
631            if res is None:
632                return res
633
634            if res == 0:
635                res = _executeShCmd(cmd.rhs, shenv, results, timeoutHelper)
636            return res
637
638        raise ValueError('Unknown shell command: %r' % cmd.op)
639    assert isinstance(cmd, ShUtil.Pipeline)
640
641    procs = []
642    proc_not_counts = []
643    default_stdin = subprocess.PIPE
644    stderrTempFiles = []
645    opened_files = []
646    named_temp_files = []
647    builtin_commands = set(['cat', 'diff'])
648    builtin_commands_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "builtin_commands")
649    inproc_builtins = {'cd': executeBuiltinCd,
650                       'export': executeBuiltinExport,
651                       'echo': executeBuiltinEcho,
652                       'mkdir': executeBuiltinMkdir,
653                       'popd': executeBuiltinPopd,
654                       'pushd': executeBuiltinPushd,
655                       'rm': executeBuiltinRm,
656                       ':': executeBuiltinColon}
657    # To avoid deadlock, we use a single stderr stream for piped
658    # output. This is null until we have seen some output using
659    # stderr.
660    for i,j in enumerate(cmd.commands):
661        # Reference the global environment by default.
662        cmd_shenv = shenv
663        args = list(j.args)
664        not_args = []
665        not_count = 0
666        not_crash = False
667        while True:
668            if args[0] == 'env':
669                # Create a copy of the global environment and modify it for
670                # this one command. There might be multiple envs in a pipeline,
671                # and there might be multiple envs in a command (usually when
672                # one comes from a substitution):
673                #   env FOO=1 llc < %s | env BAR=2 llvm-mc | FileCheck %s
674                #   env FOO=1 %{another_env_plus_cmd} | FileCheck %s
675                if cmd_shenv is shenv:
676                    cmd_shenv = ShellEnvironment(shenv.cwd, shenv.env)
677                args = updateEnv(cmd_shenv, args)
678                if not args:
679                    raise InternalShellError(j, "Error: 'env' requires a"
680                                                " subcommand")
681            elif args[0] == 'not':
682                not_args.append(args.pop(0))
683                not_count += 1
684                if args and args[0] == '--crash':
685                    not_args.append(args.pop(0))
686                    not_crash = True
687                if not args:
688                    raise InternalShellError(j, "Error: 'not' requires a"
689                                                " subcommand")
690            elif args[0] == '!':
691                not_args.append(args.pop(0))
692                not_count += 1
693                if not args:
694                    raise InternalShellError(j, "Error: '!' requires a"
695                                                " subcommand")
696            else:
697                break
698
699        # Handle in-process builtins.
700        #
701        # Handle "echo" as a builtin if it is not part of a pipeline. This
702        # greatly speeds up tests that construct input files by repeatedly
703        # echo-appending to a file.
704        # FIXME: Standardize on the builtin echo implementation. We can use a
705        # temporary file to sidestep blocking pipe write issues.
706        inproc_builtin = inproc_builtins.get(args[0], None)
707        if inproc_builtin and (args[0] != 'echo' or len(cmd.commands) == 1):
708            # env calling an in-process builtin is useless, so we take the safe
709            # approach of complaining.
710            if not cmd_shenv is shenv:
711                raise InternalShellError(j, "Error: 'env' cannot call '{}'"
712                                            .format(args[0]))
713            if not_crash:
714                raise InternalShellError(j, "Error: 'not --crash' cannot call"
715                                            " '{}'".format(args[0]))
716            if len(cmd.commands) != 1:
717                raise InternalShellError(j, "Unsupported: '{}' cannot be part"
718                                            " of a pipeline".format(args[0]))
719            result = inproc_builtin(Command(args, j.redirects), cmd_shenv)
720            if not_count % 2:
721                result.exitCode = int(not result.exitCode)
722            result.command.args = j.args;
723            results.append(result)
724            return result.exitCode
725
726        # Resolve any out-of-process builtin command before adding back 'not'
727        # commands.
728        if args[0] in builtin_commands:
729            args.insert(0, sys.executable)
730            cmd_shenv.env['PYTHONPATH'] = \
731                os.path.dirname(os.path.abspath(__file__))
732            args[1] = os.path.join(builtin_commands_dir, args[1] + ".py")
733
734        # We had to search through the 'not' commands to find all the 'env'
735        # commands and any other in-process builtin command.  We don't want to
736        # reimplement 'not' and its '--crash' here, so just push all 'not'
737        # commands back to be called as external commands.  Because this
738        # approach effectively moves all 'env' commands up front, it relies on
739        # the assumptions that (1) environment variables are not intended to be
740        # relevant to 'not' commands and (2) the 'env' command should always
741        # blindly pass along the status it receives from any command it calls.
742
743        # For plain negations, either 'not' without '--crash', or the shell
744        # operator '!', leave them out from the command to execute and
745        # invert the result code afterwards.
746        if not_crash:
747            args = not_args + args
748            not_count = 0
749        else:
750            not_args = []
751
752        stdin, stdout, stderr = processRedirects(j, default_stdin, cmd_shenv,
753                                                 opened_files)
754
755        # If stderr wants to come from stdout, but stdout isn't a pipe, then put
756        # stderr on a pipe and treat it as stdout.
757        if (stderr == subprocess.STDOUT and stdout != subprocess.PIPE):
758            stderr = subprocess.PIPE
759            stderrIsStdout = True
760        else:
761            stderrIsStdout = False
762
763            # Don't allow stderr on a PIPE except for the last
764            # process, this could deadlock.
765            #
766            # FIXME: This is slow, but so is deadlock.
767            if stderr == subprocess.PIPE and j != cmd.commands[-1]:
768                stderr = tempfile.TemporaryFile(mode='w+b')
769                stderrTempFiles.append((i, stderr))
770
771        # Resolve the executable path ourselves.
772        executable = None
773        # For paths relative to cwd, use the cwd of the shell environment.
774        if args[0].startswith('.'):
775            exe_in_cwd = os.path.join(cmd_shenv.cwd, args[0])
776            if os.path.isfile(exe_in_cwd):
777                executable = exe_in_cwd
778        if not executable:
779            executable = lit.util.which(args[0], cmd_shenv.env['PATH'])
780        if not executable:
781            raise InternalShellError(j, '%r: command not found' % args[0])
782
783        # Replace uses of /dev/null with temporary files.
784        if kAvoidDevNull:
785            # In Python 2.x, basestring is the base class for all string (including unicode)
786            # In Python 3.x, basestring no longer exist and str is always unicode
787            try:
788                str_type = basestring
789            except NameError:
790                str_type = str
791            for i,arg in enumerate(args):
792                if isinstance(arg, str_type) and kDevNull in arg:
793                    f = tempfile.NamedTemporaryFile(delete=False)
794                    f.close()
795                    named_temp_files.append(f.name)
796                    args[i] = arg.replace(kDevNull, f.name)
797
798        # Expand all glob expressions
799        args = expand_glob_expressions(args, cmd_shenv.cwd)
800
801        # On Windows, do our own command line quoting for better compatibility
802        # with some core utility distributions.
803        if kIsWindows:
804            args = quote_windows_command(args)
805
806        try:
807            procs.append(subprocess.Popen(args, cwd=cmd_shenv.cwd,
808                                          executable = executable,
809                                          stdin = stdin,
810                                          stdout = stdout,
811                                          stderr = stderr,
812                                          env = cmd_shenv.env,
813                                          close_fds = kUseCloseFDs,
814                                          universal_newlines = True,
815                                          errors = 'replace'))
816            proc_not_counts.append(not_count)
817            # Let the helper know about this process
818            timeoutHelper.addProcess(procs[-1])
819        except OSError as e:
820            raise InternalShellError(j, 'Could not create process ({}) due to {}'.format(executable, e))
821
822        # Immediately close stdin for any process taking stdin from us.
823        if stdin == subprocess.PIPE:
824            procs[-1].stdin.close()
825            procs[-1].stdin = None
826
827        # Update the current stdin source.
828        if stdout == subprocess.PIPE:
829            default_stdin = procs[-1].stdout
830        elif stderrIsStdout:
831            default_stdin = procs[-1].stderr
832        else:
833            default_stdin = subprocess.PIPE
834
835    # Explicitly close any redirected files. We need to do this now because we
836    # need to release any handles we may have on the temporary files (important
837    # on Win32, for example). Since we have already spawned the subprocess, our
838    # handles have already been transferred so we do not need them anymore.
839    for (name, mode, f, path) in opened_files:
840        f.close()
841
842    # FIXME: There is probably still deadlock potential here. Yawn.
843    procData = [None] * len(procs)
844    procData[-1] = procs[-1].communicate()
845
846    for i in range(len(procs) - 1):
847        if procs[i].stdout is not None:
848            out = procs[i].stdout.read()
849        else:
850            out = ''
851        if procs[i].stderr is not None:
852            err = procs[i].stderr.read()
853        else:
854            err = ''
855        procData[i] = (out,err)
856
857    # Read stderr out of the temp files.
858    for i,f in stderrTempFiles:
859        f.seek(0, 0)
860        procData[i] = (procData[i][0], f.read())
861        f.close()
862
863    exitCode = None
864    for i,(out,err) in enumerate(procData):
865        res = procs[i].wait()
866        # Detect Ctrl-C in subprocess.
867        if res == -signal.SIGINT:
868            raise KeyboardInterrupt
869        if proc_not_counts[i] % 2:
870            res = not res
871        elif proc_not_counts[i] > 1:
872            res = 1 if res != 0 else 0
873
874        # Ensure the resulting output is always of string type.
875        try:
876            if out is None:
877                out = ''
878            else:
879                out = to_string(out.decode('utf-8', errors='replace'))
880        except:
881            out = str(out)
882        try:
883            if err is None:
884                err = ''
885            else:
886                err = to_string(err.decode('utf-8', errors='replace'))
887        except:
888            err = str(err)
889
890        # Gather the redirected output files for failed commands.
891        output_files = []
892        if res != 0:
893            for (name, mode, f, path) in sorted(opened_files):
894                if path is not None and mode in ('w', 'a'):
895                    try:
896                        with open(path, 'rb') as f:
897                            data = f.read()
898                    except:
899                        data = None
900                    if data is not None:
901                        output_files.append((name, path, data))
902
903        results.append(ShellCommandResult(
904            cmd.commands[i], out, err, res, timeoutHelper.timeoutReached(),
905            output_files))
906        if cmd.pipe_err:
907            # Take the last failing exit code from the pipeline.
908            if not exitCode or res != 0:
909                exitCode = res
910        else:
911            exitCode = res
912
913    # Remove any named temporary files we created.
914    for f in named_temp_files:
915        try:
916            os.remove(f)
917        except OSError:
918            pass
919
920    if cmd.negate:
921        exitCode = not exitCode
922
923    return exitCode
924
def executeScriptInternal(test, litConfig, tmpBase, commands, cwd):
    """Run `commands` through lit's internal shell and build a text report.

    Every entry of `commands` that starts with a %dbg(ARG) marker is rewritten
    in place into a ':' null command carrying ARG (followed by the captured
    command, if any).  Each line is then parsed with ShUtil.ShParser, the
    parsed lines are chained with '&&', and the chain is run by executeShCmd.

    Returns the tuple (out, err, exitCode, timeoutInfo); `out` is the report
    and `err` is always the empty string.  If a line cannot be parsed, a
    lit.Test.Result with Test.FAIL is returned instead of the tuple.
    `tmpBase` is not used by this function.
    """
    parsed = []
    for index, line in enumerate(commands):
        dbg = re.match(kPdbgRegex, line)
        if dbg:
            template = ": '\\1'; \\2" if dbg.group(2) else ": '\\1'"
            line = dbg.expand(template)
            commands[index] = line
        try:
            parser = ShUtil.ShParser(line, litConfig.isWindows,
                                     test.config.pipefail)
            parsed.append(parser.parse())
        except:
            return lit.Test.Result(Test.FAIL, "shell parser error on: %r" % line)

    # Chain the parsed lines so that a failing line stops the script.
    pipeline = parsed[0]
    for following in parsed[1:]:
        pipeline = ShUtil.Seq(pipeline, '&&', following)

    results = []
    timeoutInfo = None
    try:
        shenv = ShellEnvironment(cwd, test.config.environment)
        exitCode, timeoutInfo = executeShCmd(
            pipeline, shenv, results,
            timeout=litConfig.maxIndividualTestTime)
    except InternalShellError as e:
        # Report the internal shell failure as one more command result.
        exitCode = 127
        results.append(
            ShellCommandResult(e.command, '', e.message, exitCode, False))

    report = []
    for result in results:
        # Write the command line run.
        quoted_args = ' '.join('"%s"' % s for s in result.command.args)
        report.append('$ %s\n' % (quoted_args,))

        # If nothing interesting happened, move on.
        uninteresting = (litConfig.maxIndividualTestTime == 0 and
                         result.exitCode == 0 and
                         not result.stdout.strip() and
                         not result.stderr.strip())
        if uninteresting:
            continue

        # Otherwise, something failed or was printed, show it.

        # Add the command output, if redirected.
        for (name, path, data) in result.outputFiles:
            if not data.strip():
                continue
            report.append("# redirected output from %r:\n" % (name,))
            text = to_string(data.decode('utf-8', errors='replace'))
            if len(text) > 1024:
                report.append(text[:1024] + "\n...\n")
                report.append("note: data was truncated\n")
            else:
                report.append(text)
            report.append("\n")

        has_stdout = bool(result.stdout.strip())
        has_stderr = bool(result.stderr.strip())
        if has_stdout:
            report.append('# command output:\n%s\n' % (result.stdout,))
        if has_stderr:
            report.append('# command stderr:\n%s\n' % (result.stderr,))
        if not has_stdout and not has_stderr:
            report.append("note: command had no output on stdout or stderr\n")

        # Show the error conditions:
        if result.exitCode != 0:
            # On Windows, a negative exit code indicates a signal, and those are
            # easier to recognize or look up if we print them in hex.
            if litConfig.isWindows and result.exitCode < 0:
                codeStr = hex(int(result.exitCode & 0xFFFFFFFF)).rstrip("L")
            else:
                codeStr = str(result.exitCode)
            report.append("error: command failed with exit status: %s\n" % (
                codeStr,))
        if litConfig.maxIndividualTestTime > 0 and result.timeoutReached:
            report.append('error: command reached timeout: %s\n' % (
                str(result.timeoutReached),))

    out = ''.join(report)
    err = ''
    return out, err, exitCode, timeoutInfo
1002
def executeScript(test, litConfig, tmpBase, commands, cwd):
    """Write `commands` to a script file and run it with an external shell.

    The script is written to `tmpBase + '.script'` (with an extra '.bat'
    suffix when running under cmd.exe, i.e. on Windows without a bash path).
    Each entry of `commands` that starts with a %dbg(ARG) marker is rewritten
    in place into a null command carrying ARG, followed by the command that
    was captured after the marker.

    The script is run through lit.util.executeCommand in `cwd` with the test's
    environment and litConfig.maxIndividualTestTime as the timeout.

    Returns the tuple (out, err, exitCode, timeoutInfo); timeoutInfo is None
    unless the command timed out, in which case it is the timeout message.
    """
    bashPath = litConfig.getBashPath()
    isWin32CMDEXE = (litConfig.isWindows and not bashPath)
    script = tmpBase + '.script'
    if isWin32CMDEXE:
        script += '.bat'

    # Pick the file mode for the script.
    mode = 'w'
    open_kwargs = {}
    if litConfig.isWindows and not isWin32CMDEXE:
        mode += 'b'  # Avoid CRLFs when writing bash scripts.
    elif sys.version_info > (3,0):
        open_kwargs['encoding'] = 'utf-8'

    # Templates used to expand %dbg(ARG) COMMAND.  kPdbgRegex also captures
    # COMMAND, and match.expand() returns only the expanded template, so the
    # template has to re-emit COMMAND via \2 or the command would be lost.
    if isWin32CMDEXE:
        dbg_with_command = "echo '\\1' > nul && \\2"
        dbg_alone = "echo '\\1' > nul"
    else:
        dbg_with_command = ": '\\1'; \\2"
        dbg_alone = ": '\\1'"

    # Write script file; the context manager closes it even if a write fails.
    with open(script, mode, **open_kwargs) as f:
        for i, ln in enumerate(commands):
            match = re.match(kPdbgRegex, ln)
            if match:
                commands[i] = match.expand(
                    dbg_with_command if match.group(2) else dbg_alone)

        if isWin32CMDEXE:
            if litConfig.echo_all_commands:
                text = '@echo on\n'
            else:
                text = '@echo off\n'
            text += '\n@if %ERRORLEVEL% NEQ 0 EXIT\n'.join(commands)
        else:
            text = ''
            if test.config.pipefail:
                text += 'set -o pipefail;'
            if litConfig.echo_all_commands:
                text += 'set -x;'
            text += '{ ' + '; } &&\n{ '.join(commands) + '; }'
        text += '\n'

        # Binary mode needs bytes on Python 3.
        if mode == 'wb' and sys.version_info > (3,0):
            text = text.encode('utf-8')
        f.write(text)

    if isWin32CMDEXE:
        command = ['cmd','/c', script]
    else:
        if bashPath:
            command = [bashPath, script]
        else:
            command = ['/bin/sh', script]
        if litConfig.useValgrind:
            # FIXME: Running valgrind on sh is overkill. We probably could just
            # run on clang with no real loss.
            command = litConfig.valgrindArgs + command

    try:
        out, err, exitCode = lit.util.executeCommand(command, cwd=cwd,
                                       env=test.config.environment,
                                       timeout=litConfig.maxIndividualTestTime)
        return (out, err, exitCode, None)
    except lit.util.ExecuteCommandTimeoutException as e:
        return (e.out, e.err, e.exitCode, e.msg)
1068
def parseIntegratedTestScriptCommands(source_path, keywords):
    """
    parseIntegratedTestScriptCommands(source_path, keywords) -> commands

    Generator over the keyword lines of an integrated test script file.  For
    every occurrence of one of `keywords` it yields a tuple
    (line_number, keyword, rest_of_line).
    """

    # The file is read in binary mode and searched with a bytes regular
    # expression, so the input as a whole does not need a valid encoding.
    # Only the matched keyword and the remainder of its line are decoded, and
    # those are required to be valid UTF-8.
    newline = to_bytes('\n')
    alternatives = "|".join(re.escape(k) for k in keywords)
    keywords_re = re.compile(to_bytes("(%s)(.*)\n" % (alternatives,)))

    with open(source_path, 'rb') as source:
        # Read the entire file contents.
        data = source.read()

        # Ensure the data ends with a newline.
        if not data.endswith(newline):
            data = data + newline

        # Walk the matches, keeping a running line number.
        line_number = 1
        counted_upto = 0
        for match in keywords_re.finditer(data):
            # Advance the line number by the newlines found between the
            # previous match and this one.
            start = match.start()
            line_number += data.count(newline, counted_upto, start)
            counted_upto = start

            # Binary mode leaves Windows '\r' characters in place, so strip
            # them from the end of the yielded line by hand.
            raw_keyword, raw_line = match.groups()
            keyword = to_string(raw_keyword.decode('utf-8'))
            text = to_string(raw_line.decode('utf-8').rstrip('\r'))
            yield (line_number, keyword, text)
1124
def getTempPaths(test):
    """Return (tmpDir, tmpBase) for `test`.

    tmpDir is an 'Output' directory next to the path returned by
    test.getExecPath(); tmpBase is that path's final component placed inside
    tmpDir.
    """
    exec_dir, exec_name = os.path.split(test.getExecPath())
    output_dir = os.path.join(exec_dir, 'Output')
    return output_dir, os.path.join(output_dir, exec_name)
1133
def colonNormalizePath(path):
    """Return `path` without its leading slash or drive colon.

    When kIsWindows is false, `path` must start with '/', and that slash is
    dropped.  Otherwise backslashes become forward slashes and a colon that
    follows the first character is removed.
    """
    if not kIsWindows:
        assert path[0] == '/'
        return path[1:]
    forward = path.replace('\\', '/')
    return re.sub(r'^(.):', r'\1', forward)
1140
def getDefaultSubstitutions(test, tmpDir, tmpBase, normalize_slashes=False):
    """Build the list of (pattern, replacement) substitution pairs for `test`.

    The list starts with test.config.substitutions, followed by the path
    substitutions derived from the test's source path, `tmpDir` and `tmpBase`.
    When `normalize_slashes` is true, backslashes in those paths are turned
    into forward slashes before any substitution is built.
    """
    def forward_slashes(p):
        return p.replace('\\', '/')

    # "%{/[STpst]:regex_replacement}" values land in the replacement part of
    # an s@@@ expression, so '@' and '&' need escaping there.
    def regex_escape(s):
        return s.replace('@', r'\@').replace('&', r'\&')

    sourcepath = test.getSourcePath()
    sourcedir = os.path.dirname(sourcepath)

    # Normalize slashes, if requested.
    if normalize_slashes:
        sourcepath, sourcedir, tmpDir, tmpBase = [
            forward_slashes(p)
            for p in (sourcepath, sourcedir, tmpDir, tmpBase)]

    substitutions = list(test.config.substitutions)
    tmpName = tmpBase + '.tmp'
    baseName = os.path.basename(tmpBase)
    substitutions += [
        ('%s', sourcepath),
        ('%S', sourcedir),
        ('%p', sourcedir),
        ('%{pathsep}', os.pathsep),
        ('%t', tmpName),
        ('%basename_t', baseName),
        ('%T', tmpDir),
    ]

    substitutions += [
        ('%{fs-src-root}', pathlib.Path(sourcedir).anchor),
        ('%{fs-tmp-root}', pathlib.Path(tmpBase).anchor),
        ('%{fs-sep}', os.path.sep),
    ]

    # Forward-slash forms shared by the "%/..." and "%{/...}" groups below.
    fwd_sourcepath = forward_slashes(sourcepath)
    fwd_sourcedir = forward_slashes(sourcedir)
    fwd_tmpBase = forward_slashes(tmpBase)
    fwd_tmpDir = forward_slashes(tmpDir)

    # "%/[STpst]" should be normalized.
    substitutions += [
        ('%/s', fwd_sourcepath),
        ('%/S', fwd_sourcedir),
        ('%/p', fwd_sourcedir),
        ('%/t', fwd_tmpBase + '.tmp'),
        ('%/T', fwd_tmpDir),
    ]

    # "%{/[STpst]:regex_replacement}" is normalized like "%/[STpst]" and
    # additionally escaped for use as a regex replacement.
    substitutions += [
        ('%{/s:regex_replacement}', regex_escape(fwd_sourcepath)),
        ('%{/S:regex_replacement}', regex_escape(fwd_sourcedir)),
        ('%{/p:regex_replacement}', regex_escape(fwd_sourcedir)),
        ('%{/t:regex_replacement}', regex_escape(fwd_tmpBase) + '.tmp'),
        ('%{/T:regex_replacement}', regex_escape(fwd_tmpDir)),
    ]

    # "%:[STpst]" are normalized paths without colons and without a leading
    # slash.
    substitutions += [
        ('%:s', colonNormalizePath(sourcepath)),
        ('%:S', colonNormalizePath(sourcedir)),
        ('%:p', colonNormalizePath(sourcedir)),
        ('%:t', colonNormalizePath(tmpBase + '.tmp')),
        ('%:T', colonNormalizePath(tmpDir)),
    ]
    return substitutions
1208
def _memoize(f):
    """Wrap the one-argument function `f` so each distinct argument is
    computed once; later calls with the same argument return the stored
    result."""
    results = {}  # Intentionally unbounded, see applySubstitutions()
    def memoized(x):
        if x in results:
            return results[x]
        value = f(x)
        results[x] = value
        return value
    return memoized
1216
@_memoize
def _caching_re_compile(r):
    """Compile the regular expression `r`; the _memoize wrapper keeps the
    compiled pattern so each distinct `r` is compiled only once."""
    compiled = re.compile(r)
    return compiled
1220
def applySubstitutions(script, substitutions, conditions={},
                       recursion_limit=None):
    """
    Apply substitutions to the script.  Allow full regular expression syntax.
    Replace each matching occurrence of regular expression pattern a with
    substitution b in line ln.

    If a substitution expands into another substitution, it is expanded
    recursively until the line has no more expandable substitutions. If
    the line can still can be substituted after being substituted
    `recursion_limit` times, it is an error. If the `recursion_limit` is
    `None` (the default), no recursive substitution is performed at all.

    script: iterable of script lines.
    substitutions: iterable of (pattern, replacement) pairs, applied in order.
    conditions: passed to BooleanExpression.evaluate when evaluating the
        condition of an '%if' substitution; it is not modified here.
    recursion_limit: None, or a non-negative int.

    Returns the list of processed lines, stripped of surrounding whitespace.
    Raises ValueError for malformed %if/%else syntax or when the recursion
    limit is exceeded.
    """

    # We use #_MARKER_# to hide %% while we do the other substitutions.
    def escapePercents(ln):
        return _caching_re_compile('%%').sub('#_MARKER_#', ln)

    def unescapePercents(ln):
        return _caching_re_compile('#_MARKER_#').sub('%', ln)

    def substituteIfElse(ln):
        # early exit to avoid wasting time on lines without
        # conditional substitutions
        if '%if ' not in ln:
            return ln

        def tryParseIfCond(ln):
            # space is important to not conflict with other (possible)
            # substitutions
            if not ln.startswith('%if '):
                return None, ln
            ln = ln[4:]

            # stop at '%{'
            match = _caching_re_compile('%{').search(ln)
            if not match:
                raise ValueError("'%{' is missing for %if substitution")
            cond = ln[:match.start()]

            # eat '%{' as well
            ln = ln[match.end():]
            return cond, ln

        def tryParseElse(ln):
            # Raw string: '\s' is not a valid escape in a plain string
            # literal and triggers a warning on recent Python versions.
            match = _caching_re_compile(r'^\s*%else\s*(%{)?').search(ln)
            if not match:
                return False, ln
            if not match.group(1):
                raise ValueError("'%{' is missing for %else substitution")
            return True, ln[match.end():]

        def tryParseEnd(ln):
            if ln.startswith('%}'):
                return True, ln[2:]
            return False, ln

        def parseText(ln, isNested):
            # parse everything until %if, or %} if we're parsing a
            # nested expression.
            match = _caching_re_compile(
                '(.*?)(?:%if|%})' if isNested else '(.*?)(?:%if)').search(ln)
            if not match:
                # there is no terminating pattern, so treat the whole
                # line as text
                return ln, ''
            text_end = match.end(1)
            return ln[:text_end], ln[text_end:]

        def parseRecursive(ln, isNested):
            # Returns (expanded text, unparsed remainder of ln).
            result = ''
            while len(ln):
                if isNested:
                    # A nested branch stops at its closing '%}', which is
                    # left in place for the caller to consume.
                    found_end, _ = tryParseEnd(ln)
                    if found_end:
                        break

                # %if cond %{ branch_if %} %else %{ branch_else %}
                cond, ln = tryParseIfCond(ln)
                if cond:
                    branch_if, ln = parseRecursive(ln, isNested=True)
                    found_end, ln = tryParseEnd(ln)
                    if not found_end:
                        raise ValueError("'%}' is missing for %if substitution")

                    branch_else = ''
                    found_else, ln = tryParseElse(ln)
                    if found_else:
                        branch_else, ln = parseRecursive(ln, isNested=True)
                        found_end, ln = tryParseEnd(ln)
                        if not found_end:
                            raise ValueError("'%}' is missing for %else substitution")

                    if BooleanExpression.evaluate(cond, conditions):
                        result += branch_if
                    else:
                        result += branch_else
                    continue

                # The rest is handled as plain text.
                text, ln = parseText(ln, isNested)
                result += text

            return result, ln

        result, ln = parseRecursive(ln, isNested=False)
        assert len(ln) == 0
        return result

    def processLine(ln):
        # Apply substitutions
        ln = substituteIfElse(escapePercents(ln))
        for a,b in substitutions:
            # Stringify first so that a non-string replacement value is
            # handled the same way on every platform.
            b = str(b)
            if kIsWindows:
                b = b.replace("\\","\\\\")
            # re.compile() has a built-in LRU cache with 512 entries. In some
            # test suites lit ends up thrashing that cache, which made e.g.
            # check-llvm run 50% slower.  Use an explicit, unbounded cache
            # to prevent that from happening.  Since lit is fairly
            # short-lived, since the set of substitutions is fairly small, and
            # since thrashing has such bad consequences, not bounding the cache
            # seems reasonable.
            ln = _caching_re_compile(a).sub(b, escapePercents(ln))

        # Strip the trailing newline and any extra whitespace.
        return ln.strip()

    def processLineToFixedPoint(ln):
        assert isinstance(recursion_limit, int) and recursion_limit >= 0
        origLine = ln
        steps = 0
        processed = processLine(ln)
        while processed != ln and steps < recursion_limit:
            ln = processed
            processed = processLine(ln)
            steps += 1

        if processed != ln:
            raise ValueError("Recursive substitution of '%s' did not complete "
                             "in the provided recursion limit (%s)" % \
                             (origLine, recursion_limit))

        return processed

    process = processLine if recursion_limit is None else processLineToFixedPoint

    return [unescapePercents(process(ln)) for ln in script]
1368
1369
class ParserKind(object):
    """
    Enumeration of the styles an integrated test keyword or command can have.

    TAG: A keyword taking no value. Ex 'END.'
    COMMAND: A keyword taking a list of shell commands. Ex 'RUN:'
    LIST: A keyword taking a comma-separated list of values.
    BOOLEAN_EXPR: A keyword taking a comma-separated list of
        boolean expressions. Ex 'XFAIL:'
    INTEGER: A keyword taking a single integer. Ex 'ALLOW_RETRIES:'
    CUSTOM: A keyword with custom parsing semantics.
    """
    TAG = 0
    COMMAND = 1
    LIST = 2
    BOOLEAN_EXPR = 3
    INTEGER = 4
    CUSTOM = 5

    @staticmethod
    def allowedKeywordSuffixes(value):
        """Return the list of suffixes a keyword of kind `value` may end in.

        Raises KeyError if `value` is not one of the kinds above.
        """
        colon_only = (ParserKind.COMMAND, ParserKind.LIST,
                      ParserKind.BOOLEAN_EXPR, ParserKind.INTEGER)
        # A fresh list is built for every kind on every call.
        suffixes = dict((kind, [':']) for kind in colon_only)
        suffixes[ParserKind.TAG] = ['.']
        suffixes[ParserKind.CUSTOM] = [':', '.']
        return suffixes[value]

    @staticmethod
    def str(value):
        """Return the name of the kind `value`.

        Raises KeyError if `value` is not one of the kinds above.
        """
        names = ('TAG', 'COMMAND', 'LIST', 'BOOLEAN_EXPR', 'INTEGER',
                 'CUSTOM')
        by_value = dict((getattr(ParserKind, name), name) for name in names)
        return by_value[value]
1409
1410
class IntegratedTestKeywordParser(object):
    """A parser for LLVM/Clang style integrated test scripts.

    keyword: The keyword to parse for. It must end in either '.' or ':'.
    kind: An value of ParserKind.
    parser: A custom parser. This value may only be specified with
            ParserKind.CUSTOM.
    initial_value: The value the parser starts from; it is handed to the
            first parser call as its `output` argument.
    """
    def __init__(self, keyword, kind, parser=None, initial_value=None):
        allowedSuffixes = ParserKind.allowedKeywordSuffixes(kind)
        has_valid_suffix = (len(keyword) != 0 and
                            keyword[-1] in allowedSuffixes)
        if not has_valid_suffix:
            kind_name = ParserKind.str(kind)
            if len(allowedSuffixes) == 1:
                raise ValueError("Keyword '%s' of kind '%s' must end in '%s'"
                                 % (keyword, kind_name, allowedSuffixes[0]))
            raise ValueError("Keyword '%s' of kind '%s' must end in "
                             " one of '%s'"
                             % (keyword, kind_name,
                                ' '.join(allowedSuffixes)))

        if kind != ParserKind.CUSTOM and parser is not None:
            raise ValueError("custom parsers can only be specified with "
                             "ParserKind.CUSTOM")
        self.keyword = keyword
        self.kind = kind
        self.parsed_lines = []
        self.value = initial_value
        self.parser = parser

        # COMMAND lines additionally need the keyword, which is read from
        # the instance at call time.
        def handle_command(line_number, line, output):
            return self._handleCommand(line_number, line, output,
                                       self.keyword)

        builtin_handlers = {
            ParserKind.COMMAND: handle_command,
            ParserKind.LIST: self._handleList,
            ParserKind.BOOLEAN_EXPR: self._handleBooleanExpr,
            ParserKind.INTEGER: self._handleSingleInteger,
            ParserKind.TAG: self._handleTag,
        }
        if kind in builtin_handlers:
            self.parser = builtin_handlers[kind]
        elif kind == ParserKind.CUSTOM:
            if parser is None:
                raise ValueError("ParserKind.CUSTOM requires a custom parser")
            self.parser = parser
        else:
            raise ValueError("Unknown kind '%s'" % kind)

    def parseLine(self, line_number, line):
        """Record `line` and fold it into the current value.

        A ValueError raised by the parser is re-raised with the keyword and
        the test line number appended to its message.
        """
        try:
            self.parsed_lines.append((line_number, line))
            self.value = self.parser(line_number, line, self.value)
        except ValueError as e:
            where = "\nin %s directive on test line %d" % (self.keyword,
                                                           line_number)
            raise ValueError(str(e) + where)

    def getValue(self):
        """Return the value accumulated so far."""
        return self.value

    @staticmethod
    def _handleTag(line_number, line, output):
        """A helper for parsing TAG type keywords"""
        # A line that is blank after the tag yields True; otherwise the
        # previous value is passed through unchanged.
        if not line.strip():
            return True
        return output

    @staticmethod
    def _handleCommand(line_number, line, output, keyword):
        """A helper for parsing COMMAND type keywords"""
        # Trim trailing whitespace.
        line = line.rstrip()
        # Substitute line number expressions
        line = re.sub(r'%\(line\)', str(line_number), line)

        def replace_line_number(match):
            sign = match.group(1)
            if sign == '+':
                return str(line_number + int(match.group(2)))
            if sign == '-':
                return str(line_number - int(match.group(2)))
        line = re.sub(r'%\(line *([\+-]) *(\d+)\)', replace_line_number, line)

        # Collapse lines with trailing '\\'.
        continues_previous = bool(output) and output[-1][-1] == '\\'
        if continues_previous:
            output[-1] = output[-1][:-1] + line
            return output

        if output is None:
            output = []
        # Start a new command, prefixed with a %dbg marker that records the
        # keyword and the line it came from.
        pdbg = "%dbg({keyword} at line {line_number})".format(
            keyword=keyword,
            line_number=line_number)
        assert re.match(kPdbgRegex + "$", pdbg), \
               "kPdbgRegex expected to match actual %dbg usage"
        output.append("{pdbg} {real_command}".format(
            pdbg=pdbg,
            real_command=line))
        return output

    @staticmethod
    def _handleList(line_number, line, output):
        """A parser for LIST type keywords"""
        if output is None:
            output = []
        for item in line.split(','):
            output.append(item.strip())
        return output

    @staticmethod
    def _handleSingleInteger(line_number, line, output):
        """A parser for INTEGER type keywords"""
        if output is None:
            output = []
        try:
            number = int(line)
        except ValueError:
            raise ValueError("INTEGER parser requires the input to be an integer (got {})".format(line))
        output.append(number)
        return output

    @staticmethod
    def _handleBooleanExpr(line_number, line, output):
        """A parser for BOOLEAN_EXPR type keywords"""
        stripped = [s.strip() for s in line.split(',')]
        parts = [s for s in stripped if s != '']
        # A previous entry ending in a backslash is continued by the first
        # expression of this line.
        if output and output[-1][-1] == '\\':
            output[-1] = output[-1][:-1] + parts[0]
            parts = parts[1:]
        if output is None:
            output = []
        output.extend(parts)
        # Evaluate each expression to verify syntax.
        # We don't want any results, just the raised ValueError.
        for expr in output:
            if expr == '*' or expr.endswith('\\'):
                continue
            BooleanExpression.evaluate(expr, [])
        return output
1543
1544
def _parseKeywords(sourcepath, additional_parsers=[],
                   require_script=True):
    """_parseKeywords

    Scan an LLVM/Clang style integrated test script and feed every directive
    line to the parser registered for its keyword.  The built-in keywords are
    'RUN', 'XFAIL', 'REQUIRES', 'UNSUPPORTED', 'ALLOW_RETRIES' and 'END.';
    `additional_parsers` may register further custom keywords.

    Returns a dictionary mapping each parser's keyword to its value after
    parsing the test.

    Raises ValueError when an additional parser has the wrong type or reuses
    a keyword, when `require_script` is set and no RUN line was found, when a
    RUN or boolean-expression directive ends in an unterminated '\\', or when
    more than one ALLOW_RETRIES line is present.
    """
    # This list is handed to the RUN: parser as its initial value; the checks
    # further down read it back as the collected script.
    run_lines = []

    # Install the built-in keyword parsers.
    keyword_parsers = {}
    for builtin in (
            IntegratedTestKeywordParser('RUN:', ParserKind.COMMAND,
                                        initial_value=run_lines),
            IntegratedTestKeywordParser('XFAIL:', ParserKind.BOOLEAN_EXPR),
            IntegratedTestKeywordParser('REQUIRES:', ParserKind.BOOLEAN_EXPR),
            IntegratedTestKeywordParser('UNSUPPORTED:',
                                        ParserKind.BOOLEAN_EXPR),
            IntegratedTestKeywordParser('ALLOW_RETRIES:', ParserKind.INTEGER),
            IntegratedTestKeywordParser('END.', ParserKind.TAG)):
        keyword_parsers[builtin.keyword] = builtin

    # Install user-defined additional parsers, refusing duplicates.
    for extra in additional_parsers:
        if not isinstance(extra, IntegratedTestKeywordParser):
            raise ValueError('Additional parser must be an instance of '
                             'IntegratedTestKeywordParser')
        if extra.keyword in keyword_parsers:
            raise ValueError("Parser for keyword '%s' already exists"
                             % extra.keyword)
        keyword_parsers[extra.keyword] = extra

    # Dispatch each directive line to its parser; stop once the END. tag
    # parser reports True.
    for line_number, command_type, ln in \
            parseIntegratedTestScriptCommands(sourcepath,
                                              keyword_parsers.keys()):
        handler = keyword_parsers[command_type]
        handler.parseLine(line_number, ln)
        if command_type == 'END.' and handler.getValue() is True:
            break

    # A script is mandatory unless the caller said otherwise.
    if require_script and not run_lines:
        raise ValueError("Test has no 'RUN:' line")

    # The final RUN line must not still be waiting for a continuation.
    if run_lines and run_lines[-1][-1] == '\\':
        raise ValueError("Test has unterminated 'RUN:' lines (with '\\')")

    # Likewise for every boolean-expression directive.
    for key, kp in keyword_parsers.items():
        if kp.kind != ParserKind.BOOLEAN_EXPR:
            continue
        pending = kp.getValue()
        if not pending:
            continue
        if pending[-1][-1] == '\\':
            raise ValueError("Test has unterminated '{key}' lines (with '\\')"
                             .format(key=key))

    # At most one ALLOW_RETRIES: line is permitted.
    retries = keyword_parsers['ALLOW_RETRIES:'].getValue()
    if retries and len(retries) > 1:
        raise ValueError("Test has more than one ALLOW_RETRIES lines")

    results = {}
    for p in keyword_parsers.values():
        results[p.keyword] = p.getValue()
    return results
1612
1613
def parseIntegratedTestScript(test, additional_parsers=[],
                              require_script=True):
    """parseIntegratedTestScript - Scan an LLVM/Clang style integrated test
    script and record its directives on the given test.

    'XFAIL', 'REQUIRES' and 'UNSUPPORTED' expressions are appended to
    test.xfails, test.requires and test.unsupported; an 'ALLOW_RETRIES' value,
    when present, is stored in test.allowed_retries.

    If additional parsers are specified then the test is also scanned for the
    keywords they specify and all matches are passed to the custom parser.

    If 'require_script' is False an empty script
    may be returned. This can be used for test formats where the actual script
    is optional or ignored.

    Returns the list of 'RUN' lines, or a lit.Test.Result when the directives
    cannot be parsed (UNRESOLVED) or the test is excluded by its feature
    requirements (UNSUPPORTED).
    """
    # Parse the test sources; any parse error makes the test UNRESOLVED.
    try:
        parsed = _parseKeywords(test.getSourcePath(), additional_parsers,
                                require_script)
    except ValueError as err:
        return lit.Test.Result(Test.UNRESOLVED, str(err))

    # Copy the parsed properties onto the test object.
    test.xfails += parsed['XFAIL:'] or []
    test.requires += parsed['REQUIRES:'] or []
    test.unsupported += parsed['UNSUPPORTED:'] or []
    retries = parsed['ALLOW_RETRIES:']
    if retries:
        test.allowed_retries = retries[0]

    # Enforce REQUIRES:
    missing = test.getMissingRequiredFeatures()
    if missing:
        return lit.Test.Result(
            Test.UNSUPPORTED,
            "Test requires the following unavailable "
            "features: %s" % ', '.join(missing))

    # Enforce UNSUPPORTED:
    unsupported = test.getUnsupportedFeatures()
    if unsupported:
        return lit.Test.Result(
            Test.UNSUPPORTED,
            "Test does not support the following features "
            "and/or targets: %s" % ', '.join(unsupported))

    # Enforce limit_to_features.
    if not test.isWithinFeatureLimits():
        return lit.Test.Result(
            Test.UNSUPPORTED,
            "Test does not require any of the features "
            "specified in limit_to_features: %s"
            % ', '.join(test.config.limit_to_features))

    return parsed['RUN:'] or []
1665
1666
def _runShTest(test, litConfig, useExternalSh, script, tmpBase):
    """Execute `script` for `test`, retrying failures, and build the result.

    The script is run through executeScript when `useExternalSh` is true and
    through executeScriptInternal otherwise.  A run that fails is repeated up
    to test.allowed_retries additional times; a pass that needed more than one
    attempt is reported as FLAKYPASS.  At least one attempt is always made.

    Returns a lit.Test.Result.  If the executor itself returns a
    lit.Test.Result, that result is passed through unchanged; otherwise the
    result carries a log containing the script, the exit code, any timeout
    information and the captured stdout/stderr.
    """
    def runOnce(execdir):
        # Run the script once and classify the outcome from its exit code and
        # timeout information.
        if useExternalSh:
            res = executeScript(test, litConfig, tmpBase, script, execdir)
        else:
            res = executeScriptInternal(test, litConfig, tmpBase, script, execdir)
        if isinstance(res, lit.Test.Result):
            return res

        out,err,exitCode,timeoutInfo = res
        if exitCode == 0:
            status = Test.PASS
        else:
            if timeoutInfo is None:
                status = Test.FAIL
            else:
                status = Test.TIMEOUT
        return out,err,exitCode,timeoutInfo,status

    # Create the output directory if it does not already exist.
    lit.util.mkdir_p(os.path.dirname(tmpBase))

    # Re-run failed tests up to test.allowed_retries times.
    execdir = os.path.dirname(test.getExecPath())
    # Clamp to at least one attempt: a negative allowed_retries (for example
    # from "ALLOW_RETRIES: -1") would otherwise skip the loop entirely and
    # leave the variables read below unbound.
    attempts = max(test.allowed_retries, 0) + 1
    for i in range(attempts):
        res = runOnce(execdir)
        if isinstance(res, lit.Test.Result):
            return res

        out,err,exitCode,timeoutInfo,status = res
        # Only plain failures are retried; passes and timeouts end the loop.
        if status != Test.FAIL:
            break

    # If we had to run the test more than once, count it as a flaky pass. These
    # will be printed separately in the test summary.
    if i > 0 and status == Test.PASS:
        status = Test.FLAKYPASS

    # Form the output log.
    output = """Script:\n--\n%s\n--\nExit Code: %d\n""" % (
        '\n'.join(script), exitCode)

    if timeoutInfo is not None:
        output += """Timeout: %s\n""" % (timeoutInfo,)
    output += "\n"

    # Append the outputs, if present.
    if out:
        output += """Command Output (stdout):\n--\n%s\n--\n""" % (out,)
    if err:
        output += """Command Output (stderr):\n--\n%s\n--\n""" % (err,)

    return lit.Test.Result(status, output)
1721
1722
def executeShTest(test, litConfig, useExternalSh,
                  extra_substitutions=[],
                  preamble_commands=[]):
    """Parse, expand and run the script of a single test.

    The script consists of `preamble_commands` followed by the test's parsed
    'RUN' lines; the test file only has to provide RUN lines when the preamble
    is empty.  `extra_substitutions` are placed ahead of the default
    substitutions before the script is expanded.

    Returns a lit.Test.Result: UNSUPPORTED when the test's config marks it
    unsupported, the parser's own result when parsing yields one, PASS without
    running anything when litConfig.noExecute is set, and otherwise the
    outcome of running the expanded script.
    """
    if test.config.unsupported:
        return lit.Test.Result(Test.UNSUPPORTED, 'Test is unsupported')

    # Start from a copy of the preamble so the caller's list is untouched.
    script = list(preamble_commands)
    run_lines = parseIntegratedTestScript(test, require_script=not script)
    if isinstance(run_lines, lit.Test.Result):
        return run_lines
    script.extend(run_lines)

    if litConfig.noExecute:
        return lit.Test.Result(Test.PASS)

    tmpDir, tmpBase = getTempPaths(test)

    # Caller-supplied substitutions come before the defaults.
    substitutions = list(extra_substitutions)
    substitutions.extend(
        getDefaultSubstitutions(test, tmpDir, tmpBase,
                                normalize_slashes=useExternalSh))

    # Every available feature maps to True in the conditions table.
    conditions = dict.fromkeys(test.config.available_features, True)
    expanded = applySubstitutions(
        script, substitutions, conditions,
        recursion_limit=test.config.recursiveExpansionLimit)

    return _runShTest(test, litConfig, useExternalSh, expanded, tmpBase)
1747