1ee3c74fbSChris Lattner //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
2ee3c74fbSChris Lattner //
3ee3c74fbSChris Lattner //                     The LLVM Compiler Infrastructure
4ee3c74fbSChris Lattner //
5ee3c74fbSChris Lattner // This file is distributed under the University of Illinois Open Source
6ee3c74fbSChris Lattner // License. See LICENSE.TXT for details.
7ee3c74fbSChris Lattner //
8ee3c74fbSChris Lattner //===----------------------------------------------------------------------===//
9ee3c74fbSChris Lattner //
// FileCheck does a line-by-line check of a file that validates whether it
11ee3c74fbSChris Lattner // contains the expected content.  This is useful for regression tests etc.
12ee3c74fbSChris Lattner //
13b5ecceffSJames Henderson // This program exits with an exit status of 2 on error, exit status of 0 if
14ee3c74fbSChris Lattner // the file matched the expected contents, and exit status of 1 if it did not
15ee3c74fbSChris Lattner // contain the expected contents.
16ee3c74fbSChris Lattner //
17ee3c74fbSChris Lattner //===----------------------------------------------------------------------===//
18ee3c74fbSChris Lattner 
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cctype>
#include <cstdlib>
#include <list>
#include <map>
#include <string>
#include <system_error>
#include <vector>
36ee3c74fbSChris Lattner using namespace llvm;
37ee3c74fbSChris Lattner 
38ee3c74fbSChris Lattner static cl::opt<std::string>
39ee3c74fbSChris Lattner     CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
40ee3c74fbSChris Lattner 
41ee3c74fbSChris Lattner static cl::opt<std::string>
42ee3c74fbSChris Lattner     InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
43ee3c74fbSChris Lattner                   cl::init("-"), cl::value_desc("filename"));
44ee3c74fbSChris Lattner 
45e8f2fb20SChandler Carruth static cl::list<std::string> CheckPrefixes(
46e8f2fb20SChandler Carruth     "check-prefix",
47ee3c74fbSChris Lattner     cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
48fd557cb0SDaniel Sanders static cl::alias CheckPrefixesAlias(
49fd557cb0SDaniel Sanders     "check-prefixes", cl::aliasopt(CheckPrefixes), cl::CommaSeparated,
50fd557cb0SDaniel Sanders     cl::NotHidden,
51fd557cb0SDaniel Sanders     cl::desc(
52fd557cb0SDaniel Sanders         "Alias for -check-prefix permitting multiple comma separated values"));
53ee3c74fbSChris Lattner 
54e8f2fb20SChandler Carruth static cl::opt<bool> NoCanonicalizeWhiteSpace(
55e8f2fb20SChandler Carruth     "strict-whitespace",
562c3e5cdfSChris Lattner     cl::desc("Do not treat all horizontal whitespace as equivalent"));
572c3e5cdfSChris Lattner 
5856ccdbbdSAlexander Kornienko static cl::list<std::string> ImplicitCheckNot(
5956ccdbbdSAlexander Kornienko     "implicit-check-not",
6056ccdbbdSAlexander Kornienko     cl::desc("Add an implicit negative check with this pattern to every\n"
6156ccdbbdSAlexander Kornienko              "positive check. This can be used to ensure that no instances of\n"
6256ccdbbdSAlexander Kornienko              "this pattern occur which are not matched by a positive pattern"),
6356ccdbbdSAlexander Kornienko     cl::value_desc("pattern"));
6456ccdbbdSAlexander Kornienko 
6546e1fd61SAlexander Richardson static cl::list<std::string> GlobalDefines("D", cl::Prefix,
6646e1fd61SAlexander Richardson     cl::desc("Define a variable to be used in capture patterns."),
6746e1fd61SAlexander Richardson     cl::value_desc("VAR=VALUE"));
6846e1fd61SAlexander Richardson 
691b9f936fSJustin Bogner static cl::opt<bool> AllowEmptyInput(
701b9f936fSJustin Bogner     "allow-empty", cl::init(false),
711b9f936fSJustin Bogner     cl::desc("Allow the input file to be empty. This is useful when making\n"
721b9f936fSJustin Bogner              "checks that some error message does not occur, for example."));
731b9f936fSJustin Bogner 
7485913ccaSJames Y Knight static cl::opt<bool> MatchFullLines(
7585913ccaSJames Y Knight     "match-full-lines", cl::init(false),
7685913ccaSJames Y Knight     cl::desc("Require all positive matches to cover an entire input line.\n"
7785913ccaSJames Y Knight              "Allows leading and trailing whitespace if --strict-whitespace\n"
7885913ccaSJames Y Knight              "is not also passed."));
7985913ccaSJames Y Knight 
80f55e72a5SArtem Belevich static cl::opt<bool> EnableVarScope(
81f55e72a5SArtem Belevich     "enable-var-scope", cl::init(false),
82f55e72a5SArtem Belevich     cl::desc("Enables scope for regex variables. Variables with names that\n"
83f55e72a5SArtem Belevich              "do not start with '$' will be reset at the beginning of\n"
84f55e72a5SArtem Belevich              "each CHECK-LABEL block."));
85f55e72a5SArtem Belevich 
86bcf5b441SJoel E. Denny static cl::opt<bool> AllowDeprecatedDagOverlap(
87bcf5b441SJoel E. Denny     "allow-deprecated-dag-overlap", cl::init(false),
88bcf5b441SJoel E. Denny     cl::desc("Enable overlapping among matches in a group of consecutive\n"
89bcf5b441SJoel E. Denny              "CHECK-DAG directives.  This option is deprecated and is only\n"
90bcf5b441SJoel E. Denny              "provided for convenience as old tests are migrated to the new\n"
91bcf5b441SJoel E. Denny              "non-overlapping CHECK-DAG implementation.\n"));
92bcf5b441SJoel E. Denny 
93dc5ba317SJoel E. Denny static cl::opt<bool> Verbose("v", cl::init(false),
94dc5ba317SJoel E. Denny                              cl::desc("Print directive pattern matches.\n"));
95dc5ba317SJoel E. Denny 
96dc5ba317SJoel E. Denny static cl::opt<bool> VerboseVerbose(
97dc5ba317SJoel E. Denny     "vv", cl::init(false),
98dc5ba317SJoel E. Denny     cl::desc("Print information helpful in diagnosing internal FileCheck\n"
99dc5ba317SJoel E. Denny              "issues.  Implies -v.\n"));
100*346dfbe2SGeorge Karpenkov static const char * DumpInputEnv = "FILECHECK_DUMP_INPUT_ON_FAILURE";
101*346dfbe2SGeorge Karpenkov 
102*346dfbe2SGeorge Karpenkov static cl::opt<bool> DumpInputOnFailure(
103*346dfbe2SGeorge Karpenkov     "dump-input-on-failure", cl::init(std::getenv(DumpInputEnv)),
104*346dfbe2SGeorge Karpenkov     cl::desc("Dump original input to stderr before failing.\n"
105*346dfbe2SGeorge Karpenkov              "The value can be also controlled using\n"
106*346dfbe2SGeorge Karpenkov              "FILECHECK_DUMP_INPUT_ON_FAILURE environment variable.\n"));
107dc5ba317SJoel E. Denny 
10813df4626SMatt Arsenault typedef cl::list<std::string>::const_iterator prefix_iterator;
10913df4626SMatt Arsenault 
11074d50731SChris Lattner //===----------------------------------------------------------------------===//
11174d50731SChris Lattner // Pattern Handling Code.
11274d50731SChris Lattner //===----------------------------------------------------------------------===//
11374d50731SChris Lattner 
namespace Check {

/// Kind of check a Pattern belongs to.
enum CheckType {
  /// No check type.
  CheckNone = 0,

  // One enumerator per directive flavour; names follow the directive suffix.
  CheckPlain,
  CheckNext,
  CheckSame,
  CheckNot,
  CheckDAG,
  CheckLabel,
  CheckEmpty,

  /// Indicates the pattern only matches the end of file. This is used for
  /// trailing CHECK-NOTs.
  CheckEOF,

  /// Marks when parsing found a -NOT check combined with another CHECK suffix.
  CheckBadNot
};

} // namespace Check
133eba55822SJakob Stoklund Olesen 
13438820972SMatt Arsenault class Pattern {
13538820972SMatt Arsenault   SMLoc PatternLoc;
13691a1b2c9SMichael Liao 
1374dabac20SChandler Carruth   /// A fixed string to match as the pattern or empty if this pattern requires
1384dabac20SChandler Carruth   /// a regex match.
139221460e0SChris Lattner   StringRef FixedStr;
140b16ab0c4SChris Lattner 
1414dabac20SChandler Carruth   /// A regex string to match as the pattern or empty if this pattern requires
1424dabac20SChandler Carruth   /// a fixed string to match.
143b16ab0c4SChris Lattner   std::string RegExStr;
1448879e06dSChris Lattner 
1454dabac20SChandler Carruth   /// Entries in this vector map to uses of a variable in the pattern, e.g.
1464dabac20SChandler Carruth   /// "foo[[bar]]baz".  In this case, the RegExStr will contain "foobaz" and
1474dabac20SChandler Carruth   /// we'll get an entry in this vector that tells us to insert the value of
1484dabac20SChandler Carruth   /// bar at offset 3.
1498879e06dSChris Lattner   std::vector<std::pair<StringRef, unsigned>> VariableUses;
1508879e06dSChris Lattner 
1514dabac20SChandler Carruth   /// Maps definitions of variables to their parenthesized capture numbers.
1524dabac20SChandler Carruth   ///
1534dabac20SChandler Carruth   /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to
1544dabac20SChandler Carruth   /// 1.
155e8b8f1bcSEli Bendersky   std::map<StringRef, unsigned> VariableDefs;
1568879e06dSChris Lattner 
157d1e020f7SSaleem Abdulrasool   Check::CheckType CheckTy;
1583b40b445SChris Lattner 
1594dabac20SChandler Carruth   /// Contains the number of line this pattern is in.
160d1e020f7SSaleem Abdulrasool   unsigned LineNumber;
161d1e020f7SSaleem Abdulrasool 
162d1e020f7SSaleem Abdulrasool public:
163d1e020f7SSaleem Abdulrasool   explicit Pattern(Check::CheckType Ty) : CheckTy(Ty) {}
16474d50731SChris Lattner 
1654dabac20SChandler Carruth   /// Returns the location in source code.
1660b707eb8SMichael Liao   SMLoc getLoc() const { return PatternLoc; }
1670b707eb8SMichael Liao 
168e8f2fb20SChandler Carruth   bool ParsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM,
16913df4626SMatt Arsenault                     unsigned LineNumber);
1708879e06dSChris Lattner   size_t Match(StringRef Buffer, size_t &MatchLen,
1718879e06dSChris Lattner                StringMap<StringRef> &VariableTable) const;
172dc5ba317SJoel E. Denny   void PrintVariableUses(const SourceMgr &SM, StringRef Buffer,
173dc5ba317SJoel E. Denny                          const StringMap<StringRef> &VariableTable,
174dc5ba317SJoel E. Denny                          SMRange MatchRange = None) const;
175dc5ba317SJoel E. Denny   void PrintFuzzyMatch(const SourceMgr &SM, StringRef Buffer,
176e0ef65abSDaniel Dunbar                        const StringMap<StringRef> &VariableTable) const;
177e0ef65abSDaniel Dunbar 
178e8f2fb20SChandler Carruth   bool hasVariable() const {
179e8f2fb20SChandler Carruth     return !(VariableUses.empty() && VariableDefs.empty());
180e8f2fb20SChandler Carruth   }
181f8bd2e5bSStephen Lin 
18238820972SMatt Arsenault   Check::CheckType getCheckTy() const { return CheckTy; }
18391a1b2c9SMichael Liao 
184b16ab0c4SChris Lattner private:
185e8b8f1bcSEli Bendersky   bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
186e8b8f1bcSEli Bendersky   void AddBackrefToRegEx(unsigned BackrefNum);
187e8f2fb20SChandler Carruth   unsigned
188e8f2fb20SChandler Carruth   ComputeMatchDistance(StringRef Buffer,
189fd29d886SDaniel Dunbar                        const StringMap<StringRef> &VariableTable) const;
19092987fb3SAlexander Kornienko   bool EvaluateExpression(StringRef Expr, std::string &Value) const;
19181e5cd9eSAdrian Prantl   size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
1923b40b445SChris Lattner };
1933b40b445SChris Lattner 
1944dabac20SChandler Carruth /// Parses the given string into the Pattern.
1954dabac20SChandler Carruth ///
1964dabac20SChandler Carruth /// \p Prefix provides which prefix is being matched, \p SM provides the
1974dabac20SChandler Carruth /// SourceMgr used for error reports, and \p LineNumber is the line number in
1984dabac20SChandler Carruth /// the input file from which the pattern string was read. Returns true in
1994dabac20SChandler Carruth /// case of an error, false otherwise.
200e8f2fb20SChandler Carruth bool Pattern::ParsePattern(StringRef PatternStr, StringRef Prefix,
201e8f2fb20SChandler Carruth                            SourceMgr &SM, unsigned LineNumber) {
20285913ccaSJames Y Knight   bool MatchFullLinesHere = MatchFullLines && CheckTy != Check::CheckNot;
20385913ccaSJames Y Knight 
20492987fb3SAlexander Kornienko   this->LineNumber = LineNumber;
2050a4c44bdSChris Lattner   PatternLoc = SMLoc::getFromPointer(PatternStr.data());
2060a4c44bdSChris Lattner 
2071714676aSTom de Vries   if (!(NoCanonicalizeWhiteSpace && MatchFullLines))
20874d50731SChris Lattner     // Ignore trailing whitespace.
20974d50731SChris Lattner     while (!PatternStr.empty() &&
21074d50731SChris Lattner            (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
21174d50731SChris Lattner       PatternStr = PatternStr.substr(0, PatternStr.size() - 1);
21274d50731SChris Lattner 
21374d50731SChris Lattner   // Check that there is something on the line.
2145507f668SJames Henderson   if (PatternStr.empty() && CheckTy != Check::CheckEmpty) {
21503b80a40SChris Lattner     SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
216e8f2fb20SChandler Carruth                     "found empty check string with prefix '" + Prefix + ":'");
21774d50731SChris Lattner     return true;
21874d50731SChris Lattner   }
21974d50731SChris Lattner 
2205507f668SJames Henderson   if (!PatternStr.empty() && CheckTy == Check::CheckEmpty) {
2215507f668SJames Henderson     SM.PrintMessage(
2225507f668SJames Henderson         PatternLoc, SourceMgr::DK_Error,
2235507f668SJames Henderson         "found non-empty check string for empty check with prefix '" + Prefix +
2245507f668SJames Henderson             ":'");
2255507f668SJames Henderson     return true;
2265507f668SJames Henderson   }
2275507f668SJames Henderson 
2285507f668SJames Henderson   if (CheckTy == Check::CheckEmpty) {
2295507f668SJames Henderson     RegExStr = "(\n$)";
2305507f668SJames Henderson     return false;
2315507f668SJames Henderson   }
2325507f668SJames Henderson 
233221460e0SChris Lattner   // Check to see if this is a fixed string, or if it has regex pieces.
23485913ccaSJames Y Knight   if (!MatchFullLinesHere &&
23585913ccaSJames Y Knight       (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos &&
23685913ccaSJames Y Knight                                  PatternStr.find("[[") == StringRef::npos))) {
237221460e0SChris Lattner     FixedStr = PatternStr;
238221460e0SChris Lattner     return false;
239221460e0SChris Lattner   }
240221460e0SChris Lattner 
24185913ccaSJames Y Knight   if (MatchFullLinesHere) {
24285913ccaSJames Y Knight     RegExStr += '^';
24385913ccaSJames Y Knight     if (!NoCanonicalizeWhiteSpace)
24485913ccaSJames Y Knight       RegExStr += " *";
24585913ccaSJames Y Knight   }
24685913ccaSJames Y Knight 
2478879e06dSChris Lattner   // Paren value #0 is for the fully matched string.  Any new parenthesized
24853e0679dSChris Lattner   // values add from there.
2498879e06dSChris Lattner   unsigned CurParen = 1;
2508879e06dSChris Lattner 
251b16ab0c4SChris Lattner   // Otherwise, there is at least one regex piece.  Build up the regex pattern
252b16ab0c4SChris Lattner   // by escaping scary characters in fixed strings, building up one big regex.
253f08d2db9SChris Lattner   while (!PatternStr.empty()) {
2548879e06dSChris Lattner     // RegEx matches.
25553e0679dSChris Lattner     if (PatternStr.startswith("{{")) {
25643d50d4aSEli Bendersky       // This is the start of a regex match.  Scan for the }}.
257f08d2db9SChris Lattner       size_t End = PatternStr.find("}}");
258f08d2db9SChris Lattner       if (End == StringRef::npos) {
259f08d2db9SChris Lattner         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
26003b80a40SChris Lattner                         SourceMgr::DK_Error,
26103b80a40SChris Lattner                         "found start of regex string with no end '}}'");
262f08d2db9SChris Lattner         return true;
263f08d2db9SChris Lattner       }
264f08d2db9SChris Lattner 
265e53c95f1SChris Lattner       // Enclose {{}} patterns in parens just like [[]] even though we're not
266e53c95f1SChris Lattner       // capturing the result for any purpose.  This is required in case the
267e53c95f1SChris Lattner       // expression contains an alternation like: CHECK:  abc{{x|z}}def.  We
268e53c95f1SChris Lattner       // want this to turn into: "abc(x|z)def" not "abcx|zdef".
269e53c95f1SChris Lattner       RegExStr += '(';
270e53c95f1SChris Lattner       ++CurParen;
271e53c95f1SChris Lattner 
2728879e06dSChris Lattner       if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM))
2738879e06dSChris Lattner         return true;
274e53c95f1SChris Lattner       RegExStr += ')';
27553e0679dSChris Lattner 
2768879e06dSChris Lattner       PatternStr = PatternStr.substr(End + 2);
2778879e06dSChris Lattner       continue;
2788879e06dSChris Lattner     }
2798879e06dSChris Lattner 
2808879e06dSChris Lattner     // Named RegEx matches.  These are of two forms: [[foo:.*]] which matches .*
2818879e06dSChris Lattner     // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
2828879e06dSChris Lattner     // second form is [[foo]] which is a reference to foo.  The variable name
28357cb733bSDaniel Dunbar     // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
2848879e06dSChris Lattner     // it.  This is to catch some common errors.
28553e0679dSChris Lattner     if (PatternStr.startswith("[[")) {
286061d2baaSEli Bendersky       // Find the closing bracket pair ending the match.  End is going to be an
287061d2baaSEli Bendersky       // offset relative to the beginning of the match string.
28881e5cd9eSAdrian Prantl       size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
289061d2baaSEli Bendersky 
2908879e06dSChris Lattner       if (End == StringRef::npos) {
2918879e06dSChris Lattner         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
29203b80a40SChris Lattner                         SourceMgr::DK_Error,
29303b80a40SChris Lattner                         "invalid named regex reference, no ]] found");
294f08d2db9SChris Lattner         return true;
295f08d2db9SChris Lattner       }
296f08d2db9SChris Lattner 
297061d2baaSEli Bendersky       StringRef MatchStr = PatternStr.substr(2, End);
298061d2baaSEli Bendersky       PatternStr = PatternStr.substr(End + 4);
2998879e06dSChris Lattner 
3008879e06dSChris Lattner       // Get the regex name (e.g. "foo").
3018879e06dSChris Lattner       size_t NameEnd = MatchStr.find(':');
3028879e06dSChris Lattner       StringRef Name = MatchStr.substr(0, NameEnd);
3038879e06dSChris Lattner 
3048879e06dSChris Lattner       if (Name.empty()) {
30503b80a40SChris Lattner         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
30603b80a40SChris Lattner                         "invalid name in named regex: empty name");
3078879e06dSChris Lattner         return true;
3088879e06dSChris Lattner       }
3098879e06dSChris Lattner 
31092987fb3SAlexander Kornienko       // Verify that the name/expression is well formed. FileCheck currently
31192987fb3SAlexander Kornienko       // supports @LINE, @LINE+number, @LINE-number expressions. The check here
31292987fb3SAlexander Kornienko       // is relaxed, more strict check is performed in \c EvaluateExpression.
31392987fb3SAlexander Kornienko       bool IsExpression = false;
31492987fb3SAlexander Kornienko       for (unsigned i = 0, e = Name.size(); i != e; ++i) {
315f55e72a5SArtem Belevich         if (i == 0) {
316f55e72a5SArtem Belevich           if (Name[i] == '$')  // Global vars start with '$'
317f55e72a5SArtem Belevich             continue;
318f55e72a5SArtem Belevich           if (Name[i] == '@') {
31992987fb3SAlexander Kornienko             if (NameEnd != StringRef::npos) {
32092987fb3SAlexander Kornienko               SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
32192987fb3SAlexander Kornienko                               SourceMgr::DK_Error,
32292987fb3SAlexander Kornienko                               "invalid name in named regex definition");
32392987fb3SAlexander Kornienko               return true;
32492987fb3SAlexander Kornienko             }
32592987fb3SAlexander Kornienko             IsExpression = true;
32692987fb3SAlexander Kornienko             continue;
32792987fb3SAlexander Kornienko           }
328f55e72a5SArtem Belevich         }
32992987fb3SAlexander Kornienko         if (Name[i] != '_' && !isalnum(Name[i]) &&
33092987fb3SAlexander Kornienko             (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
3318879e06dSChris Lattner           SM.PrintMessage(SMLoc::getFromPointer(Name.data() + i),
33203b80a40SChris Lattner                           SourceMgr::DK_Error, "invalid name in named regex");
3338879e06dSChris Lattner           return true;
3348879e06dSChris Lattner         }
33592987fb3SAlexander Kornienko       }
3368879e06dSChris Lattner 
3378879e06dSChris Lattner       // Name can't start with a digit.
33883c74e9fSGuy Benyei       if (isdigit(static_cast<unsigned char>(Name[0]))) {
33903b80a40SChris Lattner         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
34003b80a40SChris Lattner                         "invalid name in named regex");
3418879e06dSChris Lattner         return true;
3428879e06dSChris Lattner       }
3438879e06dSChris Lattner 
3448879e06dSChris Lattner       // Handle [[foo]].
3458879e06dSChris Lattner       if (NameEnd == StringRef::npos) {
346e8b8f1bcSEli Bendersky         // Handle variables that were defined earlier on the same line by
347e8b8f1bcSEli Bendersky         // emitting a backreference.
348e8b8f1bcSEli Bendersky         if (VariableDefs.find(Name) != VariableDefs.end()) {
349e8b8f1bcSEli Bendersky           unsigned VarParenNum = VariableDefs[Name];
350e8b8f1bcSEli Bendersky           if (VarParenNum < 1 || VarParenNum > 9) {
351e8b8f1bcSEli Bendersky             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
352e8b8f1bcSEli Bendersky                             SourceMgr::DK_Error,
353e8b8f1bcSEli Bendersky                             "Can't back-reference more than 9 variables");
354e8b8f1bcSEli Bendersky             return true;
355e8b8f1bcSEli Bendersky           }
356e8b8f1bcSEli Bendersky           AddBackrefToRegEx(VarParenNum);
357e8b8f1bcSEli Bendersky         } else {
3588879e06dSChris Lattner           VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
359e8b8f1bcSEli Bendersky         }
3608879e06dSChris Lattner         continue;
3618879e06dSChris Lattner       }
3628879e06dSChris Lattner 
3638879e06dSChris Lattner       // Handle [[foo:.*]].
364e8b8f1bcSEli Bendersky       VariableDefs[Name] = CurParen;
3658879e06dSChris Lattner       RegExStr += '(';
3668879e06dSChris Lattner       ++CurParen;
3678879e06dSChris Lattner 
3688879e06dSChris Lattner       if (AddRegExToRegEx(MatchStr.substr(NameEnd + 1), CurParen, SM))
3698879e06dSChris Lattner         return true;
3708879e06dSChris Lattner 
3718879e06dSChris Lattner       RegExStr += ')';
3728879e06dSChris Lattner     }
3738879e06dSChris Lattner 
3748879e06dSChris Lattner     // Handle fixed string matches.
3758879e06dSChris Lattner     // Find the end, which is the start of the next regex.
3768879e06dSChris Lattner     size_t FixedMatchEnd = PatternStr.find("{{");
3778879e06dSChris Lattner     FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
3786f4f77b7SHans Wennborg     RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
3798879e06dSChris Lattner     PatternStr = PatternStr.substr(FixedMatchEnd);
380f08d2db9SChris Lattner   }
381f08d2db9SChris Lattner 
38285913ccaSJames Y Knight   if (MatchFullLinesHere) {
38385913ccaSJames Y Knight     if (!NoCanonicalizeWhiteSpace)
38485913ccaSJames Y Knight       RegExStr += " *";
38585913ccaSJames Y Knight     RegExStr += '$';
38685913ccaSJames Y Knight   }
38785913ccaSJames Y Knight 
38874d50731SChris Lattner   return false;
38974d50731SChris Lattner }
39074d50731SChris Lattner 
391e8f2fb20SChandler Carruth bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) {
392e8b8f1bcSEli Bendersky   Regex R(RS);
3938879e06dSChris Lattner   std::string Error;
3948879e06dSChris Lattner   if (!R.isValid(Error)) {
395e8b8f1bcSEli Bendersky     SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
39603b80a40SChris Lattner                     "invalid regex: " + Error);
3978879e06dSChris Lattner     return true;
3988879e06dSChris Lattner   }
3998879e06dSChris Lattner 
400e8b8f1bcSEli Bendersky   RegExStr += RS.str();
4018879e06dSChris Lattner   CurParen += R.getNumMatches();
4028879e06dSChris Lattner   return false;
4038879e06dSChris Lattner }
404b16ab0c4SChris Lattner 
405e8b8f1bcSEli Bendersky void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
406e8b8f1bcSEli Bendersky   assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
407e8f2fb20SChandler Carruth   std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum);
408e8b8f1bcSEli Bendersky   RegExStr += Backref;
409e8b8f1bcSEli Bendersky }
410e8b8f1bcSEli Bendersky 
4114dabac20SChandler Carruth /// Evaluates expression and stores the result to \p Value.
4124dabac20SChandler Carruth ///
4134dabac20SChandler Carruth /// Returns true on success and false when the expression has invalid syntax.
41492987fb3SAlexander Kornienko bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
41592987fb3SAlexander Kornienko   // The only supported expression is @LINE([\+-]\d+)?
41692987fb3SAlexander Kornienko   if (!Expr.startswith("@LINE"))
41792987fb3SAlexander Kornienko     return false;
41892987fb3SAlexander Kornienko   Expr = Expr.substr(StringRef("@LINE").size());
41992987fb3SAlexander Kornienko   int Offset = 0;
42092987fb3SAlexander Kornienko   if (!Expr.empty()) {
42192987fb3SAlexander Kornienko     if (Expr[0] == '+')
42292987fb3SAlexander Kornienko       Expr = Expr.substr(1);
42392987fb3SAlexander Kornienko     else if (Expr[0] != '-')
42492987fb3SAlexander Kornienko       return false;
42592987fb3SAlexander Kornienko     if (Expr.getAsInteger(10, Offset))
42692987fb3SAlexander Kornienko       return false;
42792987fb3SAlexander Kornienko   }
42892987fb3SAlexander Kornienko   Value = llvm::itostr(LineNumber + Offset);
42992987fb3SAlexander Kornienko   return true;
43092987fb3SAlexander Kornienko }
43192987fb3SAlexander Kornienko 
4324dabac20SChandler Carruth /// Matches the pattern string against the input buffer \p Buffer
4334dabac20SChandler Carruth ///
4344dabac20SChandler Carruth /// This returns the position that is matched or npos if there is no match. If
4354dabac20SChandler Carruth /// there is a match, the size of the matched string is returned in \p
4364dabac20SChandler Carruth /// MatchLen.
4374dabac20SChandler Carruth ///
4384dabac20SChandler Carruth /// The \p VariableTable StringMap provides the current values of filecheck
4394dabac20SChandler Carruth /// variables and is updated if this match defines new values.
4408879e06dSChris Lattner size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
4418879e06dSChris Lattner                       StringMap<StringRef> &VariableTable) const {
442eba55822SJakob Stoklund Olesen   // If this is the EOF pattern, match it immediately.
44338820972SMatt Arsenault   if (CheckTy == Check::CheckEOF) {
444eba55822SJakob Stoklund Olesen     MatchLen = 0;
445eba55822SJakob Stoklund Olesen     return Buffer.size();
446eba55822SJakob Stoklund Olesen   }
447eba55822SJakob Stoklund Olesen 
448221460e0SChris Lattner   // If this is a fixed string pattern, just match it now.
449221460e0SChris Lattner   if (!FixedStr.empty()) {
450221460e0SChris Lattner     MatchLen = FixedStr.size();
451221460e0SChris Lattner     return Buffer.find(FixedStr);
452221460e0SChris Lattner   }
453221460e0SChris Lattner 
454b16ab0c4SChris Lattner   // Regex match.
4558879e06dSChris Lattner 
4568879e06dSChris Lattner   // If there are variable uses, we need to create a temporary string with the
4578879e06dSChris Lattner   // actual value.
4588879e06dSChris Lattner   StringRef RegExToMatch = RegExStr;
4598879e06dSChris Lattner   std::string TmpStr;
4608879e06dSChris Lattner   if (!VariableUses.empty()) {
4618879e06dSChris Lattner     TmpStr = RegExStr;
4628879e06dSChris Lattner 
4638879e06dSChris Lattner     unsigned InsertOffset = 0;
4648f870499SBenjamin Kramer     for (const auto &VariableUse : VariableUses) {
46592987fb3SAlexander Kornienko       std::string Value;
46692987fb3SAlexander Kornienko 
4678f870499SBenjamin Kramer       if (VariableUse.first[0] == '@') {
4688f870499SBenjamin Kramer         if (!EvaluateExpression(VariableUse.first, Value))
46992987fb3SAlexander Kornienko           return StringRef::npos;
47092987fb3SAlexander Kornienko       } else {
471e0ef65abSDaniel Dunbar         StringMap<StringRef>::iterator it =
4728f870499SBenjamin Kramer             VariableTable.find(VariableUse.first);
473e0ef65abSDaniel Dunbar         // If the variable is undefined, return an error.
474e0ef65abSDaniel Dunbar         if (it == VariableTable.end())
475e0ef65abSDaniel Dunbar           return StringRef::npos;
476e0ef65abSDaniel Dunbar 
4776f4f77b7SHans Wennborg         // Look up the value and escape it so that we can put it into the regex.
4786f4f77b7SHans Wennborg         Value += Regex::escape(it->second);
47992987fb3SAlexander Kornienko       }
4808879e06dSChris Lattner 
4818879e06dSChris Lattner       // Plop it into the regex at the adjusted offset.
4828f870499SBenjamin Kramer       TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset,
4838879e06dSChris Lattner                     Value.begin(), Value.end());
4848879e06dSChris Lattner       InsertOffset += Value.size();
4858879e06dSChris Lattner     }
4868879e06dSChris Lattner 
4878879e06dSChris Lattner     // Match the newly constructed regex.
4888879e06dSChris Lattner     RegExToMatch = TmpStr;
4898879e06dSChris Lattner   }
4908879e06dSChris Lattner 
491b16ab0c4SChris Lattner   SmallVector<StringRef, 4> MatchInfo;
4928879e06dSChris Lattner   if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
493f08d2db9SChris Lattner     return StringRef::npos;
494b16ab0c4SChris Lattner 
495b16ab0c4SChris Lattner   // Successful regex match.
496b16ab0c4SChris Lattner   assert(!MatchInfo.empty() && "Didn't get any match");
497b16ab0c4SChris Lattner   StringRef FullMatch = MatchInfo[0];
498b16ab0c4SChris Lattner 
4998879e06dSChris Lattner   // If this defines any variables, remember their values.
5008f870499SBenjamin Kramer   for (const auto &VariableDef : VariableDefs) {
5018f870499SBenjamin Kramer     assert(VariableDef.second < MatchInfo.size() && "Internal paren error");
5028f870499SBenjamin Kramer     VariableTable[VariableDef.first] = MatchInfo[VariableDef.second];
5030a4c44bdSChris Lattner   }
5040a4c44bdSChris Lattner 
505dc5ba317SJoel E. Denny   // Like CHECK-NEXT, CHECK-EMPTY's match range is considered to start after
506dc5ba317SJoel E. Denny   // the required preceding newline, which is consumed by the pattern in the
507dc5ba317SJoel E. Denny   // case of CHECK-EMPTY but not CHECK-NEXT.
508dc5ba317SJoel E. Denny   size_t MatchStartSkip = CheckTy == Check::CheckEmpty;
509dc5ba317SJoel E. Denny   MatchLen = FullMatch.size() - MatchStartSkip;
510dc5ba317SJoel E. Denny   return FullMatch.data() - Buffer.data() + MatchStartSkip;
511f08d2db9SChris Lattner }
512f08d2db9SChris Lattner 
5134dabac20SChandler Carruth 
5144dabac20SChandler Carruth /// Computes an arbitrary estimate for the quality of matching this pattern at
5154dabac20SChandler Carruth /// the start of \p Buffer; a distance of zero should correspond to a perfect
5164dabac20SChandler Carruth /// match.
517e8f2fb20SChandler Carruth unsigned
518e8f2fb20SChandler Carruth Pattern::ComputeMatchDistance(StringRef Buffer,
519fd29d886SDaniel Dunbar                               const StringMap<StringRef> &VariableTable) const {
520fd29d886SDaniel Dunbar   // Just compute the number of matching characters. For regular expressions, we
521fd29d886SDaniel Dunbar   // just compare against the regex itself and hope for the best.
522fd29d886SDaniel Dunbar   //
523fd29d886SDaniel Dunbar   // FIXME: One easy improvement here is have the regex lib generate a single
524fd29d886SDaniel Dunbar   // example regular expression which matches, and use that as the example
525fd29d886SDaniel Dunbar   // string.
526fd29d886SDaniel Dunbar   StringRef ExampleString(FixedStr);
527fd29d886SDaniel Dunbar   if (ExampleString.empty())
528fd29d886SDaniel Dunbar     ExampleString = RegExStr;
529fd29d886SDaniel Dunbar 
530e9aa36c8SDaniel Dunbar   // Only compare up to the first line in the buffer, or the string size.
531e9aa36c8SDaniel Dunbar   StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
532e9aa36c8SDaniel Dunbar   BufferPrefix = BufferPrefix.split('\n').first;
533e9aa36c8SDaniel Dunbar   return BufferPrefix.edit_distance(ExampleString);
534fd29d886SDaniel Dunbar }
535fd29d886SDaniel Dunbar 
536dc5ba317SJoel E. Denny void Pattern::PrintVariableUses(const SourceMgr &SM, StringRef Buffer,
537dc5ba317SJoel E. Denny                                 const StringMap<StringRef> &VariableTable,
538dc5ba317SJoel E. Denny                                 SMRange MatchRange) const {
539e0ef65abSDaniel Dunbar   // If this was a regular expression using variables, print the current
540e0ef65abSDaniel Dunbar   // variable values.
541e0ef65abSDaniel Dunbar   if (!VariableUses.empty()) {
5428f870499SBenjamin Kramer     for (const auto &VariableUse : VariableUses) {
543e69170a1SAlp Toker       SmallString<256> Msg;
544e69170a1SAlp Toker       raw_svector_ostream OS(Msg);
5458f870499SBenjamin Kramer       StringRef Var = VariableUse.first;
54692987fb3SAlexander Kornienko       if (Var[0] == '@') {
54792987fb3SAlexander Kornienko         std::string Value;
54892987fb3SAlexander Kornienko         if (EvaluateExpression(Var, Value)) {
54992987fb3SAlexander Kornienko           OS << "with expression \"";
55092987fb3SAlexander Kornienko           OS.write_escaped(Var) << "\" equal to \"";
55192987fb3SAlexander Kornienko           OS.write_escaped(Value) << "\"";
55292987fb3SAlexander Kornienko         } else {
55392987fb3SAlexander Kornienko           OS << "uses incorrect expression \"";
55492987fb3SAlexander Kornienko           OS.write_escaped(Var) << "\"";
55592987fb3SAlexander Kornienko         }
55692987fb3SAlexander Kornienko       } else {
55792987fb3SAlexander Kornienko         StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
558e0ef65abSDaniel Dunbar 
559e0ef65abSDaniel Dunbar         // Check for undefined variable references.
560e0ef65abSDaniel Dunbar         if (it == VariableTable.end()) {
561e0ef65abSDaniel Dunbar           OS << "uses undefined variable \"";
56292987fb3SAlexander Kornienko           OS.write_escaped(Var) << "\"";
563e0ef65abSDaniel Dunbar         } else {
564e0ef65abSDaniel Dunbar           OS << "with variable \"";
565e0ef65abSDaniel Dunbar           OS.write_escaped(Var) << "\" equal to \"";
566e0ef65abSDaniel Dunbar           OS.write_escaped(it->second) << "\"";
567e0ef65abSDaniel Dunbar         }
56892987fb3SAlexander Kornienko       }
569e0ef65abSDaniel Dunbar 
570dc5ba317SJoel E. Denny       if (MatchRange.isValid())
571dc5ba317SJoel E. Denny         SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, OS.str(),
572dc5ba317SJoel E. Denny                         {MatchRange});
573dc5ba317SJoel E. Denny       else
574dc5ba317SJoel E. Denny         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()),
575dc5ba317SJoel E. Denny                         SourceMgr::DK_Note, OS.str());
576dc5ba317SJoel E. Denny     }
577e0ef65abSDaniel Dunbar   }
578e0ef65abSDaniel Dunbar }
579fd29d886SDaniel Dunbar 
580dc5ba317SJoel E. Denny void Pattern::PrintFuzzyMatch(
581dc5ba317SJoel E. Denny     const SourceMgr &SM, StringRef Buffer,
582dc5ba317SJoel E. Denny     const StringMap<StringRef> &VariableTable) const {
583fd29d886SDaniel Dunbar   // Attempt to find the closest/best fuzzy match.  Usually an error happens
584fd29d886SDaniel Dunbar   // because some string in the output didn't exactly match. In these cases, we
585fd29d886SDaniel Dunbar   // would like to show the user a best guess at what "should have" matched, to
586fd29d886SDaniel Dunbar   // save them having to actually check the input manually.
587fd29d886SDaniel Dunbar   size_t NumLinesForward = 0;
588fd29d886SDaniel Dunbar   size_t Best = StringRef::npos;
589fd29d886SDaniel Dunbar   double BestQuality = 0;
590fd29d886SDaniel Dunbar 
591fd29d886SDaniel Dunbar   // Use an arbitrary 4k limit on how far we will search.
5922bf486ebSDan Gohman   for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
593fd29d886SDaniel Dunbar     if (Buffer[i] == '\n')
594fd29d886SDaniel Dunbar       ++NumLinesForward;
595fd29d886SDaniel Dunbar 
596df22bbf7SDan Gohman     // Patterns have leading whitespace stripped, so skip whitespace when
597df22bbf7SDan Gohman     // looking for something which looks like a pattern.
598df22bbf7SDan Gohman     if (Buffer[i] == ' ' || Buffer[i] == '\t')
599df22bbf7SDan Gohman       continue;
600df22bbf7SDan Gohman 
601fd29d886SDaniel Dunbar     // Compute the "quality" of this match as an arbitrary combination of the
602fd29d886SDaniel Dunbar     // match distance and the number of lines skipped to get to this match.
603fd29d886SDaniel Dunbar     unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
604fd29d886SDaniel Dunbar     double Quality = Distance + (NumLinesForward / 100.);
605fd29d886SDaniel Dunbar 
606fd29d886SDaniel Dunbar     if (Quality < BestQuality || Best == StringRef::npos) {
607fd29d886SDaniel Dunbar       Best = i;
608fd29d886SDaniel Dunbar       BestQuality = Quality;
609fd29d886SDaniel Dunbar     }
610fd29d886SDaniel Dunbar   }
611fd29d886SDaniel Dunbar 
612fd29d886SDaniel Dunbar   // Print the "possible intended match here" line if we found something
613c069cc8eSDaniel Dunbar   // reasonable and not equal to what we showed in the "scanning from here"
614c069cc8eSDaniel Dunbar   // line.
615c069cc8eSDaniel Dunbar   if (Best && Best != StringRef::npos && BestQuality < 50) {
616fd29d886SDaniel Dunbar     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
61703b80a40SChris Lattner                     SourceMgr::DK_Note, "possible intended match here");
618fd29d886SDaniel Dunbar 
619fd29d886SDaniel Dunbar     // FIXME: If we wanted to be really friendly we would show why the match
620fd29d886SDaniel Dunbar     // failed, as it can be hard to spot simple one character differences.
621fd29d886SDaniel Dunbar   }
622e0ef65abSDaniel Dunbar }
62374d50731SChris Lattner 
6244dabac20SChandler Carruth /// Finds the closing sequence of a regex variable usage or definition.
6254dabac20SChandler Carruth ///
6264dabac20SChandler Carruth /// \p Str has to point in the beginning of the definition (right after the
6274dabac20SChandler Carruth /// opening sequence). Returns the offset of the closing sequence within Str,
6284dabac20SChandler Carruth /// or npos if it was not found.
62981e5cd9eSAdrian Prantl size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
630061d2baaSEli Bendersky   // Offset keeps track of the current offset within the input Str
631061d2baaSEli Bendersky   size_t Offset = 0;
632061d2baaSEli Bendersky   // [...] Nesting depth
633061d2baaSEli Bendersky   size_t BracketDepth = 0;
634061d2baaSEli Bendersky 
635061d2baaSEli Bendersky   while (!Str.empty()) {
636061d2baaSEli Bendersky     if (Str.startswith("]]") && BracketDepth == 0)
637061d2baaSEli Bendersky       return Offset;
638061d2baaSEli Bendersky     if (Str[0] == '\\') {
639061d2baaSEli Bendersky       // Backslash escapes the next char within regexes, so skip them both.
640061d2baaSEli Bendersky       Str = Str.substr(2);
641061d2baaSEli Bendersky       Offset += 2;
642061d2baaSEli Bendersky     } else {
643061d2baaSEli Bendersky       switch (Str[0]) {
644061d2baaSEli Bendersky       default:
645061d2baaSEli Bendersky         break;
646061d2baaSEli Bendersky       case '[':
647061d2baaSEli Bendersky         BracketDepth++;
648061d2baaSEli Bendersky         break;
649061d2baaSEli Bendersky       case ']':
65081e5cd9eSAdrian Prantl         if (BracketDepth == 0) {
65181e5cd9eSAdrian Prantl           SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
65281e5cd9eSAdrian Prantl                           SourceMgr::DK_Error,
65381e5cd9eSAdrian Prantl                           "missing closing \"]\" for regex variable");
65481e5cd9eSAdrian Prantl           exit(1);
65581e5cd9eSAdrian Prantl         }
656061d2baaSEli Bendersky         BracketDepth--;
657061d2baaSEli Bendersky         break;
658061d2baaSEli Bendersky       }
659061d2baaSEli Bendersky       Str = Str.substr(1);
660061d2baaSEli Bendersky       Offset++;
661061d2baaSEli Bendersky     }
662061d2baaSEli Bendersky   }
663061d2baaSEli Bendersky 
664061d2baaSEli Bendersky   return StringRef::npos;
665061d2baaSEli Bendersky }
666061d2baaSEli Bendersky 
66774d50731SChris Lattner //===----------------------------------------------------------------------===//
66874d50731SChris Lattner // Check Strings.
66974d50731SChris Lattner //===----------------------------------------------------------------------===//
6703b40b445SChris Lattner 
6714dabac20SChandler Carruth /// A check that we found in the input file.
6723b40b445SChris Lattner struct CheckString {
6734dabac20SChandler Carruth   /// The pattern to match.
6743b40b445SChris Lattner   Pattern Pat;
67526cccfe1SChris Lattner 
6764dabac20SChandler Carruth   /// Which prefix name this check matched.
67713df4626SMatt Arsenault   StringRef Prefix;
67813df4626SMatt Arsenault 
6794dabac20SChandler Carruth   /// The location in the match file that the check string was specified.
68026cccfe1SChris Lattner   SMLoc Loc;
68126cccfe1SChris Lattner 
6824dabac20SChandler Carruth   /// All of the strings that are disallowed from occurring between this match
6834dabac20SChandler Carruth   /// string and the previous one (or start of file).
68491a1b2c9SMichael Liao   std::vector<Pattern> DagNotStrings;
685236d2d5eSChris Lattner 
68685913ccaSJames Y Knight   CheckString(const Pattern &P, StringRef S, SMLoc L)
68785913ccaSJames Y Knight       : Pat(P), Prefix(S), Loc(L) {}
688dcc7d48dSMichael Liao 
689e93a3a08SStephen Lin   size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
690f8bd2e5bSStephen Lin                size_t &MatchLen, StringMap<StringRef> &VariableTable) const;
691dcc7d48dSMichael Liao 
692dcc7d48dSMichael Liao   bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
69301ac1707SDuncan P. N. Exon Smith   bool CheckSame(const SourceMgr &SM, StringRef Buffer) const;
694dcc7d48dSMichael Liao   bool CheckNot(const SourceMgr &SM, StringRef Buffer,
69591a1b2c9SMichael Liao                 const std::vector<const Pattern *> &NotStrings,
69691a1b2c9SMichael Liao                 StringMap<StringRef> &VariableTable) const;
69791a1b2c9SMichael Liao   size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
69891a1b2c9SMichael Liao                   std::vector<const Pattern *> &NotStrings,
699dcc7d48dSMichael Liao                   StringMap<StringRef> &VariableTable) const;
70026cccfe1SChris Lattner };
70126cccfe1SChris Lattner 
70220247900SChandler Carruth /// Canonicalize whitespaces in the file. Line endings are replaced with
70320247900SChandler Carruth /// UNIX-style '\n'.
704b03c166aSChandler Carruth static StringRef CanonicalizeFile(MemoryBuffer &MB,
70520247900SChandler Carruth                                   SmallVectorImpl<char> &OutputBuffer) {
70620247900SChandler Carruth   OutputBuffer.reserve(MB.getBufferSize());
707a2f8fc5aSChris Lattner 
70820247900SChandler Carruth   for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd();
709a2f8fc5aSChris Lattner        Ptr != End; ++Ptr) {
710fd781bf0SNAKAMURA Takumi     // Eliminate trailing dosish \r.
711fd781bf0SNAKAMURA Takumi     if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
712fd781bf0SNAKAMURA Takumi       continue;
713fd781bf0SNAKAMURA Takumi     }
714fd781bf0SNAKAMURA Takumi 
7155ea04c38SGuy Benyei     // If current char is not a horizontal whitespace or if horizontal
7165ea04c38SGuy Benyei     // whitespace canonicalization is disabled, dump it to output as is.
717b03c166aSChandler Carruth     if (NoCanonicalizeWhiteSpace || (*Ptr != ' ' && *Ptr != '\t')) {
71820247900SChandler Carruth       OutputBuffer.push_back(*Ptr);
719a2f8fc5aSChris Lattner       continue;
720a2f8fc5aSChris Lattner     }
721a2f8fc5aSChris Lattner 
722a2f8fc5aSChris Lattner     // Otherwise, add one space and advance over neighboring space.
72320247900SChandler Carruth     OutputBuffer.push_back(' ');
724e8f2fb20SChandler Carruth     while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t'))
725a2f8fc5aSChris Lattner       ++Ptr;
726a2f8fc5aSChris Lattner   }
727a2f8fc5aSChris Lattner 
72820247900SChandler Carruth   // Add a null byte and then return all but that byte.
72920247900SChandler Carruth   OutputBuffer.push_back('\0');
73020247900SChandler Carruth   return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1);
731a2f8fc5aSChris Lattner }
732a2f8fc5aSChris Lattner 
/// Returns true if \p c can be part of a check-prefix-like word: an
/// alphanumeric character, '-' or '_'.
static bool IsPartOfWord(char c) {
  // isalnum() is only defined for EOF and values representable as unsigned
  // char. Plain char may be signed, so bytes >= 0x80 must be converted first.
  return (isalnum(static_cast<unsigned char>(c)) || c == '-' || c == '_');
}
73638820972SMatt Arsenault 
73713df4626SMatt Arsenault // Get the size of the prefix extension.
73813df4626SMatt Arsenault static size_t CheckTypeSize(Check::CheckType Ty) {
73913df4626SMatt Arsenault   switch (Ty) {
74013df4626SMatt Arsenault   case Check::CheckNone:
741a908e7bdSPaul Robinson   case Check::CheckBadNot:
74213df4626SMatt Arsenault     return 0;
74313df4626SMatt Arsenault 
74413df4626SMatt Arsenault   case Check::CheckPlain:
74513df4626SMatt Arsenault     return sizeof(":") - 1;
74613df4626SMatt Arsenault 
74713df4626SMatt Arsenault   case Check::CheckNext:
74813df4626SMatt Arsenault     return sizeof("-NEXT:") - 1;
74913df4626SMatt Arsenault 
75001ac1707SDuncan P. N. Exon Smith   case Check::CheckSame:
75101ac1707SDuncan P. N. Exon Smith     return sizeof("-SAME:") - 1;
75201ac1707SDuncan P. N. Exon Smith 
75313df4626SMatt Arsenault   case Check::CheckNot:
75413df4626SMatt Arsenault     return sizeof("-NOT:") - 1;
75513df4626SMatt Arsenault 
75613df4626SMatt Arsenault   case Check::CheckDAG:
75713df4626SMatt Arsenault     return sizeof("-DAG:") - 1;
75813df4626SMatt Arsenault 
75913df4626SMatt Arsenault   case Check::CheckLabel:
76013df4626SMatt Arsenault     return sizeof("-LABEL:") - 1;
76113df4626SMatt Arsenault 
7625507f668SJames Henderson   case Check::CheckEmpty:
7635507f668SJames Henderson     return sizeof("-EMPTY:") - 1;
7645507f668SJames Henderson 
76513df4626SMatt Arsenault   case Check::CheckEOF:
76613df4626SMatt Arsenault     llvm_unreachable("Should not be using EOF size");
76713df4626SMatt Arsenault   }
76813df4626SMatt Arsenault 
76913df4626SMatt Arsenault   llvm_unreachable("Bad check type");
77013df4626SMatt Arsenault }
77113df4626SMatt Arsenault 
772dc5ba317SJoel E. Denny // Get a description of the type.
773dc5ba317SJoel E. Denny static std::string CheckTypeName(StringRef Prefix, Check::CheckType Ty) {
774dc5ba317SJoel E. Denny   switch (Ty) {
775dc5ba317SJoel E. Denny   case Check::CheckNone:
776dc5ba317SJoel E. Denny     return "invalid";
777dc5ba317SJoel E. Denny   case Check::CheckPlain:
778dc5ba317SJoel E. Denny     return Prefix;
779dc5ba317SJoel E. Denny   case Check::CheckNext:
780dc5ba317SJoel E. Denny     return Prefix.str() + "-NEXT";
781dc5ba317SJoel E. Denny   case Check::CheckSame:
782dc5ba317SJoel E. Denny     return Prefix.str() + "-SAME";
783dc5ba317SJoel E. Denny   case Check::CheckNot:
784dc5ba317SJoel E. Denny     return Prefix.str() + "-NOT";
785dc5ba317SJoel E. Denny   case Check::CheckDAG:
786dc5ba317SJoel E. Denny     return Prefix.str() + "-DAG";
787dc5ba317SJoel E. Denny   case Check::CheckLabel:
788dc5ba317SJoel E. Denny     return Prefix.str() + "-LABEL";
789dc5ba317SJoel E. Denny   case Check::CheckEmpty:
790dc5ba317SJoel E. Denny     return Prefix.str() + "-EMPTY";
791dc5ba317SJoel E. Denny   case Check::CheckEOF:
792dc5ba317SJoel E. Denny     return "implicit EOF";
793dc5ba317SJoel E. Denny   case Check::CheckBadNot:
794dc5ba317SJoel E. Denny     return "bad NOT";
795dc5ba317SJoel E. Denny   }
796dc5ba317SJoel E. Denny   llvm_unreachable("unknown CheckType");
797dc5ba317SJoel E. Denny }
798dc5ba317SJoel E. Denny 
79913df4626SMatt Arsenault static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) {
80083e63d96SGeorge Rimar   if (Buffer.size() <= Prefix.size())
80183e63d96SGeorge Rimar     return Check::CheckNone;
80283e63d96SGeorge Rimar 
803c4d2d471SMatt Arsenault   char NextChar = Buffer[Prefix.size()];
80438820972SMatt Arsenault 
80538820972SMatt Arsenault   // Verify that the : is present after the prefix.
80613df4626SMatt Arsenault   if (NextChar == ':')
80738820972SMatt Arsenault     return Check::CheckPlain;
80838820972SMatt Arsenault 
80913df4626SMatt Arsenault   if (NextChar != '-')
81038820972SMatt Arsenault     return Check::CheckNone;
81138820972SMatt Arsenault 
812c4d2d471SMatt Arsenault   StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
81313df4626SMatt Arsenault   if (Rest.startswith("NEXT:"))
81438820972SMatt Arsenault     return Check::CheckNext;
81538820972SMatt Arsenault 
81601ac1707SDuncan P. N. Exon Smith   if (Rest.startswith("SAME:"))
81701ac1707SDuncan P. N. Exon Smith     return Check::CheckSame;
81801ac1707SDuncan P. N. Exon Smith 
81913df4626SMatt Arsenault   if (Rest.startswith("NOT:"))
82038820972SMatt Arsenault     return Check::CheckNot;
82138820972SMatt Arsenault 
82213df4626SMatt Arsenault   if (Rest.startswith("DAG:"))
82338820972SMatt Arsenault     return Check::CheckDAG;
82438820972SMatt Arsenault 
82513df4626SMatt Arsenault   if (Rest.startswith("LABEL:"))
82638820972SMatt Arsenault     return Check::CheckLabel;
82713df4626SMatt Arsenault 
8285507f668SJames Henderson   if (Rest.startswith("EMPTY:"))
8295507f668SJames Henderson     return Check::CheckEmpty;
8305507f668SJames Henderson 
831a908e7bdSPaul Robinson   // You can't combine -NOT with another suffix.
832a908e7bdSPaul Robinson   if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") ||
833a908e7bdSPaul Robinson       Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") ||
8345507f668SJames Henderson       Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:") ||
8355507f668SJames Henderson       Rest.startswith("EMPTY-NOT:") || Rest.startswith("NOT-EMPTY:"))
836a908e7bdSPaul Robinson     return Check::CheckBadNot;
837a908e7bdSPaul Robinson 
83813df4626SMatt Arsenault   return Check::CheckNone;
83938820972SMatt Arsenault }
84038820972SMatt Arsenault 
84113df4626SMatt Arsenault // From the given position, find the next character after the word.
84213df4626SMatt Arsenault static size_t SkipWord(StringRef Str, size_t Loc) {
84313df4626SMatt Arsenault   while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
84413df4626SMatt Arsenault     ++Loc;
84513df4626SMatt Arsenault   return Loc;
84613df4626SMatt Arsenault }
84713df4626SMatt Arsenault 
848726774cbSChandler Carruth /// Search the buffer for the first prefix in the prefix regular expression.
849726774cbSChandler Carruth ///
850726774cbSChandler Carruth /// This searches the buffer using the provided regular expression, however it
851726774cbSChandler Carruth /// enforces constraints beyond that:
852726774cbSChandler Carruth /// 1) The found prefix must not be a suffix of something that looks like
853726774cbSChandler Carruth ///    a valid prefix.
854726774cbSChandler Carruth /// 2) The found prefix must be followed by a valid check type suffix using \c
855726774cbSChandler Carruth ///    FindCheckType above.
856726774cbSChandler Carruth ///
857726774cbSChandler Carruth /// The first match of the regular expression to satisfy these two is returned,
858726774cbSChandler Carruth /// otherwise an empty StringRef is returned to indicate failure.
859726774cbSChandler Carruth ///
860726774cbSChandler Carruth /// If this routine returns a valid prefix, it will also shrink \p Buffer to
861726774cbSChandler Carruth /// start at the beginning of the returned prefix, increment \p LineNumber for
862726774cbSChandler Carruth /// each new line consumed from \p Buffer, and set \p CheckTy to the type of
863726774cbSChandler Carruth /// check found by examining the suffix.
864726774cbSChandler Carruth ///
865726774cbSChandler Carruth /// If no valid prefix is found, the state of Buffer, LineNumber, and CheckTy
866726774cbSChandler Carruth /// is unspecified.
867726774cbSChandler Carruth static StringRef FindFirstMatchingPrefix(Regex &PrefixRE, StringRef &Buffer,
86813df4626SMatt Arsenault                                          unsigned &LineNumber,
869726774cbSChandler Carruth                                          Check::CheckType &CheckTy) {
870726774cbSChandler Carruth   SmallVector<StringRef, 2> Matches;
871726774cbSChandler Carruth 
87213df4626SMatt Arsenault   while (!Buffer.empty()) {
873726774cbSChandler Carruth     // Find the first (longest) match using the RE.
874726774cbSChandler Carruth     if (!PrefixRE.match(Buffer, &Matches))
875726774cbSChandler Carruth       // No match at all, bail.
876726774cbSChandler Carruth       return StringRef();
877726774cbSChandler Carruth 
878726774cbSChandler Carruth     StringRef Prefix = Matches[0];
879726774cbSChandler Carruth     Matches.clear();
880726774cbSChandler Carruth 
881726774cbSChandler Carruth     assert(Prefix.data() >= Buffer.data() &&
882726774cbSChandler Carruth            Prefix.data() < Buffer.data() + Buffer.size() &&
883726774cbSChandler Carruth            "Prefix doesn't start inside of buffer!");
884726774cbSChandler Carruth     size_t Loc = Prefix.data() - Buffer.data();
885726774cbSChandler Carruth     StringRef Skipped = Buffer.substr(0, Loc);
886726774cbSChandler Carruth     Buffer = Buffer.drop_front(Loc);
887726774cbSChandler Carruth     LineNumber += Skipped.count('\n');
888726774cbSChandler Carruth 
889726774cbSChandler Carruth     // Check that the matched prefix isn't a suffix of some other check-like
890726774cbSChandler Carruth     // word.
891726774cbSChandler Carruth     // FIXME: This is a very ad-hoc check. it would be better handled in some
892726774cbSChandler Carruth     // other way. Among other things it seems hard to distinguish between
893726774cbSChandler Carruth     // intentional and unintentional uses of this feature.
894726774cbSChandler Carruth     if (Skipped.empty() || !IsPartOfWord(Skipped.back())) {
895726774cbSChandler Carruth       // Now extract the type.
896726774cbSChandler Carruth       CheckTy = FindCheckType(Buffer, Prefix);
897726774cbSChandler Carruth 
898726774cbSChandler Carruth       // If we've found a valid check type for this prefix, we're done.
899726774cbSChandler Carruth       if (CheckTy != Check::CheckNone)
90013df4626SMatt Arsenault         return Prefix;
90113df4626SMatt Arsenault     }
90213df4626SMatt Arsenault 
903726774cbSChandler Carruth     // If we didn't successfully find a prefix, we need to skip this invalid
904726774cbSChandler Carruth     // prefix and continue scanning. We directly skip the prefix that was
905726774cbSChandler Carruth     // matched and any additional parts of that check-like word.
906726774cbSChandler Carruth     Buffer = Buffer.drop_front(SkipWord(Buffer, Prefix.size()));
90713df4626SMatt Arsenault   }
90813df4626SMatt Arsenault 
909726774cbSChandler Carruth   // We ran out of buffer while skipping partial matches so give up.
91013df4626SMatt Arsenault   return StringRef();
91138820972SMatt Arsenault }
912ee3c74fbSChris Lattner 
9134dabac20SChandler Carruth /// Read the check file, which specifies the sequence of expected strings.
9144dabac20SChandler Carruth ///
9154dabac20SChandler Carruth /// The strings are added to the CheckStrings vector. Returns true in case of
9164dabac20SChandler Carruth /// an error, false otherwise.
917726774cbSChandler Carruth static bool ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE,
91826cccfe1SChris Lattner                           std::vector<CheckString> &CheckStrings) {
91956ccdbbdSAlexander Kornienko   std::vector<Pattern> ImplicitNegativeChecks;
92056ccdbbdSAlexander Kornienko   for (const auto &PatternString : ImplicitCheckNot) {
92156ccdbbdSAlexander Kornienko     // Create a buffer with fake command line content in order to display the
92256ccdbbdSAlexander Kornienko     // command line option responsible for the specific implicit CHECK-NOT.
923ff43d69dSDavid Blaikie     std::string Prefix = (Twine("-") + ImplicitCheckNot.ArgStr + "='").str();
92456ccdbbdSAlexander Kornienko     std::string Suffix = "'";
9253560ff2cSRafael Espindola     std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy(
9263560ff2cSRafael Espindola         Prefix + PatternString + Suffix, "command line");
9273560ff2cSRafael Espindola 
92856ccdbbdSAlexander Kornienko     StringRef PatternInBuffer =
92956ccdbbdSAlexander Kornienko         CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
9301961f14cSDavid Blaikie     SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
93156ccdbbdSAlexander Kornienko 
93256ccdbbdSAlexander Kornienko     ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot));
93356ccdbbdSAlexander Kornienko     ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
93456ccdbbdSAlexander Kornienko                                                "IMPLICIT-CHECK", SM, 0);
93556ccdbbdSAlexander Kornienko   }
93656ccdbbdSAlexander Kornienko 
93756ccdbbdSAlexander Kornienko   std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;
938236d2d5eSChris Lattner 
93943d50d4aSEli Bendersky   // LineNumber keeps track of the line on which CheckPrefix instances are
94043d50d4aSEli Bendersky   // found.
94192987fb3SAlexander Kornienko   unsigned LineNumber = 1;
94292987fb3SAlexander Kornienko 
943ee3c74fbSChris Lattner   while (1) {
94413df4626SMatt Arsenault     Check::CheckType CheckTy;
94513df4626SMatt Arsenault 
94613df4626SMatt Arsenault     // See if a prefix occurs in the memory buffer.
947726774cbSChandler Carruth     StringRef UsedPrefix = FindFirstMatchingPrefix(PrefixRE, Buffer, LineNumber,
948726774cbSChandler Carruth                                                    CheckTy);
94913df4626SMatt Arsenault     if (UsedPrefix.empty())
950ee3c74fbSChris Lattner       break;
951726774cbSChandler Carruth     assert(UsedPrefix.data() == Buffer.data() &&
952726774cbSChandler Carruth            "Failed to move Buffer's start forward, or pointed prefix outside "
953726774cbSChandler Carruth            "of the buffer!");
95492987fb3SAlexander Kornienko 
95513df4626SMatt Arsenault     // Location to use for error messages.
956726774cbSChandler Carruth     const char *UsedPrefixStart = UsedPrefix.data();
95792987fb3SAlexander Kornienko 
958726774cbSChandler Carruth     // Skip the buffer to the end.
95913df4626SMatt Arsenault     Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy));
96010f10cedSChris Lattner 
961a908e7bdSPaul Robinson     // Complain about useful-looking but unsupported suffixes.
962a908e7bdSPaul Robinson     if (CheckTy == Check::CheckBadNot) {
963e8f2fb20SChandler Carruth       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error,
964a908e7bdSPaul Robinson                       "unsupported -NOT combo on prefix '" + UsedPrefix + "'");
965a908e7bdSPaul Robinson       return true;
966a908e7bdSPaul Robinson     }
967a908e7bdSPaul Robinson 
96838820972SMatt Arsenault     // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
969a26bc914STom de Vries     // leading whitespace.
9701714676aSTom de Vries     if (!(NoCanonicalizeWhiteSpace && MatchFullLines))
971236d2d5eSChris Lattner       Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
972ee3c74fbSChris Lattner 
973ee3c74fbSChris Lattner     // Scan ahead to the end of line.
974caa5fc0cSChris Lattner     size_t EOL = Buffer.find_first_of("\n\r");
975ee3c74fbSChris Lattner 
976838fb09aSDan Gohman     // Remember the location of the start of the pattern, for diagnostics.
977838fb09aSDan Gohman     SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
978838fb09aSDan Gohman 
97974d50731SChris Lattner     // Parse the pattern.
98038820972SMatt Arsenault     Pattern P(CheckTy);
98113df4626SMatt Arsenault     if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber))
982ee3c74fbSChris Lattner       return true;
983ee3c74fbSChris Lattner 
984f8bd2e5bSStephen Lin     // Verify that CHECK-LABEL lines do not define or use variables
98538820972SMatt Arsenault     if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
986e8f2fb20SChandler Carruth       SM.PrintMessage(
987e8f2fb20SChandler Carruth           SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error,
98813df4626SMatt Arsenault           "found '" + UsedPrefix + "-LABEL:'"
98913df4626SMatt Arsenault                                    " with variable definition or use");
990f8bd2e5bSStephen Lin       return true;
991f8bd2e5bSStephen Lin     }
992f8bd2e5bSStephen Lin 
993236d2d5eSChris Lattner     Buffer = Buffer.substr(EOL);
99474d50731SChris Lattner 
9955507f668SJames Henderson     // Verify that CHECK-NEXT/SAME/EMPTY lines have at least one CHECK line before them.
9965507f668SJames Henderson     if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame ||
9975507f668SJames Henderson          CheckTy == Check::CheckEmpty) &&
99801ac1707SDuncan P. N. Exon Smith         CheckStrings.empty()) {
9995507f668SJames Henderson       StringRef Type = CheckTy == Check::CheckNext
10005507f668SJames Henderson                            ? "NEXT"
10015507f668SJames Henderson                            : CheckTy == Check::CheckEmpty ? "EMPTY" : "SAME";
100213df4626SMatt Arsenault       SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
100303b80a40SChris Lattner                       SourceMgr::DK_Error,
1004e8f2fb20SChandler Carruth                       "found '" + UsedPrefix + "-" + Type +
1005e8f2fb20SChandler Carruth                           "' without previous '" + UsedPrefix + ": line");
1006da108b4eSChris Lattner       return true;
1007da108b4eSChris Lattner     }
1008da108b4eSChris Lattner 
100991a1b2c9SMichael Liao     // Handle CHECK-DAG/-NOT.
101038820972SMatt Arsenault     if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
101191a1b2c9SMichael Liao       DagNotMatches.push_back(P);
101274d50731SChris Lattner       continue;
101374d50731SChris Lattner     }
101474d50731SChris Lattner 
1015ee3c74fbSChris Lattner     // Okay, add the string we captured to the output vector and move on.
101685913ccaSJames Y Knight     CheckStrings.emplace_back(P, UsedPrefix, PatternLoc);
101791a1b2c9SMichael Liao     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
101856ccdbbdSAlexander Kornienko     DagNotMatches = ImplicitNegativeChecks;
1019ee3c74fbSChris Lattner   }
1020ee3c74fbSChris Lattner 
102113df4626SMatt Arsenault   // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
102213df4626SMatt Arsenault   // prefix as a filler for the error message.
102391a1b2c9SMichael Liao   if (!DagNotMatches.empty()) {
1024f5e2fc47SBenjamin Kramer     CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(),
102585913ccaSJames Y Knight                               SMLoc::getFromPointer(Buffer.data()));
102691a1b2c9SMichael Liao     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
1027eba55822SJakob Stoklund Olesen   }
1028eba55822SJakob Stoklund Olesen 
1029ee3c74fbSChris Lattner   if (CheckStrings.empty()) {
103013df4626SMatt Arsenault     errs() << "error: no check strings found with prefix"
103113df4626SMatt Arsenault            << (CheckPrefixes.size() > 1 ? "es " : " ");
10323e3ef2f2SChris Bieneman     prefix_iterator I = CheckPrefixes.begin();
10333e3ef2f2SChris Bieneman     prefix_iterator E = CheckPrefixes.end();
10343e3ef2f2SChris Bieneman     if (I != E) {
10353e3ef2f2SChris Bieneman       errs() << "\'" << *I << ":'";
10363e3ef2f2SChris Bieneman       ++I;
103713df4626SMatt Arsenault     }
10383e3ef2f2SChris Bieneman     for (; I != E; ++I)
10393e3ef2f2SChris Bieneman       errs() << ", \'" << *I << ":'";
104013df4626SMatt Arsenault 
104113df4626SMatt Arsenault     errs() << '\n';
1042ee3c74fbSChris Lattner     return true;
1043ee3c74fbSChris Lattner   }
1044ee3c74fbSChris Lattner 
1045ee3c74fbSChris Lattner   return false;
1046ee3c74fbSChris Lattner }
1047ee3c74fbSChris Lattner 
1048dc5ba317SJoel E. Denny static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM,
1049dc5ba317SJoel E. Denny                        StringRef Prefix, SMLoc Loc, const Pattern &Pat,
1050dc5ba317SJoel E. Denny                        StringRef Buffer, StringMap<StringRef> &VariableTable,
1051dc5ba317SJoel E. Denny                        size_t MatchPos, size_t MatchLen) {
1052dc5ba317SJoel E. Denny   if (ExpectedMatch) {
1053dc5ba317SJoel E. Denny     if (!Verbose)
1054dc5ba317SJoel E. Denny       return;
1055dc5ba317SJoel E. Denny     if (!VerboseVerbose && Pat.getCheckTy() == Check::CheckEOF)
1056dc5ba317SJoel E. Denny       return;
1057dc5ba317SJoel E. Denny   }
1058dc5ba317SJoel E. Denny   SMLoc MatchStart = SMLoc::getFromPointer(Buffer.data() + MatchPos);
1059dc5ba317SJoel E. Denny   SMLoc MatchEnd = SMLoc::getFromPointer(Buffer.data() + MatchPos + MatchLen);
1060dc5ba317SJoel E. Denny   SMRange MatchRange(MatchStart, MatchEnd);
1061dc5ba317SJoel E. Denny   SM.PrintMessage(
1062dc5ba317SJoel E. Denny       Loc, ExpectedMatch ? SourceMgr::DK_Remark : SourceMgr::DK_Error,
1063dc5ba317SJoel E. Denny       CheckTypeName(Prefix, Pat.getCheckTy()) + ": " +
1064dc5ba317SJoel E. Denny           (ExpectedMatch ? "expected" : "excluded") +
1065dc5ba317SJoel E. Denny           " string found in input");
1066dc5ba317SJoel E. Denny   SM.PrintMessage(MatchStart, SourceMgr::DK_Note, "found here", {MatchRange});
1067dc5ba317SJoel E. Denny   Pat.PrintVariableUses(SM, Buffer, VariableTable, MatchRange);
1068dc5ba317SJoel E. Denny }
1069dc5ba317SJoel E. Denny 
1070dc5ba317SJoel E. Denny static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM,
1071dc5ba317SJoel E. Denny                        const CheckString &CheckStr, StringRef Buffer,
1072dc5ba317SJoel E. Denny                        StringMap<StringRef> &VariableTable, size_t MatchPos,
1073dc5ba317SJoel E. Denny                        size_t MatchLen) {
1074dc5ba317SJoel E. Denny   PrintMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat,
1075dc5ba317SJoel E. Denny              Buffer, VariableTable, MatchPos, MatchLen);
1076dc5ba317SJoel E. Denny }
1077dc5ba317SJoel E. Denny 
1078dc5ba317SJoel E. Denny static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM,
1079dc5ba317SJoel E. Denny                          StringRef Prefix, SMLoc Loc, const Pattern &Pat,
1080e8f2fb20SChandler Carruth                          StringRef Buffer,
1081e0ef65abSDaniel Dunbar                          StringMap<StringRef> &VariableTable) {
1082dc5ba317SJoel E. Denny   if (!ExpectedMatch && !VerboseVerbose)
1083dc5ba317SJoel E. Denny     return;
1084dc5ba317SJoel E. Denny 
1085da108b4eSChris Lattner   // Otherwise, we have an error, emit an error message.
1086dc5ba317SJoel E. Denny   SM.PrintMessage(Loc,
1087dc5ba317SJoel E. Denny                   ExpectedMatch ? SourceMgr::DK_Error : SourceMgr::DK_Remark,
1088dc5ba317SJoel E. Denny                   CheckTypeName(Prefix, Pat.getCheckTy()) + ": " +
1089dc5ba317SJoel E. Denny                       (ExpectedMatch ? "expected" : "excluded") +
1090dc5ba317SJoel E. Denny                       " string not found in input");
1091da108b4eSChris Lattner 
1092da108b4eSChris Lattner   // Print the "scanning from here" line.  If the current position is at the
1093da108b4eSChris Lattner   // end of a line, advance to the start of the next line.
1094caa5fc0cSChris Lattner   Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
1095da108b4eSChris Lattner 
109603b80a40SChris Lattner   SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
109703b80a40SChris Lattner                   "scanning from here");
1098e0ef65abSDaniel Dunbar 
1099e0ef65abSDaniel Dunbar   // Allow the pattern to print additional information if desired.
1100dc5ba317SJoel E. Denny   Pat.PrintVariableUses(SM, Buffer, VariableTable);
1101dc5ba317SJoel E. Denny   if (ExpectedMatch)
1102dc5ba317SJoel E. Denny     Pat.PrintFuzzyMatch(SM, Buffer, VariableTable);
110391a1b2c9SMichael Liao }
110491a1b2c9SMichael Liao 
1105dc5ba317SJoel E. Denny static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM,
1106dc5ba317SJoel E. Denny                          const CheckString &CheckStr, StringRef Buffer,
110791a1b2c9SMichael Liao                          StringMap<StringRef> &VariableTable) {
1108dc5ba317SJoel E. Denny   PrintNoMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat,
1109dc5ba317SJoel E. Denny                Buffer, VariableTable);
1110da108b4eSChris Lattner }
1111da108b4eSChris Lattner 
11124dabac20SChandler Carruth /// Count the number of newlines in the specified range.
1113592fe880SRichard Smith static unsigned CountNumNewlinesBetween(StringRef Range,
1114592fe880SRichard Smith                                         const char *&FirstNewLine) {
1115da108b4eSChris Lattner   unsigned NumNewLines = 0;
111637183584SChris Lattner   while (1) {
1117da108b4eSChris Lattner     // Scan for newline.
111837183584SChris Lattner     Range = Range.substr(Range.find_first_of("\n\r"));
1119e8f2fb20SChandler Carruth     if (Range.empty())
1120e8f2fb20SChandler Carruth       return NumNewLines;
1121da108b4eSChris Lattner 
1122da108b4eSChris Lattner     ++NumNewLines;
1123da108b4eSChris Lattner 
1124da108b4eSChris Lattner     // Handle \n\r and \r\n as a single newline.
1125e8f2fb20SChandler Carruth     if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') &&
112637183584SChris Lattner         (Range[0] != Range[1]))
112737183584SChris Lattner       Range = Range.substr(1);
112837183584SChris Lattner     Range = Range.substr(1);
1129592fe880SRichard Smith 
1130592fe880SRichard Smith     if (NumNewLines == 1)
1131592fe880SRichard Smith       FirstNewLine = Range.begin();
1132da108b4eSChris Lattner   }
1133da108b4eSChris Lattner }
1134da108b4eSChris Lattner 
11354dabac20SChandler Carruth /// Match check string and its "not strings" and/or "dag strings".
1136dcc7d48dSMichael Liao size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
1137e93a3a08SStephen Lin                           bool IsLabelScanMode, size_t &MatchLen,
1138dcc7d48dSMichael Liao                           StringMap<StringRef> &VariableTable) const {
113991a1b2c9SMichael Liao   size_t LastPos = 0;
114091a1b2c9SMichael Liao   std::vector<const Pattern *> NotStrings;
114191a1b2c9SMichael Liao 
1142e93a3a08SStephen Lin   // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
1143e93a3a08SStephen Lin   // bounds; we have not processed variable definitions within the bounded block
1144e93a3a08SStephen Lin   // yet so cannot handle any final CHECK-DAG yet; this is handled when going
1145e93a3a08SStephen Lin   // over the block again (including the last CHECK-LABEL) in normal mode.
1146e93a3a08SStephen Lin   if (!IsLabelScanMode) {
114791a1b2c9SMichael Liao     // Match "dag strings" (with mixed "not strings" if any).
114891a1b2c9SMichael Liao     LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable);
114991a1b2c9SMichael Liao     if (LastPos == StringRef::npos)
115091a1b2c9SMichael Liao       return StringRef::npos;
1151e93a3a08SStephen Lin   }
115291a1b2c9SMichael Liao 
115391a1b2c9SMichael Liao   // Match itself from the last position after matching CHECK-DAG.
115491a1b2c9SMichael Liao   StringRef MatchBuffer = Buffer.substr(LastPos);
115591a1b2c9SMichael Liao   size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1156dcc7d48dSMichael Liao   if (MatchPos == StringRef::npos) {
1157dc5ba317SJoel E. Denny     PrintNoMatch(true, SM, *this, MatchBuffer, VariableTable);
1158dcc7d48dSMichael Liao     return StringRef::npos;
1159dcc7d48dSMichael Liao   }
1160dc5ba317SJoel E. Denny   PrintMatch(true, SM, *this, MatchBuffer, VariableTable, MatchPos, MatchLen);
1161dcc7d48dSMichael Liao 
1162e93a3a08SStephen Lin   // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
1163e93a3a08SStephen Lin   // or CHECK-NOT
1164e93a3a08SStephen Lin   if (!IsLabelScanMode) {
116591a1b2c9SMichael Liao     StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1166dcc7d48dSMichael Liao 
1167dcc7d48dSMichael Liao     // If this check is a "CHECK-NEXT", verify that the previous match was on
1168dcc7d48dSMichael Liao     // the previous line (i.e. that there is one newline between them).
1169dcc7d48dSMichael Liao     if (CheckNext(SM, SkippedRegion))
1170dcc7d48dSMichael Liao       return StringRef::npos;
1171dcc7d48dSMichael Liao 
117201ac1707SDuncan P. N. Exon Smith     // If this check is a "CHECK-SAME", verify that the previous match was on
117301ac1707SDuncan P. N. Exon Smith     // the same line (i.e. that there is no newline between them).
117401ac1707SDuncan P. N. Exon Smith     if (CheckSame(SM, SkippedRegion))
117501ac1707SDuncan P. N. Exon Smith       return StringRef::npos;
117601ac1707SDuncan P. N. Exon Smith 
1177dcc7d48dSMichael Liao     // If this match had "not strings", verify that they don't exist in the
1178dcc7d48dSMichael Liao     // skipped region.
117991a1b2c9SMichael Liao     if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1180dcc7d48dSMichael Liao       return StringRef::npos;
1181f8bd2e5bSStephen Lin   }
1182dcc7d48dSMichael Liao 
11837dfb92b9SMehdi Amini   return LastPos + MatchPos;
1184dcc7d48dSMichael Liao }
1185dcc7d48dSMichael Liao 
11864dabac20SChandler Carruth /// Verify there is a single line in the given buffer.
1187dcc7d48dSMichael Liao bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
11885507f668SJames Henderson   if (Pat.getCheckTy() != Check::CheckNext &&
11895507f668SJames Henderson       Pat.getCheckTy() != Check::CheckEmpty)
1190dcc7d48dSMichael Liao     return false;
1191dcc7d48dSMichael Liao 
11925507f668SJames Henderson   Twine CheckName =
11935507f668SJames Henderson       Prefix +
11945507f668SJames Henderson       Twine(Pat.getCheckTy() == Check::CheckEmpty ? "-EMPTY" : "-NEXT");
11955507f668SJames Henderson 
1196dcc7d48dSMichael Liao   // Count the number of newlines between the previous match and this one.
1197dcc7d48dSMichael Liao   assert(Buffer.data() !=
1198e8f2fb20SChandler Carruth              SM.getMemoryBuffer(SM.FindBufferContainingLoc(
1199e8f2fb20SChandler Carruth                                     SMLoc::getFromPointer(Buffer.data())))
1200e8f2fb20SChandler Carruth                  ->getBufferStart() &&
12015507f668SJames Henderson          "CHECK-NEXT and CHECK-EMPTY can't be the first check in a file");
1202dcc7d48dSMichael Liao 
120366f09ad0SCraig Topper   const char *FirstNewLine = nullptr;
1204592fe880SRichard Smith   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1205dcc7d48dSMichael Liao 
1206dcc7d48dSMichael Liao   if (NumNewLines == 0) {
1207e8f2fb20SChandler Carruth     SM.PrintMessage(Loc, SourceMgr::DK_Error,
12085507f668SJames Henderson                     CheckName + ": is on the same line as previous match");
1209e8f2fb20SChandler Carruth     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1210e8f2fb20SChandler Carruth                     "'next' match was here");
1211dcc7d48dSMichael Liao     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1212dcc7d48dSMichael Liao                     "previous match ended here");
1213dcc7d48dSMichael Liao     return true;
1214dcc7d48dSMichael Liao   }
1215dcc7d48dSMichael Liao 
1216dcc7d48dSMichael Liao   if (NumNewLines != 1) {
1217e8f2fb20SChandler Carruth     SM.PrintMessage(Loc, SourceMgr::DK_Error,
12185507f668SJames Henderson                     CheckName +
12195507f668SJames Henderson                         ": is not on the line after the previous match");
1220e8f2fb20SChandler Carruth     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1221e8f2fb20SChandler Carruth                     "'next' match was here");
1222dcc7d48dSMichael Liao     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1223dcc7d48dSMichael Liao                     "previous match ended here");
1224592fe880SRichard Smith     SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
1225592fe880SRichard Smith                     "non-matching line after previous match is here");
1226dcc7d48dSMichael Liao     return true;
1227dcc7d48dSMichael Liao   }
1228dcc7d48dSMichael Liao 
1229dcc7d48dSMichael Liao   return false;
1230dcc7d48dSMichael Liao }
1231dcc7d48dSMichael Liao 
12324dabac20SChandler Carruth /// Verify there is no newline in the given buffer.
123301ac1707SDuncan P. N. Exon Smith bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
123485913ccaSJames Y Knight   if (Pat.getCheckTy() != Check::CheckSame)
123501ac1707SDuncan P. N. Exon Smith     return false;
123601ac1707SDuncan P. N. Exon Smith 
123701ac1707SDuncan P. N. Exon Smith   // Count the number of newlines between the previous match and this one.
123801ac1707SDuncan P. N. Exon Smith   assert(Buffer.data() !=
123901ac1707SDuncan P. N. Exon Smith              SM.getMemoryBuffer(SM.FindBufferContainingLoc(
124001ac1707SDuncan P. N. Exon Smith                                     SMLoc::getFromPointer(Buffer.data())))
124101ac1707SDuncan P. N. Exon Smith                  ->getBufferStart() &&
124201ac1707SDuncan P. N. Exon Smith          "CHECK-SAME can't be the first check in a file");
124301ac1707SDuncan P. N. Exon Smith 
124401ac1707SDuncan P. N. Exon Smith   const char *FirstNewLine = nullptr;
124501ac1707SDuncan P. N. Exon Smith   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
124601ac1707SDuncan P. N. Exon Smith 
124701ac1707SDuncan P. N. Exon Smith   if (NumNewLines != 0) {
124801ac1707SDuncan P. N. Exon Smith     SM.PrintMessage(Loc, SourceMgr::DK_Error,
124901ac1707SDuncan P. N. Exon Smith                     Prefix +
125001ac1707SDuncan P. N. Exon Smith                         "-SAME: is not on the same line as the previous match");
125101ac1707SDuncan P. N. Exon Smith     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
125201ac1707SDuncan P. N. Exon Smith                     "'next' match was here");
125301ac1707SDuncan P. N. Exon Smith     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
125401ac1707SDuncan P. N. Exon Smith                     "previous match ended here");
125501ac1707SDuncan P. N. Exon Smith     return true;
125601ac1707SDuncan P. N. Exon Smith   }
125701ac1707SDuncan P. N. Exon Smith 
125801ac1707SDuncan P. N. Exon Smith   return false;
125901ac1707SDuncan P. N. Exon Smith }
126001ac1707SDuncan P. N. Exon Smith 
12614dabac20SChandler Carruth /// Verify there's no "not strings" in the given buffer.
1262dcc7d48dSMichael Liao bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
126391a1b2c9SMichael Liao                            const std::vector<const Pattern *> &NotStrings,
1264dcc7d48dSMichael Liao                            StringMap<StringRef> &VariableTable) const {
12658f870499SBenjamin Kramer   for (const Pattern *Pat : NotStrings) {
126638820972SMatt Arsenault     assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
126791a1b2c9SMichael Liao 
1268dcc7d48dSMichael Liao     size_t MatchLen = 0;
126991a1b2c9SMichael Liao     size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
1270dcc7d48dSMichael Liao 
1271dc5ba317SJoel E. Denny     if (Pos == StringRef::npos) {
1272dc5ba317SJoel E. Denny       PrintNoMatch(false, SM, Prefix, Pat->getLoc(), *Pat, Buffer,
1273dc5ba317SJoel E. Denny                    VariableTable);
1274e8f2fb20SChandler Carruth       continue;
1275dc5ba317SJoel E. Denny     }
1276dcc7d48dSMichael Liao 
1277dc5ba317SJoel E. Denny     PrintMatch(false, SM, Prefix, Pat->getLoc(), *Pat, Buffer, VariableTable,
1278dc5ba317SJoel E. Denny                Pos, MatchLen);
1279dc5ba317SJoel E. Denny 
1280dcc7d48dSMichael Liao     return true;
1281dcc7d48dSMichael Liao   }
1282dcc7d48dSMichael Liao 
1283dcc7d48dSMichael Liao   return false;
1284dcc7d48dSMichael Liao }
1285dcc7d48dSMichael Liao 
12864dabac20SChandler Carruth /// Match "dag strings" and their mixed "not strings".
128791a1b2c9SMichael Liao size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
128891a1b2c9SMichael Liao                              std::vector<const Pattern *> &NotStrings,
128991a1b2c9SMichael Liao                              StringMap<StringRef> &VariableTable) const {
129091a1b2c9SMichael Liao   if (DagNotStrings.empty())
129191a1b2c9SMichael Liao     return 0;
129291a1b2c9SMichael Liao 
12936fc21c25SJoel E. Denny   // The start of the search range.
12946fc21c25SJoel E. Denny   size_t StartPos = 0;
129591a1b2c9SMichael Liao 
12966fc21c25SJoel E. Denny   struct MatchRange {
1297bcf5b441SJoel E. Denny     size_t Pos;
1298bcf5b441SJoel E. Denny     size_t End;
1299bcf5b441SJoel E. Denny   };
13006fc21c25SJoel E. Denny   // A sorted list of ranges for non-overlapping CHECK-DAG matches.  Match
13016fc21c25SJoel E. Denny   // ranges are erased from this list once they are no longer in the search
13026fc21c25SJoel E. Denny   // range.
13036fc21c25SJoel E. Denny   std::list<MatchRange> MatchRanges;
1304bcf5b441SJoel E. Denny 
13056fc21c25SJoel E. Denny   // We need PatItr and PatEnd later for detecting the end of a CHECK-DAG
13066fc21c25SJoel E. Denny   // group, so we don't use a range-based for loop here.
13076fc21c25SJoel E. Denny   for (auto PatItr = DagNotStrings.begin(), PatEnd = DagNotStrings.end();
13086fc21c25SJoel E. Denny        PatItr != PatEnd; ++PatItr) {
13096fc21c25SJoel E. Denny     const Pattern &Pat = *PatItr;
131038820972SMatt Arsenault     assert((Pat.getCheckTy() == Check::CheckDAG ||
131138820972SMatt Arsenault             Pat.getCheckTy() == Check::CheckNot) &&
131291a1b2c9SMichael Liao            "Invalid CHECK-DAG or CHECK-NOT!");
131391a1b2c9SMichael Liao 
131438820972SMatt Arsenault     if (Pat.getCheckTy() == Check::CheckNot) {
131591a1b2c9SMichael Liao       NotStrings.push_back(&Pat);
131691a1b2c9SMichael Liao       continue;
131791a1b2c9SMichael Liao     }
131891a1b2c9SMichael Liao 
131938820972SMatt Arsenault     assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
132091a1b2c9SMichael Liao 
1321614c9861SJoel E. Denny     // CHECK-DAG always matches from the start.
1322bcf5b441SJoel E. Denny     size_t MatchLen = 0, MatchPos = StartPos;
1323bcf5b441SJoel E. Denny 
1324bcf5b441SJoel E. Denny     // Search for a match that doesn't overlap a previous match in this
1325bcf5b441SJoel E. Denny     // CHECK-DAG group.
13266fc21c25SJoel E. Denny     for (auto MI = MatchRanges.begin(), ME = MatchRanges.end(); true; ++MI) {
1327bcf5b441SJoel E. Denny       StringRef MatchBuffer = Buffer.substr(MatchPos);
1328bcf5b441SJoel E. Denny       size_t MatchPosBuf = Pat.Match(MatchBuffer, MatchLen, VariableTable);
132991a1b2c9SMichael Liao       // With a group of CHECK-DAGs, a single mismatching means the match on
133091a1b2c9SMichael Liao       // that group of CHECK-DAGs fails immediately.
1331bcf5b441SJoel E. Denny       if (MatchPosBuf == StringRef::npos) {
1332dc5ba317SJoel E. Denny         PrintNoMatch(true, SM, Prefix, Pat.getLoc(), Pat, MatchBuffer,
1333dc5ba317SJoel E. Denny                      VariableTable);
133491a1b2c9SMichael Liao         return StringRef::npos;
133591a1b2c9SMichael Liao       }
133691a1b2c9SMichael Liao       // Re-calc it as the offset relative to the start of the original string.
1337bcf5b441SJoel E. Denny       MatchPos += MatchPosBuf;
1338dc5ba317SJoel E. Denny       if (VerboseVerbose)
1339dc5ba317SJoel E. Denny         PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, Buffer, VariableTable,
1340dc5ba317SJoel E. Denny                    MatchPos, MatchLen);
13416fc21c25SJoel E. Denny       MatchRange M{MatchPos, MatchPos + MatchLen};
13426fc21c25SJoel E. Denny       if (AllowDeprecatedDagOverlap) {
13436fc21c25SJoel E. Denny         // We don't need to track all matches in this mode, so we just maintain
13446fc21c25SJoel E. Denny         // one match range that encompasses the current CHECK-DAG group's
13456fc21c25SJoel E. Denny         // matches.
13466fc21c25SJoel E. Denny         if (MatchRanges.empty())
13476fc21c25SJoel E. Denny           MatchRanges.insert(MatchRanges.end(), M);
13486fc21c25SJoel E. Denny         else {
13496fc21c25SJoel E. Denny           auto Block = MatchRanges.begin();
13506fc21c25SJoel E. Denny           Block->Pos = std::min(Block->Pos, M.Pos);
13516fc21c25SJoel E. Denny           Block->End = std::max(Block->End, M.End);
13526fc21c25SJoel E. Denny         }
1353bcf5b441SJoel E. Denny         break;
13546fc21c25SJoel E. Denny       }
1355bcf5b441SJoel E. Denny       // Iterate previous matches until overlapping match or insertion point.
1356bcf5b441SJoel E. Denny       bool Overlap = false;
1357bcf5b441SJoel E. Denny       for (; MI != ME; ++MI) {
1358bcf5b441SJoel E. Denny         if (M.Pos < MI->End) {
1359bcf5b441SJoel E. Denny           // !Overlap => New match has no overlap and is before this old match.
1360bcf5b441SJoel E. Denny           // Overlap => New match overlaps this old match.
1361bcf5b441SJoel E. Denny           Overlap = MI->Pos < M.End;
1362bcf5b441SJoel E. Denny           break;
1363bcf5b441SJoel E. Denny         }
1364bcf5b441SJoel E. Denny       }
1365bcf5b441SJoel E. Denny       if (!Overlap) {
1366bcf5b441SJoel E. Denny         // Insert non-overlapping match into list.
13676fc21c25SJoel E. Denny         MatchRanges.insert(MI, M);
1368bcf5b441SJoel E. Denny         break;
1369bcf5b441SJoel E. Denny       }
1370dc5ba317SJoel E. Denny       if (VerboseVerbose) {
1371dc5ba317SJoel E. Denny         SMLoc OldStart = SMLoc::getFromPointer(Buffer.data() + MI->Pos);
1372dc5ba317SJoel E. Denny         SMLoc OldEnd = SMLoc::getFromPointer(Buffer.data() + MI->End);
1373dc5ba317SJoel E. Denny         SMRange OldRange(OldStart, OldEnd);
1374dc5ba317SJoel E. Denny         SM.PrintMessage(OldStart, SourceMgr::DK_Note,
1375dc5ba317SJoel E. Denny                         "match discarded, overlaps earlier DAG match here",
1376dc5ba317SJoel E. Denny                         {OldRange});
1377dc5ba317SJoel E. Denny       }
1378bcf5b441SJoel E. Denny       MatchPos = MI->End;
1379bcf5b441SJoel E. Denny     }
1380dc5ba317SJoel E. Denny     if (!VerboseVerbose)
1381dc5ba317SJoel E. Denny       PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, Buffer, VariableTable,
1382dc5ba317SJoel E. Denny                  MatchPos, MatchLen);
138391a1b2c9SMichael Liao 
13846fc21c25SJoel E. Denny     // Handle the end of a CHECK-DAG group.
13856fc21c25SJoel E. Denny     if (std::next(PatItr) == PatEnd ||
13866fc21c25SJoel E. Denny         std::next(PatItr)->getCheckTy() == Check::CheckNot) {
138791a1b2c9SMichael Liao       if (!NotStrings.empty()) {
13886fc21c25SJoel E. Denny         // If there are CHECK-NOTs between two CHECK-DAGs or from CHECK to
13896fc21c25SJoel E. Denny         // CHECK-DAG, verify that there are no 'not' strings occurred in that
139091a1b2c9SMichael Liao         // region.
13916fc21c25SJoel E. Denny         StringRef SkippedRegion =
13926fc21c25SJoel E. Denny             Buffer.slice(StartPos, MatchRanges.begin()->Pos);
1393cf708c32STim Northover         if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
139491a1b2c9SMichael Liao           return StringRef::npos;
139591a1b2c9SMichael Liao         // Clear "not strings".
139691a1b2c9SMichael Liao         NotStrings.clear();
139791a1b2c9SMichael Liao       }
13986fc21c25SJoel E. Denny       // All subsequent CHECK-DAGs and CHECK-NOTs should be matched from the
13996fc21c25SJoel E. Denny       // end of this CHECK-DAG group's match range.
14006fc21c25SJoel E. Denny       StartPos = MatchRanges.rbegin()->End;
14016fc21c25SJoel E. Denny       // Don't waste time checking for (impossible) overlaps before that.
14026fc21c25SJoel E. Denny       MatchRanges.clear();
14036fc21c25SJoel E. Denny     }
140491a1b2c9SMichael Liao   }
140591a1b2c9SMichael Liao 
14066fc21c25SJoel E. Denny   return StartPos;
140791a1b2c9SMichael Liao }
140891a1b2c9SMichael Liao 
140913df4626SMatt Arsenault // A check prefix must contain only alphanumeric, hyphens and underscores.
141013df4626SMatt Arsenault static bool ValidateCheckPrefix(StringRef CheckPrefix) {
141113df4626SMatt Arsenault   Regex Validator("^[a-zA-Z0-9_-]*$");
141213df4626SMatt Arsenault   return Validator.match(CheckPrefix);
141313df4626SMatt Arsenault }
141413df4626SMatt Arsenault 
141513df4626SMatt Arsenault static bool ValidateCheckPrefixes() {
141613df4626SMatt Arsenault   StringSet<> PrefixSet;
141713df4626SMatt Arsenault 
14188f870499SBenjamin Kramer   for (StringRef Prefix : CheckPrefixes) {
141924412b14SEli Bendersky     // Reject empty prefixes.
142024412b14SEli Bendersky     if (Prefix == "")
142124412b14SEli Bendersky       return false;
142224412b14SEli Bendersky 
14230356975cSDavid Blaikie     if (!PrefixSet.insert(Prefix).second)
142413df4626SMatt Arsenault       return false;
142513df4626SMatt Arsenault 
142613df4626SMatt Arsenault     if (!ValidateCheckPrefix(Prefix))
142713df4626SMatt Arsenault       return false;
142813df4626SMatt Arsenault   }
142913df4626SMatt Arsenault 
143013df4626SMatt Arsenault   return true;
143113df4626SMatt Arsenault }
143213df4626SMatt Arsenault 
1433726774cbSChandler Carruth // Combines the check prefixes into a single regex so that we can efficiently
1434726774cbSChandler Carruth // scan for any of the set.
1435726774cbSChandler Carruth //
1436726774cbSChandler Carruth // The semantics are that the longest-match wins which matches our regex
1437726774cbSChandler Carruth // library.
1438726774cbSChandler Carruth static Regex buildCheckPrefixRegex() {
143913df4626SMatt Arsenault   // I don't think there's a way to specify an initial value for cl::list,
144013df4626SMatt Arsenault   // so if nothing was specified, add the default
144113df4626SMatt Arsenault   if (CheckPrefixes.empty())
144213df4626SMatt Arsenault     CheckPrefixes.push_back("CHECK");
1443726774cbSChandler Carruth 
1444726774cbSChandler Carruth   // We already validated the contents of CheckPrefixes so just concatenate
1445726774cbSChandler Carruth   // them as alternatives.
1446726774cbSChandler Carruth   SmallString<32> PrefixRegexStr;
1447726774cbSChandler Carruth   for (StringRef Prefix : CheckPrefixes) {
1448726774cbSChandler Carruth     if (Prefix != CheckPrefixes.front())
1449726774cbSChandler Carruth       PrefixRegexStr.push_back('|');
1450726774cbSChandler Carruth 
1451726774cbSChandler Carruth     PrefixRegexStr.append(Prefix);
1452726774cbSChandler Carruth   }
1453726774cbSChandler Carruth 
1454726774cbSChandler Carruth   return Regex(PrefixRegexStr);
1455c2735158SRui Ueyama }
1456c2735158SRui Ueyama 
14572bd4f8b6SXinliang David Li static void DumpCommandLine(int argc, char **argv) {
14582bd4f8b6SXinliang David Li   errs() << "FileCheck command line: ";
14592bd4f8b6SXinliang David Li   for (int I = 0; I < argc; I++)
14602bd4f8b6SXinliang David Li     errs() << " " << argv[I];
14612bd4f8b6SXinliang David Li   errs() << "\n";
14622bd4f8b6SXinliang David Li }
14632bd4f8b6SXinliang David Li 
1464f55e72a5SArtem Belevich // Remove local variables from \p VariableTable. Global variables
1465f55e72a5SArtem Belevich // (start with '$') are preserved.
1466f55e72a5SArtem Belevich static void ClearLocalVars(StringMap<StringRef> &VariableTable) {
1467f55e72a5SArtem Belevich   SmallVector<StringRef, 16> LocalVars;
1468f55e72a5SArtem Belevich   for (const auto &Var : VariableTable)
1469f55e72a5SArtem Belevich     if (Var.first()[0] != '$')
1470f55e72a5SArtem Belevich       LocalVars.push_back(Var.first());
1471f55e72a5SArtem Belevich 
1472f55e72a5SArtem Belevich   for (const auto &Var : LocalVars)
1473f55e72a5SArtem Belevich     VariableTable.erase(Var);
1474f55e72a5SArtem Belevich }
1475f55e72a5SArtem Belevich 
147620247900SChandler Carruth /// Check the input to FileCheck provided in the \p Buffer against the \p
147720247900SChandler Carruth /// CheckStrings read from the check file.
147820247900SChandler Carruth ///
147920247900SChandler Carruth /// Returns false if the input fails to satisfy the checks.
148020247900SChandler Carruth bool CheckInput(SourceMgr &SM, StringRef Buffer,
148120247900SChandler Carruth                 ArrayRef<CheckString> CheckStrings) {
148220247900SChandler Carruth   bool ChecksFailed = false;
148320247900SChandler Carruth 
148420247900SChandler Carruth   /// VariableTable - This holds all the current filecheck variables.
148520247900SChandler Carruth   StringMap<StringRef> VariableTable;
148620247900SChandler Carruth 
148746e1fd61SAlexander Richardson   for (const auto& Def : GlobalDefines)
148846e1fd61SAlexander Richardson     VariableTable.insert(StringRef(Def).split('='));
148946e1fd61SAlexander Richardson 
149020247900SChandler Carruth   unsigned i = 0, j = 0, e = CheckStrings.size();
149120247900SChandler Carruth   while (true) {
149220247900SChandler Carruth     StringRef CheckRegion;
149320247900SChandler Carruth     if (j == e) {
149420247900SChandler Carruth       CheckRegion = Buffer;
149520247900SChandler Carruth     } else {
149620247900SChandler Carruth       const CheckString &CheckLabelStr = CheckStrings[j];
149720247900SChandler Carruth       if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) {
149820247900SChandler Carruth         ++j;
149920247900SChandler Carruth         continue;
150020247900SChandler Carruth       }
150120247900SChandler Carruth 
150220247900SChandler Carruth       // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
150320247900SChandler Carruth       size_t MatchLabelLen = 0;
1504e8f2fb20SChandler Carruth       size_t MatchLabelPos =
1505e8f2fb20SChandler Carruth           CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, VariableTable);
150620247900SChandler Carruth       if (MatchLabelPos == StringRef::npos)
150720247900SChandler Carruth         // Immediately bail of CHECK-LABEL fails, nothing else we can do.
150820247900SChandler Carruth         return false;
150920247900SChandler Carruth 
151020247900SChandler Carruth       CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
151120247900SChandler Carruth       Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
151220247900SChandler Carruth       ++j;
151320247900SChandler Carruth     }
151420247900SChandler Carruth 
1515f55e72a5SArtem Belevich     if (EnableVarScope)
1516f55e72a5SArtem Belevich       ClearLocalVars(VariableTable);
1517f55e72a5SArtem Belevich 
151820247900SChandler Carruth     for (; i != j; ++i) {
151920247900SChandler Carruth       const CheckString &CheckStr = CheckStrings[i];
152020247900SChandler Carruth 
152120247900SChandler Carruth       // Check each string within the scanned region, including a second check
152220247900SChandler Carruth       // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
152320247900SChandler Carruth       size_t MatchLen = 0;
1524e8f2fb20SChandler Carruth       size_t MatchPos =
1525e8f2fb20SChandler Carruth           CheckStr.Check(SM, CheckRegion, false, MatchLen, VariableTable);
152620247900SChandler Carruth 
152720247900SChandler Carruth       if (MatchPos == StringRef::npos) {
152820247900SChandler Carruth         ChecksFailed = true;
152920247900SChandler Carruth         i = j;
153020247900SChandler Carruth         break;
153120247900SChandler Carruth       }
153220247900SChandler Carruth 
153320247900SChandler Carruth       CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
153420247900SChandler Carruth     }
153520247900SChandler Carruth 
153620247900SChandler Carruth     if (j == e)
153720247900SChandler Carruth       break;
153820247900SChandler Carruth   }
153920247900SChandler Carruth 
154020247900SChandler Carruth   // Success if no checks failed.
154120247900SChandler Carruth   return !ChecksFailed;
154220247900SChandler Carruth }
154320247900SChandler Carruth 
1544ee3c74fbSChris Lattner int main(int argc, char **argv) {
1545197194b6SRui Ueyama   InitLLVM X(argc, argv);
1546ee3c74fbSChris Lattner   cl::ParseCommandLineOptions(argc, argv);
1547ee3c74fbSChris Lattner 
154813df4626SMatt Arsenault   if (!ValidateCheckPrefixes()) {
154913df4626SMatt Arsenault     errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
155013df4626SMatt Arsenault               "start with a letter and contain only alphanumeric characters, "
155113df4626SMatt Arsenault               "hyphens and underscores\n";
1552c2735158SRui Ueyama     return 2;
1553c2735158SRui Ueyama   }
1554c2735158SRui Ueyama 
1555726774cbSChandler Carruth   Regex PrefixRE = buildCheckPrefixRegex();
1556726774cbSChandler Carruth   std::string REError;
1557726774cbSChandler Carruth   if (!PrefixRE.isValid(REError)) {
1558726774cbSChandler Carruth     errs() << "Unable to combine check-prefix strings into a prefix regular "
1559726774cbSChandler Carruth               "expression! This is likely a bug in FileCheck's verification of "
1560726774cbSChandler Carruth               "the check-prefix strings. Regular expression parsing failed "
1561726774cbSChandler Carruth               "with the following error: "
1562726774cbSChandler Carruth            << REError << "\n";
1563726774cbSChandler Carruth     return 2;
1564726774cbSChandler Carruth   }
156513df4626SMatt Arsenault 
1566dc5ba317SJoel E. Denny   if (VerboseVerbose)
1567dc5ba317SJoel E. Denny     Verbose = true;
1568dc5ba317SJoel E. Denny 
1569ee3c74fbSChris Lattner   SourceMgr SM;
1570ee3c74fbSChris Lattner 
1571ee3c74fbSChris Lattner   // Read the expected strings from the check file.
157220247900SChandler Carruth   ErrorOr<std::unique_ptr<MemoryBuffer>> CheckFileOrErr =
157320247900SChandler Carruth       MemoryBuffer::getFileOrSTDIN(CheckFilename);
157420247900SChandler Carruth   if (std::error_code EC = CheckFileOrErr.getError()) {
157520247900SChandler Carruth     errs() << "Could not open check file '" << CheckFilename
157620247900SChandler Carruth            << "': " << EC.message() << '\n';
157720247900SChandler Carruth     return 2;
157820247900SChandler Carruth   }
157920247900SChandler Carruth   MemoryBuffer &CheckFile = *CheckFileOrErr.get();
158020247900SChandler Carruth 
158120247900SChandler Carruth   SmallString<4096> CheckFileBuffer;
1582b03c166aSChandler Carruth   StringRef CheckFileText = CanonicalizeFile(CheckFile, CheckFileBuffer);
158320247900SChandler Carruth 
158420247900SChandler Carruth   SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(
158520247900SChandler Carruth                             CheckFileText, CheckFile.getBufferIdentifier()),
158620247900SChandler Carruth                         SMLoc());
158720247900SChandler Carruth 
158826cccfe1SChris Lattner   std::vector<CheckString> CheckStrings;
1589726774cbSChandler Carruth   if (ReadCheckFile(SM, CheckFileText, PrefixRE, CheckStrings))
1590ee3c74fbSChris Lattner     return 2;
1591ee3c74fbSChris Lattner 
1592ee3c74fbSChris Lattner   // Open the file to check and add it to SourceMgr.
159320247900SChandler Carruth   ErrorOr<std::unique_ptr<MemoryBuffer>> InputFileOrErr =
1594adf21f2aSRafael Espindola       MemoryBuffer::getFileOrSTDIN(InputFilename);
159520247900SChandler Carruth   if (std::error_code EC = InputFileOrErr.getError()) {
1596adf21f2aSRafael Espindola     errs() << "Could not open input file '" << InputFilename
1597adf21f2aSRafael Espindola            << "': " << EC.message() << '\n';
15988e1c6477SEli Bendersky     return 2;
1599ee3c74fbSChris Lattner   }
160020247900SChandler Carruth   MemoryBuffer &InputFile = *InputFileOrErr.get();
16012c3e5cdfSChris Lattner 
160220247900SChandler Carruth   if (InputFile.getBufferSize() == 0 && !AllowEmptyInput) {
1603b692bed7SChris Lattner     errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
16042bd4f8b6SXinliang David Li     DumpCommandLine(argc, argv);
16058e1c6477SEli Bendersky     return 2;
1606b692bed7SChris Lattner   }
1607b692bed7SChris Lattner 
160820247900SChandler Carruth   SmallString<4096> InputFileBuffer;
1609b03c166aSChandler Carruth   StringRef InputFileText = CanonicalizeFile(InputFile, InputFileBuffer);
16102c3e5cdfSChris Lattner 
1611e8f2fb20SChandler Carruth   SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(
1612e8f2fb20SChandler Carruth                             InputFileText, InputFile.getBufferIdentifier()),
1613e8f2fb20SChandler Carruth                         SMLoc());
1614ee3c74fbSChris Lattner 
1615*346dfbe2SGeorge Karpenkov   int ExitCode = CheckInput(SM, InputFileText, CheckStrings) ? EXIT_SUCCESS : 1;
1616*346dfbe2SGeorge Karpenkov   if (ExitCode == 1 && DumpInputOnFailure)
1617*346dfbe2SGeorge Karpenkov     errs() << "Full input was:\n<<<<<<\n" << InputFileText << "\n>>>>>>\n";
1618*346dfbe2SGeorge Karpenkov 
1619*346dfbe2SGeorge Karpenkov   return ExitCode;
1620ee3c74fbSChris Lattner }
1621