1ee3c74fbSChris Lattner //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
2ee3c74fbSChris Lattner //
3ee3c74fbSChris Lattner //                     The LLVM Compiler Infrastructure
4ee3c74fbSChris Lattner //
5ee3c74fbSChris Lattner // This file is distributed under the University of Illinois Open Source
6ee3c74fbSChris Lattner // License. See LICENSE.TXT for details.
7ee3c74fbSChris Lattner //
8ee3c74fbSChris Lattner //===----------------------------------------------------------------------===//
9ee3c74fbSChris Lattner //
// FileCheck does a line-by-line check of a file that validates whether it
11ee3c74fbSChris Lattner // contains the expected content.  This is useful for regression tests etc.
12ee3c74fbSChris Lattner //
13b5ecceffSJames Henderson // This program exits with an exit status of 2 on error, exit status of 0 if
14ee3c74fbSChris Lattner // the file matched the expected contents, and exit status of 1 if it did not
15ee3c74fbSChris Lattner // contain the expected contents.
16ee3c74fbSChris Lattner //
17ee3c74fbSChris Lattner //===----------------------------------------------------------------------===//
18ee3c74fbSChris Lattner 
1991d19d8eSChandler Carruth #include "llvm/ADT/SmallString.h"
2091d19d8eSChandler Carruth #include "llvm/ADT/StringExtras.h"
2191d19d8eSChandler Carruth #include "llvm/ADT/StringMap.h"
2213df4626SMatt Arsenault #include "llvm/ADT/StringSet.h"
23ee3c74fbSChris Lattner #include "llvm/Support/CommandLine.h"
24197194b6SRui Ueyama #include "llvm/Support/InitLLVM.h"
25ee3c74fbSChris Lattner #include "llvm/Support/MemoryBuffer.h"
26f08d2db9SChris Lattner #include "llvm/Support/Regex.h"
27ee3c74fbSChris Lattner #include "llvm/Support/SourceMgr.h"
28ee3c74fbSChris Lattner #include "llvm/Support/raw_ostream.h"
298879e06dSChris Lattner #include <algorithm>
30981af002SWill Dietz #include <cctype>
31*bcf5b441SJoel E. Denny #include <list>
32e8b8f1bcSEli Bendersky #include <map>
33e8b8f1bcSEli Bendersky #include <string>
34a6e9c3e4SRafael Espindola #include <system_error>
35e8b8f1bcSEli Bendersky #include <vector>
36ee3c74fbSChris Lattner using namespace llvm;
37ee3c74fbSChris Lattner 
38ee3c74fbSChris Lattner static cl::opt<std::string>
39ee3c74fbSChris Lattner     CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
40ee3c74fbSChris Lattner 
41ee3c74fbSChris Lattner static cl::opt<std::string>
42ee3c74fbSChris Lattner     InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
43ee3c74fbSChris Lattner                   cl::init("-"), cl::value_desc("filename"));
44ee3c74fbSChris Lattner 
45e8f2fb20SChandler Carruth static cl::list<std::string> CheckPrefixes(
46e8f2fb20SChandler Carruth     "check-prefix",
47ee3c74fbSChris Lattner     cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
48fd557cb0SDaniel Sanders static cl::alias CheckPrefixesAlias(
49fd557cb0SDaniel Sanders     "check-prefixes", cl::aliasopt(CheckPrefixes), cl::CommaSeparated,
50fd557cb0SDaniel Sanders     cl::NotHidden,
51fd557cb0SDaniel Sanders     cl::desc(
52fd557cb0SDaniel Sanders         "Alias for -check-prefix permitting multiple comma separated values"));
53ee3c74fbSChris Lattner 
54e8f2fb20SChandler Carruth static cl::opt<bool> NoCanonicalizeWhiteSpace(
55e8f2fb20SChandler Carruth     "strict-whitespace",
562c3e5cdfSChris Lattner     cl::desc("Do not treat all horizontal whitespace as equivalent"));
572c3e5cdfSChris Lattner 
5856ccdbbdSAlexander Kornienko static cl::list<std::string> ImplicitCheckNot(
5956ccdbbdSAlexander Kornienko     "implicit-check-not",
6056ccdbbdSAlexander Kornienko     cl::desc("Add an implicit negative check with this pattern to every\n"
6156ccdbbdSAlexander Kornienko              "positive check. This can be used to ensure that no instances of\n"
6256ccdbbdSAlexander Kornienko              "this pattern occur which are not matched by a positive pattern"),
6356ccdbbdSAlexander Kornienko     cl::value_desc("pattern"));
6456ccdbbdSAlexander Kornienko 
6546e1fd61SAlexander Richardson static cl::list<std::string> GlobalDefines("D", cl::Prefix,
6646e1fd61SAlexander Richardson     cl::desc("Define a variable to be used in capture patterns."),
6746e1fd61SAlexander Richardson     cl::value_desc("VAR=VALUE"));
6846e1fd61SAlexander Richardson 
691b9f936fSJustin Bogner static cl::opt<bool> AllowEmptyInput(
701b9f936fSJustin Bogner     "allow-empty", cl::init(false),
711b9f936fSJustin Bogner     cl::desc("Allow the input file to be empty. This is useful when making\n"
721b9f936fSJustin Bogner              "checks that some error message does not occur, for example."));
731b9f936fSJustin Bogner 
7485913ccaSJames Y Knight static cl::opt<bool> MatchFullLines(
7585913ccaSJames Y Knight     "match-full-lines", cl::init(false),
7685913ccaSJames Y Knight     cl::desc("Require all positive matches to cover an entire input line.\n"
7785913ccaSJames Y Knight              "Allows leading and trailing whitespace if --strict-whitespace\n"
7885913ccaSJames Y Knight              "is not also passed."));
7985913ccaSJames Y Knight 
80f55e72a5SArtem Belevich static cl::opt<bool> EnableVarScope(
81f55e72a5SArtem Belevich     "enable-var-scope", cl::init(false),
82f55e72a5SArtem Belevich     cl::desc("Enables scope for regex variables. Variables with names that\n"
83f55e72a5SArtem Belevich              "do not start with '$' will be reset at the beginning of\n"
84f55e72a5SArtem Belevich              "each CHECK-LABEL block."));
85f55e72a5SArtem Belevich 
86*bcf5b441SJoel E. Denny static cl::opt<bool> AllowDeprecatedDagOverlap(
87*bcf5b441SJoel E. Denny     "allow-deprecated-dag-overlap", cl::init(false),
88*bcf5b441SJoel E. Denny     cl::desc("Enable overlapping among matches in a group of consecutive\n"
89*bcf5b441SJoel E. Denny              "CHECK-DAG directives.  This option is deprecated and is only\n"
90*bcf5b441SJoel E. Denny              "provided for convenience as old tests are migrated to the new\n"
91*bcf5b441SJoel E. Denny              "non-overlapping CHECK-DAG implementation.\n"));
92*bcf5b441SJoel E. Denny 
9313df4626SMatt Arsenault typedef cl::list<std::string>::const_iterator prefix_iterator;
9413df4626SMatt Arsenault 
9574d50731SChris Lattner //===----------------------------------------------------------------------===//
9674d50731SChris Lattner // Pattern Handling Code.
9774d50731SChris Lattner //===----------------------------------------------------------------------===//
9874d50731SChris Lattner 
namespace Check {
/// The kind of directive a Pattern was created for.
enum CheckType {
  /// Zero value of the enumeration.
  CheckNone = 0,

  /// A plain check directive.
  CheckPlain,

  /// A "-NEXT" style directive.
  CheckNext,

  /// A "-SAME" style directive.
  CheckSame,

  /// A negative ("-NOT") directive.
  CheckNot,

  /// A "-DAG" directive; consecutive ones form a group.
  CheckDAG,

  /// A "-LABEL" directive.
  CheckLabel,

  /// A directive whose pattern text must be empty.
  CheckEmpty,

  /// Pseudo-directive that matches only at the end of the file.  It exists so
  /// that trailing CHECK-NOTs have something to be attached to.
  CheckEOF,

  /// Set by the parser when -NOT was combined with another CHECK suffix.
  CheckBadNot
};
} // namespace Check
118eba55822SJakob Stoklund Olesen 
11938820972SMatt Arsenault class Pattern {
12038820972SMatt Arsenault   SMLoc PatternLoc;
12191a1b2c9SMichael Liao 
1224dabac20SChandler Carruth   /// A fixed string to match as the pattern or empty if this pattern requires
1234dabac20SChandler Carruth   /// a regex match.
124221460e0SChris Lattner   StringRef FixedStr;
125b16ab0c4SChris Lattner 
1264dabac20SChandler Carruth   /// A regex string to match as the pattern or empty if this pattern requires
1274dabac20SChandler Carruth   /// a fixed string to match.
128b16ab0c4SChris Lattner   std::string RegExStr;
1298879e06dSChris Lattner 
1304dabac20SChandler Carruth   /// Entries in this vector map to uses of a variable in the pattern, e.g.
1314dabac20SChandler Carruth   /// "foo[[bar]]baz".  In this case, the RegExStr will contain "foobaz" and
1324dabac20SChandler Carruth   /// we'll get an entry in this vector that tells us to insert the value of
1334dabac20SChandler Carruth   /// bar at offset 3.
1348879e06dSChris Lattner   std::vector<std::pair<StringRef, unsigned>> VariableUses;
1358879e06dSChris Lattner 
1364dabac20SChandler Carruth   /// Maps definitions of variables to their parenthesized capture numbers.
1374dabac20SChandler Carruth   ///
1384dabac20SChandler Carruth   /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to
1394dabac20SChandler Carruth   /// 1.
140e8b8f1bcSEli Bendersky   std::map<StringRef, unsigned> VariableDefs;
1418879e06dSChris Lattner 
142d1e020f7SSaleem Abdulrasool   Check::CheckType CheckTy;
1433b40b445SChris Lattner 
1444dabac20SChandler Carruth   /// Contains the number of line this pattern is in.
145d1e020f7SSaleem Abdulrasool   unsigned LineNumber;
146d1e020f7SSaleem Abdulrasool 
147d1e020f7SSaleem Abdulrasool public:
148d1e020f7SSaleem Abdulrasool   explicit Pattern(Check::CheckType Ty) : CheckTy(Ty) {}
14974d50731SChris Lattner 
1504dabac20SChandler Carruth   /// Returns the location in source code.
1510b707eb8SMichael Liao   SMLoc getLoc() const { return PatternLoc; }
1520b707eb8SMichael Liao 
153e8f2fb20SChandler Carruth   bool ParsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM,
15413df4626SMatt Arsenault                     unsigned LineNumber);
1558879e06dSChris Lattner   size_t Match(StringRef Buffer, size_t &MatchLen,
1568879e06dSChris Lattner                StringMap<StringRef> &VariableTable) const;
157e0ef65abSDaniel Dunbar   void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
158e0ef65abSDaniel Dunbar                         const StringMap<StringRef> &VariableTable) const;
159e0ef65abSDaniel Dunbar 
160e8f2fb20SChandler Carruth   bool hasVariable() const {
161e8f2fb20SChandler Carruth     return !(VariableUses.empty() && VariableDefs.empty());
162e8f2fb20SChandler Carruth   }
163f8bd2e5bSStephen Lin 
16438820972SMatt Arsenault   Check::CheckType getCheckTy() const { return CheckTy; }
16591a1b2c9SMichael Liao 
166b16ab0c4SChris Lattner private:
167e8b8f1bcSEli Bendersky   bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
168e8b8f1bcSEli Bendersky   void AddBackrefToRegEx(unsigned BackrefNum);
169e8f2fb20SChandler Carruth   unsigned
170e8f2fb20SChandler Carruth   ComputeMatchDistance(StringRef Buffer,
171fd29d886SDaniel Dunbar                        const StringMap<StringRef> &VariableTable) const;
17292987fb3SAlexander Kornienko   bool EvaluateExpression(StringRef Expr, std::string &Value) const;
17381e5cd9eSAdrian Prantl   size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
1743b40b445SChris Lattner };
1753b40b445SChris Lattner 
1764dabac20SChandler Carruth /// Parses the given string into the Pattern.
1774dabac20SChandler Carruth ///
1784dabac20SChandler Carruth /// \p Prefix provides which prefix is being matched, \p SM provides the
1794dabac20SChandler Carruth /// SourceMgr used for error reports, and \p LineNumber is the line number in
1804dabac20SChandler Carruth /// the input file from which the pattern string was read. Returns true in
1814dabac20SChandler Carruth /// case of an error, false otherwise.
182e8f2fb20SChandler Carruth bool Pattern::ParsePattern(StringRef PatternStr, StringRef Prefix,
183e8f2fb20SChandler Carruth                            SourceMgr &SM, unsigned LineNumber) {
18485913ccaSJames Y Knight   bool MatchFullLinesHere = MatchFullLines && CheckTy != Check::CheckNot;
18585913ccaSJames Y Knight 
18692987fb3SAlexander Kornienko   this->LineNumber = LineNumber;
1870a4c44bdSChris Lattner   PatternLoc = SMLoc::getFromPointer(PatternStr.data());
1880a4c44bdSChris Lattner 
1891714676aSTom de Vries   if (!(NoCanonicalizeWhiteSpace && MatchFullLines))
19074d50731SChris Lattner     // Ignore trailing whitespace.
19174d50731SChris Lattner     while (!PatternStr.empty() &&
19274d50731SChris Lattner            (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
19374d50731SChris Lattner       PatternStr = PatternStr.substr(0, PatternStr.size() - 1);
19474d50731SChris Lattner 
19574d50731SChris Lattner   // Check that there is something on the line.
1965507f668SJames Henderson   if (PatternStr.empty() && CheckTy != Check::CheckEmpty) {
19703b80a40SChris Lattner     SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
198e8f2fb20SChandler Carruth                     "found empty check string with prefix '" + Prefix + ":'");
19974d50731SChris Lattner     return true;
20074d50731SChris Lattner   }
20174d50731SChris Lattner 
2025507f668SJames Henderson   if (!PatternStr.empty() && CheckTy == Check::CheckEmpty) {
2035507f668SJames Henderson     SM.PrintMessage(
2045507f668SJames Henderson         PatternLoc, SourceMgr::DK_Error,
2055507f668SJames Henderson         "found non-empty check string for empty check with prefix '" + Prefix +
2065507f668SJames Henderson             ":'");
2075507f668SJames Henderson     return true;
2085507f668SJames Henderson   }
2095507f668SJames Henderson 
2105507f668SJames Henderson   if (CheckTy == Check::CheckEmpty) {
2115507f668SJames Henderson     RegExStr = "(\n$)";
2125507f668SJames Henderson     return false;
2135507f668SJames Henderson   }
2145507f668SJames Henderson 
215221460e0SChris Lattner   // Check to see if this is a fixed string, or if it has regex pieces.
21685913ccaSJames Y Knight   if (!MatchFullLinesHere &&
21785913ccaSJames Y Knight       (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos &&
21885913ccaSJames Y Knight                                  PatternStr.find("[[") == StringRef::npos))) {
219221460e0SChris Lattner     FixedStr = PatternStr;
220221460e0SChris Lattner     return false;
221221460e0SChris Lattner   }
222221460e0SChris Lattner 
22385913ccaSJames Y Knight   if (MatchFullLinesHere) {
22485913ccaSJames Y Knight     RegExStr += '^';
22585913ccaSJames Y Knight     if (!NoCanonicalizeWhiteSpace)
22685913ccaSJames Y Knight       RegExStr += " *";
22785913ccaSJames Y Knight   }
22885913ccaSJames Y Knight 
2298879e06dSChris Lattner   // Paren value #0 is for the fully matched string.  Any new parenthesized
23053e0679dSChris Lattner   // values add from there.
2318879e06dSChris Lattner   unsigned CurParen = 1;
2328879e06dSChris Lattner 
233b16ab0c4SChris Lattner   // Otherwise, there is at least one regex piece.  Build up the regex pattern
234b16ab0c4SChris Lattner   // by escaping scary characters in fixed strings, building up one big regex.
235f08d2db9SChris Lattner   while (!PatternStr.empty()) {
2368879e06dSChris Lattner     // RegEx matches.
23753e0679dSChris Lattner     if (PatternStr.startswith("{{")) {
23843d50d4aSEli Bendersky       // This is the start of a regex match.  Scan for the }}.
239f08d2db9SChris Lattner       size_t End = PatternStr.find("}}");
240f08d2db9SChris Lattner       if (End == StringRef::npos) {
241f08d2db9SChris Lattner         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
24203b80a40SChris Lattner                         SourceMgr::DK_Error,
24303b80a40SChris Lattner                         "found start of regex string with no end '}}'");
244f08d2db9SChris Lattner         return true;
245f08d2db9SChris Lattner       }
246f08d2db9SChris Lattner 
247e53c95f1SChris Lattner       // Enclose {{}} patterns in parens just like [[]] even though we're not
248e53c95f1SChris Lattner       // capturing the result for any purpose.  This is required in case the
249e53c95f1SChris Lattner       // expression contains an alternation like: CHECK:  abc{{x|z}}def.  We
250e53c95f1SChris Lattner       // want this to turn into: "abc(x|z)def" not "abcx|zdef".
251e53c95f1SChris Lattner       RegExStr += '(';
252e53c95f1SChris Lattner       ++CurParen;
253e53c95f1SChris Lattner 
2548879e06dSChris Lattner       if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM))
2558879e06dSChris Lattner         return true;
256e53c95f1SChris Lattner       RegExStr += ')';
25753e0679dSChris Lattner 
2588879e06dSChris Lattner       PatternStr = PatternStr.substr(End + 2);
2598879e06dSChris Lattner       continue;
2608879e06dSChris Lattner     }
2618879e06dSChris Lattner 
2628879e06dSChris Lattner     // Named RegEx matches.  These are of two forms: [[foo:.*]] which matches .*
2638879e06dSChris Lattner     // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
2648879e06dSChris Lattner     // second form is [[foo]] which is a reference to foo.  The variable name
26557cb733bSDaniel Dunbar     // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
2668879e06dSChris Lattner     // it.  This is to catch some common errors.
26753e0679dSChris Lattner     if (PatternStr.startswith("[[")) {
268061d2baaSEli Bendersky       // Find the closing bracket pair ending the match.  End is going to be an
269061d2baaSEli Bendersky       // offset relative to the beginning of the match string.
27081e5cd9eSAdrian Prantl       size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
271061d2baaSEli Bendersky 
2728879e06dSChris Lattner       if (End == StringRef::npos) {
2738879e06dSChris Lattner         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
27403b80a40SChris Lattner                         SourceMgr::DK_Error,
27503b80a40SChris Lattner                         "invalid named regex reference, no ]] found");
276f08d2db9SChris Lattner         return true;
277f08d2db9SChris Lattner       }
278f08d2db9SChris Lattner 
279061d2baaSEli Bendersky       StringRef MatchStr = PatternStr.substr(2, End);
280061d2baaSEli Bendersky       PatternStr = PatternStr.substr(End + 4);
2818879e06dSChris Lattner 
2828879e06dSChris Lattner       // Get the regex name (e.g. "foo").
2838879e06dSChris Lattner       size_t NameEnd = MatchStr.find(':');
2848879e06dSChris Lattner       StringRef Name = MatchStr.substr(0, NameEnd);
2858879e06dSChris Lattner 
2868879e06dSChris Lattner       if (Name.empty()) {
28703b80a40SChris Lattner         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
28803b80a40SChris Lattner                         "invalid name in named regex: empty name");
2898879e06dSChris Lattner         return true;
2908879e06dSChris Lattner       }
2918879e06dSChris Lattner 
29292987fb3SAlexander Kornienko       // Verify that the name/expression is well formed. FileCheck currently
29392987fb3SAlexander Kornienko       // supports @LINE, @LINE+number, @LINE-number expressions. The check here
29492987fb3SAlexander Kornienko       // is relaxed, more strict check is performed in \c EvaluateExpression.
29592987fb3SAlexander Kornienko       bool IsExpression = false;
29692987fb3SAlexander Kornienko       for (unsigned i = 0, e = Name.size(); i != e; ++i) {
297f55e72a5SArtem Belevich         if (i == 0) {
298f55e72a5SArtem Belevich           if (Name[i] == '$')  // Global vars start with '$'
299f55e72a5SArtem Belevich             continue;
300f55e72a5SArtem Belevich           if (Name[i] == '@') {
30192987fb3SAlexander Kornienko             if (NameEnd != StringRef::npos) {
30292987fb3SAlexander Kornienko               SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
30392987fb3SAlexander Kornienko                               SourceMgr::DK_Error,
30492987fb3SAlexander Kornienko                               "invalid name in named regex definition");
30592987fb3SAlexander Kornienko               return true;
30692987fb3SAlexander Kornienko             }
30792987fb3SAlexander Kornienko             IsExpression = true;
30892987fb3SAlexander Kornienko             continue;
30992987fb3SAlexander Kornienko           }
310f55e72a5SArtem Belevich         }
31192987fb3SAlexander Kornienko         if (Name[i] != '_' && !isalnum(Name[i]) &&
31292987fb3SAlexander Kornienko             (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
3138879e06dSChris Lattner           SM.PrintMessage(SMLoc::getFromPointer(Name.data() + i),
31403b80a40SChris Lattner                           SourceMgr::DK_Error, "invalid name in named regex");
3158879e06dSChris Lattner           return true;
3168879e06dSChris Lattner         }
31792987fb3SAlexander Kornienko       }
3188879e06dSChris Lattner 
3198879e06dSChris Lattner       // Name can't start with a digit.
32083c74e9fSGuy Benyei       if (isdigit(static_cast<unsigned char>(Name[0]))) {
32103b80a40SChris Lattner         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
32203b80a40SChris Lattner                         "invalid name in named regex");
3238879e06dSChris Lattner         return true;
3248879e06dSChris Lattner       }
3258879e06dSChris Lattner 
3268879e06dSChris Lattner       // Handle [[foo]].
3278879e06dSChris Lattner       if (NameEnd == StringRef::npos) {
328e8b8f1bcSEli Bendersky         // Handle variables that were defined earlier on the same line by
329e8b8f1bcSEli Bendersky         // emitting a backreference.
330e8b8f1bcSEli Bendersky         if (VariableDefs.find(Name) != VariableDefs.end()) {
331e8b8f1bcSEli Bendersky           unsigned VarParenNum = VariableDefs[Name];
332e8b8f1bcSEli Bendersky           if (VarParenNum < 1 || VarParenNum > 9) {
333e8b8f1bcSEli Bendersky             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
334e8b8f1bcSEli Bendersky                             SourceMgr::DK_Error,
335e8b8f1bcSEli Bendersky                             "Can't back-reference more than 9 variables");
336e8b8f1bcSEli Bendersky             return true;
337e8b8f1bcSEli Bendersky           }
338e8b8f1bcSEli Bendersky           AddBackrefToRegEx(VarParenNum);
339e8b8f1bcSEli Bendersky         } else {
3408879e06dSChris Lattner           VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
341e8b8f1bcSEli Bendersky         }
3428879e06dSChris Lattner         continue;
3438879e06dSChris Lattner       }
3448879e06dSChris Lattner 
3458879e06dSChris Lattner       // Handle [[foo:.*]].
346e8b8f1bcSEli Bendersky       VariableDefs[Name] = CurParen;
3478879e06dSChris Lattner       RegExStr += '(';
3488879e06dSChris Lattner       ++CurParen;
3498879e06dSChris Lattner 
3508879e06dSChris Lattner       if (AddRegExToRegEx(MatchStr.substr(NameEnd + 1), CurParen, SM))
3518879e06dSChris Lattner         return true;
3528879e06dSChris Lattner 
3538879e06dSChris Lattner       RegExStr += ')';
3548879e06dSChris Lattner     }
3558879e06dSChris Lattner 
3568879e06dSChris Lattner     // Handle fixed string matches.
3578879e06dSChris Lattner     // Find the end, which is the start of the next regex.
3588879e06dSChris Lattner     size_t FixedMatchEnd = PatternStr.find("{{");
3598879e06dSChris Lattner     FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
3606f4f77b7SHans Wennborg     RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
3618879e06dSChris Lattner     PatternStr = PatternStr.substr(FixedMatchEnd);
362f08d2db9SChris Lattner   }
363f08d2db9SChris Lattner 
36485913ccaSJames Y Knight   if (MatchFullLinesHere) {
36585913ccaSJames Y Knight     if (!NoCanonicalizeWhiteSpace)
36685913ccaSJames Y Knight       RegExStr += " *";
36785913ccaSJames Y Knight     RegExStr += '$';
36885913ccaSJames Y Knight   }
36985913ccaSJames Y Knight 
37074d50731SChris Lattner   return false;
37174d50731SChris Lattner }
37274d50731SChris Lattner 
373e8f2fb20SChandler Carruth bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) {
374e8b8f1bcSEli Bendersky   Regex R(RS);
3758879e06dSChris Lattner   std::string Error;
3768879e06dSChris Lattner   if (!R.isValid(Error)) {
377e8b8f1bcSEli Bendersky     SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
37803b80a40SChris Lattner                     "invalid regex: " + Error);
3798879e06dSChris Lattner     return true;
3808879e06dSChris Lattner   }
3818879e06dSChris Lattner 
382e8b8f1bcSEli Bendersky   RegExStr += RS.str();
3838879e06dSChris Lattner   CurParen += R.getNumMatches();
3848879e06dSChris Lattner   return false;
3858879e06dSChris Lattner }
386b16ab0c4SChris Lattner 
387e8b8f1bcSEli Bendersky void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
388e8b8f1bcSEli Bendersky   assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
389e8f2fb20SChandler Carruth   std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum);
390e8b8f1bcSEli Bendersky   RegExStr += Backref;
391e8b8f1bcSEli Bendersky }
392e8b8f1bcSEli Bendersky 
3934dabac20SChandler Carruth /// Evaluates expression and stores the result to \p Value.
3944dabac20SChandler Carruth ///
3954dabac20SChandler Carruth /// Returns true on success and false when the expression has invalid syntax.
39692987fb3SAlexander Kornienko bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
39792987fb3SAlexander Kornienko   // The only supported expression is @LINE([\+-]\d+)?
39892987fb3SAlexander Kornienko   if (!Expr.startswith("@LINE"))
39992987fb3SAlexander Kornienko     return false;
40092987fb3SAlexander Kornienko   Expr = Expr.substr(StringRef("@LINE").size());
40192987fb3SAlexander Kornienko   int Offset = 0;
40292987fb3SAlexander Kornienko   if (!Expr.empty()) {
40392987fb3SAlexander Kornienko     if (Expr[0] == '+')
40492987fb3SAlexander Kornienko       Expr = Expr.substr(1);
40592987fb3SAlexander Kornienko     else if (Expr[0] != '-')
40692987fb3SAlexander Kornienko       return false;
40792987fb3SAlexander Kornienko     if (Expr.getAsInteger(10, Offset))
40892987fb3SAlexander Kornienko       return false;
40992987fb3SAlexander Kornienko   }
41092987fb3SAlexander Kornienko   Value = llvm::itostr(LineNumber + Offset);
41192987fb3SAlexander Kornienko   return true;
41292987fb3SAlexander Kornienko }
41392987fb3SAlexander Kornienko 
4144dabac20SChandler Carruth /// Matches the pattern string against the input buffer \p Buffer
4154dabac20SChandler Carruth ///
4164dabac20SChandler Carruth /// This returns the position that is matched or npos if there is no match. If
4174dabac20SChandler Carruth /// there is a match, the size of the matched string is returned in \p
4184dabac20SChandler Carruth /// MatchLen.
4194dabac20SChandler Carruth ///
4204dabac20SChandler Carruth /// The \p VariableTable StringMap provides the current values of filecheck
4214dabac20SChandler Carruth /// variables and is updated if this match defines new values.
4228879e06dSChris Lattner size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
4238879e06dSChris Lattner                       StringMap<StringRef> &VariableTable) const {
424eba55822SJakob Stoklund Olesen   // If this is the EOF pattern, match it immediately.
42538820972SMatt Arsenault   if (CheckTy == Check::CheckEOF) {
426eba55822SJakob Stoklund Olesen     MatchLen = 0;
427eba55822SJakob Stoklund Olesen     return Buffer.size();
428eba55822SJakob Stoklund Olesen   }
429eba55822SJakob Stoklund Olesen 
430221460e0SChris Lattner   // If this is a fixed string pattern, just match it now.
431221460e0SChris Lattner   if (!FixedStr.empty()) {
432221460e0SChris Lattner     MatchLen = FixedStr.size();
433221460e0SChris Lattner     return Buffer.find(FixedStr);
434221460e0SChris Lattner   }
435221460e0SChris Lattner 
436b16ab0c4SChris Lattner   // Regex match.
4378879e06dSChris Lattner 
4388879e06dSChris Lattner   // If there are variable uses, we need to create a temporary string with the
4398879e06dSChris Lattner   // actual value.
4408879e06dSChris Lattner   StringRef RegExToMatch = RegExStr;
4418879e06dSChris Lattner   std::string TmpStr;
4428879e06dSChris Lattner   if (!VariableUses.empty()) {
4438879e06dSChris Lattner     TmpStr = RegExStr;
4448879e06dSChris Lattner 
4458879e06dSChris Lattner     unsigned InsertOffset = 0;
4468f870499SBenjamin Kramer     for (const auto &VariableUse : VariableUses) {
44792987fb3SAlexander Kornienko       std::string Value;
44892987fb3SAlexander Kornienko 
4498f870499SBenjamin Kramer       if (VariableUse.first[0] == '@') {
4508f870499SBenjamin Kramer         if (!EvaluateExpression(VariableUse.first, Value))
45192987fb3SAlexander Kornienko           return StringRef::npos;
45292987fb3SAlexander Kornienko       } else {
453e0ef65abSDaniel Dunbar         StringMap<StringRef>::iterator it =
4548f870499SBenjamin Kramer             VariableTable.find(VariableUse.first);
455e0ef65abSDaniel Dunbar         // If the variable is undefined, return an error.
456e0ef65abSDaniel Dunbar         if (it == VariableTable.end())
457e0ef65abSDaniel Dunbar           return StringRef::npos;
458e0ef65abSDaniel Dunbar 
4596f4f77b7SHans Wennborg         // Look up the value and escape it so that we can put it into the regex.
4606f4f77b7SHans Wennborg         Value += Regex::escape(it->second);
46192987fb3SAlexander Kornienko       }
4628879e06dSChris Lattner 
4638879e06dSChris Lattner       // Plop it into the regex at the adjusted offset.
4648f870499SBenjamin Kramer       TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset,
4658879e06dSChris Lattner                     Value.begin(), Value.end());
4668879e06dSChris Lattner       InsertOffset += Value.size();
4678879e06dSChris Lattner     }
4688879e06dSChris Lattner 
4698879e06dSChris Lattner     // Match the newly constructed regex.
4708879e06dSChris Lattner     RegExToMatch = TmpStr;
4718879e06dSChris Lattner   }
4728879e06dSChris Lattner 
473b16ab0c4SChris Lattner   SmallVector<StringRef, 4> MatchInfo;
4748879e06dSChris Lattner   if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
475f08d2db9SChris Lattner     return StringRef::npos;
476b16ab0c4SChris Lattner 
477b16ab0c4SChris Lattner   // Successful regex match.
478b16ab0c4SChris Lattner   assert(!MatchInfo.empty() && "Didn't get any match");
479b16ab0c4SChris Lattner   StringRef FullMatch = MatchInfo[0];
480b16ab0c4SChris Lattner 
4818879e06dSChris Lattner   // If this defines any variables, remember their values.
4828f870499SBenjamin Kramer   for (const auto &VariableDef : VariableDefs) {
4838f870499SBenjamin Kramer     assert(VariableDef.second < MatchInfo.size() && "Internal paren error");
4848f870499SBenjamin Kramer     VariableTable[VariableDef.first] = MatchInfo[VariableDef.second];
4850a4c44bdSChris Lattner   }
4860a4c44bdSChris Lattner 
487b16ab0c4SChris Lattner   MatchLen = FullMatch.size();
488b16ab0c4SChris Lattner   return FullMatch.data() - Buffer.data();
489f08d2db9SChris Lattner }
490f08d2db9SChris Lattner 
4914dabac20SChandler Carruth 
4924dabac20SChandler Carruth /// Computes an arbitrary estimate for the quality of matching this pattern at
4934dabac20SChandler Carruth /// the start of \p Buffer; a distance of zero should correspond to a perfect
4944dabac20SChandler Carruth /// match.
495e8f2fb20SChandler Carruth unsigned
496e8f2fb20SChandler Carruth Pattern::ComputeMatchDistance(StringRef Buffer,
497fd29d886SDaniel Dunbar                               const StringMap<StringRef> &VariableTable) const {
498fd29d886SDaniel Dunbar   // Just compute the number of matching characters. For regular expressions, we
499fd29d886SDaniel Dunbar   // just compare against the regex itself and hope for the best.
500fd29d886SDaniel Dunbar   //
501fd29d886SDaniel Dunbar   // FIXME: One easy improvement here is have the regex lib generate a single
502fd29d886SDaniel Dunbar   // example regular expression which matches, and use that as the example
503fd29d886SDaniel Dunbar   // string.
504fd29d886SDaniel Dunbar   StringRef ExampleString(FixedStr);
505fd29d886SDaniel Dunbar   if (ExampleString.empty())
506fd29d886SDaniel Dunbar     ExampleString = RegExStr;
507fd29d886SDaniel Dunbar 
508e9aa36c8SDaniel Dunbar   // Only compare up to the first line in the buffer, or the string size.
509e9aa36c8SDaniel Dunbar   StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
510e9aa36c8SDaniel Dunbar   BufferPrefix = BufferPrefix.split('\n').first;
511e9aa36c8SDaniel Dunbar   return BufferPrefix.edit_distance(ExampleString);
512fd29d886SDaniel Dunbar }
513fd29d886SDaniel Dunbar 
5144dabac20SChandler Carruth /// Prints additional information about a failure to match involving this
5154dabac20SChandler Carruth /// pattern.
516e8f2fb20SChandler Carruth void Pattern::PrintFailureInfo(
517e8f2fb20SChandler Carruth     const SourceMgr &SM, StringRef Buffer,
518e0ef65abSDaniel Dunbar     const StringMap<StringRef> &VariableTable) const {
519e0ef65abSDaniel Dunbar   // If this was a regular expression using variables, print the current
520e0ef65abSDaniel Dunbar   // variable values.
521e0ef65abSDaniel Dunbar   if (!VariableUses.empty()) {
5228f870499SBenjamin Kramer     for (const auto &VariableUse : VariableUses) {
523e69170a1SAlp Toker       SmallString<256> Msg;
524e69170a1SAlp Toker       raw_svector_ostream OS(Msg);
5258f870499SBenjamin Kramer       StringRef Var = VariableUse.first;
52692987fb3SAlexander Kornienko       if (Var[0] == '@') {
52792987fb3SAlexander Kornienko         std::string Value;
52892987fb3SAlexander Kornienko         if (EvaluateExpression(Var, Value)) {
52992987fb3SAlexander Kornienko           OS << "with expression \"";
53092987fb3SAlexander Kornienko           OS.write_escaped(Var) << "\" equal to \"";
53192987fb3SAlexander Kornienko           OS.write_escaped(Value) << "\"";
53292987fb3SAlexander Kornienko         } else {
53392987fb3SAlexander Kornienko           OS << "uses incorrect expression \"";
53492987fb3SAlexander Kornienko           OS.write_escaped(Var) << "\"";
53592987fb3SAlexander Kornienko         }
53692987fb3SAlexander Kornienko       } else {
53792987fb3SAlexander Kornienko         StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
538e0ef65abSDaniel Dunbar 
539e0ef65abSDaniel Dunbar         // Check for undefined variable references.
540e0ef65abSDaniel Dunbar         if (it == VariableTable.end()) {
541e0ef65abSDaniel Dunbar           OS << "uses undefined variable \"";
54292987fb3SAlexander Kornienko           OS.write_escaped(Var) << "\"";
543e0ef65abSDaniel Dunbar         } else {
544e0ef65abSDaniel Dunbar           OS << "with variable \"";
545e0ef65abSDaniel Dunbar           OS.write_escaped(Var) << "\" equal to \"";
546e0ef65abSDaniel Dunbar           OS.write_escaped(it->second) << "\"";
547e0ef65abSDaniel Dunbar         }
54892987fb3SAlexander Kornienko       }
549e0ef65abSDaniel Dunbar 
55003b80a40SChris Lattner       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
55103b80a40SChris Lattner                       OS.str());
552e0ef65abSDaniel Dunbar     }
553e0ef65abSDaniel Dunbar   }
554fd29d886SDaniel Dunbar 
555fd29d886SDaniel Dunbar   // Attempt to find the closest/best fuzzy match.  Usually an error happens
556fd29d886SDaniel Dunbar   // because some string in the output didn't exactly match. In these cases, we
557fd29d886SDaniel Dunbar   // would like to show the user a best guess at what "should have" matched, to
558fd29d886SDaniel Dunbar   // save them having to actually check the input manually.
559fd29d886SDaniel Dunbar   size_t NumLinesForward = 0;
560fd29d886SDaniel Dunbar   size_t Best = StringRef::npos;
561fd29d886SDaniel Dunbar   double BestQuality = 0;
562fd29d886SDaniel Dunbar 
563fd29d886SDaniel Dunbar   // Use an arbitrary 4k limit on how far we will search.
5642bf486ebSDan Gohman   for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
565fd29d886SDaniel Dunbar     if (Buffer[i] == '\n')
566fd29d886SDaniel Dunbar       ++NumLinesForward;
567fd29d886SDaniel Dunbar 
568df22bbf7SDan Gohman     // Patterns have leading whitespace stripped, so skip whitespace when
569df22bbf7SDan Gohman     // looking for something which looks like a pattern.
570df22bbf7SDan Gohman     if (Buffer[i] == ' ' || Buffer[i] == '\t')
571df22bbf7SDan Gohman       continue;
572df22bbf7SDan Gohman 
573fd29d886SDaniel Dunbar     // Compute the "quality" of this match as an arbitrary combination of the
574fd29d886SDaniel Dunbar     // match distance and the number of lines skipped to get to this match.
575fd29d886SDaniel Dunbar     unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
576fd29d886SDaniel Dunbar     double Quality = Distance + (NumLinesForward / 100.);
577fd29d886SDaniel Dunbar 
578fd29d886SDaniel Dunbar     if (Quality < BestQuality || Best == StringRef::npos) {
579fd29d886SDaniel Dunbar       Best = i;
580fd29d886SDaniel Dunbar       BestQuality = Quality;
581fd29d886SDaniel Dunbar     }
582fd29d886SDaniel Dunbar   }
583fd29d886SDaniel Dunbar 
584fd29d886SDaniel Dunbar   // Print the "possible intended match here" line if we found something
585c069cc8eSDaniel Dunbar   // reasonable and not equal to what we showed in the "scanning from here"
586c069cc8eSDaniel Dunbar   // line.
587c069cc8eSDaniel Dunbar   if (Best && Best != StringRef::npos && BestQuality < 50) {
588fd29d886SDaniel Dunbar     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
58903b80a40SChris Lattner                     SourceMgr::DK_Note, "possible intended match here");
590fd29d886SDaniel Dunbar 
591fd29d886SDaniel Dunbar     // FIXME: If we wanted to be really friendly we would show why the match
592fd29d886SDaniel Dunbar     // failed, as it can be hard to spot simple one character differences.
593fd29d886SDaniel Dunbar   }
594e0ef65abSDaniel Dunbar }
59574d50731SChris Lattner 
5964dabac20SChandler Carruth /// Finds the closing sequence of a regex variable usage or definition.
5974dabac20SChandler Carruth ///
5984dabac20SChandler Carruth /// \p Str has to point in the beginning of the definition (right after the
5994dabac20SChandler Carruth /// opening sequence). Returns the offset of the closing sequence within Str,
6004dabac20SChandler Carruth /// or npos if it was not found.
60181e5cd9eSAdrian Prantl size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
602061d2baaSEli Bendersky   // Offset keeps track of the current offset within the input Str
603061d2baaSEli Bendersky   size_t Offset = 0;
604061d2baaSEli Bendersky   // [...] Nesting depth
605061d2baaSEli Bendersky   size_t BracketDepth = 0;
606061d2baaSEli Bendersky 
607061d2baaSEli Bendersky   while (!Str.empty()) {
608061d2baaSEli Bendersky     if (Str.startswith("]]") && BracketDepth == 0)
609061d2baaSEli Bendersky       return Offset;
610061d2baaSEli Bendersky     if (Str[0] == '\\') {
611061d2baaSEli Bendersky       // Backslash escapes the next char within regexes, so skip them both.
612061d2baaSEli Bendersky       Str = Str.substr(2);
613061d2baaSEli Bendersky       Offset += 2;
614061d2baaSEli Bendersky     } else {
615061d2baaSEli Bendersky       switch (Str[0]) {
616061d2baaSEli Bendersky       default:
617061d2baaSEli Bendersky         break;
618061d2baaSEli Bendersky       case '[':
619061d2baaSEli Bendersky         BracketDepth++;
620061d2baaSEli Bendersky         break;
621061d2baaSEli Bendersky       case ']':
62281e5cd9eSAdrian Prantl         if (BracketDepth == 0) {
62381e5cd9eSAdrian Prantl           SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
62481e5cd9eSAdrian Prantl                           SourceMgr::DK_Error,
62581e5cd9eSAdrian Prantl                           "missing closing \"]\" for regex variable");
62681e5cd9eSAdrian Prantl           exit(1);
62781e5cd9eSAdrian Prantl         }
628061d2baaSEli Bendersky         BracketDepth--;
629061d2baaSEli Bendersky         break;
630061d2baaSEli Bendersky       }
631061d2baaSEli Bendersky       Str = Str.substr(1);
632061d2baaSEli Bendersky       Offset++;
633061d2baaSEli Bendersky     }
634061d2baaSEli Bendersky   }
635061d2baaSEli Bendersky 
636061d2baaSEli Bendersky   return StringRef::npos;
637061d2baaSEli Bendersky }
638061d2baaSEli Bendersky 
63974d50731SChris Lattner //===----------------------------------------------------------------------===//
64074d50731SChris Lattner // Check Strings.
64174d50731SChris Lattner //===----------------------------------------------------------------------===//
6423b40b445SChris Lattner 
6434dabac20SChandler Carruth /// A check that we found in the input file.
6443b40b445SChris Lattner struct CheckString {
6454dabac20SChandler Carruth   /// The pattern to match.
6463b40b445SChris Lattner   Pattern Pat;
64726cccfe1SChris Lattner 
6484dabac20SChandler Carruth   /// Which prefix name this check matched.
64913df4626SMatt Arsenault   StringRef Prefix;
65013df4626SMatt Arsenault 
6514dabac20SChandler Carruth   /// The location in the match file that the check string was specified.
65226cccfe1SChris Lattner   SMLoc Loc;
65326cccfe1SChris Lattner 
6544dabac20SChandler Carruth   /// All of the strings that are disallowed from occurring between this match
6554dabac20SChandler Carruth   /// string and the previous one (or start of file).
65691a1b2c9SMichael Liao   std::vector<Pattern> DagNotStrings;
657236d2d5eSChris Lattner 
65885913ccaSJames Y Knight   CheckString(const Pattern &P, StringRef S, SMLoc L)
65985913ccaSJames Y Knight       : Pat(P), Prefix(S), Loc(L) {}
660dcc7d48dSMichael Liao 
661e93a3a08SStephen Lin   size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
662f8bd2e5bSStephen Lin                size_t &MatchLen, StringMap<StringRef> &VariableTable) const;
663dcc7d48dSMichael Liao 
664dcc7d48dSMichael Liao   bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
66501ac1707SDuncan P. N. Exon Smith   bool CheckSame(const SourceMgr &SM, StringRef Buffer) const;
666dcc7d48dSMichael Liao   bool CheckNot(const SourceMgr &SM, StringRef Buffer,
66791a1b2c9SMichael Liao                 const std::vector<const Pattern *> &NotStrings,
66891a1b2c9SMichael Liao                 StringMap<StringRef> &VariableTable) const;
66991a1b2c9SMichael Liao   size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
67091a1b2c9SMichael Liao                   std::vector<const Pattern *> &NotStrings,
671dcc7d48dSMichael Liao                   StringMap<StringRef> &VariableTable) const;
67226cccfe1SChris Lattner };
67326cccfe1SChris Lattner 
67420247900SChandler Carruth /// Canonicalize whitespaces in the file. Line endings are replaced with
67520247900SChandler Carruth /// UNIX-style '\n'.
676b03c166aSChandler Carruth static StringRef CanonicalizeFile(MemoryBuffer &MB,
67720247900SChandler Carruth                                   SmallVectorImpl<char> &OutputBuffer) {
67820247900SChandler Carruth   OutputBuffer.reserve(MB.getBufferSize());
679a2f8fc5aSChris Lattner 
68020247900SChandler Carruth   for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd();
681a2f8fc5aSChris Lattner        Ptr != End; ++Ptr) {
682fd781bf0SNAKAMURA Takumi     // Eliminate trailing dosish \r.
683fd781bf0SNAKAMURA Takumi     if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
684fd781bf0SNAKAMURA Takumi       continue;
685fd781bf0SNAKAMURA Takumi     }
686fd781bf0SNAKAMURA Takumi 
6875ea04c38SGuy Benyei     // If current char is not a horizontal whitespace or if horizontal
6885ea04c38SGuy Benyei     // whitespace canonicalization is disabled, dump it to output as is.
689b03c166aSChandler Carruth     if (NoCanonicalizeWhiteSpace || (*Ptr != ' ' && *Ptr != '\t')) {
69020247900SChandler Carruth       OutputBuffer.push_back(*Ptr);
691a2f8fc5aSChris Lattner       continue;
692a2f8fc5aSChris Lattner     }
693a2f8fc5aSChris Lattner 
694a2f8fc5aSChris Lattner     // Otherwise, add one space and advance over neighboring space.
69520247900SChandler Carruth     OutputBuffer.push_back(' ');
696e8f2fb20SChandler Carruth     while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t'))
697a2f8fc5aSChris Lattner       ++Ptr;
698a2f8fc5aSChris Lattner   }
699a2f8fc5aSChris Lattner 
70020247900SChandler Carruth   // Add a null byte and then return all but that byte.
70120247900SChandler Carruth   OutputBuffer.push_back('\0');
70220247900SChandler Carruth   return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1);
703a2f8fc5aSChris Lattner }
704a2f8fc5aSChris Lattner 
/// Returns true if \p c may be part of a check-prefix-like word, i.e. it is
/// alphanumeric, a hyphen or an underscore.
static bool IsPartOfWord(char c) {
  // The <cctype> classifiers require an argument representable as unsigned
  // char (or EOF). Plain char may be signed, so convert first; otherwise any
  // byte >= 0x80 would be passed as a negative value, which is undefined.
  return (isalnum(static_cast<unsigned char>(c)) || c == '-' || c == '_');
}
70838820972SMatt Arsenault 
70913df4626SMatt Arsenault // Get the size of the prefix extension.
71013df4626SMatt Arsenault static size_t CheckTypeSize(Check::CheckType Ty) {
71113df4626SMatt Arsenault   switch (Ty) {
71213df4626SMatt Arsenault   case Check::CheckNone:
713a908e7bdSPaul Robinson   case Check::CheckBadNot:
71413df4626SMatt Arsenault     return 0;
71513df4626SMatt Arsenault 
71613df4626SMatt Arsenault   case Check::CheckPlain:
71713df4626SMatt Arsenault     return sizeof(":") - 1;
71813df4626SMatt Arsenault 
71913df4626SMatt Arsenault   case Check::CheckNext:
72013df4626SMatt Arsenault     return sizeof("-NEXT:") - 1;
72113df4626SMatt Arsenault 
72201ac1707SDuncan P. N. Exon Smith   case Check::CheckSame:
72301ac1707SDuncan P. N. Exon Smith     return sizeof("-SAME:") - 1;
72401ac1707SDuncan P. N. Exon Smith 
72513df4626SMatt Arsenault   case Check::CheckNot:
72613df4626SMatt Arsenault     return sizeof("-NOT:") - 1;
72713df4626SMatt Arsenault 
72813df4626SMatt Arsenault   case Check::CheckDAG:
72913df4626SMatt Arsenault     return sizeof("-DAG:") - 1;
73013df4626SMatt Arsenault 
73113df4626SMatt Arsenault   case Check::CheckLabel:
73213df4626SMatt Arsenault     return sizeof("-LABEL:") - 1;
73313df4626SMatt Arsenault 
7345507f668SJames Henderson   case Check::CheckEmpty:
7355507f668SJames Henderson     return sizeof("-EMPTY:") - 1;
7365507f668SJames Henderson 
73713df4626SMatt Arsenault   case Check::CheckEOF:
73813df4626SMatt Arsenault     llvm_unreachable("Should not be using EOF size");
73913df4626SMatt Arsenault   }
74013df4626SMatt Arsenault 
74113df4626SMatt Arsenault   llvm_unreachable("Bad check type");
74213df4626SMatt Arsenault }
74313df4626SMatt Arsenault 
74413df4626SMatt Arsenault static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) {
74583e63d96SGeorge Rimar   if (Buffer.size() <= Prefix.size())
74683e63d96SGeorge Rimar     return Check::CheckNone;
74783e63d96SGeorge Rimar 
748c4d2d471SMatt Arsenault   char NextChar = Buffer[Prefix.size()];
74938820972SMatt Arsenault 
75038820972SMatt Arsenault   // Verify that the : is present after the prefix.
75113df4626SMatt Arsenault   if (NextChar == ':')
75238820972SMatt Arsenault     return Check::CheckPlain;
75338820972SMatt Arsenault 
75413df4626SMatt Arsenault   if (NextChar != '-')
75538820972SMatt Arsenault     return Check::CheckNone;
75638820972SMatt Arsenault 
757c4d2d471SMatt Arsenault   StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
75813df4626SMatt Arsenault   if (Rest.startswith("NEXT:"))
75938820972SMatt Arsenault     return Check::CheckNext;
76038820972SMatt Arsenault 
76101ac1707SDuncan P. N. Exon Smith   if (Rest.startswith("SAME:"))
76201ac1707SDuncan P. N. Exon Smith     return Check::CheckSame;
76301ac1707SDuncan P. N. Exon Smith 
76413df4626SMatt Arsenault   if (Rest.startswith("NOT:"))
76538820972SMatt Arsenault     return Check::CheckNot;
76638820972SMatt Arsenault 
76713df4626SMatt Arsenault   if (Rest.startswith("DAG:"))
76838820972SMatt Arsenault     return Check::CheckDAG;
76938820972SMatt Arsenault 
77013df4626SMatt Arsenault   if (Rest.startswith("LABEL:"))
77138820972SMatt Arsenault     return Check::CheckLabel;
77213df4626SMatt Arsenault 
7735507f668SJames Henderson   if (Rest.startswith("EMPTY:"))
7745507f668SJames Henderson     return Check::CheckEmpty;
7755507f668SJames Henderson 
776a908e7bdSPaul Robinson   // You can't combine -NOT with another suffix.
777a908e7bdSPaul Robinson   if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") ||
778a908e7bdSPaul Robinson       Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") ||
7795507f668SJames Henderson       Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:") ||
7805507f668SJames Henderson       Rest.startswith("EMPTY-NOT:") || Rest.startswith("NOT-EMPTY:"))
781a908e7bdSPaul Robinson     return Check::CheckBadNot;
782a908e7bdSPaul Robinson 
78313df4626SMatt Arsenault   return Check::CheckNone;
78438820972SMatt Arsenault }
78538820972SMatt Arsenault 
78613df4626SMatt Arsenault // From the given position, find the next character after the word.
78713df4626SMatt Arsenault static size_t SkipWord(StringRef Str, size_t Loc) {
78813df4626SMatt Arsenault   while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
78913df4626SMatt Arsenault     ++Loc;
79013df4626SMatt Arsenault   return Loc;
79113df4626SMatt Arsenault }
79213df4626SMatt Arsenault 
793726774cbSChandler Carruth /// Search the buffer for the first prefix in the prefix regular expression.
794726774cbSChandler Carruth ///
795726774cbSChandler Carruth /// This searches the buffer using the provided regular expression, however it
796726774cbSChandler Carruth /// enforces constraints beyond that:
797726774cbSChandler Carruth /// 1) The found prefix must not be a suffix of something that looks like
798726774cbSChandler Carruth ///    a valid prefix.
799726774cbSChandler Carruth /// 2) The found prefix must be followed by a valid check type suffix using \c
800726774cbSChandler Carruth ///    FindCheckType above.
801726774cbSChandler Carruth ///
802726774cbSChandler Carruth /// The first match of the regular expression to satisfy these two is returned,
803726774cbSChandler Carruth /// otherwise an empty StringRef is returned to indicate failure.
804726774cbSChandler Carruth ///
805726774cbSChandler Carruth /// If this routine returns a valid prefix, it will also shrink \p Buffer to
806726774cbSChandler Carruth /// start at the beginning of the returned prefix, increment \p LineNumber for
807726774cbSChandler Carruth /// each new line consumed from \p Buffer, and set \p CheckTy to the type of
808726774cbSChandler Carruth /// check found by examining the suffix.
809726774cbSChandler Carruth ///
810726774cbSChandler Carruth /// If no valid prefix is found, the state of Buffer, LineNumber, and CheckTy
811726774cbSChandler Carruth /// is unspecified.
812726774cbSChandler Carruth static StringRef FindFirstMatchingPrefix(Regex &PrefixRE, StringRef &Buffer,
81313df4626SMatt Arsenault                                          unsigned &LineNumber,
814726774cbSChandler Carruth                                          Check::CheckType &CheckTy) {
815726774cbSChandler Carruth   SmallVector<StringRef, 2> Matches;
816726774cbSChandler Carruth 
81713df4626SMatt Arsenault   while (!Buffer.empty()) {
818726774cbSChandler Carruth     // Find the first (longest) match using the RE.
819726774cbSChandler Carruth     if (!PrefixRE.match(Buffer, &Matches))
820726774cbSChandler Carruth       // No match at all, bail.
821726774cbSChandler Carruth       return StringRef();
822726774cbSChandler Carruth 
823726774cbSChandler Carruth     StringRef Prefix = Matches[0];
824726774cbSChandler Carruth     Matches.clear();
825726774cbSChandler Carruth 
826726774cbSChandler Carruth     assert(Prefix.data() >= Buffer.data() &&
827726774cbSChandler Carruth            Prefix.data() < Buffer.data() + Buffer.size() &&
828726774cbSChandler Carruth            "Prefix doesn't start inside of buffer!");
829726774cbSChandler Carruth     size_t Loc = Prefix.data() - Buffer.data();
830726774cbSChandler Carruth     StringRef Skipped = Buffer.substr(0, Loc);
831726774cbSChandler Carruth     Buffer = Buffer.drop_front(Loc);
832726774cbSChandler Carruth     LineNumber += Skipped.count('\n');
833726774cbSChandler Carruth 
834726774cbSChandler Carruth     // Check that the matched prefix isn't a suffix of some other check-like
835726774cbSChandler Carruth     // word.
836726774cbSChandler Carruth     // FIXME: This is a very ad-hoc check. it would be better handled in some
837726774cbSChandler Carruth     // other way. Among other things it seems hard to distinguish between
838726774cbSChandler Carruth     // intentional and unintentional uses of this feature.
839726774cbSChandler Carruth     if (Skipped.empty() || !IsPartOfWord(Skipped.back())) {
840726774cbSChandler Carruth       // Now extract the type.
841726774cbSChandler Carruth       CheckTy = FindCheckType(Buffer, Prefix);
842726774cbSChandler Carruth 
843726774cbSChandler Carruth       // If we've found a valid check type for this prefix, we're done.
844726774cbSChandler Carruth       if (CheckTy != Check::CheckNone)
84513df4626SMatt Arsenault         return Prefix;
84613df4626SMatt Arsenault     }
84713df4626SMatt Arsenault 
848726774cbSChandler Carruth     // If we didn't successfully find a prefix, we need to skip this invalid
849726774cbSChandler Carruth     // prefix and continue scanning. We directly skip the prefix that was
850726774cbSChandler Carruth     // matched and any additional parts of that check-like word.
851726774cbSChandler Carruth     Buffer = Buffer.drop_front(SkipWord(Buffer, Prefix.size()));
85213df4626SMatt Arsenault   }
85313df4626SMatt Arsenault 
854726774cbSChandler Carruth   // We ran out of buffer while skipping partial matches so give up.
85513df4626SMatt Arsenault   return StringRef();
85638820972SMatt Arsenault }
857ee3c74fbSChris Lattner 
8584dabac20SChandler Carruth /// Read the check file, which specifies the sequence of expected strings.
8594dabac20SChandler Carruth ///
8604dabac20SChandler Carruth /// The strings are added to the CheckStrings vector. Returns true in case of
8614dabac20SChandler Carruth /// an error, false otherwise.
862726774cbSChandler Carruth static bool ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE,
86326cccfe1SChris Lattner                           std::vector<CheckString> &CheckStrings) {
86456ccdbbdSAlexander Kornienko   std::vector<Pattern> ImplicitNegativeChecks;
86556ccdbbdSAlexander Kornienko   for (const auto &PatternString : ImplicitCheckNot) {
86656ccdbbdSAlexander Kornienko     // Create a buffer with fake command line content in order to display the
86756ccdbbdSAlexander Kornienko     // command line option responsible for the specific implicit CHECK-NOT.
868ff43d69dSDavid Blaikie     std::string Prefix = (Twine("-") + ImplicitCheckNot.ArgStr + "='").str();
86956ccdbbdSAlexander Kornienko     std::string Suffix = "'";
8703560ff2cSRafael Espindola     std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy(
8713560ff2cSRafael Espindola         Prefix + PatternString + Suffix, "command line");
8723560ff2cSRafael Espindola 
87356ccdbbdSAlexander Kornienko     StringRef PatternInBuffer =
87456ccdbbdSAlexander Kornienko         CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
8751961f14cSDavid Blaikie     SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
87656ccdbbdSAlexander Kornienko 
87756ccdbbdSAlexander Kornienko     ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot));
87856ccdbbdSAlexander Kornienko     ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
87956ccdbbdSAlexander Kornienko                                                "IMPLICIT-CHECK", SM, 0);
88056ccdbbdSAlexander Kornienko   }
88156ccdbbdSAlexander Kornienko 
88256ccdbbdSAlexander Kornienko   std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;
883236d2d5eSChris Lattner 
88443d50d4aSEli Bendersky   // LineNumber keeps track of the line on which CheckPrefix instances are
88543d50d4aSEli Bendersky   // found.
88692987fb3SAlexander Kornienko   unsigned LineNumber = 1;
88792987fb3SAlexander Kornienko 
888ee3c74fbSChris Lattner   while (1) {
88913df4626SMatt Arsenault     Check::CheckType CheckTy;
89013df4626SMatt Arsenault 
89113df4626SMatt Arsenault     // See if a prefix occurs in the memory buffer.
892726774cbSChandler Carruth     StringRef UsedPrefix = FindFirstMatchingPrefix(PrefixRE, Buffer, LineNumber,
893726774cbSChandler Carruth                                                    CheckTy);
89413df4626SMatt Arsenault     if (UsedPrefix.empty())
895ee3c74fbSChris Lattner       break;
896726774cbSChandler Carruth     assert(UsedPrefix.data() == Buffer.data() &&
897726774cbSChandler Carruth            "Failed to move Buffer's start forward, or pointed prefix outside "
898726774cbSChandler Carruth            "of the buffer!");
89992987fb3SAlexander Kornienko 
90013df4626SMatt Arsenault     // Location to use for error messages.
901726774cbSChandler Carruth     const char *UsedPrefixStart = UsedPrefix.data();
90292987fb3SAlexander Kornienko 
903726774cbSChandler Carruth     // Skip the buffer to the end.
90413df4626SMatt Arsenault     Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy));
90510f10cedSChris Lattner 
906a908e7bdSPaul Robinson     // Complain about useful-looking but unsupported suffixes.
907a908e7bdSPaul Robinson     if (CheckTy == Check::CheckBadNot) {
908e8f2fb20SChandler Carruth       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error,
909a908e7bdSPaul Robinson                       "unsupported -NOT combo on prefix '" + UsedPrefix + "'");
910a908e7bdSPaul Robinson       return true;
911a908e7bdSPaul Robinson     }
912a908e7bdSPaul Robinson 
91338820972SMatt Arsenault     // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
914a26bc914STom de Vries     // leading whitespace.
9151714676aSTom de Vries     if (!(NoCanonicalizeWhiteSpace && MatchFullLines))
916236d2d5eSChris Lattner       Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
917ee3c74fbSChris Lattner 
918ee3c74fbSChris Lattner     // Scan ahead to the end of line.
919caa5fc0cSChris Lattner     size_t EOL = Buffer.find_first_of("\n\r");
920ee3c74fbSChris Lattner 
921838fb09aSDan Gohman     // Remember the location of the start of the pattern, for diagnostics.
922838fb09aSDan Gohman     SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
923838fb09aSDan Gohman 
92474d50731SChris Lattner     // Parse the pattern.
92538820972SMatt Arsenault     Pattern P(CheckTy);
92613df4626SMatt Arsenault     if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber))
927ee3c74fbSChris Lattner       return true;
928ee3c74fbSChris Lattner 
929f8bd2e5bSStephen Lin     // Verify that CHECK-LABEL lines do not define or use variables
93038820972SMatt Arsenault     if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
931e8f2fb20SChandler Carruth       SM.PrintMessage(
932e8f2fb20SChandler Carruth           SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error,
93313df4626SMatt Arsenault           "found '" + UsedPrefix + "-LABEL:'"
93413df4626SMatt Arsenault                                    " with variable definition or use");
935f8bd2e5bSStephen Lin       return true;
936f8bd2e5bSStephen Lin     }
937f8bd2e5bSStephen Lin 
938236d2d5eSChris Lattner     Buffer = Buffer.substr(EOL);
93974d50731SChris Lattner 
9405507f668SJames Henderson     // Verify that CHECK-NEXT/SAME/EMPTY lines have at least one CHECK line before them.
9415507f668SJames Henderson     if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame ||
9425507f668SJames Henderson          CheckTy == Check::CheckEmpty) &&
94301ac1707SDuncan P. N. Exon Smith         CheckStrings.empty()) {
9445507f668SJames Henderson       StringRef Type = CheckTy == Check::CheckNext
9455507f668SJames Henderson                            ? "NEXT"
9465507f668SJames Henderson                            : CheckTy == Check::CheckEmpty ? "EMPTY" : "SAME";
94713df4626SMatt Arsenault       SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
94803b80a40SChris Lattner                       SourceMgr::DK_Error,
949e8f2fb20SChandler Carruth                       "found '" + UsedPrefix + "-" + Type +
950e8f2fb20SChandler Carruth                           "' without previous '" + UsedPrefix + ": line");
951da108b4eSChris Lattner       return true;
952da108b4eSChris Lattner     }
953da108b4eSChris Lattner 
95491a1b2c9SMichael Liao     // Handle CHECK-DAG/-NOT.
95538820972SMatt Arsenault     if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
95691a1b2c9SMichael Liao       DagNotMatches.push_back(P);
95774d50731SChris Lattner       continue;
95874d50731SChris Lattner     }
95974d50731SChris Lattner 
960ee3c74fbSChris Lattner     // Okay, add the string we captured to the output vector and move on.
96185913ccaSJames Y Knight     CheckStrings.emplace_back(P, UsedPrefix, PatternLoc);
96291a1b2c9SMichael Liao     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
96356ccdbbdSAlexander Kornienko     DagNotMatches = ImplicitNegativeChecks;
964ee3c74fbSChris Lattner   }
965ee3c74fbSChris Lattner 
96613df4626SMatt Arsenault   // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
96713df4626SMatt Arsenault   // prefix as a filler for the error message.
96891a1b2c9SMichael Liao   if (!DagNotMatches.empty()) {
969f5e2fc47SBenjamin Kramer     CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(),
97085913ccaSJames Y Knight                               SMLoc::getFromPointer(Buffer.data()));
97191a1b2c9SMichael Liao     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
972eba55822SJakob Stoklund Olesen   }
973eba55822SJakob Stoklund Olesen 
974ee3c74fbSChris Lattner   if (CheckStrings.empty()) {
97513df4626SMatt Arsenault     errs() << "error: no check strings found with prefix"
97613df4626SMatt Arsenault            << (CheckPrefixes.size() > 1 ? "es " : " ");
9773e3ef2f2SChris Bieneman     prefix_iterator I = CheckPrefixes.begin();
9783e3ef2f2SChris Bieneman     prefix_iterator E = CheckPrefixes.end();
9793e3ef2f2SChris Bieneman     if (I != E) {
9803e3ef2f2SChris Bieneman       errs() << "\'" << *I << ":'";
9813e3ef2f2SChris Bieneman       ++I;
98213df4626SMatt Arsenault     }
9833e3ef2f2SChris Bieneman     for (; I != E; ++I)
9843e3ef2f2SChris Bieneman       errs() << ", \'" << *I << ":'";
98513df4626SMatt Arsenault 
98613df4626SMatt Arsenault     errs() << '\n';
987ee3c74fbSChris Lattner     return true;
988ee3c74fbSChris Lattner   }
989ee3c74fbSChris Lattner 
990ee3c74fbSChris Lattner   return false;
991ee3c74fbSChris Lattner }
992ee3c74fbSChris Lattner 
993e8f2fb20SChandler Carruth static void PrintCheckFailed(const SourceMgr &SM, SMLoc Loc, const Pattern &Pat,
994e8f2fb20SChandler Carruth                              StringRef Buffer,
995e0ef65abSDaniel Dunbar                              StringMap<StringRef> &VariableTable) {
996da108b4eSChris Lattner   // Otherwise, we have an error, emit an error message.
99791a1b2c9SMichael Liao   SM.PrintMessage(Loc, SourceMgr::DK_Error,
99803b80a40SChris Lattner                   "expected string not found in input");
999da108b4eSChris Lattner 
1000da108b4eSChris Lattner   // Print the "scanning from here" line.  If the current position is at the
1001da108b4eSChris Lattner   // end of a line, advance to the start of the next line.
1002caa5fc0cSChris Lattner   Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
1003da108b4eSChris Lattner 
100403b80a40SChris Lattner   SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
100503b80a40SChris Lattner                   "scanning from here");
1006e0ef65abSDaniel Dunbar 
1007e0ef65abSDaniel Dunbar   // Allow the pattern to print additional information if desired.
100891a1b2c9SMichael Liao   Pat.PrintFailureInfo(SM, Buffer, VariableTable);
100991a1b2c9SMichael Liao }
101091a1b2c9SMichael Liao 
101191a1b2c9SMichael Liao static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
101291a1b2c9SMichael Liao                              StringRef Buffer,
101391a1b2c9SMichael Liao                              StringMap<StringRef> &VariableTable) {
101491a1b2c9SMichael Liao   PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable);
1015da108b4eSChris Lattner }
1016da108b4eSChris Lattner 
10174dabac20SChandler Carruth /// Count the number of newlines in the specified range.
1018592fe880SRichard Smith static unsigned CountNumNewlinesBetween(StringRef Range,
1019592fe880SRichard Smith                                         const char *&FirstNewLine) {
1020da108b4eSChris Lattner   unsigned NumNewLines = 0;
102137183584SChris Lattner   while (1) {
1022da108b4eSChris Lattner     // Scan for newline.
102337183584SChris Lattner     Range = Range.substr(Range.find_first_of("\n\r"));
1024e8f2fb20SChandler Carruth     if (Range.empty())
1025e8f2fb20SChandler Carruth       return NumNewLines;
1026da108b4eSChris Lattner 
1027da108b4eSChris Lattner     ++NumNewLines;
1028da108b4eSChris Lattner 
1029da108b4eSChris Lattner     // Handle \n\r and \r\n as a single newline.
1030e8f2fb20SChandler Carruth     if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') &&
103137183584SChris Lattner         (Range[0] != Range[1]))
103237183584SChris Lattner       Range = Range.substr(1);
103337183584SChris Lattner     Range = Range.substr(1);
1034592fe880SRichard Smith 
1035592fe880SRichard Smith     if (NumNewLines == 1)
1036592fe880SRichard Smith       FirstNewLine = Range.begin();
1037da108b4eSChris Lattner   }
1038da108b4eSChris Lattner }
1039da108b4eSChris Lattner 
10404dabac20SChandler Carruth /// Match check string and its "not strings" and/or "dag strings".
1041dcc7d48dSMichael Liao size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
1042e93a3a08SStephen Lin                           bool IsLabelScanMode, size_t &MatchLen,
1043dcc7d48dSMichael Liao                           StringMap<StringRef> &VariableTable) const {
104491a1b2c9SMichael Liao   size_t LastPos = 0;
104591a1b2c9SMichael Liao   std::vector<const Pattern *> NotStrings;
104691a1b2c9SMichael Liao 
1047e93a3a08SStephen Lin   // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
1048e93a3a08SStephen Lin   // bounds; we have not processed variable definitions within the bounded block
1049e93a3a08SStephen Lin   // yet so cannot handle any final CHECK-DAG yet; this is handled when going
1050e93a3a08SStephen Lin   // over the block again (including the last CHECK-LABEL) in normal mode.
1051e93a3a08SStephen Lin   if (!IsLabelScanMode) {
105291a1b2c9SMichael Liao     // Match "dag strings" (with mixed "not strings" if any).
105391a1b2c9SMichael Liao     LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable);
105491a1b2c9SMichael Liao     if (LastPos == StringRef::npos)
105591a1b2c9SMichael Liao       return StringRef::npos;
1056e93a3a08SStephen Lin   }
105791a1b2c9SMichael Liao 
105891a1b2c9SMichael Liao   // Match itself from the last position after matching CHECK-DAG.
105991a1b2c9SMichael Liao   StringRef MatchBuffer = Buffer.substr(LastPos);
106091a1b2c9SMichael Liao   size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1061dcc7d48dSMichael Liao   if (MatchPos == StringRef::npos) {
106291a1b2c9SMichael Liao     PrintCheckFailed(SM, *this, MatchBuffer, VariableTable);
1063dcc7d48dSMichael Liao     return StringRef::npos;
1064dcc7d48dSMichael Liao   }
1065dcc7d48dSMichael Liao 
1066e93a3a08SStephen Lin   // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
1067e93a3a08SStephen Lin   // or CHECK-NOT
1068e93a3a08SStephen Lin   if (!IsLabelScanMode) {
106991a1b2c9SMichael Liao     StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1070dcc7d48dSMichael Liao 
1071dcc7d48dSMichael Liao     // If this check is a "CHECK-NEXT", verify that the previous match was on
1072dcc7d48dSMichael Liao     // the previous line (i.e. that there is one newline between them).
1073dcc7d48dSMichael Liao     if (CheckNext(SM, SkippedRegion))
1074dcc7d48dSMichael Liao       return StringRef::npos;
1075dcc7d48dSMichael Liao 
107601ac1707SDuncan P. N. Exon Smith     // If this check is a "CHECK-SAME", verify that the previous match was on
107701ac1707SDuncan P. N. Exon Smith     // the same line (i.e. that there is no newline between them).
107801ac1707SDuncan P. N. Exon Smith     if (CheckSame(SM, SkippedRegion))
107901ac1707SDuncan P. N. Exon Smith       return StringRef::npos;
108001ac1707SDuncan P. N. Exon Smith 
1081dcc7d48dSMichael Liao     // If this match had "not strings", verify that they don't exist in the
1082dcc7d48dSMichael Liao     // skipped region.
108391a1b2c9SMichael Liao     if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1084dcc7d48dSMichael Liao       return StringRef::npos;
1085f8bd2e5bSStephen Lin   }
1086dcc7d48dSMichael Liao 
10877dfb92b9SMehdi Amini   return LastPos + MatchPos;
1088dcc7d48dSMichael Liao }
1089dcc7d48dSMichael Liao 
10904dabac20SChandler Carruth /// Verify there is a single line in the given buffer.
1091dcc7d48dSMichael Liao bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
10925507f668SJames Henderson   if (Pat.getCheckTy() != Check::CheckNext &&
10935507f668SJames Henderson       Pat.getCheckTy() != Check::CheckEmpty)
1094dcc7d48dSMichael Liao     return false;
1095dcc7d48dSMichael Liao 
10965507f668SJames Henderson   Twine CheckName =
10975507f668SJames Henderson       Prefix +
10985507f668SJames Henderson       Twine(Pat.getCheckTy() == Check::CheckEmpty ? "-EMPTY" : "-NEXT");
10995507f668SJames Henderson 
1100dcc7d48dSMichael Liao   // Count the number of newlines between the previous match and this one.
1101dcc7d48dSMichael Liao   assert(Buffer.data() !=
1102e8f2fb20SChandler Carruth              SM.getMemoryBuffer(SM.FindBufferContainingLoc(
1103e8f2fb20SChandler Carruth                                     SMLoc::getFromPointer(Buffer.data())))
1104e8f2fb20SChandler Carruth                  ->getBufferStart() &&
11055507f668SJames Henderson          "CHECK-NEXT and CHECK-EMPTY can't be the first check in a file");
1106dcc7d48dSMichael Liao 
110766f09ad0SCraig Topper   const char *FirstNewLine = nullptr;
1108592fe880SRichard Smith   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1109dcc7d48dSMichael Liao 
11105507f668SJames Henderson   // For CHECK-EMPTY, the preceding new line is consumed by the pattern, so
11115507f668SJames Henderson   // this needs to be re-added.
11125507f668SJames Henderson   if (Pat.getCheckTy() == Check::CheckEmpty)
11135507f668SJames Henderson     ++NumNewLines;
11145507f668SJames Henderson 
1115dcc7d48dSMichael Liao   if (NumNewLines == 0) {
1116e8f2fb20SChandler Carruth     SM.PrintMessage(Loc, SourceMgr::DK_Error,
11175507f668SJames Henderson                     CheckName + ": is on the same line as previous match");
1118e8f2fb20SChandler Carruth     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1119e8f2fb20SChandler Carruth                     "'next' match was here");
1120dcc7d48dSMichael Liao     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1121dcc7d48dSMichael Liao                     "previous match ended here");
1122dcc7d48dSMichael Liao     return true;
1123dcc7d48dSMichael Liao   }
1124dcc7d48dSMichael Liao 
1125dcc7d48dSMichael Liao   if (NumNewLines != 1) {
1126e8f2fb20SChandler Carruth     SM.PrintMessage(Loc, SourceMgr::DK_Error,
11275507f668SJames Henderson                     CheckName +
11285507f668SJames Henderson                         ": is not on the line after the previous match");
1129e8f2fb20SChandler Carruth     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1130e8f2fb20SChandler Carruth                     "'next' match was here");
1131dcc7d48dSMichael Liao     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1132dcc7d48dSMichael Liao                     "previous match ended here");
1133592fe880SRichard Smith     SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
1134592fe880SRichard Smith                     "non-matching line after previous match is here");
1135dcc7d48dSMichael Liao     return true;
1136dcc7d48dSMichael Liao   }
1137dcc7d48dSMichael Liao 
1138dcc7d48dSMichael Liao   return false;
1139dcc7d48dSMichael Liao }
1140dcc7d48dSMichael Liao 
11414dabac20SChandler Carruth /// Verify there is no newline in the given buffer.
114201ac1707SDuncan P. N. Exon Smith bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
114385913ccaSJames Y Knight   if (Pat.getCheckTy() != Check::CheckSame)
114401ac1707SDuncan P. N. Exon Smith     return false;
114501ac1707SDuncan P. N. Exon Smith 
114601ac1707SDuncan P. N. Exon Smith   // Count the number of newlines between the previous match and this one.
114701ac1707SDuncan P. N. Exon Smith   assert(Buffer.data() !=
114801ac1707SDuncan P. N. Exon Smith              SM.getMemoryBuffer(SM.FindBufferContainingLoc(
114901ac1707SDuncan P. N. Exon Smith                                     SMLoc::getFromPointer(Buffer.data())))
115001ac1707SDuncan P. N. Exon Smith                  ->getBufferStart() &&
115101ac1707SDuncan P. N. Exon Smith          "CHECK-SAME can't be the first check in a file");
115201ac1707SDuncan P. N. Exon Smith 
115301ac1707SDuncan P. N. Exon Smith   const char *FirstNewLine = nullptr;
115401ac1707SDuncan P. N. Exon Smith   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
115501ac1707SDuncan P. N. Exon Smith 
115601ac1707SDuncan P. N. Exon Smith   if (NumNewLines != 0) {
115701ac1707SDuncan P. N. Exon Smith     SM.PrintMessage(Loc, SourceMgr::DK_Error,
115801ac1707SDuncan P. N. Exon Smith                     Prefix +
115901ac1707SDuncan P. N. Exon Smith                         "-SAME: is not on the same line as the previous match");
116001ac1707SDuncan P. N. Exon Smith     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
116101ac1707SDuncan P. N. Exon Smith                     "'next' match was here");
116201ac1707SDuncan P. N. Exon Smith     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
116301ac1707SDuncan P. N. Exon Smith                     "previous match ended here");
116401ac1707SDuncan P. N. Exon Smith     return true;
116501ac1707SDuncan P. N. Exon Smith   }
116601ac1707SDuncan P. N. Exon Smith 
116701ac1707SDuncan P. N. Exon Smith   return false;
116801ac1707SDuncan P. N. Exon Smith }
116901ac1707SDuncan P. N. Exon Smith 
11704dabac20SChandler Carruth /// Verify there's no "not strings" in the given buffer.
1171dcc7d48dSMichael Liao bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
117291a1b2c9SMichael Liao                            const std::vector<const Pattern *> &NotStrings,
1173dcc7d48dSMichael Liao                            StringMap<StringRef> &VariableTable) const {
11748f870499SBenjamin Kramer   for (const Pattern *Pat : NotStrings) {
117538820972SMatt Arsenault     assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
117691a1b2c9SMichael Liao 
1177dcc7d48dSMichael Liao     size_t MatchLen = 0;
117891a1b2c9SMichael Liao     size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
1179dcc7d48dSMichael Liao 
1180e8f2fb20SChandler Carruth     if (Pos == StringRef::npos)
1181e8f2fb20SChandler Carruth       continue;
1182dcc7d48dSMichael Liao 
1183dcc7d48dSMichael Liao     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Pos),
1184e8f2fb20SChandler Carruth                     SourceMgr::DK_Error, Prefix + "-NOT: string occurred!");
118591a1b2c9SMichael Liao     SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note,
118613df4626SMatt Arsenault                     Prefix + "-NOT: pattern specified here");
1187dcc7d48dSMichael Liao     return true;
1188dcc7d48dSMichael Liao   }
1189dcc7d48dSMichael Liao 
1190dcc7d48dSMichael Liao   return false;
1191dcc7d48dSMichael Liao }
1192dcc7d48dSMichael Liao 
11934dabac20SChandler Carruth /// Match "dag strings" and their mixed "not strings".
119491a1b2c9SMichael Liao size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
119591a1b2c9SMichael Liao                              std::vector<const Pattern *> &NotStrings,
119691a1b2c9SMichael Liao                              StringMap<StringRef> &VariableTable) const {
119791a1b2c9SMichael Liao   if (DagNotStrings.empty())
119891a1b2c9SMichael Liao     return 0;
119991a1b2c9SMichael Liao 
120091a1b2c9SMichael Liao   size_t LastPos = 0;
120191a1b2c9SMichael Liao   size_t StartPos = LastPos;
120291a1b2c9SMichael Liao 
1203*bcf5b441SJoel E. Denny   // A sorted list of ranges for non-overlapping dag matches.
1204*bcf5b441SJoel E. Denny   struct Match {
1205*bcf5b441SJoel E. Denny     size_t Pos;
1206*bcf5b441SJoel E. Denny     size_t End;
1207*bcf5b441SJoel E. Denny   };
1208*bcf5b441SJoel E. Denny   std::list<Match> Matches;
1209*bcf5b441SJoel E. Denny 
12108f870499SBenjamin Kramer   for (const Pattern &Pat : DagNotStrings) {
121138820972SMatt Arsenault     assert((Pat.getCheckTy() == Check::CheckDAG ||
121238820972SMatt Arsenault             Pat.getCheckTy() == Check::CheckNot) &&
121391a1b2c9SMichael Liao            "Invalid CHECK-DAG or CHECK-NOT!");
121491a1b2c9SMichael Liao 
121538820972SMatt Arsenault     if (Pat.getCheckTy() == Check::CheckNot) {
121691a1b2c9SMichael Liao       NotStrings.push_back(&Pat);
121791a1b2c9SMichael Liao       continue;
121891a1b2c9SMichael Liao     }
121991a1b2c9SMichael Liao 
122038820972SMatt Arsenault     assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
122191a1b2c9SMichael Liao 
1222614c9861SJoel E. Denny     // CHECK-DAG always matches from the start.
1223*bcf5b441SJoel E. Denny     size_t MatchLen = 0, MatchPos = StartPos;
1224*bcf5b441SJoel E. Denny 
1225*bcf5b441SJoel E. Denny     // Search for a match that doesn't overlap a previous match in this
1226*bcf5b441SJoel E. Denny     // CHECK-DAG group.
1227*bcf5b441SJoel E. Denny     for (auto MI = Matches.begin(), ME = Matches.end(); true; ++MI) {
1228*bcf5b441SJoel E. Denny       StringRef MatchBuffer = Buffer.substr(MatchPos);
1229*bcf5b441SJoel E. Denny       size_t MatchPosBuf = Pat.Match(MatchBuffer, MatchLen, VariableTable);
123091a1b2c9SMichael Liao       // With a group of CHECK-DAGs, a single mismatching means the match on
123191a1b2c9SMichael Liao       // that group of CHECK-DAGs fails immediately.
1232*bcf5b441SJoel E. Denny       if (MatchPosBuf == StringRef::npos) {
123391a1b2c9SMichael Liao         PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable);
123491a1b2c9SMichael Liao         return StringRef::npos;
123591a1b2c9SMichael Liao       }
123691a1b2c9SMichael Liao       // Re-calc it as the offset relative to the start of the original string.
1237*bcf5b441SJoel E. Denny       MatchPos += MatchPosBuf;
1238*bcf5b441SJoel E. Denny       if (AllowDeprecatedDagOverlap)
1239*bcf5b441SJoel E. Denny         break;
1240*bcf5b441SJoel E. Denny       // Iterate previous matches until overlapping match or insertion point.
1241*bcf5b441SJoel E. Denny       Match M{MatchPos, MatchPos + MatchLen};
1242*bcf5b441SJoel E. Denny       bool Overlap = false;
1243*bcf5b441SJoel E. Denny       for (; MI != ME; ++MI) {
1244*bcf5b441SJoel E. Denny         if (M.Pos < MI->End) {
1245*bcf5b441SJoel E. Denny           // !Overlap => New match has no overlap and is before this old match.
1246*bcf5b441SJoel E. Denny           // Overlap => New match overlaps this old match.
1247*bcf5b441SJoel E. Denny           Overlap = MI->Pos < M.End;
1248*bcf5b441SJoel E. Denny           break;
1249*bcf5b441SJoel E. Denny         }
1250*bcf5b441SJoel E. Denny       }
1251*bcf5b441SJoel E. Denny       if (!Overlap) {
1252*bcf5b441SJoel E. Denny         // Insert non-overlapping match into list.
1253*bcf5b441SJoel E. Denny         Matches.insert(MI, M);
1254*bcf5b441SJoel E. Denny         break;
1255*bcf5b441SJoel E. Denny       }
1256*bcf5b441SJoel E. Denny       MatchPos = MI->End;
1257*bcf5b441SJoel E. Denny     }
125891a1b2c9SMichael Liao 
125991a1b2c9SMichael Liao     if (!NotStrings.empty()) {
126091a1b2c9SMichael Liao       if (MatchPos < LastPos) {
126191a1b2c9SMichael Liao         // Reordered?
126291a1b2c9SMichael Liao         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos),
126391a1b2c9SMichael Liao                         SourceMgr::DK_Error,
126413df4626SMatt Arsenault                         Prefix + "-DAG: found a match of CHECK-DAG"
126591a1b2c9SMichael Liao                                  " reordering across a CHECK-NOT");
126691a1b2c9SMichael Liao         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos),
126791a1b2c9SMichael Liao                         SourceMgr::DK_Note,
126813df4626SMatt Arsenault                         Prefix + "-DAG: the farthest match of CHECK-DAG"
126991a1b2c9SMichael Liao                                  " is found here");
127091a1b2c9SMichael Liao         SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note,
127113df4626SMatt Arsenault                         Prefix + "-NOT: the crossed pattern specified"
127291a1b2c9SMichael Liao                                  " here");
127391a1b2c9SMichael Liao         SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note,
127413df4626SMatt Arsenault                         Prefix + "-DAG: the reordered pattern specified"
127591a1b2c9SMichael Liao                                  " here");
127691a1b2c9SMichael Liao         return StringRef::npos;
127791a1b2c9SMichael Liao       }
127891a1b2c9SMichael Liao       // All subsequent CHECK-DAGs should be matched from the farthest
1279*bcf5b441SJoel E. Denny       // position of all precedent CHECK-DAGs (not including this one).
128091a1b2c9SMichael Liao       StartPos = LastPos;
1281*bcf5b441SJoel E. Denny       // Don't waste time checking for (impossible) overlaps before that.
1282*bcf5b441SJoel E. Denny       Matches.clear();
1283*bcf5b441SJoel E. Denny       Matches.push_back(Match{MatchPos, MatchPos + MatchLen});
128491a1b2c9SMichael Liao       // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to
128591a1b2c9SMichael Liao       // CHECK-DAG, verify that there's no 'not' strings occurred in that
128691a1b2c9SMichael Liao       // region.
1287cf60ab31SBenjamin Kramer       StringRef SkippedRegion = Buffer.slice(LastPos, MatchPos);
1288cf708c32STim Northover       if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
128991a1b2c9SMichael Liao         return StringRef::npos;
129091a1b2c9SMichael Liao       // Clear "not strings".
129191a1b2c9SMichael Liao       NotStrings.clear();
129291a1b2c9SMichael Liao     }
129391a1b2c9SMichael Liao 
129491a1b2c9SMichael Liao     // Update the last position with CHECK-DAG matches.
129591a1b2c9SMichael Liao     LastPos = std::max(MatchPos + MatchLen, LastPos);
129691a1b2c9SMichael Liao   }
129791a1b2c9SMichael Liao 
129891a1b2c9SMichael Liao   return LastPos;
129991a1b2c9SMichael Liao }
130091a1b2c9SMichael Liao 
130113df4626SMatt Arsenault // A check prefix must contain only alphanumeric, hyphens and underscores.
130213df4626SMatt Arsenault static bool ValidateCheckPrefix(StringRef CheckPrefix) {
130313df4626SMatt Arsenault   Regex Validator("^[a-zA-Z0-9_-]*$");
130413df4626SMatt Arsenault   return Validator.match(CheckPrefix);
130513df4626SMatt Arsenault }
130613df4626SMatt Arsenault 
130713df4626SMatt Arsenault static bool ValidateCheckPrefixes() {
130813df4626SMatt Arsenault   StringSet<> PrefixSet;
130913df4626SMatt Arsenault 
13108f870499SBenjamin Kramer   for (StringRef Prefix : CheckPrefixes) {
131124412b14SEli Bendersky     // Reject empty prefixes.
131224412b14SEli Bendersky     if (Prefix == "")
131324412b14SEli Bendersky       return false;
131424412b14SEli Bendersky 
13150356975cSDavid Blaikie     if (!PrefixSet.insert(Prefix).second)
131613df4626SMatt Arsenault       return false;
131713df4626SMatt Arsenault 
131813df4626SMatt Arsenault     if (!ValidateCheckPrefix(Prefix))
131913df4626SMatt Arsenault       return false;
132013df4626SMatt Arsenault   }
132113df4626SMatt Arsenault 
132213df4626SMatt Arsenault   return true;
132313df4626SMatt Arsenault }
132413df4626SMatt Arsenault 
1325726774cbSChandler Carruth // Combines the check prefixes into a single regex so that we can efficiently
1326726774cbSChandler Carruth // scan for any of the set.
1327726774cbSChandler Carruth //
1328726774cbSChandler Carruth // The semantics are that the longest-match wins which matches our regex
1329726774cbSChandler Carruth // library.
1330726774cbSChandler Carruth static Regex buildCheckPrefixRegex() {
133113df4626SMatt Arsenault   // I don't think there's a way to specify an initial value for cl::list,
133213df4626SMatt Arsenault   // so if nothing was specified, add the default
133313df4626SMatt Arsenault   if (CheckPrefixes.empty())
133413df4626SMatt Arsenault     CheckPrefixes.push_back("CHECK");
1335726774cbSChandler Carruth 
1336726774cbSChandler Carruth   // We already validated the contents of CheckPrefixes so just concatenate
1337726774cbSChandler Carruth   // them as alternatives.
1338726774cbSChandler Carruth   SmallString<32> PrefixRegexStr;
1339726774cbSChandler Carruth   for (StringRef Prefix : CheckPrefixes) {
1340726774cbSChandler Carruth     if (Prefix != CheckPrefixes.front())
1341726774cbSChandler Carruth       PrefixRegexStr.push_back('|');
1342726774cbSChandler Carruth 
1343726774cbSChandler Carruth     PrefixRegexStr.append(Prefix);
1344726774cbSChandler Carruth   }
1345726774cbSChandler Carruth 
1346726774cbSChandler Carruth   return Regex(PrefixRegexStr);
1347c2735158SRui Ueyama }
1348c2735158SRui Ueyama 
13492bd4f8b6SXinliang David Li static void DumpCommandLine(int argc, char **argv) {
13502bd4f8b6SXinliang David Li   errs() << "FileCheck command line: ";
13512bd4f8b6SXinliang David Li   for (int I = 0; I < argc; I++)
13522bd4f8b6SXinliang David Li     errs() << " " << argv[I];
13532bd4f8b6SXinliang David Li   errs() << "\n";
13542bd4f8b6SXinliang David Li }
13552bd4f8b6SXinliang David Li 
1356f55e72a5SArtem Belevich // Remove local variables from \p VariableTable. Global variables
1357f55e72a5SArtem Belevich // (start with '$') are preserved.
1358f55e72a5SArtem Belevich static void ClearLocalVars(StringMap<StringRef> &VariableTable) {
1359f55e72a5SArtem Belevich   SmallVector<StringRef, 16> LocalVars;
1360f55e72a5SArtem Belevich   for (const auto &Var : VariableTable)
1361f55e72a5SArtem Belevich     if (Var.first()[0] != '$')
1362f55e72a5SArtem Belevich       LocalVars.push_back(Var.first());
1363f55e72a5SArtem Belevich 
1364f55e72a5SArtem Belevich   for (const auto &Var : LocalVars)
1365f55e72a5SArtem Belevich     VariableTable.erase(Var);
1366f55e72a5SArtem Belevich }
1367f55e72a5SArtem Belevich 
136820247900SChandler Carruth /// Check the input to FileCheck provided in the \p Buffer against the \p
136920247900SChandler Carruth /// CheckStrings read from the check file.
137020247900SChandler Carruth ///
137120247900SChandler Carruth /// Returns false if the input fails to satisfy the checks.
137220247900SChandler Carruth bool CheckInput(SourceMgr &SM, StringRef Buffer,
137320247900SChandler Carruth                 ArrayRef<CheckString> CheckStrings) {
137420247900SChandler Carruth   bool ChecksFailed = false;
137520247900SChandler Carruth 
137620247900SChandler Carruth   /// VariableTable - This holds all the current filecheck variables.
137720247900SChandler Carruth   StringMap<StringRef> VariableTable;
137820247900SChandler Carruth 
137946e1fd61SAlexander Richardson   for (const auto& Def : GlobalDefines)
138046e1fd61SAlexander Richardson     VariableTable.insert(StringRef(Def).split('='));
138146e1fd61SAlexander Richardson 
138220247900SChandler Carruth   unsigned i = 0, j = 0, e = CheckStrings.size();
138320247900SChandler Carruth   while (true) {
138420247900SChandler Carruth     StringRef CheckRegion;
138520247900SChandler Carruth     if (j == e) {
138620247900SChandler Carruth       CheckRegion = Buffer;
138720247900SChandler Carruth     } else {
138820247900SChandler Carruth       const CheckString &CheckLabelStr = CheckStrings[j];
138920247900SChandler Carruth       if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) {
139020247900SChandler Carruth         ++j;
139120247900SChandler Carruth         continue;
139220247900SChandler Carruth       }
139320247900SChandler Carruth 
139420247900SChandler Carruth       // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
139520247900SChandler Carruth       size_t MatchLabelLen = 0;
1396e8f2fb20SChandler Carruth       size_t MatchLabelPos =
1397e8f2fb20SChandler Carruth           CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, VariableTable);
139820247900SChandler Carruth       if (MatchLabelPos == StringRef::npos)
139920247900SChandler Carruth         // Immediately bail of CHECK-LABEL fails, nothing else we can do.
140020247900SChandler Carruth         return false;
140120247900SChandler Carruth 
140220247900SChandler Carruth       CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
140320247900SChandler Carruth       Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
140420247900SChandler Carruth       ++j;
140520247900SChandler Carruth     }
140620247900SChandler Carruth 
1407f55e72a5SArtem Belevich     if (EnableVarScope)
1408f55e72a5SArtem Belevich       ClearLocalVars(VariableTable);
1409f55e72a5SArtem Belevich 
141020247900SChandler Carruth     for (; i != j; ++i) {
141120247900SChandler Carruth       const CheckString &CheckStr = CheckStrings[i];
141220247900SChandler Carruth 
141320247900SChandler Carruth       // Check each string within the scanned region, including a second check
141420247900SChandler Carruth       // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
141520247900SChandler Carruth       size_t MatchLen = 0;
1416e8f2fb20SChandler Carruth       size_t MatchPos =
1417e8f2fb20SChandler Carruth           CheckStr.Check(SM, CheckRegion, false, MatchLen, VariableTable);
141820247900SChandler Carruth 
141920247900SChandler Carruth       if (MatchPos == StringRef::npos) {
142020247900SChandler Carruth         ChecksFailed = true;
142120247900SChandler Carruth         i = j;
142220247900SChandler Carruth         break;
142320247900SChandler Carruth       }
142420247900SChandler Carruth 
142520247900SChandler Carruth       CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
142620247900SChandler Carruth     }
142720247900SChandler Carruth 
142820247900SChandler Carruth     if (j == e)
142920247900SChandler Carruth       break;
143020247900SChandler Carruth   }
143120247900SChandler Carruth 
143220247900SChandler Carruth   // Success if no checks failed.
143320247900SChandler Carruth   return !ChecksFailed;
143420247900SChandler Carruth }
143520247900SChandler Carruth 
1436ee3c74fbSChris Lattner int main(int argc, char **argv) {
1437197194b6SRui Ueyama   InitLLVM X(argc, argv);
1438ee3c74fbSChris Lattner   cl::ParseCommandLineOptions(argc, argv);
1439ee3c74fbSChris Lattner 
144013df4626SMatt Arsenault   if (!ValidateCheckPrefixes()) {
144113df4626SMatt Arsenault     errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
144213df4626SMatt Arsenault               "start with a letter and contain only alphanumeric characters, "
144313df4626SMatt Arsenault               "hyphens and underscores\n";
1444c2735158SRui Ueyama     return 2;
1445c2735158SRui Ueyama   }
1446c2735158SRui Ueyama 
1447726774cbSChandler Carruth   Regex PrefixRE = buildCheckPrefixRegex();
1448726774cbSChandler Carruth   std::string REError;
1449726774cbSChandler Carruth   if (!PrefixRE.isValid(REError)) {
1450726774cbSChandler Carruth     errs() << "Unable to combine check-prefix strings into a prefix regular "
1451726774cbSChandler Carruth               "expression! This is likely a bug in FileCheck's verification of "
1452726774cbSChandler Carruth               "the check-prefix strings. Regular expression parsing failed "
1453726774cbSChandler Carruth               "with the following error: "
1454726774cbSChandler Carruth            << REError << "\n";
1455726774cbSChandler Carruth     return 2;
1456726774cbSChandler Carruth   }
145713df4626SMatt Arsenault 
1458ee3c74fbSChris Lattner   SourceMgr SM;
1459ee3c74fbSChris Lattner 
1460ee3c74fbSChris Lattner   // Read the expected strings from the check file.
146120247900SChandler Carruth   ErrorOr<std::unique_ptr<MemoryBuffer>> CheckFileOrErr =
146220247900SChandler Carruth       MemoryBuffer::getFileOrSTDIN(CheckFilename);
146320247900SChandler Carruth   if (std::error_code EC = CheckFileOrErr.getError()) {
146420247900SChandler Carruth     errs() << "Could not open check file '" << CheckFilename
146520247900SChandler Carruth            << "': " << EC.message() << '\n';
146620247900SChandler Carruth     return 2;
146720247900SChandler Carruth   }
146820247900SChandler Carruth   MemoryBuffer &CheckFile = *CheckFileOrErr.get();
146920247900SChandler Carruth 
147020247900SChandler Carruth   SmallString<4096> CheckFileBuffer;
1471b03c166aSChandler Carruth   StringRef CheckFileText = CanonicalizeFile(CheckFile, CheckFileBuffer);
147220247900SChandler Carruth 
147320247900SChandler Carruth   SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(
147420247900SChandler Carruth                             CheckFileText, CheckFile.getBufferIdentifier()),
147520247900SChandler Carruth                         SMLoc());
147620247900SChandler Carruth 
147726cccfe1SChris Lattner   std::vector<CheckString> CheckStrings;
1478726774cbSChandler Carruth   if (ReadCheckFile(SM, CheckFileText, PrefixRE, CheckStrings))
1479ee3c74fbSChris Lattner     return 2;
1480ee3c74fbSChris Lattner 
1481ee3c74fbSChris Lattner   // Open the file to check and add it to SourceMgr.
148220247900SChandler Carruth   ErrorOr<std::unique_ptr<MemoryBuffer>> InputFileOrErr =
1483adf21f2aSRafael Espindola       MemoryBuffer::getFileOrSTDIN(InputFilename);
148420247900SChandler Carruth   if (std::error_code EC = InputFileOrErr.getError()) {
1485adf21f2aSRafael Espindola     errs() << "Could not open input file '" << InputFilename
1486adf21f2aSRafael Espindola            << "': " << EC.message() << '\n';
14878e1c6477SEli Bendersky     return 2;
1488ee3c74fbSChris Lattner   }
148920247900SChandler Carruth   MemoryBuffer &InputFile = *InputFileOrErr.get();
14902c3e5cdfSChris Lattner 
149120247900SChandler Carruth   if (InputFile.getBufferSize() == 0 && !AllowEmptyInput) {
1492b692bed7SChris Lattner     errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
14932bd4f8b6SXinliang David Li     DumpCommandLine(argc, argv);
14948e1c6477SEli Bendersky     return 2;
1495b692bed7SChris Lattner   }
1496b692bed7SChris Lattner 
149720247900SChandler Carruth   SmallString<4096> InputFileBuffer;
1498b03c166aSChandler Carruth   StringRef InputFileText = CanonicalizeFile(InputFile, InputFileBuffer);
14992c3e5cdfSChris Lattner 
1500e8f2fb20SChandler Carruth   SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(
1501e8f2fb20SChandler Carruth                             InputFileText, InputFile.getBufferIdentifier()),
1502e8f2fb20SChandler Carruth                         SMLoc());
1503ee3c74fbSChris Lattner 
150420247900SChandler Carruth   return CheckInput(SM, InputFileText, CheckStrings) ? EXIT_SUCCESS : 1;
1505ee3c74fbSChris Lattner }
1506