//===- FileCheck.cpp - Check that File's Contents match what is expected --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// FileCheck does a line-by-line check of a file that validates whether it
// contains the expected content.  This is useful for regression tests etc.
//
// This program exits with an exit status of 2 on error, exit status of 0 if
// the file matched the expected contents, and exit status of 1 if it did not
// contain the expected contents.
//
//===----------------------------------------------------------------------===//
18ee3c74fbSChris Lattner 
1991d19d8eSChandler Carruth #include "llvm/ADT/SmallString.h"
2091d19d8eSChandler Carruth #include "llvm/ADT/StringExtras.h"
2191d19d8eSChandler Carruth #include "llvm/ADT/StringMap.h"
2213df4626SMatt Arsenault #include "llvm/ADT/StringSet.h"
23ee3c74fbSChris Lattner #include "llvm/Support/CommandLine.h"
24197194b6SRui Ueyama #include "llvm/Support/InitLLVM.h"
25ee3c74fbSChris Lattner #include "llvm/Support/MemoryBuffer.h"
26f08d2db9SChris Lattner #include "llvm/Support/Regex.h"
27ee3c74fbSChris Lattner #include "llvm/Support/SourceMgr.h"
28ee3c74fbSChris Lattner #include "llvm/Support/raw_ostream.h"
298879e06dSChris Lattner #include <algorithm>
30981af002SWill Dietz #include <cctype>
31bcf5b441SJoel E. Denny #include <list>
32e8b8f1bcSEli Bendersky #include <map>
33e8b8f1bcSEli Bendersky #include <string>
34a6e9c3e4SRafael Espindola #include <system_error>
35e8b8f1bcSEli Bendersky #include <vector>
36ee3c74fbSChris Lattner using namespace llvm;
37ee3c74fbSChris Lattner 
38ee3c74fbSChris Lattner static cl::opt<std::string>
39ee3c74fbSChris Lattner     CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
40ee3c74fbSChris Lattner 
41ee3c74fbSChris Lattner static cl::opt<std::string>
42ee3c74fbSChris Lattner     InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
43ee3c74fbSChris Lattner                   cl::init("-"), cl::value_desc("filename"));
44ee3c74fbSChris Lattner 
45e8f2fb20SChandler Carruth static cl::list<std::string> CheckPrefixes(
46e8f2fb20SChandler Carruth     "check-prefix",
47ee3c74fbSChris Lattner     cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
48fd557cb0SDaniel Sanders static cl::alias CheckPrefixesAlias(
49fd557cb0SDaniel Sanders     "check-prefixes", cl::aliasopt(CheckPrefixes), cl::CommaSeparated,
50fd557cb0SDaniel Sanders     cl::NotHidden,
51fd557cb0SDaniel Sanders     cl::desc(
52fd557cb0SDaniel Sanders         "Alias for -check-prefix permitting multiple comma separated values"));
53ee3c74fbSChris Lattner 
54e8f2fb20SChandler Carruth static cl::opt<bool> NoCanonicalizeWhiteSpace(
55e8f2fb20SChandler Carruth     "strict-whitespace",
562c3e5cdfSChris Lattner     cl::desc("Do not treat all horizontal whitespace as equivalent"));
572c3e5cdfSChris Lattner 
5856ccdbbdSAlexander Kornienko static cl::list<std::string> ImplicitCheckNot(
5956ccdbbdSAlexander Kornienko     "implicit-check-not",
6056ccdbbdSAlexander Kornienko     cl::desc("Add an implicit negative check with this pattern to every\n"
6156ccdbbdSAlexander Kornienko              "positive check. This can be used to ensure that no instances of\n"
6256ccdbbdSAlexander Kornienko              "this pattern occur which are not matched by a positive pattern"),
6356ccdbbdSAlexander Kornienko     cl::value_desc("pattern"));
6456ccdbbdSAlexander Kornienko 
6546e1fd61SAlexander Richardson static cl::list<std::string> GlobalDefines("D", cl::Prefix,
6646e1fd61SAlexander Richardson     cl::desc("Define a variable to be used in capture patterns."),
6746e1fd61SAlexander Richardson     cl::value_desc("VAR=VALUE"));
6846e1fd61SAlexander Richardson 
691b9f936fSJustin Bogner static cl::opt<bool> AllowEmptyInput(
701b9f936fSJustin Bogner     "allow-empty", cl::init(false),
711b9f936fSJustin Bogner     cl::desc("Allow the input file to be empty. This is useful when making\n"
721b9f936fSJustin Bogner              "checks that some error message does not occur, for example."));
731b9f936fSJustin Bogner 
7485913ccaSJames Y Knight static cl::opt<bool> MatchFullLines(
7585913ccaSJames Y Knight     "match-full-lines", cl::init(false),
7685913ccaSJames Y Knight     cl::desc("Require all positive matches to cover an entire input line.\n"
7785913ccaSJames Y Knight              "Allows leading and trailing whitespace if --strict-whitespace\n"
7885913ccaSJames Y Knight              "is not also passed."));
7985913ccaSJames Y Knight 
80f55e72a5SArtem Belevich static cl::opt<bool> EnableVarScope(
81f55e72a5SArtem Belevich     "enable-var-scope", cl::init(false),
82f55e72a5SArtem Belevich     cl::desc("Enables scope for regex variables. Variables with names that\n"
83f55e72a5SArtem Belevich              "do not start with '$' will be reset at the beginning of\n"
84f55e72a5SArtem Belevich              "each CHECK-LABEL block."));
85f55e72a5SArtem Belevich 
86bcf5b441SJoel E. Denny static cl::opt<bool> AllowDeprecatedDagOverlap(
87bcf5b441SJoel E. Denny     "allow-deprecated-dag-overlap", cl::init(false),
88bcf5b441SJoel E. Denny     cl::desc("Enable overlapping among matches in a group of consecutive\n"
89bcf5b441SJoel E. Denny              "CHECK-DAG directives.  This option is deprecated and is only\n"
90bcf5b441SJoel E. Denny              "provided for convenience as old tests are migrated to the new\n"
91bcf5b441SJoel E. Denny              "non-overlapping CHECK-DAG implementation.\n"));
92bcf5b441SJoel E. Denny 
93*dc5ba317SJoel E. Denny static cl::opt<bool> Verbose("v", cl::init(false),
94*dc5ba317SJoel E. Denny                              cl::desc("Print directive pattern matches.\n"));
95*dc5ba317SJoel E. Denny 
96*dc5ba317SJoel E. Denny static cl::opt<bool> VerboseVerbose(
97*dc5ba317SJoel E. Denny     "vv", cl::init(false),
98*dc5ba317SJoel E. Denny     cl::desc("Print information helpful in diagnosing internal FileCheck\n"
99*dc5ba317SJoel E. Denny              "issues.  Implies -v.\n"));
100*dc5ba317SJoel E. Denny 
10113df4626SMatt Arsenault typedef cl::list<std::string>::const_iterator prefix_iterator;
10213df4626SMatt Arsenault 
10374d50731SChris Lattner //===----------------------------------------------------------------------===//
10474d50731SChris Lattner // Pattern Handling Code.
10574d50731SChris Lattner //===----------------------------------------------------------------------===//
10674d50731SChris Lattner 
namespace Check {
/// The kind of check a Pattern performs.  Enumerator names follow the
/// directive suffixes (for example CheckEmpty corresponds to CHECK-EMPTY and
/// CheckNot to CHECK-NOT).
enum CheckType {
  /// First enumerator; its value is zero.
  CheckNone,
  CheckPlain,
  CheckNext,
  CheckSame,
  CheckNot,
  CheckDAG,
  CheckLabel,
  CheckEmpty,

  /// The pattern matches only at the end of the file.  Used for trailing
  /// CHECK-NOTs.
  CheckEOF,

  /// Set when parsing encounters a -NOT check combined with another CHECK
  /// suffix.
  CheckBadNot
};
} // end namespace Check
126eba55822SJakob Stoklund Olesen 
12738820972SMatt Arsenault class Pattern {
12838820972SMatt Arsenault   SMLoc PatternLoc;
12991a1b2c9SMichael Liao 
1304dabac20SChandler Carruth   /// A fixed string to match as the pattern or empty if this pattern requires
1314dabac20SChandler Carruth   /// a regex match.
132221460e0SChris Lattner   StringRef FixedStr;
133b16ab0c4SChris Lattner 
1344dabac20SChandler Carruth   /// A regex string to match as the pattern or empty if this pattern requires
1354dabac20SChandler Carruth   /// a fixed string to match.
136b16ab0c4SChris Lattner   std::string RegExStr;
1378879e06dSChris Lattner 
1384dabac20SChandler Carruth   /// Entries in this vector map to uses of a variable in the pattern, e.g.
1394dabac20SChandler Carruth   /// "foo[[bar]]baz".  In this case, the RegExStr will contain "foobaz" and
1404dabac20SChandler Carruth   /// we'll get an entry in this vector that tells us to insert the value of
1414dabac20SChandler Carruth   /// bar at offset 3.
1428879e06dSChris Lattner   std::vector<std::pair<StringRef, unsigned>> VariableUses;
1438879e06dSChris Lattner 
1444dabac20SChandler Carruth   /// Maps definitions of variables to their parenthesized capture numbers.
1454dabac20SChandler Carruth   ///
1464dabac20SChandler Carruth   /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to
1474dabac20SChandler Carruth   /// 1.
148e8b8f1bcSEli Bendersky   std::map<StringRef, unsigned> VariableDefs;
1498879e06dSChris Lattner 
150d1e020f7SSaleem Abdulrasool   Check::CheckType CheckTy;
1513b40b445SChris Lattner 
1524dabac20SChandler Carruth   /// Contains the number of line this pattern is in.
153d1e020f7SSaleem Abdulrasool   unsigned LineNumber;
154d1e020f7SSaleem Abdulrasool 
155d1e020f7SSaleem Abdulrasool public:
156d1e020f7SSaleem Abdulrasool   explicit Pattern(Check::CheckType Ty) : CheckTy(Ty) {}
15774d50731SChris Lattner 
1584dabac20SChandler Carruth   /// Returns the location in source code.
1590b707eb8SMichael Liao   SMLoc getLoc() const { return PatternLoc; }
1600b707eb8SMichael Liao 
161e8f2fb20SChandler Carruth   bool ParsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM,
16213df4626SMatt Arsenault                     unsigned LineNumber);
1638879e06dSChris Lattner   size_t Match(StringRef Buffer, size_t &MatchLen,
1648879e06dSChris Lattner                StringMap<StringRef> &VariableTable) const;
165*dc5ba317SJoel E. Denny   void PrintVariableUses(const SourceMgr &SM, StringRef Buffer,
166*dc5ba317SJoel E. Denny                          const StringMap<StringRef> &VariableTable,
167*dc5ba317SJoel E. Denny                          SMRange MatchRange = None) const;
168*dc5ba317SJoel E. Denny   void PrintFuzzyMatch(const SourceMgr &SM, StringRef Buffer,
169e0ef65abSDaniel Dunbar                        const StringMap<StringRef> &VariableTable) const;
170e0ef65abSDaniel Dunbar 
171e8f2fb20SChandler Carruth   bool hasVariable() const {
172e8f2fb20SChandler Carruth     return !(VariableUses.empty() && VariableDefs.empty());
173e8f2fb20SChandler Carruth   }
174f8bd2e5bSStephen Lin 
17538820972SMatt Arsenault   Check::CheckType getCheckTy() const { return CheckTy; }
17691a1b2c9SMichael Liao 
177b16ab0c4SChris Lattner private:
178e8b8f1bcSEli Bendersky   bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
179e8b8f1bcSEli Bendersky   void AddBackrefToRegEx(unsigned BackrefNum);
180e8f2fb20SChandler Carruth   unsigned
181e8f2fb20SChandler Carruth   ComputeMatchDistance(StringRef Buffer,
182fd29d886SDaniel Dunbar                        const StringMap<StringRef> &VariableTable) const;
18392987fb3SAlexander Kornienko   bool EvaluateExpression(StringRef Expr, std::string &Value) const;
18481e5cd9eSAdrian Prantl   size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
1853b40b445SChris Lattner };
1863b40b445SChris Lattner 
1874dabac20SChandler Carruth /// Parses the given string into the Pattern.
1884dabac20SChandler Carruth ///
1894dabac20SChandler Carruth /// \p Prefix provides which prefix is being matched, \p SM provides the
1904dabac20SChandler Carruth /// SourceMgr used for error reports, and \p LineNumber is the line number in
1914dabac20SChandler Carruth /// the input file from which the pattern string was read. Returns true in
1924dabac20SChandler Carruth /// case of an error, false otherwise.
193e8f2fb20SChandler Carruth bool Pattern::ParsePattern(StringRef PatternStr, StringRef Prefix,
194e8f2fb20SChandler Carruth                            SourceMgr &SM, unsigned LineNumber) {
19585913ccaSJames Y Knight   bool MatchFullLinesHere = MatchFullLines && CheckTy != Check::CheckNot;
19685913ccaSJames Y Knight 
19792987fb3SAlexander Kornienko   this->LineNumber = LineNumber;
1980a4c44bdSChris Lattner   PatternLoc = SMLoc::getFromPointer(PatternStr.data());
1990a4c44bdSChris Lattner 
2001714676aSTom de Vries   if (!(NoCanonicalizeWhiteSpace && MatchFullLines))
20174d50731SChris Lattner     // Ignore trailing whitespace.
20274d50731SChris Lattner     while (!PatternStr.empty() &&
20374d50731SChris Lattner            (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
20474d50731SChris Lattner       PatternStr = PatternStr.substr(0, PatternStr.size() - 1);
20574d50731SChris Lattner 
20674d50731SChris Lattner   // Check that there is something on the line.
2075507f668SJames Henderson   if (PatternStr.empty() && CheckTy != Check::CheckEmpty) {
20803b80a40SChris Lattner     SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
209e8f2fb20SChandler Carruth                     "found empty check string with prefix '" + Prefix + ":'");
21074d50731SChris Lattner     return true;
21174d50731SChris Lattner   }
21274d50731SChris Lattner 
2135507f668SJames Henderson   if (!PatternStr.empty() && CheckTy == Check::CheckEmpty) {
2145507f668SJames Henderson     SM.PrintMessage(
2155507f668SJames Henderson         PatternLoc, SourceMgr::DK_Error,
2165507f668SJames Henderson         "found non-empty check string for empty check with prefix '" + Prefix +
2175507f668SJames Henderson             ":'");
2185507f668SJames Henderson     return true;
2195507f668SJames Henderson   }
2205507f668SJames Henderson 
2215507f668SJames Henderson   if (CheckTy == Check::CheckEmpty) {
2225507f668SJames Henderson     RegExStr = "(\n$)";
2235507f668SJames Henderson     return false;
2245507f668SJames Henderson   }
2255507f668SJames Henderson 
226221460e0SChris Lattner   // Check to see if this is a fixed string, or if it has regex pieces.
22785913ccaSJames Y Knight   if (!MatchFullLinesHere &&
22885913ccaSJames Y Knight       (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos &&
22985913ccaSJames Y Knight                                  PatternStr.find("[[") == StringRef::npos))) {
230221460e0SChris Lattner     FixedStr = PatternStr;
231221460e0SChris Lattner     return false;
232221460e0SChris Lattner   }
233221460e0SChris Lattner 
23485913ccaSJames Y Knight   if (MatchFullLinesHere) {
23585913ccaSJames Y Knight     RegExStr += '^';
23685913ccaSJames Y Knight     if (!NoCanonicalizeWhiteSpace)
23785913ccaSJames Y Knight       RegExStr += " *";
23885913ccaSJames Y Knight   }
23985913ccaSJames Y Knight 
2408879e06dSChris Lattner   // Paren value #0 is for the fully matched string.  Any new parenthesized
24153e0679dSChris Lattner   // values add from there.
2428879e06dSChris Lattner   unsigned CurParen = 1;
2438879e06dSChris Lattner 
244b16ab0c4SChris Lattner   // Otherwise, there is at least one regex piece.  Build up the regex pattern
245b16ab0c4SChris Lattner   // by escaping scary characters in fixed strings, building up one big regex.
246f08d2db9SChris Lattner   while (!PatternStr.empty()) {
2478879e06dSChris Lattner     // RegEx matches.
24853e0679dSChris Lattner     if (PatternStr.startswith("{{")) {
24943d50d4aSEli Bendersky       // This is the start of a regex match.  Scan for the }}.
250f08d2db9SChris Lattner       size_t End = PatternStr.find("}}");
251f08d2db9SChris Lattner       if (End == StringRef::npos) {
252f08d2db9SChris Lattner         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
25303b80a40SChris Lattner                         SourceMgr::DK_Error,
25403b80a40SChris Lattner                         "found start of regex string with no end '}}'");
255f08d2db9SChris Lattner         return true;
256f08d2db9SChris Lattner       }
257f08d2db9SChris Lattner 
258e53c95f1SChris Lattner       // Enclose {{}} patterns in parens just like [[]] even though we're not
259e53c95f1SChris Lattner       // capturing the result for any purpose.  This is required in case the
260e53c95f1SChris Lattner       // expression contains an alternation like: CHECK:  abc{{x|z}}def.  We
261e53c95f1SChris Lattner       // want this to turn into: "abc(x|z)def" not "abcx|zdef".
262e53c95f1SChris Lattner       RegExStr += '(';
263e53c95f1SChris Lattner       ++CurParen;
264e53c95f1SChris Lattner 
2658879e06dSChris Lattner       if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM))
2668879e06dSChris Lattner         return true;
267e53c95f1SChris Lattner       RegExStr += ')';
26853e0679dSChris Lattner 
2698879e06dSChris Lattner       PatternStr = PatternStr.substr(End + 2);
2708879e06dSChris Lattner       continue;
2718879e06dSChris Lattner     }
2728879e06dSChris Lattner 
2738879e06dSChris Lattner     // Named RegEx matches.  These are of two forms: [[foo:.*]] which matches .*
2748879e06dSChris Lattner     // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
2758879e06dSChris Lattner     // second form is [[foo]] which is a reference to foo.  The variable name
27657cb733bSDaniel Dunbar     // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
2778879e06dSChris Lattner     // it.  This is to catch some common errors.
27853e0679dSChris Lattner     if (PatternStr.startswith("[[")) {
279061d2baaSEli Bendersky       // Find the closing bracket pair ending the match.  End is going to be an
280061d2baaSEli Bendersky       // offset relative to the beginning of the match string.
28181e5cd9eSAdrian Prantl       size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
282061d2baaSEli Bendersky 
2838879e06dSChris Lattner       if (End == StringRef::npos) {
2848879e06dSChris Lattner         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
28503b80a40SChris Lattner                         SourceMgr::DK_Error,
28603b80a40SChris Lattner                         "invalid named regex reference, no ]] found");
287f08d2db9SChris Lattner         return true;
288f08d2db9SChris Lattner       }
289f08d2db9SChris Lattner 
290061d2baaSEli Bendersky       StringRef MatchStr = PatternStr.substr(2, End);
291061d2baaSEli Bendersky       PatternStr = PatternStr.substr(End + 4);
2928879e06dSChris Lattner 
2938879e06dSChris Lattner       // Get the regex name (e.g. "foo").
2948879e06dSChris Lattner       size_t NameEnd = MatchStr.find(':');
2958879e06dSChris Lattner       StringRef Name = MatchStr.substr(0, NameEnd);
2968879e06dSChris Lattner 
2978879e06dSChris Lattner       if (Name.empty()) {
29803b80a40SChris Lattner         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
29903b80a40SChris Lattner                         "invalid name in named regex: empty name");
3008879e06dSChris Lattner         return true;
3018879e06dSChris Lattner       }
3028879e06dSChris Lattner 
30392987fb3SAlexander Kornienko       // Verify that the name/expression is well formed. FileCheck currently
30492987fb3SAlexander Kornienko       // supports @LINE, @LINE+number, @LINE-number expressions. The check here
30592987fb3SAlexander Kornienko       // is relaxed, more strict check is performed in \c EvaluateExpression.
30692987fb3SAlexander Kornienko       bool IsExpression = false;
30792987fb3SAlexander Kornienko       for (unsigned i = 0, e = Name.size(); i != e; ++i) {
308f55e72a5SArtem Belevich         if (i == 0) {
309f55e72a5SArtem Belevich           if (Name[i] == '$')  // Global vars start with '$'
310f55e72a5SArtem Belevich             continue;
311f55e72a5SArtem Belevich           if (Name[i] == '@') {
31292987fb3SAlexander Kornienko             if (NameEnd != StringRef::npos) {
31392987fb3SAlexander Kornienko               SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
31492987fb3SAlexander Kornienko                               SourceMgr::DK_Error,
31592987fb3SAlexander Kornienko                               "invalid name in named regex definition");
31692987fb3SAlexander Kornienko               return true;
31792987fb3SAlexander Kornienko             }
31892987fb3SAlexander Kornienko             IsExpression = true;
31992987fb3SAlexander Kornienko             continue;
32092987fb3SAlexander Kornienko           }
321f55e72a5SArtem Belevich         }
32292987fb3SAlexander Kornienko         if (Name[i] != '_' && !isalnum(Name[i]) &&
32392987fb3SAlexander Kornienko             (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
3248879e06dSChris Lattner           SM.PrintMessage(SMLoc::getFromPointer(Name.data() + i),
32503b80a40SChris Lattner                           SourceMgr::DK_Error, "invalid name in named regex");
3268879e06dSChris Lattner           return true;
3278879e06dSChris Lattner         }
32892987fb3SAlexander Kornienko       }
3298879e06dSChris Lattner 
3308879e06dSChris Lattner       // Name can't start with a digit.
33183c74e9fSGuy Benyei       if (isdigit(static_cast<unsigned char>(Name[0]))) {
33203b80a40SChris Lattner         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
33303b80a40SChris Lattner                         "invalid name in named regex");
3348879e06dSChris Lattner         return true;
3358879e06dSChris Lattner       }
3368879e06dSChris Lattner 
3378879e06dSChris Lattner       // Handle [[foo]].
3388879e06dSChris Lattner       if (NameEnd == StringRef::npos) {
339e8b8f1bcSEli Bendersky         // Handle variables that were defined earlier on the same line by
340e8b8f1bcSEli Bendersky         // emitting a backreference.
341e8b8f1bcSEli Bendersky         if (VariableDefs.find(Name) != VariableDefs.end()) {
342e8b8f1bcSEli Bendersky           unsigned VarParenNum = VariableDefs[Name];
343e8b8f1bcSEli Bendersky           if (VarParenNum < 1 || VarParenNum > 9) {
344e8b8f1bcSEli Bendersky             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
345e8b8f1bcSEli Bendersky                             SourceMgr::DK_Error,
346e8b8f1bcSEli Bendersky                             "Can't back-reference more than 9 variables");
347e8b8f1bcSEli Bendersky             return true;
348e8b8f1bcSEli Bendersky           }
349e8b8f1bcSEli Bendersky           AddBackrefToRegEx(VarParenNum);
350e8b8f1bcSEli Bendersky         } else {
3518879e06dSChris Lattner           VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
352e8b8f1bcSEli Bendersky         }
3538879e06dSChris Lattner         continue;
3548879e06dSChris Lattner       }
3558879e06dSChris Lattner 
3568879e06dSChris Lattner       // Handle [[foo:.*]].
357e8b8f1bcSEli Bendersky       VariableDefs[Name] = CurParen;
3588879e06dSChris Lattner       RegExStr += '(';
3598879e06dSChris Lattner       ++CurParen;
3608879e06dSChris Lattner 
3618879e06dSChris Lattner       if (AddRegExToRegEx(MatchStr.substr(NameEnd + 1), CurParen, SM))
3628879e06dSChris Lattner         return true;
3638879e06dSChris Lattner 
3648879e06dSChris Lattner       RegExStr += ')';
3658879e06dSChris Lattner     }
3668879e06dSChris Lattner 
3678879e06dSChris Lattner     // Handle fixed string matches.
3688879e06dSChris Lattner     // Find the end, which is the start of the next regex.
3698879e06dSChris Lattner     size_t FixedMatchEnd = PatternStr.find("{{");
3708879e06dSChris Lattner     FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
3716f4f77b7SHans Wennborg     RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
3728879e06dSChris Lattner     PatternStr = PatternStr.substr(FixedMatchEnd);
373f08d2db9SChris Lattner   }
374f08d2db9SChris Lattner 
37585913ccaSJames Y Knight   if (MatchFullLinesHere) {
37685913ccaSJames Y Knight     if (!NoCanonicalizeWhiteSpace)
37785913ccaSJames Y Knight       RegExStr += " *";
37885913ccaSJames Y Knight     RegExStr += '$';
37985913ccaSJames Y Knight   }
38085913ccaSJames Y Knight 
38174d50731SChris Lattner   return false;
38274d50731SChris Lattner }
38374d50731SChris Lattner 
384e8f2fb20SChandler Carruth bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) {
385e8b8f1bcSEli Bendersky   Regex R(RS);
3868879e06dSChris Lattner   std::string Error;
3878879e06dSChris Lattner   if (!R.isValid(Error)) {
388e8b8f1bcSEli Bendersky     SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
38903b80a40SChris Lattner                     "invalid regex: " + Error);
3908879e06dSChris Lattner     return true;
3918879e06dSChris Lattner   }
3928879e06dSChris Lattner 
393e8b8f1bcSEli Bendersky   RegExStr += RS.str();
3948879e06dSChris Lattner   CurParen += R.getNumMatches();
3958879e06dSChris Lattner   return false;
3968879e06dSChris Lattner }
397b16ab0c4SChris Lattner 
398e8b8f1bcSEli Bendersky void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
399e8b8f1bcSEli Bendersky   assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
400e8f2fb20SChandler Carruth   std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum);
401e8b8f1bcSEli Bendersky   RegExStr += Backref;
402e8b8f1bcSEli Bendersky }
403e8b8f1bcSEli Bendersky 
4044dabac20SChandler Carruth /// Evaluates expression and stores the result to \p Value.
4054dabac20SChandler Carruth ///
4064dabac20SChandler Carruth /// Returns true on success and false when the expression has invalid syntax.
40792987fb3SAlexander Kornienko bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
40892987fb3SAlexander Kornienko   // The only supported expression is @LINE([\+-]\d+)?
40992987fb3SAlexander Kornienko   if (!Expr.startswith("@LINE"))
41092987fb3SAlexander Kornienko     return false;
41192987fb3SAlexander Kornienko   Expr = Expr.substr(StringRef("@LINE").size());
41292987fb3SAlexander Kornienko   int Offset = 0;
41392987fb3SAlexander Kornienko   if (!Expr.empty()) {
41492987fb3SAlexander Kornienko     if (Expr[0] == '+')
41592987fb3SAlexander Kornienko       Expr = Expr.substr(1);
41692987fb3SAlexander Kornienko     else if (Expr[0] != '-')
41792987fb3SAlexander Kornienko       return false;
41892987fb3SAlexander Kornienko     if (Expr.getAsInteger(10, Offset))
41992987fb3SAlexander Kornienko       return false;
42092987fb3SAlexander Kornienko   }
42192987fb3SAlexander Kornienko   Value = llvm::itostr(LineNumber + Offset);
42292987fb3SAlexander Kornienko   return true;
42392987fb3SAlexander Kornienko }
42492987fb3SAlexander Kornienko 
4254dabac20SChandler Carruth /// Matches the pattern string against the input buffer \p Buffer
4264dabac20SChandler Carruth ///
4274dabac20SChandler Carruth /// This returns the position that is matched or npos if there is no match. If
4284dabac20SChandler Carruth /// there is a match, the size of the matched string is returned in \p
4294dabac20SChandler Carruth /// MatchLen.
4304dabac20SChandler Carruth ///
4314dabac20SChandler Carruth /// The \p VariableTable StringMap provides the current values of filecheck
4324dabac20SChandler Carruth /// variables and is updated if this match defines new values.
4338879e06dSChris Lattner size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
4348879e06dSChris Lattner                       StringMap<StringRef> &VariableTable) const {
435eba55822SJakob Stoklund Olesen   // If this is the EOF pattern, match it immediately.
43638820972SMatt Arsenault   if (CheckTy == Check::CheckEOF) {
437eba55822SJakob Stoklund Olesen     MatchLen = 0;
438eba55822SJakob Stoklund Olesen     return Buffer.size();
439eba55822SJakob Stoklund Olesen   }
440eba55822SJakob Stoklund Olesen 
441221460e0SChris Lattner   // If this is a fixed string pattern, just match it now.
442221460e0SChris Lattner   if (!FixedStr.empty()) {
443221460e0SChris Lattner     MatchLen = FixedStr.size();
444221460e0SChris Lattner     return Buffer.find(FixedStr);
445221460e0SChris Lattner   }
446221460e0SChris Lattner 
447b16ab0c4SChris Lattner   // Regex match.
4488879e06dSChris Lattner 
4498879e06dSChris Lattner   // If there are variable uses, we need to create a temporary string with the
4508879e06dSChris Lattner   // actual value.
4518879e06dSChris Lattner   StringRef RegExToMatch = RegExStr;
4528879e06dSChris Lattner   std::string TmpStr;
4538879e06dSChris Lattner   if (!VariableUses.empty()) {
4548879e06dSChris Lattner     TmpStr = RegExStr;
4558879e06dSChris Lattner 
4568879e06dSChris Lattner     unsigned InsertOffset = 0;
4578f870499SBenjamin Kramer     for (const auto &VariableUse : VariableUses) {
45892987fb3SAlexander Kornienko       std::string Value;
45992987fb3SAlexander Kornienko 
4608f870499SBenjamin Kramer       if (VariableUse.first[0] == '@') {
4618f870499SBenjamin Kramer         if (!EvaluateExpression(VariableUse.first, Value))
46292987fb3SAlexander Kornienko           return StringRef::npos;
46392987fb3SAlexander Kornienko       } else {
464e0ef65abSDaniel Dunbar         StringMap<StringRef>::iterator it =
4658f870499SBenjamin Kramer             VariableTable.find(VariableUse.first);
466e0ef65abSDaniel Dunbar         // If the variable is undefined, return an error.
467e0ef65abSDaniel Dunbar         if (it == VariableTable.end())
468e0ef65abSDaniel Dunbar           return StringRef::npos;
469e0ef65abSDaniel Dunbar 
4706f4f77b7SHans Wennborg         // Look up the value and escape it so that we can put it into the regex.
4716f4f77b7SHans Wennborg         Value += Regex::escape(it->second);
47292987fb3SAlexander Kornienko       }
4738879e06dSChris Lattner 
4748879e06dSChris Lattner       // Plop it into the regex at the adjusted offset.
4758f870499SBenjamin Kramer       TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset,
4768879e06dSChris Lattner                     Value.begin(), Value.end());
4778879e06dSChris Lattner       InsertOffset += Value.size();
4788879e06dSChris Lattner     }
4798879e06dSChris Lattner 
4808879e06dSChris Lattner     // Match the newly constructed regex.
4818879e06dSChris Lattner     RegExToMatch = TmpStr;
4828879e06dSChris Lattner   }
4838879e06dSChris Lattner 
484b16ab0c4SChris Lattner   SmallVector<StringRef, 4> MatchInfo;
4858879e06dSChris Lattner   if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
486f08d2db9SChris Lattner     return StringRef::npos;
487b16ab0c4SChris Lattner 
488b16ab0c4SChris Lattner   // Successful regex match.
489b16ab0c4SChris Lattner   assert(!MatchInfo.empty() && "Didn't get any match");
490b16ab0c4SChris Lattner   StringRef FullMatch = MatchInfo[0];
491b16ab0c4SChris Lattner 
4928879e06dSChris Lattner   // If this defines any variables, remember their values.
4938f870499SBenjamin Kramer   for (const auto &VariableDef : VariableDefs) {
4948f870499SBenjamin Kramer     assert(VariableDef.second < MatchInfo.size() && "Internal paren error");
4958f870499SBenjamin Kramer     VariableTable[VariableDef.first] = MatchInfo[VariableDef.second];
4960a4c44bdSChris Lattner   }
4970a4c44bdSChris Lattner 
498*dc5ba317SJoel E. Denny   // Like CHECK-NEXT, CHECK-EMPTY's match range is considered to start after
499*dc5ba317SJoel E. Denny   // the required preceding newline, which is consumed by the pattern in the
500*dc5ba317SJoel E. Denny   // case of CHECK-EMPTY but not CHECK-NEXT.
501*dc5ba317SJoel E. Denny   size_t MatchStartSkip = CheckTy == Check::CheckEmpty;
502*dc5ba317SJoel E. Denny   MatchLen = FullMatch.size() - MatchStartSkip;
503*dc5ba317SJoel E. Denny   return FullMatch.data() - Buffer.data() + MatchStartSkip;
504f08d2db9SChris Lattner }
505f08d2db9SChris Lattner 
5064dabac20SChandler Carruth 
5074dabac20SChandler Carruth /// Computes an arbitrary estimate for the quality of matching this pattern at
5084dabac20SChandler Carruth /// the start of \p Buffer; a distance of zero should correspond to a perfect
5094dabac20SChandler Carruth /// match.
510e8f2fb20SChandler Carruth unsigned
511e8f2fb20SChandler Carruth Pattern::ComputeMatchDistance(StringRef Buffer,
512fd29d886SDaniel Dunbar                               const StringMap<StringRef> &VariableTable) const {
513fd29d886SDaniel Dunbar   // Just compute the number of matching characters. For regular expressions, we
514fd29d886SDaniel Dunbar   // just compare against the regex itself and hope for the best.
515fd29d886SDaniel Dunbar   //
516fd29d886SDaniel Dunbar   // FIXME: One easy improvement here is have the regex lib generate a single
517fd29d886SDaniel Dunbar   // example regular expression which matches, and use that as the example
518fd29d886SDaniel Dunbar   // string.
519fd29d886SDaniel Dunbar   StringRef ExampleString(FixedStr);
520fd29d886SDaniel Dunbar   if (ExampleString.empty())
521fd29d886SDaniel Dunbar     ExampleString = RegExStr;
522fd29d886SDaniel Dunbar 
523e9aa36c8SDaniel Dunbar   // Only compare up to the first line in the buffer, or the string size.
524e9aa36c8SDaniel Dunbar   StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
525e9aa36c8SDaniel Dunbar   BufferPrefix = BufferPrefix.split('\n').first;
526e9aa36c8SDaniel Dunbar   return BufferPrefix.edit_distance(ExampleString);
527fd29d886SDaniel Dunbar }
528fd29d886SDaniel Dunbar 
529*dc5ba317SJoel E. Denny void Pattern::PrintVariableUses(const SourceMgr &SM, StringRef Buffer,
530*dc5ba317SJoel E. Denny                                 const StringMap<StringRef> &VariableTable,
531*dc5ba317SJoel E. Denny                                 SMRange MatchRange) const {
532e0ef65abSDaniel Dunbar   // If this was a regular expression using variables, print the current
533e0ef65abSDaniel Dunbar   // variable values.
534e0ef65abSDaniel Dunbar   if (!VariableUses.empty()) {
5358f870499SBenjamin Kramer     for (const auto &VariableUse : VariableUses) {
536e69170a1SAlp Toker       SmallString<256> Msg;
537e69170a1SAlp Toker       raw_svector_ostream OS(Msg);
5388f870499SBenjamin Kramer       StringRef Var = VariableUse.first;
53992987fb3SAlexander Kornienko       if (Var[0] == '@') {
54092987fb3SAlexander Kornienko         std::string Value;
54192987fb3SAlexander Kornienko         if (EvaluateExpression(Var, Value)) {
54292987fb3SAlexander Kornienko           OS << "with expression \"";
54392987fb3SAlexander Kornienko           OS.write_escaped(Var) << "\" equal to \"";
54492987fb3SAlexander Kornienko           OS.write_escaped(Value) << "\"";
54592987fb3SAlexander Kornienko         } else {
54692987fb3SAlexander Kornienko           OS << "uses incorrect expression \"";
54792987fb3SAlexander Kornienko           OS.write_escaped(Var) << "\"";
54892987fb3SAlexander Kornienko         }
54992987fb3SAlexander Kornienko       } else {
55092987fb3SAlexander Kornienko         StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
551e0ef65abSDaniel Dunbar 
552e0ef65abSDaniel Dunbar         // Check for undefined variable references.
553e0ef65abSDaniel Dunbar         if (it == VariableTable.end()) {
554e0ef65abSDaniel Dunbar           OS << "uses undefined variable \"";
55592987fb3SAlexander Kornienko           OS.write_escaped(Var) << "\"";
556e0ef65abSDaniel Dunbar         } else {
557e0ef65abSDaniel Dunbar           OS << "with variable \"";
558e0ef65abSDaniel Dunbar           OS.write_escaped(Var) << "\" equal to \"";
559e0ef65abSDaniel Dunbar           OS.write_escaped(it->second) << "\"";
560e0ef65abSDaniel Dunbar         }
56192987fb3SAlexander Kornienko       }
562e0ef65abSDaniel Dunbar 
563*dc5ba317SJoel E. Denny       if (MatchRange.isValid())
564*dc5ba317SJoel E. Denny         SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, OS.str(),
565*dc5ba317SJoel E. Denny                         {MatchRange});
566*dc5ba317SJoel E. Denny       else
567*dc5ba317SJoel E. Denny         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()),
568*dc5ba317SJoel E. Denny                         SourceMgr::DK_Note, OS.str());
569*dc5ba317SJoel E. Denny     }
570e0ef65abSDaniel Dunbar   }
571e0ef65abSDaniel Dunbar }
572fd29d886SDaniel Dunbar 
573*dc5ba317SJoel E. Denny void Pattern::PrintFuzzyMatch(
574*dc5ba317SJoel E. Denny     const SourceMgr &SM, StringRef Buffer,
575*dc5ba317SJoel E. Denny     const StringMap<StringRef> &VariableTable) const {
576fd29d886SDaniel Dunbar   // Attempt to find the closest/best fuzzy match.  Usually an error happens
577fd29d886SDaniel Dunbar   // because some string in the output didn't exactly match. In these cases, we
578fd29d886SDaniel Dunbar   // would like to show the user a best guess at what "should have" matched, to
579fd29d886SDaniel Dunbar   // save them having to actually check the input manually.
580fd29d886SDaniel Dunbar   size_t NumLinesForward = 0;
581fd29d886SDaniel Dunbar   size_t Best = StringRef::npos;
582fd29d886SDaniel Dunbar   double BestQuality = 0;
583fd29d886SDaniel Dunbar 
584fd29d886SDaniel Dunbar   // Use an arbitrary 4k limit on how far we will search.
5852bf486ebSDan Gohman   for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
586fd29d886SDaniel Dunbar     if (Buffer[i] == '\n')
587fd29d886SDaniel Dunbar       ++NumLinesForward;
588fd29d886SDaniel Dunbar 
589df22bbf7SDan Gohman     // Patterns have leading whitespace stripped, so skip whitespace when
590df22bbf7SDan Gohman     // looking for something which looks like a pattern.
591df22bbf7SDan Gohman     if (Buffer[i] == ' ' || Buffer[i] == '\t')
592df22bbf7SDan Gohman       continue;
593df22bbf7SDan Gohman 
594fd29d886SDaniel Dunbar     // Compute the "quality" of this match as an arbitrary combination of the
595fd29d886SDaniel Dunbar     // match distance and the number of lines skipped to get to this match.
596fd29d886SDaniel Dunbar     unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
597fd29d886SDaniel Dunbar     double Quality = Distance + (NumLinesForward / 100.);
598fd29d886SDaniel Dunbar 
599fd29d886SDaniel Dunbar     if (Quality < BestQuality || Best == StringRef::npos) {
600fd29d886SDaniel Dunbar       Best = i;
601fd29d886SDaniel Dunbar       BestQuality = Quality;
602fd29d886SDaniel Dunbar     }
603fd29d886SDaniel Dunbar   }
604fd29d886SDaniel Dunbar 
605fd29d886SDaniel Dunbar   // Print the "possible intended match here" line if we found something
606c069cc8eSDaniel Dunbar   // reasonable and not equal to what we showed in the "scanning from here"
607c069cc8eSDaniel Dunbar   // line.
608c069cc8eSDaniel Dunbar   if (Best && Best != StringRef::npos && BestQuality < 50) {
609fd29d886SDaniel Dunbar     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
61003b80a40SChris Lattner                     SourceMgr::DK_Note, "possible intended match here");
611fd29d886SDaniel Dunbar 
612fd29d886SDaniel Dunbar     // FIXME: If we wanted to be really friendly we would show why the match
613fd29d886SDaniel Dunbar     // failed, as it can be hard to spot simple one character differences.
614fd29d886SDaniel Dunbar   }
615e0ef65abSDaniel Dunbar }
61674d50731SChris Lattner 
6174dabac20SChandler Carruth /// Finds the closing sequence of a regex variable usage or definition.
6184dabac20SChandler Carruth ///
6194dabac20SChandler Carruth /// \p Str has to point in the beginning of the definition (right after the
6204dabac20SChandler Carruth /// opening sequence). Returns the offset of the closing sequence within Str,
6214dabac20SChandler Carruth /// or npos if it was not found.
62281e5cd9eSAdrian Prantl size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
623061d2baaSEli Bendersky   // Offset keeps track of the current offset within the input Str
624061d2baaSEli Bendersky   size_t Offset = 0;
625061d2baaSEli Bendersky   // [...] Nesting depth
626061d2baaSEli Bendersky   size_t BracketDepth = 0;
627061d2baaSEli Bendersky 
628061d2baaSEli Bendersky   while (!Str.empty()) {
629061d2baaSEli Bendersky     if (Str.startswith("]]") && BracketDepth == 0)
630061d2baaSEli Bendersky       return Offset;
631061d2baaSEli Bendersky     if (Str[0] == '\\') {
632061d2baaSEli Bendersky       // Backslash escapes the next char within regexes, so skip them both.
633061d2baaSEli Bendersky       Str = Str.substr(2);
634061d2baaSEli Bendersky       Offset += 2;
635061d2baaSEli Bendersky     } else {
636061d2baaSEli Bendersky       switch (Str[0]) {
637061d2baaSEli Bendersky       default:
638061d2baaSEli Bendersky         break;
639061d2baaSEli Bendersky       case '[':
640061d2baaSEli Bendersky         BracketDepth++;
641061d2baaSEli Bendersky         break;
642061d2baaSEli Bendersky       case ']':
64381e5cd9eSAdrian Prantl         if (BracketDepth == 0) {
64481e5cd9eSAdrian Prantl           SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
64581e5cd9eSAdrian Prantl                           SourceMgr::DK_Error,
64681e5cd9eSAdrian Prantl                           "missing closing \"]\" for regex variable");
64781e5cd9eSAdrian Prantl           exit(1);
64881e5cd9eSAdrian Prantl         }
649061d2baaSEli Bendersky         BracketDepth--;
650061d2baaSEli Bendersky         break;
651061d2baaSEli Bendersky       }
652061d2baaSEli Bendersky       Str = Str.substr(1);
653061d2baaSEli Bendersky       Offset++;
654061d2baaSEli Bendersky     }
655061d2baaSEli Bendersky   }
656061d2baaSEli Bendersky 
657061d2baaSEli Bendersky   return StringRef::npos;
658061d2baaSEli Bendersky }
659061d2baaSEli Bendersky 
66074d50731SChris Lattner //===----------------------------------------------------------------------===//
66174d50731SChris Lattner // Check Strings.
66274d50731SChris Lattner //===----------------------------------------------------------------------===//
6633b40b445SChris Lattner 
6644dabac20SChandler Carruth /// A check that we found in the input file.
6653b40b445SChris Lattner struct CheckString {
6664dabac20SChandler Carruth   /// The pattern to match.
6673b40b445SChris Lattner   Pattern Pat;
66826cccfe1SChris Lattner 
6694dabac20SChandler Carruth   /// Which prefix name this check matched.
67013df4626SMatt Arsenault   StringRef Prefix;
67113df4626SMatt Arsenault 
6724dabac20SChandler Carruth   /// The location in the match file that the check string was specified.
67326cccfe1SChris Lattner   SMLoc Loc;
67426cccfe1SChris Lattner 
6754dabac20SChandler Carruth   /// All of the strings that are disallowed from occurring between this match
6764dabac20SChandler Carruth   /// string and the previous one (or start of file).
67791a1b2c9SMichael Liao   std::vector<Pattern> DagNotStrings;
678236d2d5eSChris Lattner 
67985913ccaSJames Y Knight   CheckString(const Pattern &P, StringRef S, SMLoc L)
68085913ccaSJames Y Knight       : Pat(P), Prefix(S), Loc(L) {}
681dcc7d48dSMichael Liao 
682e93a3a08SStephen Lin   size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
683f8bd2e5bSStephen Lin                size_t &MatchLen, StringMap<StringRef> &VariableTable) const;
684dcc7d48dSMichael Liao 
685dcc7d48dSMichael Liao   bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
68601ac1707SDuncan P. N. Exon Smith   bool CheckSame(const SourceMgr &SM, StringRef Buffer) const;
687dcc7d48dSMichael Liao   bool CheckNot(const SourceMgr &SM, StringRef Buffer,
68891a1b2c9SMichael Liao                 const std::vector<const Pattern *> &NotStrings,
68991a1b2c9SMichael Liao                 StringMap<StringRef> &VariableTable) const;
69091a1b2c9SMichael Liao   size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
69191a1b2c9SMichael Liao                   std::vector<const Pattern *> &NotStrings,
692dcc7d48dSMichael Liao                   StringMap<StringRef> &VariableTable) const;
69326cccfe1SChris Lattner };
69426cccfe1SChris Lattner 
69520247900SChandler Carruth /// Canonicalize whitespaces in the file. Line endings are replaced with
69620247900SChandler Carruth /// UNIX-style '\n'.
697b03c166aSChandler Carruth static StringRef CanonicalizeFile(MemoryBuffer &MB,
69820247900SChandler Carruth                                   SmallVectorImpl<char> &OutputBuffer) {
69920247900SChandler Carruth   OutputBuffer.reserve(MB.getBufferSize());
700a2f8fc5aSChris Lattner 
70120247900SChandler Carruth   for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd();
702a2f8fc5aSChris Lattner        Ptr != End; ++Ptr) {
703fd781bf0SNAKAMURA Takumi     // Eliminate trailing dosish \r.
704fd781bf0SNAKAMURA Takumi     if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
705fd781bf0SNAKAMURA Takumi       continue;
706fd781bf0SNAKAMURA Takumi     }
707fd781bf0SNAKAMURA Takumi 
7085ea04c38SGuy Benyei     // If current char is not a horizontal whitespace or if horizontal
7095ea04c38SGuy Benyei     // whitespace canonicalization is disabled, dump it to output as is.
710b03c166aSChandler Carruth     if (NoCanonicalizeWhiteSpace || (*Ptr != ' ' && *Ptr != '\t')) {
71120247900SChandler Carruth       OutputBuffer.push_back(*Ptr);
712a2f8fc5aSChris Lattner       continue;
713a2f8fc5aSChris Lattner     }
714a2f8fc5aSChris Lattner 
715a2f8fc5aSChris Lattner     // Otherwise, add one space and advance over neighboring space.
71620247900SChandler Carruth     OutputBuffer.push_back(' ');
717e8f2fb20SChandler Carruth     while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t'))
718a2f8fc5aSChris Lattner       ++Ptr;
719a2f8fc5aSChris Lattner   }
720a2f8fc5aSChris Lattner 
72120247900SChandler Carruth   // Add a null byte and then return all but that byte.
72220247900SChandler Carruth   OutputBuffer.push_back('\0');
72320247900SChandler Carruth   return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1);
724a2f8fc5aSChris Lattner }
725a2f8fc5aSChris Lattner 
/// Returns true if \p c can be part of a check-prefix-like word, i.e. it is
/// alphanumeric, '-' or '_'.
static bool IsPartOfWord(char c) {
  // The <cctype> classifiers have undefined behavior for negative arguments
  // other than EOF, which is what a non-ASCII byte becomes where plain char is
  // signed. Go through unsigned char first.
  return std::isalnum(static_cast<unsigned char>(c)) != 0 || c == '-' ||
         c == '_';
}
72938820972SMatt Arsenault 
73013df4626SMatt Arsenault // Get the size of the prefix extension.
73113df4626SMatt Arsenault static size_t CheckTypeSize(Check::CheckType Ty) {
73213df4626SMatt Arsenault   switch (Ty) {
73313df4626SMatt Arsenault   case Check::CheckNone:
734a908e7bdSPaul Robinson   case Check::CheckBadNot:
73513df4626SMatt Arsenault     return 0;
73613df4626SMatt Arsenault 
73713df4626SMatt Arsenault   case Check::CheckPlain:
73813df4626SMatt Arsenault     return sizeof(":") - 1;
73913df4626SMatt Arsenault 
74013df4626SMatt Arsenault   case Check::CheckNext:
74113df4626SMatt Arsenault     return sizeof("-NEXT:") - 1;
74213df4626SMatt Arsenault 
74301ac1707SDuncan P. N. Exon Smith   case Check::CheckSame:
74401ac1707SDuncan P. N. Exon Smith     return sizeof("-SAME:") - 1;
74501ac1707SDuncan P. N. Exon Smith 
74613df4626SMatt Arsenault   case Check::CheckNot:
74713df4626SMatt Arsenault     return sizeof("-NOT:") - 1;
74813df4626SMatt Arsenault 
74913df4626SMatt Arsenault   case Check::CheckDAG:
75013df4626SMatt Arsenault     return sizeof("-DAG:") - 1;
75113df4626SMatt Arsenault 
75213df4626SMatt Arsenault   case Check::CheckLabel:
75313df4626SMatt Arsenault     return sizeof("-LABEL:") - 1;
75413df4626SMatt Arsenault 
7555507f668SJames Henderson   case Check::CheckEmpty:
7565507f668SJames Henderson     return sizeof("-EMPTY:") - 1;
7575507f668SJames Henderson 
75813df4626SMatt Arsenault   case Check::CheckEOF:
75913df4626SMatt Arsenault     llvm_unreachable("Should not be using EOF size");
76013df4626SMatt Arsenault   }
76113df4626SMatt Arsenault 
76213df4626SMatt Arsenault   llvm_unreachable("Bad check type");
76313df4626SMatt Arsenault }
76413df4626SMatt Arsenault 
765*dc5ba317SJoel E. Denny // Get a description of the type.
766*dc5ba317SJoel E. Denny static std::string CheckTypeName(StringRef Prefix, Check::CheckType Ty) {
767*dc5ba317SJoel E. Denny   switch (Ty) {
768*dc5ba317SJoel E. Denny   case Check::CheckNone:
769*dc5ba317SJoel E. Denny     return "invalid";
770*dc5ba317SJoel E. Denny   case Check::CheckPlain:
771*dc5ba317SJoel E. Denny     return Prefix;
772*dc5ba317SJoel E. Denny   case Check::CheckNext:
773*dc5ba317SJoel E. Denny     return Prefix.str() + "-NEXT";
774*dc5ba317SJoel E. Denny   case Check::CheckSame:
775*dc5ba317SJoel E. Denny     return Prefix.str() + "-SAME";
776*dc5ba317SJoel E. Denny   case Check::CheckNot:
777*dc5ba317SJoel E. Denny     return Prefix.str() + "-NOT";
778*dc5ba317SJoel E. Denny   case Check::CheckDAG:
779*dc5ba317SJoel E. Denny     return Prefix.str() + "-DAG";
780*dc5ba317SJoel E. Denny   case Check::CheckLabel:
781*dc5ba317SJoel E. Denny     return Prefix.str() + "-LABEL";
782*dc5ba317SJoel E. Denny   case Check::CheckEmpty:
783*dc5ba317SJoel E. Denny     return Prefix.str() + "-EMPTY";
784*dc5ba317SJoel E. Denny   case Check::CheckEOF:
785*dc5ba317SJoel E. Denny     return "implicit EOF";
786*dc5ba317SJoel E. Denny   case Check::CheckBadNot:
787*dc5ba317SJoel E. Denny     return "bad NOT";
788*dc5ba317SJoel E. Denny   }
789*dc5ba317SJoel E. Denny   llvm_unreachable("unknown CheckType");
790*dc5ba317SJoel E. Denny }
791*dc5ba317SJoel E. Denny 
79213df4626SMatt Arsenault static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) {
79383e63d96SGeorge Rimar   if (Buffer.size() <= Prefix.size())
79483e63d96SGeorge Rimar     return Check::CheckNone;
79583e63d96SGeorge Rimar 
796c4d2d471SMatt Arsenault   char NextChar = Buffer[Prefix.size()];
79738820972SMatt Arsenault 
79838820972SMatt Arsenault   // Verify that the : is present after the prefix.
79913df4626SMatt Arsenault   if (NextChar == ':')
80038820972SMatt Arsenault     return Check::CheckPlain;
80138820972SMatt Arsenault 
80213df4626SMatt Arsenault   if (NextChar != '-')
80338820972SMatt Arsenault     return Check::CheckNone;
80438820972SMatt Arsenault 
805c4d2d471SMatt Arsenault   StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
80613df4626SMatt Arsenault   if (Rest.startswith("NEXT:"))
80738820972SMatt Arsenault     return Check::CheckNext;
80838820972SMatt Arsenault 
80901ac1707SDuncan P. N. Exon Smith   if (Rest.startswith("SAME:"))
81001ac1707SDuncan P. N. Exon Smith     return Check::CheckSame;
81101ac1707SDuncan P. N. Exon Smith 
81213df4626SMatt Arsenault   if (Rest.startswith("NOT:"))
81338820972SMatt Arsenault     return Check::CheckNot;
81438820972SMatt Arsenault 
81513df4626SMatt Arsenault   if (Rest.startswith("DAG:"))
81638820972SMatt Arsenault     return Check::CheckDAG;
81738820972SMatt Arsenault 
81813df4626SMatt Arsenault   if (Rest.startswith("LABEL:"))
81938820972SMatt Arsenault     return Check::CheckLabel;
82013df4626SMatt Arsenault 
8215507f668SJames Henderson   if (Rest.startswith("EMPTY:"))
8225507f668SJames Henderson     return Check::CheckEmpty;
8235507f668SJames Henderson 
824a908e7bdSPaul Robinson   // You can't combine -NOT with another suffix.
825a908e7bdSPaul Robinson   if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") ||
826a908e7bdSPaul Robinson       Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") ||
8275507f668SJames Henderson       Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:") ||
8285507f668SJames Henderson       Rest.startswith("EMPTY-NOT:") || Rest.startswith("NOT-EMPTY:"))
829a908e7bdSPaul Robinson     return Check::CheckBadNot;
830a908e7bdSPaul Robinson 
83113df4626SMatt Arsenault   return Check::CheckNone;
83238820972SMatt Arsenault }
83338820972SMatt Arsenault 
83413df4626SMatt Arsenault // From the given position, find the next character after the word.
83513df4626SMatt Arsenault static size_t SkipWord(StringRef Str, size_t Loc) {
83613df4626SMatt Arsenault   while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
83713df4626SMatt Arsenault     ++Loc;
83813df4626SMatt Arsenault   return Loc;
83913df4626SMatt Arsenault }
84013df4626SMatt Arsenault 
841726774cbSChandler Carruth /// Search the buffer for the first prefix in the prefix regular expression.
842726774cbSChandler Carruth ///
843726774cbSChandler Carruth /// This searches the buffer using the provided regular expression, however it
844726774cbSChandler Carruth /// enforces constraints beyond that:
845726774cbSChandler Carruth /// 1) The found prefix must not be a suffix of something that looks like
846726774cbSChandler Carruth ///    a valid prefix.
847726774cbSChandler Carruth /// 2) The found prefix must be followed by a valid check type suffix using \c
848726774cbSChandler Carruth ///    FindCheckType above.
849726774cbSChandler Carruth ///
850726774cbSChandler Carruth /// The first match of the regular expression to satisfy these two is returned,
851726774cbSChandler Carruth /// otherwise an empty StringRef is returned to indicate failure.
852726774cbSChandler Carruth ///
853726774cbSChandler Carruth /// If this routine returns a valid prefix, it will also shrink \p Buffer to
854726774cbSChandler Carruth /// start at the beginning of the returned prefix, increment \p LineNumber for
855726774cbSChandler Carruth /// each new line consumed from \p Buffer, and set \p CheckTy to the type of
856726774cbSChandler Carruth /// check found by examining the suffix.
857726774cbSChandler Carruth ///
858726774cbSChandler Carruth /// If no valid prefix is found, the state of Buffer, LineNumber, and CheckTy
859726774cbSChandler Carruth /// is unspecified.
860726774cbSChandler Carruth static StringRef FindFirstMatchingPrefix(Regex &PrefixRE, StringRef &Buffer,
86113df4626SMatt Arsenault                                          unsigned &LineNumber,
862726774cbSChandler Carruth                                          Check::CheckType &CheckTy) {
863726774cbSChandler Carruth   SmallVector<StringRef, 2> Matches;
864726774cbSChandler Carruth 
86513df4626SMatt Arsenault   while (!Buffer.empty()) {
866726774cbSChandler Carruth     // Find the first (longest) match using the RE.
867726774cbSChandler Carruth     if (!PrefixRE.match(Buffer, &Matches))
868726774cbSChandler Carruth       // No match at all, bail.
869726774cbSChandler Carruth       return StringRef();
870726774cbSChandler Carruth 
871726774cbSChandler Carruth     StringRef Prefix = Matches[0];
872726774cbSChandler Carruth     Matches.clear();
873726774cbSChandler Carruth 
874726774cbSChandler Carruth     assert(Prefix.data() >= Buffer.data() &&
875726774cbSChandler Carruth            Prefix.data() < Buffer.data() + Buffer.size() &&
876726774cbSChandler Carruth            "Prefix doesn't start inside of buffer!");
877726774cbSChandler Carruth     size_t Loc = Prefix.data() - Buffer.data();
878726774cbSChandler Carruth     StringRef Skipped = Buffer.substr(0, Loc);
879726774cbSChandler Carruth     Buffer = Buffer.drop_front(Loc);
880726774cbSChandler Carruth     LineNumber += Skipped.count('\n');
881726774cbSChandler Carruth 
882726774cbSChandler Carruth     // Check that the matched prefix isn't a suffix of some other check-like
883726774cbSChandler Carruth     // word.
884726774cbSChandler Carruth     // FIXME: This is a very ad-hoc check. it would be better handled in some
885726774cbSChandler Carruth     // other way. Among other things it seems hard to distinguish between
886726774cbSChandler Carruth     // intentional and unintentional uses of this feature.
887726774cbSChandler Carruth     if (Skipped.empty() || !IsPartOfWord(Skipped.back())) {
888726774cbSChandler Carruth       // Now extract the type.
889726774cbSChandler Carruth       CheckTy = FindCheckType(Buffer, Prefix);
890726774cbSChandler Carruth 
891726774cbSChandler Carruth       // If we've found a valid check type for this prefix, we're done.
892726774cbSChandler Carruth       if (CheckTy != Check::CheckNone)
89313df4626SMatt Arsenault         return Prefix;
89413df4626SMatt Arsenault     }
89513df4626SMatt Arsenault 
896726774cbSChandler Carruth     // If we didn't successfully find a prefix, we need to skip this invalid
897726774cbSChandler Carruth     // prefix and continue scanning. We directly skip the prefix that was
898726774cbSChandler Carruth     // matched and any additional parts of that check-like word.
899726774cbSChandler Carruth     Buffer = Buffer.drop_front(SkipWord(Buffer, Prefix.size()));
90013df4626SMatt Arsenault   }
90113df4626SMatt Arsenault 
902726774cbSChandler Carruth   // We ran out of buffer while skipping partial matches so give up.
90313df4626SMatt Arsenault   return StringRef();
90438820972SMatt Arsenault }
905ee3c74fbSChris Lattner 
9064dabac20SChandler Carruth /// Read the check file, which specifies the sequence of expected strings.
9074dabac20SChandler Carruth ///
9084dabac20SChandler Carruth /// The strings are added to the CheckStrings vector. Returns true in case of
9094dabac20SChandler Carruth /// an error, false otherwise.
910726774cbSChandler Carruth static bool ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE,
91126cccfe1SChris Lattner                           std::vector<CheckString> &CheckStrings) {
91256ccdbbdSAlexander Kornienko   std::vector<Pattern> ImplicitNegativeChecks;
91356ccdbbdSAlexander Kornienko   for (const auto &PatternString : ImplicitCheckNot) {
91456ccdbbdSAlexander Kornienko     // Create a buffer with fake command line content in order to display the
91556ccdbbdSAlexander Kornienko     // command line option responsible for the specific implicit CHECK-NOT.
916ff43d69dSDavid Blaikie     std::string Prefix = (Twine("-") + ImplicitCheckNot.ArgStr + "='").str();
91756ccdbbdSAlexander Kornienko     std::string Suffix = "'";
9183560ff2cSRafael Espindola     std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy(
9193560ff2cSRafael Espindola         Prefix + PatternString + Suffix, "command line");
9203560ff2cSRafael Espindola 
92156ccdbbdSAlexander Kornienko     StringRef PatternInBuffer =
92256ccdbbdSAlexander Kornienko         CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
9231961f14cSDavid Blaikie     SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
92456ccdbbdSAlexander Kornienko 
92556ccdbbdSAlexander Kornienko     ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot));
92656ccdbbdSAlexander Kornienko     ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
92756ccdbbdSAlexander Kornienko                                                "IMPLICIT-CHECK", SM, 0);
92856ccdbbdSAlexander Kornienko   }
92956ccdbbdSAlexander Kornienko 
93056ccdbbdSAlexander Kornienko   std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;
931236d2d5eSChris Lattner 
93243d50d4aSEli Bendersky   // LineNumber keeps track of the line on which CheckPrefix instances are
93343d50d4aSEli Bendersky   // found.
93492987fb3SAlexander Kornienko   unsigned LineNumber = 1;
93592987fb3SAlexander Kornienko 
936ee3c74fbSChris Lattner   while (1) {
93713df4626SMatt Arsenault     Check::CheckType CheckTy;
93813df4626SMatt Arsenault 
93913df4626SMatt Arsenault     // See if a prefix occurs in the memory buffer.
940726774cbSChandler Carruth     StringRef UsedPrefix = FindFirstMatchingPrefix(PrefixRE, Buffer, LineNumber,
941726774cbSChandler Carruth                                                    CheckTy);
94213df4626SMatt Arsenault     if (UsedPrefix.empty())
943ee3c74fbSChris Lattner       break;
944726774cbSChandler Carruth     assert(UsedPrefix.data() == Buffer.data() &&
945726774cbSChandler Carruth            "Failed to move Buffer's start forward, or pointed prefix outside "
946726774cbSChandler Carruth            "of the buffer!");
94792987fb3SAlexander Kornienko 
94813df4626SMatt Arsenault     // Location to use for error messages.
949726774cbSChandler Carruth     const char *UsedPrefixStart = UsedPrefix.data();
95092987fb3SAlexander Kornienko 
951726774cbSChandler Carruth     // Skip the buffer to the end.
95213df4626SMatt Arsenault     Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy));
95310f10cedSChris Lattner 
954a908e7bdSPaul Robinson     // Complain about useful-looking but unsupported suffixes.
955a908e7bdSPaul Robinson     if (CheckTy == Check::CheckBadNot) {
956e8f2fb20SChandler Carruth       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error,
957a908e7bdSPaul Robinson                       "unsupported -NOT combo on prefix '" + UsedPrefix + "'");
958a908e7bdSPaul Robinson       return true;
959a908e7bdSPaul Robinson     }
960a908e7bdSPaul Robinson 
96138820972SMatt Arsenault     // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
962a26bc914STom de Vries     // leading whitespace.
9631714676aSTom de Vries     if (!(NoCanonicalizeWhiteSpace && MatchFullLines))
964236d2d5eSChris Lattner       Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
965ee3c74fbSChris Lattner 
966ee3c74fbSChris Lattner     // Scan ahead to the end of line.
967caa5fc0cSChris Lattner     size_t EOL = Buffer.find_first_of("\n\r");
968ee3c74fbSChris Lattner 
969838fb09aSDan Gohman     // Remember the location of the start of the pattern, for diagnostics.
970838fb09aSDan Gohman     SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
971838fb09aSDan Gohman 
97274d50731SChris Lattner     // Parse the pattern.
97338820972SMatt Arsenault     Pattern P(CheckTy);
97413df4626SMatt Arsenault     if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber))
975ee3c74fbSChris Lattner       return true;
976ee3c74fbSChris Lattner 
977f8bd2e5bSStephen Lin     // Verify that CHECK-LABEL lines do not define or use variables
97838820972SMatt Arsenault     if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
979e8f2fb20SChandler Carruth       SM.PrintMessage(
980e8f2fb20SChandler Carruth           SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error,
98113df4626SMatt Arsenault           "found '" + UsedPrefix + "-LABEL:'"
98213df4626SMatt Arsenault                                    " with variable definition or use");
983f8bd2e5bSStephen Lin       return true;
984f8bd2e5bSStephen Lin     }
985f8bd2e5bSStephen Lin 
986236d2d5eSChris Lattner     Buffer = Buffer.substr(EOL);
98774d50731SChris Lattner 
9885507f668SJames Henderson     // Verify that CHECK-NEXT/SAME/EMPTY lines have at least one CHECK line before them.
9895507f668SJames Henderson     if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame ||
9905507f668SJames Henderson          CheckTy == Check::CheckEmpty) &&
99101ac1707SDuncan P. N. Exon Smith         CheckStrings.empty()) {
9925507f668SJames Henderson       StringRef Type = CheckTy == Check::CheckNext
9935507f668SJames Henderson                            ? "NEXT"
9945507f668SJames Henderson                            : CheckTy == Check::CheckEmpty ? "EMPTY" : "SAME";
99513df4626SMatt Arsenault       SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
99603b80a40SChris Lattner                       SourceMgr::DK_Error,
997e8f2fb20SChandler Carruth                       "found '" + UsedPrefix + "-" + Type +
998e8f2fb20SChandler Carruth                           "' without previous '" + UsedPrefix + ": line");
999da108b4eSChris Lattner       return true;
1000da108b4eSChris Lattner     }
1001da108b4eSChris Lattner 
100291a1b2c9SMichael Liao     // Handle CHECK-DAG/-NOT.
100338820972SMatt Arsenault     if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
100491a1b2c9SMichael Liao       DagNotMatches.push_back(P);
100574d50731SChris Lattner       continue;
100674d50731SChris Lattner     }
100774d50731SChris Lattner 
1008ee3c74fbSChris Lattner     // Okay, add the string we captured to the output vector and move on.
100985913ccaSJames Y Knight     CheckStrings.emplace_back(P, UsedPrefix, PatternLoc);
101091a1b2c9SMichael Liao     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
101156ccdbbdSAlexander Kornienko     DagNotMatches = ImplicitNegativeChecks;
1012ee3c74fbSChris Lattner   }
1013ee3c74fbSChris Lattner 
101413df4626SMatt Arsenault   // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
101513df4626SMatt Arsenault   // prefix as a filler for the error message.
101691a1b2c9SMichael Liao   if (!DagNotMatches.empty()) {
1017f5e2fc47SBenjamin Kramer     CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(),
101885913ccaSJames Y Knight                               SMLoc::getFromPointer(Buffer.data()));
101991a1b2c9SMichael Liao     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
1020eba55822SJakob Stoklund Olesen   }
1021eba55822SJakob Stoklund Olesen 
1022ee3c74fbSChris Lattner   if (CheckStrings.empty()) {
102313df4626SMatt Arsenault     errs() << "error: no check strings found with prefix"
102413df4626SMatt Arsenault            << (CheckPrefixes.size() > 1 ? "es " : " ");
10253e3ef2f2SChris Bieneman     prefix_iterator I = CheckPrefixes.begin();
10263e3ef2f2SChris Bieneman     prefix_iterator E = CheckPrefixes.end();
10273e3ef2f2SChris Bieneman     if (I != E) {
10283e3ef2f2SChris Bieneman       errs() << "\'" << *I << ":'";
10293e3ef2f2SChris Bieneman       ++I;
103013df4626SMatt Arsenault     }
10313e3ef2f2SChris Bieneman     for (; I != E; ++I)
10323e3ef2f2SChris Bieneman       errs() << ", \'" << *I << ":'";
103313df4626SMatt Arsenault 
103413df4626SMatt Arsenault     errs() << '\n';
1035ee3c74fbSChris Lattner     return true;
1036ee3c74fbSChris Lattner   }
1037ee3c74fbSChris Lattner 
1038ee3c74fbSChris Lattner   return false;
1039ee3c74fbSChris Lattner }
1040ee3c74fbSChris Lattner 
1041*dc5ba317SJoel E. Denny static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM,
1042*dc5ba317SJoel E. Denny                        StringRef Prefix, SMLoc Loc, const Pattern &Pat,
1043*dc5ba317SJoel E. Denny                        StringRef Buffer, StringMap<StringRef> &VariableTable,
1044*dc5ba317SJoel E. Denny                        size_t MatchPos, size_t MatchLen) {
1045*dc5ba317SJoel E. Denny   if (ExpectedMatch) {
1046*dc5ba317SJoel E. Denny     if (!Verbose)
1047*dc5ba317SJoel E. Denny       return;
1048*dc5ba317SJoel E. Denny     if (!VerboseVerbose && Pat.getCheckTy() == Check::CheckEOF)
1049*dc5ba317SJoel E. Denny       return;
1050*dc5ba317SJoel E. Denny   }
1051*dc5ba317SJoel E. Denny   SMLoc MatchStart = SMLoc::getFromPointer(Buffer.data() + MatchPos);
1052*dc5ba317SJoel E. Denny   SMLoc MatchEnd = SMLoc::getFromPointer(Buffer.data() + MatchPos + MatchLen);
1053*dc5ba317SJoel E. Denny   SMRange MatchRange(MatchStart, MatchEnd);
1054*dc5ba317SJoel E. Denny   SM.PrintMessage(
1055*dc5ba317SJoel E. Denny       Loc, ExpectedMatch ? SourceMgr::DK_Remark : SourceMgr::DK_Error,
1056*dc5ba317SJoel E. Denny       CheckTypeName(Prefix, Pat.getCheckTy()) + ": " +
1057*dc5ba317SJoel E. Denny           (ExpectedMatch ? "expected" : "excluded") +
1058*dc5ba317SJoel E. Denny           " string found in input");
1059*dc5ba317SJoel E. Denny   SM.PrintMessage(MatchStart, SourceMgr::DK_Note, "found here", {MatchRange});
1060*dc5ba317SJoel E. Denny   Pat.PrintVariableUses(SM, Buffer, VariableTable, MatchRange);
1061*dc5ba317SJoel E. Denny }
1062*dc5ba317SJoel E. Denny 
1063*dc5ba317SJoel E. Denny static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM,
1064*dc5ba317SJoel E. Denny                        const CheckString &CheckStr, StringRef Buffer,
1065*dc5ba317SJoel E. Denny                        StringMap<StringRef> &VariableTable, size_t MatchPos,
1066*dc5ba317SJoel E. Denny                        size_t MatchLen) {
1067*dc5ba317SJoel E. Denny   PrintMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat,
1068*dc5ba317SJoel E. Denny              Buffer, VariableTable, MatchPos, MatchLen);
1069*dc5ba317SJoel E. Denny }
1070*dc5ba317SJoel E. Denny 
1071*dc5ba317SJoel E. Denny static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM,
1072*dc5ba317SJoel E. Denny                          StringRef Prefix, SMLoc Loc, const Pattern &Pat,
1073e8f2fb20SChandler Carruth                          StringRef Buffer,
1074e0ef65abSDaniel Dunbar                          StringMap<StringRef> &VariableTable) {
1075*dc5ba317SJoel E. Denny   if (!ExpectedMatch && !VerboseVerbose)
1076*dc5ba317SJoel E. Denny     return;
1077*dc5ba317SJoel E. Denny 
1078da108b4eSChris Lattner   // Otherwise, we have an error, emit an error message.
1079*dc5ba317SJoel E. Denny   SM.PrintMessage(Loc,
1080*dc5ba317SJoel E. Denny                   ExpectedMatch ? SourceMgr::DK_Error : SourceMgr::DK_Remark,
1081*dc5ba317SJoel E. Denny                   CheckTypeName(Prefix, Pat.getCheckTy()) + ": " +
1082*dc5ba317SJoel E. Denny                       (ExpectedMatch ? "expected" : "excluded") +
1083*dc5ba317SJoel E. Denny                       " string not found in input");
1084da108b4eSChris Lattner 
1085da108b4eSChris Lattner   // Print the "scanning from here" line.  If the current position is at the
1086da108b4eSChris Lattner   // end of a line, advance to the start of the next line.
1087caa5fc0cSChris Lattner   Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
1088da108b4eSChris Lattner 
108903b80a40SChris Lattner   SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
109003b80a40SChris Lattner                   "scanning from here");
1091e0ef65abSDaniel Dunbar 
1092e0ef65abSDaniel Dunbar   // Allow the pattern to print additional information if desired.
1093*dc5ba317SJoel E. Denny   Pat.PrintVariableUses(SM, Buffer, VariableTable);
1094*dc5ba317SJoel E. Denny   if (ExpectedMatch)
1095*dc5ba317SJoel E. Denny     Pat.PrintFuzzyMatch(SM, Buffer, VariableTable);
109691a1b2c9SMichael Liao }
109791a1b2c9SMichael Liao 
1098*dc5ba317SJoel E. Denny static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM,
1099*dc5ba317SJoel E. Denny                          const CheckString &CheckStr, StringRef Buffer,
110091a1b2c9SMichael Liao                          StringMap<StringRef> &VariableTable) {
1101*dc5ba317SJoel E. Denny   PrintNoMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat,
1102*dc5ba317SJoel E. Denny                Buffer, VariableTable);
1103da108b4eSChris Lattner }
1104da108b4eSChris Lattner 
11054dabac20SChandler Carruth /// Count the number of newlines in the specified range.
1106592fe880SRichard Smith static unsigned CountNumNewlinesBetween(StringRef Range,
1107592fe880SRichard Smith                                         const char *&FirstNewLine) {
1108da108b4eSChris Lattner   unsigned NumNewLines = 0;
110937183584SChris Lattner   while (1) {
1110da108b4eSChris Lattner     // Scan for newline.
111137183584SChris Lattner     Range = Range.substr(Range.find_first_of("\n\r"));
1112e8f2fb20SChandler Carruth     if (Range.empty())
1113e8f2fb20SChandler Carruth       return NumNewLines;
1114da108b4eSChris Lattner 
1115da108b4eSChris Lattner     ++NumNewLines;
1116da108b4eSChris Lattner 
1117da108b4eSChris Lattner     // Handle \n\r and \r\n as a single newline.
1118e8f2fb20SChandler Carruth     if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') &&
111937183584SChris Lattner         (Range[0] != Range[1]))
112037183584SChris Lattner       Range = Range.substr(1);
112137183584SChris Lattner     Range = Range.substr(1);
1122592fe880SRichard Smith 
1123592fe880SRichard Smith     if (NumNewLines == 1)
1124592fe880SRichard Smith       FirstNewLine = Range.begin();
1125da108b4eSChris Lattner   }
1126da108b4eSChris Lattner }
1127da108b4eSChris Lattner 
11284dabac20SChandler Carruth /// Match check string and its "not strings" and/or "dag strings".
1129dcc7d48dSMichael Liao size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
1130e93a3a08SStephen Lin                           bool IsLabelScanMode, size_t &MatchLen,
1131dcc7d48dSMichael Liao                           StringMap<StringRef> &VariableTable) const {
113291a1b2c9SMichael Liao   size_t LastPos = 0;
113391a1b2c9SMichael Liao   std::vector<const Pattern *> NotStrings;
113491a1b2c9SMichael Liao 
1135e93a3a08SStephen Lin   // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
1136e93a3a08SStephen Lin   // bounds; we have not processed variable definitions within the bounded block
1137e93a3a08SStephen Lin   // yet so cannot handle any final CHECK-DAG yet; this is handled when going
1138e93a3a08SStephen Lin   // over the block again (including the last CHECK-LABEL) in normal mode.
1139e93a3a08SStephen Lin   if (!IsLabelScanMode) {
114091a1b2c9SMichael Liao     // Match "dag strings" (with mixed "not strings" if any).
114191a1b2c9SMichael Liao     LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable);
114291a1b2c9SMichael Liao     if (LastPos == StringRef::npos)
114391a1b2c9SMichael Liao       return StringRef::npos;
1144e93a3a08SStephen Lin   }
114591a1b2c9SMichael Liao 
114691a1b2c9SMichael Liao   // Match itself from the last position after matching CHECK-DAG.
114791a1b2c9SMichael Liao   StringRef MatchBuffer = Buffer.substr(LastPos);
114891a1b2c9SMichael Liao   size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1149dcc7d48dSMichael Liao   if (MatchPos == StringRef::npos) {
1150*dc5ba317SJoel E. Denny     PrintNoMatch(true, SM, *this, MatchBuffer, VariableTable);
1151dcc7d48dSMichael Liao     return StringRef::npos;
1152dcc7d48dSMichael Liao   }
1153*dc5ba317SJoel E. Denny   PrintMatch(true, SM, *this, MatchBuffer, VariableTable, MatchPos, MatchLen);
1154dcc7d48dSMichael Liao 
1155e93a3a08SStephen Lin   // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
1156e93a3a08SStephen Lin   // or CHECK-NOT
1157e93a3a08SStephen Lin   if (!IsLabelScanMode) {
115891a1b2c9SMichael Liao     StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1159dcc7d48dSMichael Liao 
1160dcc7d48dSMichael Liao     // If this check is a "CHECK-NEXT", verify that the previous match was on
1161dcc7d48dSMichael Liao     // the previous line (i.e. that there is one newline between them).
1162dcc7d48dSMichael Liao     if (CheckNext(SM, SkippedRegion))
1163dcc7d48dSMichael Liao       return StringRef::npos;
1164dcc7d48dSMichael Liao 
116501ac1707SDuncan P. N. Exon Smith     // If this check is a "CHECK-SAME", verify that the previous match was on
116601ac1707SDuncan P. N. Exon Smith     // the same line (i.e. that there is no newline between them).
116701ac1707SDuncan P. N. Exon Smith     if (CheckSame(SM, SkippedRegion))
116801ac1707SDuncan P. N. Exon Smith       return StringRef::npos;
116901ac1707SDuncan P. N. Exon Smith 
1170dcc7d48dSMichael Liao     // If this match had "not strings", verify that they don't exist in the
1171dcc7d48dSMichael Liao     // skipped region.
117291a1b2c9SMichael Liao     if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1173dcc7d48dSMichael Liao       return StringRef::npos;
1174f8bd2e5bSStephen Lin   }
1175dcc7d48dSMichael Liao 
11767dfb92b9SMehdi Amini   return LastPos + MatchPos;
1177dcc7d48dSMichael Liao }
1178dcc7d48dSMichael Liao 
11794dabac20SChandler Carruth /// Verify there is a single line in the given buffer.
1180dcc7d48dSMichael Liao bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
11815507f668SJames Henderson   if (Pat.getCheckTy() != Check::CheckNext &&
11825507f668SJames Henderson       Pat.getCheckTy() != Check::CheckEmpty)
1183dcc7d48dSMichael Liao     return false;
1184dcc7d48dSMichael Liao 
11855507f668SJames Henderson   Twine CheckName =
11865507f668SJames Henderson       Prefix +
11875507f668SJames Henderson       Twine(Pat.getCheckTy() == Check::CheckEmpty ? "-EMPTY" : "-NEXT");
11885507f668SJames Henderson 
1189dcc7d48dSMichael Liao   // Count the number of newlines between the previous match and this one.
1190dcc7d48dSMichael Liao   assert(Buffer.data() !=
1191e8f2fb20SChandler Carruth              SM.getMemoryBuffer(SM.FindBufferContainingLoc(
1192e8f2fb20SChandler Carruth                                     SMLoc::getFromPointer(Buffer.data())))
1193e8f2fb20SChandler Carruth                  ->getBufferStart() &&
11945507f668SJames Henderson          "CHECK-NEXT and CHECK-EMPTY can't be the first check in a file");
1195dcc7d48dSMichael Liao 
119666f09ad0SCraig Topper   const char *FirstNewLine = nullptr;
1197592fe880SRichard Smith   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1198dcc7d48dSMichael Liao 
1199dcc7d48dSMichael Liao   if (NumNewLines == 0) {
1200e8f2fb20SChandler Carruth     SM.PrintMessage(Loc, SourceMgr::DK_Error,
12015507f668SJames Henderson                     CheckName + ": is on the same line as previous match");
1202e8f2fb20SChandler Carruth     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1203e8f2fb20SChandler Carruth                     "'next' match was here");
1204dcc7d48dSMichael Liao     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1205dcc7d48dSMichael Liao                     "previous match ended here");
1206dcc7d48dSMichael Liao     return true;
1207dcc7d48dSMichael Liao   }
1208dcc7d48dSMichael Liao 
1209dcc7d48dSMichael Liao   if (NumNewLines != 1) {
1210e8f2fb20SChandler Carruth     SM.PrintMessage(Loc, SourceMgr::DK_Error,
12115507f668SJames Henderson                     CheckName +
12125507f668SJames Henderson                         ": is not on the line after the previous match");
1213e8f2fb20SChandler Carruth     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1214e8f2fb20SChandler Carruth                     "'next' match was here");
1215dcc7d48dSMichael Liao     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1216dcc7d48dSMichael Liao                     "previous match ended here");
1217592fe880SRichard Smith     SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
1218592fe880SRichard Smith                     "non-matching line after previous match is here");
1219dcc7d48dSMichael Liao     return true;
1220dcc7d48dSMichael Liao   }
1221dcc7d48dSMichael Liao 
1222dcc7d48dSMichael Liao   return false;
1223dcc7d48dSMichael Liao }
1224dcc7d48dSMichael Liao 
12254dabac20SChandler Carruth /// Verify there is no newline in the given buffer.
122601ac1707SDuncan P. N. Exon Smith bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
122785913ccaSJames Y Knight   if (Pat.getCheckTy() != Check::CheckSame)
122801ac1707SDuncan P. N. Exon Smith     return false;
122901ac1707SDuncan P. N. Exon Smith 
123001ac1707SDuncan P. N. Exon Smith   // Count the number of newlines between the previous match and this one.
123101ac1707SDuncan P. N. Exon Smith   assert(Buffer.data() !=
123201ac1707SDuncan P. N. Exon Smith              SM.getMemoryBuffer(SM.FindBufferContainingLoc(
123301ac1707SDuncan P. N. Exon Smith                                     SMLoc::getFromPointer(Buffer.data())))
123401ac1707SDuncan P. N. Exon Smith                  ->getBufferStart() &&
123501ac1707SDuncan P. N. Exon Smith          "CHECK-SAME can't be the first check in a file");
123601ac1707SDuncan P. N. Exon Smith 
123701ac1707SDuncan P. N. Exon Smith   const char *FirstNewLine = nullptr;
123801ac1707SDuncan P. N. Exon Smith   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
123901ac1707SDuncan P. N. Exon Smith 
124001ac1707SDuncan P. N. Exon Smith   if (NumNewLines != 0) {
124101ac1707SDuncan P. N. Exon Smith     SM.PrintMessage(Loc, SourceMgr::DK_Error,
124201ac1707SDuncan P. N. Exon Smith                     Prefix +
124301ac1707SDuncan P. N. Exon Smith                         "-SAME: is not on the same line as the previous match");
124401ac1707SDuncan P. N. Exon Smith     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
124501ac1707SDuncan P. N. Exon Smith                     "'next' match was here");
124601ac1707SDuncan P. N. Exon Smith     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
124701ac1707SDuncan P. N. Exon Smith                     "previous match ended here");
124801ac1707SDuncan P. N. Exon Smith     return true;
124901ac1707SDuncan P. N. Exon Smith   }
125001ac1707SDuncan P. N. Exon Smith 
125101ac1707SDuncan P. N. Exon Smith   return false;
125201ac1707SDuncan P. N. Exon Smith }
125301ac1707SDuncan P. N. Exon Smith 
12544dabac20SChandler Carruth /// Verify there's no "not strings" in the given buffer.
1255dcc7d48dSMichael Liao bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
125691a1b2c9SMichael Liao                            const std::vector<const Pattern *> &NotStrings,
1257dcc7d48dSMichael Liao                            StringMap<StringRef> &VariableTable) const {
12588f870499SBenjamin Kramer   for (const Pattern *Pat : NotStrings) {
125938820972SMatt Arsenault     assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
126091a1b2c9SMichael Liao 
1261dcc7d48dSMichael Liao     size_t MatchLen = 0;
126291a1b2c9SMichael Liao     size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
1263dcc7d48dSMichael Liao 
1264*dc5ba317SJoel E. Denny     if (Pos == StringRef::npos) {
1265*dc5ba317SJoel E. Denny       PrintNoMatch(false, SM, Prefix, Pat->getLoc(), *Pat, Buffer,
1266*dc5ba317SJoel E. Denny                    VariableTable);
1267e8f2fb20SChandler Carruth       continue;
1268*dc5ba317SJoel E. Denny     }
1269dcc7d48dSMichael Liao 
1270*dc5ba317SJoel E. Denny     PrintMatch(false, SM, Prefix, Pat->getLoc(), *Pat, Buffer, VariableTable,
1271*dc5ba317SJoel E. Denny                Pos, MatchLen);
1272*dc5ba317SJoel E. Denny 
1273dcc7d48dSMichael Liao     return true;
1274dcc7d48dSMichael Liao   }
1275dcc7d48dSMichael Liao 
1276dcc7d48dSMichael Liao   return false;
1277dcc7d48dSMichael Liao }
1278dcc7d48dSMichael Liao 
12794dabac20SChandler Carruth /// Match "dag strings" and their mixed "not strings".
128091a1b2c9SMichael Liao size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
128191a1b2c9SMichael Liao                              std::vector<const Pattern *> &NotStrings,
128291a1b2c9SMichael Liao                              StringMap<StringRef> &VariableTable) const {
128391a1b2c9SMichael Liao   if (DagNotStrings.empty())
128491a1b2c9SMichael Liao     return 0;
128591a1b2c9SMichael Liao 
128691a1b2c9SMichael Liao   size_t LastPos = 0;
128791a1b2c9SMichael Liao   size_t StartPos = LastPos;
128891a1b2c9SMichael Liao 
1289bcf5b441SJoel E. Denny   // A sorted list of ranges for non-overlapping dag matches.
1290bcf5b441SJoel E. Denny   struct Match {
1291bcf5b441SJoel E. Denny     size_t Pos;
1292bcf5b441SJoel E. Denny     size_t End;
1293bcf5b441SJoel E. Denny   };
1294bcf5b441SJoel E. Denny   std::list<Match> Matches;
1295bcf5b441SJoel E. Denny 
12968f870499SBenjamin Kramer   for (const Pattern &Pat : DagNotStrings) {
129738820972SMatt Arsenault     assert((Pat.getCheckTy() == Check::CheckDAG ||
129838820972SMatt Arsenault             Pat.getCheckTy() == Check::CheckNot) &&
129991a1b2c9SMichael Liao            "Invalid CHECK-DAG or CHECK-NOT!");
130091a1b2c9SMichael Liao 
130138820972SMatt Arsenault     if (Pat.getCheckTy() == Check::CheckNot) {
130291a1b2c9SMichael Liao       NotStrings.push_back(&Pat);
130391a1b2c9SMichael Liao       continue;
130491a1b2c9SMichael Liao     }
130591a1b2c9SMichael Liao 
130638820972SMatt Arsenault     assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
130791a1b2c9SMichael Liao 
1308614c9861SJoel E. Denny     // CHECK-DAG always matches from the start.
1309bcf5b441SJoel E. Denny     size_t MatchLen = 0, MatchPos = StartPos;
1310bcf5b441SJoel E. Denny 
1311bcf5b441SJoel E. Denny     // Search for a match that doesn't overlap a previous match in this
1312bcf5b441SJoel E. Denny     // CHECK-DAG group.
1313bcf5b441SJoel E. Denny     for (auto MI = Matches.begin(), ME = Matches.end(); true; ++MI) {
1314bcf5b441SJoel E. Denny       StringRef MatchBuffer = Buffer.substr(MatchPos);
1315bcf5b441SJoel E. Denny       size_t MatchPosBuf = Pat.Match(MatchBuffer, MatchLen, VariableTable);
131691a1b2c9SMichael Liao       // With a group of CHECK-DAGs, a single mismatching means the match on
131791a1b2c9SMichael Liao       // that group of CHECK-DAGs fails immediately.
1318bcf5b441SJoel E. Denny       if (MatchPosBuf == StringRef::npos) {
1319*dc5ba317SJoel E. Denny         PrintNoMatch(true, SM, Prefix, Pat.getLoc(), Pat, MatchBuffer,
1320*dc5ba317SJoel E. Denny                      VariableTable);
132191a1b2c9SMichael Liao         return StringRef::npos;
132291a1b2c9SMichael Liao       }
132391a1b2c9SMichael Liao       // Re-calc it as the offset relative to the start of the original string.
1324bcf5b441SJoel E. Denny       MatchPos += MatchPosBuf;
1325*dc5ba317SJoel E. Denny       if (VerboseVerbose)
1326*dc5ba317SJoel E. Denny         PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, Buffer, VariableTable,
1327*dc5ba317SJoel E. Denny                    MatchPos, MatchLen);
1328bcf5b441SJoel E. Denny       if (AllowDeprecatedDagOverlap)
1329bcf5b441SJoel E. Denny         break;
1330bcf5b441SJoel E. Denny       // Iterate previous matches until overlapping match or insertion point.
1331bcf5b441SJoel E. Denny       Match M{MatchPos, MatchPos + MatchLen};
1332bcf5b441SJoel E. Denny       bool Overlap = false;
1333bcf5b441SJoel E. Denny       for (; MI != ME; ++MI) {
1334bcf5b441SJoel E. Denny         if (M.Pos < MI->End) {
1335bcf5b441SJoel E. Denny           // !Overlap => New match has no overlap and is before this old match.
1336bcf5b441SJoel E. Denny           // Overlap => New match overlaps this old match.
1337bcf5b441SJoel E. Denny           Overlap = MI->Pos < M.End;
1338bcf5b441SJoel E. Denny           break;
1339bcf5b441SJoel E. Denny         }
1340bcf5b441SJoel E. Denny       }
1341bcf5b441SJoel E. Denny       if (!Overlap) {
1342bcf5b441SJoel E. Denny         // Insert non-overlapping match into list.
1343bcf5b441SJoel E. Denny         Matches.insert(MI, M);
1344bcf5b441SJoel E. Denny         break;
1345bcf5b441SJoel E. Denny       }
1346*dc5ba317SJoel E. Denny       if (VerboseVerbose) {
1347*dc5ba317SJoel E. Denny         SMLoc OldStart = SMLoc::getFromPointer(Buffer.data() + MI->Pos);
1348*dc5ba317SJoel E. Denny         SMLoc OldEnd = SMLoc::getFromPointer(Buffer.data() + MI->End);
1349*dc5ba317SJoel E. Denny         SMRange OldRange(OldStart, OldEnd);
1350*dc5ba317SJoel E. Denny         SM.PrintMessage(OldStart, SourceMgr::DK_Note,
1351*dc5ba317SJoel E. Denny                         "match discarded, overlaps earlier DAG match here",
1352*dc5ba317SJoel E. Denny                         {OldRange});
1353*dc5ba317SJoel E. Denny       }
1354bcf5b441SJoel E. Denny       MatchPos = MI->End;
1355bcf5b441SJoel E. Denny     }
1356*dc5ba317SJoel E. Denny     if (!VerboseVerbose)
1357*dc5ba317SJoel E. Denny       PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, Buffer, VariableTable,
1358*dc5ba317SJoel E. Denny                  MatchPos, MatchLen);
135991a1b2c9SMichael Liao 
136091a1b2c9SMichael Liao     if (!NotStrings.empty()) {
136191a1b2c9SMichael Liao       if (MatchPos < LastPos) {
136291a1b2c9SMichael Liao         // Reordered?
136391a1b2c9SMichael Liao         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos),
136491a1b2c9SMichael Liao                         SourceMgr::DK_Error,
136513df4626SMatt Arsenault                         Prefix + "-DAG: found a match of CHECK-DAG"
136691a1b2c9SMichael Liao                                  " reordering across a CHECK-NOT");
136791a1b2c9SMichael Liao         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos),
136891a1b2c9SMichael Liao                         SourceMgr::DK_Note,
136913df4626SMatt Arsenault                         Prefix + "-DAG: the farthest match of CHECK-DAG"
137091a1b2c9SMichael Liao                                  " is found here");
137191a1b2c9SMichael Liao         SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note,
137213df4626SMatt Arsenault                         Prefix + "-NOT: the crossed pattern specified"
137391a1b2c9SMichael Liao                                  " here");
137491a1b2c9SMichael Liao         SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note,
137513df4626SMatt Arsenault                         Prefix + "-DAG: the reordered pattern specified"
137691a1b2c9SMichael Liao                                  " here");
137791a1b2c9SMichael Liao         return StringRef::npos;
137891a1b2c9SMichael Liao       }
137991a1b2c9SMichael Liao       // All subsequent CHECK-DAGs should be matched from the farthest
1380bcf5b441SJoel E. Denny       // position of all precedent CHECK-DAGs (not including this one).
138191a1b2c9SMichael Liao       StartPos = LastPos;
1382bcf5b441SJoel E. Denny       // Don't waste time checking for (impossible) overlaps before that.
1383bcf5b441SJoel E. Denny       Matches.clear();
1384bcf5b441SJoel E. Denny       Matches.push_back(Match{MatchPos, MatchPos + MatchLen});
138591a1b2c9SMichael Liao       // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to
138691a1b2c9SMichael Liao       // CHECK-DAG, verify that there's no 'not' strings occurred in that
138791a1b2c9SMichael Liao       // region.
1388cf60ab31SBenjamin Kramer       StringRef SkippedRegion = Buffer.slice(LastPos, MatchPos);
1389cf708c32STim Northover       if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
139091a1b2c9SMichael Liao         return StringRef::npos;
139191a1b2c9SMichael Liao       // Clear "not strings".
139291a1b2c9SMichael Liao       NotStrings.clear();
139391a1b2c9SMichael Liao     }
139491a1b2c9SMichael Liao 
139591a1b2c9SMichael Liao     // Update the last position with CHECK-DAG matches.
139691a1b2c9SMichael Liao     LastPos = std::max(MatchPos + MatchLen, LastPos);
139791a1b2c9SMichael Liao   }
139891a1b2c9SMichael Liao 
139991a1b2c9SMichael Liao   return LastPos;
140091a1b2c9SMichael Liao }
140191a1b2c9SMichael Liao 
140213df4626SMatt Arsenault // A check prefix must contain only alphanumeric, hyphens and underscores.
140313df4626SMatt Arsenault static bool ValidateCheckPrefix(StringRef CheckPrefix) {
140413df4626SMatt Arsenault   Regex Validator("^[a-zA-Z0-9_-]*$");
140513df4626SMatt Arsenault   return Validator.match(CheckPrefix);
140613df4626SMatt Arsenault }
140713df4626SMatt Arsenault 
140813df4626SMatt Arsenault static bool ValidateCheckPrefixes() {
140913df4626SMatt Arsenault   StringSet<> PrefixSet;
141013df4626SMatt Arsenault 
14118f870499SBenjamin Kramer   for (StringRef Prefix : CheckPrefixes) {
141224412b14SEli Bendersky     // Reject empty prefixes.
141324412b14SEli Bendersky     if (Prefix == "")
141424412b14SEli Bendersky       return false;
141524412b14SEli Bendersky 
14160356975cSDavid Blaikie     if (!PrefixSet.insert(Prefix).second)
141713df4626SMatt Arsenault       return false;
141813df4626SMatt Arsenault 
141913df4626SMatt Arsenault     if (!ValidateCheckPrefix(Prefix))
142013df4626SMatt Arsenault       return false;
142113df4626SMatt Arsenault   }
142213df4626SMatt Arsenault 
142313df4626SMatt Arsenault   return true;
142413df4626SMatt Arsenault }
142513df4626SMatt Arsenault 
1426726774cbSChandler Carruth // Combines the check prefixes into a single regex so that we can efficiently
1427726774cbSChandler Carruth // scan for any of the set.
1428726774cbSChandler Carruth //
1429726774cbSChandler Carruth // The semantics are that the longest-match wins which matches our regex
1430726774cbSChandler Carruth // library.
1431726774cbSChandler Carruth static Regex buildCheckPrefixRegex() {
143213df4626SMatt Arsenault   // I don't think there's a way to specify an initial value for cl::list,
143313df4626SMatt Arsenault   // so if nothing was specified, add the default
143413df4626SMatt Arsenault   if (CheckPrefixes.empty())
143513df4626SMatt Arsenault     CheckPrefixes.push_back("CHECK");
1436726774cbSChandler Carruth 
1437726774cbSChandler Carruth   // We already validated the contents of CheckPrefixes so just concatenate
1438726774cbSChandler Carruth   // them as alternatives.
1439726774cbSChandler Carruth   SmallString<32> PrefixRegexStr;
1440726774cbSChandler Carruth   for (StringRef Prefix : CheckPrefixes) {
1441726774cbSChandler Carruth     if (Prefix != CheckPrefixes.front())
1442726774cbSChandler Carruth       PrefixRegexStr.push_back('|');
1443726774cbSChandler Carruth 
1444726774cbSChandler Carruth     PrefixRegexStr.append(Prefix);
1445726774cbSChandler Carruth   }
1446726774cbSChandler Carruth 
1447726774cbSChandler Carruth   return Regex(PrefixRegexStr);
1448c2735158SRui Ueyama }
1449c2735158SRui Ueyama 
14502bd4f8b6SXinliang David Li static void DumpCommandLine(int argc, char **argv) {
14512bd4f8b6SXinliang David Li   errs() << "FileCheck command line: ";
14522bd4f8b6SXinliang David Li   for (int I = 0; I < argc; I++)
14532bd4f8b6SXinliang David Li     errs() << " " << argv[I];
14542bd4f8b6SXinliang David Li   errs() << "\n";
14552bd4f8b6SXinliang David Li }
14562bd4f8b6SXinliang David Li 
1457f55e72a5SArtem Belevich // Remove local variables from \p VariableTable. Global variables
1458f55e72a5SArtem Belevich // (start with '$') are preserved.
1459f55e72a5SArtem Belevich static void ClearLocalVars(StringMap<StringRef> &VariableTable) {
1460f55e72a5SArtem Belevich   SmallVector<StringRef, 16> LocalVars;
1461f55e72a5SArtem Belevich   for (const auto &Var : VariableTable)
1462f55e72a5SArtem Belevich     if (Var.first()[0] != '$')
1463f55e72a5SArtem Belevich       LocalVars.push_back(Var.first());
1464f55e72a5SArtem Belevich 
1465f55e72a5SArtem Belevich   for (const auto &Var : LocalVars)
1466f55e72a5SArtem Belevich     VariableTable.erase(Var);
1467f55e72a5SArtem Belevich }
1468f55e72a5SArtem Belevich 
146920247900SChandler Carruth /// Check the input to FileCheck provided in the \p Buffer against the \p
147020247900SChandler Carruth /// CheckStrings read from the check file.
147120247900SChandler Carruth ///
147220247900SChandler Carruth /// Returns false if the input fails to satisfy the checks.
147320247900SChandler Carruth bool CheckInput(SourceMgr &SM, StringRef Buffer,
147420247900SChandler Carruth                 ArrayRef<CheckString> CheckStrings) {
147520247900SChandler Carruth   bool ChecksFailed = false;
147620247900SChandler Carruth 
147720247900SChandler Carruth   /// VariableTable - This holds all the current filecheck variables.
147820247900SChandler Carruth   StringMap<StringRef> VariableTable;
147920247900SChandler Carruth 
148046e1fd61SAlexander Richardson   for (const auto& Def : GlobalDefines)
148146e1fd61SAlexander Richardson     VariableTable.insert(StringRef(Def).split('='));
148246e1fd61SAlexander Richardson 
148320247900SChandler Carruth   unsigned i = 0, j = 0, e = CheckStrings.size();
148420247900SChandler Carruth   while (true) {
148520247900SChandler Carruth     StringRef CheckRegion;
148620247900SChandler Carruth     if (j == e) {
148720247900SChandler Carruth       CheckRegion = Buffer;
148820247900SChandler Carruth     } else {
148920247900SChandler Carruth       const CheckString &CheckLabelStr = CheckStrings[j];
149020247900SChandler Carruth       if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) {
149120247900SChandler Carruth         ++j;
149220247900SChandler Carruth         continue;
149320247900SChandler Carruth       }
149420247900SChandler Carruth 
149520247900SChandler Carruth       // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
149620247900SChandler Carruth       size_t MatchLabelLen = 0;
1497e8f2fb20SChandler Carruth       size_t MatchLabelPos =
1498e8f2fb20SChandler Carruth           CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, VariableTable);
149920247900SChandler Carruth       if (MatchLabelPos == StringRef::npos)
150020247900SChandler Carruth         // Immediately bail of CHECK-LABEL fails, nothing else we can do.
150120247900SChandler Carruth         return false;
150220247900SChandler Carruth 
150320247900SChandler Carruth       CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
150420247900SChandler Carruth       Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
150520247900SChandler Carruth       ++j;
150620247900SChandler Carruth     }
150720247900SChandler Carruth 
1508f55e72a5SArtem Belevich     if (EnableVarScope)
1509f55e72a5SArtem Belevich       ClearLocalVars(VariableTable);
1510f55e72a5SArtem Belevich 
151120247900SChandler Carruth     for (; i != j; ++i) {
151220247900SChandler Carruth       const CheckString &CheckStr = CheckStrings[i];
151320247900SChandler Carruth 
151420247900SChandler Carruth       // Check each string within the scanned region, including a second check
151520247900SChandler Carruth       // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
151620247900SChandler Carruth       size_t MatchLen = 0;
1517e8f2fb20SChandler Carruth       size_t MatchPos =
1518e8f2fb20SChandler Carruth           CheckStr.Check(SM, CheckRegion, false, MatchLen, VariableTable);
151920247900SChandler Carruth 
152020247900SChandler Carruth       if (MatchPos == StringRef::npos) {
152120247900SChandler Carruth         ChecksFailed = true;
152220247900SChandler Carruth         i = j;
152320247900SChandler Carruth         break;
152420247900SChandler Carruth       }
152520247900SChandler Carruth 
152620247900SChandler Carruth       CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
152720247900SChandler Carruth     }
152820247900SChandler Carruth 
152920247900SChandler Carruth     if (j == e)
153020247900SChandler Carruth       break;
153120247900SChandler Carruth   }
153220247900SChandler Carruth 
153320247900SChandler Carruth   // Success if no checks failed.
153420247900SChandler Carruth   return !ChecksFailed;
153520247900SChandler Carruth }
153620247900SChandler Carruth 
1537ee3c74fbSChris Lattner int main(int argc, char **argv) {
1538197194b6SRui Ueyama   InitLLVM X(argc, argv);
1539ee3c74fbSChris Lattner   cl::ParseCommandLineOptions(argc, argv);
1540ee3c74fbSChris Lattner 
154113df4626SMatt Arsenault   if (!ValidateCheckPrefixes()) {
154213df4626SMatt Arsenault     errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
154313df4626SMatt Arsenault               "start with a letter and contain only alphanumeric characters, "
154413df4626SMatt Arsenault               "hyphens and underscores\n";
1545c2735158SRui Ueyama     return 2;
1546c2735158SRui Ueyama   }
1547c2735158SRui Ueyama 
1548726774cbSChandler Carruth   Regex PrefixRE = buildCheckPrefixRegex();
1549726774cbSChandler Carruth   std::string REError;
1550726774cbSChandler Carruth   if (!PrefixRE.isValid(REError)) {
1551726774cbSChandler Carruth     errs() << "Unable to combine check-prefix strings into a prefix regular "
1552726774cbSChandler Carruth               "expression! This is likely a bug in FileCheck's verification of "
1553726774cbSChandler Carruth               "the check-prefix strings. Regular expression parsing failed "
1554726774cbSChandler Carruth               "with the following error: "
1555726774cbSChandler Carruth            << REError << "\n";
1556726774cbSChandler Carruth     return 2;
1557726774cbSChandler Carruth   }
155813df4626SMatt Arsenault 
1559*dc5ba317SJoel E. Denny   if (VerboseVerbose)
1560*dc5ba317SJoel E. Denny     Verbose = true;
1561*dc5ba317SJoel E. Denny 
1562ee3c74fbSChris Lattner   SourceMgr SM;
1563ee3c74fbSChris Lattner 
1564ee3c74fbSChris Lattner   // Read the expected strings from the check file.
156520247900SChandler Carruth   ErrorOr<std::unique_ptr<MemoryBuffer>> CheckFileOrErr =
156620247900SChandler Carruth       MemoryBuffer::getFileOrSTDIN(CheckFilename);
156720247900SChandler Carruth   if (std::error_code EC = CheckFileOrErr.getError()) {
156820247900SChandler Carruth     errs() << "Could not open check file '" << CheckFilename
156920247900SChandler Carruth            << "': " << EC.message() << '\n';
157020247900SChandler Carruth     return 2;
157120247900SChandler Carruth   }
157220247900SChandler Carruth   MemoryBuffer &CheckFile = *CheckFileOrErr.get();
157320247900SChandler Carruth 
157420247900SChandler Carruth   SmallString<4096> CheckFileBuffer;
1575b03c166aSChandler Carruth   StringRef CheckFileText = CanonicalizeFile(CheckFile, CheckFileBuffer);
157620247900SChandler Carruth 
157720247900SChandler Carruth   SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(
157820247900SChandler Carruth                             CheckFileText, CheckFile.getBufferIdentifier()),
157920247900SChandler Carruth                         SMLoc());
158020247900SChandler Carruth 
158126cccfe1SChris Lattner   std::vector<CheckString> CheckStrings;
1582726774cbSChandler Carruth   if (ReadCheckFile(SM, CheckFileText, PrefixRE, CheckStrings))
1583ee3c74fbSChris Lattner     return 2;
1584ee3c74fbSChris Lattner 
1585ee3c74fbSChris Lattner   // Open the file to check and add it to SourceMgr.
158620247900SChandler Carruth   ErrorOr<std::unique_ptr<MemoryBuffer>> InputFileOrErr =
1587adf21f2aSRafael Espindola       MemoryBuffer::getFileOrSTDIN(InputFilename);
158820247900SChandler Carruth   if (std::error_code EC = InputFileOrErr.getError()) {
1589adf21f2aSRafael Espindola     errs() << "Could not open input file '" << InputFilename
1590adf21f2aSRafael Espindola            << "': " << EC.message() << '\n';
15918e1c6477SEli Bendersky     return 2;
1592ee3c74fbSChris Lattner   }
159320247900SChandler Carruth   MemoryBuffer &InputFile = *InputFileOrErr.get();
15942c3e5cdfSChris Lattner 
159520247900SChandler Carruth   if (InputFile.getBufferSize() == 0 && !AllowEmptyInput) {
1596b692bed7SChris Lattner     errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
15972bd4f8b6SXinliang David Li     DumpCommandLine(argc, argv);
15988e1c6477SEli Bendersky     return 2;
1599b692bed7SChris Lattner   }
1600b692bed7SChris Lattner 
160120247900SChandler Carruth   SmallString<4096> InputFileBuffer;
1602b03c166aSChandler Carruth   StringRef InputFileText = CanonicalizeFile(InputFile, InputFileBuffer);
16032c3e5cdfSChris Lattner 
1604e8f2fb20SChandler Carruth   SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(
1605e8f2fb20SChandler Carruth                             InputFileText, InputFile.getBufferIdentifier()),
1606e8f2fb20SChandler Carruth                         SMLoc());
1607ee3c74fbSChris Lattner 
160820247900SChandler Carruth   return CheckInput(SM, InputFileText, CheckStrings) ? EXIT_SUCCESS : 1;
1609ee3c74fbSChris Lattner }
1610