1ee3c74fbSChris Lattner //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
2ee3c74fbSChris Lattner //
3ee3c74fbSChris Lattner //                     The LLVM Compiler Infrastructure
4ee3c74fbSChris Lattner //
5ee3c74fbSChris Lattner // This file is distributed under the University of Illinois Open Source
6ee3c74fbSChris Lattner // License. See LICENSE.TXT for details.
7ee3c74fbSChris Lattner //
8ee3c74fbSChris Lattner //===----------------------------------------------------------------------===//
9ee3c74fbSChris Lattner //
// FileCheck does a line-by-line check of a file that validates whether it
// contains the expected content.  This is useful for regression tests etc.
12ee3c74fbSChris Lattner //
13ee3c74fbSChris Lattner // This program exits with an error status of 2 on error, exit status of 0 if
14ee3c74fbSChris Lattner // the file matched the expected contents, and exit status of 1 if it did not
15ee3c74fbSChris Lattner // contain the expected contents.
16ee3c74fbSChris Lattner //
17ee3c74fbSChris Lattner //===----------------------------------------------------------------------===//
18ee3c74fbSChris Lattner 
1991d19d8eSChandler Carruth #include "llvm/ADT/SmallString.h"
2091d19d8eSChandler Carruth #include "llvm/ADT/StringExtras.h"
2191d19d8eSChandler Carruth #include "llvm/ADT/StringMap.h"
2213df4626SMatt Arsenault #include "llvm/ADT/StringSet.h"
23ee3c74fbSChris Lattner #include "llvm/Support/CommandLine.h"
24ee3c74fbSChris Lattner #include "llvm/Support/MemoryBuffer.h"
25ee3c74fbSChris Lattner #include "llvm/Support/PrettyStackTrace.h"
26f08d2db9SChris Lattner #include "llvm/Support/Regex.h"
2791d19d8eSChandler Carruth #include "llvm/Support/Signals.h"
28ee3c74fbSChris Lattner #include "llvm/Support/SourceMgr.h"
29ee3c74fbSChris Lattner #include "llvm/Support/raw_ostream.h"
308879e06dSChris Lattner #include <algorithm>
31981af002SWill Dietz #include <cctype>
32e8b8f1bcSEli Bendersky #include <map>
33e8b8f1bcSEli Bendersky #include <string>
34a6e9c3e4SRafael Espindola #include <system_error>
35e8b8f1bcSEli Bendersky #include <vector>
36ee3c74fbSChris Lattner using namespace llvm;
37ee3c74fbSChris Lattner 
38ee3c74fbSChris Lattner static cl::opt<std::string>
39ee3c74fbSChris Lattner CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
40ee3c74fbSChris Lattner 
41ee3c74fbSChris Lattner static cl::opt<std::string>
42ee3c74fbSChris Lattner InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
43ee3c74fbSChris Lattner               cl::init("-"), cl::value_desc("filename"));
44ee3c74fbSChris Lattner 
4513df4626SMatt Arsenault static cl::list<std::string>
4613df4626SMatt Arsenault CheckPrefixes("check-prefix",
47ee3c74fbSChris Lattner               cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
48ee3c74fbSChris Lattner 
492c3e5cdfSChris Lattner static cl::opt<bool>
502c3e5cdfSChris Lattner NoCanonicalizeWhiteSpace("strict-whitespace",
512c3e5cdfSChris Lattner               cl::desc("Do not treat all horizontal whitespace as equivalent"));
522c3e5cdfSChris Lattner 
5356ccdbbdSAlexander Kornienko static cl::list<std::string> ImplicitCheckNot(
5456ccdbbdSAlexander Kornienko     "implicit-check-not",
5556ccdbbdSAlexander Kornienko     cl::desc("Add an implicit negative check with this pattern to every\n"
5656ccdbbdSAlexander Kornienko              "positive check. This can be used to ensure that no instances of\n"
5756ccdbbdSAlexander Kornienko              "this pattern occur which are not matched by a positive pattern"),
5856ccdbbdSAlexander Kornienko     cl::value_desc("pattern"));
5956ccdbbdSAlexander Kornienko 
6013df4626SMatt Arsenault typedef cl::list<std::string>::const_iterator prefix_iterator;
6113df4626SMatt Arsenault 
6274d50731SChris Lattner //===----------------------------------------------------------------------===//
6374d50731SChris Lattner // Pattern Handling Code.
6474d50731SChris Lattner //===----------------------------------------------------------------------===//
6574d50731SChris Lattner 
namespace Check {

/// CheckType - The kind of check a Pattern belongs to.  The enumerator values
/// are sequential starting from CheckNone == 0.
enum CheckType {
  /// No check kind.
  CheckNone = 0,

  // NOTE(review): the following kinds presumably correspond to the plain,
  // -NEXT, -NOT, -DAG and -LABEL check directives -- confirm against the
  // directive parsing code.
  CheckPlain,
  CheckNext,
  CheckNot,
  CheckDAG,
  CheckLabel,

  /// CheckEOF - When set, this pattern only matches the end of file. This is
  /// used for trailing CHECK-NOTs.
  CheckEOF
};

} // end namespace Check
80eba55822SJakob Stoklund Olesen 
8138820972SMatt Arsenault class Pattern {
8238820972SMatt Arsenault   SMLoc PatternLoc;
8391a1b2c9SMichael Liao 
8438820972SMatt Arsenault   Check::CheckType CheckTy;
8591a1b2c9SMichael Liao 
86b16ab0c4SChris Lattner   /// FixedStr - If non-empty, this pattern is a fixed string match with the
87b16ab0c4SChris Lattner   /// specified fixed string.
88221460e0SChris Lattner   StringRef FixedStr;
89b16ab0c4SChris Lattner 
90b16ab0c4SChris Lattner   /// RegEx - If non-empty, this is a regex pattern.
91b16ab0c4SChris Lattner   std::string RegExStr;
928879e06dSChris Lattner 
9392987fb3SAlexander Kornienko   /// \brief Contains the number of line this pattern is in.
9492987fb3SAlexander Kornienko   unsigned LineNumber;
9592987fb3SAlexander Kornienko 
968879e06dSChris Lattner   /// VariableUses - Entries in this vector map to uses of a variable in the
978879e06dSChris Lattner   /// pattern, e.g. "foo[[bar]]baz".  In this case, the RegExStr will contain
988879e06dSChris Lattner   /// "foobaz" and we'll get an entry in this vector that tells us to insert the
998879e06dSChris Lattner   /// value of bar at offset 3.
1008879e06dSChris Lattner   std::vector<std::pair<StringRef, unsigned> > VariableUses;
1018879e06dSChris Lattner 
102e8b8f1bcSEli Bendersky   /// VariableDefs - Maps definitions of variables to their parenthesized
103e8b8f1bcSEli Bendersky   /// capture numbers.
104e8b8f1bcSEli Bendersky   /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1.
105e8b8f1bcSEli Bendersky   std::map<StringRef, unsigned> VariableDefs;
1068879e06dSChris Lattner 
1073b40b445SChris Lattner public:
1083b40b445SChris Lattner 
10938820972SMatt Arsenault   Pattern(Check::CheckType Ty)
11038820972SMatt Arsenault     : CheckTy(Ty) { }
11174d50731SChris Lattner 
1120b707eb8SMichael Liao   /// getLoc - Return the location in source code.
1130b707eb8SMichael Liao   SMLoc getLoc() const { return PatternLoc; }
1140b707eb8SMichael Liao 
11513df4626SMatt Arsenault   /// ParsePattern - Parse the given string into the Pattern. Prefix provides
11613df4626SMatt Arsenault   /// which prefix is being matched, SM provides the SourceMgr used for error
11713df4626SMatt Arsenault   /// reports, and LineNumber is the line number in the input file from which
11813df4626SMatt Arsenault   /// the pattern string was read.  Returns true in case of an error, false
11913df4626SMatt Arsenault   /// otherwise.
12013df4626SMatt Arsenault   bool ParsePattern(StringRef PatternStr,
12113df4626SMatt Arsenault                     StringRef Prefix,
12213df4626SMatt Arsenault                     SourceMgr &SM,
12313df4626SMatt Arsenault                     unsigned LineNumber);
1243b40b445SChris Lattner 
1253b40b445SChris Lattner   /// Match - Match the pattern string against the input buffer Buffer.  This
1263b40b445SChris Lattner   /// returns the position that is matched or npos if there is no match.  If
1273b40b445SChris Lattner   /// there is a match, the size of the matched string is returned in MatchLen.
1288879e06dSChris Lattner   ///
1298879e06dSChris Lattner   /// The VariableTable StringMap provides the current values of filecheck
1308879e06dSChris Lattner   /// variables and is updated if this match defines new values.
1318879e06dSChris Lattner   size_t Match(StringRef Buffer, size_t &MatchLen,
1328879e06dSChris Lattner                StringMap<StringRef> &VariableTable) const;
133b16ab0c4SChris Lattner 
134e0ef65abSDaniel Dunbar   /// PrintFailureInfo - Print additional information about a failure to match
135e0ef65abSDaniel Dunbar   /// involving this pattern.
136e0ef65abSDaniel Dunbar   void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
137e0ef65abSDaniel Dunbar                         const StringMap<StringRef> &VariableTable) const;
138e0ef65abSDaniel Dunbar 
139f8bd2e5bSStephen Lin   bool hasVariable() const { return !(VariableUses.empty() &&
140f8bd2e5bSStephen Lin                                       VariableDefs.empty()); }
141f8bd2e5bSStephen Lin 
14238820972SMatt Arsenault   Check::CheckType getCheckTy() const { return CheckTy; }
14391a1b2c9SMichael Liao 
144b16ab0c4SChris Lattner private:
145e8b8f1bcSEli Bendersky   bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
146e8b8f1bcSEli Bendersky   void AddBackrefToRegEx(unsigned BackrefNum);
147fd29d886SDaniel Dunbar 
148fd29d886SDaniel Dunbar   /// ComputeMatchDistance - Compute an arbitrary estimate for the quality of
149fd29d886SDaniel Dunbar   /// matching this pattern at the start of \arg Buffer; a distance of zero
150fd29d886SDaniel Dunbar   /// should correspond to a perfect match.
151fd29d886SDaniel Dunbar   unsigned ComputeMatchDistance(StringRef Buffer,
152fd29d886SDaniel Dunbar                                const StringMap<StringRef> &VariableTable) const;
15392987fb3SAlexander Kornienko 
15492987fb3SAlexander Kornienko   /// \brief Evaluates expression and stores the result to \p Value.
15592987fb3SAlexander Kornienko   /// \return true on success. false when the expression has invalid syntax.
15692987fb3SAlexander Kornienko   bool EvaluateExpression(StringRef Expr, std::string &Value) const;
157061d2baaSEli Bendersky 
158061d2baaSEli Bendersky   /// \brief Finds the closing sequence of a regex variable usage or
159061d2baaSEli Bendersky   /// definition. Str has to point in the beginning of the definition
160061d2baaSEli Bendersky   /// (right after the opening sequence).
161061d2baaSEli Bendersky   /// \return offset of the closing sequence within Str, or npos if it was not
162061d2baaSEli Bendersky   /// found.
16381e5cd9eSAdrian Prantl   size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
1643b40b445SChris Lattner };
1653b40b445SChris Lattner 
1668879e06dSChris Lattner 
16713df4626SMatt Arsenault bool Pattern::ParsePattern(StringRef PatternStr,
16813df4626SMatt Arsenault                            StringRef Prefix,
16913df4626SMatt Arsenault                            SourceMgr &SM,
17092987fb3SAlexander Kornienko                            unsigned LineNumber) {
17192987fb3SAlexander Kornienko   this->LineNumber = LineNumber;
1720a4c44bdSChris Lattner   PatternLoc = SMLoc::getFromPointer(PatternStr.data());
1730a4c44bdSChris Lattner 
17474d50731SChris Lattner   // Ignore trailing whitespace.
17574d50731SChris Lattner   while (!PatternStr.empty() &&
17674d50731SChris Lattner          (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
17774d50731SChris Lattner     PatternStr = PatternStr.substr(0, PatternStr.size()-1);
17874d50731SChris Lattner 
17974d50731SChris Lattner   // Check that there is something on the line.
18074d50731SChris Lattner   if (PatternStr.empty()) {
18103b80a40SChris Lattner     SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
18203b80a40SChris Lattner                     "found empty check string with prefix '" +
18313df4626SMatt Arsenault                     Prefix + ":'");
18474d50731SChris Lattner     return true;
18574d50731SChris Lattner   }
18674d50731SChris Lattner 
187221460e0SChris Lattner   // Check to see if this is a fixed string, or if it has regex pieces.
188d9466967STed Kremenek   if (PatternStr.size() < 2 ||
1898879e06dSChris Lattner       (PatternStr.find("{{") == StringRef::npos &&
1908879e06dSChris Lattner        PatternStr.find("[[") == StringRef::npos)) {
191221460e0SChris Lattner     FixedStr = PatternStr;
192221460e0SChris Lattner     return false;
193221460e0SChris Lattner   }
194221460e0SChris Lattner 
1958879e06dSChris Lattner   // Paren value #0 is for the fully matched string.  Any new parenthesized
19653e0679dSChris Lattner   // values add from there.
1978879e06dSChris Lattner   unsigned CurParen = 1;
1988879e06dSChris Lattner 
199b16ab0c4SChris Lattner   // Otherwise, there is at least one regex piece.  Build up the regex pattern
200b16ab0c4SChris Lattner   // by escaping scary characters in fixed strings, building up one big regex.
201f08d2db9SChris Lattner   while (!PatternStr.empty()) {
2028879e06dSChris Lattner     // RegEx matches.
20353e0679dSChris Lattner     if (PatternStr.startswith("{{")) {
20443d50d4aSEli Bendersky       // This is the start of a regex match.  Scan for the }}.
205f08d2db9SChris Lattner       size_t End = PatternStr.find("}}");
206f08d2db9SChris Lattner       if (End == StringRef::npos) {
207f08d2db9SChris Lattner         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
20803b80a40SChris Lattner                         SourceMgr::DK_Error,
20903b80a40SChris Lattner                         "found start of regex string with no end '}}'");
210f08d2db9SChris Lattner         return true;
211f08d2db9SChris Lattner       }
212f08d2db9SChris Lattner 
213e53c95f1SChris Lattner       // Enclose {{}} patterns in parens just like [[]] even though we're not
214e53c95f1SChris Lattner       // capturing the result for any purpose.  This is required in case the
215e53c95f1SChris Lattner       // expression contains an alternation like: CHECK:  abc{{x|z}}def.  We
216e53c95f1SChris Lattner       // want this to turn into: "abc(x|z)def" not "abcx|zdef".
217e53c95f1SChris Lattner       RegExStr += '(';
218e53c95f1SChris Lattner       ++CurParen;
219e53c95f1SChris Lattner 
2208879e06dSChris Lattner       if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM))
2218879e06dSChris Lattner         return true;
222e53c95f1SChris Lattner       RegExStr += ')';
22353e0679dSChris Lattner 
2248879e06dSChris Lattner       PatternStr = PatternStr.substr(End+2);
2258879e06dSChris Lattner       continue;
2268879e06dSChris Lattner     }
2278879e06dSChris Lattner 
2288879e06dSChris Lattner     // Named RegEx matches.  These are of two forms: [[foo:.*]] which matches .*
2298879e06dSChris Lattner     // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
2308879e06dSChris Lattner     // second form is [[foo]] which is a reference to foo.  The variable name
23157cb733bSDaniel Dunbar     // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
2328879e06dSChris Lattner     // it.  This is to catch some common errors.
23353e0679dSChris Lattner     if (PatternStr.startswith("[[")) {
234061d2baaSEli Bendersky       // Find the closing bracket pair ending the match.  End is going to be an
235061d2baaSEli Bendersky       // offset relative to the beginning of the match string.
23681e5cd9eSAdrian Prantl       size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
237061d2baaSEli Bendersky 
2388879e06dSChris Lattner       if (End == StringRef::npos) {
2398879e06dSChris Lattner         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
24003b80a40SChris Lattner                         SourceMgr::DK_Error,
24103b80a40SChris Lattner                         "invalid named regex reference, no ]] found");
242f08d2db9SChris Lattner         return true;
243f08d2db9SChris Lattner       }
244f08d2db9SChris Lattner 
245061d2baaSEli Bendersky       StringRef MatchStr = PatternStr.substr(2, End);
246061d2baaSEli Bendersky       PatternStr = PatternStr.substr(End+4);
2478879e06dSChris Lattner 
2488879e06dSChris Lattner       // Get the regex name (e.g. "foo").
2498879e06dSChris Lattner       size_t NameEnd = MatchStr.find(':');
2508879e06dSChris Lattner       StringRef Name = MatchStr.substr(0, NameEnd);
2518879e06dSChris Lattner 
2528879e06dSChris Lattner       if (Name.empty()) {
25303b80a40SChris Lattner         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
25403b80a40SChris Lattner                         "invalid name in named regex: empty name");
2558879e06dSChris Lattner         return true;
2568879e06dSChris Lattner       }
2578879e06dSChris Lattner 
25892987fb3SAlexander Kornienko       // Verify that the name/expression is well formed. FileCheck currently
25992987fb3SAlexander Kornienko       // supports @LINE, @LINE+number, @LINE-number expressions. The check here
26092987fb3SAlexander Kornienko       // is relaxed, more strict check is performed in \c EvaluateExpression.
26192987fb3SAlexander Kornienko       bool IsExpression = false;
26292987fb3SAlexander Kornienko       for (unsigned i = 0, e = Name.size(); i != e; ++i) {
26392987fb3SAlexander Kornienko         if (i == 0 && Name[i] == '@') {
26492987fb3SAlexander Kornienko           if (NameEnd != StringRef::npos) {
26592987fb3SAlexander Kornienko             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
26692987fb3SAlexander Kornienko                             SourceMgr::DK_Error,
26792987fb3SAlexander Kornienko                             "invalid name in named regex definition");
26892987fb3SAlexander Kornienko             return true;
26992987fb3SAlexander Kornienko           }
27092987fb3SAlexander Kornienko           IsExpression = true;
27192987fb3SAlexander Kornienko           continue;
27292987fb3SAlexander Kornienko         }
27392987fb3SAlexander Kornienko         if (Name[i] != '_' && !isalnum(Name[i]) &&
27492987fb3SAlexander Kornienko             (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
2758879e06dSChris Lattner           SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i),
27603b80a40SChris Lattner                           SourceMgr::DK_Error, "invalid name in named regex");
2778879e06dSChris Lattner           return true;
2788879e06dSChris Lattner         }
27992987fb3SAlexander Kornienko       }
2808879e06dSChris Lattner 
2818879e06dSChris Lattner       // Name can't start with a digit.
28283c74e9fSGuy Benyei       if (isdigit(static_cast<unsigned char>(Name[0]))) {
28303b80a40SChris Lattner         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
28403b80a40SChris Lattner                         "invalid name in named regex");
2858879e06dSChris Lattner         return true;
2868879e06dSChris Lattner       }
2878879e06dSChris Lattner 
2888879e06dSChris Lattner       // Handle [[foo]].
2898879e06dSChris Lattner       if (NameEnd == StringRef::npos) {
290e8b8f1bcSEli Bendersky         // Handle variables that were defined earlier on the same line by
291e8b8f1bcSEli Bendersky         // emitting a backreference.
292e8b8f1bcSEli Bendersky         if (VariableDefs.find(Name) != VariableDefs.end()) {
293e8b8f1bcSEli Bendersky           unsigned VarParenNum = VariableDefs[Name];
294e8b8f1bcSEli Bendersky           if (VarParenNum < 1 || VarParenNum > 9) {
295e8b8f1bcSEli Bendersky             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
296e8b8f1bcSEli Bendersky                             SourceMgr::DK_Error,
297e8b8f1bcSEli Bendersky                             "Can't back-reference more than 9 variables");
298e8b8f1bcSEli Bendersky             return true;
299e8b8f1bcSEli Bendersky           }
300e8b8f1bcSEli Bendersky           AddBackrefToRegEx(VarParenNum);
301e8b8f1bcSEli Bendersky         } else {
3028879e06dSChris Lattner           VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
303e8b8f1bcSEli Bendersky         }
3048879e06dSChris Lattner         continue;
3058879e06dSChris Lattner       }
3068879e06dSChris Lattner 
3078879e06dSChris Lattner       // Handle [[foo:.*]].
308e8b8f1bcSEli Bendersky       VariableDefs[Name] = CurParen;
3098879e06dSChris Lattner       RegExStr += '(';
3108879e06dSChris Lattner       ++CurParen;
3118879e06dSChris Lattner 
3128879e06dSChris Lattner       if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM))
3138879e06dSChris Lattner         return true;
3148879e06dSChris Lattner 
3158879e06dSChris Lattner       RegExStr += ')';
3168879e06dSChris Lattner     }
3178879e06dSChris Lattner 
3188879e06dSChris Lattner     // Handle fixed string matches.
3198879e06dSChris Lattner     // Find the end, which is the start of the next regex.
3208879e06dSChris Lattner     size_t FixedMatchEnd = PatternStr.find("{{");
3218879e06dSChris Lattner     FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
3226f4f77b7SHans Wennborg     RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
3238879e06dSChris Lattner     PatternStr = PatternStr.substr(FixedMatchEnd);
324f08d2db9SChris Lattner   }
325f08d2db9SChris Lattner 
32674d50731SChris Lattner   return false;
32774d50731SChris Lattner }
32874d50731SChris Lattner 
329e8b8f1bcSEli Bendersky bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen,
3308879e06dSChris Lattner                               SourceMgr &SM) {
331e8b8f1bcSEli Bendersky   Regex R(RS);
3328879e06dSChris Lattner   std::string Error;
3338879e06dSChris Lattner   if (!R.isValid(Error)) {
334e8b8f1bcSEli Bendersky     SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
33503b80a40SChris Lattner                     "invalid regex: " + Error);
3368879e06dSChris Lattner     return true;
3378879e06dSChris Lattner   }
3388879e06dSChris Lattner 
339e8b8f1bcSEli Bendersky   RegExStr += RS.str();
3408879e06dSChris Lattner   CurParen += R.getNumMatches();
3418879e06dSChris Lattner   return false;
3428879e06dSChris Lattner }
343b16ab0c4SChris Lattner 
344e8b8f1bcSEli Bendersky void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
345e8b8f1bcSEli Bendersky   assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
346e8b8f1bcSEli Bendersky   std::string Backref = std::string("\\") +
347e8b8f1bcSEli Bendersky                         std::string(1, '0' + BackrefNum);
348e8b8f1bcSEli Bendersky   RegExStr += Backref;
349e8b8f1bcSEli Bendersky }
350e8b8f1bcSEli Bendersky 
35192987fb3SAlexander Kornienko bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
35292987fb3SAlexander Kornienko   // The only supported expression is @LINE([\+-]\d+)?
35392987fb3SAlexander Kornienko   if (!Expr.startswith("@LINE"))
35492987fb3SAlexander Kornienko     return false;
35592987fb3SAlexander Kornienko   Expr = Expr.substr(StringRef("@LINE").size());
35692987fb3SAlexander Kornienko   int Offset = 0;
35792987fb3SAlexander Kornienko   if (!Expr.empty()) {
35892987fb3SAlexander Kornienko     if (Expr[0] == '+')
35992987fb3SAlexander Kornienko       Expr = Expr.substr(1);
36092987fb3SAlexander Kornienko     else if (Expr[0] != '-')
36192987fb3SAlexander Kornienko       return false;
36292987fb3SAlexander Kornienko     if (Expr.getAsInteger(10, Offset))
36392987fb3SAlexander Kornienko       return false;
36492987fb3SAlexander Kornienko   }
36592987fb3SAlexander Kornienko   Value = llvm::itostr(LineNumber + Offset);
36692987fb3SAlexander Kornienko   return true;
36792987fb3SAlexander Kornienko }
36892987fb3SAlexander Kornienko 
369f08d2db9SChris Lattner /// Match - Match the pattern string against the input buffer Buffer.  This
370f08d2db9SChris Lattner /// returns the position that is matched or npos if there is no match.  If
371f08d2db9SChris Lattner /// there is a match, the size of the matched string is returned in MatchLen.
3728879e06dSChris Lattner size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
3738879e06dSChris Lattner                       StringMap<StringRef> &VariableTable) const {
374eba55822SJakob Stoklund Olesen   // If this is the EOF pattern, match it immediately.
37538820972SMatt Arsenault   if (CheckTy == Check::CheckEOF) {
376eba55822SJakob Stoklund Olesen     MatchLen = 0;
377eba55822SJakob Stoklund Olesen     return Buffer.size();
378eba55822SJakob Stoklund Olesen   }
379eba55822SJakob Stoklund Olesen 
380221460e0SChris Lattner   // If this is a fixed string pattern, just match it now.
381221460e0SChris Lattner   if (!FixedStr.empty()) {
382221460e0SChris Lattner     MatchLen = FixedStr.size();
383221460e0SChris Lattner     return Buffer.find(FixedStr);
384221460e0SChris Lattner   }
385221460e0SChris Lattner 
386b16ab0c4SChris Lattner   // Regex match.
3878879e06dSChris Lattner 
3888879e06dSChris Lattner   // If there are variable uses, we need to create a temporary string with the
3898879e06dSChris Lattner   // actual value.
3908879e06dSChris Lattner   StringRef RegExToMatch = RegExStr;
3918879e06dSChris Lattner   std::string TmpStr;
3928879e06dSChris Lattner   if (!VariableUses.empty()) {
3938879e06dSChris Lattner     TmpStr = RegExStr;
3948879e06dSChris Lattner 
3958879e06dSChris Lattner     unsigned InsertOffset = 0;
3968879e06dSChris Lattner     for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
39792987fb3SAlexander Kornienko       std::string Value;
39892987fb3SAlexander Kornienko 
39992987fb3SAlexander Kornienko       if (VariableUses[i].first[0] == '@') {
40092987fb3SAlexander Kornienko         if (!EvaluateExpression(VariableUses[i].first, Value))
40192987fb3SAlexander Kornienko           return StringRef::npos;
40292987fb3SAlexander Kornienko       } else {
403e0ef65abSDaniel Dunbar         StringMap<StringRef>::iterator it =
404e0ef65abSDaniel Dunbar           VariableTable.find(VariableUses[i].first);
405e0ef65abSDaniel Dunbar         // If the variable is undefined, return an error.
406e0ef65abSDaniel Dunbar         if (it == VariableTable.end())
407e0ef65abSDaniel Dunbar           return StringRef::npos;
408e0ef65abSDaniel Dunbar 
4096f4f77b7SHans Wennborg         // Look up the value and escape it so that we can put it into the regex.
4106f4f77b7SHans Wennborg         Value += Regex::escape(it->second);
41192987fb3SAlexander Kornienko       }
4128879e06dSChris Lattner 
4138879e06dSChris Lattner       // Plop it into the regex at the adjusted offset.
4148879e06dSChris Lattner       TmpStr.insert(TmpStr.begin()+VariableUses[i].second+InsertOffset,
4158879e06dSChris Lattner                     Value.begin(), Value.end());
4168879e06dSChris Lattner       InsertOffset += Value.size();
4178879e06dSChris Lattner     }
4188879e06dSChris Lattner 
4198879e06dSChris Lattner     // Match the newly constructed regex.
4208879e06dSChris Lattner     RegExToMatch = TmpStr;
4218879e06dSChris Lattner   }
4228879e06dSChris Lattner 
4238879e06dSChris Lattner 
424b16ab0c4SChris Lattner   SmallVector<StringRef, 4> MatchInfo;
4258879e06dSChris Lattner   if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
426f08d2db9SChris Lattner     return StringRef::npos;
427b16ab0c4SChris Lattner 
428b16ab0c4SChris Lattner   // Successful regex match.
429b16ab0c4SChris Lattner   assert(!MatchInfo.empty() && "Didn't get any match");
430b16ab0c4SChris Lattner   StringRef FullMatch = MatchInfo[0];
431b16ab0c4SChris Lattner 
4328879e06dSChris Lattner   // If this defines any variables, remember their values.
433e8b8f1bcSEli Bendersky   for (std::map<StringRef, unsigned>::const_iterator I = VariableDefs.begin(),
434e8b8f1bcSEli Bendersky                                                      E = VariableDefs.end();
435e8b8f1bcSEli Bendersky        I != E; ++I) {
436e8b8f1bcSEli Bendersky     assert(I->second < MatchInfo.size() && "Internal paren error");
437e8b8f1bcSEli Bendersky     VariableTable[I->first] = MatchInfo[I->second];
4380a4c44bdSChris Lattner   }
4390a4c44bdSChris Lattner 
440b16ab0c4SChris Lattner   MatchLen = FullMatch.size();
441b16ab0c4SChris Lattner   return FullMatch.data()-Buffer.data();
442f08d2db9SChris Lattner }
443f08d2db9SChris Lattner 
444fd29d886SDaniel Dunbar unsigned Pattern::ComputeMatchDistance(StringRef Buffer,
445fd29d886SDaniel Dunbar                               const StringMap<StringRef> &VariableTable) const {
446fd29d886SDaniel Dunbar   // Just compute the number of matching characters. For regular expressions, we
447fd29d886SDaniel Dunbar   // just compare against the regex itself and hope for the best.
448fd29d886SDaniel Dunbar   //
449fd29d886SDaniel Dunbar   // FIXME: One easy improvement here is have the regex lib generate a single
450fd29d886SDaniel Dunbar   // example regular expression which matches, and use that as the example
451fd29d886SDaniel Dunbar   // string.
452fd29d886SDaniel Dunbar   StringRef ExampleString(FixedStr);
453fd29d886SDaniel Dunbar   if (ExampleString.empty())
454fd29d886SDaniel Dunbar     ExampleString = RegExStr;
455fd29d886SDaniel Dunbar 
456e9aa36c8SDaniel Dunbar   // Only compare up to the first line in the buffer, or the string size.
457e9aa36c8SDaniel Dunbar   StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
458e9aa36c8SDaniel Dunbar   BufferPrefix = BufferPrefix.split('\n').first;
459e9aa36c8SDaniel Dunbar   return BufferPrefix.edit_distance(ExampleString);
460fd29d886SDaniel Dunbar }
461fd29d886SDaniel Dunbar 
462e0ef65abSDaniel Dunbar void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
463e0ef65abSDaniel Dunbar                                const StringMap<StringRef> &VariableTable) const{
464e0ef65abSDaniel Dunbar   // If this was a regular expression using variables, print the current
465e0ef65abSDaniel Dunbar   // variable values.
466e0ef65abSDaniel Dunbar   if (!VariableUses.empty()) {
467e0ef65abSDaniel Dunbar     for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
468e69170a1SAlp Toker       SmallString<256> Msg;
469e69170a1SAlp Toker       raw_svector_ostream OS(Msg);
47092987fb3SAlexander Kornienko       StringRef Var = VariableUses[i].first;
47192987fb3SAlexander Kornienko       if (Var[0] == '@') {
47292987fb3SAlexander Kornienko         std::string Value;
47392987fb3SAlexander Kornienko         if (EvaluateExpression(Var, Value)) {
47492987fb3SAlexander Kornienko           OS << "with expression \"";
47592987fb3SAlexander Kornienko           OS.write_escaped(Var) << "\" equal to \"";
47692987fb3SAlexander Kornienko           OS.write_escaped(Value) << "\"";
47792987fb3SAlexander Kornienko         } else {
47892987fb3SAlexander Kornienko           OS << "uses incorrect expression \"";
47992987fb3SAlexander Kornienko           OS.write_escaped(Var) << "\"";
48092987fb3SAlexander Kornienko         }
48192987fb3SAlexander Kornienko       } else {
48292987fb3SAlexander Kornienko         StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
483e0ef65abSDaniel Dunbar 
484e0ef65abSDaniel Dunbar         // Check for undefined variable references.
485e0ef65abSDaniel Dunbar         if (it == VariableTable.end()) {
486e0ef65abSDaniel Dunbar           OS << "uses undefined variable \"";
48792987fb3SAlexander Kornienko           OS.write_escaped(Var) << "\"";
488e0ef65abSDaniel Dunbar         } else {
489e0ef65abSDaniel Dunbar           OS << "with variable \"";
490e0ef65abSDaniel Dunbar           OS.write_escaped(Var) << "\" equal to \"";
491e0ef65abSDaniel Dunbar           OS.write_escaped(it->second) << "\"";
492e0ef65abSDaniel Dunbar         }
49392987fb3SAlexander Kornienko       }
494e0ef65abSDaniel Dunbar 
49503b80a40SChris Lattner       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
49603b80a40SChris Lattner                       OS.str());
497e0ef65abSDaniel Dunbar     }
498e0ef65abSDaniel Dunbar   }
499fd29d886SDaniel Dunbar 
500fd29d886SDaniel Dunbar   // Attempt to find the closest/best fuzzy match.  Usually an error happens
501fd29d886SDaniel Dunbar   // because some string in the output didn't exactly match. In these cases, we
502fd29d886SDaniel Dunbar   // would like to show the user a best guess at what "should have" matched, to
503fd29d886SDaniel Dunbar   // save them having to actually check the input manually.
504fd29d886SDaniel Dunbar   size_t NumLinesForward = 0;
505fd29d886SDaniel Dunbar   size_t Best = StringRef::npos;
506fd29d886SDaniel Dunbar   double BestQuality = 0;
507fd29d886SDaniel Dunbar 
508fd29d886SDaniel Dunbar   // Use an arbitrary 4k limit on how far we will search.
5092bf486ebSDan Gohman   for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
510fd29d886SDaniel Dunbar     if (Buffer[i] == '\n')
511fd29d886SDaniel Dunbar       ++NumLinesForward;
512fd29d886SDaniel Dunbar 
513df22bbf7SDan Gohman     // Patterns have leading whitespace stripped, so skip whitespace when
514df22bbf7SDan Gohman     // looking for something which looks like a pattern.
515df22bbf7SDan Gohman     if (Buffer[i] == ' ' || Buffer[i] == '\t')
516df22bbf7SDan Gohman       continue;
517df22bbf7SDan Gohman 
518fd29d886SDaniel Dunbar     // Compute the "quality" of this match as an arbitrary combination of the
519fd29d886SDaniel Dunbar     // match distance and the number of lines skipped to get to this match.
520fd29d886SDaniel Dunbar     unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
521fd29d886SDaniel Dunbar     double Quality = Distance + (NumLinesForward / 100.);
522fd29d886SDaniel Dunbar 
523fd29d886SDaniel Dunbar     if (Quality < BestQuality || Best == StringRef::npos) {
524fd29d886SDaniel Dunbar       Best = i;
525fd29d886SDaniel Dunbar       BestQuality = Quality;
526fd29d886SDaniel Dunbar     }
527fd29d886SDaniel Dunbar   }
528fd29d886SDaniel Dunbar 
529fd29d886SDaniel Dunbar   // Print the "possible intended match here" line if we found something
530c069cc8eSDaniel Dunbar   // reasonable and not equal to what we showed in the "scanning from here"
531c069cc8eSDaniel Dunbar   // line.
532c069cc8eSDaniel Dunbar   if (Best && Best != StringRef::npos && BestQuality < 50) {
533fd29d886SDaniel Dunbar       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
53403b80a40SChris Lattner                       SourceMgr::DK_Note, "possible intended match here");
535fd29d886SDaniel Dunbar 
536fd29d886SDaniel Dunbar     // FIXME: If we wanted to be really friendly we would show why the match
537fd29d886SDaniel Dunbar     // failed, as it can be hard to spot simple one character differences.
538fd29d886SDaniel Dunbar   }
539e0ef65abSDaniel Dunbar }
54074d50731SChris Lattner 
54181e5cd9eSAdrian Prantl size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
542061d2baaSEli Bendersky   // Offset keeps track of the current offset within the input Str
543061d2baaSEli Bendersky   size_t Offset = 0;
544061d2baaSEli Bendersky   // [...] Nesting depth
545061d2baaSEli Bendersky   size_t BracketDepth = 0;
546061d2baaSEli Bendersky 
547061d2baaSEli Bendersky   while (!Str.empty()) {
548061d2baaSEli Bendersky     if (Str.startswith("]]") && BracketDepth == 0)
549061d2baaSEli Bendersky       return Offset;
550061d2baaSEli Bendersky     if (Str[0] == '\\') {
551061d2baaSEli Bendersky       // Backslash escapes the next char within regexes, so skip them both.
552061d2baaSEli Bendersky       Str = Str.substr(2);
553061d2baaSEli Bendersky       Offset += 2;
554061d2baaSEli Bendersky     } else {
555061d2baaSEli Bendersky       switch (Str[0]) {
556061d2baaSEli Bendersky         default:
557061d2baaSEli Bendersky           break;
558061d2baaSEli Bendersky         case '[':
559061d2baaSEli Bendersky           BracketDepth++;
560061d2baaSEli Bendersky           break;
561061d2baaSEli Bendersky         case ']':
56281e5cd9eSAdrian Prantl           if (BracketDepth == 0) {
56381e5cd9eSAdrian Prantl             SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
56481e5cd9eSAdrian Prantl                             SourceMgr::DK_Error,
56581e5cd9eSAdrian Prantl                             "missing closing \"]\" for regex variable");
56681e5cd9eSAdrian Prantl             exit(1);
56781e5cd9eSAdrian Prantl           }
568061d2baaSEli Bendersky           BracketDepth--;
569061d2baaSEli Bendersky           break;
570061d2baaSEli Bendersky       }
571061d2baaSEli Bendersky       Str = Str.substr(1);
572061d2baaSEli Bendersky       Offset++;
573061d2baaSEli Bendersky     }
574061d2baaSEli Bendersky   }
575061d2baaSEli Bendersky 
576061d2baaSEli Bendersky   return StringRef::npos;
577061d2baaSEli Bendersky }
578061d2baaSEli Bendersky 
579061d2baaSEli Bendersky 
58074d50731SChris Lattner //===----------------------------------------------------------------------===//
58174d50731SChris Lattner // Check Strings.
58274d50731SChris Lattner //===----------------------------------------------------------------------===//
5833b40b445SChris Lattner 
5843b40b445SChris Lattner /// CheckString - This is a check that we found in the input file.
5853b40b445SChris Lattner struct CheckString {
5863b40b445SChris Lattner   /// Pat - The pattern to match.
5873b40b445SChris Lattner   Pattern Pat;
58826cccfe1SChris Lattner 
58913df4626SMatt Arsenault   /// Prefix - Which prefix name this check matched.
59013df4626SMatt Arsenault   StringRef Prefix;
59113df4626SMatt Arsenault 
59226cccfe1SChris Lattner   /// Loc - The location in the match file that the check string was specified.
59326cccfe1SChris Lattner   SMLoc Loc;
59426cccfe1SChris Lattner 
59538820972SMatt Arsenault   /// CheckTy - Specify what kind of check this is. e.g. CHECK-NEXT: directive,
59638820972SMatt Arsenault   /// as opposed to a CHECK: directive.
59738820972SMatt Arsenault   Check::CheckType CheckTy;
598f8bd2e5bSStephen Lin 
59991a1b2c9SMichael Liao   /// DagNotStrings - These are all of the strings that are disallowed from
600236d2d5eSChris Lattner   /// occurring between this match string and the previous one (or start of
601236d2d5eSChris Lattner   /// file).
60291a1b2c9SMichael Liao   std::vector<Pattern> DagNotStrings;
603236d2d5eSChris Lattner 
60413df4626SMatt Arsenault 
60513df4626SMatt Arsenault   CheckString(const Pattern &P,
60613df4626SMatt Arsenault               StringRef S,
60713df4626SMatt Arsenault               SMLoc L,
60813df4626SMatt Arsenault               Check::CheckType Ty)
60913df4626SMatt Arsenault     : Pat(P), Prefix(S), Loc(L), CheckTy(Ty) {}
610dcc7d48dSMichael Liao 
61191a1b2c9SMichael Liao   /// Check - Match check string and its "not strings" and/or "dag strings".
612e93a3a08SStephen Lin   size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
613f8bd2e5bSStephen Lin                size_t &MatchLen, StringMap<StringRef> &VariableTable) const;
614dcc7d48dSMichael Liao 
615dcc7d48dSMichael Liao   /// CheckNext - Verify there is a single line in the given buffer.
616dcc7d48dSMichael Liao   bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
617dcc7d48dSMichael Liao 
618dcc7d48dSMichael Liao   /// CheckNot - Verify there's no "not strings" in the given buffer.
619dcc7d48dSMichael Liao   bool CheckNot(const SourceMgr &SM, StringRef Buffer,
62091a1b2c9SMichael Liao                 const std::vector<const Pattern *> &NotStrings,
62191a1b2c9SMichael Liao                 StringMap<StringRef> &VariableTable) const;
62291a1b2c9SMichael Liao 
62391a1b2c9SMichael Liao   /// CheckDag - Match "dag strings" and their mixed "not strings".
62491a1b2c9SMichael Liao   size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
62591a1b2c9SMichael Liao                   std::vector<const Pattern *> &NotStrings,
626dcc7d48dSMichael Liao                   StringMap<StringRef> &VariableTable) const;
62726cccfe1SChris Lattner };
62826cccfe1SChris Lattner 
6295ea04c38SGuy Benyei /// Canonicalize whitespaces in the input file. Line endings are replaced
6305ea04c38SGuy Benyei /// with UNIX-style '\n'.
6315ea04c38SGuy Benyei ///
6325ea04c38SGuy Benyei /// \param PreserveHorizontal Don't squash consecutive horizontal whitespace
6335ea04c38SGuy Benyei /// characters to a single space.
634*ce5dd1acSRafael Espindola static MemoryBuffer *CanonicalizeInputFile(std::unique_ptr<MemoryBuffer> MB,
6355ea04c38SGuy Benyei                                            bool PreserveHorizontal) {
6360e45d24aSChris Lattner   SmallString<128> NewFile;
637a2f8fc5aSChris Lattner   NewFile.reserve(MB->getBufferSize());
638a2f8fc5aSChris Lattner 
639a2f8fc5aSChris Lattner   for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd();
640a2f8fc5aSChris Lattner        Ptr != End; ++Ptr) {
641fd781bf0SNAKAMURA Takumi     // Eliminate trailing dosish \r.
642fd781bf0SNAKAMURA Takumi     if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
643fd781bf0SNAKAMURA Takumi       continue;
644fd781bf0SNAKAMURA Takumi     }
645fd781bf0SNAKAMURA Takumi 
6465ea04c38SGuy Benyei     // If current char is not a horizontal whitespace or if horizontal
6475ea04c38SGuy Benyei     // whitespace canonicalization is disabled, dump it to output as is.
6485ea04c38SGuy Benyei     if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) {
649a2f8fc5aSChris Lattner       NewFile.push_back(*Ptr);
650a2f8fc5aSChris Lattner       continue;
651a2f8fc5aSChris Lattner     }
652a2f8fc5aSChris Lattner 
653a2f8fc5aSChris Lattner     // Otherwise, add one space and advance over neighboring space.
654a2f8fc5aSChris Lattner     NewFile.push_back(' ');
655a2f8fc5aSChris Lattner     while (Ptr+1 != End &&
656a2f8fc5aSChris Lattner            (Ptr[1] == ' ' || Ptr[1] == '\t'))
657a2f8fc5aSChris Lattner       ++Ptr;
658a2f8fc5aSChris Lattner   }
659a2f8fc5aSChris Lattner 
660*ce5dd1acSRafael Espindola   return MemoryBuffer::getMemBufferCopy(NewFile.str(),
661*ce5dd1acSRafael Espindola                                         MB->getBufferIdentifier());
662a2f8fc5aSChris Lattner }
663a2f8fc5aSChris Lattner 
/// Return true if \p c can be part of a word: an alphanumeric character, '-'
/// or '_'.
static bool IsPartOfWord(char c) {
  // isalnum() is only defined for values representable as unsigned char (or
  // EOF), so convert first: plain char may be negative for bytes >= 0x80.
  return (isalnum(static_cast<unsigned char>(c)) || c == '-' || c == '_');
}
66738820972SMatt Arsenault 
66813df4626SMatt Arsenault // Get the size of the prefix extension.
66913df4626SMatt Arsenault static size_t CheckTypeSize(Check::CheckType Ty) {
67013df4626SMatt Arsenault   switch (Ty) {
67113df4626SMatt Arsenault   case Check::CheckNone:
67213df4626SMatt Arsenault     return 0;
67313df4626SMatt Arsenault 
67413df4626SMatt Arsenault   case Check::CheckPlain:
67513df4626SMatt Arsenault     return sizeof(":") - 1;
67613df4626SMatt Arsenault 
67713df4626SMatt Arsenault   case Check::CheckNext:
67813df4626SMatt Arsenault     return sizeof("-NEXT:") - 1;
67913df4626SMatt Arsenault 
68013df4626SMatt Arsenault   case Check::CheckNot:
68113df4626SMatt Arsenault     return sizeof("-NOT:") - 1;
68213df4626SMatt Arsenault 
68313df4626SMatt Arsenault   case Check::CheckDAG:
68413df4626SMatt Arsenault     return sizeof("-DAG:") - 1;
68513df4626SMatt Arsenault 
68613df4626SMatt Arsenault   case Check::CheckLabel:
68713df4626SMatt Arsenault     return sizeof("-LABEL:") - 1;
68813df4626SMatt Arsenault 
68913df4626SMatt Arsenault   case Check::CheckEOF:
69013df4626SMatt Arsenault     llvm_unreachable("Should not be using EOF size");
69113df4626SMatt Arsenault   }
69213df4626SMatt Arsenault 
69313df4626SMatt Arsenault   llvm_unreachable("Bad check type");
69413df4626SMatt Arsenault }
69513df4626SMatt Arsenault 
69613df4626SMatt Arsenault static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) {
697c4d2d471SMatt Arsenault   char NextChar = Buffer[Prefix.size()];
69838820972SMatt Arsenault 
69938820972SMatt Arsenault   // Verify that the : is present after the prefix.
70013df4626SMatt Arsenault   if (NextChar == ':')
70138820972SMatt Arsenault     return Check::CheckPlain;
70238820972SMatt Arsenault 
70313df4626SMatt Arsenault   if (NextChar != '-')
70438820972SMatt Arsenault     return Check::CheckNone;
70538820972SMatt Arsenault 
706c4d2d471SMatt Arsenault   StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
70713df4626SMatt Arsenault   if (Rest.startswith("NEXT:"))
70838820972SMatt Arsenault     return Check::CheckNext;
70938820972SMatt Arsenault 
71013df4626SMatt Arsenault   if (Rest.startswith("NOT:"))
71138820972SMatt Arsenault     return Check::CheckNot;
71238820972SMatt Arsenault 
71313df4626SMatt Arsenault   if (Rest.startswith("DAG:"))
71438820972SMatt Arsenault     return Check::CheckDAG;
71538820972SMatt Arsenault 
71613df4626SMatt Arsenault   if (Rest.startswith("LABEL:"))
71738820972SMatt Arsenault     return Check::CheckLabel;
71813df4626SMatt Arsenault 
71913df4626SMatt Arsenault   return Check::CheckNone;
72038820972SMatt Arsenault }
72138820972SMatt Arsenault 
72213df4626SMatt Arsenault // From the given position, find the next character after the word.
72313df4626SMatt Arsenault static size_t SkipWord(StringRef Str, size_t Loc) {
72413df4626SMatt Arsenault   while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
72513df4626SMatt Arsenault     ++Loc;
72613df4626SMatt Arsenault   return Loc;
72713df4626SMatt Arsenault }
72813df4626SMatt Arsenault 
72913df4626SMatt Arsenault // Try to find the first match in buffer for any prefix. If a valid match is
73013df4626SMatt Arsenault // found, return that prefix and set its type and location.  If there are almost
73113df4626SMatt Arsenault // matches (e.g. the actual prefix string is found, but is not an actual check
73213df4626SMatt Arsenault // string), but no valid match, return an empty string and set the position to
73313df4626SMatt Arsenault // resume searching from. If no partial matches are found, return an empty
73413df4626SMatt Arsenault // string and the location will be StringRef::npos. If one prefix is a substring
73513df4626SMatt Arsenault // of another, the maximal match should be found. e.g. if "A" and "AA" are
73613df4626SMatt Arsenault // prefixes then AA-CHECK: should match the second one.
73713df4626SMatt Arsenault static StringRef FindFirstCandidateMatch(StringRef &Buffer,
73813df4626SMatt Arsenault                                          Check::CheckType &CheckTy,
73913df4626SMatt Arsenault                                          size_t &CheckLoc) {
74013df4626SMatt Arsenault   StringRef FirstPrefix;
74113df4626SMatt Arsenault   size_t FirstLoc = StringRef::npos;
74213df4626SMatt Arsenault   size_t SearchLoc = StringRef::npos;
74313df4626SMatt Arsenault   Check::CheckType FirstTy = Check::CheckNone;
74413df4626SMatt Arsenault 
74513df4626SMatt Arsenault   CheckTy = Check::CheckNone;
74613df4626SMatt Arsenault   CheckLoc = StringRef::npos;
74713df4626SMatt Arsenault 
74813df4626SMatt Arsenault   for (prefix_iterator I = CheckPrefixes.begin(), E = CheckPrefixes.end();
74913df4626SMatt Arsenault        I != E; ++I) {
75013df4626SMatt Arsenault     StringRef Prefix(*I);
75113df4626SMatt Arsenault     size_t PrefixLoc = Buffer.find(Prefix);
75213df4626SMatt Arsenault 
75313df4626SMatt Arsenault     if (PrefixLoc == StringRef::npos)
75413df4626SMatt Arsenault       continue;
75513df4626SMatt Arsenault 
75613df4626SMatt Arsenault     // Track where we are searching for invalid prefixes that look almost right.
75713df4626SMatt Arsenault     // We need to only advance to the first partial match on the next attempt
75813df4626SMatt Arsenault     // since a partial match could be a substring of a later, valid prefix.
75913df4626SMatt Arsenault     // Need to skip to the end of the word, otherwise we could end up
76013df4626SMatt Arsenault     // matching a prefix in a substring later.
76113df4626SMatt Arsenault     if (PrefixLoc < SearchLoc)
76213df4626SMatt Arsenault       SearchLoc = SkipWord(Buffer, PrefixLoc);
76313df4626SMatt Arsenault 
76413df4626SMatt Arsenault     // We only want to find the first match to avoid skipping some.
76513df4626SMatt Arsenault     if (PrefixLoc > FirstLoc)
76613df4626SMatt Arsenault       continue;
767a7181a1bSAlexey Samsonov     // If one matching check-prefix is a prefix of another, choose the
768a7181a1bSAlexey Samsonov     // longer one.
769a7181a1bSAlexey Samsonov     if (PrefixLoc == FirstLoc && Prefix.size() < FirstPrefix.size())
770a7181a1bSAlexey Samsonov       continue;
77113df4626SMatt Arsenault 
77213df4626SMatt Arsenault     StringRef Rest = Buffer.drop_front(PrefixLoc);
77313df4626SMatt Arsenault     // Make sure we have actually found the prefix, and not a word containing
77413df4626SMatt Arsenault     // it. This should also prevent matching the wrong prefix when one is a
77513df4626SMatt Arsenault     // substring of another.
77613df4626SMatt Arsenault     if (PrefixLoc != 0 && IsPartOfWord(Buffer[PrefixLoc - 1]))
77743b5f572SDaniel Sanders       FirstTy = Check::CheckNone;
77843b5f572SDaniel Sanders     else
77943b5f572SDaniel Sanders       FirstTy = FindCheckType(Rest, Prefix);
78013df4626SMatt Arsenault 
78113df4626SMatt Arsenault     FirstLoc = PrefixLoc;
782a7181a1bSAlexey Samsonov     FirstPrefix = Prefix;
78313df4626SMatt Arsenault   }
78413df4626SMatt Arsenault 
785a7181a1bSAlexey Samsonov   // If the first prefix is invalid, we should continue the search after it.
786a7181a1bSAlexey Samsonov   if (FirstTy == Check::CheckNone) {
78713df4626SMatt Arsenault     CheckLoc = SearchLoc;
788a7181a1bSAlexey Samsonov     return "";
789a7181a1bSAlexey Samsonov   }
790a7181a1bSAlexey Samsonov 
79113df4626SMatt Arsenault   CheckTy = FirstTy;
79213df4626SMatt Arsenault   CheckLoc = FirstLoc;
79313df4626SMatt Arsenault   return FirstPrefix;
79413df4626SMatt Arsenault }
79513df4626SMatt Arsenault 
79613df4626SMatt Arsenault static StringRef FindFirstMatchingPrefix(StringRef &Buffer,
79713df4626SMatt Arsenault                                          unsigned &LineNumber,
79813df4626SMatt Arsenault                                          Check::CheckType &CheckTy,
79913df4626SMatt Arsenault                                          size_t &CheckLoc) {
80013df4626SMatt Arsenault   while (!Buffer.empty()) {
80113df4626SMatt Arsenault     StringRef Prefix = FindFirstCandidateMatch(Buffer, CheckTy, CheckLoc);
80213df4626SMatt Arsenault     // If we found a real match, we are done.
80313df4626SMatt Arsenault     if (!Prefix.empty()) {
80413df4626SMatt Arsenault       LineNumber += Buffer.substr(0, CheckLoc).count('\n');
80513df4626SMatt Arsenault       return Prefix;
80613df4626SMatt Arsenault     }
80713df4626SMatt Arsenault 
80813df4626SMatt Arsenault     // We didn't find any almost matches either, we are also done.
80913df4626SMatt Arsenault     if (CheckLoc == StringRef::npos)
81013df4626SMatt Arsenault       return StringRef();
81113df4626SMatt Arsenault 
81213df4626SMatt Arsenault     LineNumber += Buffer.substr(0, CheckLoc + 1).count('\n');
81313df4626SMatt Arsenault 
81413df4626SMatt Arsenault     // Advance to the last possible match we found and try again.
81513df4626SMatt Arsenault     Buffer = Buffer.drop_front(CheckLoc + 1);
81613df4626SMatt Arsenault   }
81713df4626SMatt Arsenault 
81813df4626SMatt Arsenault   return StringRef();
81938820972SMatt Arsenault }
820ee3c74fbSChris Lattner 
821ee3c74fbSChris Lattner /// ReadCheckFile - Read the check file, which specifies the sequence of
822ee3c74fbSChris Lattner /// expected strings.  The strings are added to the CheckStrings vector.
82343d50d4aSEli Bendersky /// Returns true in case of an error, false otherwise.
824ee3c74fbSChris Lattner static bool ReadCheckFile(SourceMgr &SM,
82526cccfe1SChris Lattner                           std::vector<CheckString> &CheckStrings) {
826adf21f2aSRafael Espindola   ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
827adf21f2aSRafael Espindola       MemoryBuffer::getFileOrSTDIN(CheckFilename);
828adf21f2aSRafael Espindola   if (std::error_code EC = FileOrErr.getError()) {
829adf21f2aSRafael Espindola     errs() << "Could not open check file '" << CheckFilename
830adf21f2aSRafael Espindola            << "': " << EC.message() << '\n';
831ee3c74fbSChris Lattner     return true;
832ee3c74fbSChris Lattner   }
833a2f8fc5aSChris Lattner 
834a2f8fc5aSChris Lattner   // If we want to canonicalize whitespace, strip excess whitespace from the
8355ea04c38SGuy Benyei   // buffer containing the CHECK lines. Remove DOS style line endings.
836*ce5dd1acSRafael Espindola   MemoryBuffer *F = CanonicalizeInputFile(std::move(FileOrErr.get()),
837adf21f2aSRafael Espindola                                           NoCanonicalizeWhiteSpace);
838a2f8fc5aSChris Lattner 
839ee3c74fbSChris Lattner   SM.AddNewSourceBuffer(F, SMLoc());
840ee3c74fbSChris Lattner 
84110f10cedSChris Lattner   // Find all instances of CheckPrefix followed by : in the file.
842caa5fc0cSChris Lattner   StringRef Buffer = F->getBuffer();
84356ccdbbdSAlexander Kornienko 
84456ccdbbdSAlexander Kornienko   std::vector<Pattern> ImplicitNegativeChecks;
84556ccdbbdSAlexander Kornienko   for (const auto &PatternString : ImplicitCheckNot) {
84656ccdbbdSAlexander Kornienko     // Create a buffer with fake command line content in order to display the
84756ccdbbdSAlexander Kornienko     // command line option responsible for the specific implicit CHECK-NOT.
84856ccdbbdSAlexander Kornienko     std::string Prefix = std::string("-") + ImplicitCheckNot.ArgStr + "='";
84956ccdbbdSAlexander Kornienko     std::string Suffix = "'";
85056ccdbbdSAlexander Kornienko     MemoryBuffer *CmdLine = MemoryBuffer::getMemBufferCopy(
85156ccdbbdSAlexander Kornienko         Prefix + PatternString + Suffix, "command line");
85256ccdbbdSAlexander Kornienko     StringRef PatternInBuffer =
85356ccdbbdSAlexander Kornienko         CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
85456ccdbbdSAlexander Kornienko     SM.AddNewSourceBuffer(CmdLine, SMLoc());
85556ccdbbdSAlexander Kornienko 
85656ccdbbdSAlexander Kornienko     ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot));
85756ccdbbdSAlexander Kornienko     ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
85856ccdbbdSAlexander Kornienko                                                "IMPLICIT-CHECK", SM, 0);
85956ccdbbdSAlexander Kornienko   }
86056ccdbbdSAlexander Kornienko 
86156ccdbbdSAlexander Kornienko 
86256ccdbbdSAlexander Kornienko   std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;
863236d2d5eSChris Lattner 
86443d50d4aSEli Bendersky   // LineNumber keeps track of the line on which CheckPrefix instances are
86543d50d4aSEli Bendersky   // found.
86692987fb3SAlexander Kornienko   unsigned LineNumber = 1;
86792987fb3SAlexander Kornienko 
868ee3c74fbSChris Lattner   while (1) {
86913df4626SMatt Arsenault     Check::CheckType CheckTy;
87013df4626SMatt Arsenault     size_t PrefixLoc;
87113df4626SMatt Arsenault 
87213df4626SMatt Arsenault     // See if a prefix occurs in the memory buffer.
87313df4626SMatt Arsenault     StringRef UsedPrefix = FindFirstMatchingPrefix(Buffer,
87413df4626SMatt Arsenault                                                    LineNumber,
87513df4626SMatt Arsenault                                                    CheckTy,
87613df4626SMatt Arsenault                                                    PrefixLoc);
87713df4626SMatt Arsenault     if (UsedPrefix.empty())
878ee3c74fbSChris Lattner       break;
879ee3c74fbSChris Lattner 
88013df4626SMatt Arsenault     Buffer = Buffer.drop_front(PrefixLoc);
88192987fb3SAlexander Kornienko 
88213df4626SMatt Arsenault     // Location to use for error messages.
88313df4626SMatt Arsenault     const char *UsedPrefixStart = Buffer.data() + (PrefixLoc == 0 ? 0 : 1);
88492987fb3SAlexander Kornienko 
88513df4626SMatt Arsenault     // PrefixLoc is to the start of the prefix. Skip to the end.
88613df4626SMatt Arsenault     Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy));
88710f10cedSChris Lattner 
88838820972SMatt Arsenault     // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
88938820972SMatt Arsenault     // leading and trailing whitespace.
890236d2d5eSChris Lattner     Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
891ee3c74fbSChris Lattner 
892ee3c74fbSChris Lattner     // Scan ahead to the end of line.
893caa5fc0cSChris Lattner     size_t EOL = Buffer.find_first_of("\n\r");
894ee3c74fbSChris Lattner 
895838fb09aSDan Gohman     // Remember the location of the start of the pattern, for diagnostics.
896838fb09aSDan Gohman     SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
897838fb09aSDan Gohman 
89874d50731SChris Lattner     // Parse the pattern.
89938820972SMatt Arsenault     Pattern P(CheckTy);
90013df4626SMatt Arsenault     if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber))
901ee3c74fbSChris Lattner       return true;
902ee3c74fbSChris Lattner 
903f8bd2e5bSStephen Lin     // Verify that CHECK-LABEL lines do not define or use variables
90438820972SMatt Arsenault     if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
90513df4626SMatt Arsenault       SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
906f8bd2e5bSStephen Lin                       SourceMgr::DK_Error,
90713df4626SMatt Arsenault                       "found '" + UsedPrefix + "-LABEL:'"
90813df4626SMatt Arsenault                       " with variable definition or use");
909f8bd2e5bSStephen Lin       return true;
910f8bd2e5bSStephen Lin     }
911f8bd2e5bSStephen Lin 
912236d2d5eSChris Lattner     Buffer = Buffer.substr(EOL);
91374d50731SChris Lattner 
914da108b4eSChris Lattner     // Verify that CHECK-NEXT lines have at least one CHECK line before them.
91538820972SMatt Arsenault     if ((CheckTy == Check::CheckNext) && CheckStrings.empty()) {
91613df4626SMatt Arsenault       SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
91703b80a40SChris Lattner                       SourceMgr::DK_Error,
91813df4626SMatt Arsenault                       "found '" + UsedPrefix + "-NEXT:' without previous '"
91913df4626SMatt Arsenault                       + UsedPrefix + ": line");
920da108b4eSChris Lattner       return true;
921da108b4eSChris Lattner     }
922da108b4eSChris Lattner 
92391a1b2c9SMichael Liao     // Handle CHECK-DAG/-NOT.
92438820972SMatt Arsenault     if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
92591a1b2c9SMichael Liao       DagNotMatches.push_back(P);
92674d50731SChris Lattner       continue;
92774d50731SChris Lattner     }
92874d50731SChris Lattner 
929ee3c74fbSChris Lattner     // Okay, add the string we captured to the output vector and move on.
9303b40b445SChris Lattner     CheckStrings.push_back(CheckString(P,
93113df4626SMatt Arsenault                                        UsedPrefix,
932838fb09aSDan Gohman                                        PatternLoc,
93338820972SMatt Arsenault                                        CheckTy));
93491a1b2c9SMichael Liao     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
93556ccdbbdSAlexander Kornienko     DagNotMatches = ImplicitNegativeChecks;
936ee3c74fbSChris Lattner   }
937ee3c74fbSChris Lattner 
93813df4626SMatt Arsenault   // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
93913df4626SMatt Arsenault   // prefix as a filler for the error message.
94091a1b2c9SMichael Liao   if (!DagNotMatches.empty()) {
94138820972SMatt Arsenault     CheckStrings.push_back(CheckString(Pattern(Check::CheckEOF),
94213df4626SMatt Arsenault                                        CheckPrefixes[0],
943eba55822SJakob Stoklund Olesen                                        SMLoc::getFromPointer(Buffer.data()),
94438820972SMatt Arsenault                                        Check::CheckEOF));
94591a1b2c9SMichael Liao     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
946eba55822SJakob Stoklund Olesen   }
947eba55822SJakob Stoklund Olesen 
948ee3c74fbSChris Lattner   if (CheckStrings.empty()) {
94913df4626SMatt Arsenault     errs() << "error: no check strings found with prefix"
95013df4626SMatt Arsenault            << (CheckPrefixes.size() > 1 ? "es " : " ");
95113df4626SMatt Arsenault     for (size_t I = 0, N = CheckPrefixes.size(); I != N; ++I) {
95213df4626SMatt Arsenault       StringRef Prefix(CheckPrefixes[I]);
95313df4626SMatt Arsenault       errs() << '\'' << Prefix << ":'";
95413df4626SMatt Arsenault       if (I != N - 1)
95513df4626SMatt Arsenault         errs() << ", ";
95613df4626SMatt Arsenault     }
95713df4626SMatt Arsenault 
95813df4626SMatt Arsenault     errs() << '\n';
959ee3c74fbSChris Lattner     return true;
960ee3c74fbSChris Lattner   }
961ee3c74fbSChris Lattner 
962ee3c74fbSChris Lattner   return false;
963ee3c74fbSChris Lattner }
964ee3c74fbSChris Lattner 
96591a1b2c9SMichael Liao static void PrintCheckFailed(const SourceMgr &SM, const SMLoc &Loc,
96691a1b2c9SMichael Liao                              const Pattern &Pat, StringRef Buffer,
967e0ef65abSDaniel Dunbar                              StringMap<StringRef> &VariableTable) {
968da108b4eSChris Lattner   // Otherwise, we have an error, emit an error message.
96991a1b2c9SMichael Liao   SM.PrintMessage(Loc, SourceMgr::DK_Error,
97003b80a40SChris Lattner                   "expected string not found in input");
971da108b4eSChris Lattner 
972da108b4eSChris Lattner   // Print the "scanning from here" line.  If the current position is at the
973da108b4eSChris Lattner   // end of a line, advance to the start of the next line.
974caa5fc0cSChris Lattner   Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
975da108b4eSChris Lattner 
97603b80a40SChris Lattner   SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
97703b80a40SChris Lattner                   "scanning from here");
978e0ef65abSDaniel Dunbar 
979e0ef65abSDaniel Dunbar   // Allow the pattern to print additional information if desired.
98091a1b2c9SMichael Liao   Pat.PrintFailureInfo(SM, Buffer, VariableTable);
98191a1b2c9SMichael Liao }
98291a1b2c9SMichael Liao 
98391a1b2c9SMichael Liao static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
98491a1b2c9SMichael Liao                              StringRef Buffer,
98591a1b2c9SMichael Liao                              StringMap<StringRef> &VariableTable) {
98691a1b2c9SMichael Liao   PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable);
987da108b4eSChris Lattner }
988da108b4eSChris Lattner 
98937183584SChris Lattner /// CountNumNewlinesBetween - Count the number of newlines in the specified
99037183584SChris Lattner /// range.
991592fe880SRichard Smith static unsigned CountNumNewlinesBetween(StringRef Range,
992592fe880SRichard Smith                                         const char *&FirstNewLine) {
993da108b4eSChris Lattner   unsigned NumNewLines = 0;
99437183584SChris Lattner   while (1) {
995da108b4eSChris Lattner     // Scan for newline.
99637183584SChris Lattner     Range = Range.substr(Range.find_first_of("\n\r"));
99737183584SChris Lattner     if (Range.empty()) return NumNewLines;
998da108b4eSChris Lattner 
999da108b4eSChris Lattner     ++NumNewLines;
1000da108b4eSChris Lattner 
1001da108b4eSChris Lattner     // Handle \n\r and \r\n as a single newline.
100237183584SChris Lattner     if (Range.size() > 1 &&
100337183584SChris Lattner         (Range[1] == '\n' || Range[1] == '\r') &&
100437183584SChris Lattner         (Range[0] != Range[1]))
100537183584SChris Lattner       Range = Range.substr(1);
100637183584SChris Lattner     Range = Range.substr(1);
1007592fe880SRichard Smith 
1008592fe880SRichard Smith     if (NumNewLines == 1)
1009592fe880SRichard Smith       FirstNewLine = Range.begin();
1010da108b4eSChris Lattner   }
1011da108b4eSChris Lattner }
1012da108b4eSChris Lattner 
1013dcc7d48dSMichael Liao size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
1014e93a3a08SStephen Lin                           bool IsLabelScanMode, size_t &MatchLen,
1015dcc7d48dSMichael Liao                           StringMap<StringRef> &VariableTable) const {
101691a1b2c9SMichael Liao   size_t LastPos = 0;
101791a1b2c9SMichael Liao   std::vector<const Pattern *> NotStrings;
101891a1b2c9SMichael Liao 
1019e93a3a08SStephen Lin   // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
1020e93a3a08SStephen Lin   // bounds; we have not processed variable definitions within the bounded block
1021e93a3a08SStephen Lin   // yet so cannot handle any final CHECK-DAG yet; this is handled when going
1022e93a3a08SStephen Lin   // over the block again (including the last CHECK-LABEL) in normal mode.
1023e93a3a08SStephen Lin   if (!IsLabelScanMode) {
102491a1b2c9SMichael Liao     // Match "dag strings" (with mixed "not strings" if any).
102591a1b2c9SMichael Liao     LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable);
102691a1b2c9SMichael Liao     if (LastPos == StringRef::npos)
102791a1b2c9SMichael Liao       return StringRef::npos;
1028e93a3a08SStephen Lin   }
102991a1b2c9SMichael Liao 
103091a1b2c9SMichael Liao   // Match itself from the last position after matching CHECK-DAG.
103191a1b2c9SMichael Liao   StringRef MatchBuffer = Buffer.substr(LastPos);
103291a1b2c9SMichael Liao   size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1033dcc7d48dSMichael Liao   if (MatchPos == StringRef::npos) {
103491a1b2c9SMichael Liao     PrintCheckFailed(SM, *this, MatchBuffer, VariableTable);
1035dcc7d48dSMichael Liao     return StringRef::npos;
1036dcc7d48dSMichael Liao   }
103791a1b2c9SMichael Liao   MatchPos += LastPos;
1038dcc7d48dSMichael Liao 
1039e93a3a08SStephen Lin   // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
1040e93a3a08SStephen Lin   // or CHECK-NOT
1041e93a3a08SStephen Lin   if (!IsLabelScanMode) {
104291a1b2c9SMichael Liao     StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1043dcc7d48dSMichael Liao 
1044dcc7d48dSMichael Liao     // If this check is a "CHECK-NEXT", verify that the previous match was on
1045dcc7d48dSMichael Liao     // the previous line (i.e. that there is one newline between them).
1046dcc7d48dSMichael Liao     if (CheckNext(SM, SkippedRegion))
1047dcc7d48dSMichael Liao       return StringRef::npos;
1048dcc7d48dSMichael Liao 
1049dcc7d48dSMichael Liao     // If this match had "not strings", verify that they don't exist in the
1050dcc7d48dSMichael Liao     // skipped region.
105191a1b2c9SMichael Liao     if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1052dcc7d48dSMichael Liao       return StringRef::npos;
1053f8bd2e5bSStephen Lin   }
1054dcc7d48dSMichael Liao 
1055dcc7d48dSMichael Liao   return MatchPos;
1056dcc7d48dSMichael Liao }
1057dcc7d48dSMichael Liao 
1058dcc7d48dSMichael Liao bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
105938820972SMatt Arsenault   if (CheckTy != Check::CheckNext)
1060dcc7d48dSMichael Liao     return false;
1061dcc7d48dSMichael Liao 
1062dcc7d48dSMichael Liao   // Count the number of newlines between the previous match and this one.
1063dcc7d48dSMichael Liao   assert(Buffer.data() !=
1064dcc7d48dSMichael Liao          SM.getMemoryBuffer(
1065dcc7d48dSMichael Liao            SM.FindBufferContainingLoc(
1066dcc7d48dSMichael Liao              SMLoc::getFromPointer(Buffer.data())))->getBufferStart() &&
1067dcc7d48dSMichael Liao          "CHECK-NEXT can't be the first check in a file");
1068dcc7d48dSMichael Liao 
106966f09ad0SCraig Topper   const char *FirstNewLine = nullptr;
1070592fe880SRichard Smith   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1071dcc7d48dSMichael Liao 
1072dcc7d48dSMichael Liao   if (NumNewLines == 0) {
107313df4626SMatt Arsenault     SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
1074dcc7d48dSMichael Liao                     "-NEXT: is on the same line as previous match");
1075dcc7d48dSMichael Liao     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
1076dcc7d48dSMichael Liao                     SourceMgr::DK_Note, "'next' match was here");
1077dcc7d48dSMichael Liao     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1078dcc7d48dSMichael Liao                     "previous match ended here");
1079dcc7d48dSMichael Liao     return true;
1080dcc7d48dSMichael Liao   }
1081dcc7d48dSMichael Liao 
1082dcc7d48dSMichael Liao   if (NumNewLines != 1) {
108313df4626SMatt Arsenault     SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
1084dcc7d48dSMichael Liao                     "-NEXT: is not on the line after the previous match");
1085dcc7d48dSMichael Liao     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
1086dcc7d48dSMichael Liao                     SourceMgr::DK_Note, "'next' match was here");
1087dcc7d48dSMichael Liao     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1088dcc7d48dSMichael Liao                     "previous match ended here");
1089592fe880SRichard Smith     SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
1090592fe880SRichard Smith                     "non-matching line after previous match is here");
1091dcc7d48dSMichael Liao     return true;
1092dcc7d48dSMichael Liao   }
1093dcc7d48dSMichael Liao 
1094dcc7d48dSMichael Liao   return false;
1095dcc7d48dSMichael Liao }
1096dcc7d48dSMichael Liao 
1097dcc7d48dSMichael Liao bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
109891a1b2c9SMichael Liao                            const std::vector<const Pattern *> &NotStrings,
1099dcc7d48dSMichael Liao                            StringMap<StringRef> &VariableTable) const {
1100dcc7d48dSMichael Liao   for (unsigned ChunkNo = 0, e = NotStrings.size();
1101dcc7d48dSMichael Liao        ChunkNo != e; ++ChunkNo) {
110291a1b2c9SMichael Liao     const Pattern *Pat = NotStrings[ChunkNo];
110338820972SMatt Arsenault     assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
110491a1b2c9SMichael Liao 
1105dcc7d48dSMichael Liao     size_t MatchLen = 0;
110691a1b2c9SMichael Liao     size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
1107dcc7d48dSMichael Liao 
1108dcc7d48dSMichael Liao     if (Pos == StringRef::npos) continue;
1109dcc7d48dSMichael Liao 
1110dcc7d48dSMichael Liao     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()+Pos),
1111dcc7d48dSMichael Liao                     SourceMgr::DK_Error,
111213df4626SMatt Arsenault                     Prefix + "-NOT: string occurred!");
111391a1b2c9SMichael Liao     SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note,
111413df4626SMatt Arsenault                     Prefix + "-NOT: pattern specified here");
1115dcc7d48dSMichael Liao     return true;
1116dcc7d48dSMichael Liao   }
1117dcc7d48dSMichael Liao 
1118dcc7d48dSMichael Liao   return false;
1119dcc7d48dSMichael Liao }
1120dcc7d48dSMichael Liao 
112191a1b2c9SMichael Liao size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
112291a1b2c9SMichael Liao                              std::vector<const Pattern *> &NotStrings,
112391a1b2c9SMichael Liao                              StringMap<StringRef> &VariableTable) const {
112491a1b2c9SMichael Liao   if (DagNotStrings.empty())
112591a1b2c9SMichael Liao     return 0;
112691a1b2c9SMichael Liao 
112791a1b2c9SMichael Liao   size_t LastPos = 0;
112891a1b2c9SMichael Liao   size_t StartPos = LastPos;
112991a1b2c9SMichael Liao 
113091a1b2c9SMichael Liao   for (unsigned ChunkNo = 0, e = DagNotStrings.size();
113191a1b2c9SMichael Liao        ChunkNo != e; ++ChunkNo) {
113291a1b2c9SMichael Liao     const Pattern &Pat = DagNotStrings[ChunkNo];
113391a1b2c9SMichael Liao 
113438820972SMatt Arsenault     assert((Pat.getCheckTy() == Check::CheckDAG ||
113538820972SMatt Arsenault             Pat.getCheckTy() == Check::CheckNot) &&
113691a1b2c9SMichael Liao            "Invalid CHECK-DAG or CHECK-NOT!");
113791a1b2c9SMichael Liao 
113838820972SMatt Arsenault     if (Pat.getCheckTy() == Check::CheckNot) {
113991a1b2c9SMichael Liao       NotStrings.push_back(&Pat);
114091a1b2c9SMichael Liao       continue;
114191a1b2c9SMichael Liao     }
114291a1b2c9SMichael Liao 
114338820972SMatt Arsenault     assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
114491a1b2c9SMichael Liao 
114591a1b2c9SMichael Liao     size_t MatchLen = 0, MatchPos;
114691a1b2c9SMichael Liao 
114791a1b2c9SMichael Liao     // CHECK-DAG always matches from the start.
114891a1b2c9SMichael Liao     StringRef MatchBuffer = Buffer.substr(StartPos);
114991a1b2c9SMichael Liao     MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
115091a1b2c9SMichael Liao     // With a group of CHECK-DAGs, a single mismatching means the match on
115191a1b2c9SMichael Liao     // that group of CHECK-DAGs fails immediately.
115291a1b2c9SMichael Liao     if (MatchPos == StringRef::npos) {
115391a1b2c9SMichael Liao       PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable);
115491a1b2c9SMichael Liao       return StringRef::npos;
115591a1b2c9SMichael Liao     }
115691a1b2c9SMichael Liao     // Re-calc it as the offset relative to the start of the original string.
115791a1b2c9SMichael Liao     MatchPos += StartPos;
115891a1b2c9SMichael Liao 
115991a1b2c9SMichael Liao     if (!NotStrings.empty()) {
116091a1b2c9SMichael Liao       if (MatchPos < LastPos) {
116191a1b2c9SMichael Liao         // Reordered?
116291a1b2c9SMichael Liao         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos),
116391a1b2c9SMichael Liao                         SourceMgr::DK_Error,
116413df4626SMatt Arsenault                         Prefix + "-DAG: found a match of CHECK-DAG"
116591a1b2c9SMichael Liao                         " reordering across a CHECK-NOT");
116691a1b2c9SMichael Liao         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos),
116791a1b2c9SMichael Liao                         SourceMgr::DK_Note,
116813df4626SMatt Arsenault                         Prefix + "-DAG: the farthest match of CHECK-DAG"
116991a1b2c9SMichael Liao                         " is found here");
117091a1b2c9SMichael Liao         SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note,
117113df4626SMatt Arsenault                         Prefix + "-NOT: the crossed pattern specified"
117291a1b2c9SMichael Liao                         " here");
117391a1b2c9SMichael Liao         SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note,
117413df4626SMatt Arsenault                         Prefix + "-DAG: the reordered pattern specified"
117591a1b2c9SMichael Liao                         " here");
117691a1b2c9SMichael Liao         return StringRef::npos;
117791a1b2c9SMichael Liao       }
117891a1b2c9SMichael Liao       // All subsequent CHECK-DAGs should be matched from the farthest
117991a1b2c9SMichael Liao       // position of all precedent CHECK-DAGs (including this one.)
118091a1b2c9SMichael Liao       StartPos = LastPos;
118191a1b2c9SMichael Liao       // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to
118291a1b2c9SMichael Liao       // CHECK-DAG, verify that there's no 'not' strings occurred in that
118391a1b2c9SMichael Liao       // region.
118491a1b2c9SMichael Liao       StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1185cf708c32STim Northover       if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
118691a1b2c9SMichael Liao         return StringRef::npos;
118791a1b2c9SMichael Liao       // Clear "not strings".
118891a1b2c9SMichael Liao       NotStrings.clear();
118991a1b2c9SMichael Liao     }
119091a1b2c9SMichael Liao 
119191a1b2c9SMichael Liao     // Update the last position with CHECK-DAG matches.
119291a1b2c9SMichael Liao     LastPos = std::max(MatchPos + MatchLen, LastPos);
119391a1b2c9SMichael Liao   }
119491a1b2c9SMichael Liao 
119591a1b2c9SMichael Liao   return LastPos;
119691a1b2c9SMichael Liao }
119791a1b2c9SMichael Liao 
119813df4626SMatt Arsenault // A check prefix must contain only alphanumeric, hyphens and underscores.
119913df4626SMatt Arsenault static bool ValidateCheckPrefix(StringRef CheckPrefix) {
120013df4626SMatt Arsenault   Regex Validator("^[a-zA-Z0-9_-]*$");
120113df4626SMatt Arsenault   return Validator.match(CheckPrefix);
120213df4626SMatt Arsenault }
120313df4626SMatt Arsenault 
120413df4626SMatt Arsenault static bool ValidateCheckPrefixes() {
120513df4626SMatt Arsenault   StringSet<> PrefixSet;
120613df4626SMatt Arsenault 
120713df4626SMatt Arsenault   for (prefix_iterator I = CheckPrefixes.begin(), E = CheckPrefixes.end();
120813df4626SMatt Arsenault        I != E; ++I) {
120913df4626SMatt Arsenault     StringRef Prefix(*I);
121013df4626SMatt Arsenault 
121124412b14SEli Bendersky     // Reject empty prefixes.
121224412b14SEli Bendersky     if (Prefix == "")
121324412b14SEli Bendersky       return false;
121424412b14SEli Bendersky 
121513df4626SMatt Arsenault     if (!PrefixSet.insert(Prefix))
121613df4626SMatt Arsenault       return false;
121713df4626SMatt Arsenault 
121813df4626SMatt Arsenault     if (!ValidateCheckPrefix(Prefix))
121913df4626SMatt Arsenault       return false;
122013df4626SMatt Arsenault   }
122113df4626SMatt Arsenault 
122213df4626SMatt Arsenault   return true;
122313df4626SMatt Arsenault }
122413df4626SMatt Arsenault 
122513df4626SMatt Arsenault // I don't think there's a way to specify an initial value for cl::list,
122613df4626SMatt Arsenault // so if nothing was specified, add the default
122713df4626SMatt Arsenault static void AddCheckPrefixIfNeeded() {
122813df4626SMatt Arsenault   if (CheckPrefixes.empty())
122913df4626SMatt Arsenault     CheckPrefixes.push_back("CHECK");
1230c2735158SRui Ueyama }
1231c2735158SRui Ueyama 
1232ee3c74fbSChris Lattner int main(int argc, char **argv) {
1233ee3c74fbSChris Lattner   sys::PrintStackTraceOnErrorSignal();
1234ee3c74fbSChris Lattner   PrettyStackTraceProgram X(argc, argv);
1235ee3c74fbSChris Lattner   cl::ParseCommandLineOptions(argc, argv);
1236ee3c74fbSChris Lattner 
123713df4626SMatt Arsenault   if (!ValidateCheckPrefixes()) {
123813df4626SMatt Arsenault     errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
123913df4626SMatt Arsenault               "start with a letter and contain only alphanumeric characters, "
124013df4626SMatt Arsenault               "hyphens and underscores\n";
1241c2735158SRui Ueyama     return 2;
1242c2735158SRui Ueyama   }
1243c2735158SRui Ueyama 
124413df4626SMatt Arsenault   AddCheckPrefixIfNeeded();
124513df4626SMatt Arsenault 
1246ee3c74fbSChris Lattner   SourceMgr SM;
1247ee3c74fbSChris Lattner 
1248ee3c74fbSChris Lattner   // Read the expected strings from the check file.
124926cccfe1SChris Lattner   std::vector<CheckString> CheckStrings;
1250ee3c74fbSChris Lattner   if (ReadCheckFile(SM, CheckStrings))
1251ee3c74fbSChris Lattner     return 2;
1252ee3c74fbSChris Lattner 
1253ee3c74fbSChris Lattner   // Open the file to check and add it to SourceMgr.
1254adf21f2aSRafael Espindola   ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
1255adf21f2aSRafael Espindola       MemoryBuffer::getFileOrSTDIN(InputFilename);
1256adf21f2aSRafael Espindola   if (std::error_code EC = FileOrErr.getError()) {
1257adf21f2aSRafael Espindola     errs() << "Could not open input file '" << InputFilename
1258adf21f2aSRafael Espindola            << "': " << EC.message() << '\n';
12598e1c6477SEli Bendersky     return 2;
1260ee3c74fbSChris Lattner   }
1261adf21f2aSRafael Espindola   std::unique_ptr<MemoryBuffer> File = std::move(FileOrErr.get());
12622c3e5cdfSChris Lattner 
1263e963d660SBenjamin Kramer   if (File->getBufferSize() == 0) {
1264b692bed7SChris Lattner     errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
12658e1c6477SEli Bendersky     return 2;
1266b692bed7SChris Lattner   }
1267b692bed7SChris Lattner 
12682c3e5cdfSChris Lattner   // Remove duplicate spaces in the input file if requested.
12695ea04c38SGuy Benyei   // Remove DOS style line endings.
1270e963d660SBenjamin Kramer   MemoryBuffer *F =
1271*ce5dd1acSRafael Espindola     CanonicalizeInputFile(std::move(File), NoCanonicalizeWhiteSpace);
12722c3e5cdfSChris Lattner 
1273ee3c74fbSChris Lattner   SM.AddNewSourceBuffer(F, SMLoc());
1274ee3c74fbSChris Lattner 
12758879e06dSChris Lattner   /// VariableTable - This holds all the current filecheck variables.
12768879e06dSChris Lattner   StringMap<StringRef> VariableTable;
12778879e06dSChris Lattner 
1278ee3c74fbSChris Lattner   // Check that we have all of the expected strings, in order, in the input
1279ee3c74fbSChris Lattner   // file.
1280caa5fc0cSChris Lattner   StringRef Buffer = F->getBuffer();
1281ee3c74fbSChris Lattner 
1282f8bd2e5bSStephen Lin   bool hasError = false;
1283ee3c74fbSChris Lattner 
1284f8bd2e5bSStephen Lin   unsigned i = 0, j = 0, e = CheckStrings.size();
1285ee3c74fbSChris Lattner 
1286f8bd2e5bSStephen Lin   while (true) {
1287f8bd2e5bSStephen Lin     StringRef CheckRegion;
1288f8bd2e5bSStephen Lin     if (j == e) {
1289f8bd2e5bSStephen Lin       CheckRegion = Buffer;
1290f8bd2e5bSStephen Lin     } else {
1291f8bd2e5bSStephen Lin       const CheckString &CheckLabelStr = CheckStrings[j];
129238820972SMatt Arsenault       if (CheckLabelStr.CheckTy != Check::CheckLabel) {
1293f8bd2e5bSStephen Lin         ++j;
1294f8bd2e5bSStephen Lin         continue;
1295da108b4eSChris Lattner       }
1296da108b4eSChris Lattner 
1297f8bd2e5bSStephen Lin       // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
1298f8bd2e5bSStephen Lin       size_t MatchLabelLen = 0;
1299e93a3a08SStephen Lin       size_t MatchLabelPos = CheckLabelStr.Check(SM, Buffer, true,
1300f8bd2e5bSStephen Lin                                                  MatchLabelLen, VariableTable);
1301f8bd2e5bSStephen Lin       if (MatchLabelPos == StringRef::npos) {
1302f8bd2e5bSStephen Lin         hasError = true;
1303f8bd2e5bSStephen Lin         break;
1304f8bd2e5bSStephen Lin       }
1305f8bd2e5bSStephen Lin 
1306f8bd2e5bSStephen Lin       CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
1307f8bd2e5bSStephen Lin       Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
1308f8bd2e5bSStephen Lin       ++j;
1309f8bd2e5bSStephen Lin     }
1310f8bd2e5bSStephen Lin 
1311f8bd2e5bSStephen Lin     for ( ; i != j; ++i) {
1312f8bd2e5bSStephen Lin       const CheckString &CheckStr = CheckStrings[i];
1313f8bd2e5bSStephen Lin 
1314f8bd2e5bSStephen Lin       // Check each string within the scanned region, including a second check
1315f8bd2e5bSStephen Lin       // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
1316f8bd2e5bSStephen Lin       size_t MatchLen = 0;
1317e93a3a08SStephen Lin       size_t MatchPos = CheckStr.Check(SM, CheckRegion, false, MatchLen,
1318f8bd2e5bSStephen Lin                                        VariableTable);
1319f8bd2e5bSStephen Lin 
1320f8bd2e5bSStephen Lin       if (MatchPos == StringRef::npos) {
1321f8bd2e5bSStephen Lin         hasError = true;
1322f8bd2e5bSStephen Lin         i = j;
1323f8bd2e5bSStephen Lin         break;
1324f8bd2e5bSStephen Lin       }
1325f8bd2e5bSStephen Lin 
1326f8bd2e5bSStephen Lin       CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
1327f8bd2e5bSStephen Lin     }
1328f8bd2e5bSStephen Lin 
1329f8bd2e5bSStephen Lin     if (j == e)
1330f8bd2e5bSStephen Lin       break;
1331f8bd2e5bSStephen Lin   }
1332f8bd2e5bSStephen Lin 
1333f8bd2e5bSStephen Lin   return hasError ? 1 : 0;
1334ee3c74fbSChris Lattner }
1335