1ee3c74fbSChris Lattner //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
2ee3c74fbSChris Lattner //
3ee3c74fbSChris Lattner //                     The LLVM Compiler Infrastructure
4ee3c74fbSChris Lattner //
5ee3c74fbSChris Lattner // This file is distributed under the University of Illinois Open Source
6ee3c74fbSChris Lattner // License. See LICENSE.TXT for details.
7ee3c74fbSChris Lattner //
8ee3c74fbSChris Lattner //===----------------------------------------------------------------------===//
9ee3c74fbSChris Lattner //
10ee3c74fbSChris Lattner // FileCheck does a line-by line check of a file that validates whether it
11ee3c74fbSChris Lattner // contains the expected content.  This is useful for regression tests etc.
12ee3c74fbSChris Lattner //
13ee3c74fbSChris Lattner // This program exits with an error status of 2 on error, exit status of 0 if
14ee3c74fbSChris Lattner // the file matched the expected contents, and exit status of 1 if it did not
15ee3c74fbSChris Lattner // contain the expected contents.
16ee3c74fbSChris Lattner //
17ee3c74fbSChris Lattner //===----------------------------------------------------------------------===//
18ee3c74fbSChris Lattner 
1991d19d8eSChandler Carruth #include "llvm/ADT/SmallString.h"
2091d19d8eSChandler Carruth #include "llvm/ADT/StringExtras.h"
2191d19d8eSChandler Carruth #include "llvm/ADT/StringMap.h"
2213df4626SMatt Arsenault #include "llvm/ADT/StringSet.h"
23ee3c74fbSChris Lattner #include "llvm/Support/CommandLine.h"
24ee3c74fbSChris Lattner #include "llvm/Support/MemoryBuffer.h"
25ee3c74fbSChris Lattner #include "llvm/Support/PrettyStackTrace.h"
26f08d2db9SChris Lattner #include "llvm/Support/Regex.h"
2791d19d8eSChandler Carruth #include "llvm/Support/Signals.h"
28ee3c74fbSChris Lattner #include "llvm/Support/SourceMgr.h"
29ee3c74fbSChris Lattner #include "llvm/Support/raw_ostream.h"
308879e06dSChris Lattner #include <algorithm>
31981af002SWill Dietz #include <cctype>
32e8b8f1bcSEli Bendersky #include <map>
33e8b8f1bcSEli Bendersky #include <string>
34a6e9c3e4SRafael Espindola #include <system_error>
35e8b8f1bcSEli Bendersky #include <vector>
36ee3c74fbSChris Lattner using namespace llvm;
37ee3c74fbSChris Lattner 
38ee3c74fbSChris Lattner static cl::opt<std::string>
39ee3c74fbSChris Lattner CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
40ee3c74fbSChris Lattner 
41ee3c74fbSChris Lattner static cl::opt<std::string>
42ee3c74fbSChris Lattner InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
43ee3c74fbSChris Lattner               cl::init("-"), cl::value_desc("filename"));
44ee3c74fbSChris Lattner 
4513df4626SMatt Arsenault static cl::list<std::string>
4613df4626SMatt Arsenault CheckPrefixes("check-prefix",
47ee3c74fbSChris Lattner               cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
48ee3c74fbSChris Lattner 
492c3e5cdfSChris Lattner static cl::opt<bool>
502c3e5cdfSChris Lattner NoCanonicalizeWhiteSpace("strict-whitespace",
512c3e5cdfSChris Lattner               cl::desc("Do not treat all horizontal whitespace as equivalent"));
522c3e5cdfSChris Lattner 
5356ccdbbdSAlexander Kornienko static cl::list<std::string> ImplicitCheckNot(
5456ccdbbdSAlexander Kornienko     "implicit-check-not",
5556ccdbbdSAlexander Kornienko     cl::desc("Add an implicit negative check with this pattern to every\n"
5656ccdbbdSAlexander Kornienko              "positive check. This can be used to ensure that no instances of\n"
5756ccdbbdSAlexander Kornienko              "this pattern occur which are not matched by a positive pattern"),
5856ccdbbdSAlexander Kornienko     cl::value_desc("pattern"));
5956ccdbbdSAlexander Kornienko 
60*1b9f936fSJustin Bogner static cl::opt<bool> AllowEmptyInput(
61*1b9f936fSJustin Bogner     "allow-empty", cl::init(false),
62*1b9f936fSJustin Bogner     cl::desc("Allow the input file to be empty. This is useful when making\n"
63*1b9f936fSJustin Bogner              "checks that some error message does not occur, for example."));
64*1b9f936fSJustin Bogner 
6513df4626SMatt Arsenault typedef cl::list<std::string>::const_iterator prefix_iterator;
6613df4626SMatt Arsenault 
6774d50731SChris Lattner //===----------------------------------------------------------------------===//
6874d50731SChris Lattner // Pattern Handling Code.
6974d50731SChris Lattner //===----------------------------------------------------------------------===//
7074d50731SChris Lattner 
7138820972SMatt Arsenault namespace Check {
7238820972SMatt Arsenault   enum CheckType {
7338820972SMatt Arsenault     CheckNone = 0,
7438820972SMatt Arsenault     CheckPlain,
7538820972SMatt Arsenault     CheckNext,
7638820972SMatt Arsenault     CheckNot,
7738820972SMatt Arsenault     CheckDAG,
7838820972SMatt Arsenault     CheckLabel,
790a4c44bdSChris Lattner 
80eba55822SJakob Stoklund Olesen     /// MatchEOF - When set, this pattern only matches the end of file. This is
81eba55822SJakob Stoklund Olesen     /// used for trailing CHECK-NOTs.
8238820972SMatt Arsenault     CheckEOF
8338820972SMatt Arsenault   };
8438820972SMatt Arsenault }
85eba55822SJakob Stoklund Olesen 
8638820972SMatt Arsenault class Pattern {
8738820972SMatt Arsenault   SMLoc PatternLoc;
8891a1b2c9SMichael Liao 
8938820972SMatt Arsenault   Check::CheckType CheckTy;
9091a1b2c9SMichael Liao 
91b16ab0c4SChris Lattner   /// FixedStr - If non-empty, this pattern is a fixed string match with the
92b16ab0c4SChris Lattner   /// specified fixed string.
93221460e0SChris Lattner   StringRef FixedStr;
94b16ab0c4SChris Lattner 
95b16ab0c4SChris Lattner   /// RegEx - If non-empty, this is a regex pattern.
96b16ab0c4SChris Lattner   std::string RegExStr;
978879e06dSChris Lattner 
9892987fb3SAlexander Kornienko   /// \brief Contains the number of line this pattern is in.
9992987fb3SAlexander Kornienko   unsigned LineNumber;
10092987fb3SAlexander Kornienko 
1018879e06dSChris Lattner   /// VariableUses - Entries in this vector map to uses of a variable in the
1028879e06dSChris Lattner   /// pattern, e.g. "foo[[bar]]baz".  In this case, the RegExStr will contain
1038879e06dSChris Lattner   /// "foobaz" and we'll get an entry in this vector that tells us to insert the
1048879e06dSChris Lattner   /// value of bar at offset 3.
1058879e06dSChris Lattner   std::vector<std::pair<StringRef, unsigned> > VariableUses;
1068879e06dSChris Lattner 
107e8b8f1bcSEli Bendersky   /// VariableDefs - Maps definitions of variables to their parenthesized
108e8b8f1bcSEli Bendersky   /// capture numbers.
109e8b8f1bcSEli Bendersky   /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1.
110e8b8f1bcSEli Bendersky   std::map<StringRef, unsigned> VariableDefs;
1118879e06dSChris Lattner 
1123b40b445SChris Lattner public:
1133b40b445SChris Lattner 
11438820972SMatt Arsenault   Pattern(Check::CheckType Ty)
11538820972SMatt Arsenault     : CheckTy(Ty) { }
11674d50731SChris Lattner 
1170b707eb8SMichael Liao   /// getLoc - Return the location in source code.
1180b707eb8SMichael Liao   SMLoc getLoc() const { return PatternLoc; }
1190b707eb8SMichael Liao 
12013df4626SMatt Arsenault   /// ParsePattern - Parse the given string into the Pattern. Prefix provides
12113df4626SMatt Arsenault   /// which prefix is being matched, SM provides the SourceMgr used for error
12213df4626SMatt Arsenault   /// reports, and LineNumber is the line number in the input file from which
12313df4626SMatt Arsenault   /// the pattern string was read.  Returns true in case of an error, false
12413df4626SMatt Arsenault   /// otherwise.
12513df4626SMatt Arsenault   bool ParsePattern(StringRef PatternStr,
12613df4626SMatt Arsenault                     StringRef Prefix,
12713df4626SMatt Arsenault                     SourceMgr &SM,
12813df4626SMatt Arsenault                     unsigned LineNumber);
1293b40b445SChris Lattner 
1303b40b445SChris Lattner   /// Match - Match the pattern string against the input buffer Buffer.  This
1313b40b445SChris Lattner   /// returns the position that is matched or npos if there is no match.  If
1323b40b445SChris Lattner   /// there is a match, the size of the matched string is returned in MatchLen.
1338879e06dSChris Lattner   ///
1348879e06dSChris Lattner   /// The VariableTable StringMap provides the current values of filecheck
1358879e06dSChris Lattner   /// variables and is updated if this match defines new values.
1368879e06dSChris Lattner   size_t Match(StringRef Buffer, size_t &MatchLen,
1378879e06dSChris Lattner                StringMap<StringRef> &VariableTable) const;
138b16ab0c4SChris Lattner 
139e0ef65abSDaniel Dunbar   /// PrintFailureInfo - Print additional information about a failure to match
140e0ef65abSDaniel Dunbar   /// involving this pattern.
141e0ef65abSDaniel Dunbar   void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
142e0ef65abSDaniel Dunbar                         const StringMap<StringRef> &VariableTable) const;
143e0ef65abSDaniel Dunbar 
144f8bd2e5bSStephen Lin   bool hasVariable() const { return !(VariableUses.empty() &&
145f8bd2e5bSStephen Lin                                       VariableDefs.empty()); }
146f8bd2e5bSStephen Lin 
14738820972SMatt Arsenault   Check::CheckType getCheckTy() const { return CheckTy; }
14891a1b2c9SMichael Liao 
149b16ab0c4SChris Lattner private:
150e8b8f1bcSEli Bendersky   bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
151e8b8f1bcSEli Bendersky   void AddBackrefToRegEx(unsigned BackrefNum);
152fd29d886SDaniel Dunbar 
153fd29d886SDaniel Dunbar   /// ComputeMatchDistance - Compute an arbitrary estimate for the quality of
154fd29d886SDaniel Dunbar   /// matching this pattern at the start of \arg Buffer; a distance of zero
155fd29d886SDaniel Dunbar   /// should correspond to a perfect match.
156fd29d886SDaniel Dunbar   unsigned ComputeMatchDistance(StringRef Buffer,
157fd29d886SDaniel Dunbar                                const StringMap<StringRef> &VariableTable) const;
15892987fb3SAlexander Kornienko 
15992987fb3SAlexander Kornienko   /// \brief Evaluates expression and stores the result to \p Value.
16092987fb3SAlexander Kornienko   /// \return true on success. false when the expression has invalid syntax.
16192987fb3SAlexander Kornienko   bool EvaluateExpression(StringRef Expr, std::string &Value) const;
162061d2baaSEli Bendersky 
163061d2baaSEli Bendersky   /// \brief Finds the closing sequence of a regex variable usage or
164061d2baaSEli Bendersky   /// definition. Str has to point in the beginning of the definition
165061d2baaSEli Bendersky   /// (right after the opening sequence).
166061d2baaSEli Bendersky   /// \return offset of the closing sequence within Str, or npos if it was not
167061d2baaSEli Bendersky   /// found.
16881e5cd9eSAdrian Prantl   size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
1693b40b445SChris Lattner };
1703b40b445SChris Lattner 
1718879e06dSChris Lattner 
17213df4626SMatt Arsenault bool Pattern::ParsePattern(StringRef PatternStr,
17313df4626SMatt Arsenault                            StringRef Prefix,
17413df4626SMatt Arsenault                            SourceMgr &SM,
17592987fb3SAlexander Kornienko                            unsigned LineNumber) {
17692987fb3SAlexander Kornienko   this->LineNumber = LineNumber;
1770a4c44bdSChris Lattner   PatternLoc = SMLoc::getFromPointer(PatternStr.data());
1780a4c44bdSChris Lattner 
17974d50731SChris Lattner   // Ignore trailing whitespace.
18074d50731SChris Lattner   while (!PatternStr.empty() &&
18174d50731SChris Lattner          (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
18274d50731SChris Lattner     PatternStr = PatternStr.substr(0, PatternStr.size()-1);
18374d50731SChris Lattner 
18474d50731SChris Lattner   // Check that there is something on the line.
18574d50731SChris Lattner   if (PatternStr.empty()) {
18603b80a40SChris Lattner     SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
18703b80a40SChris Lattner                     "found empty check string with prefix '" +
18813df4626SMatt Arsenault                     Prefix + ":'");
18974d50731SChris Lattner     return true;
19074d50731SChris Lattner   }
19174d50731SChris Lattner 
192221460e0SChris Lattner   // Check to see if this is a fixed string, or if it has regex pieces.
193d9466967STed Kremenek   if (PatternStr.size() < 2 ||
1948879e06dSChris Lattner       (PatternStr.find("{{") == StringRef::npos &&
1958879e06dSChris Lattner        PatternStr.find("[[") == StringRef::npos)) {
196221460e0SChris Lattner     FixedStr = PatternStr;
197221460e0SChris Lattner     return false;
198221460e0SChris Lattner   }
199221460e0SChris Lattner 
2008879e06dSChris Lattner   // Paren value #0 is for the fully matched string.  Any new parenthesized
20153e0679dSChris Lattner   // values add from there.
2028879e06dSChris Lattner   unsigned CurParen = 1;
2038879e06dSChris Lattner 
204b16ab0c4SChris Lattner   // Otherwise, there is at least one regex piece.  Build up the regex pattern
205b16ab0c4SChris Lattner   // by escaping scary characters in fixed strings, building up one big regex.
206f08d2db9SChris Lattner   while (!PatternStr.empty()) {
2078879e06dSChris Lattner     // RegEx matches.
20853e0679dSChris Lattner     if (PatternStr.startswith("{{")) {
20943d50d4aSEli Bendersky       // This is the start of a regex match.  Scan for the }}.
210f08d2db9SChris Lattner       size_t End = PatternStr.find("}}");
211f08d2db9SChris Lattner       if (End == StringRef::npos) {
212f08d2db9SChris Lattner         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
21303b80a40SChris Lattner                         SourceMgr::DK_Error,
21403b80a40SChris Lattner                         "found start of regex string with no end '}}'");
215f08d2db9SChris Lattner         return true;
216f08d2db9SChris Lattner       }
217f08d2db9SChris Lattner 
218e53c95f1SChris Lattner       // Enclose {{}} patterns in parens just like [[]] even though we're not
219e53c95f1SChris Lattner       // capturing the result for any purpose.  This is required in case the
220e53c95f1SChris Lattner       // expression contains an alternation like: CHECK:  abc{{x|z}}def.  We
221e53c95f1SChris Lattner       // want this to turn into: "abc(x|z)def" not "abcx|zdef".
222e53c95f1SChris Lattner       RegExStr += '(';
223e53c95f1SChris Lattner       ++CurParen;
224e53c95f1SChris Lattner 
2258879e06dSChris Lattner       if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM))
2268879e06dSChris Lattner         return true;
227e53c95f1SChris Lattner       RegExStr += ')';
22853e0679dSChris Lattner 
2298879e06dSChris Lattner       PatternStr = PatternStr.substr(End+2);
2308879e06dSChris Lattner       continue;
2318879e06dSChris Lattner     }
2328879e06dSChris Lattner 
2338879e06dSChris Lattner     // Named RegEx matches.  These are of two forms: [[foo:.*]] which matches .*
2348879e06dSChris Lattner     // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
2358879e06dSChris Lattner     // second form is [[foo]] which is a reference to foo.  The variable name
23657cb733bSDaniel Dunbar     // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
2378879e06dSChris Lattner     // it.  This is to catch some common errors.
23853e0679dSChris Lattner     if (PatternStr.startswith("[[")) {
239061d2baaSEli Bendersky       // Find the closing bracket pair ending the match.  End is going to be an
240061d2baaSEli Bendersky       // offset relative to the beginning of the match string.
24181e5cd9eSAdrian Prantl       size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
242061d2baaSEli Bendersky 
2438879e06dSChris Lattner       if (End == StringRef::npos) {
2448879e06dSChris Lattner         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
24503b80a40SChris Lattner                         SourceMgr::DK_Error,
24603b80a40SChris Lattner                         "invalid named regex reference, no ]] found");
247f08d2db9SChris Lattner         return true;
248f08d2db9SChris Lattner       }
249f08d2db9SChris Lattner 
250061d2baaSEli Bendersky       StringRef MatchStr = PatternStr.substr(2, End);
251061d2baaSEli Bendersky       PatternStr = PatternStr.substr(End+4);
2528879e06dSChris Lattner 
2538879e06dSChris Lattner       // Get the regex name (e.g. "foo").
2548879e06dSChris Lattner       size_t NameEnd = MatchStr.find(':');
2558879e06dSChris Lattner       StringRef Name = MatchStr.substr(0, NameEnd);
2568879e06dSChris Lattner 
2578879e06dSChris Lattner       if (Name.empty()) {
25803b80a40SChris Lattner         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
25903b80a40SChris Lattner                         "invalid name in named regex: empty name");
2608879e06dSChris Lattner         return true;
2618879e06dSChris Lattner       }
2628879e06dSChris Lattner 
26392987fb3SAlexander Kornienko       // Verify that the name/expression is well formed. FileCheck currently
26492987fb3SAlexander Kornienko       // supports @LINE, @LINE+number, @LINE-number expressions. The check here
26592987fb3SAlexander Kornienko       // is relaxed, more strict check is performed in \c EvaluateExpression.
26692987fb3SAlexander Kornienko       bool IsExpression = false;
26792987fb3SAlexander Kornienko       for (unsigned i = 0, e = Name.size(); i != e; ++i) {
26892987fb3SAlexander Kornienko         if (i == 0 && Name[i] == '@') {
26992987fb3SAlexander Kornienko           if (NameEnd != StringRef::npos) {
27092987fb3SAlexander Kornienko             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
27192987fb3SAlexander Kornienko                             SourceMgr::DK_Error,
27292987fb3SAlexander Kornienko                             "invalid name in named regex definition");
27392987fb3SAlexander Kornienko             return true;
27492987fb3SAlexander Kornienko           }
27592987fb3SAlexander Kornienko           IsExpression = true;
27692987fb3SAlexander Kornienko           continue;
27792987fb3SAlexander Kornienko         }
27892987fb3SAlexander Kornienko         if (Name[i] != '_' && !isalnum(Name[i]) &&
27992987fb3SAlexander Kornienko             (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
2808879e06dSChris Lattner           SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i),
28103b80a40SChris Lattner                           SourceMgr::DK_Error, "invalid name in named regex");
2828879e06dSChris Lattner           return true;
2838879e06dSChris Lattner         }
28492987fb3SAlexander Kornienko       }
2858879e06dSChris Lattner 
2868879e06dSChris Lattner       // Name can't start with a digit.
28783c74e9fSGuy Benyei       if (isdigit(static_cast<unsigned char>(Name[0]))) {
28803b80a40SChris Lattner         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
28903b80a40SChris Lattner                         "invalid name in named regex");
2908879e06dSChris Lattner         return true;
2918879e06dSChris Lattner       }
2928879e06dSChris Lattner 
2938879e06dSChris Lattner       // Handle [[foo]].
2948879e06dSChris Lattner       if (NameEnd == StringRef::npos) {
295e8b8f1bcSEli Bendersky         // Handle variables that were defined earlier on the same line by
296e8b8f1bcSEli Bendersky         // emitting a backreference.
297e8b8f1bcSEli Bendersky         if (VariableDefs.find(Name) != VariableDefs.end()) {
298e8b8f1bcSEli Bendersky           unsigned VarParenNum = VariableDefs[Name];
299e8b8f1bcSEli Bendersky           if (VarParenNum < 1 || VarParenNum > 9) {
300e8b8f1bcSEli Bendersky             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
301e8b8f1bcSEli Bendersky                             SourceMgr::DK_Error,
302e8b8f1bcSEli Bendersky                             "Can't back-reference more than 9 variables");
303e8b8f1bcSEli Bendersky             return true;
304e8b8f1bcSEli Bendersky           }
305e8b8f1bcSEli Bendersky           AddBackrefToRegEx(VarParenNum);
306e8b8f1bcSEli Bendersky         } else {
3078879e06dSChris Lattner           VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
308e8b8f1bcSEli Bendersky         }
3098879e06dSChris Lattner         continue;
3108879e06dSChris Lattner       }
3118879e06dSChris Lattner 
3128879e06dSChris Lattner       // Handle [[foo:.*]].
313e8b8f1bcSEli Bendersky       VariableDefs[Name] = CurParen;
3148879e06dSChris Lattner       RegExStr += '(';
3158879e06dSChris Lattner       ++CurParen;
3168879e06dSChris Lattner 
3178879e06dSChris Lattner       if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM))
3188879e06dSChris Lattner         return true;
3198879e06dSChris Lattner 
3208879e06dSChris Lattner       RegExStr += ')';
3218879e06dSChris Lattner     }
3228879e06dSChris Lattner 
3238879e06dSChris Lattner     // Handle fixed string matches.
3248879e06dSChris Lattner     // Find the end, which is the start of the next regex.
3258879e06dSChris Lattner     size_t FixedMatchEnd = PatternStr.find("{{");
3268879e06dSChris Lattner     FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
3276f4f77b7SHans Wennborg     RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
3288879e06dSChris Lattner     PatternStr = PatternStr.substr(FixedMatchEnd);
329f08d2db9SChris Lattner   }
330f08d2db9SChris Lattner 
33174d50731SChris Lattner   return false;
33274d50731SChris Lattner }
33374d50731SChris Lattner 
334e8b8f1bcSEli Bendersky bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen,
3358879e06dSChris Lattner                               SourceMgr &SM) {
336e8b8f1bcSEli Bendersky   Regex R(RS);
3378879e06dSChris Lattner   std::string Error;
3388879e06dSChris Lattner   if (!R.isValid(Error)) {
339e8b8f1bcSEli Bendersky     SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
34003b80a40SChris Lattner                     "invalid regex: " + Error);
3418879e06dSChris Lattner     return true;
3428879e06dSChris Lattner   }
3438879e06dSChris Lattner 
344e8b8f1bcSEli Bendersky   RegExStr += RS.str();
3458879e06dSChris Lattner   CurParen += R.getNumMatches();
3468879e06dSChris Lattner   return false;
3478879e06dSChris Lattner }
348b16ab0c4SChris Lattner 
349e8b8f1bcSEli Bendersky void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
350e8b8f1bcSEli Bendersky   assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
351e8b8f1bcSEli Bendersky   std::string Backref = std::string("\\") +
352e8b8f1bcSEli Bendersky                         std::string(1, '0' + BackrefNum);
353e8b8f1bcSEli Bendersky   RegExStr += Backref;
354e8b8f1bcSEli Bendersky }
355e8b8f1bcSEli Bendersky 
35692987fb3SAlexander Kornienko bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
35792987fb3SAlexander Kornienko   // The only supported expression is @LINE([\+-]\d+)?
35892987fb3SAlexander Kornienko   if (!Expr.startswith("@LINE"))
35992987fb3SAlexander Kornienko     return false;
36092987fb3SAlexander Kornienko   Expr = Expr.substr(StringRef("@LINE").size());
36192987fb3SAlexander Kornienko   int Offset = 0;
36292987fb3SAlexander Kornienko   if (!Expr.empty()) {
36392987fb3SAlexander Kornienko     if (Expr[0] == '+')
36492987fb3SAlexander Kornienko       Expr = Expr.substr(1);
36592987fb3SAlexander Kornienko     else if (Expr[0] != '-')
36692987fb3SAlexander Kornienko       return false;
36792987fb3SAlexander Kornienko     if (Expr.getAsInteger(10, Offset))
36892987fb3SAlexander Kornienko       return false;
36992987fb3SAlexander Kornienko   }
37092987fb3SAlexander Kornienko   Value = llvm::itostr(LineNumber + Offset);
37192987fb3SAlexander Kornienko   return true;
37292987fb3SAlexander Kornienko }
37392987fb3SAlexander Kornienko 
374f08d2db9SChris Lattner /// Match - Match the pattern string against the input buffer Buffer.  This
375f08d2db9SChris Lattner /// returns the position that is matched or npos if there is no match.  If
376f08d2db9SChris Lattner /// there is a match, the size of the matched string is returned in MatchLen.
3778879e06dSChris Lattner size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
3788879e06dSChris Lattner                       StringMap<StringRef> &VariableTable) const {
379eba55822SJakob Stoklund Olesen   // If this is the EOF pattern, match it immediately.
38038820972SMatt Arsenault   if (CheckTy == Check::CheckEOF) {
381eba55822SJakob Stoklund Olesen     MatchLen = 0;
382eba55822SJakob Stoklund Olesen     return Buffer.size();
383eba55822SJakob Stoklund Olesen   }
384eba55822SJakob Stoklund Olesen 
385221460e0SChris Lattner   // If this is a fixed string pattern, just match it now.
386221460e0SChris Lattner   if (!FixedStr.empty()) {
387221460e0SChris Lattner     MatchLen = FixedStr.size();
388221460e0SChris Lattner     return Buffer.find(FixedStr);
389221460e0SChris Lattner   }
390221460e0SChris Lattner 
391b16ab0c4SChris Lattner   // Regex match.
3928879e06dSChris Lattner 
3938879e06dSChris Lattner   // If there are variable uses, we need to create a temporary string with the
3948879e06dSChris Lattner   // actual value.
3958879e06dSChris Lattner   StringRef RegExToMatch = RegExStr;
3968879e06dSChris Lattner   std::string TmpStr;
3978879e06dSChris Lattner   if (!VariableUses.empty()) {
3988879e06dSChris Lattner     TmpStr = RegExStr;
3998879e06dSChris Lattner 
4008879e06dSChris Lattner     unsigned InsertOffset = 0;
4018879e06dSChris Lattner     for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
40292987fb3SAlexander Kornienko       std::string Value;
40392987fb3SAlexander Kornienko 
40492987fb3SAlexander Kornienko       if (VariableUses[i].first[0] == '@') {
40592987fb3SAlexander Kornienko         if (!EvaluateExpression(VariableUses[i].first, Value))
40692987fb3SAlexander Kornienko           return StringRef::npos;
40792987fb3SAlexander Kornienko       } else {
408e0ef65abSDaniel Dunbar         StringMap<StringRef>::iterator it =
409e0ef65abSDaniel Dunbar           VariableTable.find(VariableUses[i].first);
410e0ef65abSDaniel Dunbar         // If the variable is undefined, return an error.
411e0ef65abSDaniel Dunbar         if (it == VariableTable.end())
412e0ef65abSDaniel Dunbar           return StringRef::npos;
413e0ef65abSDaniel Dunbar 
4146f4f77b7SHans Wennborg         // Look up the value and escape it so that we can put it into the regex.
4156f4f77b7SHans Wennborg         Value += Regex::escape(it->second);
41692987fb3SAlexander Kornienko       }
4178879e06dSChris Lattner 
4188879e06dSChris Lattner       // Plop it into the regex at the adjusted offset.
4198879e06dSChris Lattner       TmpStr.insert(TmpStr.begin()+VariableUses[i].second+InsertOffset,
4208879e06dSChris Lattner                     Value.begin(), Value.end());
4218879e06dSChris Lattner       InsertOffset += Value.size();
4228879e06dSChris Lattner     }
4238879e06dSChris Lattner 
4248879e06dSChris Lattner     // Match the newly constructed regex.
4258879e06dSChris Lattner     RegExToMatch = TmpStr;
4268879e06dSChris Lattner   }
4278879e06dSChris Lattner 
4288879e06dSChris Lattner 
429b16ab0c4SChris Lattner   SmallVector<StringRef, 4> MatchInfo;
4308879e06dSChris Lattner   if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
431f08d2db9SChris Lattner     return StringRef::npos;
432b16ab0c4SChris Lattner 
433b16ab0c4SChris Lattner   // Successful regex match.
434b16ab0c4SChris Lattner   assert(!MatchInfo.empty() && "Didn't get any match");
435b16ab0c4SChris Lattner   StringRef FullMatch = MatchInfo[0];
436b16ab0c4SChris Lattner 
4378879e06dSChris Lattner   // If this defines any variables, remember their values.
438e8b8f1bcSEli Bendersky   for (std::map<StringRef, unsigned>::const_iterator I = VariableDefs.begin(),
439e8b8f1bcSEli Bendersky                                                      E = VariableDefs.end();
440e8b8f1bcSEli Bendersky        I != E; ++I) {
441e8b8f1bcSEli Bendersky     assert(I->second < MatchInfo.size() && "Internal paren error");
442e8b8f1bcSEli Bendersky     VariableTable[I->first] = MatchInfo[I->second];
4430a4c44bdSChris Lattner   }
4440a4c44bdSChris Lattner 
445b16ab0c4SChris Lattner   MatchLen = FullMatch.size();
446b16ab0c4SChris Lattner   return FullMatch.data()-Buffer.data();
447f08d2db9SChris Lattner }
448f08d2db9SChris Lattner 
449fd29d886SDaniel Dunbar unsigned Pattern::ComputeMatchDistance(StringRef Buffer,
450fd29d886SDaniel Dunbar                               const StringMap<StringRef> &VariableTable) const {
451fd29d886SDaniel Dunbar   // Just compute the number of matching characters. For regular expressions, we
452fd29d886SDaniel Dunbar   // just compare against the regex itself and hope for the best.
453fd29d886SDaniel Dunbar   //
454fd29d886SDaniel Dunbar   // FIXME: One easy improvement here is have the regex lib generate a single
455fd29d886SDaniel Dunbar   // example regular expression which matches, and use that as the example
456fd29d886SDaniel Dunbar   // string.
457fd29d886SDaniel Dunbar   StringRef ExampleString(FixedStr);
458fd29d886SDaniel Dunbar   if (ExampleString.empty())
459fd29d886SDaniel Dunbar     ExampleString = RegExStr;
460fd29d886SDaniel Dunbar 
461e9aa36c8SDaniel Dunbar   // Only compare up to the first line in the buffer, or the string size.
462e9aa36c8SDaniel Dunbar   StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
463e9aa36c8SDaniel Dunbar   BufferPrefix = BufferPrefix.split('\n').first;
464e9aa36c8SDaniel Dunbar   return BufferPrefix.edit_distance(ExampleString);
465fd29d886SDaniel Dunbar }
466fd29d886SDaniel Dunbar 
467e0ef65abSDaniel Dunbar void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
468e0ef65abSDaniel Dunbar                                const StringMap<StringRef> &VariableTable) const{
469e0ef65abSDaniel Dunbar   // If this was a regular expression using variables, print the current
470e0ef65abSDaniel Dunbar   // variable values.
471e0ef65abSDaniel Dunbar   if (!VariableUses.empty()) {
472e0ef65abSDaniel Dunbar     for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
473e69170a1SAlp Toker       SmallString<256> Msg;
474e69170a1SAlp Toker       raw_svector_ostream OS(Msg);
47592987fb3SAlexander Kornienko       StringRef Var = VariableUses[i].first;
47692987fb3SAlexander Kornienko       if (Var[0] == '@') {
47792987fb3SAlexander Kornienko         std::string Value;
47892987fb3SAlexander Kornienko         if (EvaluateExpression(Var, Value)) {
47992987fb3SAlexander Kornienko           OS << "with expression \"";
48092987fb3SAlexander Kornienko           OS.write_escaped(Var) << "\" equal to \"";
48192987fb3SAlexander Kornienko           OS.write_escaped(Value) << "\"";
48292987fb3SAlexander Kornienko         } else {
48392987fb3SAlexander Kornienko           OS << "uses incorrect expression \"";
48492987fb3SAlexander Kornienko           OS.write_escaped(Var) << "\"";
48592987fb3SAlexander Kornienko         }
48692987fb3SAlexander Kornienko       } else {
48792987fb3SAlexander Kornienko         StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
488e0ef65abSDaniel Dunbar 
489e0ef65abSDaniel Dunbar         // Check for undefined variable references.
490e0ef65abSDaniel Dunbar         if (it == VariableTable.end()) {
491e0ef65abSDaniel Dunbar           OS << "uses undefined variable \"";
49292987fb3SAlexander Kornienko           OS.write_escaped(Var) << "\"";
493e0ef65abSDaniel Dunbar         } else {
494e0ef65abSDaniel Dunbar           OS << "with variable \"";
495e0ef65abSDaniel Dunbar           OS.write_escaped(Var) << "\" equal to \"";
496e0ef65abSDaniel Dunbar           OS.write_escaped(it->second) << "\"";
497e0ef65abSDaniel Dunbar         }
49892987fb3SAlexander Kornienko       }
499e0ef65abSDaniel Dunbar 
50003b80a40SChris Lattner       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
50103b80a40SChris Lattner                       OS.str());
502e0ef65abSDaniel Dunbar     }
503e0ef65abSDaniel Dunbar   }
504fd29d886SDaniel Dunbar 
505fd29d886SDaniel Dunbar   // Attempt to find the closest/best fuzzy match.  Usually an error happens
506fd29d886SDaniel Dunbar   // because some string in the output didn't exactly match. In these cases, we
507fd29d886SDaniel Dunbar   // would like to show the user a best guess at what "should have" matched, to
508fd29d886SDaniel Dunbar   // save them having to actually check the input manually.
509fd29d886SDaniel Dunbar   size_t NumLinesForward = 0;
510fd29d886SDaniel Dunbar   size_t Best = StringRef::npos;
511fd29d886SDaniel Dunbar   double BestQuality = 0;
512fd29d886SDaniel Dunbar 
513fd29d886SDaniel Dunbar   // Use an arbitrary 4k limit on how far we will search.
5142bf486ebSDan Gohman   for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
515fd29d886SDaniel Dunbar     if (Buffer[i] == '\n')
516fd29d886SDaniel Dunbar       ++NumLinesForward;
517fd29d886SDaniel Dunbar 
518df22bbf7SDan Gohman     // Patterns have leading whitespace stripped, so skip whitespace when
519df22bbf7SDan Gohman     // looking for something which looks like a pattern.
520df22bbf7SDan Gohman     if (Buffer[i] == ' ' || Buffer[i] == '\t')
521df22bbf7SDan Gohman       continue;
522df22bbf7SDan Gohman 
523fd29d886SDaniel Dunbar     // Compute the "quality" of this match as an arbitrary combination of the
524fd29d886SDaniel Dunbar     // match distance and the number of lines skipped to get to this match.
525fd29d886SDaniel Dunbar     unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
526fd29d886SDaniel Dunbar     double Quality = Distance + (NumLinesForward / 100.);
527fd29d886SDaniel Dunbar 
528fd29d886SDaniel Dunbar     if (Quality < BestQuality || Best == StringRef::npos) {
529fd29d886SDaniel Dunbar       Best = i;
530fd29d886SDaniel Dunbar       BestQuality = Quality;
531fd29d886SDaniel Dunbar     }
532fd29d886SDaniel Dunbar   }
533fd29d886SDaniel Dunbar 
534fd29d886SDaniel Dunbar   // Print the "possible intended match here" line if we found something
535c069cc8eSDaniel Dunbar   // reasonable and not equal to what we showed in the "scanning from here"
536c069cc8eSDaniel Dunbar   // line.
537c069cc8eSDaniel Dunbar   if (Best && Best != StringRef::npos && BestQuality < 50) {
538fd29d886SDaniel Dunbar       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
53903b80a40SChris Lattner                       SourceMgr::DK_Note, "possible intended match here");
540fd29d886SDaniel Dunbar 
541fd29d886SDaniel Dunbar     // FIXME: If we wanted to be really friendly we would show why the match
542fd29d886SDaniel Dunbar     // failed, as it can be hard to spot simple one character differences.
543fd29d886SDaniel Dunbar   }
544e0ef65abSDaniel Dunbar }
54574d50731SChris Lattner 
54681e5cd9eSAdrian Prantl size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
547061d2baaSEli Bendersky   // Offset keeps track of the current offset within the input Str
548061d2baaSEli Bendersky   size_t Offset = 0;
549061d2baaSEli Bendersky   // [...] Nesting depth
550061d2baaSEli Bendersky   size_t BracketDepth = 0;
551061d2baaSEli Bendersky 
552061d2baaSEli Bendersky   while (!Str.empty()) {
553061d2baaSEli Bendersky     if (Str.startswith("]]") && BracketDepth == 0)
554061d2baaSEli Bendersky       return Offset;
555061d2baaSEli Bendersky     if (Str[0] == '\\') {
556061d2baaSEli Bendersky       // Backslash escapes the next char within regexes, so skip them both.
557061d2baaSEli Bendersky       Str = Str.substr(2);
558061d2baaSEli Bendersky       Offset += 2;
559061d2baaSEli Bendersky     } else {
560061d2baaSEli Bendersky       switch (Str[0]) {
561061d2baaSEli Bendersky         default:
562061d2baaSEli Bendersky           break;
563061d2baaSEli Bendersky         case '[':
564061d2baaSEli Bendersky           BracketDepth++;
565061d2baaSEli Bendersky           break;
566061d2baaSEli Bendersky         case ']':
56781e5cd9eSAdrian Prantl           if (BracketDepth == 0) {
56881e5cd9eSAdrian Prantl             SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
56981e5cd9eSAdrian Prantl                             SourceMgr::DK_Error,
57081e5cd9eSAdrian Prantl                             "missing closing \"]\" for regex variable");
57181e5cd9eSAdrian Prantl             exit(1);
57281e5cd9eSAdrian Prantl           }
573061d2baaSEli Bendersky           BracketDepth--;
574061d2baaSEli Bendersky           break;
575061d2baaSEli Bendersky       }
576061d2baaSEli Bendersky       Str = Str.substr(1);
577061d2baaSEli Bendersky       Offset++;
578061d2baaSEli Bendersky     }
579061d2baaSEli Bendersky   }
580061d2baaSEli Bendersky 
581061d2baaSEli Bendersky   return StringRef::npos;
582061d2baaSEli Bendersky }
583061d2baaSEli Bendersky 
584061d2baaSEli Bendersky 
58574d50731SChris Lattner //===----------------------------------------------------------------------===//
58674d50731SChris Lattner // Check Strings.
58774d50731SChris Lattner //===----------------------------------------------------------------------===//
5883b40b445SChris Lattner 
5893b40b445SChris Lattner /// CheckString - This is a check that we found in the input file.
5903b40b445SChris Lattner struct CheckString {
5913b40b445SChris Lattner   /// Pat - The pattern to match.
5923b40b445SChris Lattner   Pattern Pat;
59326cccfe1SChris Lattner 
59413df4626SMatt Arsenault   /// Prefix - Which prefix name this check matched.
59513df4626SMatt Arsenault   StringRef Prefix;
59613df4626SMatt Arsenault 
59726cccfe1SChris Lattner   /// Loc - The location in the match file that the check string was specified.
59826cccfe1SChris Lattner   SMLoc Loc;
59926cccfe1SChris Lattner 
60038820972SMatt Arsenault   /// CheckTy - Specify what kind of check this is. e.g. CHECK-NEXT: directive,
60138820972SMatt Arsenault   /// as opposed to a CHECK: directive.
60238820972SMatt Arsenault   Check::CheckType CheckTy;
603f8bd2e5bSStephen Lin 
60491a1b2c9SMichael Liao   /// DagNotStrings - These are all of the strings that are disallowed from
605236d2d5eSChris Lattner   /// occurring between this match string and the previous one (or start of
606236d2d5eSChris Lattner   /// file).
60791a1b2c9SMichael Liao   std::vector<Pattern> DagNotStrings;
608236d2d5eSChris Lattner 
60913df4626SMatt Arsenault 
61013df4626SMatt Arsenault   CheckString(const Pattern &P,
61113df4626SMatt Arsenault               StringRef S,
61213df4626SMatt Arsenault               SMLoc L,
61313df4626SMatt Arsenault               Check::CheckType Ty)
61413df4626SMatt Arsenault     : Pat(P), Prefix(S), Loc(L), CheckTy(Ty) {}
615dcc7d48dSMichael Liao 
61691a1b2c9SMichael Liao   /// Check - Match check string and its "not strings" and/or "dag strings".
617e93a3a08SStephen Lin   size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
618f8bd2e5bSStephen Lin                size_t &MatchLen, StringMap<StringRef> &VariableTable) const;
619dcc7d48dSMichael Liao 
620dcc7d48dSMichael Liao   /// CheckNext - Verify there is a single line in the given buffer.
621dcc7d48dSMichael Liao   bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
622dcc7d48dSMichael Liao 
623dcc7d48dSMichael Liao   /// CheckNot - Verify there's no "not strings" in the given buffer.
624dcc7d48dSMichael Liao   bool CheckNot(const SourceMgr &SM, StringRef Buffer,
62591a1b2c9SMichael Liao                 const std::vector<const Pattern *> &NotStrings,
62691a1b2c9SMichael Liao                 StringMap<StringRef> &VariableTable) const;
62791a1b2c9SMichael Liao 
62891a1b2c9SMichael Liao   /// CheckDag - Match "dag strings" and their mixed "not strings".
62991a1b2c9SMichael Liao   size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
63091a1b2c9SMichael Liao                   std::vector<const Pattern *> &NotStrings,
631dcc7d48dSMichael Liao                   StringMap<StringRef> &VariableTable) const;
63226cccfe1SChris Lattner };
63326cccfe1SChris Lattner 
6345ea04c38SGuy Benyei /// Canonicalize whitespaces in the input file. Line endings are replaced
6355ea04c38SGuy Benyei /// with UNIX-style '\n'.
6365ea04c38SGuy Benyei ///
6375ea04c38SGuy Benyei /// \param PreserveHorizontal Don't squash consecutive horizontal whitespace
6385ea04c38SGuy Benyei /// characters to a single space.
639ce5dd1acSRafael Espindola static MemoryBuffer *CanonicalizeInputFile(std::unique_ptr<MemoryBuffer> MB,
6405ea04c38SGuy Benyei                                            bool PreserveHorizontal) {
6410e45d24aSChris Lattner   SmallString<128> NewFile;
642a2f8fc5aSChris Lattner   NewFile.reserve(MB->getBufferSize());
643a2f8fc5aSChris Lattner 
644a2f8fc5aSChris Lattner   for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd();
645a2f8fc5aSChris Lattner        Ptr != End; ++Ptr) {
646fd781bf0SNAKAMURA Takumi     // Eliminate trailing dosish \r.
647fd781bf0SNAKAMURA Takumi     if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
648fd781bf0SNAKAMURA Takumi       continue;
649fd781bf0SNAKAMURA Takumi     }
650fd781bf0SNAKAMURA Takumi 
6515ea04c38SGuy Benyei     // If current char is not a horizontal whitespace or if horizontal
6525ea04c38SGuy Benyei     // whitespace canonicalization is disabled, dump it to output as is.
6535ea04c38SGuy Benyei     if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) {
654a2f8fc5aSChris Lattner       NewFile.push_back(*Ptr);
655a2f8fc5aSChris Lattner       continue;
656a2f8fc5aSChris Lattner     }
657a2f8fc5aSChris Lattner 
658a2f8fc5aSChris Lattner     // Otherwise, add one space and advance over neighboring space.
659a2f8fc5aSChris Lattner     NewFile.push_back(' ');
660a2f8fc5aSChris Lattner     while (Ptr+1 != End &&
661a2f8fc5aSChris Lattner            (Ptr[1] == ' ' || Ptr[1] == '\t'))
662a2f8fc5aSChris Lattner       ++Ptr;
663a2f8fc5aSChris Lattner   }
664a2f8fc5aSChris Lattner 
665ce5dd1acSRafael Espindola   return MemoryBuffer::getMemBufferCopy(NewFile.str(),
666ce5dd1acSRafael Espindola                                         MB->getBufferIdentifier());
667a2f8fc5aSChris Lattner }
668a2f8fc5aSChris Lattner 
66938820972SMatt Arsenault static bool IsPartOfWord(char c) {
67038820972SMatt Arsenault   return (isalnum(c) || c == '-' || c == '_');
67138820972SMatt Arsenault }
67238820972SMatt Arsenault 
67313df4626SMatt Arsenault // Get the size of the prefix extension.
67413df4626SMatt Arsenault static size_t CheckTypeSize(Check::CheckType Ty) {
67513df4626SMatt Arsenault   switch (Ty) {
67613df4626SMatt Arsenault   case Check::CheckNone:
67713df4626SMatt Arsenault     return 0;
67813df4626SMatt Arsenault 
67913df4626SMatt Arsenault   case Check::CheckPlain:
68013df4626SMatt Arsenault     return sizeof(":") - 1;
68113df4626SMatt Arsenault 
68213df4626SMatt Arsenault   case Check::CheckNext:
68313df4626SMatt Arsenault     return sizeof("-NEXT:") - 1;
68413df4626SMatt Arsenault 
68513df4626SMatt Arsenault   case Check::CheckNot:
68613df4626SMatt Arsenault     return sizeof("-NOT:") - 1;
68713df4626SMatt Arsenault 
68813df4626SMatt Arsenault   case Check::CheckDAG:
68913df4626SMatt Arsenault     return sizeof("-DAG:") - 1;
69013df4626SMatt Arsenault 
69113df4626SMatt Arsenault   case Check::CheckLabel:
69213df4626SMatt Arsenault     return sizeof("-LABEL:") - 1;
69313df4626SMatt Arsenault 
69413df4626SMatt Arsenault   case Check::CheckEOF:
69513df4626SMatt Arsenault     llvm_unreachable("Should not be using EOF size");
69613df4626SMatt Arsenault   }
69713df4626SMatt Arsenault 
69813df4626SMatt Arsenault   llvm_unreachable("Bad check type");
69913df4626SMatt Arsenault }
70013df4626SMatt Arsenault 
70113df4626SMatt Arsenault static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) {
702c4d2d471SMatt Arsenault   char NextChar = Buffer[Prefix.size()];
70338820972SMatt Arsenault 
70438820972SMatt Arsenault   // Verify that the : is present after the prefix.
70513df4626SMatt Arsenault   if (NextChar == ':')
70638820972SMatt Arsenault     return Check::CheckPlain;
70738820972SMatt Arsenault 
70813df4626SMatt Arsenault   if (NextChar != '-')
70938820972SMatt Arsenault     return Check::CheckNone;
71038820972SMatt Arsenault 
711c4d2d471SMatt Arsenault   StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
71213df4626SMatt Arsenault   if (Rest.startswith("NEXT:"))
71338820972SMatt Arsenault     return Check::CheckNext;
71438820972SMatt Arsenault 
71513df4626SMatt Arsenault   if (Rest.startswith("NOT:"))
71638820972SMatt Arsenault     return Check::CheckNot;
71738820972SMatt Arsenault 
71813df4626SMatt Arsenault   if (Rest.startswith("DAG:"))
71938820972SMatt Arsenault     return Check::CheckDAG;
72038820972SMatt Arsenault 
72113df4626SMatt Arsenault   if (Rest.startswith("LABEL:"))
72238820972SMatt Arsenault     return Check::CheckLabel;
72313df4626SMatt Arsenault 
72413df4626SMatt Arsenault   return Check::CheckNone;
72538820972SMatt Arsenault }
72638820972SMatt Arsenault 
72713df4626SMatt Arsenault // From the given position, find the next character after the word.
72813df4626SMatt Arsenault static size_t SkipWord(StringRef Str, size_t Loc) {
72913df4626SMatt Arsenault   while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
73013df4626SMatt Arsenault     ++Loc;
73113df4626SMatt Arsenault   return Loc;
73213df4626SMatt Arsenault }
73313df4626SMatt Arsenault 
73413df4626SMatt Arsenault // Try to find the first match in buffer for any prefix. If a valid match is
73513df4626SMatt Arsenault // found, return that prefix and set its type and location.  If there are almost
73613df4626SMatt Arsenault // matches (e.g. the actual prefix string is found, but is not an actual check
73713df4626SMatt Arsenault // string), but no valid match, return an empty string and set the position to
73813df4626SMatt Arsenault // resume searching from. If no partial matches are found, return an empty
73913df4626SMatt Arsenault // string and the location will be StringRef::npos. If one prefix is a substring
74013df4626SMatt Arsenault // of another, the maximal match should be found. e.g. if "A" and "AA" are
74113df4626SMatt Arsenault // prefixes then AA-CHECK: should match the second one.
74213df4626SMatt Arsenault static StringRef FindFirstCandidateMatch(StringRef &Buffer,
74313df4626SMatt Arsenault                                          Check::CheckType &CheckTy,
74413df4626SMatt Arsenault                                          size_t &CheckLoc) {
74513df4626SMatt Arsenault   StringRef FirstPrefix;
74613df4626SMatt Arsenault   size_t FirstLoc = StringRef::npos;
74713df4626SMatt Arsenault   size_t SearchLoc = StringRef::npos;
74813df4626SMatt Arsenault   Check::CheckType FirstTy = Check::CheckNone;
74913df4626SMatt Arsenault 
75013df4626SMatt Arsenault   CheckTy = Check::CheckNone;
75113df4626SMatt Arsenault   CheckLoc = StringRef::npos;
75213df4626SMatt Arsenault 
75313df4626SMatt Arsenault   for (prefix_iterator I = CheckPrefixes.begin(), E = CheckPrefixes.end();
75413df4626SMatt Arsenault        I != E; ++I) {
75513df4626SMatt Arsenault     StringRef Prefix(*I);
75613df4626SMatt Arsenault     size_t PrefixLoc = Buffer.find(Prefix);
75713df4626SMatt Arsenault 
75813df4626SMatt Arsenault     if (PrefixLoc == StringRef::npos)
75913df4626SMatt Arsenault       continue;
76013df4626SMatt Arsenault 
76113df4626SMatt Arsenault     // Track where we are searching for invalid prefixes that look almost right.
76213df4626SMatt Arsenault     // We need to only advance to the first partial match on the next attempt
76313df4626SMatt Arsenault     // since a partial match could be a substring of a later, valid prefix.
76413df4626SMatt Arsenault     // Need to skip to the end of the word, otherwise we could end up
76513df4626SMatt Arsenault     // matching a prefix in a substring later.
76613df4626SMatt Arsenault     if (PrefixLoc < SearchLoc)
76713df4626SMatt Arsenault       SearchLoc = SkipWord(Buffer, PrefixLoc);
76813df4626SMatt Arsenault 
76913df4626SMatt Arsenault     // We only want to find the first match to avoid skipping some.
77013df4626SMatt Arsenault     if (PrefixLoc > FirstLoc)
77113df4626SMatt Arsenault       continue;
772a7181a1bSAlexey Samsonov     // If one matching check-prefix is a prefix of another, choose the
773a7181a1bSAlexey Samsonov     // longer one.
774a7181a1bSAlexey Samsonov     if (PrefixLoc == FirstLoc && Prefix.size() < FirstPrefix.size())
775a7181a1bSAlexey Samsonov       continue;
77613df4626SMatt Arsenault 
77713df4626SMatt Arsenault     StringRef Rest = Buffer.drop_front(PrefixLoc);
77813df4626SMatt Arsenault     // Make sure we have actually found the prefix, and not a word containing
77913df4626SMatt Arsenault     // it. This should also prevent matching the wrong prefix when one is a
78013df4626SMatt Arsenault     // substring of another.
78113df4626SMatt Arsenault     if (PrefixLoc != 0 && IsPartOfWord(Buffer[PrefixLoc - 1]))
78243b5f572SDaniel Sanders       FirstTy = Check::CheckNone;
78343b5f572SDaniel Sanders     else
78443b5f572SDaniel Sanders       FirstTy = FindCheckType(Rest, Prefix);
78513df4626SMatt Arsenault 
78613df4626SMatt Arsenault     FirstLoc = PrefixLoc;
787a7181a1bSAlexey Samsonov     FirstPrefix = Prefix;
78813df4626SMatt Arsenault   }
78913df4626SMatt Arsenault 
790a7181a1bSAlexey Samsonov   // If the first prefix is invalid, we should continue the search after it.
791a7181a1bSAlexey Samsonov   if (FirstTy == Check::CheckNone) {
79213df4626SMatt Arsenault     CheckLoc = SearchLoc;
793a7181a1bSAlexey Samsonov     return "";
794a7181a1bSAlexey Samsonov   }
795a7181a1bSAlexey Samsonov 
79613df4626SMatt Arsenault   CheckTy = FirstTy;
79713df4626SMatt Arsenault   CheckLoc = FirstLoc;
79813df4626SMatt Arsenault   return FirstPrefix;
79913df4626SMatt Arsenault }
80013df4626SMatt Arsenault 
80113df4626SMatt Arsenault static StringRef FindFirstMatchingPrefix(StringRef &Buffer,
80213df4626SMatt Arsenault                                          unsigned &LineNumber,
80313df4626SMatt Arsenault                                          Check::CheckType &CheckTy,
80413df4626SMatt Arsenault                                          size_t &CheckLoc) {
80513df4626SMatt Arsenault   while (!Buffer.empty()) {
80613df4626SMatt Arsenault     StringRef Prefix = FindFirstCandidateMatch(Buffer, CheckTy, CheckLoc);
80713df4626SMatt Arsenault     // If we found a real match, we are done.
80813df4626SMatt Arsenault     if (!Prefix.empty()) {
80913df4626SMatt Arsenault       LineNumber += Buffer.substr(0, CheckLoc).count('\n');
81013df4626SMatt Arsenault       return Prefix;
81113df4626SMatt Arsenault     }
81213df4626SMatt Arsenault 
81313df4626SMatt Arsenault     // We didn't find any almost matches either, we are also done.
81413df4626SMatt Arsenault     if (CheckLoc == StringRef::npos)
81513df4626SMatt Arsenault       return StringRef();
81613df4626SMatt Arsenault 
81713df4626SMatt Arsenault     LineNumber += Buffer.substr(0, CheckLoc + 1).count('\n');
81813df4626SMatt Arsenault 
81913df4626SMatt Arsenault     // Advance to the last possible match we found and try again.
82013df4626SMatt Arsenault     Buffer = Buffer.drop_front(CheckLoc + 1);
82113df4626SMatt Arsenault   }
82213df4626SMatt Arsenault 
82313df4626SMatt Arsenault   return StringRef();
82438820972SMatt Arsenault }
825ee3c74fbSChris Lattner 
826ee3c74fbSChris Lattner /// ReadCheckFile - Read the check file, which specifies the sequence of
827ee3c74fbSChris Lattner /// expected strings.  The strings are added to the CheckStrings vector.
82843d50d4aSEli Bendersky /// Returns true in case of an error, false otherwise.
829ee3c74fbSChris Lattner static bool ReadCheckFile(SourceMgr &SM,
83026cccfe1SChris Lattner                           std::vector<CheckString> &CheckStrings) {
831adf21f2aSRafael Espindola   ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
832adf21f2aSRafael Espindola       MemoryBuffer::getFileOrSTDIN(CheckFilename);
833adf21f2aSRafael Espindola   if (std::error_code EC = FileOrErr.getError()) {
834adf21f2aSRafael Espindola     errs() << "Could not open check file '" << CheckFilename
835adf21f2aSRafael Espindola            << "': " << EC.message() << '\n';
836ee3c74fbSChris Lattner     return true;
837ee3c74fbSChris Lattner   }
838a2f8fc5aSChris Lattner 
839a2f8fc5aSChris Lattner   // If we want to canonicalize whitespace, strip excess whitespace from the
8405ea04c38SGuy Benyei   // buffer containing the CHECK lines. Remove DOS style line endings.
841ce5dd1acSRafael Espindola   MemoryBuffer *F = CanonicalizeInputFile(std::move(FileOrErr.get()),
842adf21f2aSRafael Espindola                                           NoCanonicalizeWhiteSpace);
843a2f8fc5aSChris Lattner 
844ee3c74fbSChris Lattner   SM.AddNewSourceBuffer(F, SMLoc());
845ee3c74fbSChris Lattner 
84610f10cedSChris Lattner   // Find all instances of CheckPrefix followed by : in the file.
847caa5fc0cSChris Lattner   StringRef Buffer = F->getBuffer();
84856ccdbbdSAlexander Kornienko 
84956ccdbbdSAlexander Kornienko   std::vector<Pattern> ImplicitNegativeChecks;
85056ccdbbdSAlexander Kornienko   for (const auto &PatternString : ImplicitCheckNot) {
85156ccdbbdSAlexander Kornienko     // Create a buffer with fake command line content in order to display the
85256ccdbbdSAlexander Kornienko     // command line option responsible for the specific implicit CHECK-NOT.
85356ccdbbdSAlexander Kornienko     std::string Prefix = std::string("-") + ImplicitCheckNot.ArgStr + "='";
85456ccdbbdSAlexander Kornienko     std::string Suffix = "'";
85556ccdbbdSAlexander Kornienko     MemoryBuffer *CmdLine = MemoryBuffer::getMemBufferCopy(
85656ccdbbdSAlexander Kornienko         Prefix + PatternString + Suffix, "command line");
85756ccdbbdSAlexander Kornienko     StringRef PatternInBuffer =
85856ccdbbdSAlexander Kornienko         CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
85956ccdbbdSAlexander Kornienko     SM.AddNewSourceBuffer(CmdLine, SMLoc());
86056ccdbbdSAlexander Kornienko 
86156ccdbbdSAlexander Kornienko     ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot));
86256ccdbbdSAlexander Kornienko     ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
86356ccdbbdSAlexander Kornienko                                                "IMPLICIT-CHECK", SM, 0);
86456ccdbbdSAlexander Kornienko   }
86556ccdbbdSAlexander Kornienko 
86656ccdbbdSAlexander Kornienko 
86756ccdbbdSAlexander Kornienko   std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;
868236d2d5eSChris Lattner 
86943d50d4aSEli Bendersky   // LineNumber keeps track of the line on which CheckPrefix instances are
87043d50d4aSEli Bendersky   // found.
87192987fb3SAlexander Kornienko   unsigned LineNumber = 1;
87292987fb3SAlexander Kornienko 
873ee3c74fbSChris Lattner   while (1) {
87413df4626SMatt Arsenault     Check::CheckType CheckTy;
87513df4626SMatt Arsenault     size_t PrefixLoc;
87613df4626SMatt Arsenault 
87713df4626SMatt Arsenault     // See if a prefix occurs in the memory buffer.
87813df4626SMatt Arsenault     StringRef UsedPrefix = FindFirstMatchingPrefix(Buffer,
87913df4626SMatt Arsenault                                                    LineNumber,
88013df4626SMatt Arsenault                                                    CheckTy,
88113df4626SMatt Arsenault                                                    PrefixLoc);
88213df4626SMatt Arsenault     if (UsedPrefix.empty())
883ee3c74fbSChris Lattner       break;
884ee3c74fbSChris Lattner 
88513df4626SMatt Arsenault     Buffer = Buffer.drop_front(PrefixLoc);
88692987fb3SAlexander Kornienko 
88713df4626SMatt Arsenault     // Location to use for error messages.
88813df4626SMatt Arsenault     const char *UsedPrefixStart = Buffer.data() + (PrefixLoc == 0 ? 0 : 1);
88992987fb3SAlexander Kornienko 
89013df4626SMatt Arsenault     // PrefixLoc is to the start of the prefix. Skip to the end.
89113df4626SMatt Arsenault     Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy));
89210f10cedSChris Lattner 
89338820972SMatt Arsenault     // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
89438820972SMatt Arsenault     // leading and trailing whitespace.
895236d2d5eSChris Lattner     Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
896ee3c74fbSChris Lattner 
897ee3c74fbSChris Lattner     // Scan ahead to the end of line.
898caa5fc0cSChris Lattner     size_t EOL = Buffer.find_first_of("\n\r");
899ee3c74fbSChris Lattner 
900838fb09aSDan Gohman     // Remember the location of the start of the pattern, for diagnostics.
901838fb09aSDan Gohman     SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
902838fb09aSDan Gohman 
90374d50731SChris Lattner     // Parse the pattern.
90438820972SMatt Arsenault     Pattern P(CheckTy);
90513df4626SMatt Arsenault     if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber))
906ee3c74fbSChris Lattner       return true;
907ee3c74fbSChris Lattner 
908f8bd2e5bSStephen Lin     // Verify that CHECK-LABEL lines do not define or use variables
90938820972SMatt Arsenault     if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
91013df4626SMatt Arsenault       SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
911f8bd2e5bSStephen Lin                       SourceMgr::DK_Error,
91213df4626SMatt Arsenault                       "found '" + UsedPrefix + "-LABEL:'"
91313df4626SMatt Arsenault                       " with variable definition or use");
914f8bd2e5bSStephen Lin       return true;
915f8bd2e5bSStephen Lin     }
916f8bd2e5bSStephen Lin 
917236d2d5eSChris Lattner     Buffer = Buffer.substr(EOL);
91874d50731SChris Lattner 
919da108b4eSChris Lattner     // Verify that CHECK-NEXT lines have at least one CHECK line before them.
92038820972SMatt Arsenault     if ((CheckTy == Check::CheckNext) && CheckStrings.empty()) {
92113df4626SMatt Arsenault       SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
92203b80a40SChris Lattner                       SourceMgr::DK_Error,
92313df4626SMatt Arsenault                       "found '" + UsedPrefix + "-NEXT:' without previous '"
92413df4626SMatt Arsenault                       + UsedPrefix + ": line");
925da108b4eSChris Lattner       return true;
926da108b4eSChris Lattner     }
927da108b4eSChris Lattner 
92891a1b2c9SMichael Liao     // Handle CHECK-DAG/-NOT.
92938820972SMatt Arsenault     if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
93091a1b2c9SMichael Liao       DagNotMatches.push_back(P);
93174d50731SChris Lattner       continue;
93274d50731SChris Lattner     }
93374d50731SChris Lattner 
934ee3c74fbSChris Lattner     // Okay, add the string we captured to the output vector and move on.
9353b40b445SChris Lattner     CheckStrings.push_back(CheckString(P,
93613df4626SMatt Arsenault                                        UsedPrefix,
937838fb09aSDan Gohman                                        PatternLoc,
93838820972SMatt Arsenault                                        CheckTy));
93991a1b2c9SMichael Liao     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
94056ccdbbdSAlexander Kornienko     DagNotMatches = ImplicitNegativeChecks;
941ee3c74fbSChris Lattner   }
942ee3c74fbSChris Lattner 
94313df4626SMatt Arsenault   // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
94413df4626SMatt Arsenault   // prefix as a filler for the error message.
94591a1b2c9SMichael Liao   if (!DagNotMatches.empty()) {
94638820972SMatt Arsenault     CheckStrings.push_back(CheckString(Pattern(Check::CheckEOF),
94713df4626SMatt Arsenault                                        CheckPrefixes[0],
948eba55822SJakob Stoklund Olesen                                        SMLoc::getFromPointer(Buffer.data()),
94938820972SMatt Arsenault                                        Check::CheckEOF));
95091a1b2c9SMichael Liao     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
951eba55822SJakob Stoklund Olesen   }
952eba55822SJakob Stoklund Olesen 
953ee3c74fbSChris Lattner   if (CheckStrings.empty()) {
95413df4626SMatt Arsenault     errs() << "error: no check strings found with prefix"
95513df4626SMatt Arsenault            << (CheckPrefixes.size() > 1 ? "es " : " ");
95613df4626SMatt Arsenault     for (size_t I = 0, N = CheckPrefixes.size(); I != N; ++I) {
95713df4626SMatt Arsenault       StringRef Prefix(CheckPrefixes[I]);
95813df4626SMatt Arsenault       errs() << '\'' << Prefix << ":'";
95913df4626SMatt Arsenault       if (I != N - 1)
96013df4626SMatt Arsenault         errs() << ", ";
96113df4626SMatt Arsenault     }
96213df4626SMatt Arsenault 
96313df4626SMatt Arsenault     errs() << '\n';
964ee3c74fbSChris Lattner     return true;
965ee3c74fbSChris Lattner   }
966ee3c74fbSChris Lattner 
967ee3c74fbSChris Lattner   return false;
968ee3c74fbSChris Lattner }
969ee3c74fbSChris Lattner 
97091a1b2c9SMichael Liao static void PrintCheckFailed(const SourceMgr &SM, const SMLoc &Loc,
97191a1b2c9SMichael Liao                              const Pattern &Pat, StringRef Buffer,
972e0ef65abSDaniel Dunbar                              StringMap<StringRef> &VariableTable) {
973da108b4eSChris Lattner   // Otherwise, we have an error, emit an error message.
97491a1b2c9SMichael Liao   SM.PrintMessage(Loc, SourceMgr::DK_Error,
97503b80a40SChris Lattner                   "expected string not found in input");
976da108b4eSChris Lattner 
977da108b4eSChris Lattner   // Print the "scanning from here" line.  If the current position is at the
978da108b4eSChris Lattner   // end of a line, advance to the start of the next line.
979caa5fc0cSChris Lattner   Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
980da108b4eSChris Lattner 
98103b80a40SChris Lattner   SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
98203b80a40SChris Lattner                   "scanning from here");
983e0ef65abSDaniel Dunbar 
984e0ef65abSDaniel Dunbar   // Allow the pattern to print additional information if desired.
98591a1b2c9SMichael Liao   Pat.PrintFailureInfo(SM, Buffer, VariableTable);
98691a1b2c9SMichael Liao }
98791a1b2c9SMichael Liao 
98891a1b2c9SMichael Liao static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
98991a1b2c9SMichael Liao                              StringRef Buffer,
99091a1b2c9SMichael Liao                              StringMap<StringRef> &VariableTable) {
99191a1b2c9SMichael Liao   PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable);
992da108b4eSChris Lattner }
993da108b4eSChris Lattner 
99437183584SChris Lattner /// CountNumNewlinesBetween - Count the number of newlines in the specified
99537183584SChris Lattner /// range.
996592fe880SRichard Smith static unsigned CountNumNewlinesBetween(StringRef Range,
997592fe880SRichard Smith                                         const char *&FirstNewLine) {
998da108b4eSChris Lattner   unsigned NumNewLines = 0;
99937183584SChris Lattner   while (1) {
1000da108b4eSChris Lattner     // Scan for newline.
100137183584SChris Lattner     Range = Range.substr(Range.find_first_of("\n\r"));
100237183584SChris Lattner     if (Range.empty()) return NumNewLines;
1003da108b4eSChris Lattner 
1004da108b4eSChris Lattner     ++NumNewLines;
1005da108b4eSChris Lattner 
1006da108b4eSChris Lattner     // Handle \n\r and \r\n as a single newline.
100737183584SChris Lattner     if (Range.size() > 1 &&
100837183584SChris Lattner         (Range[1] == '\n' || Range[1] == '\r') &&
100937183584SChris Lattner         (Range[0] != Range[1]))
101037183584SChris Lattner       Range = Range.substr(1);
101137183584SChris Lattner     Range = Range.substr(1);
1012592fe880SRichard Smith 
1013592fe880SRichard Smith     if (NumNewLines == 1)
1014592fe880SRichard Smith       FirstNewLine = Range.begin();
1015da108b4eSChris Lattner   }
1016da108b4eSChris Lattner }
1017da108b4eSChris Lattner 
1018dcc7d48dSMichael Liao size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
1019e93a3a08SStephen Lin                           bool IsLabelScanMode, size_t &MatchLen,
1020dcc7d48dSMichael Liao                           StringMap<StringRef> &VariableTable) const {
102191a1b2c9SMichael Liao   size_t LastPos = 0;
102291a1b2c9SMichael Liao   std::vector<const Pattern *> NotStrings;
102391a1b2c9SMichael Liao 
1024e93a3a08SStephen Lin   // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
1025e93a3a08SStephen Lin   // bounds; we have not processed variable definitions within the bounded block
1026e93a3a08SStephen Lin   // yet so cannot handle any final CHECK-DAG yet; this is handled when going
1027e93a3a08SStephen Lin   // over the block again (including the last CHECK-LABEL) in normal mode.
1028e93a3a08SStephen Lin   if (!IsLabelScanMode) {
102991a1b2c9SMichael Liao     // Match "dag strings" (with mixed "not strings" if any).
103091a1b2c9SMichael Liao     LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable);
103191a1b2c9SMichael Liao     if (LastPos == StringRef::npos)
103291a1b2c9SMichael Liao       return StringRef::npos;
1033e93a3a08SStephen Lin   }
103491a1b2c9SMichael Liao 
103591a1b2c9SMichael Liao   // Match itself from the last position after matching CHECK-DAG.
103691a1b2c9SMichael Liao   StringRef MatchBuffer = Buffer.substr(LastPos);
103791a1b2c9SMichael Liao   size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1038dcc7d48dSMichael Liao   if (MatchPos == StringRef::npos) {
103991a1b2c9SMichael Liao     PrintCheckFailed(SM, *this, MatchBuffer, VariableTable);
1040dcc7d48dSMichael Liao     return StringRef::npos;
1041dcc7d48dSMichael Liao   }
104291a1b2c9SMichael Liao   MatchPos += LastPos;
1043dcc7d48dSMichael Liao 
1044e93a3a08SStephen Lin   // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
1045e93a3a08SStephen Lin   // or CHECK-NOT
1046e93a3a08SStephen Lin   if (!IsLabelScanMode) {
104791a1b2c9SMichael Liao     StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1048dcc7d48dSMichael Liao 
1049dcc7d48dSMichael Liao     // If this check is a "CHECK-NEXT", verify that the previous match was on
1050dcc7d48dSMichael Liao     // the previous line (i.e. that there is one newline between them).
1051dcc7d48dSMichael Liao     if (CheckNext(SM, SkippedRegion))
1052dcc7d48dSMichael Liao       return StringRef::npos;
1053dcc7d48dSMichael Liao 
1054dcc7d48dSMichael Liao     // If this match had "not strings", verify that they don't exist in the
1055dcc7d48dSMichael Liao     // skipped region.
105691a1b2c9SMichael Liao     if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1057dcc7d48dSMichael Liao       return StringRef::npos;
1058f8bd2e5bSStephen Lin   }
1059dcc7d48dSMichael Liao 
1060dcc7d48dSMichael Liao   return MatchPos;
1061dcc7d48dSMichael Liao }
1062dcc7d48dSMichael Liao 
1063dcc7d48dSMichael Liao bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
106438820972SMatt Arsenault   if (CheckTy != Check::CheckNext)
1065dcc7d48dSMichael Liao     return false;
1066dcc7d48dSMichael Liao 
1067dcc7d48dSMichael Liao   // Count the number of newlines between the previous match and this one.
1068dcc7d48dSMichael Liao   assert(Buffer.data() !=
1069dcc7d48dSMichael Liao          SM.getMemoryBuffer(
1070dcc7d48dSMichael Liao            SM.FindBufferContainingLoc(
1071dcc7d48dSMichael Liao              SMLoc::getFromPointer(Buffer.data())))->getBufferStart() &&
1072dcc7d48dSMichael Liao          "CHECK-NEXT can't be the first check in a file");
1073dcc7d48dSMichael Liao 
107466f09ad0SCraig Topper   const char *FirstNewLine = nullptr;
1075592fe880SRichard Smith   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1076dcc7d48dSMichael Liao 
1077dcc7d48dSMichael Liao   if (NumNewLines == 0) {
107813df4626SMatt Arsenault     SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
1079dcc7d48dSMichael Liao                     "-NEXT: is on the same line as previous match");
1080dcc7d48dSMichael Liao     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
1081dcc7d48dSMichael Liao                     SourceMgr::DK_Note, "'next' match was here");
1082dcc7d48dSMichael Liao     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1083dcc7d48dSMichael Liao                     "previous match ended here");
1084dcc7d48dSMichael Liao     return true;
1085dcc7d48dSMichael Liao   }
1086dcc7d48dSMichael Liao 
1087dcc7d48dSMichael Liao   if (NumNewLines != 1) {
108813df4626SMatt Arsenault     SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
1089dcc7d48dSMichael Liao                     "-NEXT: is not on the line after the previous match");
1090dcc7d48dSMichael Liao     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
1091dcc7d48dSMichael Liao                     SourceMgr::DK_Note, "'next' match was here");
1092dcc7d48dSMichael Liao     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1093dcc7d48dSMichael Liao                     "previous match ended here");
1094592fe880SRichard Smith     SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
1095592fe880SRichard Smith                     "non-matching line after previous match is here");
1096dcc7d48dSMichael Liao     return true;
1097dcc7d48dSMichael Liao   }
1098dcc7d48dSMichael Liao 
1099dcc7d48dSMichael Liao   return false;
1100dcc7d48dSMichael Liao }
1101dcc7d48dSMichael Liao 
1102dcc7d48dSMichael Liao bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
110391a1b2c9SMichael Liao                            const std::vector<const Pattern *> &NotStrings,
1104dcc7d48dSMichael Liao                            StringMap<StringRef> &VariableTable) const {
1105dcc7d48dSMichael Liao   for (unsigned ChunkNo = 0, e = NotStrings.size();
1106dcc7d48dSMichael Liao        ChunkNo != e; ++ChunkNo) {
110791a1b2c9SMichael Liao     const Pattern *Pat = NotStrings[ChunkNo];
110838820972SMatt Arsenault     assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
110991a1b2c9SMichael Liao 
1110dcc7d48dSMichael Liao     size_t MatchLen = 0;
111191a1b2c9SMichael Liao     size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
1112dcc7d48dSMichael Liao 
1113dcc7d48dSMichael Liao     if (Pos == StringRef::npos) continue;
1114dcc7d48dSMichael Liao 
1115dcc7d48dSMichael Liao     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()+Pos),
1116dcc7d48dSMichael Liao                     SourceMgr::DK_Error,
111713df4626SMatt Arsenault                     Prefix + "-NOT: string occurred!");
111891a1b2c9SMichael Liao     SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note,
111913df4626SMatt Arsenault                     Prefix + "-NOT: pattern specified here");
1120dcc7d48dSMichael Liao     return true;
1121dcc7d48dSMichael Liao   }
1122dcc7d48dSMichael Liao 
1123dcc7d48dSMichael Liao   return false;
1124dcc7d48dSMichael Liao }
1125dcc7d48dSMichael Liao 
112691a1b2c9SMichael Liao size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
112791a1b2c9SMichael Liao                              std::vector<const Pattern *> &NotStrings,
112891a1b2c9SMichael Liao                              StringMap<StringRef> &VariableTable) const {
112991a1b2c9SMichael Liao   if (DagNotStrings.empty())
113091a1b2c9SMichael Liao     return 0;
113191a1b2c9SMichael Liao 
113291a1b2c9SMichael Liao   size_t LastPos = 0;
113391a1b2c9SMichael Liao   size_t StartPos = LastPos;
113491a1b2c9SMichael Liao 
113591a1b2c9SMichael Liao   for (unsigned ChunkNo = 0, e = DagNotStrings.size();
113691a1b2c9SMichael Liao        ChunkNo != e; ++ChunkNo) {
113791a1b2c9SMichael Liao     const Pattern &Pat = DagNotStrings[ChunkNo];
113891a1b2c9SMichael Liao 
113938820972SMatt Arsenault     assert((Pat.getCheckTy() == Check::CheckDAG ||
114038820972SMatt Arsenault             Pat.getCheckTy() == Check::CheckNot) &&
114191a1b2c9SMichael Liao            "Invalid CHECK-DAG or CHECK-NOT!");
114291a1b2c9SMichael Liao 
114338820972SMatt Arsenault     if (Pat.getCheckTy() == Check::CheckNot) {
114491a1b2c9SMichael Liao       NotStrings.push_back(&Pat);
114591a1b2c9SMichael Liao       continue;
114691a1b2c9SMichael Liao     }
114791a1b2c9SMichael Liao 
114838820972SMatt Arsenault     assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
114991a1b2c9SMichael Liao 
115091a1b2c9SMichael Liao     size_t MatchLen = 0, MatchPos;
115191a1b2c9SMichael Liao 
115291a1b2c9SMichael Liao     // CHECK-DAG always matches from the start.
115391a1b2c9SMichael Liao     StringRef MatchBuffer = Buffer.substr(StartPos);
115491a1b2c9SMichael Liao     MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
115591a1b2c9SMichael Liao     // With a group of CHECK-DAGs, a single mismatching means the match on
115691a1b2c9SMichael Liao     // that group of CHECK-DAGs fails immediately.
115791a1b2c9SMichael Liao     if (MatchPos == StringRef::npos) {
115891a1b2c9SMichael Liao       PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable);
115991a1b2c9SMichael Liao       return StringRef::npos;
116091a1b2c9SMichael Liao     }
116191a1b2c9SMichael Liao     // Re-calc it as the offset relative to the start of the original string.
116291a1b2c9SMichael Liao     MatchPos += StartPos;
116391a1b2c9SMichael Liao 
116491a1b2c9SMichael Liao     if (!NotStrings.empty()) {
116591a1b2c9SMichael Liao       if (MatchPos < LastPos) {
116691a1b2c9SMichael Liao         // Reordered?
116791a1b2c9SMichael Liao         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos),
116891a1b2c9SMichael Liao                         SourceMgr::DK_Error,
116913df4626SMatt Arsenault                         Prefix + "-DAG: found a match of CHECK-DAG"
117091a1b2c9SMichael Liao                         " reordering across a CHECK-NOT");
117191a1b2c9SMichael Liao         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos),
117291a1b2c9SMichael Liao                         SourceMgr::DK_Note,
117313df4626SMatt Arsenault                         Prefix + "-DAG: the farthest match of CHECK-DAG"
117491a1b2c9SMichael Liao                         " is found here");
117591a1b2c9SMichael Liao         SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note,
117613df4626SMatt Arsenault                         Prefix + "-NOT: the crossed pattern specified"
117791a1b2c9SMichael Liao                         " here");
117891a1b2c9SMichael Liao         SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note,
117913df4626SMatt Arsenault                         Prefix + "-DAG: the reordered pattern specified"
118091a1b2c9SMichael Liao                         " here");
118191a1b2c9SMichael Liao         return StringRef::npos;
118291a1b2c9SMichael Liao       }
118391a1b2c9SMichael Liao       // All subsequent CHECK-DAGs should be matched from the farthest
118491a1b2c9SMichael Liao       // position of all precedent CHECK-DAGs (including this one.)
118591a1b2c9SMichael Liao       StartPos = LastPos;
118691a1b2c9SMichael Liao       // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to
118791a1b2c9SMichael Liao       // CHECK-DAG, verify that there's no 'not' strings occurred in that
118891a1b2c9SMichael Liao       // region.
118991a1b2c9SMichael Liao       StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1190cf708c32STim Northover       if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
119191a1b2c9SMichael Liao         return StringRef::npos;
119291a1b2c9SMichael Liao       // Clear "not strings".
119391a1b2c9SMichael Liao       NotStrings.clear();
119491a1b2c9SMichael Liao     }
119591a1b2c9SMichael Liao 
119691a1b2c9SMichael Liao     // Update the last position with CHECK-DAG matches.
119791a1b2c9SMichael Liao     LastPos = std::max(MatchPos + MatchLen, LastPos);
119891a1b2c9SMichael Liao   }
119991a1b2c9SMichael Liao 
120091a1b2c9SMichael Liao   return LastPos;
120191a1b2c9SMichael Liao }
120291a1b2c9SMichael Liao 
120313df4626SMatt Arsenault // A check prefix must contain only alphanumeric, hyphens and underscores.
120413df4626SMatt Arsenault static bool ValidateCheckPrefix(StringRef CheckPrefix) {
120513df4626SMatt Arsenault   Regex Validator("^[a-zA-Z0-9_-]*$");
120613df4626SMatt Arsenault   return Validator.match(CheckPrefix);
120713df4626SMatt Arsenault }
120813df4626SMatt Arsenault 
120913df4626SMatt Arsenault static bool ValidateCheckPrefixes() {
121013df4626SMatt Arsenault   StringSet<> PrefixSet;
121113df4626SMatt Arsenault 
121213df4626SMatt Arsenault   for (prefix_iterator I = CheckPrefixes.begin(), E = CheckPrefixes.end();
121313df4626SMatt Arsenault        I != E; ++I) {
121413df4626SMatt Arsenault     StringRef Prefix(*I);
121513df4626SMatt Arsenault 
121624412b14SEli Bendersky     // Reject empty prefixes.
121724412b14SEli Bendersky     if (Prefix == "")
121824412b14SEli Bendersky       return false;
121924412b14SEli Bendersky 
122013df4626SMatt Arsenault     if (!PrefixSet.insert(Prefix))
122113df4626SMatt Arsenault       return false;
122213df4626SMatt Arsenault 
122313df4626SMatt Arsenault     if (!ValidateCheckPrefix(Prefix))
122413df4626SMatt Arsenault       return false;
122513df4626SMatt Arsenault   }
122613df4626SMatt Arsenault 
122713df4626SMatt Arsenault   return true;
122813df4626SMatt Arsenault }
122913df4626SMatt Arsenault 
123013df4626SMatt Arsenault // I don't think there's a way to specify an initial value for cl::list,
123113df4626SMatt Arsenault // so if nothing was specified, add the default
123213df4626SMatt Arsenault static void AddCheckPrefixIfNeeded() {
123313df4626SMatt Arsenault   if (CheckPrefixes.empty())
123413df4626SMatt Arsenault     CheckPrefixes.push_back("CHECK");
1235c2735158SRui Ueyama }
1236c2735158SRui Ueyama 
1237ee3c74fbSChris Lattner int main(int argc, char **argv) {
1238ee3c74fbSChris Lattner   sys::PrintStackTraceOnErrorSignal();
1239ee3c74fbSChris Lattner   PrettyStackTraceProgram X(argc, argv);
1240ee3c74fbSChris Lattner   cl::ParseCommandLineOptions(argc, argv);
1241ee3c74fbSChris Lattner 
124213df4626SMatt Arsenault   if (!ValidateCheckPrefixes()) {
124313df4626SMatt Arsenault     errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
124413df4626SMatt Arsenault               "start with a letter and contain only alphanumeric characters, "
124513df4626SMatt Arsenault               "hyphens and underscores\n";
1246c2735158SRui Ueyama     return 2;
1247c2735158SRui Ueyama   }
1248c2735158SRui Ueyama 
124913df4626SMatt Arsenault   AddCheckPrefixIfNeeded();
125013df4626SMatt Arsenault 
1251ee3c74fbSChris Lattner   SourceMgr SM;
1252ee3c74fbSChris Lattner 
1253ee3c74fbSChris Lattner   // Read the expected strings from the check file.
125426cccfe1SChris Lattner   std::vector<CheckString> CheckStrings;
1255ee3c74fbSChris Lattner   if (ReadCheckFile(SM, CheckStrings))
1256ee3c74fbSChris Lattner     return 2;
1257ee3c74fbSChris Lattner 
1258ee3c74fbSChris Lattner   // Open the file to check and add it to SourceMgr.
1259adf21f2aSRafael Espindola   ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
1260adf21f2aSRafael Espindola       MemoryBuffer::getFileOrSTDIN(InputFilename);
1261adf21f2aSRafael Espindola   if (std::error_code EC = FileOrErr.getError()) {
1262adf21f2aSRafael Espindola     errs() << "Could not open input file '" << InputFilename
1263adf21f2aSRafael Espindola            << "': " << EC.message() << '\n';
12648e1c6477SEli Bendersky     return 2;
1265ee3c74fbSChris Lattner   }
12663f6481d0SRafael Espindola   std::unique_ptr<MemoryBuffer> &File = FileOrErr.get();
12672c3e5cdfSChris Lattner 
1268*1b9f936fSJustin Bogner   if (File->getBufferSize() == 0 && !AllowEmptyInput) {
1269b692bed7SChris Lattner     errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
12708e1c6477SEli Bendersky     return 2;
1271b692bed7SChris Lattner   }
1272b692bed7SChris Lattner 
12732c3e5cdfSChris Lattner   // Remove duplicate spaces in the input file if requested.
12745ea04c38SGuy Benyei   // Remove DOS style line endings.
1275e963d660SBenjamin Kramer   MemoryBuffer *F =
1276ce5dd1acSRafael Espindola     CanonicalizeInputFile(std::move(File), NoCanonicalizeWhiteSpace);
12772c3e5cdfSChris Lattner 
1278ee3c74fbSChris Lattner   SM.AddNewSourceBuffer(F, SMLoc());
1279ee3c74fbSChris Lattner 
12808879e06dSChris Lattner   /// VariableTable - This holds all the current filecheck variables.
12818879e06dSChris Lattner   StringMap<StringRef> VariableTable;
12828879e06dSChris Lattner 
1283ee3c74fbSChris Lattner   // Check that we have all of the expected strings, in order, in the input
1284ee3c74fbSChris Lattner   // file.
1285caa5fc0cSChris Lattner   StringRef Buffer = F->getBuffer();
1286ee3c74fbSChris Lattner 
1287f8bd2e5bSStephen Lin   bool hasError = false;
1288ee3c74fbSChris Lattner 
1289f8bd2e5bSStephen Lin   unsigned i = 0, j = 0, e = CheckStrings.size();
1290ee3c74fbSChris Lattner 
1291f8bd2e5bSStephen Lin   while (true) {
1292f8bd2e5bSStephen Lin     StringRef CheckRegion;
1293f8bd2e5bSStephen Lin     if (j == e) {
1294f8bd2e5bSStephen Lin       CheckRegion = Buffer;
1295f8bd2e5bSStephen Lin     } else {
1296f8bd2e5bSStephen Lin       const CheckString &CheckLabelStr = CheckStrings[j];
129738820972SMatt Arsenault       if (CheckLabelStr.CheckTy != Check::CheckLabel) {
1298f8bd2e5bSStephen Lin         ++j;
1299f8bd2e5bSStephen Lin         continue;
1300da108b4eSChris Lattner       }
1301da108b4eSChris Lattner 
1302f8bd2e5bSStephen Lin       // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
1303f8bd2e5bSStephen Lin       size_t MatchLabelLen = 0;
1304e93a3a08SStephen Lin       size_t MatchLabelPos = CheckLabelStr.Check(SM, Buffer, true,
1305f8bd2e5bSStephen Lin                                                  MatchLabelLen, VariableTable);
1306f8bd2e5bSStephen Lin       if (MatchLabelPos == StringRef::npos) {
1307f8bd2e5bSStephen Lin         hasError = true;
1308f8bd2e5bSStephen Lin         break;
1309f8bd2e5bSStephen Lin       }
1310f8bd2e5bSStephen Lin 
1311f8bd2e5bSStephen Lin       CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
1312f8bd2e5bSStephen Lin       Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
1313f8bd2e5bSStephen Lin       ++j;
1314f8bd2e5bSStephen Lin     }
1315f8bd2e5bSStephen Lin 
1316f8bd2e5bSStephen Lin     for ( ; i != j; ++i) {
1317f8bd2e5bSStephen Lin       const CheckString &CheckStr = CheckStrings[i];
1318f8bd2e5bSStephen Lin 
1319f8bd2e5bSStephen Lin       // Check each string within the scanned region, including a second check
1320f8bd2e5bSStephen Lin       // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
1321f8bd2e5bSStephen Lin       size_t MatchLen = 0;
1322e93a3a08SStephen Lin       size_t MatchPos = CheckStr.Check(SM, CheckRegion, false, MatchLen,
1323f8bd2e5bSStephen Lin                                        VariableTable);
1324f8bd2e5bSStephen Lin 
1325f8bd2e5bSStephen Lin       if (MatchPos == StringRef::npos) {
1326f8bd2e5bSStephen Lin         hasError = true;
1327f8bd2e5bSStephen Lin         i = j;
1328f8bd2e5bSStephen Lin         break;
1329f8bd2e5bSStephen Lin       }
1330f8bd2e5bSStephen Lin 
1331f8bd2e5bSStephen Lin       CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
1332f8bd2e5bSStephen Lin     }
1333f8bd2e5bSStephen Lin 
1334f8bd2e5bSStephen Lin     if (j == e)
1335f8bd2e5bSStephen Lin       break;
1336f8bd2e5bSStephen Lin   }
1337f8bd2e5bSStephen Lin 
1338f8bd2e5bSStephen Lin   return hasError ? 1 : 0;
1339ee3c74fbSChris Lattner }
1340