1ee3c74fbSChris Lattner //===- FileCheck.cpp - Check that File's Contents match what is expected --===// 2ee3c74fbSChris Lattner // 3ee3c74fbSChris Lattner // The LLVM Compiler Infrastructure 4ee3c74fbSChris Lattner // 5ee3c74fbSChris Lattner // This file is distributed under the University of Illinois Open Source 6ee3c74fbSChris Lattner // License. See LICENSE.TXT for details. 7ee3c74fbSChris Lattner // 8ee3c74fbSChris Lattner //===----------------------------------------------------------------------===// 9ee3c74fbSChris Lattner // 10ee3c74fbSChris Lattner // FileCheck does a line-by line check of a file that validates whether it 11ee3c74fbSChris Lattner // contains the expected content. This is useful for regression tests etc. 12ee3c74fbSChris Lattner // 13ee3c74fbSChris Lattner // This program exits with an error status of 2 on error, exit status of 0 if 14ee3c74fbSChris Lattner // the file matched the expected contents, and exit status of 1 if it did not 15ee3c74fbSChris Lattner // contain the expected contents. 16ee3c74fbSChris Lattner // 17ee3c74fbSChris Lattner //===----------------------------------------------------------------------===// 18ee3c74fbSChris Lattner 1991d19d8eSChandler Carruth #include "llvm/ADT/SmallString.h" 2091d19d8eSChandler Carruth #include "llvm/ADT/StringExtras.h" 2191d19d8eSChandler Carruth #include "llvm/ADT/StringMap.h" 2213df4626SMatt Arsenault #include "llvm/ADT/StringSet.h" 23ee3c74fbSChris Lattner #include "llvm/Support/CommandLine.h" 24ee3c74fbSChris Lattner #include "llvm/Support/MemoryBuffer.h" 25ee3c74fbSChris Lattner #include "llvm/Support/PrettyStackTrace.h" 26f08d2db9SChris Lattner #include "llvm/Support/Regex.h" 2791d19d8eSChandler Carruth #include "llvm/Support/Signals.h" 28ee3c74fbSChris Lattner #include "llvm/Support/SourceMgr.h" 29ee3c74fbSChris Lattner #include "llvm/Support/raw_ostream.h" 308879e06dSChris Lattner #include <algorithm> 31981af002SWill Dietz #include <cctype> 32e8b8f1bcSEli Bendersky #include <map> 33e8b8f1bcSEli Bendersky #include <string> 34a6e9c3e4SRafael Espindola #include <system_error> 35e8b8f1bcSEli Bendersky #include <vector> 36ee3c74fbSChris Lattner using namespace llvm; 37ee3c74fbSChris Lattner 38ee3c74fbSChris Lattner static cl::opt<std::string> 39ee3c74fbSChris Lattner CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required); 40ee3c74fbSChris Lattner 41ee3c74fbSChris Lattner static cl::opt<std::string> 42ee3c74fbSChris Lattner InputFilename("input-file", cl::desc("File to check (defaults to stdin)"), 43ee3c74fbSChris Lattner cl::init("-"), cl::value_desc("filename")); 44ee3c74fbSChris Lattner 4513df4626SMatt Arsenault static cl::list<std::string> 4613df4626SMatt Arsenault CheckPrefixes("check-prefix", 47ee3c74fbSChris Lattner cl::desc("Prefix to use from check file (defaults to 'CHECK')")); 48ee3c74fbSChris Lattner 492c3e5cdfSChris Lattner static cl::opt<bool> 502c3e5cdfSChris Lattner NoCanonicalizeWhiteSpace("strict-whitespace", 512c3e5cdfSChris Lattner cl::desc("Do not treat all horizontal whitespace as equivalent")); 522c3e5cdfSChris Lattner 5356ccdbbdSAlexander Kornienko static cl::list<std::string> ImplicitCheckNot( 5456ccdbbdSAlexander Kornienko "implicit-check-not", 5556ccdbbdSAlexander Kornienko cl::desc("Add an implicit negative check with this pattern to every\n" 5656ccdbbdSAlexander Kornienko "positive check. This can be used to ensure that no instances of\n" 5756ccdbbdSAlexander Kornienko "this pattern occur which are not matched by a positive pattern"), 5856ccdbbdSAlexander Kornienko cl::value_desc("pattern")); 5956ccdbbdSAlexander Kornienko 6013df4626SMatt Arsenault typedef cl::list<std::string>::const_iterator prefix_iterator; 6113df4626SMatt Arsenault 6274d50731SChris Lattner //===----------------------------------------------------------------------===// 6374d50731SChris Lattner // Pattern Handling Code. 6474d50731SChris Lattner //===----------------------------------------------------------------------===// 6574d50731SChris Lattner 6638820972SMatt Arsenault namespace Check { 6738820972SMatt Arsenault enum CheckType { 6838820972SMatt Arsenault CheckNone = 0, 6938820972SMatt Arsenault CheckPlain, 7038820972SMatt Arsenault CheckNext, 7138820972SMatt Arsenault CheckNot, 7238820972SMatt Arsenault CheckDAG, 7338820972SMatt Arsenault CheckLabel, 740a4c44bdSChris Lattner 75eba55822SJakob Stoklund Olesen /// MatchEOF - When set, this pattern only matches the end of file. This is 76eba55822SJakob Stoklund Olesen /// used for trailing CHECK-NOTs. 7738820972SMatt Arsenault CheckEOF 7838820972SMatt Arsenault }; 7938820972SMatt Arsenault } 80eba55822SJakob Stoklund Olesen 8138820972SMatt Arsenault class Pattern { 8238820972SMatt Arsenault SMLoc PatternLoc; 8391a1b2c9SMichael Liao 8438820972SMatt Arsenault Check::CheckType CheckTy; 8591a1b2c9SMichael Liao 86b16ab0c4SChris Lattner /// FixedStr - If non-empty, this pattern is a fixed string match with the 87b16ab0c4SChris Lattner /// specified fixed string. 88221460e0SChris Lattner StringRef FixedStr; 89b16ab0c4SChris Lattner 90b16ab0c4SChris Lattner /// RegEx - If non-empty, this is a regex pattern. 91b16ab0c4SChris Lattner std::string RegExStr; 928879e06dSChris Lattner 9392987fb3SAlexander Kornienko /// \brief Contains the number of line this pattern is in. 9492987fb3SAlexander Kornienko unsigned LineNumber; 9592987fb3SAlexander Kornienko 968879e06dSChris Lattner /// VariableUses - Entries in this vector map to uses of a variable in the 978879e06dSChris Lattner /// pattern, e.g. "foo[[bar]]baz". In this case, the RegExStr will contain 988879e06dSChris Lattner /// "foobaz" and we'll get an entry in this vector that tells us to insert the 998879e06dSChris Lattner /// value of bar at offset 3. 1008879e06dSChris Lattner std::vector<std::pair<StringRef, unsigned> > VariableUses; 1018879e06dSChris Lattner 102e8b8f1bcSEli Bendersky /// VariableDefs - Maps definitions of variables to their parenthesized 103e8b8f1bcSEli Bendersky /// capture numbers. 104e8b8f1bcSEli Bendersky /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1. 105e8b8f1bcSEli Bendersky std::map<StringRef, unsigned> VariableDefs; 1068879e06dSChris Lattner 1073b40b445SChris Lattner public: 1083b40b445SChris Lattner 10938820972SMatt Arsenault Pattern(Check::CheckType Ty) 11038820972SMatt Arsenault : CheckTy(Ty) { } 11174d50731SChris Lattner 1120b707eb8SMichael Liao /// getLoc - Return the location in source code. 1130b707eb8SMichael Liao SMLoc getLoc() const { return PatternLoc; } 1140b707eb8SMichael Liao 11513df4626SMatt Arsenault /// ParsePattern - Parse the given string into the Pattern. Prefix provides 11613df4626SMatt Arsenault /// which prefix is being matched, SM provides the SourceMgr used for error 11713df4626SMatt Arsenault /// reports, and LineNumber is the line number in the input file from which 11813df4626SMatt Arsenault /// the pattern string was read. Returns true in case of an error, false 11913df4626SMatt Arsenault /// otherwise. 12013df4626SMatt Arsenault bool ParsePattern(StringRef PatternStr, 12113df4626SMatt Arsenault StringRef Prefix, 12213df4626SMatt Arsenault SourceMgr &SM, 12313df4626SMatt Arsenault unsigned LineNumber); 1243b40b445SChris Lattner 1253b40b445SChris Lattner /// Match - Match the pattern string against the input buffer Buffer. This 1263b40b445SChris Lattner /// returns the position that is matched or npos if there is no match. If 1273b40b445SChris Lattner /// there is a match, the size of the matched string is returned in MatchLen. 1288879e06dSChris Lattner /// 1298879e06dSChris Lattner /// The VariableTable StringMap provides the current values of filecheck 1308879e06dSChris Lattner /// variables and is updated if this match defines new values. 1318879e06dSChris Lattner size_t Match(StringRef Buffer, size_t &MatchLen, 1328879e06dSChris Lattner StringMap<StringRef> &VariableTable) const; 133b16ab0c4SChris Lattner 134e0ef65abSDaniel Dunbar /// PrintFailureInfo - Print additional information about a failure to match 135e0ef65abSDaniel Dunbar /// involving this pattern. 136e0ef65abSDaniel Dunbar void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer, 137e0ef65abSDaniel Dunbar const StringMap<StringRef> &VariableTable) const; 138e0ef65abSDaniel Dunbar 139f8bd2e5bSStephen Lin bool hasVariable() const { return !(VariableUses.empty() && 140f8bd2e5bSStephen Lin VariableDefs.empty()); } 141f8bd2e5bSStephen Lin 14238820972SMatt Arsenault Check::CheckType getCheckTy() const { return CheckTy; } 14391a1b2c9SMichael Liao 144b16ab0c4SChris Lattner private: 145e8b8f1bcSEli Bendersky bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM); 146e8b8f1bcSEli Bendersky void AddBackrefToRegEx(unsigned BackrefNum); 147fd29d886SDaniel Dunbar 148fd29d886SDaniel Dunbar /// ComputeMatchDistance - Compute an arbitrary estimate for the quality of 149fd29d886SDaniel Dunbar /// matching this pattern at the start of \arg Buffer; a distance of zero 150fd29d886SDaniel Dunbar /// should correspond to a perfect match. 151fd29d886SDaniel Dunbar unsigned ComputeMatchDistance(StringRef Buffer, 152fd29d886SDaniel Dunbar const StringMap<StringRef> &VariableTable) const; 15392987fb3SAlexander Kornienko 15492987fb3SAlexander Kornienko /// \brief Evaluates expression and stores the result to \p Value. 15592987fb3SAlexander Kornienko /// \return true on success. false when the expression has invalid syntax. 15692987fb3SAlexander Kornienko bool EvaluateExpression(StringRef Expr, std::string &Value) const; 157061d2baaSEli Bendersky 158061d2baaSEli Bendersky /// \brief Finds the closing sequence of a regex variable usage or 159061d2baaSEli Bendersky /// definition. Str has to point in the beginning of the definition 160061d2baaSEli Bendersky /// (right after the opening sequence). 161061d2baaSEli Bendersky /// \return offset of the closing sequence within Str, or npos if it was not 162061d2baaSEli Bendersky /// found. 16381e5cd9eSAdrian Prantl size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM); 1643b40b445SChris Lattner }; 1653b40b445SChris Lattner 1668879e06dSChris Lattner 16713df4626SMatt Arsenault bool Pattern::ParsePattern(StringRef PatternStr, 16813df4626SMatt Arsenault StringRef Prefix, 16913df4626SMatt Arsenault SourceMgr &SM, 17092987fb3SAlexander Kornienko unsigned LineNumber) { 17192987fb3SAlexander Kornienko this->LineNumber = LineNumber; 1720a4c44bdSChris Lattner PatternLoc = SMLoc::getFromPointer(PatternStr.data()); 1730a4c44bdSChris Lattner 17474d50731SChris Lattner // Ignore trailing whitespace. 17574d50731SChris Lattner while (!PatternStr.empty() && 17674d50731SChris Lattner (PatternStr.back() == ' ' || PatternStr.back() == '\t')) 17774d50731SChris Lattner PatternStr = PatternStr.substr(0, PatternStr.size()-1); 17874d50731SChris Lattner 17974d50731SChris Lattner // Check that there is something on the line. 18074d50731SChris Lattner if (PatternStr.empty()) { 18103b80a40SChris Lattner SM.PrintMessage(PatternLoc, SourceMgr::DK_Error, 18203b80a40SChris Lattner "found empty check string with prefix '" + 18313df4626SMatt Arsenault Prefix + ":'"); 18474d50731SChris Lattner return true; 18574d50731SChris Lattner } 18674d50731SChris Lattner 187221460e0SChris Lattner // Check to see if this is a fixed string, or if it has regex pieces. 188d9466967STed Kremenek if (PatternStr.size() < 2 || 1898879e06dSChris Lattner (PatternStr.find("{{") == StringRef::npos && 1908879e06dSChris Lattner PatternStr.find("[[") == StringRef::npos)) { 191221460e0SChris Lattner FixedStr = PatternStr; 192221460e0SChris Lattner return false; 193221460e0SChris Lattner } 194221460e0SChris Lattner 1958879e06dSChris Lattner // Paren value #0 is for the fully matched string. Any new parenthesized 19653e0679dSChris Lattner // values add from there. 1978879e06dSChris Lattner unsigned CurParen = 1; 1988879e06dSChris Lattner 199b16ab0c4SChris Lattner // Otherwise, there is at least one regex piece. Build up the regex pattern 200b16ab0c4SChris Lattner // by escaping scary characters in fixed strings, building up one big regex. 201f08d2db9SChris Lattner while (!PatternStr.empty()) { 2028879e06dSChris Lattner // RegEx matches. 20353e0679dSChris Lattner if (PatternStr.startswith("{{")) { 20443d50d4aSEli Bendersky // This is the start of a regex match. Scan for the }}. 205f08d2db9SChris Lattner size_t End = PatternStr.find("}}"); 206f08d2db9SChris Lattner if (End == StringRef::npos) { 207f08d2db9SChris Lattner SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 20803b80a40SChris Lattner SourceMgr::DK_Error, 20903b80a40SChris Lattner "found start of regex string with no end '}}'"); 210f08d2db9SChris Lattner return true; 211f08d2db9SChris Lattner } 212f08d2db9SChris Lattner 213e53c95f1SChris Lattner // Enclose {{}} patterns in parens just like [[]] even though we're not 214e53c95f1SChris Lattner // capturing the result for any purpose. This is required in case the 215e53c95f1SChris Lattner // expression contains an alternation like: CHECK: abc{{x|z}}def. We 216e53c95f1SChris Lattner // want this to turn into: "abc(x|z)def" not "abcx|zdef". 217e53c95f1SChris Lattner RegExStr += '('; 218e53c95f1SChris Lattner ++CurParen; 219e53c95f1SChris Lattner 2208879e06dSChris Lattner if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM)) 2218879e06dSChris Lattner return true; 222e53c95f1SChris Lattner RegExStr += ')'; 22353e0679dSChris Lattner 2248879e06dSChris Lattner PatternStr = PatternStr.substr(End+2); 2258879e06dSChris Lattner continue; 2268879e06dSChris Lattner } 2278879e06dSChris Lattner 2288879e06dSChris Lattner // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .* 2298879e06dSChris Lattner // (or some other regex) and assigns it to the FileCheck variable 'foo'. The 2308879e06dSChris Lattner // second form is [[foo]] which is a reference to foo. The variable name 23157cb733bSDaniel Dunbar // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject 2328879e06dSChris Lattner // it. This is to catch some common errors. 23353e0679dSChris Lattner if (PatternStr.startswith("[[")) { 234061d2baaSEli Bendersky // Find the closing bracket pair ending the match. End is going to be an 235061d2baaSEli Bendersky // offset relative to the beginning of the match string. 23681e5cd9eSAdrian Prantl size_t End = FindRegexVarEnd(PatternStr.substr(2), SM); 237061d2baaSEli Bendersky 2388879e06dSChris Lattner if (End == StringRef::npos) { 2398879e06dSChris Lattner SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 24003b80a40SChris Lattner SourceMgr::DK_Error, 24103b80a40SChris Lattner "invalid named regex reference, no ]] found"); 242f08d2db9SChris Lattner return true; 243f08d2db9SChris Lattner } 244f08d2db9SChris Lattner 245061d2baaSEli Bendersky StringRef MatchStr = PatternStr.substr(2, End); 246061d2baaSEli Bendersky PatternStr = PatternStr.substr(End+4); 2478879e06dSChris Lattner 2488879e06dSChris Lattner // Get the regex name (e.g. "foo"). 2498879e06dSChris Lattner size_t NameEnd = MatchStr.find(':'); 2508879e06dSChris Lattner StringRef Name = MatchStr.substr(0, NameEnd); 2518879e06dSChris Lattner 2528879e06dSChris Lattner if (Name.empty()) { 25303b80a40SChris Lattner SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 25403b80a40SChris Lattner "invalid name in named regex: empty name"); 2558879e06dSChris Lattner return true; 2568879e06dSChris Lattner } 2578879e06dSChris Lattner 25892987fb3SAlexander Kornienko // Verify that the name/expression is well formed. FileCheck currently 25992987fb3SAlexander Kornienko // supports @LINE, @LINE+number, @LINE-number expressions. The check here 26092987fb3SAlexander Kornienko // is relaxed, more strict check is performed in \c EvaluateExpression. 26192987fb3SAlexander Kornienko bool IsExpression = false; 26292987fb3SAlexander Kornienko for (unsigned i = 0, e = Name.size(); i != e; ++i) { 26392987fb3SAlexander Kornienko if (i == 0 && Name[i] == '@') { 26492987fb3SAlexander Kornienko if (NameEnd != StringRef::npos) { 26592987fb3SAlexander Kornienko SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 26692987fb3SAlexander Kornienko SourceMgr::DK_Error, 26792987fb3SAlexander Kornienko "invalid name in named regex definition"); 26892987fb3SAlexander Kornienko return true; 26992987fb3SAlexander Kornienko } 27092987fb3SAlexander Kornienko IsExpression = true; 27192987fb3SAlexander Kornienko continue; 27292987fb3SAlexander Kornienko } 27392987fb3SAlexander Kornienko if (Name[i] != '_' && !isalnum(Name[i]) && 27492987fb3SAlexander Kornienko (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) { 2758879e06dSChris Lattner SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i), 27603b80a40SChris Lattner SourceMgr::DK_Error, "invalid name in named regex"); 2778879e06dSChris Lattner return true; 2788879e06dSChris Lattner } 27992987fb3SAlexander Kornienko } 2808879e06dSChris Lattner 2818879e06dSChris Lattner // Name can't start with a digit. 28283c74e9fSGuy Benyei if (isdigit(static_cast<unsigned char>(Name[0]))) { 28303b80a40SChris Lattner SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 28403b80a40SChris Lattner "invalid name in named regex"); 2858879e06dSChris Lattner return true; 2868879e06dSChris Lattner } 2878879e06dSChris Lattner 2888879e06dSChris Lattner // Handle [[foo]]. 2898879e06dSChris Lattner if (NameEnd == StringRef::npos) { 290e8b8f1bcSEli Bendersky // Handle variables that were defined earlier on the same line by 291e8b8f1bcSEli Bendersky // emitting a backreference. 292e8b8f1bcSEli Bendersky if (VariableDefs.find(Name) != VariableDefs.end()) { 293e8b8f1bcSEli Bendersky unsigned VarParenNum = VariableDefs[Name]; 294e8b8f1bcSEli Bendersky if (VarParenNum < 1 || VarParenNum > 9) { 295e8b8f1bcSEli Bendersky SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 296e8b8f1bcSEli Bendersky SourceMgr::DK_Error, 297e8b8f1bcSEli Bendersky "Can't back-reference more than 9 variables"); 298e8b8f1bcSEli Bendersky return true; 299e8b8f1bcSEli Bendersky } 300e8b8f1bcSEli Bendersky AddBackrefToRegEx(VarParenNum); 301e8b8f1bcSEli Bendersky } else { 3028879e06dSChris Lattner VariableUses.push_back(std::make_pair(Name, RegExStr.size())); 303e8b8f1bcSEli Bendersky } 3048879e06dSChris Lattner continue; 3058879e06dSChris Lattner } 3068879e06dSChris Lattner 3078879e06dSChris Lattner // Handle [[foo:.*]]. 308e8b8f1bcSEli Bendersky VariableDefs[Name] = CurParen; 3098879e06dSChris Lattner RegExStr += '('; 3108879e06dSChris Lattner ++CurParen; 3118879e06dSChris Lattner 3128879e06dSChris Lattner if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM)) 3138879e06dSChris Lattner return true; 3148879e06dSChris Lattner 3158879e06dSChris Lattner RegExStr += ')'; 3168879e06dSChris Lattner } 3178879e06dSChris Lattner 3188879e06dSChris Lattner // Handle fixed string matches. 3198879e06dSChris Lattner // Find the end, which is the start of the next regex. 3208879e06dSChris Lattner size_t FixedMatchEnd = PatternStr.find("{{"); 3218879e06dSChris Lattner FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[[")); 3226f4f77b7SHans Wennborg RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd)); 3238879e06dSChris Lattner PatternStr = PatternStr.substr(FixedMatchEnd); 324f08d2db9SChris Lattner } 325f08d2db9SChris Lattner 32674d50731SChris Lattner return false; 32774d50731SChris Lattner } 32874d50731SChris Lattner 329e8b8f1bcSEli Bendersky bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, 3308879e06dSChris Lattner SourceMgr &SM) { 331e8b8f1bcSEli Bendersky Regex R(RS); 3328879e06dSChris Lattner std::string Error; 3338879e06dSChris Lattner if (!R.isValid(Error)) { 334e8b8f1bcSEli Bendersky SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error, 33503b80a40SChris Lattner "invalid regex: " + Error); 3368879e06dSChris Lattner return true; 3378879e06dSChris Lattner } 3388879e06dSChris Lattner 339e8b8f1bcSEli Bendersky RegExStr += RS.str(); 3408879e06dSChris Lattner CurParen += R.getNumMatches(); 3418879e06dSChris Lattner return false; 3428879e06dSChris Lattner } 343b16ab0c4SChris Lattner 344e8b8f1bcSEli Bendersky void Pattern::AddBackrefToRegEx(unsigned BackrefNum) { 345e8b8f1bcSEli Bendersky assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number"); 346e8b8f1bcSEli Bendersky std::string Backref = std::string("\\") + 347e8b8f1bcSEli Bendersky std::string(1, '0' + BackrefNum); 348e8b8f1bcSEli Bendersky RegExStr += Backref; 349e8b8f1bcSEli Bendersky } 350e8b8f1bcSEli Bendersky 35192987fb3SAlexander Kornienko bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const { 35292987fb3SAlexander Kornienko // The only supported expression is @LINE([\+-]\d+)? 35392987fb3SAlexander Kornienko if (!Expr.startswith("@LINE")) 35492987fb3SAlexander Kornienko return false; 35592987fb3SAlexander Kornienko Expr = Expr.substr(StringRef("@LINE").size()); 35692987fb3SAlexander Kornienko int Offset = 0; 35792987fb3SAlexander Kornienko if (!Expr.empty()) { 35892987fb3SAlexander Kornienko if (Expr[0] == '+') 35992987fb3SAlexander Kornienko Expr = Expr.substr(1); 36092987fb3SAlexander Kornienko else if (Expr[0] != '-') 36192987fb3SAlexander Kornienko return false; 36292987fb3SAlexander Kornienko if (Expr.getAsInteger(10, Offset)) 36392987fb3SAlexander Kornienko return false; 36492987fb3SAlexander Kornienko } 36592987fb3SAlexander Kornienko Value = llvm::itostr(LineNumber + Offset); 36692987fb3SAlexander Kornienko return true; 36792987fb3SAlexander Kornienko } 36892987fb3SAlexander Kornienko 369f08d2db9SChris Lattner /// Match - Match the pattern string against the input buffer Buffer. This 370f08d2db9SChris Lattner /// returns the position that is matched or npos if there is no match. If 371f08d2db9SChris Lattner /// there is a match, the size of the matched string is returned in MatchLen. 3728879e06dSChris Lattner size_t Pattern::Match(StringRef Buffer, size_t &MatchLen, 3738879e06dSChris Lattner StringMap<StringRef> &VariableTable) const { 374eba55822SJakob Stoklund Olesen // If this is the EOF pattern, match it immediately. 37538820972SMatt Arsenault if (CheckTy == Check::CheckEOF) { 376eba55822SJakob Stoklund Olesen MatchLen = 0; 377eba55822SJakob Stoklund Olesen return Buffer.size(); 378eba55822SJakob Stoklund Olesen } 379eba55822SJakob Stoklund Olesen 380221460e0SChris Lattner // If this is a fixed string pattern, just match it now. 381221460e0SChris Lattner if (!FixedStr.empty()) { 382221460e0SChris Lattner MatchLen = FixedStr.size(); 383221460e0SChris Lattner return Buffer.find(FixedStr); 384221460e0SChris Lattner } 385221460e0SChris Lattner 386b16ab0c4SChris Lattner // Regex match. 3878879e06dSChris Lattner 3888879e06dSChris Lattner // If there are variable uses, we need to create a temporary string with the 3898879e06dSChris Lattner // actual value. 3908879e06dSChris Lattner StringRef RegExToMatch = RegExStr; 3918879e06dSChris Lattner std::string TmpStr; 3928879e06dSChris Lattner if (!VariableUses.empty()) { 3938879e06dSChris Lattner TmpStr = RegExStr; 3948879e06dSChris Lattner 3958879e06dSChris Lattner unsigned InsertOffset = 0; 3968879e06dSChris Lattner for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) { 39792987fb3SAlexander Kornienko std::string Value; 39892987fb3SAlexander Kornienko 39992987fb3SAlexander Kornienko if (VariableUses[i].first[0] == '@') { 40092987fb3SAlexander Kornienko if (!EvaluateExpression(VariableUses[i].first, Value)) 40192987fb3SAlexander Kornienko return StringRef::npos; 40292987fb3SAlexander Kornienko } else { 403e0ef65abSDaniel Dunbar StringMap<StringRef>::iterator it = 404e0ef65abSDaniel Dunbar VariableTable.find(VariableUses[i].first); 405e0ef65abSDaniel Dunbar // If the variable is undefined, return an error. 406e0ef65abSDaniel Dunbar if (it == VariableTable.end()) 407e0ef65abSDaniel Dunbar return StringRef::npos; 408e0ef65abSDaniel Dunbar 4096f4f77b7SHans Wennborg // Look up the value and escape it so that we can put it into the regex. 4106f4f77b7SHans Wennborg Value += Regex::escape(it->second); 41192987fb3SAlexander Kornienko } 4128879e06dSChris Lattner 4138879e06dSChris Lattner // Plop it into the regex at the adjusted offset. 4148879e06dSChris Lattner TmpStr.insert(TmpStr.begin()+VariableUses[i].second+InsertOffset, 4158879e06dSChris Lattner Value.begin(), Value.end()); 4168879e06dSChris Lattner InsertOffset += Value.size(); 4178879e06dSChris Lattner } 4188879e06dSChris Lattner 4198879e06dSChris Lattner // Match the newly constructed regex. 4208879e06dSChris Lattner RegExToMatch = TmpStr; 4218879e06dSChris Lattner } 4228879e06dSChris Lattner 4238879e06dSChris Lattner 424b16ab0c4SChris Lattner SmallVector<StringRef, 4> MatchInfo; 4258879e06dSChris Lattner if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo)) 426f08d2db9SChris Lattner return StringRef::npos; 427b16ab0c4SChris Lattner 428b16ab0c4SChris Lattner // Successful regex match. 429b16ab0c4SChris Lattner assert(!MatchInfo.empty() && "Didn't get any match"); 430b16ab0c4SChris Lattner StringRef FullMatch = MatchInfo[0]; 431b16ab0c4SChris Lattner 4328879e06dSChris Lattner // If this defines any variables, remember their values. 433e8b8f1bcSEli Bendersky for (std::map<StringRef, unsigned>::const_iterator I = VariableDefs.begin(), 434e8b8f1bcSEli Bendersky E = VariableDefs.end(); 435e8b8f1bcSEli Bendersky I != E; ++I) { 436e8b8f1bcSEli Bendersky assert(I->second < MatchInfo.size() && "Internal paren error"); 437e8b8f1bcSEli Bendersky VariableTable[I->first] = MatchInfo[I->second]; 4380a4c44bdSChris Lattner } 4390a4c44bdSChris Lattner 440b16ab0c4SChris Lattner MatchLen = FullMatch.size(); 441b16ab0c4SChris Lattner return FullMatch.data()-Buffer.data(); 442f08d2db9SChris Lattner } 443f08d2db9SChris Lattner 444fd29d886SDaniel Dunbar unsigned Pattern::ComputeMatchDistance(StringRef Buffer, 445fd29d886SDaniel Dunbar const StringMap<StringRef> &VariableTable) const { 446fd29d886SDaniel Dunbar // Just compute the number of matching characters. For regular expressions, we 447fd29d886SDaniel Dunbar // just compare against the regex itself and hope for the best. 448fd29d886SDaniel Dunbar // 449fd29d886SDaniel Dunbar // FIXME: One easy improvement here is have the regex lib generate a single 450fd29d886SDaniel Dunbar // example regular expression which matches, and use that as the example 451fd29d886SDaniel Dunbar // string. 452fd29d886SDaniel Dunbar StringRef ExampleString(FixedStr); 453fd29d886SDaniel Dunbar if (ExampleString.empty()) 454fd29d886SDaniel Dunbar ExampleString = RegExStr; 455fd29d886SDaniel Dunbar 456e9aa36c8SDaniel Dunbar // Only compare up to the first line in the buffer, or the string size. 457e9aa36c8SDaniel Dunbar StringRef BufferPrefix = Buffer.substr(0, ExampleString.size()); 458e9aa36c8SDaniel Dunbar BufferPrefix = BufferPrefix.split('\n').first; 459e9aa36c8SDaniel Dunbar return BufferPrefix.edit_distance(ExampleString); 460fd29d886SDaniel Dunbar } 461fd29d886SDaniel Dunbar 462e0ef65abSDaniel Dunbar void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer, 463e0ef65abSDaniel Dunbar const StringMap<StringRef> &VariableTable) const{ 464e0ef65abSDaniel Dunbar // If this was a regular expression using variables, print the current 465e0ef65abSDaniel Dunbar // variable values. 466e0ef65abSDaniel Dunbar if (!VariableUses.empty()) { 467e0ef65abSDaniel Dunbar for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) { 468e69170a1SAlp Toker SmallString<256> Msg; 469e69170a1SAlp Toker raw_svector_ostream OS(Msg); 47092987fb3SAlexander Kornienko StringRef Var = VariableUses[i].first; 47192987fb3SAlexander Kornienko if (Var[0] == '@') { 47292987fb3SAlexander Kornienko std::string Value; 47392987fb3SAlexander Kornienko if (EvaluateExpression(Var, Value)) { 47492987fb3SAlexander Kornienko OS << "with expression \""; 47592987fb3SAlexander Kornienko OS.write_escaped(Var) << "\" equal to \""; 47692987fb3SAlexander Kornienko OS.write_escaped(Value) << "\""; 47792987fb3SAlexander Kornienko } else { 47892987fb3SAlexander Kornienko OS << "uses incorrect expression \""; 47992987fb3SAlexander Kornienko OS.write_escaped(Var) << "\""; 48092987fb3SAlexander Kornienko } 48192987fb3SAlexander Kornienko } else { 48292987fb3SAlexander Kornienko StringMap<StringRef>::const_iterator it = VariableTable.find(Var); 483e0ef65abSDaniel Dunbar 484e0ef65abSDaniel Dunbar // Check for undefined variable references. 485e0ef65abSDaniel Dunbar if (it == VariableTable.end()) { 486e0ef65abSDaniel Dunbar OS << "uses undefined variable \""; 48792987fb3SAlexander Kornienko OS.write_escaped(Var) << "\""; 488e0ef65abSDaniel Dunbar } else { 489e0ef65abSDaniel Dunbar OS << "with variable \""; 490e0ef65abSDaniel Dunbar OS.write_escaped(Var) << "\" equal to \""; 491e0ef65abSDaniel Dunbar OS.write_escaped(it->second) << "\""; 492e0ef65abSDaniel Dunbar } 49392987fb3SAlexander Kornienko } 494e0ef65abSDaniel Dunbar 49503b80a40SChris Lattner SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 49603b80a40SChris Lattner OS.str()); 497e0ef65abSDaniel Dunbar } 498e0ef65abSDaniel Dunbar } 499fd29d886SDaniel Dunbar 500fd29d886SDaniel Dunbar // Attempt to find the closest/best fuzzy match. Usually an error happens 501fd29d886SDaniel Dunbar // because some string in the output didn't exactly match. In these cases, we 502fd29d886SDaniel Dunbar // would like to show the user a best guess at what "should have" matched, to 503fd29d886SDaniel Dunbar // save them having to actually check the input manually. 504fd29d886SDaniel Dunbar size_t NumLinesForward = 0; 505fd29d886SDaniel Dunbar size_t Best = StringRef::npos; 506fd29d886SDaniel Dunbar double BestQuality = 0; 507fd29d886SDaniel Dunbar 508fd29d886SDaniel Dunbar // Use an arbitrary 4k limit on how far we will search. 5092bf486ebSDan Gohman for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) { 510fd29d886SDaniel Dunbar if (Buffer[i] == '\n') 511fd29d886SDaniel Dunbar ++NumLinesForward; 512fd29d886SDaniel Dunbar 513df22bbf7SDan Gohman // Patterns have leading whitespace stripped, so skip whitespace when 514df22bbf7SDan Gohman // looking for something which looks like a pattern. 515df22bbf7SDan Gohman if (Buffer[i] == ' ' || Buffer[i] == '\t') 516df22bbf7SDan Gohman continue; 517df22bbf7SDan Gohman 518fd29d886SDaniel Dunbar // Compute the "quality" of this match as an arbitrary combination of the 519fd29d886SDaniel Dunbar // match distance and the number of lines skipped to get to this match. 520fd29d886SDaniel Dunbar unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable); 521fd29d886SDaniel Dunbar double Quality = Distance + (NumLinesForward / 100.); 522fd29d886SDaniel Dunbar 523fd29d886SDaniel Dunbar if (Quality < BestQuality || Best == StringRef::npos) { 524fd29d886SDaniel Dunbar Best = i; 525fd29d886SDaniel Dunbar BestQuality = Quality; 526fd29d886SDaniel Dunbar } 527fd29d886SDaniel Dunbar } 528fd29d886SDaniel Dunbar 529fd29d886SDaniel Dunbar // Print the "possible intended match here" line if we found something 530c069cc8eSDaniel Dunbar // reasonable and not equal to what we showed in the "scanning from here" 531c069cc8eSDaniel Dunbar // line. 532c069cc8eSDaniel Dunbar if (Best && Best != StringRef::npos && BestQuality < 50) { 533fd29d886SDaniel Dunbar SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best), 53403b80a40SChris Lattner SourceMgr::DK_Note, "possible intended match here"); 535fd29d886SDaniel Dunbar 536fd29d886SDaniel Dunbar // FIXME: If we wanted to be really friendly we would show why the match 537fd29d886SDaniel Dunbar // failed, as it can be hard to spot simple one character differences. 538fd29d886SDaniel Dunbar } 539e0ef65abSDaniel Dunbar } 54074d50731SChris Lattner 54181e5cd9eSAdrian Prantl size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) { 542061d2baaSEli Bendersky // Offset keeps track of the current offset within the input Str 543061d2baaSEli Bendersky size_t Offset = 0; 544061d2baaSEli Bendersky // [...] Nesting depth 545061d2baaSEli Bendersky size_t BracketDepth = 0; 546061d2baaSEli Bendersky 547061d2baaSEli Bendersky while (!Str.empty()) { 548061d2baaSEli Bendersky if (Str.startswith("]]") && BracketDepth == 0) 549061d2baaSEli Bendersky return Offset; 550061d2baaSEli Bendersky if (Str[0] == '\\') { 551061d2baaSEli Bendersky // Backslash escapes the next char within regexes, so skip them both. 552061d2baaSEli Bendersky Str = Str.substr(2); 553061d2baaSEli Bendersky Offset += 2; 554061d2baaSEli Bendersky } else { 555061d2baaSEli Bendersky switch (Str[0]) { 556061d2baaSEli Bendersky default: 557061d2baaSEli Bendersky break; 558061d2baaSEli Bendersky case '[': 559061d2baaSEli Bendersky BracketDepth++; 560061d2baaSEli Bendersky break; 561061d2baaSEli Bendersky case ']': 56281e5cd9eSAdrian Prantl if (BracketDepth == 0) { 56381e5cd9eSAdrian Prantl SM.PrintMessage(SMLoc::getFromPointer(Str.data()), 56481e5cd9eSAdrian Prantl SourceMgr::DK_Error, 56581e5cd9eSAdrian Prantl "missing closing \"]\" for regex variable"); 56681e5cd9eSAdrian Prantl exit(1); 56781e5cd9eSAdrian Prantl } 568061d2baaSEli Bendersky BracketDepth--; 569061d2baaSEli Bendersky break; 570061d2baaSEli Bendersky } 571061d2baaSEli Bendersky Str = Str.substr(1); 572061d2baaSEli Bendersky Offset++; 573061d2baaSEli Bendersky } 574061d2baaSEli Bendersky } 575061d2baaSEli Bendersky 576061d2baaSEli Bendersky return StringRef::npos; 577061d2baaSEli Bendersky } 578061d2baaSEli Bendersky 579061d2baaSEli Bendersky 58074d50731SChris Lattner //===----------------------------------------------------------------------===// 58174d50731SChris Lattner // Check Strings. 58274d50731SChris Lattner //===----------------------------------------------------------------------===// 5833b40b445SChris Lattner 5843b40b445SChris Lattner /// CheckString - This is a check that we found in the input file. 5853b40b445SChris Lattner struct CheckString { 5863b40b445SChris Lattner /// Pat - The pattern to match. 5873b40b445SChris Lattner Pattern Pat; 58826cccfe1SChris Lattner 58913df4626SMatt Arsenault /// Prefix - Which prefix name this check matched. 59013df4626SMatt Arsenault StringRef Prefix; 59113df4626SMatt Arsenault 59226cccfe1SChris Lattner /// Loc - The location in the match file that the check string was specified. 59326cccfe1SChris Lattner SMLoc Loc; 59426cccfe1SChris Lattner 59538820972SMatt Arsenault /// CheckTy - Specify what kind of check this is. e.g. CHECK-NEXT: directive, 59638820972SMatt Arsenault /// as opposed to a CHECK: directive. 59738820972SMatt Arsenault Check::CheckType CheckTy; 598f8bd2e5bSStephen Lin 59991a1b2c9SMichael Liao /// DagNotStrings - These are all of the strings that are disallowed from 600236d2d5eSChris Lattner /// occurring between this match string and the previous one (or start of 601236d2d5eSChris Lattner /// file). 60291a1b2c9SMichael Liao std::vector<Pattern> DagNotStrings; 603236d2d5eSChris Lattner 60413df4626SMatt Arsenault 60513df4626SMatt Arsenault CheckString(const Pattern &P, 60613df4626SMatt Arsenault StringRef S, 60713df4626SMatt Arsenault SMLoc L, 60813df4626SMatt Arsenault Check::CheckType Ty) 60913df4626SMatt Arsenault : Pat(P), Prefix(S), Loc(L), CheckTy(Ty) {} 610dcc7d48dSMichael Liao 61191a1b2c9SMichael Liao /// Check - Match check string and its "not strings" and/or "dag strings". 612e93a3a08SStephen Lin size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode, 613f8bd2e5bSStephen Lin size_t &MatchLen, StringMap<StringRef> &VariableTable) const; 614dcc7d48dSMichael Liao 615dcc7d48dSMichael Liao /// CheckNext - Verify there is a single line in the given buffer. 616dcc7d48dSMichael Liao bool CheckNext(const SourceMgr &SM, StringRef Buffer) const; 617dcc7d48dSMichael Liao 618dcc7d48dSMichael Liao /// CheckNot - Verify there's no "not strings" in the given buffer. 619dcc7d48dSMichael Liao bool CheckNot(const SourceMgr &SM, StringRef Buffer, 62091a1b2c9SMichael Liao const std::vector<const Pattern *> &NotStrings, 62191a1b2c9SMichael Liao StringMap<StringRef> &VariableTable) const; 62291a1b2c9SMichael Liao 62391a1b2c9SMichael Liao /// CheckDag - Match "dag strings" and their mixed "not strings". 62491a1b2c9SMichael Liao size_t CheckDag(const SourceMgr &SM, StringRef Buffer, 62591a1b2c9SMichael Liao std::vector<const Pattern *> &NotStrings, 626dcc7d48dSMichael Liao StringMap<StringRef> &VariableTable) const; 62726cccfe1SChris Lattner }; 62826cccfe1SChris Lattner 6295ea04c38SGuy Benyei /// Canonicalize whitespaces in the input file. Line endings are replaced 6305ea04c38SGuy Benyei /// with UNIX-style '\n'. 6315ea04c38SGuy Benyei /// 6325ea04c38SGuy Benyei /// \param PreserveHorizontal Don't squash consecutive horizontal whitespace 6335ea04c38SGuy Benyei /// characters to a single space. 6345ea04c38SGuy Benyei static MemoryBuffer *CanonicalizeInputFile(MemoryBuffer *MB, 6355ea04c38SGuy Benyei bool PreserveHorizontal) { 6360e45d24aSChris Lattner SmallString<128> NewFile; 637a2f8fc5aSChris Lattner NewFile.reserve(MB->getBufferSize()); 638a2f8fc5aSChris Lattner 639a2f8fc5aSChris Lattner for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd(); 640a2f8fc5aSChris Lattner Ptr != End; ++Ptr) { 641fd781bf0SNAKAMURA Takumi // Eliminate trailing dosish \r. 642fd781bf0SNAKAMURA Takumi if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') { 643fd781bf0SNAKAMURA Takumi continue; 644fd781bf0SNAKAMURA Takumi } 645fd781bf0SNAKAMURA Takumi 6465ea04c38SGuy Benyei // If current char is not a horizontal whitespace or if horizontal 6475ea04c38SGuy Benyei // whitespace canonicalization is disabled, dump it to output as is. 6485ea04c38SGuy Benyei if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) { 649a2f8fc5aSChris Lattner NewFile.push_back(*Ptr); 650a2f8fc5aSChris Lattner continue; 651a2f8fc5aSChris Lattner } 652a2f8fc5aSChris Lattner 653a2f8fc5aSChris Lattner // Otherwise, add one space and advance over neighboring space. 654a2f8fc5aSChris Lattner NewFile.push_back(' '); 655a2f8fc5aSChris Lattner while (Ptr+1 != End && 656a2f8fc5aSChris Lattner (Ptr[1] == ' ' || Ptr[1] == '\t')) 657a2f8fc5aSChris Lattner ++Ptr; 658a2f8fc5aSChris Lattner } 659a2f8fc5aSChris Lattner 660a2f8fc5aSChris Lattner // Free the old buffer and return a new one. 661a2f8fc5aSChris Lattner MemoryBuffer *MB2 = 6620e45d24aSChris Lattner MemoryBuffer::getMemBufferCopy(NewFile.str(), MB->getBufferIdentifier()); 663a2f8fc5aSChris Lattner 664a2f8fc5aSChris Lattner delete MB; 665a2f8fc5aSChris Lattner return MB2; 666a2f8fc5aSChris Lattner } 667a2f8fc5aSChris Lattner 66838820972SMatt Arsenault static bool IsPartOfWord(char c) { 66938820972SMatt Arsenault return (isalnum(c) || c == '-' || c == '_'); 67038820972SMatt Arsenault } 67138820972SMatt Arsenault 67213df4626SMatt Arsenault // Get the size of the prefix extension. 67313df4626SMatt Arsenault static size_t CheckTypeSize(Check::CheckType Ty) { 67413df4626SMatt Arsenault switch (Ty) { 67513df4626SMatt Arsenault case Check::CheckNone: 67613df4626SMatt Arsenault return 0; 67713df4626SMatt Arsenault 67813df4626SMatt Arsenault case Check::CheckPlain: 67913df4626SMatt Arsenault return sizeof(":") - 1; 68013df4626SMatt Arsenault 68113df4626SMatt Arsenault case Check::CheckNext: 68213df4626SMatt Arsenault return sizeof("-NEXT:") - 1; 68313df4626SMatt Arsenault 68413df4626SMatt Arsenault case Check::CheckNot: 68513df4626SMatt Arsenault return sizeof("-NOT:") - 1; 68613df4626SMatt Arsenault 68713df4626SMatt Arsenault case Check::CheckDAG: 68813df4626SMatt Arsenault return sizeof("-DAG:") - 1; 68913df4626SMatt Arsenault 69013df4626SMatt Arsenault case Check::CheckLabel: 69113df4626SMatt Arsenault return sizeof("-LABEL:") - 1; 69213df4626SMatt Arsenault 69313df4626SMatt Arsenault case Check::CheckEOF: 69413df4626SMatt Arsenault llvm_unreachable("Should not be using EOF size"); 69513df4626SMatt Arsenault } 69613df4626SMatt Arsenault 69713df4626SMatt Arsenault llvm_unreachable("Bad check type"); 69813df4626SMatt Arsenault } 69913df4626SMatt Arsenault 70013df4626SMatt Arsenault static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) { 701c4d2d471SMatt Arsenault char NextChar = Buffer[Prefix.size()]; 70238820972SMatt Arsenault 70338820972SMatt Arsenault // Verify that the : is present after the prefix. 70413df4626SMatt Arsenault if (NextChar == ':') 70538820972SMatt Arsenault return Check::CheckPlain; 70638820972SMatt Arsenault 70713df4626SMatt Arsenault if (NextChar != '-') 70838820972SMatt Arsenault return Check::CheckNone; 70938820972SMatt Arsenault 710c4d2d471SMatt Arsenault StringRef Rest = Buffer.drop_front(Prefix.size() + 1); 71113df4626SMatt Arsenault if (Rest.startswith("NEXT:")) 71238820972SMatt Arsenault return Check::CheckNext; 71338820972SMatt Arsenault 71413df4626SMatt Arsenault if (Rest.startswith("NOT:")) 71538820972SMatt Arsenault return Check::CheckNot; 71638820972SMatt Arsenault 71713df4626SMatt Arsenault if (Rest.startswith("DAG:")) 71838820972SMatt Arsenault return Check::CheckDAG; 71938820972SMatt Arsenault 72013df4626SMatt Arsenault if (Rest.startswith("LABEL:")) 72138820972SMatt Arsenault return Check::CheckLabel; 72213df4626SMatt Arsenault 72313df4626SMatt Arsenault return Check::CheckNone; 72438820972SMatt Arsenault } 72538820972SMatt Arsenault 72613df4626SMatt Arsenault // From the given position, find the next character after the word. 72713df4626SMatt Arsenault static size_t SkipWord(StringRef Str, size_t Loc) { 72813df4626SMatt Arsenault while (Loc < Str.size() && IsPartOfWord(Str[Loc])) 72913df4626SMatt Arsenault ++Loc; 73013df4626SMatt Arsenault return Loc; 73113df4626SMatt Arsenault } 73213df4626SMatt Arsenault 73313df4626SMatt Arsenault // Try to find the first match in buffer for any prefix. If a valid match is 73413df4626SMatt Arsenault // found, return that prefix and set its type and location. If there are almost 73513df4626SMatt Arsenault // matches (e.g. the actual prefix string is found, but is not an actual check 73613df4626SMatt Arsenault // string), but no valid match, return an empty string and set the position to 73713df4626SMatt Arsenault // resume searching from. If no partial matches are found, return an empty 73813df4626SMatt Arsenault // string and the location will be StringRef::npos. If one prefix is a substring 73913df4626SMatt Arsenault // of another, the maximal match should be found. e.g. if "A" and "AA" are 74013df4626SMatt Arsenault // prefixes then AA-CHECK: should match the second one. 74113df4626SMatt Arsenault static StringRef FindFirstCandidateMatch(StringRef &Buffer, 74213df4626SMatt Arsenault Check::CheckType &CheckTy, 74313df4626SMatt Arsenault size_t &CheckLoc) { 74413df4626SMatt Arsenault StringRef FirstPrefix; 74513df4626SMatt Arsenault size_t FirstLoc = StringRef::npos; 74613df4626SMatt Arsenault size_t SearchLoc = StringRef::npos; 74713df4626SMatt Arsenault Check::CheckType FirstTy = Check::CheckNone; 74813df4626SMatt Arsenault 74913df4626SMatt Arsenault CheckTy = Check::CheckNone; 75013df4626SMatt Arsenault CheckLoc = StringRef::npos; 75113df4626SMatt Arsenault 75213df4626SMatt Arsenault for (prefix_iterator I = CheckPrefixes.begin(), E = CheckPrefixes.end(); 75313df4626SMatt Arsenault I != E; ++I) { 75413df4626SMatt Arsenault StringRef Prefix(*I); 75513df4626SMatt Arsenault size_t PrefixLoc = Buffer.find(Prefix); 75613df4626SMatt Arsenault 75713df4626SMatt Arsenault if (PrefixLoc == StringRef::npos) 75813df4626SMatt Arsenault continue; 75913df4626SMatt Arsenault 76013df4626SMatt Arsenault // Track where we are searching for invalid prefixes that look almost right. 76113df4626SMatt Arsenault // We need to only advance to the first partial match on the next attempt 76213df4626SMatt Arsenault // since a partial match could be a substring of a later, valid prefix. 76313df4626SMatt Arsenault // Need to skip to the end of the word, otherwise we could end up 76413df4626SMatt Arsenault // matching a prefix in a substring later. 76513df4626SMatt Arsenault if (PrefixLoc < SearchLoc) 76613df4626SMatt Arsenault SearchLoc = SkipWord(Buffer, PrefixLoc); 76713df4626SMatt Arsenault 76813df4626SMatt Arsenault // We only want to find the first match to avoid skipping some. 76913df4626SMatt Arsenault if (PrefixLoc > FirstLoc) 77013df4626SMatt Arsenault continue; 771a7181a1bSAlexey Samsonov // If one matching check-prefix is a prefix of another, choose the 772a7181a1bSAlexey Samsonov // longer one. 773a7181a1bSAlexey Samsonov if (PrefixLoc == FirstLoc && Prefix.size() < FirstPrefix.size()) 774a7181a1bSAlexey Samsonov continue; 77513df4626SMatt Arsenault 77613df4626SMatt Arsenault StringRef Rest = Buffer.drop_front(PrefixLoc); 77713df4626SMatt Arsenault // Make sure we have actually found the prefix, and not a word containing 77813df4626SMatt Arsenault // it. This should also prevent matching the wrong prefix when one is a 77913df4626SMatt Arsenault // substring of another. 78013df4626SMatt Arsenault if (PrefixLoc != 0 && IsPartOfWord(Buffer[PrefixLoc - 1])) 78143b5f572SDaniel Sanders FirstTy = Check::CheckNone; 78243b5f572SDaniel Sanders else 78343b5f572SDaniel Sanders FirstTy = FindCheckType(Rest, Prefix); 78413df4626SMatt Arsenault 78513df4626SMatt Arsenault FirstLoc = PrefixLoc; 786a7181a1bSAlexey Samsonov FirstPrefix = Prefix; 78713df4626SMatt Arsenault } 78813df4626SMatt Arsenault 789a7181a1bSAlexey Samsonov // If the first prefix is invalid, we should continue the search after it. 790a7181a1bSAlexey Samsonov if (FirstTy == Check::CheckNone) { 79113df4626SMatt Arsenault CheckLoc = SearchLoc; 792a7181a1bSAlexey Samsonov return ""; 793a7181a1bSAlexey Samsonov } 794a7181a1bSAlexey Samsonov 79513df4626SMatt Arsenault CheckTy = FirstTy; 79613df4626SMatt Arsenault CheckLoc = FirstLoc; 79713df4626SMatt Arsenault return FirstPrefix; 79813df4626SMatt Arsenault } 79913df4626SMatt Arsenault 80013df4626SMatt Arsenault static StringRef FindFirstMatchingPrefix(StringRef &Buffer, 80113df4626SMatt Arsenault unsigned &LineNumber, 80213df4626SMatt Arsenault Check::CheckType &CheckTy, 80313df4626SMatt Arsenault size_t &CheckLoc) { 80413df4626SMatt Arsenault while (!Buffer.empty()) { 80513df4626SMatt Arsenault StringRef Prefix = FindFirstCandidateMatch(Buffer, CheckTy, CheckLoc); 80613df4626SMatt Arsenault // If we found a real match, we are done. 80713df4626SMatt Arsenault if (!Prefix.empty()) { 80813df4626SMatt Arsenault LineNumber += Buffer.substr(0, CheckLoc).count('\n'); 80913df4626SMatt Arsenault return Prefix; 81013df4626SMatt Arsenault } 81113df4626SMatt Arsenault 81213df4626SMatt Arsenault // We didn't find any almost matches either, we are also done. 81313df4626SMatt Arsenault if (CheckLoc == StringRef::npos) 81413df4626SMatt Arsenault return StringRef(); 81513df4626SMatt Arsenault 81613df4626SMatt Arsenault LineNumber += Buffer.substr(0, CheckLoc + 1).count('\n'); 81713df4626SMatt Arsenault 81813df4626SMatt Arsenault // Advance to the last possible match we found and try again. 81913df4626SMatt Arsenault Buffer = Buffer.drop_front(CheckLoc + 1); 82013df4626SMatt Arsenault } 82113df4626SMatt Arsenault 82213df4626SMatt Arsenault return StringRef(); 82338820972SMatt Arsenault } 824ee3c74fbSChris Lattner 825ee3c74fbSChris Lattner /// ReadCheckFile - Read the check file, which specifies the sequence of 826ee3c74fbSChris Lattner /// expected strings. The strings are added to the CheckStrings vector. 82743d50d4aSEli Bendersky /// Returns true in case of an error, false otherwise. 828ee3c74fbSChris Lattner static bool ReadCheckFile(SourceMgr &SM, 82926cccfe1SChris Lattner std::vector<CheckString> &CheckStrings) { 830adf21f2aSRafael Espindola ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr = 831adf21f2aSRafael Espindola MemoryBuffer::getFileOrSTDIN(CheckFilename); 832adf21f2aSRafael Espindola if (std::error_code EC = FileOrErr.getError()) { 833adf21f2aSRafael Espindola errs() << "Could not open check file '" << CheckFilename 834adf21f2aSRafael Espindola << "': " << EC.message() << '\n'; 835ee3c74fbSChris Lattner return true; 836ee3c74fbSChris Lattner } 837a2f8fc5aSChris Lattner 838a2f8fc5aSChris Lattner // If we want to canonicalize whitespace, strip excess whitespace from the 8395ea04c38SGuy Benyei // buffer containing the CHECK lines. Remove DOS style line endings. 840adf21f2aSRafael Espindola MemoryBuffer *F = CanonicalizeInputFile(FileOrErr.get().release(), 841adf21f2aSRafael Espindola NoCanonicalizeWhiteSpace); 842a2f8fc5aSChris Lattner 843ee3c74fbSChris Lattner SM.AddNewSourceBuffer(F, SMLoc()); 844ee3c74fbSChris Lattner 84510f10cedSChris Lattner // Find all instances of CheckPrefix followed by : in the file. 846caa5fc0cSChris Lattner StringRef Buffer = F->getBuffer(); 84756ccdbbdSAlexander Kornienko 84856ccdbbdSAlexander Kornienko std::vector<Pattern> ImplicitNegativeChecks; 84956ccdbbdSAlexander Kornienko for (const auto &PatternString : ImplicitCheckNot) { 85056ccdbbdSAlexander Kornienko // Create a buffer with fake command line content in order to display the 85156ccdbbdSAlexander Kornienko // command line option responsible for the specific implicit CHECK-NOT. 85256ccdbbdSAlexander Kornienko std::string Prefix = std::string("-") + ImplicitCheckNot.ArgStr + "='"; 85356ccdbbdSAlexander Kornienko std::string Suffix = "'"; 85456ccdbbdSAlexander Kornienko MemoryBuffer *CmdLine = MemoryBuffer::getMemBufferCopy( 85556ccdbbdSAlexander Kornienko Prefix + PatternString + Suffix, "command line"); 85656ccdbbdSAlexander Kornienko StringRef PatternInBuffer = 85756ccdbbdSAlexander Kornienko CmdLine->getBuffer().substr(Prefix.size(), PatternString.size()); 85856ccdbbdSAlexander Kornienko SM.AddNewSourceBuffer(CmdLine, SMLoc()); 85956ccdbbdSAlexander Kornienko 86056ccdbbdSAlexander Kornienko ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot)); 86156ccdbbdSAlexander Kornienko ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer, 86256ccdbbdSAlexander Kornienko "IMPLICIT-CHECK", SM, 0); 86356ccdbbdSAlexander Kornienko } 86456ccdbbdSAlexander Kornienko 86556ccdbbdSAlexander Kornienko 86656ccdbbdSAlexander Kornienko std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks; 867236d2d5eSChris Lattner 86843d50d4aSEli Bendersky // LineNumber keeps track of the line on which CheckPrefix instances are 86943d50d4aSEli Bendersky // found. 87092987fb3SAlexander Kornienko unsigned LineNumber = 1; 87192987fb3SAlexander Kornienko 872ee3c74fbSChris Lattner while (1) { 87313df4626SMatt Arsenault Check::CheckType CheckTy; 87413df4626SMatt Arsenault size_t PrefixLoc; 87513df4626SMatt Arsenault 87613df4626SMatt Arsenault // See if a prefix occurs in the memory buffer. 87713df4626SMatt Arsenault StringRef UsedPrefix = FindFirstMatchingPrefix(Buffer, 87813df4626SMatt Arsenault LineNumber, 87913df4626SMatt Arsenault CheckTy, 88013df4626SMatt Arsenault PrefixLoc); 88113df4626SMatt Arsenault if (UsedPrefix.empty()) 882ee3c74fbSChris Lattner break; 883ee3c74fbSChris Lattner 88413df4626SMatt Arsenault Buffer = Buffer.drop_front(PrefixLoc); 88592987fb3SAlexander Kornienko 88613df4626SMatt Arsenault // Location to use for error messages. 88713df4626SMatt Arsenault const char *UsedPrefixStart = Buffer.data() + (PrefixLoc == 0 ? 0 : 1); 88892987fb3SAlexander Kornienko 88913df4626SMatt Arsenault // PrefixLoc is to the start of the prefix. Skip to the end. 89013df4626SMatt Arsenault Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy)); 89110f10cedSChris Lattner 89238820972SMatt Arsenault // Okay, we found the prefix, yay. Remember the rest of the line, but ignore 89338820972SMatt Arsenault // leading and trailing whitespace. 894236d2d5eSChris Lattner Buffer = Buffer.substr(Buffer.find_first_not_of(" \t")); 895ee3c74fbSChris Lattner 896ee3c74fbSChris Lattner // Scan ahead to the end of line. 897caa5fc0cSChris Lattner size_t EOL = Buffer.find_first_of("\n\r"); 898ee3c74fbSChris Lattner 899838fb09aSDan Gohman // Remember the location of the start of the pattern, for diagnostics. 900838fb09aSDan Gohman SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data()); 901838fb09aSDan Gohman 90274d50731SChris Lattner // Parse the pattern. 90338820972SMatt Arsenault Pattern P(CheckTy); 90413df4626SMatt Arsenault if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber)) 905ee3c74fbSChris Lattner return true; 906ee3c74fbSChris Lattner 907f8bd2e5bSStephen Lin // Verify that CHECK-LABEL lines do not define or use variables 90838820972SMatt Arsenault if ((CheckTy == Check::CheckLabel) && P.hasVariable()) { 90913df4626SMatt Arsenault SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart), 910f8bd2e5bSStephen Lin SourceMgr::DK_Error, 91113df4626SMatt Arsenault "found '" + UsedPrefix + "-LABEL:'" 91213df4626SMatt Arsenault " with variable definition or use"); 913f8bd2e5bSStephen Lin return true; 914f8bd2e5bSStephen Lin } 915f8bd2e5bSStephen Lin 916236d2d5eSChris Lattner Buffer = Buffer.substr(EOL); 91774d50731SChris Lattner 918da108b4eSChris Lattner // Verify that CHECK-NEXT lines have at least one CHECK line before them. 91938820972SMatt Arsenault if ((CheckTy == Check::CheckNext) && CheckStrings.empty()) { 92013df4626SMatt Arsenault SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart), 92103b80a40SChris Lattner SourceMgr::DK_Error, 92213df4626SMatt Arsenault "found '" + UsedPrefix + "-NEXT:' without previous '" 92313df4626SMatt Arsenault + UsedPrefix + ": line"); 924da108b4eSChris Lattner return true; 925da108b4eSChris Lattner } 926da108b4eSChris Lattner 92791a1b2c9SMichael Liao // Handle CHECK-DAG/-NOT. 92838820972SMatt Arsenault if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) { 92991a1b2c9SMichael Liao DagNotMatches.push_back(P); 93074d50731SChris Lattner continue; 93174d50731SChris Lattner } 93274d50731SChris Lattner 933ee3c74fbSChris Lattner // Okay, add the string we captured to the output vector and move on. 9343b40b445SChris Lattner CheckStrings.push_back(CheckString(P, 93513df4626SMatt Arsenault UsedPrefix, 936838fb09aSDan Gohman PatternLoc, 93738820972SMatt Arsenault CheckTy)); 93891a1b2c9SMichael Liao std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); 93956ccdbbdSAlexander Kornienko DagNotMatches = ImplicitNegativeChecks; 940ee3c74fbSChris Lattner } 941ee3c74fbSChris Lattner 94213df4626SMatt Arsenault // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first 94313df4626SMatt Arsenault // prefix as a filler for the error message. 94491a1b2c9SMichael Liao if (!DagNotMatches.empty()) { 94538820972SMatt Arsenault CheckStrings.push_back(CheckString(Pattern(Check::CheckEOF), 94613df4626SMatt Arsenault CheckPrefixes[0], 947eba55822SJakob Stoklund Olesen SMLoc::getFromPointer(Buffer.data()), 94838820972SMatt Arsenault Check::CheckEOF)); 94991a1b2c9SMichael Liao std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); 950eba55822SJakob Stoklund Olesen } 951eba55822SJakob Stoklund Olesen 952ee3c74fbSChris Lattner if (CheckStrings.empty()) { 95313df4626SMatt Arsenault errs() << "error: no check strings found with prefix" 95413df4626SMatt Arsenault << (CheckPrefixes.size() > 1 ? "es " : " "); 95513df4626SMatt Arsenault for (size_t I = 0, N = CheckPrefixes.size(); I != N; ++I) { 95613df4626SMatt Arsenault StringRef Prefix(CheckPrefixes[I]); 95713df4626SMatt Arsenault errs() << '\'' << Prefix << ":'"; 95813df4626SMatt Arsenault if (I != N - 1) 95913df4626SMatt Arsenault errs() << ", "; 96013df4626SMatt Arsenault } 96113df4626SMatt Arsenault 96213df4626SMatt Arsenault errs() << '\n'; 963ee3c74fbSChris Lattner return true; 964ee3c74fbSChris Lattner } 965ee3c74fbSChris Lattner 966ee3c74fbSChris Lattner return false; 967ee3c74fbSChris Lattner } 968ee3c74fbSChris Lattner 96991a1b2c9SMichael Liao static void PrintCheckFailed(const SourceMgr &SM, const SMLoc &Loc, 97091a1b2c9SMichael Liao const Pattern &Pat, StringRef Buffer, 971e0ef65abSDaniel Dunbar StringMap<StringRef> &VariableTable) { 972da108b4eSChris Lattner // Otherwise, we have an error, emit an error message. 97391a1b2c9SMichael Liao SM.PrintMessage(Loc, SourceMgr::DK_Error, 97403b80a40SChris Lattner "expected string not found in input"); 975da108b4eSChris Lattner 976da108b4eSChris Lattner // Print the "scanning from here" line. If the current position is at the 977da108b4eSChris Lattner // end of a line, advance to the start of the next line. 978caa5fc0cSChris Lattner Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r")); 979da108b4eSChris Lattner 98003b80a40SChris Lattner SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 98103b80a40SChris Lattner "scanning from here"); 982e0ef65abSDaniel Dunbar 983e0ef65abSDaniel Dunbar // Allow the pattern to print additional information if desired. 98491a1b2c9SMichael Liao Pat.PrintFailureInfo(SM, Buffer, VariableTable); 98591a1b2c9SMichael Liao } 98691a1b2c9SMichael Liao 98791a1b2c9SMichael Liao static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr, 98891a1b2c9SMichael Liao StringRef Buffer, 98991a1b2c9SMichael Liao StringMap<StringRef> &VariableTable) { 99091a1b2c9SMichael Liao PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable); 991da108b4eSChris Lattner } 992da108b4eSChris Lattner 99337183584SChris Lattner /// CountNumNewlinesBetween - Count the number of newlines in the specified 99437183584SChris Lattner /// range. 995592fe880SRichard Smith static unsigned CountNumNewlinesBetween(StringRef Range, 996592fe880SRichard Smith const char *&FirstNewLine) { 997da108b4eSChris Lattner unsigned NumNewLines = 0; 99837183584SChris Lattner while (1) { 999da108b4eSChris Lattner // Scan for newline. 100037183584SChris Lattner Range = Range.substr(Range.find_first_of("\n\r")); 100137183584SChris Lattner if (Range.empty()) return NumNewLines; 1002da108b4eSChris Lattner 1003da108b4eSChris Lattner ++NumNewLines; 1004da108b4eSChris Lattner 1005da108b4eSChris Lattner // Handle \n\r and \r\n as a single newline. 100637183584SChris Lattner if (Range.size() > 1 && 100737183584SChris Lattner (Range[1] == '\n' || Range[1] == '\r') && 100837183584SChris Lattner (Range[0] != Range[1])) 100937183584SChris Lattner Range = Range.substr(1); 101037183584SChris Lattner Range = Range.substr(1); 1011592fe880SRichard Smith 1012592fe880SRichard Smith if (NumNewLines == 1) 1013592fe880SRichard Smith FirstNewLine = Range.begin(); 1014da108b4eSChris Lattner } 1015da108b4eSChris Lattner } 1016da108b4eSChris Lattner 1017dcc7d48dSMichael Liao size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer, 1018e93a3a08SStephen Lin bool IsLabelScanMode, size_t &MatchLen, 1019dcc7d48dSMichael Liao StringMap<StringRef> &VariableTable) const { 102091a1b2c9SMichael Liao size_t LastPos = 0; 102191a1b2c9SMichael Liao std::vector<const Pattern *> NotStrings; 102291a1b2c9SMichael Liao 1023e93a3a08SStephen Lin // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL 1024e93a3a08SStephen Lin // bounds; we have not processed variable definitions within the bounded block 1025e93a3a08SStephen Lin // yet so cannot handle any final CHECK-DAG yet; this is handled when going 1026e93a3a08SStephen Lin // over the block again (including the last CHECK-LABEL) in normal mode. 1027e93a3a08SStephen Lin if (!IsLabelScanMode) { 102891a1b2c9SMichael Liao // Match "dag strings" (with mixed "not strings" if any). 102991a1b2c9SMichael Liao LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable); 103091a1b2c9SMichael Liao if (LastPos == StringRef::npos) 103191a1b2c9SMichael Liao return StringRef::npos; 1032e93a3a08SStephen Lin } 103391a1b2c9SMichael Liao 103491a1b2c9SMichael Liao // Match itself from the last position after matching CHECK-DAG. 103591a1b2c9SMichael Liao StringRef MatchBuffer = Buffer.substr(LastPos); 103691a1b2c9SMichael Liao size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable); 1037dcc7d48dSMichael Liao if (MatchPos == StringRef::npos) { 103891a1b2c9SMichael Liao PrintCheckFailed(SM, *this, MatchBuffer, VariableTable); 1039dcc7d48dSMichael Liao return StringRef::npos; 1040dcc7d48dSMichael Liao } 104191a1b2c9SMichael Liao MatchPos += LastPos; 1042dcc7d48dSMichael Liao 1043e93a3a08SStephen Lin // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT 1044e93a3a08SStephen Lin // or CHECK-NOT 1045e93a3a08SStephen Lin if (!IsLabelScanMode) { 104691a1b2c9SMichael Liao StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); 1047dcc7d48dSMichael Liao 1048dcc7d48dSMichael Liao // If this check is a "CHECK-NEXT", verify that the previous match was on 1049dcc7d48dSMichael Liao // the previous line (i.e. that there is one newline between them). 1050dcc7d48dSMichael Liao if (CheckNext(SM, SkippedRegion)) 1051dcc7d48dSMichael Liao return StringRef::npos; 1052dcc7d48dSMichael Liao 1053dcc7d48dSMichael Liao // If this match had "not strings", verify that they don't exist in the 1054dcc7d48dSMichael Liao // skipped region. 105591a1b2c9SMichael Liao if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) 1056dcc7d48dSMichael Liao return StringRef::npos; 1057f8bd2e5bSStephen Lin } 1058dcc7d48dSMichael Liao 1059dcc7d48dSMichael Liao return MatchPos; 1060dcc7d48dSMichael Liao } 1061dcc7d48dSMichael Liao 1062dcc7d48dSMichael Liao bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const { 106338820972SMatt Arsenault if (CheckTy != Check::CheckNext) 1064dcc7d48dSMichael Liao return false; 1065dcc7d48dSMichael Liao 1066dcc7d48dSMichael Liao // Count the number of newlines between the previous match and this one. 1067dcc7d48dSMichael Liao assert(Buffer.data() != 1068dcc7d48dSMichael Liao SM.getMemoryBuffer( 1069dcc7d48dSMichael Liao SM.FindBufferContainingLoc( 1070dcc7d48dSMichael Liao SMLoc::getFromPointer(Buffer.data())))->getBufferStart() && 1071dcc7d48dSMichael Liao "CHECK-NEXT can't be the first check in a file"); 1072dcc7d48dSMichael Liao 107366f09ad0SCraig Topper const char *FirstNewLine = nullptr; 1074592fe880SRichard Smith unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 1075dcc7d48dSMichael Liao 1076dcc7d48dSMichael Liao if (NumNewLines == 0) { 107713df4626SMatt Arsenault SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix + 1078dcc7d48dSMichael Liao "-NEXT: is on the same line as previous match"); 1079dcc7d48dSMichael Liao SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), 1080dcc7d48dSMichael Liao SourceMgr::DK_Note, "'next' match was here"); 1081dcc7d48dSMichael Liao SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1082dcc7d48dSMichael Liao "previous match ended here"); 1083dcc7d48dSMichael Liao return true; 1084dcc7d48dSMichael Liao } 1085dcc7d48dSMichael Liao 1086dcc7d48dSMichael Liao if (NumNewLines != 1) { 108713df4626SMatt Arsenault SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix + 1088dcc7d48dSMichael Liao "-NEXT: is not on the line after the previous match"); 1089dcc7d48dSMichael Liao SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), 1090dcc7d48dSMichael Liao SourceMgr::DK_Note, "'next' match was here"); 1091dcc7d48dSMichael Liao SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1092dcc7d48dSMichael Liao "previous match ended here"); 1093592fe880SRichard Smith SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note, 1094592fe880SRichard Smith "non-matching line after previous match is here"); 1095dcc7d48dSMichael Liao return true; 1096dcc7d48dSMichael Liao } 1097dcc7d48dSMichael Liao 1098dcc7d48dSMichael Liao return false; 1099dcc7d48dSMichael Liao } 1100dcc7d48dSMichael Liao 1101dcc7d48dSMichael Liao bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer, 110291a1b2c9SMichael Liao const std::vector<const Pattern *> &NotStrings, 1103dcc7d48dSMichael Liao StringMap<StringRef> &VariableTable) const { 1104dcc7d48dSMichael Liao for (unsigned ChunkNo = 0, e = NotStrings.size(); 1105dcc7d48dSMichael Liao ChunkNo != e; ++ChunkNo) { 110691a1b2c9SMichael Liao const Pattern *Pat = NotStrings[ChunkNo]; 110738820972SMatt Arsenault assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!"); 110891a1b2c9SMichael Liao 1109dcc7d48dSMichael Liao size_t MatchLen = 0; 111091a1b2c9SMichael Liao size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable); 1111dcc7d48dSMichael Liao 1112dcc7d48dSMichael Liao if (Pos == StringRef::npos) continue; 1113dcc7d48dSMichael Liao 1114dcc7d48dSMichael Liao SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()+Pos), 1115dcc7d48dSMichael Liao SourceMgr::DK_Error, 111613df4626SMatt Arsenault Prefix + "-NOT: string occurred!"); 111791a1b2c9SMichael Liao SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note, 111813df4626SMatt Arsenault Prefix + "-NOT: pattern specified here"); 1119dcc7d48dSMichael Liao return true; 1120dcc7d48dSMichael Liao } 1121dcc7d48dSMichael Liao 1122dcc7d48dSMichael Liao return false; 1123dcc7d48dSMichael Liao } 1124dcc7d48dSMichael Liao 112591a1b2c9SMichael Liao size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer, 112691a1b2c9SMichael Liao std::vector<const Pattern *> &NotStrings, 112791a1b2c9SMichael Liao StringMap<StringRef> &VariableTable) const { 112891a1b2c9SMichael Liao if (DagNotStrings.empty()) 112991a1b2c9SMichael Liao return 0; 113091a1b2c9SMichael Liao 113191a1b2c9SMichael Liao size_t LastPos = 0; 113291a1b2c9SMichael Liao size_t StartPos = LastPos; 113391a1b2c9SMichael Liao 113491a1b2c9SMichael Liao for (unsigned ChunkNo = 0, e = DagNotStrings.size(); 113591a1b2c9SMichael Liao ChunkNo != e; ++ChunkNo) { 113691a1b2c9SMichael Liao const Pattern &Pat = DagNotStrings[ChunkNo]; 113791a1b2c9SMichael Liao 113838820972SMatt Arsenault assert((Pat.getCheckTy() == Check::CheckDAG || 113938820972SMatt Arsenault Pat.getCheckTy() == Check::CheckNot) && 114091a1b2c9SMichael Liao "Invalid CHECK-DAG or CHECK-NOT!"); 114191a1b2c9SMichael Liao 114238820972SMatt Arsenault if (Pat.getCheckTy() == Check::CheckNot) { 114391a1b2c9SMichael Liao NotStrings.push_back(&Pat); 114491a1b2c9SMichael Liao continue; 114591a1b2c9SMichael Liao } 114691a1b2c9SMichael Liao 114738820972SMatt Arsenault assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!"); 114891a1b2c9SMichael Liao 114991a1b2c9SMichael Liao size_t MatchLen = 0, MatchPos; 115091a1b2c9SMichael Liao 115191a1b2c9SMichael Liao // CHECK-DAG always matches from the start. 115291a1b2c9SMichael Liao StringRef MatchBuffer = Buffer.substr(StartPos); 115391a1b2c9SMichael Liao MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable); 115491a1b2c9SMichael Liao // With a group of CHECK-DAGs, a single mismatching means the match on 115591a1b2c9SMichael Liao // that group of CHECK-DAGs fails immediately. 115691a1b2c9SMichael Liao if (MatchPos == StringRef::npos) { 115791a1b2c9SMichael Liao PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable); 115891a1b2c9SMichael Liao return StringRef::npos; 115991a1b2c9SMichael Liao } 116091a1b2c9SMichael Liao // Re-calc it as the offset relative to the start of the original string. 116191a1b2c9SMichael Liao MatchPos += StartPos; 116291a1b2c9SMichael Liao 116391a1b2c9SMichael Liao if (!NotStrings.empty()) { 116491a1b2c9SMichael Liao if (MatchPos < LastPos) { 116591a1b2c9SMichael Liao // Reordered? 116691a1b2c9SMichael Liao SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos), 116791a1b2c9SMichael Liao SourceMgr::DK_Error, 116813df4626SMatt Arsenault Prefix + "-DAG: found a match of CHECK-DAG" 116991a1b2c9SMichael Liao " reordering across a CHECK-NOT"); 117091a1b2c9SMichael Liao SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos), 117191a1b2c9SMichael Liao SourceMgr::DK_Note, 117213df4626SMatt Arsenault Prefix + "-DAG: the farthest match of CHECK-DAG" 117391a1b2c9SMichael Liao " is found here"); 117491a1b2c9SMichael Liao SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note, 117513df4626SMatt Arsenault Prefix + "-NOT: the crossed pattern specified" 117691a1b2c9SMichael Liao " here"); 117791a1b2c9SMichael Liao SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note, 117813df4626SMatt Arsenault Prefix + "-DAG: the reordered pattern specified" 117991a1b2c9SMichael Liao " here"); 118091a1b2c9SMichael Liao return StringRef::npos; 118191a1b2c9SMichael Liao } 118291a1b2c9SMichael Liao // All subsequent CHECK-DAGs should be matched from the farthest 118391a1b2c9SMichael Liao // position of all precedent CHECK-DAGs (including this one.) 118491a1b2c9SMichael Liao StartPos = LastPos; 118591a1b2c9SMichael Liao // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to 118691a1b2c9SMichael Liao // CHECK-DAG, verify that there's no 'not' strings occurred in that 118791a1b2c9SMichael Liao // region. 118891a1b2c9SMichael Liao StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); 1189cf708c32STim Northover if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) 119091a1b2c9SMichael Liao return StringRef::npos; 119191a1b2c9SMichael Liao // Clear "not strings". 119291a1b2c9SMichael Liao NotStrings.clear(); 119391a1b2c9SMichael Liao } 119491a1b2c9SMichael Liao 119591a1b2c9SMichael Liao // Update the last position with CHECK-DAG matches. 119691a1b2c9SMichael Liao LastPos = std::max(MatchPos + MatchLen, LastPos); 119791a1b2c9SMichael Liao } 119891a1b2c9SMichael Liao 119991a1b2c9SMichael Liao return LastPos; 120091a1b2c9SMichael Liao } 120191a1b2c9SMichael Liao 120213df4626SMatt Arsenault // A check prefix must contain only alphanumeric, hyphens and underscores. 120313df4626SMatt Arsenault static bool ValidateCheckPrefix(StringRef CheckPrefix) { 120413df4626SMatt Arsenault Regex Validator("^[a-zA-Z0-9_-]*$"); 120513df4626SMatt Arsenault return Validator.match(CheckPrefix); 120613df4626SMatt Arsenault } 120713df4626SMatt Arsenault 120813df4626SMatt Arsenault static bool ValidateCheckPrefixes() { 120913df4626SMatt Arsenault StringSet<> PrefixSet; 121013df4626SMatt Arsenault 121113df4626SMatt Arsenault for (prefix_iterator I = CheckPrefixes.begin(), E = CheckPrefixes.end(); 121213df4626SMatt Arsenault I != E; ++I) { 121313df4626SMatt Arsenault StringRef Prefix(*I); 121413df4626SMatt Arsenault 1215*24412b14SEli Bendersky // Reject empty prefixes. 1216*24412b14SEli Bendersky if (Prefix == "") 1217*24412b14SEli Bendersky return false; 1218*24412b14SEli Bendersky 121913df4626SMatt Arsenault if (!PrefixSet.insert(Prefix)) 122013df4626SMatt Arsenault return false; 122113df4626SMatt Arsenault 122213df4626SMatt Arsenault if (!ValidateCheckPrefix(Prefix)) 122313df4626SMatt Arsenault return false; 122413df4626SMatt Arsenault } 122513df4626SMatt Arsenault 122613df4626SMatt Arsenault return true; 122713df4626SMatt Arsenault } 122813df4626SMatt Arsenault 122913df4626SMatt Arsenault // I don't think there's a way to specify an initial value for cl::list, 123013df4626SMatt Arsenault // so if nothing was specified, add the default 123113df4626SMatt Arsenault static void AddCheckPrefixIfNeeded() { 123213df4626SMatt Arsenault if (CheckPrefixes.empty()) 123313df4626SMatt Arsenault CheckPrefixes.push_back("CHECK"); 1234c2735158SRui Ueyama } 1235c2735158SRui Ueyama 1236ee3c74fbSChris Lattner int main(int argc, char **argv) { 1237ee3c74fbSChris Lattner sys::PrintStackTraceOnErrorSignal(); 1238ee3c74fbSChris Lattner PrettyStackTraceProgram X(argc, argv); 1239ee3c74fbSChris Lattner cl::ParseCommandLineOptions(argc, argv); 1240ee3c74fbSChris Lattner 124113df4626SMatt Arsenault if (!ValidateCheckPrefixes()) { 124213df4626SMatt Arsenault errs() << "Supplied check-prefix is invalid! Prefixes must be unique and " 124313df4626SMatt Arsenault "start with a letter and contain only alphanumeric characters, " 124413df4626SMatt Arsenault "hyphens and underscores\n"; 1245c2735158SRui Ueyama return 2; 1246c2735158SRui Ueyama } 1247c2735158SRui Ueyama 124813df4626SMatt Arsenault AddCheckPrefixIfNeeded(); 124913df4626SMatt Arsenault 1250ee3c74fbSChris Lattner SourceMgr SM; 1251ee3c74fbSChris Lattner 1252ee3c74fbSChris Lattner // Read the expected strings from the check file. 125326cccfe1SChris Lattner std::vector<CheckString> CheckStrings; 1254ee3c74fbSChris Lattner if (ReadCheckFile(SM, CheckStrings)) 1255ee3c74fbSChris Lattner return 2; 1256ee3c74fbSChris Lattner 1257ee3c74fbSChris Lattner // Open the file to check and add it to SourceMgr. 1258adf21f2aSRafael Espindola ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr = 1259adf21f2aSRafael Espindola MemoryBuffer::getFileOrSTDIN(InputFilename); 1260adf21f2aSRafael Espindola if (std::error_code EC = FileOrErr.getError()) { 1261adf21f2aSRafael Espindola errs() << "Could not open input file '" << InputFilename 1262adf21f2aSRafael Espindola << "': " << EC.message() << '\n'; 12638e1c6477SEli Bendersky return 2; 1264ee3c74fbSChris Lattner } 1265adf21f2aSRafael Espindola std::unique_ptr<MemoryBuffer> File = std::move(FileOrErr.get()); 12662c3e5cdfSChris Lattner 1267e963d660SBenjamin Kramer if (File->getBufferSize() == 0) { 1268b692bed7SChris Lattner errs() << "FileCheck error: '" << InputFilename << "' is empty.\n"; 12698e1c6477SEli Bendersky return 2; 1270b692bed7SChris Lattner } 1271b692bed7SChris Lattner 12722c3e5cdfSChris Lattner // Remove duplicate spaces in the input file if requested. 12735ea04c38SGuy Benyei // Remove DOS style line endings. 1274e963d660SBenjamin Kramer MemoryBuffer *F = 127596c9d95fSAhmed Charles CanonicalizeInputFile(File.release(), NoCanonicalizeWhiteSpace); 12762c3e5cdfSChris Lattner 1277ee3c74fbSChris Lattner SM.AddNewSourceBuffer(F, SMLoc()); 1278ee3c74fbSChris Lattner 12798879e06dSChris Lattner /// VariableTable - This holds all the current filecheck variables. 12808879e06dSChris Lattner StringMap<StringRef> VariableTable; 12818879e06dSChris Lattner 1282ee3c74fbSChris Lattner // Check that we have all of the expected strings, in order, in the input 1283ee3c74fbSChris Lattner // file. 1284caa5fc0cSChris Lattner StringRef Buffer = F->getBuffer(); 1285ee3c74fbSChris Lattner 1286f8bd2e5bSStephen Lin bool hasError = false; 1287ee3c74fbSChris Lattner 1288f8bd2e5bSStephen Lin unsigned i = 0, j = 0, e = CheckStrings.size(); 1289ee3c74fbSChris Lattner 1290f8bd2e5bSStephen Lin while (true) { 1291f8bd2e5bSStephen Lin StringRef CheckRegion; 1292f8bd2e5bSStephen Lin if (j == e) { 1293f8bd2e5bSStephen Lin CheckRegion = Buffer; 1294f8bd2e5bSStephen Lin } else { 1295f8bd2e5bSStephen Lin const CheckString &CheckLabelStr = CheckStrings[j]; 129638820972SMatt Arsenault if (CheckLabelStr.CheckTy != Check::CheckLabel) { 1297f8bd2e5bSStephen Lin ++j; 1298f8bd2e5bSStephen Lin continue; 1299da108b4eSChris Lattner } 1300da108b4eSChris Lattner 1301f8bd2e5bSStephen Lin // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG 1302f8bd2e5bSStephen Lin size_t MatchLabelLen = 0; 1303e93a3a08SStephen Lin size_t MatchLabelPos = CheckLabelStr.Check(SM, Buffer, true, 1304f8bd2e5bSStephen Lin MatchLabelLen, VariableTable); 1305f8bd2e5bSStephen Lin if (MatchLabelPos == StringRef::npos) { 1306f8bd2e5bSStephen Lin hasError = true; 1307f8bd2e5bSStephen Lin break; 1308f8bd2e5bSStephen Lin } 1309f8bd2e5bSStephen Lin 1310f8bd2e5bSStephen Lin CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen); 1311f8bd2e5bSStephen Lin Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen); 1312f8bd2e5bSStephen Lin ++j; 1313f8bd2e5bSStephen Lin } 1314f8bd2e5bSStephen Lin 1315f8bd2e5bSStephen Lin for ( ; i != j; ++i) { 1316f8bd2e5bSStephen Lin const CheckString &CheckStr = CheckStrings[i]; 1317f8bd2e5bSStephen Lin 1318f8bd2e5bSStephen Lin // Check each string within the scanned region, including a second check 1319f8bd2e5bSStephen Lin // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG) 1320f8bd2e5bSStephen Lin size_t MatchLen = 0; 1321e93a3a08SStephen Lin size_t MatchPos = CheckStr.Check(SM, CheckRegion, false, MatchLen, 1322f8bd2e5bSStephen Lin VariableTable); 1323f8bd2e5bSStephen Lin 1324f8bd2e5bSStephen Lin if (MatchPos == StringRef::npos) { 1325f8bd2e5bSStephen Lin hasError = true; 1326f8bd2e5bSStephen Lin i = j; 1327f8bd2e5bSStephen Lin break; 1328f8bd2e5bSStephen Lin } 1329f8bd2e5bSStephen Lin 1330f8bd2e5bSStephen Lin CheckRegion = CheckRegion.substr(MatchPos + MatchLen); 1331f8bd2e5bSStephen Lin } 1332f8bd2e5bSStephen Lin 1333f8bd2e5bSStephen Lin if (j == e) 1334f8bd2e5bSStephen Lin break; 1335f8bd2e5bSStephen Lin } 1336f8bd2e5bSStephen Lin 1337f8bd2e5bSStephen Lin return hasError ? 1 : 0; 1338ee3c74fbSChris Lattner } 1339