1ee3c74fbSChris Lattner //===- FileCheck.cpp - Check that File's Contents match what is expected --===// 2ee3c74fbSChris Lattner // 3ee3c74fbSChris Lattner // The LLVM Compiler Infrastructure 4ee3c74fbSChris Lattner // 5ee3c74fbSChris Lattner // This file is distributed under the University of Illinois Open Source 6ee3c74fbSChris Lattner // License. See LICENSE.TXT for details. 7ee3c74fbSChris Lattner // 8ee3c74fbSChris Lattner //===----------------------------------------------------------------------===// 9ee3c74fbSChris Lattner // 10ee3c74fbSChris Lattner // FileCheck does a line-by line check of a file that validates whether it 11ee3c74fbSChris Lattner // contains the expected content. This is useful for regression tests etc. 12ee3c74fbSChris Lattner // 13ee3c74fbSChris Lattner // This program exits with an error status of 2 on error, exit status of 0 if 14ee3c74fbSChris Lattner // the file matched the expected contents, and exit status of 1 if it did not 15ee3c74fbSChris Lattner // contain the expected contents. 16ee3c74fbSChris Lattner // 17ee3c74fbSChris Lattner //===----------------------------------------------------------------------===// 18ee3c74fbSChris Lattner 1991d19d8eSChandler Carruth #include "llvm/ADT/SmallString.h" 2091d19d8eSChandler Carruth #include "llvm/ADT/StringExtras.h" 2191d19d8eSChandler Carruth #include "llvm/ADT/StringMap.h" 2213df4626SMatt Arsenault #include "llvm/ADT/StringSet.h" 23ee3c74fbSChris Lattner #include "llvm/Support/CommandLine.h" 24ee3c74fbSChris Lattner #include "llvm/Support/MemoryBuffer.h" 25ee3c74fbSChris Lattner #include "llvm/Support/PrettyStackTrace.h" 26f08d2db9SChris Lattner #include "llvm/Support/Regex.h" 2791d19d8eSChandler Carruth #include "llvm/Support/Signals.h" 28ee3c74fbSChris Lattner #include "llvm/Support/SourceMgr.h" 29ee3c74fbSChris Lattner #include "llvm/Support/raw_ostream.h" 308879e06dSChris Lattner #include <algorithm> 31981af002SWill Dietz #include <cctype> 32e8b8f1bcSEli Bendersky #include <map> 33e8b8f1bcSEli Bendersky #include <string> 34a6e9c3e4SRafael Espindola #include <system_error> 35e8b8f1bcSEli Bendersky #include <vector> 36ee3c74fbSChris Lattner using namespace llvm; 37ee3c74fbSChris Lattner 38ee3c74fbSChris Lattner static cl::opt<std::string> 39ee3c74fbSChris Lattner CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required); 40ee3c74fbSChris Lattner 41ee3c74fbSChris Lattner static cl::opt<std::string> 42ee3c74fbSChris Lattner InputFilename("input-file", cl::desc("File to check (defaults to stdin)"), 43ee3c74fbSChris Lattner cl::init("-"), cl::value_desc("filename")); 44ee3c74fbSChris Lattner 4513df4626SMatt Arsenault static cl::list<std::string> 4613df4626SMatt Arsenault CheckPrefixes("check-prefix", 47ee3c74fbSChris Lattner cl::desc("Prefix to use from check file (defaults to 'CHECK')")); 48ee3c74fbSChris Lattner 492c3e5cdfSChris Lattner static cl::opt<bool> 502c3e5cdfSChris Lattner NoCanonicalizeWhiteSpace("strict-whitespace", 512c3e5cdfSChris Lattner cl::desc("Do not treat all horizontal whitespace as equivalent")); 522c3e5cdfSChris Lattner 5356ccdbbdSAlexander Kornienko static cl::list<std::string> ImplicitCheckNot( 5456ccdbbdSAlexander Kornienko "implicit-check-not", 5556ccdbbdSAlexander Kornienko cl::desc("Add an implicit negative check with this pattern to every\n" 5656ccdbbdSAlexander Kornienko "positive check. This can be used to ensure that no instances of\n" 5756ccdbbdSAlexander Kornienko "this pattern occur which are not matched by a positive pattern"), 5856ccdbbdSAlexander Kornienko cl::value_desc("pattern")); 5956ccdbbdSAlexander Kornienko 601b9f936fSJustin Bogner static cl::opt<bool> AllowEmptyInput( 611b9f936fSJustin Bogner "allow-empty", cl::init(false), 621b9f936fSJustin Bogner cl::desc("Allow the input file to be empty. This is useful when making\n" 631b9f936fSJustin Bogner "checks that some error message does not occur, for example.")); 641b9f936fSJustin Bogner 6513df4626SMatt Arsenault typedef cl::list<std::string>::const_iterator prefix_iterator; 6613df4626SMatt Arsenault 6774d50731SChris Lattner //===----------------------------------------------------------------------===// 6874d50731SChris Lattner // Pattern Handling Code. 6974d50731SChris Lattner //===----------------------------------------------------------------------===// 7074d50731SChris Lattner 7138820972SMatt Arsenault namespace Check { 7238820972SMatt Arsenault enum CheckType { 7338820972SMatt Arsenault CheckNone = 0, 7438820972SMatt Arsenault CheckPlain, 7538820972SMatt Arsenault CheckNext, 7638820972SMatt Arsenault CheckNot, 7738820972SMatt Arsenault CheckDAG, 7838820972SMatt Arsenault CheckLabel, 790a4c44bdSChris Lattner 80eba55822SJakob Stoklund Olesen /// MatchEOF - When set, this pattern only matches the end of file. This is 81eba55822SJakob Stoklund Olesen /// used for trailing CHECK-NOTs. 8238820972SMatt Arsenault CheckEOF 8338820972SMatt Arsenault }; 8438820972SMatt Arsenault } 85eba55822SJakob Stoklund Olesen 8638820972SMatt Arsenault class Pattern { 8738820972SMatt Arsenault SMLoc PatternLoc; 8891a1b2c9SMichael Liao 8938820972SMatt Arsenault Check::CheckType CheckTy; 9091a1b2c9SMichael Liao 91b16ab0c4SChris Lattner /// FixedStr - If non-empty, this pattern is a fixed string match with the 92b16ab0c4SChris Lattner /// specified fixed string. 93221460e0SChris Lattner StringRef FixedStr; 94b16ab0c4SChris Lattner 95b16ab0c4SChris Lattner /// RegEx - If non-empty, this is a regex pattern. 96b16ab0c4SChris Lattner std::string RegExStr; 978879e06dSChris Lattner 9892987fb3SAlexander Kornienko /// \brief Contains the number of line this pattern is in. 9992987fb3SAlexander Kornienko unsigned LineNumber; 10092987fb3SAlexander Kornienko 1018879e06dSChris Lattner /// VariableUses - Entries in this vector map to uses of a variable in the 1028879e06dSChris Lattner /// pattern, e.g. "foo[[bar]]baz". In this case, the RegExStr will contain 1038879e06dSChris Lattner /// "foobaz" and we'll get an entry in this vector that tells us to insert the 1048879e06dSChris Lattner /// value of bar at offset 3. 1058879e06dSChris Lattner std::vector<std::pair<StringRef, unsigned> > VariableUses; 1068879e06dSChris Lattner 107e8b8f1bcSEli Bendersky /// VariableDefs - Maps definitions of variables to their parenthesized 108e8b8f1bcSEli Bendersky /// capture numbers. 109e8b8f1bcSEli Bendersky /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1. 110e8b8f1bcSEli Bendersky std::map<StringRef, unsigned> VariableDefs; 1118879e06dSChris Lattner 1123b40b445SChris Lattner public: 1133b40b445SChris Lattner 11438820972SMatt Arsenault Pattern(Check::CheckType Ty) 11538820972SMatt Arsenault : CheckTy(Ty) { } 11674d50731SChris Lattner 1170b707eb8SMichael Liao /// getLoc - Return the location in source code. 1180b707eb8SMichael Liao SMLoc getLoc() const { return PatternLoc; } 1190b707eb8SMichael Liao 12013df4626SMatt Arsenault /// ParsePattern - Parse the given string into the Pattern. Prefix provides 12113df4626SMatt Arsenault /// which prefix is being matched, SM provides the SourceMgr used for error 12213df4626SMatt Arsenault /// reports, and LineNumber is the line number in the input file from which 12313df4626SMatt Arsenault /// the pattern string was read. Returns true in case of an error, false 12413df4626SMatt Arsenault /// otherwise. 12513df4626SMatt Arsenault bool ParsePattern(StringRef PatternStr, 12613df4626SMatt Arsenault StringRef Prefix, 12713df4626SMatt Arsenault SourceMgr &SM, 12813df4626SMatt Arsenault unsigned LineNumber); 1293b40b445SChris Lattner 1303b40b445SChris Lattner /// Match - Match the pattern string against the input buffer Buffer. This 1313b40b445SChris Lattner /// returns the position that is matched or npos if there is no match. If 1323b40b445SChris Lattner /// there is a match, the size of the matched string is returned in MatchLen. 1338879e06dSChris Lattner /// 1348879e06dSChris Lattner /// The VariableTable StringMap provides the current values of filecheck 1358879e06dSChris Lattner /// variables and is updated if this match defines new values. 1368879e06dSChris Lattner size_t Match(StringRef Buffer, size_t &MatchLen, 1378879e06dSChris Lattner StringMap<StringRef> &VariableTable) const; 138b16ab0c4SChris Lattner 139e0ef65abSDaniel Dunbar /// PrintFailureInfo - Print additional information about a failure to match 140e0ef65abSDaniel Dunbar /// involving this pattern. 141e0ef65abSDaniel Dunbar void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer, 142e0ef65abSDaniel Dunbar const StringMap<StringRef> &VariableTable) const; 143e0ef65abSDaniel Dunbar 144f8bd2e5bSStephen Lin bool hasVariable() const { return !(VariableUses.empty() && 145f8bd2e5bSStephen Lin VariableDefs.empty()); } 146f8bd2e5bSStephen Lin 14738820972SMatt Arsenault Check::CheckType getCheckTy() const { return CheckTy; } 14891a1b2c9SMichael Liao 149b16ab0c4SChris Lattner private: 150e8b8f1bcSEli Bendersky bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM); 151e8b8f1bcSEli Bendersky void AddBackrefToRegEx(unsigned BackrefNum); 152fd29d886SDaniel Dunbar 153fd29d886SDaniel Dunbar /// ComputeMatchDistance - Compute an arbitrary estimate for the quality of 154fd29d886SDaniel Dunbar /// matching this pattern at the start of \arg Buffer; a distance of zero 155fd29d886SDaniel Dunbar /// should correspond to a perfect match. 156fd29d886SDaniel Dunbar unsigned ComputeMatchDistance(StringRef Buffer, 157fd29d886SDaniel Dunbar const StringMap<StringRef> &VariableTable) const; 15892987fb3SAlexander Kornienko 15992987fb3SAlexander Kornienko /// \brief Evaluates expression and stores the result to \p Value. 16092987fb3SAlexander Kornienko /// \return true on success. false when the expression has invalid syntax. 16192987fb3SAlexander Kornienko bool EvaluateExpression(StringRef Expr, std::string &Value) const; 162061d2baaSEli Bendersky 163061d2baaSEli Bendersky /// \brief Finds the closing sequence of a regex variable usage or 164061d2baaSEli Bendersky /// definition. Str has to point in the beginning of the definition 165061d2baaSEli Bendersky /// (right after the opening sequence). 166061d2baaSEli Bendersky /// \return offset of the closing sequence within Str, or npos if it was not 167061d2baaSEli Bendersky /// found. 16881e5cd9eSAdrian Prantl size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM); 1693b40b445SChris Lattner }; 1703b40b445SChris Lattner 1718879e06dSChris Lattner 17213df4626SMatt Arsenault bool Pattern::ParsePattern(StringRef PatternStr, 17313df4626SMatt Arsenault StringRef Prefix, 17413df4626SMatt Arsenault SourceMgr &SM, 17592987fb3SAlexander Kornienko unsigned LineNumber) { 17692987fb3SAlexander Kornienko this->LineNumber = LineNumber; 1770a4c44bdSChris Lattner PatternLoc = SMLoc::getFromPointer(PatternStr.data()); 1780a4c44bdSChris Lattner 17974d50731SChris Lattner // Ignore trailing whitespace. 18074d50731SChris Lattner while (!PatternStr.empty() && 18174d50731SChris Lattner (PatternStr.back() == ' ' || PatternStr.back() == '\t')) 18274d50731SChris Lattner PatternStr = PatternStr.substr(0, PatternStr.size()-1); 18374d50731SChris Lattner 18474d50731SChris Lattner // Check that there is something on the line. 18574d50731SChris Lattner if (PatternStr.empty()) { 18603b80a40SChris Lattner SM.PrintMessage(PatternLoc, SourceMgr::DK_Error, 18703b80a40SChris Lattner "found empty check string with prefix '" + 18813df4626SMatt Arsenault Prefix + ":'"); 18974d50731SChris Lattner return true; 19074d50731SChris Lattner } 19174d50731SChris Lattner 192221460e0SChris Lattner // Check to see if this is a fixed string, or if it has regex pieces. 193d9466967STed Kremenek if (PatternStr.size() < 2 || 1948879e06dSChris Lattner (PatternStr.find("{{") == StringRef::npos && 1958879e06dSChris Lattner PatternStr.find("[[") == StringRef::npos)) { 196221460e0SChris Lattner FixedStr = PatternStr; 197221460e0SChris Lattner return false; 198221460e0SChris Lattner } 199221460e0SChris Lattner 2008879e06dSChris Lattner // Paren value #0 is for the fully matched string. Any new parenthesized 20153e0679dSChris Lattner // values add from there. 2028879e06dSChris Lattner unsigned CurParen = 1; 2038879e06dSChris Lattner 204b16ab0c4SChris Lattner // Otherwise, there is at least one regex piece. Build up the regex pattern 205b16ab0c4SChris Lattner // by escaping scary characters in fixed strings, building up one big regex. 206f08d2db9SChris Lattner while (!PatternStr.empty()) { 2078879e06dSChris Lattner // RegEx matches. 20853e0679dSChris Lattner if (PatternStr.startswith("{{")) { 20943d50d4aSEli Bendersky // This is the start of a regex match. Scan for the }}. 210f08d2db9SChris Lattner size_t End = PatternStr.find("}}"); 211f08d2db9SChris Lattner if (End == StringRef::npos) { 212f08d2db9SChris Lattner SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 21303b80a40SChris Lattner SourceMgr::DK_Error, 21403b80a40SChris Lattner "found start of regex string with no end '}}'"); 215f08d2db9SChris Lattner return true; 216f08d2db9SChris Lattner } 217f08d2db9SChris Lattner 218e53c95f1SChris Lattner // Enclose {{}} patterns in parens just like [[]] even though we're not 219e53c95f1SChris Lattner // capturing the result for any purpose. This is required in case the 220e53c95f1SChris Lattner // expression contains an alternation like: CHECK: abc{{x|z}}def. We 221e53c95f1SChris Lattner // want this to turn into: "abc(x|z)def" not "abcx|zdef". 222e53c95f1SChris Lattner RegExStr += '('; 223e53c95f1SChris Lattner ++CurParen; 224e53c95f1SChris Lattner 2258879e06dSChris Lattner if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM)) 2268879e06dSChris Lattner return true; 227e53c95f1SChris Lattner RegExStr += ')'; 22853e0679dSChris Lattner 2298879e06dSChris Lattner PatternStr = PatternStr.substr(End+2); 2308879e06dSChris Lattner continue; 2318879e06dSChris Lattner } 2328879e06dSChris Lattner 2338879e06dSChris Lattner // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .* 2348879e06dSChris Lattner // (or some other regex) and assigns it to the FileCheck variable 'foo'. The 2358879e06dSChris Lattner // second form is [[foo]] which is a reference to foo. The variable name 23657cb733bSDaniel Dunbar // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject 2378879e06dSChris Lattner // it. This is to catch some common errors. 23853e0679dSChris Lattner if (PatternStr.startswith("[[")) { 239061d2baaSEli Bendersky // Find the closing bracket pair ending the match. End is going to be an 240061d2baaSEli Bendersky // offset relative to the beginning of the match string. 24181e5cd9eSAdrian Prantl size_t End = FindRegexVarEnd(PatternStr.substr(2), SM); 242061d2baaSEli Bendersky 2438879e06dSChris Lattner if (End == StringRef::npos) { 2448879e06dSChris Lattner SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 24503b80a40SChris Lattner SourceMgr::DK_Error, 24603b80a40SChris Lattner "invalid named regex reference, no ]] found"); 247f08d2db9SChris Lattner return true; 248f08d2db9SChris Lattner } 249f08d2db9SChris Lattner 250061d2baaSEli Bendersky StringRef MatchStr = PatternStr.substr(2, End); 251061d2baaSEli Bendersky PatternStr = PatternStr.substr(End+4); 2528879e06dSChris Lattner 2538879e06dSChris Lattner // Get the regex name (e.g. "foo"). 2548879e06dSChris Lattner size_t NameEnd = MatchStr.find(':'); 2558879e06dSChris Lattner StringRef Name = MatchStr.substr(0, NameEnd); 2568879e06dSChris Lattner 2578879e06dSChris Lattner if (Name.empty()) { 25803b80a40SChris Lattner SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 25903b80a40SChris Lattner "invalid name in named regex: empty name"); 2608879e06dSChris Lattner return true; 2618879e06dSChris Lattner } 2628879e06dSChris Lattner 26392987fb3SAlexander Kornienko // Verify that the name/expression is well formed. FileCheck currently 26492987fb3SAlexander Kornienko // supports @LINE, @LINE+number, @LINE-number expressions. The check here 26592987fb3SAlexander Kornienko // is relaxed, more strict check is performed in \c EvaluateExpression. 26692987fb3SAlexander Kornienko bool IsExpression = false; 26792987fb3SAlexander Kornienko for (unsigned i = 0, e = Name.size(); i != e; ++i) { 26892987fb3SAlexander Kornienko if (i == 0 && Name[i] == '@') { 26992987fb3SAlexander Kornienko if (NameEnd != StringRef::npos) { 27092987fb3SAlexander Kornienko SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 27192987fb3SAlexander Kornienko SourceMgr::DK_Error, 27292987fb3SAlexander Kornienko "invalid name in named regex definition"); 27392987fb3SAlexander Kornienko return true; 27492987fb3SAlexander Kornienko } 27592987fb3SAlexander Kornienko IsExpression = true; 27692987fb3SAlexander Kornienko continue; 27792987fb3SAlexander Kornienko } 27892987fb3SAlexander Kornienko if (Name[i] != '_' && !isalnum(Name[i]) && 27992987fb3SAlexander Kornienko (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) { 2808879e06dSChris Lattner SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i), 28103b80a40SChris Lattner SourceMgr::DK_Error, "invalid name in named regex"); 2828879e06dSChris Lattner return true; 2838879e06dSChris Lattner } 28492987fb3SAlexander Kornienko } 2858879e06dSChris Lattner 2868879e06dSChris Lattner // Name can't start with a digit. 28783c74e9fSGuy Benyei if (isdigit(static_cast<unsigned char>(Name[0]))) { 28803b80a40SChris Lattner SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 28903b80a40SChris Lattner "invalid name in named regex"); 2908879e06dSChris Lattner return true; 2918879e06dSChris Lattner } 2928879e06dSChris Lattner 2938879e06dSChris Lattner // Handle [[foo]]. 2948879e06dSChris Lattner if (NameEnd == StringRef::npos) { 295e8b8f1bcSEli Bendersky // Handle variables that were defined earlier on the same line by 296e8b8f1bcSEli Bendersky // emitting a backreference. 297e8b8f1bcSEli Bendersky if (VariableDefs.find(Name) != VariableDefs.end()) { 298e8b8f1bcSEli Bendersky unsigned VarParenNum = VariableDefs[Name]; 299e8b8f1bcSEli Bendersky if (VarParenNum < 1 || VarParenNum > 9) { 300e8b8f1bcSEli Bendersky SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 301e8b8f1bcSEli Bendersky SourceMgr::DK_Error, 302e8b8f1bcSEli Bendersky "Can't back-reference more than 9 variables"); 303e8b8f1bcSEli Bendersky return true; 304e8b8f1bcSEli Bendersky } 305e8b8f1bcSEli Bendersky AddBackrefToRegEx(VarParenNum); 306e8b8f1bcSEli Bendersky } else { 3078879e06dSChris Lattner VariableUses.push_back(std::make_pair(Name, RegExStr.size())); 308e8b8f1bcSEli Bendersky } 3098879e06dSChris Lattner continue; 3108879e06dSChris Lattner } 3118879e06dSChris Lattner 3128879e06dSChris Lattner // Handle [[foo:.*]]. 313e8b8f1bcSEli Bendersky VariableDefs[Name] = CurParen; 3148879e06dSChris Lattner RegExStr += '('; 3158879e06dSChris Lattner ++CurParen; 3168879e06dSChris Lattner 3178879e06dSChris Lattner if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM)) 3188879e06dSChris Lattner return true; 3198879e06dSChris Lattner 3208879e06dSChris Lattner RegExStr += ')'; 3218879e06dSChris Lattner } 3228879e06dSChris Lattner 3238879e06dSChris Lattner // Handle fixed string matches. 3248879e06dSChris Lattner // Find the end, which is the start of the next regex. 3258879e06dSChris Lattner size_t FixedMatchEnd = PatternStr.find("{{"); 3268879e06dSChris Lattner FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[[")); 3276f4f77b7SHans Wennborg RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd)); 3288879e06dSChris Lattner PatternStr = PatternStr.substr(FixedMatchEnd); 329f08d2db9SChris Lattner } 330f08d2db9SChris Lattner 33174d50731SChris Lattner return false; 33274d50731SChris Lattner } 33374d50731SChris Lattner 334e8b8f1bcSEli Bendersky bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, 3358879e06dSChris Lattner SourceMgr &SM) { 336e8b8f1bcSEli Bendersky Regex R(RS); 3378879e06dSChris Lattner std::string Error; 3388879e06dSChris Lattner if (!R.isValid(Error)) { 339e8b8f1bcSEli Bendersky SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error, 34003b80a40SChris Lattner "invalid regex: " + Error); 3418879e06dSChris Lattner return true; 3428879e06dSChris Lattner } 3438879e06dSChris Lattner 344e8b8f1bcSEli Bendersky RegExStr += RS.str(); 3458879e06dSChris Lattner CurParen += R.getNumMatches(); 3468879e06dSChris Lattner return false; 3478879e06dSChris Lattner } 348b16ab0c4SChris Lattner 349e8b8f1bcSEli Bendersky void Pattern::AddBackrefToRegEx(unsigned BackrefNum) { 350e8b8f1bcSEli Bendersky assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number"); 351e8b8f1bcSEli Bendersky std::string Backref = std::string("\\") + 352e8b8f1bcSEli Bendersky std::string(1, '0' + BackrefNum); 353e8b8f1bcSEli Bendersky RegExStr += Backref; 354e8b8f1bcSEli Bendersky } 355e8b8f1bcSEli Bendersky 35692987fb3SAlexander Kornienko bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const { 35792987fb3SAlexander Kornienko // The only supported expression is @LINE([\+-]\d+)? 35892987fb3SAlexander Kornienko if (!Expr.startswith("@LINE")) 35992987fb3SAlexander Kornienko return false; 36092987fb3SAlexander Kornienko Expr = Expr.substr(StringRef("@LINE").size()); 36192987fb3SAlexander Kornienko int Offset = 0; 36292987fb3SAlexander Kornienko if (!Expr.empty()) { 36392987fb3SAlexander Kornienko if (Expr[0] == '+') 36492987fb3SAlexander Kornienko Expr = Expr.substr(1); 36592987fb3SAlexander Kornienko else if (Expr[0] != '-') 36692987fb3SAlexander Kornienko return false; 36792987fb3SAlexander Kornienko if (Expr.getAsInteger(10, Offset)) 36892987fb3SAlexander Kornienko return false; 36992987fb3SAlexander Kornienko } 37092987fb3SAlexander Kornienko Value = llvm::itostr(LineNumber + Offset); 37192987fb3SAlexander Kornienko return true; 37292987fb3SAlexander Kornienko } 37392987fb3SAlexander Kornienko 374f08d2db9SChris Lattner /// Match - Match the pattern string against the input buffer Buffer. This 375f08d2db9SChris Lattner /// returns the position that is matched or npos if there is no match. If 376f08d2db9SChris Lattner /// there is a match, the size of the matched string is returned in MatchLen. 3778879e06dSChris Lattner size_t Pattern::Match(StringRef Buffer, size_t &MatchLen, 3788879e06dSChris Lattner StringMap<StringRef> &VariableTable) const { 379eba55822SJakob Stoklund Olesen // If this is the EOF pattern, match it immediately. 38038820972SMatt Arsenault if (CheckTy == Check::CheckEOF) { 381eba55822SJakob Stoklund Olesen MatchLen = 0; 382eba55822SJakob Stoklund Olesen return Buffer.size(); 383eba55822SJakob Stoklund Olesen } 384eba55822SJakob Stoklund Olesen 385221460e0SChris Lattner // If this is a fixed string pattern, just match it now. 386221460e0SChris Lattner if (!FixedStr.empty()) { 387221460e0SChris Lattner MatchLen = FixedStr.size(); 388221460e0SChris Lattner return Buffer.find(FixedStr); 389221460e0SChris Lattner } 390221460e0SChris Lattner 391b16ab0c4SChris Lattner // Regex match. 3928879e06dSChris Lattner 3938879e06dSChris Lattner // If there are variable uses, we need to create a temporary string with the 3948879e06dSChris Lattner // actual value. 3958879e06dSChris Lattner StringRef RegExToMatch = RegExStr; 3968879e06dSChris Lattner std::string TmpStr; 3978879e06dSChris Lattner if (!VariableUses.empty()) { 3988879e06dSChris Lattner TmpStr = RegExStr; 3998879e06dSChris Lattner 4008879e06dSChris Lattner unsigned InsertOffset = 0; 4018879e06dSChris Lattner for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) { 40292987fb3SAlexander Kornienko std::string Value; 40392987fb3SAlexander Kornienko 40492987fb3SAlexander Kornienko if (VariableUses[i].first[0] == '@') { 40592987fb3SAlexander Kornienko if (!EvaluateExpression(VariableUses[i].first, Value)) 40692987fb3SAlexander Kornienko return StringRef::npos; 40792987fb3SAlexander Kornienko } else { 408e0ef65abSDaniel Dunbar StringMap<StringRef>::iterator it = 409e0ef65abSDaniel Dunbar VariableTable.find(VariableUses[i].first); 410e0ef65abSDaniel Dunbar // If the variable is undefined, return an error. 411e0ef65abSDaniel Dunbar if (it == VariableTable.end()) 412e0ef65abSDaniel Dunbar return StringRef::npos; 413e0ef65abSDaniel Dunbar 4146f4f77b7SHans Wennborg // Look up the value and escape it so that we can put it into the regex. 4156f4f77b7SHans Wennborg Value += Regex::escape(it->second); 41692987fb3SAlexander Kornienko } 4178879e06dSChris Lattner 4188879e06dSChris Lattner // Plop it into the regex at the adjusted offset. 4198879e06dSChris Lattner TmpStr.insert(TmpStr.begin()+VariableUses[i].second+InsertOffset, 4208879e06dSChris Lattner Value.begin(), Value.end()); 4218879e06dSChris Lattner InsertOffset += Value.size(); 4228879e06dSChris Lattner } 4238879e06dSChris Lattner 4248879e06dSChris Lattner // Match the newly constructed regex. 4258879e06dSChris Lattner RegExToMatch = TmpStr; 4268879e06dSChris Lattner } 4278879e06dSChris Lattner 4288879e06dSChris Lattner 429b16ab0c4SChris Lattner SmallVector<StringRef, 4> MatchInfo; 4308879e06dSChris Lattner if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo)) 431f08d2db9SChris Lattner return StringRef::npos; 432b16ab0c4SChris Lattner 433b16ab0c4SChris Lattner // Successful regex match. 434b16ab0c4SChris Lattner assert(!MatchInfo.empty() && "Didn't get any match"); 435b16ab0c4SChris Lattner StringRef FullMatch = MatchInfo[0]; 436b16ab0c4SChris Lattner 4378879e06dSChris Lattner // If this defines any variables, remember their values. 438e8b8f1bcSEli Bendersky for (std::map<StringRef, unsigned>::const_iterator I = VariableDefs.begin(), 439e8b8f1bcSEli Bendersky E = VariableDefs.end(); 440e8b8f1bcSEli Bendersky I != E; ++I) { 441e8b8f1bcSEli Bendersky assert(I->second < MatchInfo.size() && "Internal paren error"); 442e8b8f1bcSEli Bendersky VariableTable[I->first] = MatchInfo[I->second]; 4430a4c44bdSChris Lattner } 4440a4c44bdSChris Lattner 445b16ab0c4SChris Lattner MatchLen = FullMatch.size(); 446b16ab0c4SChris Lattner return FullMatch.data()-Buffer.data(); 447f08d2db9SChris Lattner } 448f08d2db9SChris Lattner 449fd29d886SDaniel Dunbar unsigned Pattern::ComputeMatchDistance(StringRef Buffer, 450fd29d886SDaniel Dunbar const StringMap<StringRef> &VariableTable) const { 451fd29d886SDaniel Dunbar // Just compute the number of matching characters. For regular expressions, we 452fd29d886SDaniel Dunbar // just compare against the regex itself and hope for the best. 453fd29d886SDaniel Dunbar // 454fd29d886SDaniel Dunbar // FIXME: One easy improvement here is have the regex lib generate a single 455fd29d886SDaniel Dunbar // example regular expression which matches, and use that as the example 456fd29d886SDaniel Dunbar // string. 457fd29d886SDaniel Dunbar StringRef ExampleString(FixedStr); 458fd29d886SDaniel Dunbar if (ExampleString.empty()) 459fd29d886SDaniel Dunbar ExampleString = RegExStr; 460fd29d886SDaniel Dunbar 461e9aa36c8SDaniel Dunbar // Only compare up to the first line in the buffer, or the string size. 462e9aa36c8SDaniel Dunbar StringRef BufferPrefix = Buffer.substr(0, ExampleString.size()); 463e9aa36c8SDaniel Dunbar BufferPrefix = BufferPrefix.split('\n').first; 464e9aa36c8SDaniel Dunbar return BufferPrefix.edit_distance(ExampleString); 465fd29d886SDaniel Dunbar } 466fd29d886SDaniel Dunbar 467e0ef65abSDaniel Dunbar void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer, 468e0ef65abSDaniel Dunbar const StringMap<StringRef> &VariableTable) const{ 469e0ef65abSDaniel Dunbar // If this was a regular expression using variables, print the current 470e0ef65abSDaniel Dunbar // variable values. 471e0ef65abSDaniel Dunbar if (!VariableUses.empty()) { 472e0ef65abSDaniel Dunbar for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) { 473e69170a1SAlp Toker SmallString<256> Msg; 474e69170a1SAlp Toker raw_svector_ostream OS(Msg); 47592987fb3SAlexander Kornienko StringRef Var = VariableUses[i].first; 47692987fb3SAlexander Kornienko if (Var[0] == '@') { 47792987fb3SAlexander Kornienko std::string Value; 47892987fb3SAlexander Kornienko if (EvaluateExpression(Var, Value)) { 47992987fb3SAlexander Kornienko OS << "with expression \""; 48092987fb3SAlexander Kornienko OS.write_escaped(Var) << "\" equal to \""; 48192987fb3SAlexander Kornienko OS.write_escaped(Value) << "\""; 48292987fb3SAlexander Kornienko } else { 48392987fb3SAlexander Kornienko OS << "uses incorrect expression \""; 48492987fb3SAlexander Kornienko OS.write_escaped(Var) << "\""; 48592987fb3SAlexander Kornienko } 48692987fb3SAlexander Kornienko } else { 48792987fb3SAlexander Kornienko StringMap<StringRef>::const_iterator it = VariableTable.find(Var); 488e0ef65abSDaniel Dunbar 489e0ef65abSDaniel Dunbar // Check for undefined variable references. 490e0ef65abSDaniel Dunbar if (it == VariableTable.end()) { 491e0ef65abSDaniel Dunbar OS << "uses undefined variable \""; 49292987fb3SAlexander Kornienko OS.write_escaped(Var) << "\""; 493e0ef65abSDaniel Dunbar } else { 494e0ef65abSDaniel Dunbar OS << "with variable \""; 495e0ef65abSDaniel Dunbar OS.write_escaped(Var) << "\" equal to \""; 496e0ef65abSDaniel Dunbar OS.write_escaped(it->second) << "\""; 497e0ef65abSDaniel Dunbar } 49892987fb3SAlexander Kornienko } 499e0ef65abSDaniel Dunbar 50003b80a40SChris Lattner SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 50103b80a40SChris Lattner OS.str()); 502e0ef65abSDaniel Dunbar } 503e0ef65abSDaniel Dunbar } 504fd29d886SDaniel Dunbar 505fd29d886SDaniel Dunbar // Attempt to find the closest/best fuzzy match. Usually an error happens 506fd29d886SDaniel Dunbar // because some string in the output didn't exactly match. In these cases, we 507fd29d886SDaniel Dunbar // would like to show the user a best guess at what "should have" matched, to 508fd29d886SDaniel Dunbar // save them having to actually check the input manually. 509fd29d886SDaniel Dunbar size_t NumLinesForward = 0; 510fd29d886SDaniel Dunbar size_t Best = StringRef::npos; 511fd29d886SDaniel Dunbar double BestQuality = 0; 512fd29d886SDaniel Dunbar 513fd29d886SDaniel Dunbar // Use an arbitrary 4k limit on how far we will search. 5142bf486ebSDan Gohman for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) { 515fd29d886SDaniel Dunbar if (Buffer[i] == '\n') 516fd29d886SDaniel Dunbar ++NumLinesForward; 517fd29d886SDaniel Dunbar 518df22bbf7SDan Gohman // Patterns have leading whitespace stripped, so skip whitespace when 519df22bbf7SDan Gohman // looking for something which looks like a pattern. 520df22bbf7SDan Gohman if (Buffer[i] == ' ' || Buffer[i] == '\t') 521df22bbf7SDan Gohman continue; 522df22bbf7SDan Gohman 523fd29d886SDaniel Dunbar // Compute the "quality" of this match as an arbitrary combination of the 524fd29d886SDaniel Dunbar // match distance and the number of lines skipped to get to this match. 525fd29d886SDaniel Dunbar unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable); 526fd29d886SDaniel Dunbar double Quality = Distance + (NumLinesForward / 100.); 527fd29d886SDaniel Dunbar 528fd29d886SDaniel Dunbar if (Quality < BestQuality || Best == StringRef::npos) { 529fd29d886SDaniel Dunbar Best = i; 530fd29d886SDaniel Dunbar BestQuality = Quality; 531fd29d886SDaniel Dunbar } 532fd29d886SDaniel Dunbar } 533fd29d886SDaniel Dunbar 534fd29d886SDaniel Dunbar // Print the "possible intended match here" line if we found something 535c069cc8eSDaniel Dunbar // reasonable and not equal to what we showed in the "scanning from here" 536c069cc8eSDaniel Dunbar // line. 537c069cc8eSDaniel Dunbar if (Best && Best != StringRef::npos && BestQuality < 50) { 538fd29d886SDaniel Dunbar SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best), 53903b80a40SChris Lattner SourceMgr::DK_Note, "possible intended match here"); 540fd29d886SDaniel Dunbar 541fd29d886SDaniel Dunbar // FIXME: If we wanted to be really friendly we would show why the match 542fd29d886SDaniel Dunbar // failed, as it can be hard to spot simple one character differences. 543fd29d886SDaniel Dunbar } 544e0ef65abSDaniel Dunbar } 54574d50731SChris Lattner 54681e5cd9eSAdrian Prantl size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) { 547061d2baaSEli Bendersky // Offset keeps track of the current offset within the input Str 548061d2baaSEli Bendersky size_t Offset = 0; 549061d2baaSEli Bendersky // [...] Nesting depth 550061d2baaSEli Bendersky size_t BracketDepth = 0; 551061d2baaSEli Bendersky 552061d2baaSEli Bendersky while (!Str.empty()) { 553061d2baaSEli Bendersky if (Str.startswith("]]") && BracketDepth == 0) 554061d2baaSEli Bendersky return Offset; 555061d2baaSEli Bendersky if (Str[0] == '\\') { 556061d2baaSEli Bendersky // Backslash escapes the next char within regexes, so skip them both. 557061d2baaSEli Bendersky Str = Str.substr(2); 558061d2baaSEli Bendersky Offset += 2; 559061d2baaSEli Bendersky } else { 560061d2baaSEli Bendersky switch (Str[0]) { 561061d2baaSEli Bendersky default: 562061d2baaSEli Bendersky break; 563061d2baaSEli Bendersky case '[': 564061d2baaSEli Bendersky BracketDepth++; 565061d2baaSEli Bendersky break; 566061d2baaSEli Bendersky case ']': 56781e5cd9eSAdrian Prantl if (BracketDepth == 0) { 56881e5cd9eSAdrian Prantl SM.PrintMessage(SMLoc::getFromPointer(Str.data()), 56981e5cd9eSAdrian Prantl SourceMgr::DK_Error, 57081e5cd9eSAdrian Prantl "missing closing \"]\" for regex variable"); 57181e5cd9eSAdrian Prantl exit(1); 57281e5cd9eSAdrian Prantl } 573061d2baaSEli Bendersky BracketDepth--; 574061d2baaSEli Bendersky break; 575061d2baaSEli Bendersky } 576061d2baaSEli Bendersky Str = Str.substr(1); 577061d2baaSEli Bendersky Offset++; 578061d2baaSEli Bendersky } 579061d2baaSEli Bendersky } 580061d2baaSEli Bendersky 581061d2baaSEli Bendersky return StringRef::npos; 582061d2baaSEli Bendersky } 583061d2baaSEli Bendersky 584061d2baaSEli Bendersky 58574d50731SChris Lattner //===----------------------------------------------------------------------===// 58674d50731SChris Lattner // Check Strings. 58774d50731SChris Lattner //===----------------------------------------------------------------------===// 5883b40b445SChris Lattner 5893b40b445SChris Lattner /// CheckString - This is a check that we found in the input file. 5903b40b445SChris Lattner struct CheckString { 5913b40b445SChris Lattner /// Pat - The pattern to match. 5923b40b445SChris Lattner Pattern Pat; 59326cccfe1SChris Lattner 59413df4626SMatt Arsenault /// Prefix - Which prefix name this check matched. 59513df4626SMatt Arsenault StringRef Prefix; 59613df4626SMatt Arsenault 59726cccfe1SChris Lattner /// Loc - The location in the match file that the check string was specified. 59826cccfe1SChris Lattner SMLoc Loc; 59926cccfe1SChris Lattner 60038820972SMatt Arsenault /// CheckTy - Specify what kind of check this is. e.g. CHECK-NEXT: directive, 60138820972SMatt Arsenault /// as opposed to a CHECK: directive. 60238820972SMatt Arsenault Check::CheckType CheckTy; 603f8bd2e5bSStephen Lin 60491a1b2c9SMichael Liao /// DagNotStrings - These are all of the strings that are disallowed from 605236d2d5eSChris Lattner /// occurring between this match string and the previous one (or start of 606236d2d5eSChris Lattner /// file). 60791a1b2c9SMichael Liao std::vector<Pattern> DagNotStrings; 608236d2d5eSChris Lattner 60913df4626SMatt Arsenault 61013df4626SMatt Arsenault CheckString(const Pattern &P, 61113df4626SMatt Arsenault StringRef S, 61213df4626SMatt Arsenault SMLoc L, 61313df4626SMatt Arsenault Check::CheckType Ty) 61413df4626SMatt Arsenault : Pat(P), Prefix(S), Loc(L), CheckTy(Ty) {} 615dcc7d48dSMichael Liao 61691a1b2c9SMichael Liao /// Check - Match check string and its "not strings" and/or "dag strings". 617e93a3a08SStephen Lin size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode, 618f8bd2e5bSStephen Lin size_t &MatchLen, StringMap<StringRef> &VariableTable) const; 619dcc7d48dSMichael Liao 620dcc7d48dSMichael Liao /// CheckNext - Verify there is a single line in the given buffer. 621dcc7d48dSMichael Liao bool CheckNext(const SourceMgr &SM, StringRef Buffer) const; 622dcc7d48dSMichael Liao 623dcc7d48dSMichael Liao /// CheckNot - Verify there's no "not strings" in the given buffer. 624dcc7d48dSMichael Liao bool CheckNot(const SourceMgr &SM, StringRef Buffer, 62591a1b2c9SMichael Liao const std::vector<const Pattern *> &NotStrings, 62691a1b2c9SMichael Liao StringMap<StringRef> &VariableTable) const; 62791a1b2c9SMichael Liao 62891a1b2c9SMichael Liao /// CheckDag - Match "dag strings" and their mixed "not strings". 62991a1b2c9SMichael Liao size_t CheckDag(const SourceMgr &SM, StringRef Buffer, 63091a1b2c9SMichael Liao std::vector<const Pattern *> &NotStrings, 631dcc7d48dSMichael Liao StringMap<StringRef> &VariableTable) const; 63226cccfe1SChris Lattner }; 63326cccfe1SChris Lattner 6345ea04c38SGuy Benyei /// Canonicalize whitespaces in the input file. Line endings are replaced 6355ea04c38SGuy Benyei /// with UNIX-style '\n'. 6365ea04c38SGuy Benyei /// 6375ea04c38SGuy Benyei /// \param PreserveHorizontal Don't squash consecutive horizontal whitespace 6385ea04c38SGuy Benyei /// characters to a single space. 6391961f14cSDavid Blaikie static std::unique_ptr<MemoryBuffer> 6401961f14cSDavid Blaikie CanonicalizeInputFile(std::unique_ptr<MemoryBuffer> MB, 6415ea04c38SGuy Benyei bool PreserveHorizontal) { 6420e45d24aSChris Lattner SmallString<128> NewFile; 643a2f8fc5aSChris Lattner NewFile.reserve(MB->getBufferSize()); 644a2f8fc5aSChris Lattner 645a2f8fc5aSChris Lattner for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd(); 646a2f8fc5aSChris Lattner Ptr != End; ++Ptr) { 647fd781bf0SNAKAMURA Takumi // Eliminate trailing dosish \r. 648fd781bf0SNAKAMURA Takumi if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') { 649fd781bf0SNAKAMURA Takumi continue; 650fd781bf0SNAKAMURA Takumi } 651fd781bf0SNAKAMURA Takumi 6525ea04c38SGuy Benyei // If current char is not a horizontal whitespace or if horizontal 6535ea04c38SGuy Benyei // whitespace canonicalization is disabled, dump it to output as is. 6545ea04c38SGuy Benyei if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) { 655a2f8fc5aSChris Lattner NewFile.push_back(*Ptr); 656a2f8fc5aSChris Lattner continue; 657a2f8fc5aSChris Lattner } 658a2f8fc5aSChris Lattner 659a2f8fc5aSChris Lattner // Otherwise, add one space and advance over neighboring space. 660a2f8fc5aSChris Lattner NewFile.push_back(' '); 661a2f8fc5aSChris Lattner while (Ptr+1 != End && 662a2f8fc5aSChris Lattner (Ptr[1] == ' ' || Ptr[1] == '\t')) 663a2f8fc5aSChris Lattner ++Ptr; 664a2f8fc5aSChris Lattner } 665a2f8fc5aSChris Lattner 6661961f14cSDavid Blaikie return std::unique_ptr<MemoryBuffer>( 6671961f14cSDavid Blaikie MemoryBuffer::getMemBufferCopy(NewFile.str(), MB->getBufferIdentifier())); 668a2f8fc5aSChris Lattner } 669a2f8fc5aSChris Lattner 67038820972SMatt Arsenault static bool IsPartOfWord(char c) { 67138820972SMatt Arsenault return (isalnum(c) || c == '-' || c == '_'); 67238820972SMatt Arsenault } 67338820972SMatt Arsenault 67413df4626SMatt Arsenault // Get the size of the prefix extension. 67513df4626SMatt Arsenault static size_t CheckTypeSize(Check::CheckType Ty) { 67613df4626SMatt Arsenault switch (Ty) { 67713df4626SMatt Arsenault case Check::CheckNone: 67813df4626SMatt Arsenault return 0; 67913df4626SMatt Arsenault 68013df4626SMatt Arsenault case Check::CheckPlain: 68113df4626SMatt Arsenault return sizeof(":") - 1; 68213df4626SMatt Arsenault 68313df4626SMatt Arsenault case Check::CheckNext: 68413df4626SMatt Arsenault return sizeof("-NEXT:") - 1; 68513df4626SMatt Arsenault 68613df4626SMatt Arsenault case Check::CheckNot: 68713df4626SMatt Arsenault return sizeof("-NOT:") - 1; 68813df4626SMatt Arsenault 68913df4626SMatt Arsenault case Check::CheckDAG: 69013df4626SMatt Arsenault return sizeof("-DAG:") - 1; 69113df4626SMatt Arsenault 69213df4626SMatt Arsenault case Check::CheckLabel: 69313df4626SMatt Arsenault return sizeof("-LABEL:") - 1; 69413df4626SMatt Arsenault 69513df4626SMatt Arsenault case Check::CheckEOF: 69613df4626SMatt Arsenault llvm_unreachable("Should not be using EOF size"); 69713df4626SMatt Arsenault } 69813df4626SMatt Arsenault 69913df4626SMatt Arsenault llvm_unreachable("Bad check type"); 70013df4626SMatt Arsenault } 70113df4626SMatt Arsenault 70213df4626SMatt Arsenault static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) { 703c4d2d471SMatt Arsenault char NextChar = Buffer[Prefix.size()]; 70438820972SMatt Arsenault 70538820972SMatt Arsenault // Verify that the : is present after the prefix. 70613df4626SMatt Arsenault if (NextChar == ':') 70738820972SMatt Arsenault return Check::CheckPlain; 70838820972SMatt Arsenault 70913df4626SMatt Arsenault if (NextChar != '-') 71038820972SMatt Arsenault return Check::CheckNone; 71138820972SMatt Arsenault 712c4d2d471SMatt Arsenault StringRef Rest = Buffer.drop_front(Prefix.size() + 1); 71313df4626SMatt Arsenault if (Rest.startswith("NEXT:")) 71438820972SMatt Arsenault return Check::CheckNext; 71538820972SMatt Arsenault 71613df4626SMatt Arsenault if (Rest.startswith("NOT:")) 71738820972SMatt Arsenault return Check::CheckNot; 71838820972SMatt Arsenault 71913df4626SMatt Arsenault if (Rest.startswith("DAG:")) 72038820972SMatt Arsenault return Check::CheckDAG; 72138820972SMatt Arsenault 72213df4626SMatt Arsenault if (Rest.startswith("LABEL:")) 72338820972SMatt Arsenault return Check::CheckLabel; 72413df4626SMatt Arsenault 72513df4626SMatt Arsenault return Check::CheckNone; 72638820972SMatt Arsenault } 72738820972SMatt Arsenault 72813df4626SMatt Arsenault // From the given position, find the next character after the word. 72913df4626SMatt Arsenault static size_t SkipWord(StringRef Str, size_t Loc) { 73013df4626SMatt Arsenault while (Loc < Str.size() && IsPartOfWord(Str[Loc])) 73113df4626SMatt Arsenault ++Loc; 73213df4626SMatt Arsenault return Loc; 73313df4626SMatt Arsenault } 73413df4626SMatt Arsenault 73513df4626SMatt Arsenault // Try to find the first match in buffer for any prefix. If a valid match is 73613df4626SMatt Arsenault // found, return that prefix and set its type and location. If there are almost 73713df4626SMatt Arsenault // matches (e.g. the actual prefix string is found, but is not an actual check 73813df4626SMatt Arsenault // string), but no valid match, return an empty string and set the position to 73913df4626SMatt Arsenault // resume searching from. If no partial matches are found, return an empty 74013df4626SMatt Arsenault // string and the location will be StringRef::npos. If one prefix is a substring 74113df4626SMatt Arsenault // of another, the maximal match should be found. e.g. if "A" and "AA" are 74213df4626SMatt Arsenault // prefixes then AA-CHECK: should match the second one. 74313df4626SMatt Arsenault static StringRef FindFirstCandidateMatch(StringRef &Buffer, 74413df4626SMatt Arsenault Check::CheckType &CheckTy, 74513df4626SMatt Arsenault size_t &CheckLoc) { 74613df4626SMatt Arsenault StringRef FirstPrefix; 74713df4626SMatt Arsenault size_t FirstLoc = StringRef::npos; 74813df4626SMatt Arsenault size_t SearchLoc = StringRef::npos; 74913df4626SMatt Arsenault Check::CheckType FirstTy = Check::CheckNone; 75013df4626SMatt Arsenault 75113df4626SMatt Arsenault CheckTy = Check::CheckNone; 75213df4626SMatt Arsenault CheckLoc = StringRef::npos; 75313df4626SMatt Arsenault 75413df4626SMatt Arsenault for (prefix_iterator I = CheckPrefixes.begin(), E = CheckPrefixes.end(); 75513df4626SMatt Arsenault I != E; ++I) { 75613df4626SMatt Arsenault StringRef Prefix(*I); 75713df4626SMatt Arsenault size_t PrefixLoc = Buffer.find(Prefix); 75813df4626SMatt Arsenault 75913df4626SMatt Arsenault if (PrefixLoc == StringRef::npos) 76013df4626SMatt Arsenault continue; 76113df4626SMatt Arsenault 76213df4626SMatt Arsenault // Track where we are searching for invalid prefixes that look almost right. 76313df4626SMatt Arsenault // We need to only advance to the first partial match on the next attempt 76413df4626SMatt Arsenault // since a partial match could be a substring of a later, valid prefix. 76513df4626SMatt Arsenault // Need to skip to the end of the word, otherwise we could end up 76613df4626SMatt Arsenault // matching a prefix in a substring later. 76713df4626SMatt Arsenault if (PrefixLoc < SearchLoc) 76813df4626SMatt Arsenault SearchLoc = SkipWord(Buffer, PrefixLoc); 76913df4626SMatt Arsenault 77013df4626SMatt Arsenault // We only want to find the first match to avoid skipping some. 77113df4626SMatt Arsenault if (PrefixLoc > FirstLoc) 77213df4626SMatt Arsenault continue; 773a7181a1bSAlexey Samsonov // If one matching check-prefix is a prefix of another, choose the 774a7181a1bSAlexey Samsonov // longer one. 775a7181a1bSAlexey Samsonov if (PrefixLoc == FirstLoc && Prefix.size() < FirstPrefix.size()) 776a7181a1bSAlexey Samsonov continue; 77713df4626SMatt Arsenault 77813df4626SMatt Arsenault StringRef Rest = Buffer.drop_front(PrefixLoc); 77913df4626SMatt Arsenault // Make sure we have actually found the prefix, and not a word containing 78013df4626SMatt Arsenault // it. This should also prevent matching the wrong prefix when one is a 78113df4626SMatt Arsenault // substring of another. 78213df4626SMatt Arsenault if (PrefixLoc != 0 && IsPartOfWord(Buffer[PrefixLoc - 1])) 78343b5f572SDaniel Sanders FirstTy = Check::CheckNone; 78443b5f572SDaniel Sanders else 78543b5f572SDaniel Sanders FirstTy = FindCheckType(Rest, Prefix); 78613df4626SMatt Arsenault 78713df4626SMatt Arsenault FirstLoc = PrefixLoc; 788a7181a1bSAlexey Samsonov FirstPrefix = Prefix; 78913df4626SMatt Arsenault } 79013df4626SMatt Arsenault 791a7181a1bSAlexey Samsonov // If the first prefix is invalid, we should continue the search after it. 792a7181a1bSAlexey Samsonov if (FirstTy == Check::CheckNone) { 79313df4626SMatt Arsenault CheckLoc = SearchLoc; 794a7181a1bSAlexey Samsonov return ""; 795a7181a1bSAlexey Samsonov } 796a7181a1bSAlexey Samsonov 79713df4626SMatt Arsenault CheckTy = FirstTy; 79813df4626SMatt Arsenault CheckLoc = FirstLoc; 79913df4626SMatt Arsenault return FirstPrefix; 80013df4626SMatt Arsenault } 80113df4626SMatt Arsenault 80213df4626SMatt Arsenault static StringRef FindFirstMatchingPrefix(StringRef &Buffer, 80313df4626SMatt Arsenault unsigned &LineNumber, 80413df4626SMatt Arsenault Check::CheckType &CheckTy, 80513df4626SMatt Arsenault size_t &CheckLoc) { 80613df4626SMatt Arsenault while (!Buffer.empty()) { 80713df4626SMatt Arsenault StringRef Prefix = FindFirstCandidateMatch(Buffer, CheckTy, CheckLoc); 80813df4626SMatt Arsenault // If we found a real match, we are done. 80913df4626SMatt Arsenault if (!Prefix.empty()) { 81013df4626SMatt Arsenault LineNumber += Buffer.substr(0, CheckLoc).count('\n'); 81113df4626SMatt Arsenault return Prefix; 81213df4626SMatt Arsenault } 81313df4626SMatt Arsenault 81413df4626SMatt Arsenault // We didn't find any almost matches either, we are also done. 81513df4626SMatt Arsenault if (CheckLoc == StringRef::npos) 81613df4626SMatt Arsenault return StringRef(); 81713df4626SMatt Arsenault 81813df4626SMatt Arsenault LineNumber += Buffer.substr(0, CheckLoc + 1).count('\n'); 81913df4626SMatt Arsenault 82013df4626SMatt Arsenault // Advance to the last possible match we found and try again. 82113df4626SMatt Arsenault Buffer = Buffer.drop_front(CheckLoc + 1); 82213df4626SMatt Arsenault } 82313df4626SMatt Arsenault 82413df4626SMatt Arsenault return StringRef(); 82538820972SMatt Arsenault } 826ee3c74fbSChris Lattner 827ee3c74fbSChris Lattner /// ReadCheckFile - Read the check file, which specifies the sequence of 828ee3c74fbSChris Lattner /// expected strings. The strings are added to the CheckStrings vector. 82943d50d4aSEli Bendersky /// Returns true in case of an error, false otherwise. 830ee3c74fbSChris Lattner static bool ReadCheckFile(SourceMgr &SM, 83126cccfe1SChris Lattner std::vector<CheckString> &CheckStrings) { 832adf21f2aSRafael Espindola ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr = 833adf21f2aSRafael Espindola MemoryBuffer::getFileOrSTDIN(CheckFilename); 834adf21f2aSRafael Espindola if (std::error_code EC = FileOrErr.getError()) { 835adf21f2aSRafael Espindola errs() << "Could not open check file '" << CheckFilename 836adf21f2aSRafael Espindola << "': " << EC.message() << '\n'; 837ee3c74fbSChris Lattner return true; 838ee3c74fbSChris Lattner } 839a2f8fc5aSChris Lattner 840a2f8fc5aSChris Lattner // If we want to canonicalize whitespace, strip excess whitespace from the 8415ea04c38SGuy Benyei // buffer containing the CHECK lines. Remove DOS style line endings. 842*3560ff2cSRafael Espindola std::unique_ptr<MemoryBuffer> F = CanonicalizeInputFile( 843*3560ff2cSRafael Espindola std::move(FileOrErr.get()), NoCanonicalizeWhiteSpace); 844ee3c74fbSChris Lattner 84510f10cedSChris Lattner // Find all instances of CheckPrefix followed by : in the file. 846caa5fc0cSChris Lattner StringRef Buffer = F->getBuffer(); 84756ccdbbdSAlexander Kornienko 8481961f14cSDavid Blaikie SM.AddNewSourceBuffer(std::move(F), SMLoc()); 8491961f14cSDavid Blaikie 85056ccdbbdSAlexander Kornienko std::vector<Pattern> ImplicitNegativeChecks; 85156ccdbbdSAlexander Kornienko for (const auto &PatternString : ImplicitCheckNot) { 85256ccdbbdSAlexander Kornienko // Create a buffer with fake command line content in order to display the 85356ccdbbdSAlexander Kornienko // command line option responsible for the specific implicit CHECK-NOT. 85456ccdbbdSAlexander Kornienko std::string Prefix = std::string("-") + ImplicitCheckNot.ArgStr + "='"; 85556ccdbbdSAlexander Kornienko std::string Suffix = "'"; 856*3560ff2cSRafael Espindola std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy( 857*3560ff2cSRafael Espindola Prefix + PatternString + Suffix, "command line"); 858*3560ff2cSRafael Espindola 85956ccdbbdSAlexander Kornienko StringRef PatternInBuffer = 86056ccdbbdSAlexander Kornienko CmdLine->getBuffer().substr(Prefix.size(), PatternString.size()); 8611961f14cSDavid Blaikie SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc()); 86256ccdbbdSAlexander Kornienko 86356ccdbbdSAlexander Kornienko ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot)); 86456ccdbbdSAlexander Kornienko ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer, 86556ccdbbdSAlexander Kornienko "IMPLICIT-CHECK", SM, 0); 86656ccdbbdSAlexander Kornienko } 86756ccdbbdSAlexander Kornienko 86856ccdbbdSAlexander Kornienko 86956ccdbbdSAlexander Kornienko std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks; 870236d2d5eSChris Lattner 87143d50d4aSEli Bendersky // LineNumber keeps track of the line on which CheckPrefix instances are 87243d50d4aSEli Bendersky // found. 87392987fb3SAlexander Kornienko unsigned LineNumber = 1; 87492987fb3SAlexander Kornienko 875ee3c74fbSChris Lattner while (1) { 87613df4626SMatt Arsenault Check::CheckType CheckTy; 87713df4626SMatt Arsenault size_t PrefixLoc; 87813df4626SMatt Arsenault 87913df4626SMatt Arsenault // See if a prefix occurs in the memory buffer. 88013df4626SMatt Arsenault StringRef UsedPrefix = FindFirstMatchingPrefix(Buffer, 88113df4626SMatt Arsenault LineNumber, 88213df4626SMatt Arsenault CheckTy, 88313df4626SMatt Arsenault PrefixLoc); 88413df4626SMatt Arsenault if (UsedPrefix.empty()) 885ee3c74fbSChris Lattner break; 886ee3c74fbSChris Lattner 88713df4626SMatt Arsenault Buffer = Buffer.drop_front(PrefixLoc); 88892987fb3SAlexander Kornienko 88913df4626SMatt Arsenault // Location to use for error messages. 89013df4626SMatt Arsenault const char *UsedPrefixStart = Buffer.data() + (PrefixLoc == 0 ? 0 : 1); 89192987fb3SAlexander Kornienko 89213df4626SMatt Arsenault // PrefixLoc is to the start of the prefix. Skip to the end. 89313df4626SMatt Arsenault Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy)); 89410f10cedSChris Lattner 89538820972SMatt Arsenault // Okay, we found the prefix, yay. Remember the rest of the line, but ignore 89638820972SMatt Arsenault // leading and trailing whitespace. 897236d2d5eSChris Lattner Buffer = Buffer.substr(Buffer.find_first_not_of(" \t")); 898ee3c74fbSChris Lattner 899ee3c74fbSChris Lattner // Scan ahead to the end of line. 900caa5fc0cSChris Lattner size_t EOL = Buffer.find_first_of("\n\r"); 901ee3c74fbSChris Lattner 902838fb09aSDan Gohman // Remember the location of the start of the pattern, for diagnostics. 903838fb09aSDan Gohman SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data()); 904838fb09aSDan Gohman 90574d50731SChris Lattner // Parse the pattern. 90638820972SMatt Arsenault Pattern P(CheckTy); 90713df4626SMatt Arsenault if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber)) 908ee3c74fbSChris Lattner return true; 909ee3c74fbSChris Lattner 910f8bd2e5bSStephen Lin // Verify that CHECK-LABEL lines do not define or use variables 91138820972SMatt Arsenault if ((CheckTy == Check::CheckLabel) && P.hasVariable()) { 91213df4626SMatt Arsenault SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart), 913f8bd2e5bSStephen Lin SourceMgr::DK_Error, 91413df4626SMatt Arsenault "found '" + UsedPrefix + "-LABEL:'" 91513df4626SMatt Arsenault " with variable definition or use"); 916f8bd2e5bSStephen Lin return true; 917f8bd2e5bSStephen Lin } 918f8bd2e5bSStephen Lin 919236d2d5eSChris Lattner Buffer = Buffer.substr(EOL); 92074d50731SChris Lattner 921da108b4eSChris Lattner // Verify that CHECK-NEXT lines have at least one CHECK line before them. 92238820972SMatt Arsenault if ((CheckTy == Check::CheckNext) && CheckStrings.empty()) { 92313df4626SMatt Arsenault SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart), 92403b80a40SChris Lattner SourceMgr::DK_Error, 92513df4626SMatt Arsenault "found '" + UsedPrefix + "-NEXT:' without previous '" 92613df4626SMatt Arsenault + UsedPrefix + ": line"); 927da108b4eSChris Lattner return true; 928da108b4eSChris Lattner } 929da108b4eSChris Lattner 93091a1b2c9SMichael Liao // Handle CHECK-DAG/-NOT. 93138820972SMatt Arsenault if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) { 93291a1b2c9SMichael Liao DagNotMatches.push_back(P); 93374d50731SChris Lattner continue; 93474d50731SChris Lattner } 93574d50731SChris Lattner 936ee3c74fbSChris Lattner // Okay, add the string we captured to the output vector and move on. 9373b40b445SChris Lattner CheckStrings.push_back(CheckString(P, 93813df4626SMatt Arsenault UsedPrefix, 939838fb09aSDan Gohman PatternLoc, 94038820972SMatt Arsenault CheckTy)); 94191a1b2c9SMichael Liao std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); 94256ccdbbdSAlexander Kornienko DagNotMatches = ImplicitNegativeChecks; 943ee3c74fbSChris Lattner } 944ee3c74fbSChris Lattner 94513df4626SMatt Arsenault // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first 94613df4626SMatt Arsenault // prefix as a filler for the error message. 94791a1b2c9SMichael Liao if (!DagNotMatches.empty()) { 94838820972SMatt Arsenault CheckStrings.push_back(CheckString(Pattern(Check::CheckEOF), 94913df4626SMatt Arsenault CheckPrefixes[0], 950eba55822SJakob Stoklund Olesen SMLoc::getFromPointer(Buffer.data()), 95138820972SMatt Arsenault Check::CheckEOF)); 95291a1b2c9SMichael Liao std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); 953eba55822SJakob Stoklund Olesen } 954eba55822SJakob Stoklund Olesen 955ee3c74fbSChris Lattner if (CheckStrings.empty()) { 95613df4626SMatt Arsenault errs() << "error: no check strings found with prefix" 95713df4626SMatt Arsenault << (CheckPrefixes.size() > 1 ? "es " : " "); 95813df4626SMatt Arsenault for (size_t I = 0, N = CheckPrefixes.size(); I != N; ++I) { 95913df4626SMatt Arsenault StringRef Prefix(CheckPrefixes[I]); 96013df4626SMatt Arsenault errs() << '\'' << Prefix << ":'"; 96113df4626SMatt Arsenault if (I != N - 1) 96213df4626SMatt Arsenault errs() << ", "; 96313df4626SMatt Arsenault } 96413df4626SMatt Arsenault 96513df4626SMatt Arsenault errs() << '\n'; 966ee3c74fbSChris Lattner return true; 967ee3c74fbSChris Lattner } 968ee3c74fbSChris Lattner 969ee3c74fbSChris Lattner return false; 970ee3c74fbSChris Lattner } 971ee3c74fbSChris Lattner 97291a1b2c9SMichael Liao static void PrintCheckFailed(const SourceMgr &SM, const SMLoc &Loc, 97391a1b2c9SMichael Liao const Pattern &Pat, StringRef Buffer, 974e0ef65abSDaniel Dunbar StringMap<StringRef> &VariableTable) { 975da108b4eSChris Lattner // Otherwise, we have an error, emit an error message. 97691a1b2c9SMichael Liao SM.PrintMessage(Loc, SourceMgr::DK_Error, 97703b80a40SChris Lattner "expected string not found in input"); 978da108b4eSChris Lattner 979da108b4eSChris Lattner // Print the "scanning from here" line. If the current position is at the 980da108b4eSChris Lattner // end of a line, advance to the start of the next line. 981caa5fc0cSChris Lattner Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r")); 982da108b4eSChris Lattner 98303b80a40SChris Lattner SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 98403b80a40SChris Lattner "scanning from here"); 985e0ef65abSDaniel Dunbar 986e0ef65abSDaniel Dunbar // Allow the pattern to print additional information if desired. 98791a1b2c9SMichael Liao Pat.PrintFailureInfo(SM, Buffer, VariableTable); 98891a1b2c9SMichael Liao } 98991a1b2c9SMichael Liao 99091a1b2c9SMichael Liao static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr, 99191a1b2c9SMichael Liao StringRef Buffer, 99291a1b2c9SMichael Liao StringMap<StringRef> &VariableTable) { 99391a1b2c9SMichael Liao PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable); 994da108b4eSChris Lattner } 995da108b4eSChris Lattner 99637183584SChris Lattner /// CountNumNewlinesBetween - Count the number of newlines in the specified 99737183584SChris Lattner /// range. 998592fe880SRichard Smith static unsigned CountNumNewlinesBetween(StringRef Range, 999592fe880SRichard Smith const char *&FirstNewLine) { 1000da108b4eSChris Lattner unsigned NumNewLines = 0; 100137183584SChris Lattner while (1) { 1002da108b4eSChris Lattner // Scan for newline. 100337183584SChris Lattner Range = Range.substr(Range.find_first_of("\n\r")); 100437183584SChris Lattner if (Range.empty()) return NumNewLines; 1005da108b4eSChris Lattner 1006da108b4eSChris Lattner ++NumNewLines; 1007da108b4eSChris Lattner 1008da108b4eSChris Lattner // Handle \n\r and \r\n as a single newline. 100937183584SChris Lattner if (Range.size() > 1 && 101037183584SChris Lattner (Range[1] == '\n' || Range[1] == '\r') && 101137183584SChris Lattner (Range[0] != Range[1])) 101237183584SChris Lattner Range = Range.substr(1); 101337183584SChris Lattner Range = Range.substr(1); 1014592fe880SRichard Smith 1015592fe880SRichard Smith if (NumNewLines == 1) 1016592fe880SRichard Smith FirstNewLine = Range.begin(); 1017da108b4eSChris Lattner } 1018da108b4eSChris Lattner } 1019da108b4eSChris Lattner 1020dcc7d48dSMichael Liao size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer, 1021e93a3a08SStephen Lin bool IsLabelScanMode, size_t &MatchLen, 1022dcc7d48dSMichael Liao StringMap<StringRef> &VariableTable) const { 102391a1b2c9SMichael Liao size_t LastPos = 0; 102491a1b2c9SMichael Liao std::vector<const Pattern *> NotStrings; 102591a1b2c9SMichael Liao 1026e93a3a08SStephen Lin // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL 1027e93a3a08SStephen Lin // bounds; we have not processed variable definitions within the bounded block 1028e93a3a08SStephen Lin // yet so cannot handle any final CHECK-DAG yet; this is handled when going 1029e93a3a08SStephen Lin // over the block again (including the last CHECK-LABEL) in normal mode. 1030e93a3a08SStephen Lin if (!IsLabelScanMode) { 103191a1b2c9SMichael Liao // Match "dag strings" (with mixed "not strings" if any). 103291a1b2c9SMichael Liao LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable); 103391a1b2c9SMichael Liao if (LastPos == StringRef::npos) 103491a1b2c9SMichael Liao return StringRef::npos; 1035e93a3a08SStephen Lin } 103691a1b2c9SMichael Liao 103791a1b2c9SMichael Liao // Match itself from the last position after matching CHECK-DAG. 103891a1b2c9SMichael Liao StringRef MatchBuffer = Buffer.substr(LastPos); 103991a1b2c9SMichael Liao size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable); 1040dcc7d48dSMichael Liao if (MatchPos == StringRef::npos) { 104191a1b2c9SMichael Liao PrintCheckFailed(SM, *this, MatchBuffer, VariableTable); 1042dcc7d48dSMichael Liao return StringRef::npos; 1043dcc7d48dSMichael Liao } 104491a1b2c9SMichael Liao MatchPos += LastPos; 1045dcc7d48dSMichael Liao 1046e93a3a08SStephen Lin // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT 1047e93a3a08SStephen Lin // or CHECK-NOT 1048e93a3a08SStephen Lin if (!IsLabelScanMode) { 104991a1b2c9SMichael Liao StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); 1050dcc7d48dSMichael Liao 1051dcc7d48dSMichael Liao // If this check is a "CHECK-NEXT", verify that the previous match was on 1052dcc7d48dSMichael Liao // the previous line (i.e. that there is one newline between them). 1053dcc7d48dSMichael Liao if (CheckNext(SM, SkippedRegion)) 1054dcc7d48dSMichael Liao return StringRef::npos; 1055dcc7d48dSMichael Liao 1056dcc7d48dSMichael Liao // If this match had "not strings", verify that they don't exist in the 1057dcc7d48dSMichael Liao // skipped region. 105891a1b2c9SMichael Liao if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) 1059dcc7d48dSMichael Liao return StringRef::npos; 1060f8bd2e5bSStephen Lin } 1061dcc7d48dSMichael Liao 1062dcc7d48dSMichael Liao return MatchPos; 1063dcc7d48dSMichael Liao } 1064dcc7d48dSMichael Liao 1065dcc7d48dSMichael Liao bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const { 106638820972SMatt Arsenault if (CheckTy != Check::CheckNext) 1067dcc7d48dSMichael Liao return false; 1068dcc7d48dSMichael Liao 1069dcc7d48dSMichael Liao // Count the number of newlines between the previous match and this one. 1070dcc7d48dSMichael Liao assert(Buffer.data() != 1071dcc7d48dSMichael Liao SM.getMemoryBuffer( 1072dcc7d48dSMichael Liao SM.FindBufferContainingLoc( 1073dcc7d48dSMichael Liao SMLoc::getFromPointer(Buffer.data())))->getBufferStart() && 1074dcc7d48dSMichael Liao "CHECK-NEXT can't be the first check in a file"); 1075dcc7d48dSMichael Liao 107666f09ad0SCraig Topper const char *FirstNewLine = nullptr; 1077592fe880SRichard Smith unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 1078dcc7d48dSMichael Liao 1079dcc7d48dSMichael Liao if (NumNewLines == 0) { 108013df4626SMatt Arsenault SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix + 1081dcc7d48dSMichael Liao "-NEXT: is on the same line as previous match"); 1082dcc7d48dSMichael Liao SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), 1083dcc7d48dSMichael Liao SourceMgr::DK_Note, "'next' match was here"); 1084dcc7d48dSMichael Liao SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1085dcc7d48dSMichael Liao "previous match ended here"); 1086dcc7d48dSMichael Liao return true; 1087dcc7d48dSMichael Liao } 1088dcc7d48dSMichael Liao 1089dcc7d48dSMichael Liao if (NumNewLines != 1) { 109013df4626SMatt Arsenault SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix + 1091dcc7d48dSMichael Liao "-NEXT: is not on the line after the previous match"); 1092dcc7d48dSMichael Liao SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), 1093dcc7d48dSMichael Liao SourceMgr::DK_Note, "'next' match was here"); 1094dcc7d48dSMichael Liao SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1095dcc7d48dSMichael Liao "previous match ended here"); 1096592fe880SRichard Smith SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note, 1097592fe880SRichard Smith "non-matching line after previous match is here"); 1098dcc7d48dSMichael Liao return true; 1099dcc7d48dSMichael Liao } 1100dcc7d48dSMichael Liao 1101dcc7d48dSMichael Liao return false; 1102dcc7d48dSMichael Liao } 1103dcc7d48dSMichael Liao 1104dcc7d48dSMichael Liao bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer, 110591a1b2c9SMichael Liao const std::vector<const Pattern *> &NotStrings, 1106dcc7d48dSMichael Liao StringMap<StringRef> &VariableTable) const { 1107dcc7d48dSMichael Liao for (unsigned ChunkNo = 0, e = NotStrings.size(); 1108dcc7d48dSMichael Liao ChunkNo != e; ++ChunkNo) { 110991a1b2c9SMichael Liao const Pattern *Pat = NotStrings[ChunkNo]; 111038820972SMatt Arsenault assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!"); 111191a1b2c9SMichael Liao 1112dcc7d48dSMichael Liao size_t MatchLen = 0; 111391a1b2c9SMichael Liao size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable); 1114dcc7d48dSMichael Liao 1115dcc7d48dSMichael Liao if (Pos == StringRef::npos) continue; 1116dcc7d48dSMichael Liao 1117dcc7d48dSMichael Liao SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()+Pos), 1118dcc7d48dSMichael Liao SourceMgr::DK_Error, 111913df4626SMatt Arsenault Prefix + "-NOT: string occurred!"); 112091a1b2c9SMichael Liao SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note, 112113df4626SMatt Arsenault Prefix + "-NOT: pattern specified here"); 1122dcc7d48dSMichael Liao return true; 1123dcc7d48dSMichael Liao } 1124dcc7d48dSMichael Liao 1125dcc7d48dSMichael Liao return false; 1126dcc7d48dSMichael Liao } 1127dcc7d48dSMichael Liao 112891a1b2c9SMichael Liao size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer, 112991a1b2c9SMichael Liao std::vector<const Pattern *> &NotStrings, 113091a1b2c9SMichael Liao StringMap<StringRef> &VariableTable) const { 113191a1b2c9SMichael Liao if (DagNotStrings.empty()) 113291a1b2c9SMichael Liao return 0; 113391a1b2c9SMichael Liao 113491a1b2c9SMichael Liao size_t LastPos = 0; 113591a1b2c9SMichael Liao size_t StartPos = LastPos; 113691a1b2c9SMichael Liao 113791a1b2c9SMichael Liao for (unsigned ChunkNo = 0, e = DagNotStrings.size(); 113891a1b2c9SMichael Liao ChunkNo != e; ++ChunkNo) { 113991a1b2c9SMichael Liao const Pattern &Pat = DagNotStrings[ChunkNo]; 114091a1b2c9SMichael Liao 114138820972SMatt Arsenault assert((Pat.getCheckTy() == Check::CheckDAG || 114238820972SMatt Arsenault Pat.getCheckTy() == Check::CheckNot) && 114391a1b2c9SMichael Liao "Invalid CHECK-DAG or CHECK-NOT!"); 114491a1b2c9SMichael Liao 114538820972SMatt Arsenault if (Pat.getCheckTy() == Check::CheckNot) { 114691a1b2c9SMichael Liao NotStrings.push_back(&Pat); 114791a1b2c9SMichael Liao continue; 114891a1b2c9SMichael Liao } 114991a1b2c9SMichael Liao 115038820972SMatt Arsenault assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!"); 115191a1b2c9SMichael Liao 115291a1b2c9SMichael Liao size_t MatchLen = 0, MatchPos; 115391a1b2c9SMichael Liao 115491a1b2c9SMichael Liao // CHECK-DAG always matches from the start. 115591a1b2c9SMichael Liao StringRef MatchBuffer = Buffer.substr(StartPos); 115691a1b2c9SMichael Liao MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable); 115791a1b2c9SMichael Liao // With a group of CHECK-DAGs, a single mismatching means the match on 115891a1b2c9SMichael Liao // that group of CHECK-DAGs fails immediately. 115991a1b2c9SMichael Liao if (MatchPos == StringRef::npos) { 116091a1b2c9SMichael Liao PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable); 116191a1b2c9SMichael Liao return StringRef::npos; 116291a1b2c9SMichael Liao } 116391a1b2c9SMichael Liao // Re-calc it as the offset relative to the start of the original string. 116491a1b2c9SMichael Liao MatchPos += StartPos; 116591a1b2c9SMichael Liao 116691a1b2c9SMichael Liao if (!NotStrings.empty()) { 116791a1b2c9SMichael Liao if (MatchPos < LastPos) { 116891a1b2c9SMichael Liao // Reordered? 116991a1b2c9SMichael Liao SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos), 117091a1b2c9SMichael Liao SourceMgr::DK_Error, 117113df4626SMatt Arsenault Prefix + "-DAG: found a match of CHECK-DAG" 117291a1b2c9SMichael Liao " reordering across a CHECK-NOT"); 117391a1b2c9SMichael Liao SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos), 117491a1b2c9SMichael Liao SourceMgr::DK_Note, 117513df4626SMatt Arsenault Prefix + "-DAG: the farthest match of CHECK-DAG" 117691a1b2c9SMichael Liao " is found here"); 117791a1b2c9SMichael Liao SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note, 117813df4626SMatt Arsenault Prefix + "-NOT: the crossed pattern specified" 117991a1b2c9SMichael Liao " here"); 118091a1b2c9SMichael Liao SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note, 118113df4626SMatt Arsenault Prefix + "-DAG: the reordered pattern specified" 118291a1b2c9SMichael Liao " here"); 118391a1b2c9SMichael Liao return StringRef::npos; 118491a1b2c9SMichael Liao } 118591a1b2c9SMichael Liao // All subsequent CHECK-DAGs should be matched from the farthest 118691a1b2c9SMichael Liao // position of all precedent CHECK-DAGs (including this one.) 118791a1b2c9SMichael Liao StartPos = LastPos; 118891a1b2c9SMichael Liao // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to 118991a1b2c9SMichael Liao // CHECK-DAG, verify that there's no 'not' strings occurred in that 119091a1b2c9SMichael Liao // region. 119191a1b2c9SMichael Liao StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); 1192cf708c32STim Northover if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) 119391a1b2c9SMichael Liao return StringRef::npos; 119491a1b2c9SMichael Liao // Clear "not strings". 119591a1b2c9SMichael Liao NotStrings.clear(); 119691a1b2c9SMichael Liao } 119791a1b2c9SMichael Liao 119891a1b2c9SMichael Liao // Update the last position with CHECK-DAG matches. 119991a1b2c9SMichael Liao LastPos = std::max(MatchPos + MatchLen, LastPos); 120091a1b2c9SMichael Liao } 120191a1b2c9SMichael Liao 120291a1b2c9SMichael Liao return LastPos; 120391a1b2c9SMichael Liao } 120491a1b2c9SMichael Liao 120513df4626SMatt Arsenault // A check prefix must contain only alphanumeric, hyphens and underscores. 120613df4626SMatt Arsenault static bool ValidateCheckPrefix(StringRef CheckPrefix) { 120713df4626SMatt Arsenault Regex Validator("^[a-zA-Z0-9_-]*$"); 120813df4626SMatt Arsenault return Validator.match(CheckPrefix); 120913df4626SMatt Arsenault } 121013df4626SMatt Arsenault 121113df4626SMatt Arsenault static bool ValidateCheckPrefixes() { 121213df4626SMatt Arsenault StringSet<> PrefixSet; 121313df4626SMatt Arsenault 121413df4626SMatt Arsenault for (prefix_iterator I = CheckPrefixes.begin(), E = CheckPrefixes.end(); 121513df4626SMatt Arsenault I != E; ++I) { 121613df4626SMatt Arsenault StringRef Prefix(*I); 121713df4626SMatt Arsenault 121824412b14SEli Bendersky // Reject empty prefixes. 121924412b14SEli Bendersky if (Prefix == "") 122024412b14SEli Bendersky return false; 122124412b14SEli Bendersky 122213df4626SMatt Arsenault if (!PrefixSet.insert(Prefix)) 122313df4626SMatt Arsenault return false; 122413df4626SMatt Arsenault 122513df4626SMatt Arsenault if (!ValidateCheckPrefix(Prefix)) 122613df4626SMatt Arsenault return false; 122713df4626SMatt Arsenault } 122813df4626SMatt Arsenault 122913df4626SMatt Arsenault return true; 123013df4626SMatt Arsenault } 123113df4626SMatt Arsenault 123213df4626SMatt Arsenault // I don't think there's a way to specify an initial value for cl::list, 123313df4626SMatt Arsenault // so if nothing was specified, add the default 123413df4626SMatt Arsenault static void AddCheckPrefixIfNeeded() { 123513df4626SMatt Arsenault if (CheckPrefixes.empty()) 123613df4626SMatt Arsenault CheckPrefixes.push_back("CHECK"); 1237c2735158SRui Ueyama } 1238c2735158SRui Ueyama 1239ee3c74fbSChris Lattner int main(int argc, char **argv) { 1240ee3c74fbSChris Lattner sys::PrintStackTraceOnErrorSignal(); 1241ee3c74fbSChris Lattner PrettyStackTraceProgram X(argc, argv); 1242ee3c74fbSChris Lattner cl::ParseCommandLineOptions(argc, argv); 1243ee3c74fbSChris Lattner 124413df4626SMatt Arsenault if (!ValidateCheckPrefixes()) { 124513df4626SMatt Arsenault errs() << "Supplied check-prefix is invalid! Prefixes must be unique and " 124613df4626SMatt Arsenault "start with a letter and contain only alphanumeric characters, " 124713df4626SMatt Arsenault "hyphens and underscores\n"; 1248c2735158SRui Ueyama return 2; 1249c2735158SRui Ueyama } 1250c2735158SRui Ueyama 125113df4626SMatt Arsenault AddCheckPrefixIfNeeded(); 125213df4626SMatt Arsenault 1253ee3c74fbSChris Lattner SourceMgr SM; 1254ee3c74fbSChris Lattner 1255ee3c74fbSChris Lattner // Read the expected strings from the check file. 125626cccfe1SChris Lattner std::vector<CheckString> CheckStrings; 1257ee3c74fbSChris Lattner if (ReadCheckFile(SM, CheckStrings)) 1258ee3c74fbSChris Lattner return 2; 1259ee3c74fbSChris Lattner 1260ee3c74fbSChris Lattner // Open the file to check and add it to SourceMgr. 1261adf21f2aSRafael Espindola ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr = 1262adf21f2aSRafael Espindola MemoryBuffer::getFileOrSTDIN(InputFilename); 1263adf21f2aSRafael Espindola if (std::error_code EC = FileOrErr.getError()) { 1264adf21f2aSRafael Espindola errs() << "Could not open input file '" << InputFilename 1265adf21f2aSRafael Espindola << "': " << EC.message() << '\n'; 12668e1c6477SEli Bendersky return 2; 1267ee3c74fbSChris Lattner } 12683f6481d0SRafael Espindola std::unique_ptr<MemoryBuffer> &File = FileOrErr.get(); 12692c3e5cdfSChris Lattner 12701b9f936fSJustin Bogner if (File->getBufferSize() == 0 && !AllowEmptyInput) { 1271b692bed7SChris Lattner errs() << "FileCheck error: '" << InputFilename << "' is empty.\n"; 12728e1c6477SEli Bendersky return 2; 1273b692bed7SChris Lattner } 1274b692bed7SChris Lattner 12752c3e5cdfSChris Lattner // Remove duplicate spaces in the input file if requested. 12765ea04c38SGuy Benyei // Remove DOS style line endings. 12771961f14cSDavid Blaikie std::unique_ptr<MemoryBuffer> F = 1278ce5dd1acSRafael Espindola CanonicalizeInputFile(std::move(File), NoCanonicalizeWhiteSpace); 12792c3e5cdfSChris Lattner 1280ee3c74fbSChris Lattner // Check that we have all of the expected strings, in order, in the input 1281ee3c74fbSChris Lattner // file. 1282caa5fc0cSChris Lattner StringRef Buffer = F->getBuffer(); 1283ee3c74fbSChris Lattner 12841961f14cSDavid Blaikie SM.AddNewSourceBuffer(std::move(F), SMLoc()); 12851961f14cSDavid Blaikie 12861961f14cSDavid Blaikie /// VariableTable - This holds all the current filecheck variables. 12871961f14cSDavid Blaikie StringMap<StringRef> VariableTable; 12881961f14cSDavid Blaikie 1289f8bd2e5bSStephen Lin bool hasError = false; 1290ee3c74fbSChris Lattner 1291f8bd2e5bSStephen Lin unsigned i = 0, j = 0, e = CheckStrings.size(); 1292ee3c74fbSChris Lattner 1293f8bd2e5bSStephen Lin while (true) { 1294f8bd2e5bSStephen Lin StringRef CheckRegion; 1295f8bd2e5bSStephen Lin if (j == e) { 1296f8bd2e5bSStephen Lin CheckRegion = Buffer; 1297f8bd2e5bSStephen Lin } else { 1298f8bd2e5bSStephen Lin const CheckString &CheckLabelStr = CheckStrings[j]; 129938820972SMatt Arsenault if (CheckLabelStr.CheckTy != Check::CheckLabel) { 1300f8bd2e5bSStephen Lin ++j; 1301f8bd2e5bSStephen Lin continue; 1302da108b4eSChris Lattner } 1303da108b4eSChris Lattner 1304f8bd2e5bSStephen Lin // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG 1305f8bd2e5bSStephen Lin size_t MatchLabelLen = 0; 1306e93a3a08SStephen Lin size_t MatchLabelPos = CheckLabelStr.Check(SM, Buffer, true, 1307f8bd2e5bSStephen Lin MatchLabelLen, VariableTable); 1308f8bd2e5bSStephen Lin if (MatchLabelPos == StringRef::npos) { 1309f8bd2e5bSStephen Lin hasError = true; 1310f8bd2e5bSStephen Lin break; 1311f8bd2e5bSStephen Lin } 1312f8bd2e5bSStephen Lin 1313f8bd2e5bSStephen Lin CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen); 1314f8bd2e5bSStephen Lin Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen); 1315f8bd2e5bSStephen Lin ++j; 1316f8bd2e5bSStephen Lin } 1317f8bd2e5bSStephen Lin 1318f8bd2e5bSStephen Lin for ( ; i != j; ++i) { 1319f8bd2e5bSStephen Lin const CheckString &CheckStr = CheckStrings[i]; 1320f8bd2e5bSStephen Lin 1321f8bd2e5bSStephen Lin // Check each string within the scanned region, including a second check 1322f8bd2e5bSStephen Lin // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG) 1323f8bd2e5bSStephen Lin size_t MatchLen = 0; 1324e93a3a08SStephen Lin size_t MatchPos = CheckStr.Check(SM, CheckRegion, false, MatchLen, 1325f8bd2e5bSStephen Lin VariableTable); 1326f8bd2e5bSStephen Lin 1327f8bd2e5bSStephen Lin if (MatchPos == StringRef::npos) { 1328f8bd2e5bSStephen Lin hasError = true; 1329f8bd2e5bSStephen Lin i = j; 1330f8bd2e5bSStephen Lin break; 1331f8bd2e5bSStephen Lin } 1332f8bd2e5bSStephen Lin 1333f8bd2e5bSStephen Lin CheckRegion = CheckRegion.substr(MatchPos + MatchLen); 1334f8bd2e5bSStephen Lin } 1335f8bd2e5bSStephen Lin 1336f8bd2e5bSStephen Lin if (j == e) 1337f8bd2e5bSStephen Lin break; 1338f8bd2e5bSStephen Lin } 1339f8bd2e5bSStephen Lin 1340f8bd2e5bSStephen Lin return hasError ? 1 : 0; 1341ee3c74fbSChris Lattner } 1342