//===- FileCheck.cpp - Check that File's Contents match what is expected --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// FileCheck does a line-by-line check of a file that validates whether it
// contains the expected content.  This is useful for regression tests etc.
//
// This program exits with an error status of 2 on error, exit status of 0 if
// the file matched the expected contents, and exit status of 1 if it did not
// contain the expected contents.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/system_error.h"
#include <algorithm>
#include <cctype>
#include <map>
#include <string>
#include <vector>
using namespace llvm;

// Positional, required argument: the file that contains the check directives.
static cl::opt<std::string>
CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);

// -input-file: the file whose contents are checked.  The initial value "-"
// corresponds to the "defaults to stdin" wording of the description.
static cl::opt<std::string>
InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
              cl::init("-"), cl::value_desc("filename"));

// -check-prefix: prefix(es) that introduce check directives.  Declared as a
// cl::list, i.e. it holds a list of strings rather than a single value.
static cl::list<std::string>
CheckPrefixes("check-prefix",
              cl::desc("Prefix to use from check file (defaults to 'CHECK')"));

// -strict-whitespace: when set, horizontal whitespace is not treated as
// equivalent (see the option description).
static cl::opt<bool>
NoCanonicalizeWhiteSpace("strict-whitespace",
              cl::desc("Do not treat all horizontal whitespace as equivalent"));

// Const iterator type over a cl::list<std::string> such as CheckPrefixes.
typedef cl::list<std::string>::const_iterator prefix_iterator;

//===----------------------------------------------------------------------===//
// Pattern Handling Code.
5774d50731SChris Lattner //===----------------------------------------------------------------------===// 5874d50731SChris Lattner 5938820972SMatt Arsenault namespace Check { 6038820972SMatt Arsenault enum CheckType { 6138820972SMatt Arsenault CheckNone = 0, 6238820972SMatt Arsenault CheckPlain, 6338820972SMatt Arsenault CheckNext, 6438820972SMatt Arsenault CheckNot, 6538820972SMatt Arsenault CheckDAG, 6638820972SMatt Arsenault CheckLabel, 670a4c44bdSChris Lattner 68eba55822SJakob Stoklund Olesen /// MatchEOF - When set, this pattern only matches the end of file. This is 69eba55822SJakob Stoklund Olesen /// used for trailing CHECK-NOTs. 7038820972SMatt Arsenault CheckEOF 7138820972SMatt Arsenault }; 7238820972SMatt Arsenault } 73eba55822SJakob Stoklund Olesen 7438820972SMatt Arsenault class Pattern { 7538820972SMatt Arsenault SMLoc PatternLoc; 7691a1b2c9SMichael Liao 7738820972SMatt Arsenault Check::CheckType CheckTy; 7891a1b2c9SMichael Liao 79b16ab0c4SChris Lattner /// FixedStr - If non-empty, this pattern is a fixed string match with the 80b16ab0c4SChris Lattner /// specified fixed string. 81221460e0SChris Lattner StringRef FixedStr; 82b16ab0c4SChris Lattner 83b16ab0c4SChris Lattner /// RegEx - If non-empty, this is a regex pattern. 84b16ab0c4SChris Lattner std::string RegExStr; 858879e06dSChris Lattner 8692987fb3SAlexander Kornienko /// \brief Contains the number of line this pattern is in. 8792987fb3SAlexander Kornienko unsigned LineNumber; 8892987fb3SAlexander Kornienko 898879e06dSChris Lattner /// VariableUses - Entries in this vector map to uses of a variable in the 908879e06dSChris Lattner /// pattern, e.g. "foo[[bar]]baz". In this case, the RegExStr will contain 918879e06dSChris Lattner /// "foobaz" and we'll get an entry in this vector that tells us to insert the 928879e06dSChris Lattner /// value of bar at offset 3. 
938879e06dSChris Lattner std::vector<std::pair<StringRef, unsigned> > VariableUses; 948879e06dSChris Lattner 95e8b8f1bcSEli Bendersky /// VariableDefs - Maps definitions of variables to their parenthesized 96e8b8f1bcSEli Bendersky /// capture numbers. 97e8b8f1bcSEli Bendersky /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1. 98e8b8f1bcSEli Bendersky std::map<StringRef, unsigned> VariableDefs; 998879e06dSChris Lattner 1003b40b445SChris Lattner public: 1013b40b445SChris Lattner 10238820972SMatt Arsenault Pattern(Check::CheckType Ty) 10338820972SMatt Arsenault : CheckTy(Ty) { } 10474d50731SChris Lattner 1050b707eb8SMichael Liao /// getLoc - Return the location in source code. 1060b707eb8SMichael Liao SMLoc getLoc() const { return PatternLoc; } 1070b707eb8SMichael Liao 10813df4626SMatt Arsenault /// ParsePattern - Parse the given string into the Pattern. Prefix provides 10913df4626SMatt Arsenault /// which prefix is being matched, SM provides the SourceMgr used for error 11013df4626SMatt Arsenault /// reports, and LineNumber is the line number in the input file from which 11113df4626SMatt Arsenault /// the pattern string was read. Returns true in case of an error, false 11213df4626SMatt Arsenault /// otherwise. 11313df4626SMatt Arsenault bool ParsePattern(StringRef PatternStr, 11413df4626SMatt Arsenault StringRef Prefix, 11513df4626SMatt Arsenault SourceMgr &SM, 11613df4626SMatt Arsenault unsigned LineNumber); 1173b40b445SChris Lattner 1183b40b445SChris Lattner /// Match - Match the pattern string against the input buffer Buffer. This 1193b40b445SChris Lattner /// returns the position that is matched or npos if there is no match. If 1203b40b445SChris Lattner /// there is a match, the size of the matched string is returned in MatchLen. 
1218879e06dSChris Lattner /// 1228879e06dSChris Lattner /// The VariableTable StringMap provides the current values of filecheck 1238879e06dSChris Lattner /// variables and is updated if this match defines new values. 1248879e06dSChris Lattner size_t Match(StringRef Buffer, size_t &MatchLen, 1258879e06dSChris Lattner StringMap<StringRef> &VariableTable) const; 126b16ab0c4SChris Lattner 127e0ef65abSDaniel Dunbar /// PrintFailureInfo - Print additional information about a failure to match 128e0ef65abSDaniel Dunbar /// involving this pattern. 129e0ef65abSDaniel Dunbar void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer, 130e0ef65abSDaniel Dunbar const StringMap<StringRef> &VariableTable) const; 131e0ef65abSDaniel Dunbar 132f8bd2e5bSStephen Lin bool hasVariable() const { return !(VariableUses.empty() && 133f8bd2e5bSStephen Lin VariableDefs.empty()); } 134f8bd2e5bSStephen Lin 13538820972SMatt Arsenault Check::CheckType getCheckTy() const { return CheckTy; } 13691a1b2c9SMichael Liao 137b16ab0c4SChris Lattner private: 138e8b8f1bcSEli Bendersky bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM); 139e8b8f1bcSEli Bendersky void AddBackrefToRegEx(unsigned BackrefNum); 140fd29d886SDaniel Dunbar 141fd29d886SDaniel Dunbar /// ComputeMatchDistance - Compute an arbitrary estimate for the quality of 142fd29d886SDaniel Dunbar /// matching this pattern at the start of \arg Buffer; a distance of zero 143fd29d886SDaniel Dunbar /// should correspond to a perfect match. 144fd29d886SDaniel Dunbar unsigned ComputeMatchDistance(StringRef Buffer, 145fd29d886SDaniel Dunbar const StringMap<StringRef> &VariableTable) const; 14692987fb3SAlexander Kornienko 14792987fb3SAlexander Kornienko /// \brief Evaluates expression and stores the result to \p Value. 14892987fb3SAlexander Kornienko /// \return true on success. false when the expression has invalid syntax. 
14992987fb3SAlexander Kornienko bool EvaluateExpression(StringRef Expr, std::string &Value) const; 150061d2baaSEli Bendersky 151061d2baaSEli Bendersky /// \brief Finds the closing sequence of a regex variable usage or 152061d2baaSEli Bendersky /// definition. Str has to point in the beginning of the definition 153061d2baaSEli Bendersky /// (right after the opening sequence). 154061d2baaSEli Bendersky /// \return offset of the closing sequence within Str, or npos if it was not 155061d2baaSEli Bendersky /// found. 15681e5cd9eSAdrian Prantl size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM); 1573b40b445SChris Lattner }; 1583b40b445SChris Lattner 1598879e06dSChris Lattner 16013df4626SMatt Arsenault bool Pattern::ParsePattern(StringRef PatternStr, 16113df4626SMatt Arsenault StringRef Prefix, 16213df4626SMatt Arsenault SourceMgr &SM, 16392987fb3SAlexander Kornienko unsigned LineNumber) { 16492987fb3SAlexander Kornienko this->LineNumber = LineNumber; 1650a4c44bdSChris Lattner PatternLoc = SMLoc::getFromPointer(PatternStr.data()); 1660a4c44bdSChris Lattner 16774d50731SChris Lattner // Ignore trailing whitespace. 16874d50731SChris Lattner while (!PatternStr.empty() && 16974d50731SChris Lattner (PatternStr.back() == ' ' || PatternStr.back() == '\t')) 17074d50731SChris Lattner PatternStr = PatternStr.substr(0, PatternStr.size()-1); 17174d50731SChris Lattner 17274d50731SChris Lattner // Check that there is something on the line. 17374d50731SChris Lattner if (PatternStr.empty()) { 17403b80a40SChris Lattner SM.PrintMessage(PatternLoc, SourceMgr::DK_Error, 17503b80a40SChris Lattner "found empty check string with prefix '" + 17613df4626SMatt Arsenault Prefix + ":'"); 17774d50731SChris Lattner return true; 17874d50731SChris Lattner } 17974d50731SChris Lattner 180221460e0SChris Lattner // Check to see if this is a fixed string, or if it has regex pieces. 
181d9466967STed Kremenek if (PatternStr.size() < 2 || 1828879e06dSChris Lattner (PatternStr.find("{{") == StringRef::npos && 1838879e06dSChris Lattner PatternStr.find("[[") == StringRef::npos)) { 184221460e0SChris Lattner FixedStr = PatternStr; 185221460e0SChris Lattner return false; 186221460e0SChris Lattner } 187221460e0SChris Lattner 1888879e06dSChris Lattner // Paren value #0 is for the fully matched string. Any new parenthesized 18953e0679dSChris Lattner // values add from there. 1908879e06dSChris Lattner unsigned CurParen = 1; 1918879e06dSChris Lattner 192b16ab0c4SChris Lattner // Otherwise, there is at least one regex piece. Build up the regex pattern 193b16ab0c4SChris Lattner // by escaping scary characters in fixed strings, building up one big regex. 194f08d2db9SChris Lattner while (!PatternStr.empty()) { 1958879e06dSChris Lattner // RegEx matches. 19653e0679dSChris Lattner if (PatternStr.startswith("{{")) { 19743d50d4aSEli Bendersky // This is the start of a regex match. Scan for the }}. 198f08d2db9SChris Lattner size_t End = PatternStr.find("}}"); 199f08d2db9SChris Lattner if (End == StringRef::npos) { 200f08d2db9SChris Lattner SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 20103b80a40SChris Lattner SourceMgr::DK_Error, 20203b80a40SChris Lattner "found start of regex string with no end '}}'"); 203f08d2db9SChris Lattner return true; 204f08d2db9SChris Lattner } 205f08d2db9SChris Lattner 206e53c95f1SChris Lattner // Enclose {{}} patterns in parens just like [[]] even though we're not 207e53c95f1SChris Lattner // capturing the result for any purpose. This is required in case the 208e53c95f1SChris Lattner // expression contains an alternation like: CHECK: abc{{x|z}}def. We 209e53c95f1SChris Lattner // want this to turn into: "abc(x|z)def" not "abcx|zdef". 
210e53c95f1SChris Lattner RegExStr += '('; 211e53c95f1SChris Lattner ++CurParen; 212e53c95f1SChris Lattner 2138879e06dSChris Lattner if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM)) 2148879e06dSChris Lattner return true; 215e53c95f1SChris Lattner RegExStr += ')'; 21653e0679dSChris Lattner 2178879e06dSChris Lattner PatternStr = PatternStr.substr(End+2); 2188879e06dSChris Lattner continue; 2198879e06dSChris Lattner } 2208879e06dSChris Lattner 2218879e06dSChris Lattner // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .* 2228879e06dSChris Lattner // (or some other regex) and assigns it to the FileCheck variable 'foo'. The 2238879e06dSChris Lattner // second form is [[foo]] which is a reference to foo. The variable name 22457cb733bSDaniel Dunbar // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject 2258879e06dSChris Lattner // it. This is to catch some common errors. 22653e0679dSChris Lattner if (PatternStr.startswith("[[")) { 227061d2baaSEli Bendersky // Find the closing bracket pair ending the match. End is going to be an 228061d2baaSEli Bendersky // offset relative to the beginning of the match string. 22981e5cd9eSAdrian Prantl size_t End = FindRegexVarEnd(PatternStr.substr(2), SM); 230061d2baaSEli Bendersky 2318879e06dSChris Lattner if (End == StringRef::npos) { 2328879e06dSChris Lattner SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 23303b80a40SChris Lattner SourceMgr::DK_Error, 23403b80a40SChris Lattner "invalid named regex reference, no ]] found"); 235f08d2db9SChris Lattner return true; 236f08d2db9SChris Lattner } 237f08d2db9SChris Lattner 238061d2baaSEli Bendersky StringRef MatchStr = PatternStr.substr(2, End); 239061d2baaSEli Bendersky PatternStr = PatternStr.substr(End+4); 2408879e06dSChris Lattner 2418879e06dSChris Lattner // Get the regex name (e.g. "foo"). 
2428879e06dSChris Lattner size_t NameEnd = MatchStr.find(':'); 2438879e06dSChris Lattner StringRef Name = MatchStr.substr(0, NameEnd); 2448879e06dSChris Lattner 2458879e06dSChris Lattner if (Name.empty()) { 24603b80a40SChris Lattner SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 24703b80a40SChris Lattner "invalid name in named regex: empty name"); 2488879e06dSChris Lattner return true; 2498879e06dSChris Lattner } 2508879e06dSChris Lattner 25192987fb3SAlexander Kornienko // Verify that the name/expression is well formed. FileCheck currently 25292987fb3SAlexander Kornienko // supports @LINE, @LINE+number, @LINE-number expressions. The check here 25392987fb3SAlexander Kornienko // is relaxed, more strict check is performed in \c EvaluateExpression. 25492987fb3SAlexander Kornienko bool IsExpression = false; 25592987fb3SAlexander Kornienko for (unsigned i = 0, e = Name.size(); i != e; ++i) { 25692987fb3SAlexander Kornienko if (i == 0 && Name[i] == '@') { 25792987fb3SAlexander Kornienko if (NameEnd != StringRef::npos) { 25892987fb3SAlexander Kornienko SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 25992987fb3SAlexander Kornienko SourceMgr::DK_Error, 26092987fb3SAlexander Kornienko "invalid name in named regex definition"); 26192987fb3SAlexander Kornienko return true; 26292987fb3SAlexander Kornienko } 26392987fb3SAlexander Kornienko IsExpression = true; 26492987fb3SAlexander Kornienko continue; 26592987fb3SAlexander Kornienko } 26692987fb3SAlexander Kornienko if (Name[i] != '_' && !isalnum(Name[i]) && 26792987fb3SAlexander Kornienko (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) { 2688879e06dSChris Lattner SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i), 26903b80a40SChris Lattner SourceMgr::DK_Error, "invalid name in named regex"); 2708879e06dSChris Lattner return true; 2718879e06dSChris Lattner } 27292987fb3SAlexander Kornienko } 2738879e06dSChris Lattner 2748879e06dSChris Lattner // Name can't start with a digit. 
27583c74e9fSGuy Benyei if (isdigit(static_cast<unsigned char>(Name[0]))) { 27603b80a40SChris Lattner SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 27703b80a40SChris Lattner "invalid name in named regex"); 2788879e06dSChris Lattner return true; 2798879e06dSChris Lattner } 2808879e06dSChris Lattner 2818879e06dSChris Lattner // Handle [[foo]]. 2828879e06dSChris Lattner if (NameEnd == StringRef::npos) { 283e8b8f1bcSEli Bendersky // Handle variables that were defined earlier on the same line by 284e8b8f1bcSEli Bendersky // emitting a backreference. 285e8b8f1bcSEli Bendersky if (VariableDefs.find(Name) != VariableDefs.end()) { 286e8b8f1bcSEli Bendersky unsigned VarParenNum = VariableDefs[Name]; 287e8b8f1bcSEli Bendersky if (VarParenNum < 1 || VarParenNum > 9) { 288e8b8f1bcSEli Bendersky SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 289e8b8f1bcSEli Bendersky SourceMgr::DK_Error, 290e8b8f1bcSEli Bendersky "Can't back-reference more than 9 variables"); 291e8b8f1bcSEli Bendersky return true; 292e8b8f1bcSEli Bendersky } 293e8b8f1bcSEli Bendersky AddBackrefToRegEx(VarParenNum); 294e8b8f1bcSEli Bendersky } else { 2958879e06dSChris Lattner VariableUses.push_back(std::make_pair(Name, RegExStr.size())); 296e8b8f1bcSEli Bendersky } 2978879e06dSChris Lattner continue; 2988879e06dSChris Lattner } 2998879e06dSChris Lattner 3008879e06dSChris Lattner // Handle [[foo:.*]]. 301e8b8f1bcSEli Bendersky VariableDefs[Name] = CurParen; 3028879e06dSChris Lattner RegExStr += '('; 3038879e06dSChris Lattner ++CurParen; 3048879e06dSChris Lattner 3058879e06dSChris Lattner if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM)) 3068879e06dSChris Lattner return true; 3078879e06dSChris Lattner 3088879e06dSChris Lattner RegExStr += ')'; 3098879e06dSChris Lattner } 3108879e06dSChris Lattner 3118879e06dSChris Lattner // Handle fixed string matches. 3128879e06dSChris Lattner // Find the end, which is the start of the next regex. 
3138879e06dSChris Lattner size_t FixedMatchEnd = PatternStr.find("{{"); 3148879e06dSChris Lattner FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[[")); 3156f4f77b7SHans Wennborg RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd)); 3168879e06dSChris Lattner PatternStr = PatternStr.substr(FixedMatchEnd); 317f08d2db9SChris Lattner } 318f08d2db9SChris Lattner 31974d50731SChris Lattner return false; 32074d50731SChris Lattner } 32174d50731SChris Lattner 322e8b8f1bcSEli Bendersky bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, 3238879e06dSChris Lattner SourceMgr &SM) { 324e8b8f1bcSEli Bendersky Regex R(RS); 3258879e06dSChris Lattner std::string Error; 3268879e06dSChris Lattner if (!R.isValid(Error)) { 327e8b8f1bcSEli Bendersky SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error, 32803b80a40SChris Lattner "invalid regex: " + Error); 3298879e06dSChris Lattner return true; 3308879e06dSChris Lattner } 3318879e06dSChris Lattner 332e8b8f1bcSEli Bendersky RegExStr += RS.str(); 3338879e06dSChris Lattner CurParen += R.getNumMatches(); 3348879e06dSChris Lattner return false; 3358879e06dSChris Lattner } 336b16ab0c4SChris Lattner 337e8b8f1bcSEli Bendersky void Pattern::AddBackrefToRegEx(unsigned BackrefNum) { 338e8b8f1bcSEli Bendersky assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number"); 339e8b8f1bcSEli Bendersky std::string Backref = std::string("\\") + 340e8b8f1bcSEli Bendersky std::string(1, '0' + BackrefNum); 341e8b8f1bcSEli Bendersky RegExStr += Backref; 342e8b8f1bcSEli Bendersky } 343e8b8f1bcSEli Bendersky 34492987fb3SAlexander Kornienko bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const { 34592987fb3SAlexander Kornienko // The only supported expression is @LINE([\+-]\d+)? 
34692987fb3SAlexander Kornienko if (!Expr.startswith("@LINE")) 34792987fb3SAlexander Kornienko return false; 34892987fb3SAlexander Kornienko Expr = Expr.substr(StringRef("@LINE").size()); 34992987fb3SAlexander Kornienko int Offset = 0; 35092987fb3SAlexander Kornienko if (!Expr.empty()) { 35192987fb3SAlexander Kornienko if (Expr[0] == '+') 35292987fb3SAlexander Kornienko Expr = Expr.substr(1); 35392987fb3SAlexander Kornienko else if (Expr[0] != '-') 35492987fb3SAlexander Kornienko return false; 35592987fb3SAlexander Kornienko if (Expr.getAsInteger(10, Offset)) 35692987fb3SAlexander Kornienko return false; 35792987fb3SAlexander Kornienko } 35892987fb3SAlexander Kornienko Value = llvm::itostr(LineNumber + Offset); 35992987fb3SAlexander Kornienko return true; 36092987fb3SAlexander Kornienko } 36192987fb3SAlexander Kornienko 362f08d2db9SChris Lattner /// Match - Match the pattern string against the input buffer Buffer. This 363f08d2db9SChris Lattner /// returns the position that is matched or npos if there is no match. If 364f08d2db9SChris Lattner /// there is a match, the size of the matched string is returned in MatchLen. 3658879e06dSChris Lattner size_t Pattern::Match(StringRef Buffer, size_t &MatchLen, 3668879e06dSChris Lattner StringMap<StringRef> &VariableTable) const { 367eba55822SJakob Stoklund Olesen // If this is the EOF pattern, match it immediately. 36838820972SMatt Arsenault if (CheckTy == Check::CheckEOF) { 369eba55822SJakob Stoklund Olesen MatchLen = 0; 370eba55822SJakob Stoklund Olesen return Buffer.size(); 371eba55822SJakob Stoklund Olesen } 372eba55822SJakob Stoklund Olesen 373221460e0SChris Lattner // If this is a fixed string pattern, just match it now. 374221460e0SChris Lattner if (!FixedStr.empty()) { 375221460e0SChris Lattner MatchLen = FixedStr.size(); 376221460e0SChris Lattner return Buffer.find(FixedStr); 377221460e0SChris Lattner } 378221460e0SChris Lattner 379b16ab0c4SChris Lattner // Regex match. 
3808879e06dSChris Lattner 3818879e06dSChris Lattner // If there are variable uses, we need to create a temporary string with the 3828879e06dSChris Lattner // actual value. 3838879e06dSChris Lattner StringRef RegExToMatch = RegExStr; 3848879e06dSChris Lattner std::string TmpStr; 3858879e06dSChris Lattner if (!VariableUses.empty()) { 3868879e06dSChris Lattner TmpStr = RegExStr; 3878879e06dSChris Lattner 3888879e06dSChris Lattner unsigned InsertOffset = 0; 3898879e06dSChris Lattner for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) { 39092987fb3SAlexander Kornienko std::string Value; 39192987fb3SAlexander Kornienko 39292987fb3SAlexander Kornienko if (VariableUses[i].first[0] == '@') { 39392987fb3SAlexander Kornienko if (!EvaluateExpression(VariableUses[i].first, Value)) 39492987fb3SAlexander Kornienko return StringRef::npos; 39592987fb3SAlexander Kornienko } else { 396e0ef65abSDaniel Dunbar StringMap<StringRef>::iterator it = 397e0ef65abSDaniel Dunbar VariableTable.find(VariableUses[i].first); 398e0ef65abSDaniel Dunbar // If the variable is undefined, return an error. 399e0ef65abSDaniel Dunbar if (it == VariableTable.end()) 400e0ef65abSDaniel Dunbar return StringRef::npos; 401e0ef65abSDaniel Dunbar 4026f4f77b7SHans Wennborg // Look up the value and escape it so that we can put it into the regex. 4036f4f77b7SHans Wennborg Value += Regex::escape(it->second); 40492987fb3SAlexander Kornienko } 4058879e06dSChris Lattner 4068879e06dSChris Lattner // Plop it into the regex at the adjusted offset. 4078879e06dSChris Lattner TmpStr.insert(TmpStr.begin()+VariableUses[i].second+InsertOffset, 4088879e06dSChris Lattner Value.begin(), Value.end()); 4098879e06dSChris Lattner InsertOffset += Value.size(); 4108879e06dSChris Lattner } 4118879e06dSChris Lattner 4128879e06dSChris Lattner // Match the newly constructed regex. 
4138879e06dSChris Lattner RegExToMatch = TmpStr; 4148879e06dSChris Lattner } 4158879e06dSChris Lattner 4168879e06dSChris Lattner 417b16ab0c4SChris Lattner SmallVector<StringRef, 4> MatchInfo; 4188879e06dSChris Lattner if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo)) 419f08d2db9SChris Lattner return StringRef::npos; 420b16ab0c4SChris Lattner 421b16ab0c4SChris Lattner // Successful regex match. 422b16ab0c4SChris Lattner assert(!MatchInfo.empty() && "Didn't get any match"); 423b16ab0c4SChris Lattner StringRef FullMatch = MatchInfo[0]; 424b16ab0c4SChris Lattner 4258879e06dSChris Lattner // If this defines any variables, remember their values. 426e8b8f1bcSEli Bendersky for (std::map<StringRef, unsigned>::const_iterator I = VariableDefs.begin(), 427e8b8f1bcSEli Bendersky E = VariableDefs.end(); 428e8b8f1bcSEli Bendersky I != E; ++I) { 429e8b8f1bcSEli Bendersky assert(I->second < MatchInfo.size() && "Internal paren error"); 430e8b8f1bcSEli Bendersky VariableTable[I->first] = MatchInfo[I->second]; 4310a4c44bdSChris Lattner } 4320a4c44bdSChris Lattner 433b16ab0c4SChris Lattner MatchLen = FullMatch.size(); 434b16ab0c4SChris Lattner return FullMatch.data()-Buffer.data(); 435f08d2db9SChris Lattner } 436f08d2db9SChris Lattner 437fd29d886SDaniel Dunbar unsigned Pattern::ComputeMatchDistance(StringRef Buffer, 438fd29d886SDaniel Dunbar const StringMap<StringRef> &VariableTable) const { 439fd29d886SDaniel Dunbar // Just compute the number of matching characters. For regular expressions, we 440fd29d886SDaniel Dunbar // just compare against the regex itself and hope for the best. 441fd29d886SDaniel Dunbar // 442fd29d886SDaniel Dunbar // FIXME: One easy improvement here is have the regex lib generate a single 443fd29d886SDaniel Dunbar // example regular expression which matches, and use that as the example 444fd29d886SDaniel Dunbar // string. 
445fd29d886SDaniel Dunbar StringRef ExampleString(FixedStr); 446fd29d886SDaniel Dunbar if (ExampleString.empty()) 447fd29d886SDaniel Dunbar ExampleString = RegExStr; 448fd29d886SDaniel Dunbar 449e9aa36c8SDaniel Dunbar // Only compare up to the first line in the buffer, or the string size. 450e9aa36c8SDaniel Dunbar StringRef BufferPrefix = Buffer.substr(0, ExampleString.size()); 451e9aa36c8SDaniel Dunbar BufferPrefix = BufferPrefix.split('\n').first; 452e9aa36c8SDaniel Dunbar return BufferPrefix.edit_distance(ExampleString); 453fd29d886SDaniel Dunbar } 454fd29d886SDaniel Dunbar 455e0ef65abSDaniel Dunbar void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer, 456e0ef65abSDaniel Dunbar const StringMap<StringRef> &VariableTable) const{ 457e0ef65abSDaniel Dunbar // If this was a regular expression using variables, print the current 458e0ef65abSDaniel Dunbar // variable values. 459e0ef65abSDaniel Dunbar if (!VariableUses.empty()) { 460e0ef65abSDaniel Dunbar for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) { 461e0ef65abSDaniel Dunbar SmallString<256> Msg; 462e0ef65abSDaniel Dunbar raw_svector_ostream OS(Msg); 46392987fb3SAlexander Kornienko StringRef Var = VariableUses[i].first; 46492987fb3SAlexander Kornienko if (Var[0] == '@') { 46592987fb3SAlexander Kornienko std::string Value; 46692987fb3SAlexander Kornienko if (EvaluateExpression(Var, Value)) { 46792987fb3SAlexander Kornienko OS << "with expression \""; 46892987fb3SAlexander Kornienko OS.write_escaped(Var) << "\" equal to \""; 46992987fb3SAlexander Kornienko OS.write_escaped(Value) << "\""; 47092987fb3SAlexander Kornienko } else { 47192987fb3SAlexander Kornienko OS << "uses incorrect expression \""; 47292987fb3SAlexander Kornienko OS.write_escaped(Var) << "\""; 47392987fb3SAlexander Kornienko } 47492987fb3SAlexander Kornienko } else { 47592987fb3SAlexander Kornienko StringMap<StringRef>::const_iterator it = VariableTable.find(Var); 476e0ef65abSDaniel Dunbar 477e0ef65abSDaniel Dunbar // Check 
for undefined variable references. 478e0ef65abSDaniel Dunbar if (it == VariableTable.end()) { 479e0ef65abSDaniel Dunbar OS << "uses undefined variable \""; 48092987fb3SAlexander Kornienko OS.write_escaped(Var) << "\""; 481e0ef65abSDaniel Dunbar } else { 482e0ef65abSDaniel Dunbar OS << "with variable \""; 483e0ef65abSDaniel Dunbar OS.write_escaped(Var) << "\" equal to \""; 484e0ef65abSDaniel Dunbar OS.write_escaped(it->second) << "\""; 485e0ef65abSDaniel Dunbar } 48692987fb3SAlexander Kornienko } 487e0ef65abSDaniel Dunbar 48803b80a40SChris Lattner SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 48903b80a40SChris Lattner OS.str()); 490e0ef65abSDaniel Dunbar } 491e0ef65abSDaniel Dunbar } 492fd29d886SDaniel Dunbar 493fd29d886SDaniel Dunbar // Attempt to find the closest/best fuzzy match. Usually an error happens 494fd29d886SDaniel Dunbar // because some string in the output didn't exactly match. In these cases, we 495fd29d886SDaniel Dunbar // would like to show the user a best guess at what "should have" matched, to 496fd29d886SDaniel Dunbar // save them having to actually check the input manually. 497fd29d886SDaniel Dunbar size_t NumLinesForward = 0; 498fd29d886SDaniel Dunbar size_t Best = StringRef::npos; 499fd29d886SDaniel Dunbar double BestQuality = 0; 500fd29d886SDaniel Dunbar 501fd29d886SDaniel Dunbar // Use an arbitrary 4k limit on how far we will search. 5022bf486ebSDan Gohman for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) { 503fd29d886SDaniel Dunbar if (Buffer[i] == '\n') 504fd29d886SDaniel Dunbar ++NumLinesForward; 505fd29d886SDaniel Dunbar 506df22bbf7SDan Gohman // Patterns have leading whitespace stripped, so skip whitespace when 507df22bbf7SDan Gohman // looking for something which looks like a pattern. 
508df22bbf7SDan Gohman if (Buffer[i] == ' ' || Buffer[i] == '\t') 509df22bbf7SDan Gohman continue; 510df22bbf7SDan Gohman 511fd29d886SDaniel Dunbar // Compute the "quality" of this match as an arbitrary combination of the 512fd29d886SDaniel Dunbar // match distance and the number of lines skipped to get to this match. 513fd29d886SDaniel Dunbar unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable); 514fd29d886SDaniel Dunbar double Quality = Distance + (NumLinesForward / 100.); 515fd29d886SDaniel Dunbar 516fd29d886SDaniel Dunbar if (Quality < BestQuality || Best == StringRef::npos) { 517fd29d886SDaniel Dunbar Best = i; 518fd29d886SDaniel Dunbar BestQuality = Quality; 519fd29d886SDaniel Dunbar } 520fd29d886SDaniel Dunbar } 521fd29d886SDaniel Dunbar 522fd29d886SDaniel Dunbar // Print the "possible intended match here" line if we found something 523c069cc8eSDaniel Dunbar // reasonable and not equal to what we showed in the "scanning from here" 524c069cc8eSDaniel Dunbar // line. 525c069cc8eSDaniel Dunbar if (Best && Best != StringRef::npos && BestQuality < 50) { 526fd29d886SDaniel Dunbar SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best), 52703b80a40SChris Lattner SourceMgr::DK_Note, "possible intended match here"); 528fd29d886SDaniel Dunbar 529fd29d886SDaniel Dunbar // FIXME: If we wanted to be really friendly we would show why the match 530fd29d886SDaniel Dunbar // failed, as it can be hard to spot simple one character differences. 531fd29d886SDaniel Dunbar } 532e0ef65abSDaniel Dunbar } 53374d50731SChris Lattner 53481e5cd9eSAdrian Prantl size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) { 535061d2baaSEli Bendersky // Offset keeps track of the current offset within the input Str 536061d2baaSEli Bendersky size_t Offset = 0; 537061d2baaSEli Bendersky // [...] 
Nesting depth 538061d2baaSEli Bendersky size_t BracketDepth = 0; 539061d2baaSEli Bendersky 540061d2baaSEli Bendersky while (!Str.empty()) { 541061d2baaSEli Bendersky if (Str.startswith("]]") && BracketDepth == 0) 542061d2baaSEli Bendersky return Offset; 543061d2baaSEli Bendersky if (Str[0] == '\\') { 544061d2baaSEli Bendersky // Backslash escapes the next char within regexes, so skip them both. 545061d2baaSEli Bendersky Str = Str.substr(2); 546061d2baaSEli Bendersky Offset += 2; 547061d2baaSEli Bendersky } else { 548061d2baaSEli Bendersky switch (Str[0]) { 549061d2baaSEli Bendersky default: 550061d2baaSEli Bendersky break; 551061d2baaSEli Bendersky case '[': 552061d2baaSEli Bendersky BracketDepth++; 553061d2baaSEli Bendersky break; 554061d2baaSEli Bendersky case ']': 55581e5cd9eSAdrian Prantl if (BracketDepth == 0) { 55681e5cd9eSAdrian Prantl SM.PrintMessage(SMLoc::getFromPointer(Str.data()), 55781e5cd9eSAdrian Prantl SourceMgr::DK_Error, 55881e5cd9eSAdrian Prantl "missing closing \"]\" for regex variable"); 55981e5cd9eSAdrian Prantl exit(1); 56081e5cd9eSAdrian Prantl } 561061d2baaSEli Bendersky BracketDepth--; 562061d2baaSEli Bendersky break; 563061d2baaSEli Bendersky } 564061d2baaSEli Bendersky Str = Str.substr(1); 565061d2baaSEli Bendersky Offset++; 566061d2baaSEli Bendersky } 567061d2baaSEli Bendersky } 568061d2baaSEli Bendersky 569061d2baaSEli Bendersky return StringRef::npos; 570061d2baaSEli Bendersky } 571061d2baaSEli Bendersky 572061d2baaSEli Bendersky 57374d50731SChris Lattner //===----------------------------------------------------------------------===// 57474d50731SChris Lattner // Check Strings. 57574d50731SChris Lattner //===----------------------------------------------------------------------===// 5763b40b445SChris Lattner 5773b40b445SChris Lattner /// CheckString - This is a check that we found in the input file. 5783b40b445SChris Lattner struct CheckString { 5793b40b445SChris Lattner /// Pat - The pattern to match. 
5803b40b445SChris Lattner Pattern Pat; 58126cccfe1SChris Lattner 58213df4626SMatt Arsenault /// Prefix - Which prefix name this check matched. 58313df4626SMatt Arsenault StringRef Prefix; 58413df4626SMatt Arsenault 58526cccfe1SChris Lattner /// Loc - The location in the match file that the check string was specified. 58626cccfe1SChris Lattner SMLoc Loc; 58726cccfe1SChris Lattner 58838820972SMatt Arsenault /// CheckTy - Specify what kind of check this is. e.g. CHECK-NEXT: directive, 58938820972SMatt Arsenault /// as opposed to a CHECK: directive. 59038820972SMatt Arsenault Check::CheckType CheckTy; 591f8bd2e5bSStephen Lin 59291a1b2c9SMichael Liao /// DagNotStrings - These are all of the strings that are disallowed from 593236d2d5eSChris Lattner /// occurring between this match string and the previous one (or start of 594236d2d5eSChris Lattner /// file). 59591a1b2c9SMichael Liao std::vector<Pattern> DagNotStrings; 596236d2d5eSChris Lattner 59713df4626SMatt Arsenault 59813df4626SMatt Arsenault CheckString(const Pattern &P, 59913df4626SMatt Arsenault StringRef S, 60013df4626SMatt Arsenault SMLoc L, 60113df4626SMatt Arsenault Check::CheckType Ty) 60213df4626SMatt Arsenault : Pat(P), Prefix(S), Loc(L), CheckTy(Ty) {} 603dcc7d48dSMichael Liao 60491a1b2c9SMichael Liao /// Check - Match check string and its "not strings" and/or "dag strings". 605e93a3a08SStephen Lin size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode, 606f8bd2e5bSStephen Lin size_t &MatchLen, StringMap<StringRef> &VariableTable) const; 607dcc7d48dSMichael Liao 608dcc7d48dSMichael Liao /// CheckNext - Verify there is a single line in the given buffer. 609dcc7d48dSMichael Liao bool CheckNext(const SourceMgr &SM, StringRef Buffer) const; 610dcc7d48dSMichael Liao 611dcc7d48dSMichael Liao /// CheckNot - Verify there's no "not strings" in the given buffer. 
612dcc7d48dSMichael Liao bool CheckNot(const SourceMgr &SM, StringRef Buffer, 61391a1b2c9SMichael Liao const std::vector<const Pattern *> &NotStrings, 61491a1b2c9SMichael Liao StringMap<StringRef> &VariableTable) const; 61591a1b2c9SMichael Liao 61691a1b2c9SMichael Liao /// CheckDag - Match "dag strings" and their mixed "not strings". 61791a1b2c9SMichael Liao size_t CheckDag(const SourceMgr &SM, StringRef Buffer, 61891a1b2c9SMichael Liao std::vector<const Pattern *> &NotStrings, 619dcc7d48dSMichael Liao StringMap<StringRef> &VariableTable) const; 62026cccfe1SChris Lattner }; 62126cccfe1SChris Lattner 6225ea04c38SGuy Benyei /// Canonicalize whitespaces in the input file. Line endings are replaced 6235ea04c38SGuy Benyei /// with UNIX-style '\n'. 6245ea04c38SGuy Benyei /// 6255ea04c38SGuy Benyei /// \param PreserveHorizontal Don't squash consecutive horizontal whitespace 6265ea04c38SGuy Benyei /// characters to a single space. 6275ea04c38SGuy Benyei static MemoryBuffer *CanonicalizeInputFile(MemoryBuffer *MB, 6285ea04c38SGuy Benyei bool PreserveHorizontal) { 6290e45d24aSChris Lattner SmallString<128> NewFile; 630a2f8fc5aSChris Lattner NewFile.reserve(MB->getBufferSize()); 631a2f8fc5aSChris Lattner 632a2f8fc5aSChris Lattner for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd(); 633a2f8fc5aSChris Lattner Ptr != End; ++Ptr) { 634fd781bf0SNAKAMURA Takumi // Eliminate trailing dosish \r. 635fd781bf0SNAKAMURA Takumi if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') { 636fd781bf0SNAKAMURA Takumi continue; 637fd781bf0SNAKAMURA Takumi } 638fd781bf0SNAKAMURA Takumi 6395ea04c38SGuy Benyei // If current char is not a horizontal whitespace or if horizontal 6405ea04c38SGuy Benyei // whitespace canonicalization is disabled, dump it to output as is. 
6415ea04c38SGuy Benyei if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) { 642a2f8fc5aSChris Lattner NewFile.push_back(*Ptr); 643a2f8fc5aSChris Lattner continue; 644a2f8fc5aSChris Lattner } 645a2f8fc5aSChris Lattner 646a2f8fc5aSChris Lattner // Otherwise, add one space and advance over neighboring space. 647a2f8fc5aSChris Lattner NewFile.push_back(' '); 648a2f8fc5aSChris Lattner while (Ptr+1 != End && 649a2f8fc5aSChris Lattner (Ptr[1] == ' ' || Ptr[1] == '\t')) 650a2f8fc5aSChris Lattner ++Ptr; 651a2f8fc5aSChris Lattner } 652a2f8fc5aSChris Lattner 653a2f8fc5aSChris Lattner // Free the old buffer and return a new one. 654a2f8fc5aSChris Lattner MemoryBuffer *MB2 = 6550e45d24aSChris Lattner MemoryBuffer::getMemBufferCopy(NewFile.str(), MB->getBufferIdentifier()); 656a2f8fc5aSChris Lattner 657a2f8fc5aSChris Lattner delete MB; 658a2f8fc5aSChris Lattner return MB2; 659a2f8fc5aSChris Lattner } 660a2f8fc5aSChris Lattner 66138820972SMatt Arsenault static bool IsPartOfWord(char c) { 66238820972SMatt Arsenault return (isalnum(c) || c == '-' || c == '_'); 66338820972SMatt Arsenault } 66438820972SMatt Arsenault 66513df4626SMatt Arsenault // Get the size of the prefix extension. 
66613df4626SMatt Arsenault static size_t CheckTypeSize(Check::CheckType Ty) { 66713df4626SMatt Arsenault switch (Ty) { 66813df4626SMatt Arsenault case Check::CheckNone: 66913df4626SMatt Arsenault return 0; 67013df4626SMatt Arsenault 67113df4626SMatt Arsenault case Check::CheckPlain: 67213df4626SMatt Arsenault return sizeof(":") - 1; 67313df4626SMatt Arsenault 67413df4626SMatt Arsenault case Check::CheckNext: 67513df4626SMatt Arsenault return sizeof("-NEXT:") - 1; 67613df4626SMatt Arsenault 67713df4626SMatt Arsenault case Check::CheckNot: 67813df4626SMatt Arsenault return sizeof("-NOT:") - 1; 67913df4626SMatt Arsenault 68013df4626SMatt Arsenault case Check::CheckDAG: 68113df4626SMatt Arsenault return sizeof("-DAG:") - 1; 68213df4626SMatt Arsenault 68313df4626SMatt Arsenault case Check::CheckLabel: 68413df4626SMatt Arsenault return sizeof("-LABEL:") - 1; 68513df4626SMatt Arsenault 68613df4626SMatt Arsenault case Check::CheckEOF: 68713df4626SMatt Arsenault llvm_unreachable("Should not be using EOF size"); 68813df4626SMatt Arsenault } 68913df4626SMatt Arsenault 69013df4626SMatt Arsenault llvm_unreachable("Bad check type"); 69113df4626SMatt Arsenault } 69213df4626SMatt Arsenault 69313df4626SMatt Arsenault static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) { 694c4d2d471SMatt Arsenault char NextChar = Buffer[Prefix.size()]; 69538820972SMatt Arsenault 69638820972SMatt Arsenault // Verify that the : is present after the prefix. 
69713df4626SMatt Arsenault if (NextChar == ':') 69838820972SMatt Arsenault return Check::CheckPlain; 69938820972SMatt Arsenault 70013df4626SMatt Arsenault if (NextChar != '-') 70138820972SMatt Arsenault return Check::CheckNone; 70238820972SMatt Arsenault 703c4d2d471SMatt Arsenault StringRef Rest = Buffer.drop_front(Prefix.size() + 1); 70413df4626SMatt Arsenault if (Rest.startswith("NEXT:")) 70538820972SMatt Arsenault return Check::CheckNext; 70638820972SMatt Arsenault 70713df4626SMatt Arsenault if (Rest.startswith("NOT:")) 70838820972SMatt Arsenault return Check::CheckNot; 70938820972SMatt Arsenault 71013df4626SMatt Arsenault if (Rest.startswith("DAG:")) 71138820972SMatt Arsenault return Check::CheckDAG; 71238820972SMatt Arsenault 71313df4626SMatt Arsenault if (Rest.startswith("LABEL:")) 71438820972SMatt Arsenault return Check::CheckLabel; 71513df4626SMatt Arsenault 71613df4626SMatt Arsenault return Check::CheckNone; 71738820972SMatt Arsenault } 71838820972SMatt Arsenault 71913df4626SMatt Arsenault // From the given position, find the next character after the word. 72013df4626SMatt Arsenault static size_t SkipWord(StringRef Str, size_t Loc) { 72113df4626SMatt Arsenault while (Loc < Str.size() && IsPartOfWord(Str[Loc])) 72213df4626SMatt Arsenault ++Loc; 72313df4626SMatt Arsenault return Loc; 72413df4626SMatt Arsenault } 72513df4626SMatt Arsenault 72613df4626SMatt Arsenault // Try to find the first match in buffer for any prefix. If a valid match is 72713df4626SMatt Arsenault // found, return that prefix and set its type and location. If there are almost 72813df4626SMatt Arsenault // matches (e.g. the actual prefix string is found, but is not an actual check 72913df4626SMatt Arsenault // string), but no valid match, return an empty string and set the position to 73013df4626SMatt Arsenault // resume searching from. If no partial matches are found, return an empty 73113df4626SMatt Arsenault // string and the location will be StringRef::npos. 
If one prefix is a substring 73213df4626SMatt Arsenault // of another, the maximal match should be found. e.g. if "A" and "AA" are 73313df4626SMatt Arsenault // prefixes then AA-CHECK: should match the second one. 73413df4626SMatt Arsenault static StringRef FindFirstCandidateMatch(StringRef &Buffer, 73513df4626SMatt Arsenault Check::CheckType &CheckTy, 73613df4626SMatt Arsenault size_t &CheckLoc) { 73713df4626SMatt Arsenault StringRef FirstPrefix; 73813df4626SMatt Arsenault size_t FirstLoc = StringRef::npos; 73913df4626SMatt Arsenault size_t SearchLoc = StringRef::npos; 74013df4626SMatt Arsenault Check::CheckType FirstTy = Check::CheckNone; 74113df4626SMatt Arsenault 74213df4626SMatt Arsenault CheckTy = Check::CheckNone; 74313df4626SMatt Arsenault CheckLoc = StringRef::npos; 74413df4626SMatt Arsenault 74513df4626SMatt Arsenault for (prefix_iterator I = CheckPrefixes.begin(), E = CheckPrefixes.end(); 74613df4626SMatt Arsenault I != E; ++I) { 74713df4626SMatt Arsenault StringRef Prefix(*I); 74813df4626SMatt Arsenault size_t PrefixLoc = Buffer.find(Prefix); 74913df4626SMatt Arsenault 75013df4626SMatt Arsenault if (PrefixLoc == StringRef::npos) 75113df4626SMatt Arsenault continue; 75213df4626SMatt Arsenault 75313df4626SMatt Arsenault // Track where we are searching for invalid prefixes that look almost right. 75413df4626SMatt Arsenault // We need to only advance to the first partial match on the next attempt 75513df4626SMatt Arsenault // since a partial match could be a substring of a later, valid prefix. 75613df4626SMatt Arsenault // Need to skip to the end of the word, otherwise we could end up 75713df4626SMatt Arsenault // matching a prefix in a substring later. 75813df4626SMatt Arsenault if (PrefixLoc < SearchLoc) 75913df4626SMatt Arsenault SearchLoc = SkipWord(Buffer, PrefixLoc); 76013df4626SMatt Arsenault 76113df4626SMatt Arsenault // We only want to find the first match to avoid skipping some. 
76213df4626SMatt Arsenault if (PrefixLoc > FirstLoc) 76313df4626SMatt Arsenault continue; 764a7181a1bSAlexey Samsonov // If one matching check-prefix is a prefix of another, choose the 765a7181a1bSAlexey Samsonov // longer one. 766a7181a1bSAlexey Samsonov if (PrefixLoc == FirstLoc && Prefix.size() < FirstPrefix.size()) 767a7181a1bSAlexey Samsonov continue; 76813df4626SMatt Arsenault 76913df4626SMatt Arsenault StringRef Rest = Buffer.drop_front(PrefixLoc); 77013df4626SMatt Arsenault // Make sure we have actually found the prefix, and not a word containing 77113df4626SMatt Arsenault // it. This should also prevent matching the wrong prefix when one is a 77213df4626SMatt Arsenault // substring of another. 77313df4626SMatt Arsenault if (PrefixLoc != 0 && IsPartOfWord(Buffer[PrefixLoc - 1])) 77443b5f572SDaniel Sanders FirstTy = Check::CheckNone; 77543b5f572SDaniel Sanders else 77643b5f572SDaniel Sanders FirstTy = FindCheckType(Rest, Prefix); 77713df4626SMatt Arsenault 77813df4626SMatt Arsenault FirstLoc = PrefixLoc; 779a7181a1bSAlexey Samsonov FirstPrefix = Prefix; 78013df4626SMatt Arsenault } 78113df4626SMatt Arsenault 782a7181a1bSAlexey Samsonov // If the first prefix is invalid, we should continue the search after it. 
783a7181a1bSAlexey Samsonov if (FirstTy == Check::CheckNone) { 78413df4626SMatt Arsenault CheckLoc = SearchLoc; 785a7181a1bSAlexey Samsonov return ""; 786a7181a1bSAlexey Samsonov } 787a7181a1bSAlexey Samsonov 78813df4626SMatt Arsenault CheckTy = FirstTy; 78913df4626SMatt Arsenault CheckLoc = FirstLoc; 79013df4626SMatt Arsenault return FirstPrefix; 79113df4626SMatt Arsenault } 79213df4626SMatt Arsenault 79313df4626SMatt Arsenault static StringRef FindFirstMatchingPrefix(StringRef &Buffer, 79413df4626SMatt Arsenault unsigned &LineNumber, 79513df4626SMatt Arsenault Check::CheckType &CheckTy, 79613df4626SMatt Arsenault size_t &CheckLoc) { 79713df4626SMatt Arsenault while (!Buffer.empty()) { 79813df4626SMatt Arsenault StringRef Prefix = FindFirstCandidateMatch(Buffer, CheckTy, CheckLoc); 79913df4626SMatt Arsenault // If we found a real match, we are done. 80013df4626SMatt Arsenault if (!Prefix.empty()) { 80113df4626SMatt Arsenault LineNumber += Buffer.substr(0, CheckLoc).count('\n'); 80213df4626SMatt Arsenault return Prefix; 80313df4626SMatt Arsenault } 80413df4626SMatt Arsenault 80513df4626SMatt Arsenault // We didn't find any almost matches either, we are also done. 80613df4626SMatt Arsenault if (CheckLoc == StringRef::npos) 80713df4626SMatt Arsenault return StringRef(); 80813df4626SMatt Arsenault 80913df4626SMatt Arsenault LineNumber += Buffer.substr(0, CheckLoc + 1).count('\n'); 81013df4626SMatt Arsenault 81113df4626SMatt Arsenault // Advance to the last possible match we found and try again. 81213df4626SMatt Arsenault Buffer = Buffer.drop_front(CheckLoc + 1); 81313df4626SMatt Arsenault } 81413df4626SMatt Arsenault 81513df4626SMatt Arsenault return StringRef(); 81638820972SMatt Arsenault } 817ee3c74fbSChris Lattner 818ee3c74fbSChris Lattner /// ReadCheckFile - Read the check file, which specifies the sequence of 819ee3c74fbSChris Lattner /// expected strings. The strings are added to the CheckStrings vector. 
82043d50d4aSEli Bendersky /// Returns true in case of an error, false otherwise. 821ee3c74fbSChris Lattner static bool ReadCheckFile(SourceMgr &SM, 82226cccfe1SChris Lattner std::vector<CheckString> &CheckStrings) { 82356440fd8SAhmed Charles std::unique_ptr<MemoryBuffer> File; 82439a0ffc3SMichael J. Spencer if (error_code ec = 8258c811724SRafael Espindola MemoryBuffer::getFileOrSTDIN(CheckFilename, File)) { 826ee3c74fbSChris Lattner errs() << "Could not open check file '" << CheckFilename << "': " 8277b6fef82SMichael J. Spencer << ec.message() << '\n'; 828ee3c74fbSChris Lattner return true; 829ee3c74fbSChris Lattner } 830a2f8fc5aSChris Lattner 831a2f8fc5aSChris Lattner // If we want to canonicalize whitespace, strip excess whitespace from the 8325ea04c38SGuy Benyei // buffer containing the CHECK lines. Remove DOS style line endings. 833e963d660SBenjamin Kramer MemoryBuffer *F = 83496c9d95fSAhmed Charles CanonicalizeInputFile(File.release(), NoCanonicalizeWhiteSpace); 835a2f8fc5aSChris Lattner 836ee3c74fbSChris Lattner SM.AddNewSourceBuffer(F, SMLoc()); 837ee3c74fbSChris Lattner 83810f10cedSChris Lattner // Find all instances of CheckPrefix followed by : in the file. 839caa5fc0cSChris Lattner StringRef Buffer = F->getBuffer(); 84091a1b2c9SMichael Liao std::vector<Pattern> DagNotMatches; 841236d2d5eSChris Lattner 84243d50d4aSEli Bendersky // LineNumber keeps track of the line on which CheckPrefix instances are 84343d50d4aSEli Bendersky // found. 84492987fb3SAlexander Kornienko unsigned LineNumber = 1; 84592987fb3SAlexander Kornienko 846ee3c74fbSChris Lattner while (1) { 84713df4626SMatt Arsenault Check::CheckType CheckTy; 84813df4626SMatt Arsenault size_t PrefixLoc; 84913df4626SMatt Arsenault 85013df4626SMatt Arsenault // See if a prefix occurs in the memory buffer. 
85113df4626SMatt Arsenault StringRef UsedPrefix = FindFirstMatchingPrefix(Buffer, 85213df4626SMatt Arsenault LineNumber, 85313df4626SMatt Arsenault CheckTy, 85413df4626SMatt Arsenault PrefixLoc); 85513df4626SMatt Arsenault if (UsedPrefix.empty()) 856ee3c74fbSChris Lattner break; 857ee3c74fbSChris Lattner 85813df4626SMatt Arsenault Buffer = Buffer.drop_front(PrefixLoc); 85992987fb3SAlexander Kornienko 86013df4626SMatt Arsenault // Location to use for error messages. 86113df4626SMatt Arsenault const char *UsedPrefixStart = Buffer.data() + (PrefixLoc == 0 ? 0 : 1); 86292987fb3SAlexander Kornienko 86313df4626SMatt Arsenault // PrefixLoc is to the start of the prefix. Skip to the end. 86413df4626SMatt Arsenault Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy)); 86510f10cedSChris Lattner 86638820972SMatt Arsenault // Okay, we found the prefix, yay. Remember the rest of the line, but ignore 86738820972SMatt Arsenault // leading and trailing whitespace. 868236d2d5eSChris Lattner Buffer = Buffer.substr(Buffer.find_first_not_of(" \t")); 869ee3c74fbSChris Lattner 870ee3c74fbSChris Lattner // Scan ahead to the end of line. 871caa5fc0cSChris Lattner size_t EOL = Buffer.find_first_of("\n\r"); 872ee3c74fbSChris Lattner 873838fb09aSDan Gohman // Remember the location of the start of the pattern, for diagnostics. 874838fb09aSDan Gohman SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data()); 875838fb09aSDan Gohman 87674d50731SChris Lattner // Parse the pattern. 
87738820972SMatt Arsenault Pattern P(CheckTy); 87813df4626SMatt Arsenault if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber)) 879ee3c74fbSChris Lattner return true; 880ee3c74fbSChris Lattner 881f8bd2e5bSStephen Lin // Verify that CHECK-LABEL lines do not define or use variables 88238820972SMatt Arsenault if ((CheckTy == Check::CheckLabel) && P.hasVariable()) { 88313df4626SMatt Arsenault SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart), 884f8bd2e5bSStephen Lin SourceMgr::DK_Error, 88513df4626SMatt Arsenault "found '" + UsedPrefix + "-LABEL:'" 88613df4626SMatt Arsenault " with variable definition or use"); 887f8bd2e5bSStephen Lin return true; 888f8bd2e5bSStephen Lin } 889f8bd2e5bSStephen Lin 890236d2d5eSChris Lattner Buffer = Buffer.substr(EOL); 89174d50731SChris Lattner 892da108b4eSChris Lattner // Verify that CHECK-NEXT lines have at least one CHECK line before them. 89338820972SMatt Arsenault if ((CheckTy == Check::CheckNext) && CheckStrings.empty()) { 89413df4626SMatt Arsenault SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart), 89503b80a40SChris Lattner SourceMgr::DK_Error, 89613df4626SMatt Arsenault "found '" + UsedPrefix + "-NEXT:' without previous '" 89713df4626SMatt Arsenault + UsedPrefix + ": line"); 898da108b4eSChris Lattner return true; 899da108b4eSChris Lattner } 900da108b4eSChris Lattner 90191a1b2c9SMichael Liao // Handle CHECK-DAG/-NOT. 90238820972SMatt Arsenault if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) { 90391a1b2c9SMichael Liao DagNotMatches.push_back(P); 90474d50731SChris Lattner continue; 90574d50731SChris Lattner } 90674d50731SChris Lattner 907ee3c74fbSChris Lattner // Okay, add the string we captured to the output vector and move on. 
9083b40b445SChris Lattner CheckStrings.push_back(CheckString(P, 90913df4626SMatt Arsenault UsedPrefix, 910838fb09aSDan Gohman PatternLoc, 91138820972SMatt Arsenault CheckTy)); 91291a1b2c9SMichael Liao std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); 913ee3c74fbSChris Lattner } 914ee3c74fbSChris Lattner 91513df4626SMatt Arsenault // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first 91613df4626SMatt Arsenault // prefix as a filler for the error message. 91791a1b2c9SMichael Liao if (!DagNotMatches.empty()) { 91838820972SMatt Arsenault CheckStrings.push_back(CheckString(Pattern(Check::CheckEOF), 91913df4626SMatt Arsenault CheckPrefixes[0], 920eba55822SJakob Stoklund Olesen SMLoc::getFromPointer(Buffer.data()), 92138820972SMatt Arsenault Check::CheckEOF)); 92291a1b2c9SMichael Liao std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); 923eba55822SJakob Stoklund Olesen } 924eba55822SJakob Stoklund Olesen 925ee3c74fbSChris Lattner if (CheckStrings.empty()) { 92613df4626SMatt Arsenault errs() << "error: no check strings found with prefix" 92713df4626SMatt Arsenault << (CheckPrefixes.size() > 1 ? 
"es " : " "); 92813df4626SMatt Arsenault for (size_t I = 0, N = CheckPrefixes.size(); I != N; ++I) { 92913df4626SMatt Arsenault StringRef Prefix(CheckPrefixes[I]); 93013df4626SMatt Arsenault errs() << '\'' << Prefix << ":'"; 93113df4626SMatt Arsenault if (I != N - 1) 93213df4626SMatt Arsenault errs() << ", "; 93313df4626SMatt Arsenault } 93413df4626SMatt Arsenault 93513df4626SMatt Arsenault errs() << '\n'; 936ee3c74fbSChris Lattner return true; 937ee3c74fbSChris Lattner } 938ee3c74fbSChris Lattner 939ee3c74fbSChris Lattner return false; 940ee3c74fbSChris Lattner } 941ee3c74fbSChris Lattner 94291a1b2c9SMichael Liao static void PrintCheckFailed(const SourceMgr &SM, const SMLoc &Loc, 94391a1b2c9SMichael Liao const Pattern &Pat, StringRef Buffer, 944e0ef65abSDaniel Dunbar StringMap<StringRef> &VariableTable) { 945da108b4eSChris Lattner // Otherwise, we have an error, emit an error message. 94691a1b2c9SMichael Liao SM.PrintMessage(Loc, SourceMgr::DK_Error, 94703b80a40SChris Lattner "expected string not found in input"); 948da108b4eSChris Lattner 949da108b4eSChris Lattner // Print the "scanning from here" line. If the current position is at the 950da108b4eSChris Lattner // end of a line, advance to the start of the next line. 951caa5fc0cSChris Lattner Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r")); 952da108b4eSChris Lattner 95303b80a40SChris Lattner SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 95403b80a40SChris Lattner "scanning from here"); 955e0ef65abSDaniel Dunbar 956e0ef65abSDaniel Dunbar // Allow the pattern to print additional information if desired. 
95791a1b2c9SMichael Liao Pat.PrintFailureInfo(SM, Buffer, VariableTable); 95891a1b2c9SMichael Liao } 95991a1b2c9SMichael Liao 96091a1b2c9SMichael Liao static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr, 96191a1b2c9SMichael Liao StringRef Buffer, 96291a1b2c9SMichael Liao StringMap<StringRef> &VariableTable) { 96391a1b2c9SMichael Liao PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable); 964da108b4eSChris Lattner } 965da108b4eSChris Lattner 96637183584SChris Lattner /// CountNumNewlinesBetween - Count the number of newlines in the specified 96737183584SChris Lattner /// range. 968*592fe880SRichard Smith static unsigned CountNumNewlinesBetween(StringRef Range, 969*592fe880SRichard Smith const char *&FirstNewLine) { 970da108b4eSChris Lattner unsigned NumNewLines = 0; 97137183584SChris Lattner while (1) { 972da108b4eSChris Lattner // Scan for newline. 97337183584SChris Lattner Range = Range.substr(Range.find_first_of("\n\r")); 97437183584SChris Lattner if (Range.empty()) return NumNewLines; 975da108b4eSChris Lattner 976da108b4eSChris Lattner ++NumNewLines; 977da108b4eSChris Lattner 978da108b4eSChris Lattner // Handle \n\r and \r\n as a single newline. 
97937183584SChris Lattner if (Range.size() > 1 && 98037183584SChris Lattner (Range[1] == '\n' || Range[1] == '\r') && 98137183584SChris Lattner (Range[0] != Range[1])) 98237183584SChris Lattner Range = Range.substr(1); 98337183584SChris Lattner Range = Range.substr(1); 984*592fe880SRichard Smith 985*592fe880SRichard Smith if (NumNewLines == 1) 986*592fe880SRichard Smith FirstNewLine = Range.begin(); 987da108b4eSChris Lattner } 988da108b4eSChris Lattner } 989da108b4eSChris Lattner 990dcc7d48dSMichael Liao size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer, 991e93a3a08SStephen Lin bool IsLabelScanMode, size_t &MatchLen, 992dcc7d48dSMichael Liao StringMap<StringRef> &VariableTable) const { 99391a1b2c9SMichael Liao size_t LastPos = 0; 99491a1b2c9SMichael Liao std::vector<const Pattern *> NotStrings; 99591a1b2c9SMichael Liao 996e93a3a08SStephen Lin // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL 997e93a3a08SStephen Lin // bounds; we have not processed variable definitions within the bounded block 998e93a3a08SStephen Lin // yet so cannot handle any final CHECK-DAG yet; this is handled when going 999e93a3a08SStephen Lin // over the block again (including the last CHECK-LABEL) in normal mode. 1000e93a3a08SStephen Lin if (!IsLabelScanMode) { 100191a1b2c9SMichael Liao // Match "dag strings" (with mixed "not strings" if any). 100291a1b2c9SMichael Liao LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable); 100391a1b2c9SMichael Liao if (LastPos == StringRef::npos) 100491a1b2c9SMichael Liao return StringRef::npos; 1005e93a3a08SStephen Lin } 100691a1b2c9SMichael Liao 100791a1b2c9SMichael Liao // Match itself from the last position after matching CHECK-DAG. 
100891a1b2c9SMichael Liao StringRef MatchBuffer = Buffer.substr(LastPos); 100991a1b2c9SMichael Liao size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable); 1010dcc7d48dSMichael Liao if (MatchPos == StringRef::npos) { 101191a1b2c9SMichael Liao PrintCheckFailed(SM, *this, MatchBuffer, VariableTable); 1012dcc7d48dSMichael Liao return StringRef::npos; 1013dcc7d48dSMichael Liao } 101491a1b2c9SMichael Liao MatchPos += LastPos; 1015dcc7d48dSMichael Liao 1016e93a3a08SStephen Lin // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT 1017e93a3a08SStephen Lin // or CHECK-NOT 1018e93a3a08SStephen Lin if (!IsLabelScanMode) { 101991a1b2c9SMichael Liao StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); 1020dcc7d48dSMichael Liao 1021dcc7d48dSMichael Liao // If this check is a "CHECK-NEXT", verify that the previous match was on 1022dcc7d48dSMichael Liao // the previous line (i.e. that there is one newline between them). 1023dcc7d48dSMichael Liao if (CheckNext(SM, SkippedRegion)) 1024dcc7d48dSMichael Liao return StringRef::npos; 1025dcc7d48dSMichael Liao 1026dcc7d48dSMichael Liao // If this match had "not strings", verify that they don't exist in the 1027dcc7d48dSMichael Liao // skipped region. 102891a1b2c9SMichael Liao if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) 1029dcc7d48dSMichael Liao return StringRef::npos; 1030f8bd2e5bSStephen Lin } 1031dcc7d48dSMichael Liao 1032dcc7d48dSMichael Liao return MatchPos; 1033dcc7d48dSMichael Liao } 1034dcc7d48dSMichael Liao 1035dcc7d48dSMichael Liao bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const { 103638820972SMatt Arsenault if (CheckTy != Check::CheckNext) 1037dcc7d48dSMichael Liao return false; 1038dcc7d48dSMichael Liao 1039dcc7d48dSMichael Liao // Count the number of newlines between the previous match and this one. 
1040dcc7d48dSMichael Liao assert(Buffer.data() != 1041dcc7d48dSMichael Liao SM.getMemoryBuffer( 1042dcc7d48dSMichael Liao SM.FindBufferContainingLoc( 1043dcc7d48dSMichael Liao SMLoc::getFromPointer(Buffer.data())))->getBufferStart() && 1044dcc7d48dSMichael Liao "CHECK-NEXT can't be the first check in a file"); 1045dcc7d48dSMichael Liao 1046*592fe880SRichard Smith const char *FirstNewLine = 0; 1047*592fe880SRichard Smith unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 1048dcc7d48dSMichael Liao 1049dcc7d48dSMichael Liao if (NumNewLines == 0) { 105013df4626SMatt Arsenault SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix + 1051dcc7d48dSMichael Liao "-NEXT: is on the same line as previous match"); 1052dcc7d48dSMichael Liao SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), 1053dcc7d48dSMichael Liao SourceMgr::DK_Note, "'next' match was here"); 1054dcc7d48dSMichael Liao SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1055dcc7d48dSMichael Liao "previous match ended here"); 1056dcc7d48dSMichael Liao return true; 1057dcc7d48dSMichael Liao } 1058dcc7d48dSMichael Liao 1059dcc7d48dSMichael Liao if (NumNewLines != 1) { 106013df4626SMatt Arsenault SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix + 1061dcc7d48dSMichael Liao "-NEXT: is not on the line after the previous match"); 1062dcc7d48dSMichael Liao SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), 1063dcc7d48dSMichael Liao SourceMgr::DK_Note, "'next' match was here"); 1064dcc7d48dSMichael Liao SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1065dcc7d48dSMichael Liao "previous match ended here"); 1066*592fe880SRichard Smith SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note, 1067*592fe880SRichard Smith "non-matching line after previous match is here"); 1068dcc7d48dSMichael Liao return true; 1069dcc7d48dSMichael Liao } 1070dcc7d48dSMichael Liao 1071dcc7d48dSMichael Liao return false; 1072dcc7d48dSMichael Liao } 
1073dcc7d48dSMichael Liao 1074dcc7d48dSMichael Liao bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer, 107591a1b2c9SMichael Liao const std::vector<const Pattern *> &NotStrings, 1076dcc7d48dSMichael Liao StringMap<StringRef> &VariableTable) const { 1077dcc7d48dSMichael Liao for (unsigned ChunkNo = 0, e = NotStrings.size(); 1078dcc7d48dSMichael Liao ChunkNo != e; ++ChunkNo) { 107991a1b2c9SMichael Liao const Pattern *Pat = NotStrings[ChunkNo]; 108038820972SMatt Arsenault assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!"); 108191a1b2c9SMichael Liao 1082dcc7d48dSMichael Liao size_t MatchLen = 0; 108391a1b2c9SMichael Liao size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable); 1084dcc7d48dSMichael Liao 1085dcc7d48dSMichael Liao if (Pos == StringRef::npos) continue; 1086dcc7d48dSMichael Liao 1087dcc7d48dSMichael Liao SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()+Pos), 1088dcc7d48dSMichael Liao SourceMgr::DK_Error, 108913df4626SMatt Arsenault Prefix + "-NOT: string occurred!"); 109091a1b2c9SMichael Liao SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note, 109113df4626SMatt Arsenault Prefix + "-NOT: pattern specified here"); 1092dcc7d48dSMichael Liao return true; 1093dcc7d48dSMichael Liao } 1094dcc7d48dSMichael Liao 1095dcc7d48dSMichael Liao return false; 1096dcc7d48dSMichael Liao } 1097dcc7d48dSMichael Liao 109891a1b2c9SMichael Liao size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer, 109991a1b2c9SMichael Liao std::vector<const Pattern *> &NotStrings, 110091a1b2c9SMichael Liao StringMap<StringRef> &VariableTable) const { 110191a1b2c9SMichael Liao if (DagNotStrings.empty()) 110291a1b2c9SMichael Liao return 0; 110391a1b2c9SMichael Liao 110491a1b2c9SMichael Liao size_t LastPos = 0; 110591a1b2c9SMichael Liao size_t StartPos = LastPos; 110691a1b2c9SMichael Liao 110791a1b2c9SMichael Liao for (unsigned ChunkNo = 0, e = DagNotStrings.size(); 110891a1b2c9SMichael Liao ChunkNo != e; ++ChunkNo) { 110991a1b2c9SMichael Liao const 
Pattern &Pat = DagNotStrings[ChunkNo]; 111091a1b2c9SMichael Liao 111138820972SMatt Arsenault assert((Pat.getCheckTy() == Check::CheckDAG || 111238820972SMatt Arsenault Pat.getCheckTy() == Check::CheckNot) && 111391a1b2c9SMichael Liao "Invalid CHECK-DAG or CHECK-NOT!"); 111491a1b2c9SMichael Liao 111538820972SMatt Arsenault if (Pat.getCheckTy() == Check::CheckNot) { 111691a1b2c9SMichael Liao NotStrings.push_back(&Pat); 111791a1b2c9SMichael Liao continue; 111891a1b2c9SMichael Liao } 111991a1b2c9SMichael Liao 112038820972SMatt Arsenault assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!"); 112191a1b2c9SMichael Liao 112291a1b2c9SMichael Liao size_t MatchLen = 0, MatchPos; 112391a1b2c9SMichael Liao 112491a1b2c9SMichael Liao // CHECK-DAG always matches from the start. 112591a1b2c9SMichael Liao StringRef MatchBuffer = Buffer.substr(StartPos); 112691a1b2c9SMichael Liao MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable); 112791a1b2c9SMichael Liao // With a group of CHECK-DAGs, a single mismatching means the match on 112891a1b2c9SMichael Liao // that group of CHECK-DAGs fails immediately. 112991a1b2c9SMichael Liao if (MatchPos == StringRef::npos) { 113091a1b2c9SMichael Liao PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable); 113191a1b2c9SMichael Liao return StringRef::npos; 113291a1b2c9SMichael Liao } 113391a1b2c9SMichael Liao // Re-calc it as the offset relative to the start of the original string. 113491a1b2c9SMichael Liao MatchPos += StartPos; 113591a1b2c9SMichael Liao 113691a1b2c9SMichael Liao if (!NotStrings.empty()) { 113791a1b2c9SMichael Liao if (MatchPos < LastPos) { 113891a1b2c9SMichael Liao // Reordered? 
113991a1b2c9SMichael Liao SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos), 114091a1b2c9SMichael Liao SourceMgr::DK_Error, 114113df4626SMatt Arsenault Prefix + "-DAG: found a match of CHECK-DAG" 114291a1b2c9SMichael Liao " reordering across a CHECK-NOT"); 114391a1b2c9SMichael Liao SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos), 114491a1b2c9SMichael Liao SourceMgr::DK_Note, 114513df4626SMatt Arsenault Prefix + "-DAG: the farthest match of CHECK-DAG" 114691a1b2c9SMichael Liao " is found here"); 114791a1b2c9SMichael Liao SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note, 114813df4626SMatt Arsenault Prefix + "-NOT: the crossed pattern specified" 114991a1b2c9SMichael Liao " here"); 115091a1b2c9SMichael Liao SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note, 115113df4626SMatt Arsenault Prefix + "-DAG: the reordered pattern specified" 115291a1b2c9SMichael Liao " here"); 115391a1b2c9SMichael Liao return StringRef::npos; 115491a1b2c9SMichael Liao } 115591a1b2c9SMichael Liao // All subsequent CHECK-DAGs should be matched from the farthest 115691a1b2c9SMichael Liao // position of all precedent CHECK-DAGs (including this one.) 115791a1b2c9SMichael Liao StartPos = LastPos; 115891a1b2c9SMichael Liao // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to 115991a1b2c9SMichael Liao // CHECK-DAG, verify that there's no 'not' strings occurred in that 116091a1b2c9SMichael Liao // region. 116191a1b2c9SMichael Liao StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); 1162cf708c32STim Northover if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) 116391a1b2c9SMichael Liao return StringRef::npos; 116491a1b2c9SMichael Liao // Clear "not strings". 116591a1b2c9SMichael Liao NotStrings.clear(); 116691a1b2c9SMichael Liao } 116791a1b2c9SMichael Liao 116891a1b2c9SMichael Liao // Update the last position with CHECK-DAG matches. 
116991a1b2c9SMichael Liao LastPos = std::max(MatchPos + MatchLen, LastPos); 117091a1b2c9SMichael Liao } 117191a1b2c9SMichael Liao 117291a1b2c9SMichael Liao return LastPos; 117391a1b2c9SMichael Liao } 117491a1b2c9SMichael Liao 117513df4626SMatt Arsenault // A check prefix must contain only alphanumeric, hyphens and underscores. 117613df4626SMatt Arsenault static bool ValidateCheckPrefix(StringRef CheckPrefix) { 117713df4626SMatt Arsenault Regex Validator("^[a-zA-Z0-9_-]*$"); 117813df4626SMatt Arsenault return Validator.match(CheckPrefix); 117913df4626SMatt Arsenault } 118013df4626SMatt Arsenault 118113df4626SMatt Arsenault static bool ValidateCheckPrefixes() { 118213df4626SMatt Arsenault StringSet<> PrefixSet; 118313df4626SMatt Arsenault 118413df4626SMatt Arsenault for (prefix_iterator I = CheckPrefixes.begin(), E = CheckPrefixes.end(); 118513df4626SMatt Arsenault I != E; ++I) { 118613df4626SMatt Arsenault StringRef Prefix(*I); 118713df4626SMatt Arsenault 118813df4626SMatt Arsenault if (!PrefixSet.insert(Prefix)) 118913df4626SMatt Arsenault return false; 119013df4626SMatt Arsenault 119113df4626SMatt Arsenault if (!ValidateCheckPrefix(Prefix)) 119213df4626SMatt Arsenault return false; 119313df4626SMatt Arsenault } 119413df4626SMatt Arsenault 119513df4626SMatt Arsenault return true; 119613df4626SMatt Arsenault } 119713df4626SMatt Arsenault 119813df4626SMatt Arsenault // I don't think there's a way to specify an initial value for cl::list, 119913df4626SMatt Arsenault // so if nothing was specified, add the default 120013df4626SMatt Arsenault static void AddCheckPrefixIfNeeded() { 120113df4626SMatt Arsenault if (CheckPrefixes.empty()) 120213df4626SMatt Arsenault CheckPrefixes.push_back("CHECK"); 1203c2735158SRui Ueyama } 1204c2735158SRui Ueyama 1205ee3c74fbSChris Lattner int main(int argc, char **argv) { 1206ee3c74fbSChris Lattner sys::PrintStackTraceOnErrorSignal(); 1207ee3c74fbSChris Lattner PrettyStackTraceProgram X(argc, argv); 1208ee3c74fbSChris Lattner 
cl::ParseCommandLineOptions(argc, argv); 1209ee3c74fbSChris Lattner 121013df4626SMatt Arsenault if (!ValidateCheckPrefixes()) { 121113df4626SMatt Arsenault errs() << "Supplied check-prefix is invalid! Prefixes must be unique and " 121213df4626SMatt Arsenault "start with a letter and contain only alphanumeric characters, " 121313df4626SMatt Arsenault "hyphens and underscores\n"; 1214c2735158SRui Ueyama return 2; 1215c2735158SRui Ueyama } 1216c2735158SRui Ueyama 121713df4626SMatt Arsenault AddCheckPrefixIfNeeded(); 121813df4626SMatt Arsenault 1219ee3c74fbSChris Lattner SourceMgr SM; 1220ee3c74fbSChris Lattner 1221ee3c74fbSChris Lattner // Read the expected strings from the check file. 122226cccfe1SChris Lattner std::vector<CheckString> CheckStrings; 1223ee3c74fbSChris Lattner if (ReadCheckFile(SM, CheckStrings)) 1224ee3c74fbSChris Lattner return 2; 1225ee3c74fbSChris Lattner 1226ee3c74fbSChris Lattner // Open the file to check and add it to SourceMgr. 122756440fd8SAhmed Charles std::unique_ptr<MemoryBuffer> File; 122839a0ffc3SMichael J. Spencer if (error_code ec = 12298c811724SRafael Espindola MemoryBuffer::getFileOrSTDIN(InputFilename, File)) { 1230ee3c74fbSChris Lattner errs() << "Could not open input file '" << InputFilename << "': " 12317b6fef82SMichael J. Spencer << ec.message() << '\n'; 12328e1c6477SEli Bendersky return 2; 1233ee3c74fbSChris Lattner } 12342c3e5cdfSChris Lattner 1235e963d660SBenjamin Kramer if (File->getBufferSize() == 0) { 1236b692bed7SChris Lattner errs() << "FileCheck error: '" << InputFilename << "' is empty.\n"; 12378e1c6477SEli Bendersky return 2; 1238b692bed7SChris Lattner } 1239b692bed7SChris Lattner 12402c3e5cdfSChris Lattner // Remove duplicate spaces in the input file if requested. 12415ea04c38SGuy Benyei // Remove DOS style line endings. 
1242e963d660SBenjamin Kramer MemoryBuffer *F = 124396c9d95fSAhmed Charles CanonicalizeInputFile(File.release(), NoCanonicalizeWhiteSpace); 12442c3e5cdfSChris Lattner 1245ee3c74fbSChris Lattner SM.AddNewSourceBuffer(F, SMLoc()); 1246ee3c74fbSChris Lattner 12478879e06dSChris Lattner /// VariableTable - This holds all the current filecheck variables. 12488879e06dSChris Lattner StringMap<StringRef> VariableTable; 12498879e06dSChris Lattner 1250ee3c74fbSChris Lattner // Check that we have all of the expected strings, in order, in the input 1251ee3c74fbSChris Lattner // file. 1252caa5fc0cSChris Lattner StringRef Buffer = F->getBuffer(); 1253ee3c74fbSChris Lattner 1254f8bd2e5bSStephen Lin bool hasError = false; 1255ee3c74fbSChris Lattner 1256f8bd2e5bSStephen Lin unsigned i = 0, j = 0, e = CheckStrings.size(); 1257ee3c74fbSChris Lattner 1258f8bd2e5bSStephen Lin while (true) { 1259f8bd2e5bSStephen Lin StringRef CheckRegion; 1260f8bd2e5bSStephen Lin if (j == e) { 1261f8bd2e5bSStephen Lin CheckRegion = Buffer; 1262f8bd2e5bSStephen Lin } else { 1263f8bd2e5bSStephen Lin const CheckString &CheckLabelStr = CheckStrings[j]; 126438820972SMatt Arsenault if (CheckLabelStr.CheckTy != Check::CheckLabel) { 1265f8bd2e5bSStephen Lin ++j; 1266f8bd2e5bSStephen Lin continue; 1267da108b4eSChris Lattner } 1268da108b4eSChris Lattner 1269f8bd2e5bSStephen Lin // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG 1270f8bd2e5bSStephen Lin size_t MatchLabelLen = 0; 1271e93a3a08SStephen Lin size_t MatchLabelPos = CheckLabelStr.Check(SM, Buffer, true, 1272f8bd2e5bSStephen Lin MatchLabelLen, VariableTable); 1273f8bd2e5bSStephen Lin if (MatchLabelPos == StringRef::npos) { 1274f8bd2e5bSStephen Lin hasError = true; 1275f8bd2e5bSStephen Lin break; 1276f8bd2e5bSStephen Lin } 1277f8bd2e5bSStephen Lin 1278f8bd2e5bSStephen Lin CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen); 1279f8bd2e5bSStephen Lin Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen); 1280f8bd2e5bSStephen Lin ++j; 
1281f8bd2e5bSStephen Lin } 1282f8bd2e5bSStephen Lin 1283f8bd2e5bSStephen Lin for ( ; i != j; ++i) { 1284f8bd2e5bSStephen Lin const CheckString &CheckStr = CheckStrings[i]; 1285f8bd2e5bSStephen Lin 1286f8bd2e5bSStephen Lin // Check each string within the scanned region, including a second check 1287f8bd2e5bSStephen Lin // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG) 1288f8bd2e5bSStephen Lin size_t MatchLen = 0; 1289e93a3a08SStephen Lin size_t MatchPos = CheckStr.Check(SM, CheckRegion, false, MatchLen, 1290f8bd2e5bSStephen Lin VariableTable); 1291f8bd2e5bSStephen Lin 1292f8bd2e5bSStephen Lin if (MatchPos == StringRef::npos) { 1293f8bd2e5bSStephen Lin hasError = true; 1294f8bd2e5bSStephen Lin i = j; 1295f8bd2e5bSStephen Lin break; 1296f8bd2e5bSStephen Lin } 1297f8bd2e5bSStephen Lin 1298f8bd2e5bSStephen Lin CheckRegion = CheckRegion.substr(MatchPos + MatchLen); 1299f8bd2e5bSStephen Lin } 1300f8bd2e5bSStephen Lin 1301f8bd2e5bSStephen Lin if (j == e) 1302f8bd2e5bSStephen Lin break; 1303f8bd2e5bSStephen Lin } 1304f8bd2e5bSStephen Lin 1305f8bd2e5bSStephen Lin return hasError ? 1 : 0; 1306ee3c74fbSChris Lattner } 1307