1ee3c74fbSChris Lattner //===- FileCheck.cpp - Check that File's Contents match what is expected --===// 2ee3c74fbSChris Lattner // 3ee3c74fbSChris Lattner // The LLVM Compiler Infrastructure 4ee3c74fbSChris Lattner // 5ee3c74fbSChris Lattner // This file is distributed under the University of Illinois Open Source 6ee3c74fbSChris Lattner // License. See LICENSE.TXT for details. 7ee3c74fbSChris Lattner // 8ee3c74fbSChris Lattner //===----------------------------------------------------------------------===// 9ee3c74fbSChris Lattner // 10ee3c74fbSChris Lattner // FileCheck does a line-by line check of a file that validates whether it 11ee3c74fbSChris Lattner // contains the expected content. This is useful for regression tests etc. 12ee3c74fbSChris Lattner // 13ee3c74fbSChris Lattner // This program exits with an error status of 2 on error, exit status of 0 if 14ee3c74fbSChris Lattner // the file matched the expected contents, and exit status of 1 if it did not 15ee3c74fbSChris Lattner // contain the expected contents. 16ee3c74fbSChris Lattner // 17ee3c74fbSChris Lattner //===----------------------------------------------------------------------===// 18ee3c74fbSChris Lattner 1991d19d8eSChandler Carruth #include "llvm/ADT/SmallString.h" 2091d19d8eSChandler Carruth #include "llvm/ADT/StringExtras.h" 2191d19d8eSChandler Carruth #include "llvm/ADT/StringMap.h" 2213df4626SMatt Arsenault #include "llvm/ADT/StringSet.h" 23ee3c74fbSChris Lattner #include "llvm/Support/CommandLine.h" 24ee3c74fbSChris Lattner #include "llvm/Support/MemoryBuffer.h" 25ee3c74fbSChris Lattner #include "llvm/Support/PrettyStackTrace.h" 26f08d2db9SChris Lattner #include "llvm/Support/Regex.h" 2791d19d8eSChandler Carruth #include "llvm/Support/Signals.h" 28ee3c74fbSChris Lattner #include "llvm/Support/SourceMgr.h" 29ee3c74fbSChris Lattner #include "llvm/Support/raw_ostream.h" 308879e06dSChris Lattner #include <algorithm> 31981af002SWill Dietz #include <cctype> 32e8b8f1bcSEli Bendersky #include <map> 33e8b8f1bcSEli Bendersky #include <string> 34a6e9c3e4SRafael Espindola #include <system_error> 35e8b8f1bcSEli Bendersky #include <vector> 36ee3c74fbSChris Lattner using namespace llvm; 37ee3c74fbSChris Lattner 38ee3c74fbSChris Lattner static cl::opt<std::string> 39ee3c74fbSChris Lattner CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required); 40ee3c74fbSChris Lattner 41ee3c74fbSChris Lattner static cl::opt<std::string> 42ee3c74fbSChris Lattner InputFilename("input-file", cl::desc("File to check (defaults to stdin)"), 43ee3c74fbSChris Lattner cl::init("-"), cl::value_desc("filename")); 44ee3c74fbSChris Lattner 45e8f2fb20SChandler Carruth static cl::list<std::string> CheckPrefixes( 46e8f2fb20SChandler Carruth "check-prefix", 47ee3c74fbSChris Lattner cl::desc("Prefix to use from check file (defaults to 'CHECK')")); 48fd557cb0SDaniel Sanders static cl::alias CheckPrefixesAlias( 49fd557cb0SDaniel Sanders "check-prefixes", cl::aliasopt(CheckPrefixes), cl::CommaSeparated, 50fd557cb0SDaniel Sanders cl::NotHidden, 51fd557cb0SDaniel Sanders cl::desc( 52fd557cb0SDaniel Sanders "Alias for -check-prefix permitting multiple comma separated values")); 53ee3c74fbSChris Lattner 54e8f2fb20SChandler Carruth static cl::opt<bool> NoCanonicalizeWhiteSpace( 55e8f2fb20SChandler Carruth "strict-whitespace", 562c3e5cdfSChris Lattner cl::desc("Do not treat all horizontal whitespace as equivalent")); 572c3e5cdfSChris Lattner 5856ccdbbdSAlexander Kornienko static cl::list<std::string> ImplicitCheckNot( 5956ccdbbdSAlexander Kornienko "implicit-check-not", 6056ccdbbdSAlexander Kornienko cl::desc("Add an implicit negative check with this pattern to every\n" 6156ccdbbdSAlexander Kornienko "positive check. This can be used to ensure that no instances of\n" 6256ccdbbdSAlexander Kornienko "this pattern occur which are not matched by a positive pattern"), 6356ccdbbdSAlexander Kornienko cl::value_desc("pattern")); 6456ccdbbdSAlexander Kornienko 651b9f936fSJustin Bogner static cl::opt<bool> AllowEmptyInput( 661b9f936fSJustin Bogner "allow-empty", cl::init(false), 671b9f936fSJustin Bogner cl::desc("Allow the input file to be empty. This is useful when making\n" 681b9f936fSJustin Bogner "checks that some error message does not occur, for example.")); 691b9f936fSJustin Bogner 7085913ccaSJames Y Knight static cl::opt<bool> MatchFullLines( 7185913ccaSJames Y Knight "match-full-lines", cl::init(false), 7285913ccaSJames Y Knight cl::desc("Require all positive matches to cover an entire input line.\n" 7385913ccaSJames Y Knight "Allows leading and trailing whitespace if --strict-whitespace\n" 7485913ccaSJames Y Knight "is not also passed.")); 7585913ccaSJames Y Knight 7613df4626SMatt Arsenault typedef cl::list<std::string>::const_iterator prefix_iterator; 7713df4626SMatt Arsenault 7874d50731SChris Lattner //===----------------------------------------------------------------------===// 7974d50731SChris Lattner // Pattern Handling Code. 8074d50731SChris Lattner //===----------------------------------------------------------------------===// 8174d50731SChris Lattner 8238820972SMatt Arsenault namespace Check { 8338820972SMatt Arsenault enum CheckType { 8438820972SMatt Arsenault CheckNone = 0, 8538820972SMatt Arsenault CheckPlain, 8638820972SMatt Arsenault CheckNext, 8701ac1707SDuncan P. N. Exon Smith CheckSame, 8838820972SMatt Arsenault CheckNot, 8938820972SMatt Arsenault CheckDAG, 9038820972SMatt Arsenault CheckLabel, 910a4c44bdSChris Lattner 924dabac20SChandler Carruth /// Indicates the pattern only matches the end of file. This is used for 934dabac20SChandler Carruth /// trailing CHECK-NOTs. 94a908e7bdSPaul Robinson CheckEOF, 954dabac20SChandler Carruth 964dabac20SChandler Carruth /// Marks when parsing found a -NOT check combined with another CHECK suffix. 97a908e7bdSPaul Robinson CheckBadNot 9838820972SMatt Arsenault }; 9938820972SMatt Arsenault } 100eba55822SJakob Stoklund Olesen 10138820972SMatt Arsenault class Pattern { 10238820972SMatt Arsenault SMLoc PatternLoc; 10391a1b2c9SMichael Liao 1044dabac20SChandler Carruth /// A fixed string to match as the pattern or empty if this pattern requires 1054dabac20SChandler Carruth /// a regex match. 106221460e0SChris Lattner StringRef FixedStr; 107b16ab0c4SChris Lattner 1084dabac20SChandler Carruth /// A regex string to match as the pattern or empty if this pattern requires 1094dabac20SChandler Carruth /// a fixed string to match. 110b16ab0c4SChris Lattner std::string RegExStr; 1118879e06dSChris Lattner 1124dabac20SChandler Carruth /// Entries in this vector map to uses of a variable in the pattern, e.g. 1134dabac20SChandler Carruth /// "foo[[bar]]baz". In this case, the RegExStr will contain "foobaz" and 1144dabac20SChandler Carruth /// we'll get an entry in this vector that tells us to insert the value of 1154dabac20SChandler Carruth /// bar at offset 3. 1168879e06dSChris Lattner std::vector<std::pair<StringRef, unsigned>> VariableUses; 1178879e06dSChris Lattner 1184dabac20SChandler Carruth /// Maps definitions of variables to their parenthesized capture numbers. 1194dabac20SChandler Carruth /// 1204dabac20SChandler Carruth /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1214dabac20SChandler Carruth /// 1. 122e8b8f1bcSEli Bendersky std::map<StringRef, unsigned> VariableDefs; 1238879e06dSChris Lattner 124d1e020f7SSaleem Abdulrasool Check::CheckType CheckTy; 1253b40b445SChris Lattner 1264dabac20SChandler Carruth /// Contains the number of line this pattern is in. 127d1e020f7SSaleem Abdulrasool unsigned LineNumber; 128d1e020f7SSaleem Abdulrasool 129d1e020f7SSaleem Abdulrasool public: 130d1e020f7SSaleem Abdulrasool explicit Pattern(Check::CheckType Ty) : CheckTy(Ty) {} 13174d50731SChris Lattner 1324dabac20SChandler Carruth /// Returns the location in source code. 1330b707eb8SMichael Liao SMLoc getLoc() const { return PatternLoc; } 1340b707eb8SMichael Liao 135e8f2fb20SChandler Carruth bool ParsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM, 13613df4626SMatt Arsenault unsigned LineNumber); 1378879e06dSChris Lattner size_t Match(StringRef Buffer, size_t &MatchLen, 1388879e06dSChris Lattner StringMap<StringRef> &VariableTable) const; 139e0ef65abSDaniel Dunbar void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer, 140e0ef65abSDaniel Dunbar const StringMap<StringRef> &VariableTable) const; 141e0ef65abSDaniel Dunbar 142e8f2fb20SChandler Carruth bool hasVariable() const { 143e8f2fb20SChandler Carruth return !(VariableUses.empty() && VariableDefs.empty()); 144e8f2fb20SChandler Carruth } 145f8bd2e5bSStephen Lin 14638820972SMatt Arsenault Check::CheckType getCheckTy() const { return CheckTy; } 14791a1b2c9SMichael Liao 148b16ab0c4SChris Lattner private: 149e8b8f1bcSEli Bendersky bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM); 150e8b8f1bcSEli Bendersky void AddBackrefToRegEx(unsigned BackrefNum); 151e8f2fb20SChandler Carruth unsigned 152e8f2fb20SChandler Carruth ComputeMatchDistance(StringRef Buffer, 153fd29d886SDaniel Dunbar const StringMap<StringRef> &VariableTable) const; 15492987fb3SAlexander Kornienko bool EvaluateExpression(StringRef Expr, std::string &Value) const; 15581e5cd9eSAdrian Prantl size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM); 1563b40b445SChris Lattner }; 1573b40b445SChris Lattner 1584dabac20SChandler Carruth /// Parses the given string into the Pattern. 1594dabac20SChandler Carruth /// 1604dabac20SChandler Carruth /// \p Prefix provides which prefix is being matched, \p SM provides the 1614dabac20SChandler Carruth /// SourceMgr used for error reports, and \p LineNumber is the line number in 1624dabac20SChandler Carruth /// the input file from which the pattern string was read. Returns true in 1634dabac20SChandler Carruth /// case of an error, false otherwise. 164e8f2fb20SChandler Carruth bool Pattern::ParsePattern(StringRef PatternStr, StringRef Prefix, 165e8f2fb20SChandler Carruth SourceMgr &SM, unsigned LineNumber) { 16685913ccaSJames Y Knight bool MatchFullLinesHere = MatchFullLines && CheckTy != Check::CheckNot; 16785913ccaSJames Y Knight 16892987fb3SAlexander Kornienko this->LineNumber = LineNumber; 1690a4c44bdSChris Lattner PatternLoc = SMLoc::getFromPointer(PatternStr.data()); 1700a4c44bdSChris Lattner 17174d50731SChris Lattner // Ignore trailing whitespace. 17274d50731SChris Lattner while (!PatternStr.empty() && 17374d50731SChris Lattner (PatternStr.back() == ' ' || PatternStr.back() == '\t')) 17474d50731SChris Lattner PatternStr = PatternStr.substr(0, PatternStr.size() - 1); 17574d50731SChris Lattner 17674d50731SChris Lattner // Check that there is something on the line. 17774d50731SChris Lattner if (PatternStr.empty()) { 17803b80a40SChris Lattner SM.PrintMessage(PatternLoc, SourceMgr::DK_Error, 179e8f2fb20SChandler Carruth "found empty check string with prefix '" + Prefix + ":'"); 18074d50731SChris Lattner return true; 18174d50731SChris Lattner } 18274d50731SChris Lattner 183221460e0SChris Lattner // Check to see if this is a fixed string, or if it has regex pieces. 18485913ccaSJames Y Knight if (!MatchFullLinesHere && 18585913ccaSJames Y Knight (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos && 18685913ccaSJames Y Knight PatternStr.find("[[") == StringRef::npos))) { 187221460e0SChris Lattner FixedStr = PatternStr; 188221460e0SChris Lattner return false; 189221460e0SChris Lattner } 190221460e0SChris Lattner 19185913ccaSJames Y Knight if (MatchFullLinesHere) { 19285913ccaSJames Y Knight RegExStr += '^'; 19385913ccaSJames Y Knight if (!NoCanonicalizeWhiteSpace) 19485913ccaSJames Y Knight RegExStr += " *"; 19585913ccaSJames Y Knight } 19685913ccaSJames Y Knight 1978879e06dSChris Lattner // Paren value #0 is for the fully matched string. Any new parenthesized 19853e0679dSChris Lattner // values add from there. 1998879e06dSChris Lattner unsigned CurParen = 1; 2008879e06dSChris Lattner 201b16ab0c4SChris Lattner // Otherwise, there is at least one regex piece. Build up the regex pattern 202b16ab0c4SChris Lattner // by escaping scary characters in fixed strings, building up one big regex. 203f08d2db9SChris Lattner while (!PatternStr.empty()) { 2048879e06dSChris Lattner // RegEx matches. 20553e0679dSChris Lattner if (PatternStr.startswith("{{")) { 20643d50d4aSEli Bendersky // This is the start of a regex match. Scan for the }}. 207f08d2db9SChris Lattner size_t End = PatternStr.find("}}"); 208f08d2db9SChris Lattner if (End == StringRef::npos) { 209f08d2db9SChris Lattner SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 21003b80a40SChris Lattner SourceMgr::DK_Error, 21103b80a40SChris Lattner "found start of regex string with no end '}}'"); 212f08d2db9SChris Lattner return true; 213f08d2db9SChris Lattner } 214f08d2db9SChris Lattner 215e53c95f1SChris Lattner // Enclose {{}} patterns in parens just like [[]] even though we're not 216e53c95f1SChris Lattner // capturing the result for any purpose. This is required in case the 217e53c95f1SChris Lattner // expression contains an alternation like: CHECK: abc{{x|z}}def. We 218e53c95f1SChris Lattner // want this to turn into: "abc(x|z)def" not "abcx|zdef". 219e53c95f1SChris Lattner RegExStr += '('; 220e53c95f1SChris Lattner ++CurParen; 221e53c95f1SChris Lattner 2228879e06dSChris Lattner if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM)) 2238879e06dSChris Lattner return true; 224e53c95f1SChris Lattner RegExStr += ')'; 22553e0679dSChris Lattner 2268879e06dSChris Lattner PatternStr = PatternStr.substr(End + 2); 2278879e06dSChris Lattner continue; 2288879e06dSChris Lattner } 2298879e06dSChris Lattner 2308879e06dSChris Lattner // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .* 2318879e06dSChris Lattner // (or some other regex) and assigns it to the FileCheck variable 'foo'. The 2328879e06dSChris Lattner // second form is [[foo]] which is a reference to foo. The variable name 23357cb733bSDaniel Dunbar // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject 2348879e06dSChris Lattner // it. This is to catch some common errors. 23553e0679dSChris Lattner if (PatternStr.startswith("[[")) { 236061d2baaSEli Bendersky // Find the closing bracket pair ending the match. End is going to be an 237061d2baaSEli Bendersky // offset relative to the beginning of the match string. 23881e5cd9eSAdrian Prantl size_t End = FindRegexVarEnd(PatternStr.substr(2), SM); 239061d2baaSEli Bendersky 2408879e06dSChris Lattner if (End == StringRef::npos) { 2418879e06dSChris Lattner SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 24203b80a40SChris Lattner SourceMgr::DK_Error, 24303b80a40SChris Lattner "invalid named regex reference, no ]] found"); 244f08d2db9SChris Lattner return true; 245f08d2db9SChris Lattner } 246f08d2db9SChris Lattner 247061d2baaSEli Bendersky StringRef MatchStr = PatternStr.substr(2, End); 248061d2baaSEli Bendersky PatternStr = PatternStr.substr(End + 4); 2498879e06dSChris Lattner 2508879e06dSChris Lattner // Get the regex name (e.g. "foo"). 2518879e06dSChris Lattner size_t NameEnd = MatchStr.find(':'); 2528879e06dSChris Lattner StringRef Name = MatchStr.substr(0, NameEnd); 2538879e06dSChris Lattner 2548879e06dSChris Lattner if (Name.empty()) { 25503b80a40SChris Lattner SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 25603b80a40SChris Lattner "invalid name in named regex: empty name"); 2578879e06dSChris Lattner return true; 2588879e06dSChris Lattner } 2598879e06dSChris Lattner 26092987fb3SAlexander Kornienko // Verify that the name/expression is well formed. FileCheck currently 26192987fb3SAlexander Kornienko // supports @LINE, @LINE+number, @LINE-number expressions. The check here 26292987fb3SAlexander Kornienko // is relaxed, more strict check is performed in \c EvaluateExpression. 26392987fb3SAlexander Kornienko bool IsExpression = false; 26492987fb3SAlexander Kornienko for (unsigned i = 0, e = Name.size(); i != e; ++i) { 26592987fb3SAlexander Kornienko if (i == 0 && Name[i] == '@') { 26692987fb3SAlexander Kornienko if (NameEnd != StringRef::npos) { 26792987fb3SAlexander Kornienko SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 26892987fb3SAlexander Kornienko SourceMgr::DK_Error, 26992987fb3SAlexander Kornienko "invalid name in named regex definition"); 27092987fb3SAlexander Kornienko return true; 27192987fb3SAlexander Kornienko } 27292987fb3SAlexander Kornienko IsExpression = true; 27392987fb3SAlexander Kornienko continue; 27492987fb3SAlexander Kornienko } 27592987fb3SAlexander Kornienko if (Name[i] != '_' && !isalnum(Name[i]) && 27692987fb3SAlexander Kornienko (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) { 2778879e06dSChris Lattner SM.PrintMessage(SMLoc::getFromPointer(Name.data() + i), 27803b80a40SChris Lattner SourceMgr::DK_Error, "invalid name in named regex"); 2798879e06dSChris Lattner return true; 2808879e06dSChris Lattner } 28192987fb3SAlexander Kornienko } 2828879e06dSChris Lattner 2838879e06dSChris Lattner // Name can't start with a digit. 28483c74e9fSGuy Benyei if (isdigit(static_cast<unsigned char>(Name[0]))) { 28503b80a40SChris Lattner SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 28603b80a40SChris Lattner "invalid name in named regex"); 2878879e06dSChris Lattner return true; 2888879e06dSChris Lattner } 2898879e06dSChris Lattner 2908879e06dSChris Lattner // Handle [[foo]]. 2918879e06dSChris Lattner if (NameEnd == StringRef::npos) { 292e8b8f1bcSEli Bendersky // Handle variables that were defined earlier on the same line by 293e8b8f1bcSEli Bendersky // emitting a backreference. 294e8b8f1bcSEli Bendersky if (VariableDefs.find(Name) != VariableDefs.end()) { 295e8b8f1bcSEli Bendersky unsigned VarParenNum = VariableDefs[Name]; 296e8b8f1bcSEli Bendersky if (VarParenNum < 1 || VarParenNum > 9) { 297e8b8f1bcSEli Bendersky SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 298e8b8f1bcSEli Bendersky SourceMgr::DK_Error, 299e8b8f1bcSEli Bendersky "Can't back-reference more than 9 variables"); 300e8b8f1bcSEli Bendersky return true; 301e8b8f1bcSEli Bendersky } 302e8b8f1bcSEli Bendersky AddBackrefToRegEx(VarParenNum); 303e8b8f1bcSEli Bendersky } else { 3048879e06dSChris Lattner VariableUses.push_back(std::make_pair(Name, RegExStr.size())); 305e8b8f1bcSEli Bendersky } 3068879e06dSChris Lattner continue; 3078879e06dSChris Lattner } 3088879e06dSChris Lattner 3098879e06dSChris Lattner // Handle [[foo:.*]]. 310e8b8f1bcSEli Bendersky VariableDefs[Name] = CurParen; 3118879e06dSChris Lattner RegExStr += '('; 3128879e06dSChris Lattner ++CurParen; 3138879e06dSChris Lattner 3148879e06dSChris Lattner if (AddRegExToRegEx(MatchStr.substr(NameEnd + 1), CurParen, SM)) 3158879e06dSChris Lattner return true; 3168879e06dSChris Lattner 3178879e06dSChris Lattner RegExStr += ')'; 3188879e06dSChris Lattner } 3198879e06dSChris Lattner 3208879e06dSChris Lattner // Handle fixed string matches. 3218879e06dSChris Lattner // Find the end, which is the start of the next regex. 3228879e06dSChris Lattner size_t FixedMatchEnd = PatternStr.find("{{"); 3238879e06dSChris Lattner FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[[")); 3246f4f77b7SHans Wennborg RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd)); 3258879e06dSChris Lattner PatternStr = PatternStr.substr(FixedMatchEnd); 326f08d2db9SChris Lattner } 327f08d2db9SChris Lattner 32885913ccaSJames Y Knight if (MatchFullLinesHere) { 32985913ccaSJames Y Knight if (!NoCanonicalizeWhiteSpace) 33085913ccaSJames Y Knight RegExStr += " *"; 33185913ccaSJames Y Knight RegExStr += '$'; 33285913ccaSJames Y Knight } 33385913ccaSJames Y Knight 33474d50731SChris Lattner return false; 33574d50731SChris Lattner } 33674d50731SChris Lattner 337e8f2fb20SChandler Carruth bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) { 338e8b8f1bcSEli Bendersky Regex R(RS); 3398879e06dSChris Lattner std::string Error; 3408879e06dSChris Lattner if (!R.isValid(Error)) { 341e8b8f1bcSEli Bendersky SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error, 34203b80a40SChris Lattner "invalid regex: " + Error); 3438879e06dSChris Lattner return true; 3448879e06dSChris Lattner } 3458879e06dSChris Lattner 346e8b8f1bcSEli Bendersky RegExStr += RS.str(); 3478879e06dSChris Lattner CurParen += R.getNumMatches(); 3488879e06dSChris Lattner return false; 3498879e06dSChris Lattner } 350b16ab0c4SChris Lattner 351e8b8f1bcSEli Bendersky void Pattern::AddBackrefToRegEx(unsigned BackrefNum) { 352e8b8f1bcSEli Bendersky assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number"); 353e8f2fb20SChandler Carruth std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum); 354e8b8f1bcSEli Bendersky RegExStr += Backref; 355e8b8f1bcSEli Bendersky } 356e8b8f1bcSEli Bendersky 3574dabac20SChandler Carruth /// Evaluates expression and stores the result to \p Value. 3584dabac20SChandler Carruth /// 3594dabac20SChandler Carruth /// Returns true on success and false when the expression has invalid syntax. 36092987fb3SAlexander Kornienko bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const { 36192987fb3SAlexander Kornienko // The only supported expression is @LINE([\+-]\d+)? 36292987fb3SAlexander Kornienko if (!Expr.startswith("@LINE")) 36392987fb3SAlexander Kornienko return false; 36492987fb3SAlexander Kornienko Expr = Expr.substr(StringRef("@LINE").size()); 36592987fb3SAlexander Kornienko int Offset = 0; 36692987fb3SAlexander Kornienko if (!Expr.empty()) { 36792987fb3SAlexander Kornienko if (Expr[0] == '+') 36892987fb3SAlexander Kornienko Expr = Expr.substr(1); 36992987fb3SAlexander Kornienko else if (Expr[0] != '-') 37092987fb3SAlexander Kornienko return false; 37192987fb3SAlexander Kornienko if (Expr.getAsInteger(10, Offset)) 37292987fb3SAlexander Kornienko return false; 37392987fb3SAlexander Kornienko } 37492987fb3SAlexander Kornienko Value = llvm::itostr(LineNumber + Offset); 37592987fb3SAlexander Kornienko return true; 37692987fb3SAlexander Kornienko } 37792987fb3SAlexander Kornienko 3784dabac20SChandler Carruth /// Matches the pattern string against the input buffer \p Buffer 3794dabac20SChandler Carruth /// 3804dabac20SChandler Carruth /// This returns the position that is matched or npos if there is no match. If 3814dabac20SChandler Carruth /// there is a match, the size of the matched string is returned in \p 3824dabac20SChandler Carruth /// MatchLen. 3834dabac20SChandler Carruth /// 3844dabac20SChandler Carruth /// The \p VariableTable StringMap provides the current values of filecheck 3854dabac20SChandler Carruth /// variables and is updated if this match defines new values. 3868879e06dSChris Lattner size_t Pattern::Match(StringRef Buffer, size_t &MatchLen, 3878879e06dSChris Lattner StringMap<StringRef> &VariableTable) const { 388eba55822SJakob Stoklund Olesen // If this is the EOF pattern, match it immediately. 38938820972SMatt Arsenault if (CheckTy == Check::CheckEOF) { 390eba55822SJakob Stoklund Olesen MatchLen = 0; 391eba55822SJakob Stoklund Olesen return Buffer.size(); 392eba55822SJakob Stoklund Olesen } 393eba55822SJakob Stoklund Olesen 394221460e0SChris Lattner // If this is a fixed string pattern, just match it now. 395221460e0SChris Lattner if (!FixedStr.empty()) { 396221460e0SChris Lattner MatchLen = FixedStr.size(); 397221460e0SChris Lattner return Buffer.find(FixedStr); 398221460e0SChris Lattner } 399221460e0SChris Lattner 400b16ab0c4SChris Lattner // Regex match. 4018879e06dSChris Lattner 4028879e06dSChris Lattner // If there are variable uses, we need to create a temporary string with the 4038879e06dSChris Lattner // actual value. 4048879e06dSChris Lattner StringRef RegExToMatch = RegExStr; 4058879e06dSChris Lattner std::string TmpStr; 4068879e06dSChris Lattner if (!VariableUses.empty()) { 4078879e06dSChris Lattner TmpStr = RegExStr; 4088879e06dSChris Lattner 4098879e06dSChris Lattner unsigned InsertOffset = 0; 4108f870499SBenjamin Kramer for (const auto &VariableUse : VariableUses) { 41192987fb3SAlexander Kornienko std::string Value; 41292987fb3SAlexander Kornienko 4138f870499SBenjamin Kramer if (VariableUse.first[0] == '@') { 4148f870499SBenjamin Kramer if (!EvaluateExpression(VariableUse.first, Value)) 41592987fb3SAlexander Kornienko return StringRef::npos; 41692987fb3SAlexander Kornienko } else { 417e0ef65abSDaniel Dunbar StringMap<StringRef>::iterator it = 4188f870499SBenjamin Kramer VariableTable.find(VariableUse.first); 419e0ef65abSDaniel Dunbar // If the variable is undefined, return an error. 420e0ef65abSDaniel Dunbar if (it == VariableTable.end()) 421e0ef65abSDaniel Dunbar return StringRef::npos; 422e0ef65abSDaniel Dunbar 4236f4f77b7SHans Wennborg // Look up the value and escape it so that we can put it into the regex. 4246f4f77b7SHans Wennborg Value += Regex::escape(it->second); 42592987fb3SAlexander Kornienko } 4268879e06dSChris Lattner 4278879e06dSChris Lattner // Plop it into the regex at the adjusted offset. 4288f870499SBenjamin Kramer TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset, 4298879e06dSChris Lattner Value.begin(), Value.end()); 4308879e06dSChris Lattner InsertOffset += Value.size(); 4318879e06dSChris Lattner } 4328879e06dSChris Lattner 4338879e06dSChris Lattner // Match the newly constructed regex. 4348879e06dSChris Lattner RegExToMatch = TmpStr; 4358879e06dSChris Lattner } 4368879e06dSChris Lattner 437b16ab0c4SChris Lattner SmallVector<StringRef, 4> MatchInfo; 4388879e06dSChris Lattner if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo)) 439f08d2db9SChris Lattner return StringRef::npos; 440b16ab0c4SChris Lattner 441b16ab0c4SChris Lattner // Successful regex match. 442b16ab0c4SChris Lattner assert(!MatchInfo.empty() && "Didn't get any match"); 443b16ab0c4SChris Lattner StringRef FullMatch = MatchInfo[0]; 444b16ab0c4SChris Lattner 4458879e06dSChris Lattner // If this defines any variables, remember their values. 4468f870499SBenjamin Kramer for (const auto &VariableDef : VariableDefs) { 4478f870499SBenjamin Kramer assert(VariableDef.second < MatchInfo.size() && "Internal paren error"); 4488f870499SBenjamin Kramer VariableTable[VariableDef.first] = MatchInfo[VariableDef.second]; 4490a4c44bdSChris Lattner } 4500a4c44bdSChris Lattner 451b16ab0c4SChris Lattner MatchLen = FullMatch.size(); 452b16ab0c4SChris Lattner return FullMatch.data() - Buffer.data(); 453f08d2db9SChris Lattner } 454f08d2db9SChris Lattner 4554dabac20SChandler Carruth 4564dabac20SChandler Carruth /// Computes an arbitrary estimate for the quality of matching this pattern at 4574dabac20SChandler Carruth /// the start of \p Buffer; a distance of zero should correspond to a perfect 4584dabac20SChandler Carruth /// match. 459e8f2fb20SChandler Carruth unsigned 460e8f2fb20SChandler Carruth Pattern::ComputeMatchDistance(StringRef Buffer, 461fd29d886SDaniel Dunbar const StringMap<StringRef> &VariableTable) const { 462fd29d886SDaniel Dunbar // Just compute the number of matching characters. For regular expressions, we 463fd29d886SDaniel Dunbar // just compare against the regex itself and hope for the best. 464fd29d886SDaniel Dunbar // 465fd29d886SDaniel Dunbar // FIXME: One easy improvement here is have the regex lib generate a single 466fd29d886SDaniel Dunbar // example regular expression which matches, and use that as the example 467fd29d886SDaniel Dunbar // string. 468fd29d886SDaniel Dunbar StringRef ExampleString(FixedStr); 469fd29d886SDaniel Dunbar if (ExampleString.empty()) 470fd29d886SDaniel Dunbar ExampleString = RegExStr; 471fd29d886SDaniel Dunbar 472e9aa36c8SDaniel Dunbar // Only compare up to the first line in the buffer, or the string size. 473e9aa36c8SDaniel Dunbar StringRef BufferPrefix = Buffer.substr(0, ExampleString.size()); 474e9aa36c8SDaniel Dunbar BufferPrefix = BufferPrefix.split('\n').first; 475e9aa36c8SDaniel Dunbar return BufferPrefix.edit_distance(ExampleString); 476fd29d886SDaniel Dunbar } 477fd29d886SDaniel Dunbar 4784dabac20SChandler Carruth /// Prints additional information about a failure to match involving this 4794dabac20SChandler Carruth /// pattern. 480e8f2fb20SChandler Carruth void Pattern::PrintFailureInfo( 481e8f2fb20SChandler Carruth const SourceMgr &SM, StringRef Buffer, 482e0ef65abSDaniel Dunbar const StringMap<StringRef> &VariableTable) const { 483e0ef65abSDaniel Dunbar // If this was a regular expression using variables, print the current 484e0ef65abSDaniel Dunbar // variable values. 485e0ef65abSDaniel Dunbar if (!VariableUses.empty()) { 4868f870499SBenjamin Kramer for (const auto &VariableUse : VariableUses) { 487e69170a1SAlp Toker SmallString<256> Msg; 488e69170a1SAlp Toker raw_svector_ostream OS(Msg); 4898f870499SBenjamin Kramer StringRef Var = VariableUse.first; 49092987fb3SAlexander Kornienko if (Var[0] == '@') { 49192987fb3SAlexander Kornienko std::string Value; 49292987fb3SAlexander Kornienko if (EvaluateExpression(Var, Value)) { 49392987fb3SAlexander Kornienko OS << "with expression \""; 49492987fb3SAlexander Kornienko OS.write_escaped(Var) << "\" equal to \""; 49592987fb3SAlexander Kornienko OS.write_escaped(Value) << "\""; 49692987fb3SAlexander Kornienko } else { 49792987fb3SAlexander Kornienko OS << "uses incorrect expression \""; 49892987fb3SAlexander Kornienko OS.write_escaped(Var) << "\""; 49992987fb3SAlexander Kornienko } 50092987fb3SAlexander Kornienko } else { 50192987fb3SAlexander Kornienko StringMap<StringRef>::const_iterator it = VariableTable.find(Var); 502e0ef65abSDaniel Dunbar 503e0ef65abSDaniel Dunbar // Check for undefined variable references. 504e0ef65abSDaniel Dunbar if (it == VariableTable.end()) { 505e0ef65abSDaniel Dunbar OS << "uses undefined variable \""; 50692987fb3SAlexander Kornienko OS.write_escaped(Var) << "\""; 507e0ef65abSDaniel Dunbar } else { 508e0ef65abSDaniel Dunbar OS << "with variable \""; 509e0ef65abSDaniel Dunbar OS.write_escaped(Var) << "\" equal to \""; 510e0ef65abSDaniel Dunbar OS.write_escaped(it->second) << "\""; 511e0ef65abSDaniel Dunbar } 51292987fb3SAlexander Kornienko } 513e0ef65abSDaniel Dunbar 51403b80a40SChris Lattner SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 51503b80a40SChris Lattner OS.str()); 516e0ef65abSDaniel Dunbar } 517e0ef65abSDaniel Dunbar } 518fd29d886SDaniel Dunbar 519fd29d886SDaniel Dunbar // Attempt to find the closest/best fuzzy match. Usually an error happens 520fd29d886SDaniel Dunbar // because some string in the output didn't exactly match. In these cases, we 521fd29d886SDaniel Dunbar // would like to show the user a best guess at what "should have" matched, to 522fd29d886SDaniel Dunbar // save them having to actually check the input manually. 523fd29d886SDaniel Dunbar size_t NumLinesForward = 0; 524fd29d886SDaniel Dunbar size_t Best = StringRef::npos; 525fd29d886SDaniel Dunbar double BestQuality = 0; 526fd29d886SDaniel Dunbar 527fd29d886SDaniel Dunbar // Use an arbitrary 4k limit on how far we will search. 5282bf486ebSDan Gohman for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) { 529fd29d886SDaniel Dunbar if (Buffer[i] == '\n') 530fd29d886SDaniel Dunbar ++NumLinesForward; 531fd29d886SDaniel Dunbar 532df22bbf7SDan Gohman // Patterns have leading whitespace stripped, so skip whitespace when 533df22bbf7SDan Gohman // looking for something which looks like a pattern. 534df22bbf7SDan Gohman if (Buffer[i] == ' ' || Buffer[i] == '\t') 535df22bbf7SDan Gohman continue; 536df22bbf7SDan Gohman 537fd29d886SDaniel Dunbar // Compute the "quality" of this match as an arbitrary combination of the 538fd29d886SDaniel Dunbar // match distance and the number of lines skipped to get to this match. 539fd29d886SDaniel Dunbar unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable); 540fd29d886SDaniel Dunbar double Quality = Distance + (NumLinesForward / 100.); 541fd29d886SDaniel Dunbar 542fd29d886SDaniel Dunbar if (Quality < BestQuality || Best == StringRef::npos) { 543fd29d886SDaniel Dunbar Best = i; 544fd29d886SDaniel Dunbar BestQuality = Quality; 545fd29d886SDaniel Dunbar } 546fd29d886SDaniel Dunbar } 547fd29d886SDaniel Dunbar 548fd29d886SDaniel Dunbar // Print the "possible intended match here" line if we found something 549c069cc8eSDaniel Dunbar // reasonable and not equal to what we showed in the "scanning from here" 550c069cc8eSDaniel Dunbar // line. 551c069cc8eSDaniel Dunbar if (Best && Best != StringRef::npos && BestQuality < 50) { 552fd29d886SDaniel Dunbar SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best), 55303b80a40SChris Lattner SourceMgr::DK_Note, "possible intended match here"); 554fd29d886SDaniel Dunbar 555fd29d886SDaniel Dunbar // FIXME: If we wanted to be really friendly we would show why the match 556fd29d886SDaniel Dunbar // failed, as it can be hard to spot simple one character differences. 557fd29d886SDaniel Dunbar } 558e0ef65abSDaniel Dunbar } 55974d50731SChris Lattner 5604dabac20SChandler Carruth /// Finds the closing sequence of a regex variable usage or definition. 5614dabac20SChandler Carruth /// 5624dabac20SChandler Carruth /// \p Str has to point in the beginning of the definition (right after the 5634dabac20SChandler Carruth /// opening sequence). Returns the offset of the closing sequence within Str, 5644dabac20SChandler Carruth /// or npos if it was not found. 56581e5cd9eSAdrian Prantl size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) { 566061d2baaSEli Bendersky // Offset keeps track of the current offset within the input Str 567061d2baaSEli Bendersky size_t Offset = 0; 568061d2baaSEli Bendersky // [...] Nesting depth 569061d2baaSEli Bendersky size_t BracketDepth = 0; 570061d2baaSEli Bendersky 571061d2baaSEli Bendersky while (!Str.empty()) { 572061d2baaSEli Bendersky if (Str.startswith("]]") && BracketDepth == 0) 573061d2baaSEli Bendersky return Offset; 574061d2baaSEli Bendersky if (Str[0] == '\\') { 575061d2baaSEli Bendersky // Backslash escapes the next char within regexes, so skip them both. 576061d2baaSEli Bendersky Str = Str.substr(2); 577061d2baaSEli Bendersky Offset += 2; 578061d2baaSEli Bendersky } else { 579061d2baaSEli Bendersky switch (Str[0]) { 580061d2baaSEli Bendersky default: 581061d2baaSEli Bendersky break; 582061d2baaSEli Bendersky case '[': 583061d2baaSEli Bendersky BracketDepth++; 584061d2baaSEli Bendersky break; 585061d2baaSEli Bendersky case ']': 58681e5cd9eSAdrian Prantl if (BracketDepth == 0) { 58781e5cd9eSAdrian Prantl SM.PrintMessage(SMLoc::getFromPointer(Str.data()), 58881e5cd9eSAdrian Prantl SourceMgr::DK_Error, 58981e5cd9eSAdrian Prantl "missing closing \"]\" for regex variable"); 59081e5cd9eSAdrian Prantl exit(1); 59181e5cd9eSAdrian Prantl } 592061d2baaSEli Bendersky BracketDepth--; 593061d2baaSEli Bendersky break; 594061d2baaSEli Bendersky } 595061d2baaSEli Bendersky Str = Str.substr(1); 596061d2baaSEli Bendersky Offset++; 597061d2baaSEli Bendersky } 598061d2baaSEli Bendersky } 599061d2baaSEli Bendersky 600061d2baaSEli Bendersky return StringRef::npos; 601061d2baaSEli Bendersky } 602061d2baaSEli Bendersky 60374d50731SChris Lattner //===----------------------------------------------------------------------===// 60474d50731SChris Lattner // Check Strings. 60574d50731SChris Lattner //===----------------------------------------------------------------------===// 6063b40b445SChris Lattner 6074dabac20SChandler Carruth /// A check that we found in the input file. 6083b40b445SChris Lattner struct CheckString { 6094dabac20SChandler Carruth /// The pattern to match. 6103b40b445SChris Lattner Pattern Pat; 61126cccfe1SChris Lattner 6124dabac20SChandler Carruth /// Which prefix name this check matched. 61313df4626SMatt Arsenault StringRef Prefix; 61413df4626SMatt Arsenault 6154dabac20SChandler Carruth /// The location in the match file that the check string was specified. 61626cccfe1SChris Lattner SMLoc Loc; 61726cccfe1SChris Lattner 6184dabac20SChandler Carruth /// All of the strings that are disallowed from occurring between this match 6194dabac20SChandler Carruth /// string and the previous one (or start of file). 62091a1b2c9SMichael Liao std::vector<Pattern> DagNotStrings; 621236d2d5eSChris Lattner 62285913ccaSJames Y Knight CheckString(const Pattern &P, StringRef S, SMLoc L) 62385913ccaSJames Y Knight : Pat(P), Prefix(S), Loc(L) {} 624dcc7d48dSMichael Liao 625e93a3a08SStephen Lin size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode, 626f8bd2e5bSStephen Lin size_t &MatchLen, StringMap<StringRef> &VariableTable) const; 627dcc7d48dSMichael Liao 628dcc7d48dSMichael Liao bool CheckNext(const SourceMgr &SM, StringRef Buffer) const; 62901ac1707SDuncan P. N. Exon Smith bool CheckSame(const SourceMgr &SM, StringRef Buffer) const; 630dcc7d48dSMichael Liao bool CheckNot(const SourceMgr &SM, StringRef Buffer, 63191a1b2c9SMichael Liao const std::vector<const Pattern *> &NotStrings, 63291a1b2c9SMichael Liao StringMap<StringRef> &VariableTable) const; 63391a1b2c9SMichael Liao size_t CheckDag(const SourceMgr &SM, StringRef Buffer, 63491a1b2c9SMichael Liao std::vector<const Pattern *> &NotStrings, 635dcc7d48dSMichael Liao StringMap<StringRef> &VariableTable) const; 63626cccfe1SChris Lattner }; 63726cccfe1SChris Lattner 63820247900SChandler Carruth /// Canonicalize whitespaces in the file. Line endings are replaced with 63920247900SChandler Carruth /// UNIX-style '\n'. 640*b03c166aSChandler Carruth static StringRef CanonicalizeFile(MemoryBuffer &MB, 64120247900SChandler Carruth SmallVectorImpl<char> &OutputBuffer) { 64220247900SChandler Carruth OutputBuffer.reserve(MB.getBufferSize()); 643a2f8fc5aSChris Lattner 64420247900SChandler Carruth for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd(); 645a2f8fc5aSChris Lattner Ptr != End; ++Ptr) { 646fd781bf0SNAKAMURA Takumi // Eliminate trailing dosish \r. 647fd781bf0SNAKAMURA Takumi if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') { 648fd781bf0SNAKAMURA Takumi continue; 649fd781bf0SNAKAMURA Takumi } 650fd781bf0SNAKAMURA Takumi 6515ea04c38SGuy Benyei // If current char is not a horizontal whitespace or if horizontal 6525ea04c38SGuy Benyei // whitespace canonicalization is disabled, dump it to output as is. 653*b03c166aSChandler Carruth if (NoCanonicalizeWhiteSpace || (*Ptr != ' ' && *Ptr != '\t')) { 65420247900SChandler Carruth OutputBuffer.push_back(*Ptr); 655a2f8fc5aSChris Lattner continue; 656a2f8fc5aSChris Lattner } 657a2f8fc5aSChris Lattner 658a2f8fc5aSChris Lattner // Otherwise, add one space and advance over neighboring space. 65920247900SChandler Carruth OutputBuffer.push_back(' '); 660e8f2fb20SChandler Carruth while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t')) 661a2f8fc5aSChris Lattner ++Ptr; 662a2f8fc5aSChris Lattner } 663a2f8fc5aSChris Lattner 66420247900SChandler Carruth // Add a null byte and then return all but that byte. 66520247900SChandler Carruth OutputBuffer.push_back('\0'); 66620247900SChandler Carruth return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1); 667a2f8fc5aSChris Lattner } 668a2f8fc5aSChris Lattner 66938820972SMatt Arsenault static bool IsPartOfWord(char c) { 67038820972SMatt Arsenault return (isalnum(c) || c == '-' || c == '_'); 67138820972SMatt Arsenault } 67238820972SMatt Arsenault 67313df4626SMatt Arsenault // Get the size of the prefix extension. 67413df4626SMatt Arsenault static size_t CheckTypeSize(Check::CheckType Ty) { 67513df4626SMatt Arsenault switch (Ty) { 67613df4626SMatt Arsenault case Check::CheckNone: 677a908e7bdSPaul Robinson case Check::CheckBadNot: 67813df4626SMatt Arsenault return 0; 67913df4626SMatt Arsenault 68013df4626SMatt Arsenault case Check::CheckPlain: 68113df4626SMatt Arsenault return sizeof(":") - 1; 68213df4626SMatt Arsenault 68313df4626SMatt Arsenault case Check::CheckNext: 68413df4626SMatt Arsenault return sizeof("-NEXT:") - 1; 68513df4626SMatt Arsenault 68601ac1707SDuncan P. N. Exon Smith case Check::CheckSame: 68701ac1707SDuncan P. N. Exon Smith return sizeof("-SAME:") - 1; 68801ac1707SDuncan P. N. Exon Smith 68913df4626SMatt Arsenault case Check::CheckNot: 69013df4626SMatt Arsenault return sizeof("-NOT:") - 1; 69113df4626SMatt Arsenault 69213df4626SMatt Arsenault case Check::CheckDAG: 69313df4626SMatt Arsenault return sizeof("-DAG:") - 1; 69413df4626SMatt Arsenault 69513df4626SMatt Arsenault case Check::CheckLabel: 69613df4626SMatt Arsenault return sizeof("-LABEL:") - 1; 69713df4626SMatt Arsenault 69813df4626SMatt Arsenault case Check::CheckEOF: 69913df4626SMatt Arsenault llvm_unreachable("Should not be using EOF size"); 70013df4626SMatt Arsenault } 70113df4626SMatt Arsenault 70213df4626SMatt Arsenault llvm_unreachable("Bad check type"); 70313df4626SMatt Arsenault } 70413df4626SMatt Arsenault 70513df4626SMatt Arsenault static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) { 706c4d2d471SMatt Arsenault char NextChar = Buffer[Prefix.size()]; 70738820972SMatt Arsenault 70838820972SMatt Arsenault // Verify that the : is present after the prefix. 70913df4626SMatt Arsenault if (NextChar == ':') 71038820972SMatt Arsenault return Check::CheckPlain; 71138820972SMatt Arsenault 71213df4626SMatt Arsenault if (NextChar != '-') 71338820972SMatt Arsenault return Check::CheckNone; 71438820972SMatt Arsenault 715c4d2d471SMatt Arsenault StringRef Rest = Buffer.drop_front(Prefix.size() + 1); 71613df4626SMatt Arsenault if (Rest.startswith("NEXT:")) 71738820972SMatt Arsenault return Check::CheckNext; 71838820972SMatt Arsenault 71901ac1707SDuncan P. N. Exon Smith if (Rest.startswith("SAME:")) 72001ac1707SDuncan P. N. Exon Smith return Check::CheckSame; 72101ac1707SDuncan P. N. Exon Smith 72213df4626SMatt Arsenault if (Rest.startswith("NOT:")) 72338820972SMatt Arsenault return Check::CheckNot; 72438820972SMatt Arsenault 72513df4626SMatt Arsenault if (Rest.startswith("DAG:")) 72638820972SMatt Arsenault return Check::CheckDAG; 72738820972SMatt Arsenault 72813df4626SMatt Arsenault if (Rest.startswith("LABEL:")) 72938820972SMatt Arsenault return Check::CheckLabel; 73013df4626SMatt Arsenault 731a908e7bdSPaul Robinson // You can't combine -NOT with another suffix. 732a908e7bdSPaul Robinson if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") || 733a908e7bdSPaul Robinson Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") || 734a908e7bdSPaul Robinson Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:")) 735a908e7bdSPaul Robinson return Check::CheckBadNot; 736a908e7bdSPaul Robinson 73713df4626SMatt Arsenault return Check::CheckNone; 73838820972SMatt Arsenault } 73938820972SMatt Arsenault 74013df4626SMatt Arsenault // From the given position, find the next character after the word. 74113df4626SMatt Arsenault static size_t SkipWord(StringRef Str, size_t Loc) { 74213df4626SMatt Arsenault while (Loc < Str.size() && IsPartOfWord(Str[Loc])) 74313df4626SMatt Arsenault ++Loc; 74413df4626SMatt Arsenault return Loc; 74513df4626SMatt Arsenault } 74613df4626SMatt Arsenault 74713df4626SMatt Arsenault // Try to find the first match in buffer for any prefix. If a valid match is 74813df4626SMatt Arsenault // found, return that prefix and set its type and location. If there are almost 74913df4626SMatt Arsenault // matches (e.g. the actual prefix string is found, but is not an actual check 75013df4626SMatt Arsenault // string), but no valid match, return an empty string and set the position to 75113df4626SMatt Arsenault // resume searching from. If no partial matches are found, return an empty 75213df4626SMatt Arsenault // string and the location will be StringRef::npos. If one prefix is a substring 75313df4626SMatt Arsenault // of another, the maximal match should be found. e.g. if "A" and "AA" are 75413df4626SMatt Arsenault // prefixes then AA-CHECK: should match the second one. 75513df4626SMatt Arsenault static StringRef FindFirstCandidateMatch(StringRef &Buffer, 75613df4626SMatt Arsenault Check::CheckType &CheckTy, 75713df4626SMatt Arsenault size_t &CheckLoc) { 75813df4626SMatt Arsenault StringRef FirstPrefix; 75913df4626SMatt Arsenault size_t FirstLoc = StringRef::npos; 76013df4626SMatt Arsenault size_t SearchLoc = StringRef::npos; 76113df4626SMatt Arsenault Check::CheckType FirstTy = Check::CheckNone; 76213df4626SMatt Arsenault 76313df4626SMatt Arsenault CheckTy = Check::CheckNone; 76413df4626SMatt Arsenault CheckLoc = StringRef::npos; 76513df4626SMatt Arsenault 7668f870499SBenjamin Kramer for (StringRef Prefix : CheckPrefixes) { 76713df4626SMatt Arsenault size_t PrefixLoc = Buffer.find(Prefix); 76813df4626SMatt Arsenault 76913df4626SMatt Arsenault if (PrefixLoc == StringRef::npos) 77013df4626SMatt Arsenault continue; 77113df4626SMatt Arsenault 77213df4626SMatt Arsenault // Track where we are searching for invalid prefixes that look almost right. 77313df4626SMatt Arsenault // We need to only advance to the first partial match on the next attempt 77413df4626SMatt Arsenault // since a partial match could be a substring of a later, valid prefix. 77513df4626SMatt Arsenault // Need to skip to the end of the word, otherwise we could end up 77613df4626SMatt Arsenault // matching a prefix in a substring later. 77713df4626SMatt Arsenault if (PrefixLoc < SearchLoc) 77813df4626SMatt Arsenault SearchLoc = SkipWord(Buffer, PrefixLoc); 77913df4626SMatt Arsenault 78013df4626SMatt Arsenault // We only want to find the first match to avoid skipping some. 78113df4626SMatt Arsenault if (PrefixLoc > FirstLoc) 78213df4626SMatt Arsenault continue; 783a7181a1bSAlexey Samsonov // If one matching check-prefix is a prefix of another, choose the 784a7181a1bSAlexey Samsonov // longer one. 785a7181a1bSAlexey Samsonov if (PrefixLoc == FirstLoc && Prefix.size() < FirstPrefix.size()) 786a7181a1bSAlexey Samsonov continue; 78713df4626SMatt Arsenault 78813df4626SMatt Arsenault StringRef Rest = Buffer.drop_front(PrefixLoc); 78913df4626SMatt Arsenault // Make sure we have actually found the prefix, and not a word containing 79013df4626SMatt Arsenault // it. This should also prevent matching the wrong prefix when one is a 79113df4626SMatt Arsenault // substring of another. 79213df4626SMatt Arsenault if (PrefixLoc != 0 && IsPartOfWord(Buffer[PrefixLoc - 1])) 79343b5f572SDaniel Sanders FirstTy = Check::CheckNone; 79443b5f572SDaniel Sanders else 79543b5f572SDaniel Sanders FirstTy = FindCheckType(Rest, Prefix); 79613df4626SMatt Arsenault 79713df4626SMatt Arsenault FirstLoc = PrefixLoc; 798a7181a1bSAlexey Samsonov FirstPrefix = Prefix; 79913df4626SMatt Arsenault } 80013df4626SMatt Arsenault 801a7181a1bSAlexey Samsonov // If the first prefix is invalid, we should continue the search after it. 802a7181a1bSAlexey Samsonov if (FirstTy == Check::CheckNone) { 80313df4626SMatt Arsenault CheckLoc = SearchLoc; 804a7181a1bSAlexey Samsonov return ""; 805a7181a1bSAlexey Samsonov } 806a7181a1bSAlexey Samsonov 80713df4626SMatt Arsenault CheckTy = FirstTy; 80813df4626SMatt Arsenault CheckLoc = FirstLoc; 80913df4626SMatt Arsenault return FirstPrefix; 81013df4626SMatt Arsenault } 81113df4626SMatt Arsenault 81213df4626SMatt Arsenault static StringRef FindFirstMatchingPrefix(StringRef &Buffer, 81313df4626SMatt Arsenault unsigned &LineNumber, 81413df4626SMatt Arsenault Check::CheckType &CheckTy, 81513df4626SMatt Arsenault size_t &CheckLoc) { 81613df4626SMatt Arsenault while (!Buffer.empty()) { 81713df4626SMatt Arsenault StringRef Prefix = FindFirstCandidateMatch(Buffer, CheckTy, CheckLoc); 81813df4626SMatt Arsenault // If we found a real match, we are done. 81913df4626SMatt Arsenault if (!Prefix.empty()) { 82013df4626SMatt Arsenault LineNumber += Buffer.substr(0, CheckLoc).count('\n'); 82113df4626SMatt Arsenault return Prefix; 82213df4626SMatt Arsenault } 82313df4626SMatt Arsenault 82413df4626SMatt Arsenault // We didn't find any almost matches either, we are also done. 82513df4626SMatt Arsenault if (CheckLoc == StringRef::npos) 82613df4626SMatt Arsenault return StringRef(); 82713df4626SMatt Arsenault 82813df4626SMatt Arsenault LineNumber += Buffer.substr(0, CheckLoc + 1).count('\n'); 82913df4626SMatt Arsenault 83013df4626SMatt Arsenault // Advance to the last possible match we found and try again. 83113df4626SMatt Arsenault Buffer = Buffer.drop_front(CheckLoc + 1); 83213df4626SMatt Arsenault } 83313df4626SMatt Arsenault 83413df4626SMatt Arsenault return StringRef(); 83538820972SMatt Arsenault } 836ee3c74fbSChris Lattner 8374dabac20SChandler Carruth /// Read the check file, which specifies the sequence of expected strings. 8384dabac20SChandler Carruth /// 8394dabac20SChandler Carruth /// The strings are added to the CheckStrings vector. Returns true in case of 8404dabac20SChandler Carruth /// an error, false otherwise. 84120247900SChandler Carruth static bool ReadCheckFile(SourceMgr &SM, StringRef Buffer, 84226cccfe1SChris Lattner std::vector<CheckString> &CheckStrings) { 84356ccdbbdSAlexander Kornienko std::vector<Pattern> ImplicitNegativeChecks; 84456ccdbbdSAlexander Kornienko for (const auto &PatternString : ImplicitCheckNot) { 84556ccdbbdSAlexander Kornienko // Create a buffer with fake command line content in order to display the 84656ccdbbdSAlexander Kornienko // command line option responsible for the specific implicit CHECK-NOT. 847ff43d69dSDavid Blaikie std::string Prefix = (Twine("-") + ImplicitCheckNot.ArgStr + "='").str(); 84856ccdbbdSAlexander Kornienko std::string Suffix = "'"; 8493560ff2cSRafael Espindola std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy( 8503560ff2cSRafael Espindola Prefix + PatternString + Suffix, "command line"); 8513560ff2cSRafael Espindola 85256ccdbbdSAlexander Kornienko StringRef PatternInBuffer = 85356ccdbbdSAlexander Kornienko CmdLine->getBuffer().substr(Prefix.size(), PatternString.size()); 8541961f14cSDavid Blaikie SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc()); 85556ccdbbdSAlexander Kornienko 85656ccdbbdSAlexander Kornienko ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot)); 85756ccdbbdSAlexander Kornienko ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer, 85856ccdbbdSAlexander Kornienko "IMPLICIT-CHECK", SM, 0); 85956ccdbbdSAlexander Kornienko } 86056ccdbbdSAlexander Kornienko 86156ccdbbdSAlexander Kornienko std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks; 862236d2d5eSChris Lattner 86343d50d4aSEli Bendersky // LineNumber keeps track of the line on which CheckPrefix instances are 86443d50d4aSEli Bendersky // found. 86592987fb3SAlexander Kornienko unsigned LineNumber = 1; 86692987fb3SAlexander Kornienko 867ee3c74fbSChris Lattner while (1) { 86813df4626SMatt Arsenault Check::CheckType CheckTy; 86913df4626SMatt Arsenault size_t PrefixLoc; 87013df4626SMatt Arsenault 87113df4626SMatt Arsenault // See if a prefix occurs in the memory buffer. 872e8f2fb20SChandler Carruth StringRef UsedPrefix = 873e8f2fb20SChandler Carruth FindFirstMatchingPrefix(Buffer, LineNumber, CheckTy, PrefixLoc); 87413df4626SMatt Arsenault if (UsedPrefix.empty()) 875ee3c74fbSChris Lattner break; 876ee3c74fbSChris Lattner 87713df4626SMatt Arsenault Buffer = Buffer.drop_front(PrefixLoc); 87892987fb3SAlexander Kornienko 87913df4626SMatt Arsenault // Location to use for error messages. 88013df4626SMatt Arsenault const char *UsedPrefixStart = Buffer.data() + (PrefixLoc == 0 ? 0 : 1); 88192987fb3SAlexander Kornienko 88213df4626SMatt Arsenault // PrefixLoc is to the start of the prefix. Skip to the end. 88313df4626SMatt Arsenault Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy)); 88410f10cedSChris Lattner 885a908e7bdSPaul Robinson // Complain about useful-looking but unsupported suffixes. 886a908e7bdSPaul Robinson if (CheckTy == Check::CheckBadNot) { 887e8f2fb20SChandler Carruth SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error, 888a908e7bdSPaul Robinson "unsupported -NOT combo on prefix '" + UsedPrefix + "'"); 889a908e7bdSPaul Robinson return true; 890a908e7bdSPaul Robinson } 891a908e7bdSPaul Robinson 89238820972SMatt Arsenault // Okay, we found the prefix, yay. Remember the rest of the line, but ignore 89338820972SMatt Arsenault // leading and trailing whitespace. 894236d2d5eSChris Lattner Buffer = Buffer.substr(Buffer.find_first_not_of(" \t")); 895ee3c74fbSChris Lattner 896ee3c74fbSChris Lattner // Scan ahead to the end of line. 897caa5fc0cSChris Lattner size_t EOL = Buffer.find_first_of("\n\r"); 898ee3c74fbSChris Lattner 899838fb09aSDan Gohman // Remember the location of the start of the pattern, for diagnostics. 900838fb09aSDan Gohman SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data()); 901838fb09aSDan Gohman 90274d50731SChris Lattner // Parse the pattern. 90338820972SMatt Arsenault Pattern P(CheckTy); 90413df4626SMatt Arsenault if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber)) 905ee3c74fbSChris Lattner return true; 906ee3c74fbSChris Lattner 907f8bd2e5bSStephen Lin // Verify that CHECK-LABEL lines do not define or use variables 90838820972SMatt Arsenault if ((CheckTy == Check::CheckLabel) && P.hasVariable()) { 909e8f2fb20SChandler Carruth SM.PrintMessage( 910e8f2fb20SChandler Carruth SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error, 91113df4626SMatt Arsenault "found '" + UsedPrefix + "-LABEL:'" 91213df4626SMatt Arsenault " with variable definition or use"); 913f8bd2e5bSStephen Lin return true; 914f8bd2e5bSStephen Lin } 915f8bd2e5bSStephen Lin 916236d2d5eSChris Lattner Buffer = Buffer.substr(EOL); 91774d50731SChris Lattner 918da108b4eSChris Lattner // Verify that CHECK-NEXT lines have at least one CHECK line before them. 91901ac1707SDuncan P. N. Exon Smith if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame) && 92001ac1707SDuncan P. N. Exon Smith CheckStrings.empty()) { 92101ac1707SDuncan P. N. Exon Smith StringRef Type = CheckTy == Check::CheckNext ? "NEXT" : "SAME"; 92213df4626SMatt Arsenault SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart), 92303b80a40SChris Lattner SourceMgr::DK_Error, 924e8f2fb20SChandler Carruth "found '" + UsedPrefix + "-" + Type + 925e8f2fb20SChandler Carruth "' without previous '" + UsedPrefix + ": line"); 926da108b4eSChris Lattner return true; 927da108b4eSChris Lattner } 928da108b4eSChris Lattner 92991a1b2c9SMichael Liao // Handle CHECK-DAG/-NOT. 93038820972SMatt Arsenault if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) { 93191a1b2c9SMichael Liao DagNotMatches.push_back(P); 93274d50731SChris Lattner continue; 93374d50731SChris Lattner } 93474d50731SChris Lattner 935ee3c74fbSChris Lattner // Okay, add the string we captured to the output vector and move on. 93685913ccaSJames Y Knight CheckStrings.emplace_back(P, UsedPrefix, PatternLoc); 93791a1b2c9SMichael Liao std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); 93856ccdbbdSAlexander Kornienko DagNotMatches = ImplicitNegativeChecks; 939ee3c74fbSChris Lattner } 940ee3c74fbSChris Lattner 94113df4626SMatt Arsenault // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first 94213df4626SMatt Arsenault // prefix as a filler for the error message. 94391a1b2c9SMichael Liao if (!DagNotMatches.empty()) { 944f5e2fc47SBenjamin Kramer CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(), 94585913ccaSJames Y Knight SMLoc::getFromPointer(Buffer.data())); 94691a1b2c9SMichael Liao std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); 947eba55822SJakob Stoklund Olesen } 948eba55822SJakob Stoklund Olesen 949ee3c74fbSChris Lattner if (CheckStrings.empty()) { 95013df4626SMatt Arsenault errs() << "error: no check strings found with prefix" 95113df4626SMatt Arsenault << (CheckPrefixes.size() > 1 ? "es " : " "); 9523e3ef2f2SChris Bieneman prefix_iterator I = CheckPrefixes.begin(); 9533e3ef2f2SChris Bieneman prefix_iterator E = CheckPrefixes.end(); 9543e3ef2f2SChris Bieneman if (I != E) { 9553e3ef2f2SChris Bieneman errs() << "\'" << *I << ":'"; 9563e3ef2f2SChris Bieneman ++I; 95713df4626SMatt Arsenault } 9583e3ef2f2SChris Bieneman for (; I != E; ++I) 9593e3ef2f2SChris Bieneman errs() << ", \'" << *I << ":'"; 96013df4626SMatt Arsenault 96113df4626SMatt Arsenault errs() << '\n'; 962ee3c74fbSChris Lattner return true; 963ee3c74fbSChris Lattner } 964ee3c74fbSChris Lattner 965ee3c74fbSChris Lattner return false; 966ee3c74fbSChris Lattner } 967ee3c74fbSChris Lattner 968e8f2fb20SChandler Carruth static void PrintCheckFailed(const SourceMgr &SM, SMLoc Loc, const Pattern &Pat, 969e8f2fb20SChandler Carruth StringRef Buffer, 970e0ef65abSDaniel Dunbar StringMap<StringRef> &VariableTable) { 971da108b4eSChris Lattner // Otherwise, we have an error, emit an error message. 97291a1b2c9SMichael Liao SM.PrintMessage(Loc, SourceMgr::DK_Error, 97303b80a40SChris Lattner "expected string not found in input"); 974da108b4eSChris Lattner 975da108b4eSChris Lattner // Print the "scanning from here" line. If the current position is at the 976da108b4eSChris Lattner // end of a line, advance to the start of the next line. 977caa5fc0cSChris Lattner Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r")); 978da108b4eSChris Lattner 97903b80a40SChris Lattner SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 98003b80a40SChris Lattner "scanning from here"); 981e0ef65abSDaniel Dunbar 982e0ef65abSDaniel Dunbar // Allow the pattern to print additional information if desired. 98391a1b2c9SMichael Liao Pat.PrintFailureInfo(SM, Buffer, VariableTable); 98491a1b2c9SMichael Liao } 98591a1b2c9SMichael Liao 98691a1b2c9SMichael Liao static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr, 98791a1b2c9SMichael Liao StringRef Buffer, 98891a1b2c9SMichael Liao StringMap<StringRef> &VariableTable) { 98991a1b2c9SMichael Liao PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable); 990da108b4eSChris Lattner } 991da108b4eSChris Lattner 9924dabac20SChandler Carruth /// Count the number of newlines in the specified range. 993592fe880SRichard Smith static unsigned CountNumNewlinesBetween(StringRef Range, 994592fe880SRichard Smith const char *&FirstNewLine) { 995da108b4eSChris Lattner unsigned NumNewLines = 0; 99637183584SChris Lattner while (1) { 997da108b4eSChris Lattner // Scan for newline. 99837183584SChris Lattner Range = Range.substr(Range.find_first_of("\n\r")); 999e8f2fb20SChandler Carruth if (Range.empty()) 1000e8f2fb20SChandler Carruth return NumNewLines; 1001da108b4eSChris Lattner 1002da108b4eSChris Lattner ++NumNewLines; 1003da108b4eSChris Lattner 1004da108b4eSChris Lattner // Handle \n\r and \r\n as a single newline. 1005e8f2fb20SChandler Carruth if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') && 100637183584SChris Lattner (Range[0] != Range[1])) 100737183584SChris Lattner Range = Range.substr(1); 100837183584SChris Lattner Range = Range.substr(1); 1009592fe880SRichard Smith 1010592fe880SRichard Smith if (NumNewLines == 1) 1011592fe880SRichard Smith FirstNewLine = Range.begin(); 1012da108b4eSChris Lattner } 1013da108b4eSChris Lattner } 1014da108b4eSChris Lattner 10154dabac20SChandler Carruth /// Match check string and its "not strings" and/or "dag strings". 1016dcc7d48dSMichael Liao size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer, 1017e93a3a08SStephen Lin bool IsLabelScanMode, size_t &MatchLen, 1018dcc7d48dSMichael Liao StringMap<StringRef> &VariableTable) const { 101991a1b2c9SMichael Liao size_t LastPos = 0; 102091a1b2c9SMichael Liao std::vector<const Pattern *> NotStrings; 102191a1b2c9SMichael Liao 1022e93a3a08SStephen Lin // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL 1023e93a3a08SStephen Lin // bounds; we have not processed variable definitions within the bounded block 1024e93a3a08SStephen Lin // yet so cannot handle any final CHECK-DAG yet; this is handled when going 1025e93a3a08SStephen Lin // over the block again (including the last CHECK-LABEL) in normal mode. 1026e93a3a08SStephen Lin if (!IsLabelScanMode) { 102791a1b2c9SMichael Liao // Match "dag strings" (with mixed "not strings" if any). 102891a1b2c9SMichael Liao LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable); 102991a1b2c9SMichael Liao if (LastPos == StringRef::npos) 103091a1b2c9SMichael Liao return StringRef::npos; 1031e93a3a08SStephen Lin } 103291a1b2c9SMichael Liao 103391a1b2c9SMichael Liao // Match itself from the last position after matching CHECK-DAG. 103491a1b2c9SMichael Liao StringRef MatchBuffer = Buffer.substr(LastPos); 103591a1b2c9SMichael Liao size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable); 1036dcc7d48dSMichael Liao if (MatchPos == StringRef::npos) { 103791a1b2c9SMichael Liao PrintCheckFailed(SM, *this, MatchBuffer, VariableTable); 1038dcc7d48dSMichael Liao return StringRef::npos; 1039dcc7d48dSMichael Liao } 1040dcc7d48dSMichael Liao 1041e93a3a08SStephen Lin // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT 1042e93a3a08SStephen Lin // or CHECK-NOT 1043e93a3a08SStephen Lin if (!IsLabelScanMode) { 104491a1b2c9SMichael Liao StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); 1045dcc7d48dSMichael Liao 1046dcc7d48dSMichael Liao // If this check is a "CHECK-NEXT", verify that the previous match was on 1047dcc7d48dSMichael Liao // the previous line (i.e. that there is one newline between them). 1048dcc7d48dSMichael Liao if (CheckNext(SM, SkippedRegion)) 1049dcc7d48dSMichael Liao return StringRef::npos; 1050dcc7d48dSMichael Liao 105101ac1707SDuncan P. N. Exon Smith // If this check is a "CHECK-SAME", verify that the previous match was on 105201ac1707SDuncan P. N. Exon Smith // the same line (i.e. that there is no newline between them). 105301ac1707SDuncan P. N. Exon Smith if (CheckSame(SM, SkippedRegion)) 105401ac1707SDuncan P. N. Exon Smith return StringRef::npos; 105501ac1707SDuncan P. N. Exon Smith 1056dcc7d48dSMichael Liao // If this match had "not strings", verify that they don't exist in the 1057dcc7d48dSMichael Liao // skipped region. 105891a1b2c9SMichael Liao if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) 1059dcc7d48dSMichael Liao return StringRef::npos; 1060f8bd2e5bSStephen Lin } 1061dcc7d48dSMichael Liao 10627dfb92b9SMehdi Amini return LastPos + MatchPos; 1063dcc7d48dSMichael Liao } 1064dcc7d48dSMichael Liao 10654dabac20SChandler Carruth /// Verify there is a single line in the given buffer. 1066dcc7d48dSMichael Liao bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const { 106785913ccaSJames Y Knight if (Pat.getCheckTy() != Check::CheckNext) 1068dcc7d48dSMichael Liao return false; 1069dcc7d48dSMichael Liao 1070dcc7d48dSMichael Liao // Count the number of newlines between the previous match and this one. 1071dcc7d48dSMichael Liao assert(Buffer.data() != 1072e8f2fb20SChandler Carruth SM.getMemoryBuffer(SM.FindBufferContainingLoc( 1073e8f2fb20SChandler Carruth SMLoc::getFromPointer(Buffer.data()))) 1074e8f2fb20SChandler Carruth ->getBufferStart() && 1075dcc7d48dSMichael Liao "CHECK-NEXT can't be the first check in a file"); 1076dcc7d48dSMichael Liao 107766f09ad0SCraig Topper const char *FirstNewLine = nullptr; 1078592fe880SRichard Smith unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 1079dcc7d48dSMichael Liao 1080dcc7d48dSMichael Liao if (NumNewLines == 0) { 1081e8f2fb20SChandler Carruth SM.PrintMessage(Loc, SourceMgr::DK_Error, 1082e8f2fb20SChandler Carruth Prefix + "-NEXT: is on the same line as previous match"); 1083e8f2fb20SChandler Carruth SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 1084e8f2fb20SChandler Carruth "'next' match was here"); 1085dcc7d48dSMichael Liao SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1086dcc7d48dSMichael Liao "previous match ended here"); 1087dcc7d48dSMichael Liao return true; 1088dcc7d48dSMichael Liao } 1089dcc7d48dSMichael Liao 1090dcc7d48dSMichael Liao if (NumNewLines != 1) { 1091e8f2fb20SChandler Carruth SM.PrintMessage(Loc, SourceMgr::DK_Error, 1092e8f2fb20SChandler Carruth Prefix + 1093dcc7d48dSMichael Liao "-NEXT: is not on the line after the previous match"); 1094e8f2fb20SChandler Carruth SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 1095e8f2fb20SChandler Carruth "'next' match was here"); 1096dcc7d48dSMichael Liao SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1097dcc7d48dSMichael Liao "previous match ended here"); 1098592fe880SRichard Smith SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note, 1099592fe880SRichard Smith "non-matching line after previous match is here"); 1100dcc7d48dSMichael Liao return true; 1101dcc7d48dSMichael Liao } 1102dcc7d48dSMichael Liao 1103dcc7d48dSMichael Liao return false; 1104dcc7d48dSMichael Liao } 1105dcc7d48dSMichael Liao 11064dabac20SChandler Carruth /// Verify there is no newline in the given buffer. 110701ac1707SDuncan P. N. Exon Smith bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const { 110885913ccaSJames Y Knight if (Pat.getCheckTy() != Check::CheckSame) 110901ac1707SDuncan P. N. Exon Smith return false; 111001ac1707SDuncan P. N. Exon Smith 111101ac1707SDuncan P. N. Exon Smith // Count the number of newlines between the previous match and this one. 111201ac1707SDuncan P. N. Exon Smith assert(Buffer.data() != 111301ac1707SDuncan P. N. Exon Smith SM.getMemoryBuffer(SM.FindBufferContainingLoc( 111401ac1707SDuncan P. N. Exon Smith SMLoc::getFromPointer(Buffer.data()))) 111501ac1707SDuncan P. N. Exon Smith ->getBufferStart() && 111601ac1707SDuncan P. N. Exon Smith "CHECK-SAME can't be the first check in a file"); 111701ac1707SDuncan P. N. Exon Smith 111801ac1707SDuncan P. N. Exon Smith const char *FirstNewLine = nullptr; 111901ac1707SDuncan P. N. Exon Smith unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 112001ac1707SDuncan P. N. Exon Smith 112101ac1707SDuncan P. N. Exon Smith if (NumNewLines != 0) { 112201ac1707SDuncan P. N. Exon Smith SM.PrintMessage(Loc, SourceMgr::DK_Error, 112301ac1707SDuncan P. N. Exon Smith Prefix + 112401ac1707SDuncan P. N. Exon Smith "-SAME: is not on the same line as the previous match"); 112501ac1707SDuncan P. N. Exon Smith SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 112601ac1707SDuncan P. N. Exon Smith "'next' match was here"); 112701ac1707SDuncan P. N. Exon Smith SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 112801ac1707SDuncan P. N. Exon Smith "previous match ended here"); 112901ac1707SDuncan P. N. Exon Smith return true; 113001ac1707SDuncan P. N. Exon Smith } 113101ac1707SDuncan P. N. Exon Smith 113201ac1707SDuncan P. N. Exon Smith return false; 113301ac1707SDuncan P. N. Exon Smith } 113401ac1707SDuncan P. N. Exon Smith 11354dabac20SChandler Carruth /// Verify there's no "not strings" in the given buffer. 1136dcc7d48dSMichael Liao bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer, 113791a1b2c9SMichael Liao const std::vector<const Pattern *> &NotStrings, 1138dcc7d48dSMichael Liao StringMap<StringRef> &VariableTable) const { 11398f870499SBenjamin Kramer for (const Pattern *Pat : NotStrings) { 114038820972SMatt Arsenault assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!"); 114191a1b2c9SMichael Liao 1142dcc7d48dSMichael Liao size_t MatchLen = 0; 114391a1b2c9SMichael Liao size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable); 1144dcc7d48dSMichael Liao 1145e8f2fb20SChandler Carruth if (Pos == StringRef::npos) 1146e8f2fb20SChandler Carruth continue; 1147dcc7d48dSMichael Liao 1148dcc7d48dSMichael Liao SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Pos), 1149e8f2fb20SChandler Carruth SourceMgr::DK_Error, Prefix + "-NOT: string occurred!"); 115091a1b2c9SMichael Liao SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note, 115113df4626SMatt Arsenault Prefix + "-NOT: pattern specified here"); 1152dcc7d48dSMichael Liao return true; 1153dcc7d48dSMichael Liao } 1154dcc7d48dSMichael Liao 1155dcc7d48dSMichael Liao return false; 1156dcc7d48dSMichael Liao } 1157dcc7d48dSMichael Liao 11584dabac20SChandler Carruth /// Match "dag strings" and their mixed "not strings". 115991a1b2c9SMichael Liao size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer, 116091a1b2c9SMichael Liao std::vector<const Pattern *> &NotStrings, 116191a1b2c9SMichael Liao StringMap<StringRef> &VariableTable) const { 116291a1b2c9SMichael Liao if (DagNotStrings.empty()) 116391a1b2c9SMichael Liao return 0; 116491a1b2c9SMichael Liao 116591a1b2c9SMichael Liao size_t LastPos = 0; 116691a1b2c9SMichael Liao size_t StartPos = LastPos; 116791a1b2c9SMichael Liao 11688f870499SBenjamin Kramer for (const Pattern &Pat : DagNotStrings) { 116938820972SMatt Arsenault assert((Pat.getCheckTy() == Check::CheckDAG || 117038820972SMatt Arsenault Pat.getCheckTy() == Check::CheckNot) && 117191a1b2c9SMichael Liao "Invalid CHECK-DAG or CHECK-NOT!"); 117291a1b2c9SMichael Liao 117338820972SMatt Arsenault if (Pat.getCheckTy() == Check::CheckNot) { 117491a1b2c9SMichael Liao NotStrings.push_back(&Pat); 117591a1b2c9SMichael Liao continue; 117691a1b2c9SMichael Liao } 117791a1b2c9SMichael Liao 117838820972SMatt Arsenault assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!"); 117991a1b2c9SMichael Liao 118091a1b2c9SMichael Liao size_t MatchLen = 0, MatchPos; 118191a1b2c9SMichael Liao 118291a1b2c9SMichael Liao // CHECK-DAG always matches from the start. 118391a1b2c9SMichael Liao StringRef MatchBuffer = Buffer.substr(StartPos); 118491a1b2c9SMichael Liao MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable); 118591a1b2c9SMichael Liao // With a group of CHECK-DAGs, a single mismatching means the match on 118691a1b2c9SMichael Liao // that group of CHECK-DAGs fails immediately. 118791a1b2c9SMichael Liao if (MatchPos == StringRef::npos) { 118891a1b2c9SMichael Liao PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable); 118991a1b2c9SMichael Liao return StringRef::npos; 119091a1b2c9SMichael Liao } 119191a1b2c9SMichael Liao // Re-calc it as the offset relative to the start of the original string. 119291a1b2c9SMichael Liao MatchPos += StartPos; 119391a1b2c9SMichael Liao 119491a1b2c9SMichael Liao if (!NotStrings.empty()) { 119591a1b2c9SMichael Liao if (MatchPos < LastPos) { 119691a1b2c9SMichael Liao // Reordered? 119791a1b2c9SMichael Liao SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos), 119891a1b2c9SMichael Liao SourceMgr::DK_Error, 119913df4626SMatt Arsenault Prefix + "-DAG: found a match of CHECK-DAG" 120091a1b2c9SMichael Liao " reordering across a CHECK-NOT"); 120191a1b2c9SMichael Liao SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos), 120291a1b2c9SMichael Liao SourceMgr::DK_Note, 120313df4626SMatt Arsenault Prefix + "-DAG: the farthest match of CHECK-DAG" 120491a1b2c9SMichael Liao " is found here"); 120591a1b2c9SMichael Liao SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note, 120613df4626SMatt Arsenault Prefix + "-NOT: the crossed pattern specified" 120791a1b2c9SMichael Liao " here"); 120891a1b2c9SMichael Liao SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note, 120913df4626SMatt Arsenault Prefix + "-DAG: the reordered pattern specified" 121091a1b2c9SMichael Liao " here"); 121191a1b2c9SMichael Liao return StringRef::npos; 121291a1b2c9SMichael Liao } 121391a1b2c9SMichael Liao // All subsequent CHECK-DAGs should be matched from the farthest 121491a1b2c9SMichael Liao // position of all precedent CHECK-DAGs (including this one.) 121591a1b2c9SMichael Liao StartPos = LastPos; 121691a1b2c9SMichael Liao // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to 121791a1b2c9SMichael Liao // CHECK-DAG, verify that there's no 'not' strings occurred in that 121891a1b2c9SMichael Liao // region. 121991a1b2c9SMichael Liao StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); 1220cf708c32STim Northover if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) 122191a1b2c9SMichael Liao return StringRef::npos; 122291a1b2c9SMichael Liao // Clear "not strings". 122391a1b2c9SMichael Liao NotStrings.clear(); 122491a1b2c9SMichael Liao } 122591a1b2c9SMichael Liao 122691a1b2c9SMichael Liao // Update the last position with CHECK-DAG matches. 122791a1b2c9SMichael Liao LastPos = std::max(MatchPos + MatchLen, LastPos); 122891a1b2c9SMichael Liao } 122991a1b2c9SMichael Liao 123091a1b2c9SMichael Liao return LastPos; 123191a1b2c9SMichael Liao } 123291a1b2c9SMichael Liao 123313df4626SMatt Arsenault // A check prefix must contain only alphanumeric, hyphens and underscores. 123413df4626SMatt Arsenault static bool ValidateCheckPrefix(StringRef CheckPrefix) { 123513df4626SMatt Arsenault Regex Validator("^[a-zA-Z0-9_-]*$"); 123613df4626SMatt Arsenault return Validator.match(CheckPrefix); 123713df4626SMatt Arsenault } 123813df4626SMatt Arsenault 123913df4626SMatt Arsenault static bool ValidateCheckPrefixes() { 124013df4626SMatt Arsenault StringSet<> PrefixSet; 124113df4626SMatt Arsenault 12428f870499SBenjamin Kramer for (StringRef Prefix : CheckPrefixes) { 124324412b14SEli Bendersky // Reject empty prefixes. 124424412b14SEli Bendersky if (Prefix == "") 124524412b14SEli Bendersky return false; 124624412b14SEli Bendersky 12470356975cSDavid Blaikie if (!PrefixSet.insert(Prefix).second) 124813df4626SMatt Arsenault return false; 124913df4626SMatt Arsenault 125013df4626SMatt Arsenault if (!ValidateCheckPrefix(Prefix)) 125113df4626SMatt Arsenault return false; 125213df4626SMatt Arsenault } 125313df4626SMatt Arsenault 125413df4626SMatt Arsenault return true; 125513df4626SMatt Arsenault } 125613df4626SMatt Arsenault 125713df4626SMatt Arsenault // I don't think there's a way to specify an initial value for cl::list, 125813df4626SMatt Arsenault // so if nothing was specified, add the default 125913df4626SMatt Arsenault static void AddCheckPrefixIfNeeded() { 126013df4626SMatt Arsenault if (CheckPrefixes.empty()) 126113df4626SMatt Arsenault CheckPrefixes.push_back("CHECK"); 1262c2735158SRui Ueyama } 1263c2735158SRui Ueyama 12642bd4f8b6SXinliang David Li static void DumpCommandLine(int argc, char **argv) { 12652bd4f8b6SXinliang David Li errs() << "FileCheck command line: "; 12662bd4f8b6SXinliang David Li for (int I = 0; I < argc; I++) 12672bd4f8b6SXinliang David Li errs() << " " << argv[I]; 12682bd4f8b6SXinliang David Li errs() << "\n"; 12692bd4f8b6SXinliang David Li } 12702bd4f8b6SXinliang David Li 127120247900SChandler Carruth /// Check the input to FileCheck provided in the \p Buffer against the \p 127220247900SChandler Carruth /// CheckStrings read from the check file. 127320247900SChandler Carruth /// 127420247900SChandler Carruth /// Returns false if the input fails to satisfy the checks. 127520247900SChandler Carruth bool CheckInput(SourceMgr &SM, StringRef Buffer, 127620247900SChandler Carruth ArrayRef<CheckString> CheckStrings) { 127720247900SChandler Carruth bool ChecksFailed = false; 127820247900SChandler Carruth 127920247900SChandler Carruth /// VariableTable - This holds all the current filecheck variables. 128020247900SChandler Carruth StringMap<StringRef> VariableTable; 128120247900SChandler Carruth 128220247900SChandler Carruth unsigned i = 0, j = 0, e = CheckStrings.size(); 128320247900SChandler Carruth while (true) { 128420247900SChandler Carruth StringRef CheckRegion; 128520247900SChandler Carruth if (j == e) { 128620247900SChandler Carruth CheckRegion = Buffer; 128720247900SChandler Carruth } else { 128820247900SChandler Carruth const CheckString &CheckLabelStr = CheckStrings[j]; 128920247900SChandler Carruth if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) { 129020247900SChandler Carruth ++j; 129120247900SChandler Carruth continue; 129220247900SChandler Carruth } 129320247900SChandler Carruth 129420247900SChandler Carruth // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG 129520247900SChandler Carruth size_t MatchLabelLen = 0; 1296e8f2fb20SChandler Carruth size_t MatchLabelPos = 1297e8f2fb20SChandler Carruth CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, VariableTable); 129820247900SChandler Carruth if (MatchLabelPos == StringRef::npos) 129920247900SChandler Carruth // Immediately bail of CHECK-LABEL fails, nothing else we can do. 130020247900SChandler Carruth return false; 130120247900SChandler Carruth 130220247900SChandler Carruth CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen); 130320247900SChandler Carruth Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen); 130420247900SChandler Carruth ++j; 130520247900SChandler Carruth } 130620247900SChandler Carruth 130720247900SChandler Carruth for (; i != j; ++i) { 130820247900SChandler Carruth const CheckString &CheckStr = CheckStrings[i]; 130920247900SChandler Carruth 131020247900SChandler Carruth // Check each string within the scanned region, including a second check 131120247900SChandler Carruth // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG) 131220247900SChandler Carruth size_t MatchLen = 0; 1313e8f2fb20SChandler Carruth size_t MatchPos = 1314e8f2fb20SChandler Carruth CheckStr.Check(SM, CheckRegion, false, MatchLen, VariableTable); 131520247900SChandler Carruth 131620247900SChandler Carruth if (MatchPos == StringRef::npos) { 131720247900SChandler Carruth ChecksFailed = true; 131820247900SChandler Carruth i = j; 131920247900SChandler Carruth break; 132020247900SChandler Carruth } 132120247900SChandler Carruth 132220247900SChandler Carruth CheckRegion = CheckRegion.substr(MatchPos + MatchLen); 132320247900SChandler Carruth } 132420247900SChandler Carruth 132520247900SChandler Carruth if (j == e) 132620247900SChandler Carruth break; 132720247900SChandler Carruth } 132820247900SChandler Carruth 132920247900SChandler Carruth // Success if no checks failed. 133020247900SChandler Carruth return !ChecksFailed; 133120247900SChandler Carruth } 133220247900SChandler Carruth 1333ee3c74fbSChris Lattner int main(int argc, char **argv) { 13342ad6d48bSRichard Smith sys::PrintStackTraceOnErrorSignal(argv[0]); 1335ee3c74fbSChris Lattner PrettyStackTraceProgram X(argc, argv); 1336ee3c74fbSChris Lattner cl::ParseCommandLineOptions(argc, argv); 1337ee3c74fbSChris Lattner 133813df4626SMatt Arsenault if (!ValidateCheckPrefixes()) { 133913df4626SMatt Arsenault errs() << "Supplied check-prefix is invalid! Prefixes must be unique and " 134013df4626SMatt Arsenault "start with a letter and contain only alphanumeric characters, " 134113df4626SMatt Arsenault "hyphens and underscores\n"; 1342c2735158SRui Ueyama return 2; 1343c2735158SRui Ueyama } 1344c2735158SRui Ueyama 134513df4626SMatt Arsenault AddCheckPrefixIfNeeded(); 134613df4626SMatt Arsenault 1347ee3c74fbSChris Lattner SourceMgr SM; 1348ee3c74fbSChris Lattner 1349ee3c74fbSChris Lattner // Read the expected strings from the check file. 135020247900SChandler Carruth ErrorOr<std::unique_ptr<MemoryBuffer>> CheckFileOrErr = 135120247900SChandler Carruth MemoryBuffer::getFileOrSTDIN(CheckFilename); 135220247900SChandler Carruth if (std::error_code EC = CheckFileOrErr.getError()) { 135320247900SChandler Carruth errs() << "Could not open check file '" << CheckFilename 135420247900SChandler Carruth << "': " << EC.message() << '\n'; 135520247900SChandler Carruth return 2; 135620247900SChandler Carruth } 135720247900SChandler Carruth MemoryBuffer &CheckFile = *CheckFileOrErr.get(); 135820247900SChandler Carruth 135920247900SChandler Carruth SmallString<4096> CheckFileBuffer; 1360*b03c166aSChandler Carruth StringRef CheckFileText = CanonicalizeFile(CheckFile, CheckFileBuffer); 136120247900SChandler Carruth 136220247900SChandler Carruth SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer( 136320247900SChandler Carruth CheckFileText, CheckFile.getBufferIdentifier()), 136420247900SChandler Carruth SMLoc()); 136520247900SChandler Carruth 136626cccfe1SChris Lattner std::vector<CheckString> CheckStrings; 136720247900SChandler Carruth if (ReadCheckFile(SM, CheckFileText, CheckStrings)) 1368ee3c74fbSChris Lattner return 2; 1369ee3c74fbSChris Lattner 1370ee3c74fbSChris Lattner // Open the file to check and add it to SourceMgr. 137120247900SChandler Carruth ErrorOr<std::unique_ptr<MemoryBuffer>> InputFileOrErr = 1372adf21f2aSRafael Espindola MemoryBuffer::getFileOrSTDIN(InputFilename); 137320247900SChandler Carruth if (std::error_code EC = InputFileOrErr.getError()) { 1374adf21f2aSRafael Espindola errs() << "Could not open input file '" << InputFilename 1375adf21f2aSRafael Espindola << "': " << EC.message() << '\n'; 13768e1c6477SEli Bendersky return 2; 1377ee3c74fbSChris Lattner } 137820247900SChandler Carruth MemoryBuffer &InputFile = *InputFileOrErr.get(); 13792c3e5cdfSChris Lattner 138020247900SChandler Carruth if (InputFile.getBufferSize() == 0 && !AllowEmptyInput) { 1381b692bed7SChris Lattner errs() << "FileCheck error: '" << InputFilename << "' is empty.\n"; 13822bd4f8b6SXinliang David Li DumpCommandLine(argc, argv); 13838e1c6477SEli Bendersky return 2; 1384b692bed7SChris Lattner } 1385b692bed7SChris Lattner 138620247900SChandler Carruth SmallString<4096> InputFileBuffer; 1387*b03c166aSChandler Carruth StringRef InputFileText = CanonicalizeFile(InputFile, InputFileBuffer); 13882c3e5cdfSChris Lattner 1389e8f2fb20SChandler Carruth SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer( 1390e8f2fb20SChandler Carruth InputFileText, InputFile.getBufferIdentifier()), 1391e8f2fb20SChandler Carruth SMLoc()); 1392ee3c74fbSChris Lattner 139320247900SChandler Carruth return CheckInput(SM, InputFileText, CheckStrings) ? EXIT_SUCCESS : 1; 1394ee3c74fbSChris Lattner } 1395