//===- FileCheck.cpp - Check that File's Contents match what is expected --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// FileCheck does a line-by-line check of a file that validates whether it
// contains the expected content. This is useful for regression tests etc.
//
// This program exits with an error status of 2 on error, exit status of 0 if
// the file matched the expected contents, and exit status of 1 if it did not
// contain the expected contents.
16ee3c74fbSChris Lattner // 17ee3c74fbSChris Lattner //===----------------------------------------------------------------------===// 18ee3c74fbSChris Lattner 1991d19d8eSChandler Carruth #include "llvm/ADT/SmallString.h" 2091d19d8eSChandler Carruth #include "llvm/ADT/StringExtras.h" 2191d19d8eSChandler Carruth #include "llvm/ADT/StringMap.h" 2213df4626SMatt Arsenault #include "llvm/ADT/StringSet.h" 23ee3c74fbSChris Lattner #include "llvm/Support/CommandLine.h" 24ee3c74fbSChris Lattner #include "llvm/Support/MemoryBuffer.h" 25ee3c74fbSChris Lattner #include "llvm/Support/PrettyStackTrace.h" 26f08d2db9SChris Lattner #include "llvm/Support/Regex.h" 2791d19d8eSChandler Carruth #include "llvm/Support/Signals.h" 28ee3c74fbSChris Lattner #include "llvm/Support/SourceMgr.h" 29ee3c74fbSChris Lattner #include "llvm/Support/raw_ostream.h" 308879e06dSChris Lattner #include <algorithm> 31981af002SWill Dietz #include <cctype> 32e8b8f1bcSEli Bendersky #include <map> 33e8b8f1bcSEli Bendersky #include <string> 34a6e9c3e4SRafael Espindola #include <system_error> 35e8b8f1bcSEli Bendersky #include <vector> 36ee3c74fbSChris Lattner using namespace llvm; 37ee3c74fbSChris Lattner 38ee3c74fbSChris Lattner static cl::opt<std::string> 39ee3c74fbSChris Lattner CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required); 40ee3c74fbSChris Lattner 41ee3c74fbSChris Lattner static cl::opt<std::string> 42ee3c74fbSChris Lattner InputFilename("input-file", cl::desc("File to check (defaults to stdin)"), 43ee3c74fbSChris Lattner cl::init("-"), cl::value_desc("filename")); 44ee3c74fbSChris Lattner 4513df4626SMatt Arsenault static cl::list<std::string> 4613df4626SMatt Arsenault CheckPrefixes("check-prefix", 47ee3c74fbSChris Lattner cl::desc("Prefix to use from check file (defaults to 'CHECK')")); 48ee3c74fbSChris Lattner 492c3e5cdfSChris Lattner static cl::opt<bool> 502c3e5cdfSChris Lattner NoCanonicalizeWhiteSpace("strict-whitespace", 512c3e5cdfSChris Lattner cl::desc("Do not treat all 
horizontal whitespace as equivalent")); 522c3e5cdfSChris Lattner 5356ccdbbdSAlexander Kornienko static cl::list<std::string> ImplicitCheckNot( 5456ccdbbdSAlexander Kornienko "implicit-check-not", 5556ccdbbdSAlexander Kornienko cl::desc("Add an implicit negative check with this pattern to every\n" 5656ccdbbdSAlexander Kornienko "positive check. This can be used to ensure that no instances of\n" 5756ccdbbdSAlexander Kornienko "this pattern occur which are not matched by a positive pattern"), 5856ccdbbdSAlexander Kornienko cl::value_desc("pattern")); 5956ccdbbdSAlexander Kornienko 601b9f936fSJustin Bogner static cl::opt<bool> AllowEmptyInput( 611b9f936fSJustin Bogner "allow-empty", cl::init(false), 621b9f936fSJustin Bogner cl::desc("Allow the input file to be empty. This is useful when making\n" 631b9f936fSJustin Bogner "checks that some error message does not occur, for example.")); 641b9f936fSJustin Bogner 6513df4626SMatt Arsenault typedef cl::list<std::string>::const_iterator prefix_iterator; 6613df4626SMatt Arsenault 6774d50731SChris Lattner //===----------------------------------------------------------------------===// 6874d50731SChris Lattner // Pattern Handling Code. 6974d50731SChris Lattner //===----------------------------------------------------------------------===// 7074d50731SChris Lattner 7138820972SMatt Arsenault namespace Check { 7238820972SMatt Arsenault enum CheckType { 7338820972SMatt Arsenault CheckNone = 0, 7438820972SMatt Arsenault CheckPlain, 7538820972SMatt Arsenault CheckNext, 7601ac1707SDuncan P. N. Exon Smith CheckSame, 7738820972SMatt Arsenault CheckNot, 7838820972SMatt Arsenault CheckDAG, 7938820972SMatt Arsenault CheckLabel, 800a4c44bdSChris Lattner 81eba55822SJakob Stoklund Olesen /// MatchEOF - When set, this pattern only matches the end of file. This is 82eba55822SJakob Stoklund Olesen /// used for trailing CHECK-NOTs. 
8338820972SMatt Arsenault CheckEOF 8438820972SMatt Arsenault }; 8538820972SMatt Arsenault } 86eba55822SJakob Stoklund Olesen 8738820972SMatt Arsenault class Pattern { 8838820972SMatt Arsenault SMLoc PatternLoc; 8991a1b2c9SMichael Liao 9038820972SMatt Arsenault Check::CheckType CheckTy; 9191a1b2c9SMichael Liao 92b16ab0c4SChris Lattner /// FixedStr - If non-empty, this pattern is a fixed string match with the 93b16ab0c4SChris Lattner /// specified fixed string. 94221460e0SChris Lattner StringRef FixedStr; 95b16ab0c4SChris Lattner 96b16ab0c4SChris Lattner /// RegEx - If non-empty, this is a regex pattern. 97b16ab0c4SChris Lattner std::string RegExStr; 988879e06dSChris Lattner 9992987fb3SAlexander Kornienko /// \brief Contains the number of line this pattern is in. 10092987fb3SAlexander Kornienko unsigned LineNumber; 10192987fb3SAlexander Kornienko 1028879e06dSChris Lattner /// VariableUses - Entries in this vector map to uses of a variable in the 1038879e06dSChris Lattner /// pattern, e.g. "foo[[bar]]baz". In this case, the RegExStr will contain 1048879e06dSChris Lattner /// "foobaz" and we'll get an entry in this vector that tells us to insert the 1058879e06dSChris Lattner /// value of bar at offset 3. 1068879e06dSChris Lattner std::vector<std::pair<StringRef, unsigned> > VariableUses; 1078879e06dSChris Lattner 108e8b8f1bcSEli Bendersky /// VariableDefs - Maps definitions of variables to their parenthesized 109e8b8f1bcSEli Bendersky /// capture numbers. 110e8b8f1bcSEli Bendersky /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1. 111e8b8f1bcSEli Bendersky std::map<StringRef, unsigned> VariableDefs; 1128879e06dSChris Lattner 1133b40b445SChris Lattner public: 1143b40b445SChris Lattner 11538820972SMatt Arsenault Pattern(Check::CheckType Ty) 11638820972SMatt Arsenault : CheckTy(Ty) { } 11774d50731SChris Lattner 1180b707eb8SMichael Liao /// getLoc - Return the location in source code. 
1190b707eb8SMichael Liao SMLoc getLoc() const { return PatternLoc; } 1200b707eb8SMichael Liao 12113df4626SMatt Arsenault /// ParsePattern - Parse the given string into the Pattern. Prefix provides 12213df4626SMatt Arsenault /// which prefix is being matched, SM provides the SourceMgr used for error 12313df4626SMatt Arsenault /// reports, and LineNumber is the line number in the input file from which 12413df4626SMatt Arsenault /// the pattern string was read. Returns true in case of an error, false 12513df4626SMatt Arsenault /// otherwise. 12613df4626SMatt Arsenault bool ParsePattern(StringRef PatternStr, 12713df4626SMatt Arsenault StringRef Prefix, 12813df4626SMatt Arsenault SourceMgr &SM, 12913df4626SMatt Arsenault unsigned LineNumber); 1303b40b445SChris Lattner 1313b40b445SChris Lattner /// Match - Match the pattern string against the input buffer Buffer. This 1323b40b445SChris Lattner /// returns the position that is matched or npos if there is no match. If 1333b40b445SChris Lattner /// there is a match, the size of the matched string is returned in MatchLen. 1348879e06dSChris Lattner /// 1358879e06dSChris Lattner /// The VariableTable StringMap provides the current values of filecheck 1368879e06dSChris Lattner /// variables and is updated if this match defines new values. 1378879e06dSChris Lattner size_t Match(StringRef Buffer, size_t &MatchLen, 1388879e06dSChris Lattner StringMap<StringRef> &VariableTable) const; 139b16ab0c4SChris Lattner 140e0ef65abSDaniel Dunbar /// PrintFailureInfo - Print additional information about a failure to match 141e0ef65abSDaniel Dunbar /// involving this pattern. 
142e0ef65abSDaniel Dunbar void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer, 143e0ef65abSDaniel Dunbar const StringMap<StringRef> &VariableTable) const; 144e0ef65abSDaniel Dunbar 145f8bd2e5bSStephen Lin bool hasVariable() const { return !(VariableUses.empty() && 146f8bd2e5bSStephen Lin VariableDefs.empty()); } 147f8bd2e5bSStephen Lin 14838820972SMatt Arsenault Check::CheckType getCheckTy() const { return CheckTy; } 14991a1b2c9SMichael Liao 150b16ab0c4SChris Lattner private: 151e8b8f1bcSEli Bendersky bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM); 152e8b8f1bcSEli Bendersky void AddBackrefToRegEx(unsigned BackrefNum); 153fd29d886SDaniel Dunbar 154fd29d886SDaniel Dunbar /// ComputeMatchDistance - Compute an arbitrary estimate for the quality of 155fd29d886SDaniel Dunbar /// matching this pattern at the start of \arg Buffer; a distance of zero 156fd29d886SDaniel Dunbar /// should correspond to a perfect match. 157fd29d886SDaniel Dunbar unsigned ComputeMatchDistance(StringRef Buffer, 158fd29d886SDaniel Dunbar const StringMap<StringRef> &VariableTable) const; 15992987fb3SAlexander Kornienko 16092987fb3SAlexander Kornienko /// \brief Evaluates expression and stores the result to \p Value. 16192987fb3SAlexander Kornienko /// \return true on success. false when the expression has invalid syntax. 16292987fb3SAlexander Kornienko bool EvaluateExpression(StringRef Expr, std::string &Value) const; 163061d2baaSEli Bendersky 164061d2baaSEli Bendersky /// \brief Finds the closing sequence of a regex variable usage or 165061d2baaSEli Bendersky /// definition. Str has to point in the beginning of the definition 166061d2baaSEli Bendersky /// (right after the opening sequence). 167061d2baaSEli Bendersky /// \return offset of the closing sequence within Str, or npos if it was not 168061d2baaSEli Bendersky /// found. 
16981e5cd9eSAdrian Prantl size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM); 1703b40b445SChris Lattner }; 1713b40b445SChris Lattner 1728879e06dSChris Lattner 17313df4626SMatt Arsenault bool Pattern::ParsePattern(StringRef PatternStr, 17413df4626SMatt Arsenault StringRef Prefix, 17513df4626SMatt Arsenault SourceMgr &SM, 17692987fb3SAlexander Kornienko unsigned LineNumber) { 17792987fb3SAlexander Kornienko this->LineNumber = LineNumber; 1780a4c44bdSChris Lattner PatternLoc = SMLoc::getFromPointer(PatternStr.data()); 1790a4c44bdSChris Lattner 18074d50731SChris Lattner // Ignore trailing whitespace. 18174d50731SChris Lattner while (!PatternStr.empty() && 18274d50731SChris Lattner (PatternStr.back() == ' ' || PatternStr.back() == '\t')) 18374d50731SChris Lattner PatternStr = PatternStr.substr(0, PatternStr.size()-1); 18474d50731SChris Lattner 18574d50731SChris Lattner // Check that there is something on the line. 18674d50731SChris Lattner if (PatternStr.empty()) { 18703b80a40SChris Lattner SM.PrintMessage(PatternLoc, SourceMgr::DK_Error, 18803b80a40SChris Lattner "found empty check string with prefix '" + 18913df4626SMatt Arsenault Prefix + ":'"); 19074d50731SChris Lattner return true; 19174d50731SChris Lattner } 19274d50731SChris Lattner 193221460e0SChris Lattner // Check to see if this is a fixed string, or if it has regex pieces. 194d9466967STed Kremenek if (PatternStr.size() < 2 || 1958879e06dSChris Lattner (PatternStr.find("{{") == StringRef::npos && 1968879e06dSChris Lattner PatternStr.find("[[") == StringRef::npos)) { 197221460e0SChris Lattner FixedStr = PatternStr; 198221460e0SChris Lattner return false; 199221460e0SChris Lattner } 200221460e0SChris Lattner 2018879e06dSChris Lattner // Paren value #0 is for the fully matched string. Any new parenthesized 20253e0679dSChris Lattner // values add from there. 2038879e06dSChris Lattner unsigned CurParen = 1; 2048879e06dSChris Lattner 205b16ab0c4SChris Lattner // Otherwise, there is at least one regex piece. 
Build up the regex pattern 206b16ab0c4SChris Lattner // by escaping scary characters in fixed strings, building up one big regex. 207f08d2db9SChris Lattner while (!PatternStr.empty()) { 2088879e06dSChris Lattner // RegEx matches. 20953e0679dSChris Lattner if (PatternStr.startswith("{{")) { 21043d50d4aSEli Bendersky // This is the start of a regex match. Scan for the }}. 211f08d2db9SChris Lattner size_t End = PatternStr.find("}}"); 212f08d2db9SChris Lattner if (End == StringRef::npos) { 213f08d2db9SChris Lattner SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 21403b80a40SChris Lattner SourceMgr::DK_Error, 21503b80a40SChris Lattner "found start of regex string with no end '}}'"); 216f08d2db9SChris Lattner return true; 217f08d2db9SChris Lattner } 218f08d2db9SChris Lattner 219e53c95f1SChris Lattner // Enclose {{}} patterns in parens just like [[]] even though we're not 220e53c95f1SChris Lattner // capturing the result for any purpose. This is required in case the 221e53c95f1SChris Lattner // expression contains an alternation like: CHECK: abc{{x|z}}def. We 222e53c95f1SChris Lattner // want this to turn into: "abc(x|z)def" not "abcx|zdef". 223e53c95f1SChris Lattner RegExStr += '('; 224e53c95f1SChris Lattner ++CurParen; 225e53c95f1SChris Lattner 2268879e06dSChris Lattner if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM)) 2278879e06dSChris Lattner return true; 228e53c95f1SChris Lattner RegExStr += ')'; 22953e0679dSChris Lattner 2308879e06dSChris Lattner PatternStr = PatternStr.substr(End+2); 2318879e06dSChris Lattner continue; 2328879e06dSChris Lattner } 2338879e06dSChris Lattner 2348879e06dSChris Lattner // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .* 2358879e06dSChris Lattner // (or some other regex) and assigns it to the FileCheck variable 'foo'. The 2368879e06dSChris Lattner // second form is [[foo]] which is a reference to foo. 
The variable name 23757cb733bSDaniel Dunbar // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject 2388879e06dSChris Lattner // it. This is to catch some common errors. 23953e0679dSChris Lattner if (PatternStr.startswith("[[")) { 240061d2baaSEli Bendersky // Find the closing bracket pair ending the match. End is going to be an 241061d2baaSEli Bendersky // offset relative to the beginning of the match string. 24281e5cd9eSAdrian Prantl size_t End = FindRegexVarEnd(PatternStr.substr(2), SM); 243061d2baaSEli Bendersky 2448879e06dSChris Lattner if (End == StringRef::npos) { 2458879e06dSChris Lattner SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 24603b80a40SChris Lattner SourceMgr::DK_Error, 24703b80a40SChris Lattner "invalid named regex reference, no ]] found"); 248f08d2db9SChris Lattner return true; 249f08d2db9SChris Lattner } 250f08d2db9SChris Lattner 251061d2baaSEli Bendersky StringRef MatchStr = PatternStr.substr(2, End); 252061d2baaSEli Bendersky PatternStr = PatternStr.substr(End+4); 2538879e06dSChris Lattner 2548879e06dSChris Lattner // Get the regex name (e.g. "foo"). 2558879e06dSChris Lattner size_t NameEnd = MatchStr.find(':'); 2568879e06dSChris Lattner StringRef Name = MatchStr.substr(0, NameEnd); 2578879e06dSChris Lattner 2588879e06dSChris Lattner if (Name.empty()) { 25903b80a40SChris Lattner SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 26003b80a40SChris Lattner "invalid name in named regex: empty name"); 2618879e06dSChris Lattner return true; 2628879e06dSChris Lattner } 2638879e06dSChris Lattner 26492987fb3SAlexander Kornienko // Verify that the name/expression is well formed. FileCheck currently 26592987fb3SAlexander Kornienko // supports @LINE, @LINE+number, @LINE-number expressions. The check here 26692987fb3SAlexander Kornienko // is relaxed, more strict check is performed in \c EvaluateExpression. 
26792987fb3SAlexander Kornienko bool IsExpression = false; 26892987fb3SAlexander Kornienko for (unsigned i = 0, e = Name.size(); i != e; ++i) { 26992987fb3SAlexander Kornienko if (i == 0 && Name[i] == '@') { 27092987fb3SAlexander Kornienko if (NameEnd != StringRef::npos) { 27192987fb3SAlexander Kornienko SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 27292987fb3SAlexander Kornienko SourceMgr::DK_Error, 27392987fb3SAlexander Kornienko "invalid name in named regex definition"); 27492987fb3SAlexander Kornienko return true; 27592987fb3SAlexander Kornienko } 27692987fb3SAlexander Kornienko IsExpression = true; 27792987fb3SAlexander Kornienko continue; 27892987fb3SAlexander Kornienko } 27992987fb3SAlexander Kornienko if (Name[i] != '_' && !isalnum(Name[i]) && 28092987fb3SAlexander Kornienko (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) { 2818879e06dSChris Lattner SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i), 28203b80a40SChris Lattner SourceMgr::DK_Error, "invalid name in named regex"); 2838879e06dSChris Lattner return true; 2848879e06dSChris Lattner } 28592987fb3SAlexander Kornienko } 2868879e06dSChris Lattner 2878879e06dSChris Lattner // Name can't start with a digit. 28883c74e9fSGuy Benyei if (isdigit(static_cast<unsigned char>(Name[0]))) { 28903b80a40SChris Lattner SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 29003b80a40SChris Lattner "invalid name in named regex"); 2918879e06dSChris Lattner return true; 2928879e06dSChris Lattner } 2938879e06dSChris Lattner 2948879e06dSChris Lattner // Handle [[foo]]. 2958879e06dSChris Lattner if (NameEnd == StringRef::npos) { 296e8b8f1bcSEli Bendersky // Handle variables that were defined earlier on the same line by 297e8b8f1bcSEli Bendersky // emitting a backreference. 
298e8b8f1bcSEli Bendersky if (VariableDefs.find(Name) != VariableDefs.end()) { 299e8b8f1bcSEli Bendersky unsigned VarParenNum = VariableDefs[Name]; 300e8b8f1bcSEli Bendersky if (VarParenNum < 1 || VarParenNum > 9) { 301e8b8f1bcSEli Bendersky SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 302e8b8f1bcSEli Bendersky SourceMgr::DK_Error, 303e8b8f1bcSEli Bendersky "Can't back-reference more than 9 variables"); 304e8b8f1bcSEli Bendersky return true; 305e8b8f1bcSEli Bendersky } 306e8b8f1bcSEli Bendersky AddBackrefToRegEx(VarParenNum); 307e8b8f1bcSEli Bendersky } else { 3088879e06dSChris Lattner VariableUses.push_back(std::make_pair(Name, RegExStr.size())); 309e8b8f1bcSEli Bendersky } 3108879e06dSChris Lattner continue; 3118879e06dSChris Lattner } 3128879e06dSChris Lattner 3138879e06dSChris Lattner // Handle [[foo:.*]]. 314e8b8f1bcSEli Bendersky VariableDefs[Name] = CurParen; 3158879e06dSChris Lattner RegExStr += '('; 3168879e06dSChris Lattner ++CurParen; 3178879e06dSChris Lattner 3188879e06dSChris Lattner if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM)) 3198879e06dSChris Lattner return true; 3208879e06dSChris Lattner 3218879e06dSChris Lattner RegExStr += ')'; 3228879e06dSChris Lattner } 3238879e06dSChris Lattner 3248879e06dSChris Lattner // Handle fixed string matches. 3258879e06dSChris Lattner // Find the end, which is the start of the next regex. 
3268879e06dSChris Lattner size_t FixedMatchEnd = PatternStr.find("{{"); 3278879e06dSChris Lattner FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[[")); 3286f4f77b7SHans Wennborg RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd)); 3298879e06dSChris Lattner PatternStr = PatternStr.substr(FixedMatchEnd); 330f08d2db9SChris Lattner } 331f08d2db9SChris Lattner 33274d50731SChris Lattner return false; 33374d50731SChris Lattner } 33474d50731SChris Lattner 335e8b8f1bcSEli Bendersky bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, 3368879e06dSChris Lattner SourceMgr &SM) { 337e8b8f1bcSEli Bendersky Regex R(RS); 3388879e06dSChris Lattner std::string Error; 3398879e06dSChris Lattner if (!R.isValid(Error)) { 340e8b8f1bcSEli Bendersky SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error, 34103b80a40SChris Lattner "invalid regex: " + Error); 3428879e06dSChris Lattner return true; 3438879e06dSChris Lattner } 3448879e06dSChris Lattner 345e8b8f1bcSEli Bendersky RegExStr += RS.str(); 3468879e06dSChris Lattner CurParen += R.getNumMatches(); 3478879e06dSChris Lattner return false; 3488879e06dSChris Lattner } 349b16ab0c4SChris Lattner 350e8b8f1bcSEli Bendersky void Pattern::AddBackrefToRegEx(unsigned BackrefNum) { 351e8b8f1bcSEli Bendersky assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number"); 352e8b8f1bcSEli Bendersky std::string Backref = std::string("\\") + 353e8b8f1bcSEli Bendersky std::string(1, '0' + BackrefNum); 354e8b8f1bcSEli Bendersky RegExStr += Backref; 355e8b8f1bcSEli Bendersky } 356e8b8f1bcSEli Bendersky 35792987fb3SAlexander Kornienko bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const { 35892987fb3SAlexander Kornienko // The only supported expression is @LINE([\+-]\d+)? 
35992987fb3SAlexander Kornienko if (!Expr.startswith("@LINE")) 36092987fb3SAlexander Kornienko return false; 36192987fb3SAlexander Kornienko Expr = Expr.substr(StringRef("@LINE").size()); 36292987fb3SAlexander Kornienko int Offset = 0; 36392987fb3SAlexander Kornienko if (!Expr.empty()) { 36492987fb3SAlexander Kornienko if (Expr[0] == '+') 36592987fb3SAlexander Kornienko Expr = Expr.substr(1); 36692987fb3SAlexander Kornienko else if (Expr[0] != '-') 36792987fb3SAlexander Kornienko return false; 36892987fb3SAlexander Kornienko if (Expr.getAsInteger(10, Offset)) 36992987fb3SAlexander Kornienko return false; 37092987fb3SAlexander Kornienko } 37192987fb3SAlexander Kornienko Value = llvm::itostr(LineNumber + Offset); 37292987fb3SAlexander Kornienko return true; 37392987fb3SAlexander Kornienko } 37492987fb3SAlexander Kornienko 375f08d2db9SChris Lattner /// Match - Match the pattern string against the input buffer Buffer. This 376f08d2db9SChris Lattner /// returns the position that is matched or npos if there is no match. If 377f08d2db9SChris Lattner /// there is a match, the size of the matched string is returned in MatchLen. 3788879e06dSChris Lattner size_t Pattern::Match(StringRef Buffer, size_t &MatchLen, 3798879e06dSChris Lattner StringMap<StringRef> &VariableTable) const { 380eba55822SJakob Stoklund Olesen // If this is the EOF pattern, match it immediately. 38138820972SMatt Arsenault if (CheckTy == Check::CheckEOF) { 382eba55822SJakob Stoklund Olesen MatchLen = 0; 383eba55822SJakob Stoklund Olesen return Buffer.size(); 384eba55822SJakob Stoklund Olesen } 385eba55822SJakob Stoklund Olesen 386221460e0SChris Lattner // If this is a fixed string pattern, just match it now. 387221460e0SChris Lattner if (!FixedStr.empty()) { 388221460e0SChris Lattner MatchLen = FixedStr.size(); 389221460e0SChris Lattner return Buffer.find(FixedStr); 390221460e0SChris Lattner } 391221460e0SChris Lattner 392b16ab0c4SChris Lattner // Regex match. 
3938879e06dSChris Lattner 3948879e06dSChris Lattner // If there are variable uses, we need to create a temporary string with the 3958879e06dSChris Lattner // actual value. 3968879e06dSChris Lattner StringRef RegExToMatch = RegExStr; 3978879e06dSChris Lattner std::string TmpStr; 3988879e06dSChris Lattner if (!VariableUses.empty()) { 3998879e06dSChris Lattner TmpStr = RegExStr; 4008879e06dSChris Lattner 4018879e06dSChris Lattner unsigned InsertOffset = 0; 4028f870499SBenjamin Kramer for (const auto &VariableUse : VariableUses) { 40392987fb3SAlexander Kornienko std::string Value; 40492987fb3SAlexander Kornienko 4058f870499SBenjamin Kramer if (VariableUse.first[0] == '@') { 4068f870499SBenjamin Kramer if (!EvaluateExpression(VariableUse.first, Value)) 40792987fb3SAlexander Kornienko return StringRef::npos; 40892987fb3SAlexander Kornienko } else { 409e0ef65abSDaniel Dunbar StringMap<StringRef>::iterator it = 4108f870499SBenjamin Kramer VariableTable.find(VariableUse.first); 411e0ef65abSDaniel Dunbar // If the variable is undefined, return an error. 412e0ef65abSDaniel Dunbar if (it == VariableTable.end()) 413e0ef65abSDaniel Dunbar return StringRef::npos; 414e0ef65abSDaniel Dunbar 4156f4f77b7SHans Wennborg // Look up the value and escape it so that we can put it into the regex. 4166f4f77b7SHans Wennborg Value += Regex::escape(it->second); 41792987fb3SAlexander Kornienko } 4188879e06dSChris Lattner 4198879e06dSChris Lattner // Plop it into the regex at the adjusted offset. 4208f870499SBenjamin Kramer TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset, 4218879e06dSChris Lattner Value.begin(), Value.end()); 4228879e06dSChris Lattner InsertOffset += Value.size(); 4238879e06dSChris Lattner } 4248879e06dSChris Lattner 4258879e06dSChris Lattner // Match the newly constructed regex. 
4268879e06dSChris Lattner RegExToMatch = TmpStr; 4278879e06dSChris Lattner } 4288879e06dSChris Lattner 4298879e06dSChris Lattner 430b16ab0c4SChris Lattner SmallVector<StringRef, 4> MatchInfo; 4318879e06dSChris Lattner if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo)) 432f08d2db9SChris Lattner return StringRef::npos; 433b16ab0c4SChris Lattner 434b16ab0c4SChris Lattner // Successful regex match. 435b16ab0c4SChris Lattner assert(!MatchInfo.empty() && "Didn't get any match"); 436b16ab0c4SChris Lattner StringRef FullMatch = MatchInfo[0]; 437b16ab0c4SChris Lattner 4388879e06dSChris Lattner // If this defines any variables, remember their values. 4398f870499SBenjamin Kramer for (const auto &VariableDef : VariableDefs) { 4408f870499SBenjamin Kramer assert(VariableDef.second < MatchInfo.size() && "Internal paren error"); 4418f870499SBenjamin Kramer VariableTable[VariableDef.first] = MatchInfo[VariableDef.second]; 4420a4c44bdSChris Lattner } 4430a4c44bdSChris Lattner 444b16ab0c4SChris Lattner MatchLen = FullMatch.size(); 445b16ab0c4SChris Lattner return FullMatch.data()-Buffer.data(); 446f08d2db9SChris Lattner } 447f08d2db9SChris Lattner 448fd29d886SDaniel Dunbar unsigned Pattern::ComputeMatchDistance(StringRef Buffer, 449fd29d886SDaniel Dunbar const StringMap<StringRef> &VariableTable) const { 450fd29d886SDaniel Dunbar // Just compute the number of matching characters. For regular expressions, we 451fd29d886SDaniel Dunbar // just compare against the regex itself and hope for the best. 452fd29d886SDaniel Dunbar // 453fd29d886SDaniel Dunbar // FIXME: One easy improvement here is have the regex lib generate a single 454fd29d886SDaniel Dunbar // example regular expression which matches, and use that as the example 455fd29d886SDaniel Dunbar // string. 
456fd29d886SDaniel Dunbar StringRef ExampleString(FixedStr); 457fd29d886SDaniel Dunbar if (ExampleString.empty()) 458fd29d886SDaniel Dunbar ExampleString = RegExStr; 459fd29d886SDaniel Dunbar 460e9aa36c8SDaniel Dunbar // Only compare up to the first line in the buffer, or the string size. 461e9aa36c8SDaniel Dunbar StringRef BufferPrefix = Buffer.substr(0, ExampleString.size()); 462e9aa36c8SDaniel Dunbar BufferPrefix = BufferPrefix.split('\n').first; 463e9aa36c8SDaniel Dunbar return BufferPrefix.edit_distance(ExampleString); 464fd29d886SDaniel Dunbar } 465fd29d886SDaniel Dunbar 466e0ef65abSDaniel Dunbar void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer, 467e0ef65abSDaniel Dunbar const StringMap<StringRef> &VariableTable) const{ 468e0ef65abSDaniel Dunbar // If this was a regular expression using variables, print the current 469e0ef65abSDaniel Dunbar // variable values. 470e0ef65abSDaniel Dunbar if (!VariableUses.empty()) { 4718f870499SBenjamin Kramer for (const auto &VariableUse : VariableUses) { 472e69170a1SAlp Toker SmallString<256> Msg; 473e69170a1SAlp Toker raw_svector_ostream OS(Msg); 4748f870499SBenjamin Kramer StringRef Var = VariableUse.first; 47592987fb3SAlexander Kornienko if (Var[0] == '@') { 47692987fb3SAlexander Kornienko std::string Value; 47792987fb3SAlexander Kornienko if (EvaluateExpression(Var, Value)) { 47892987fb3SAlexander Kornienko OS << "with expression \""; 47992987fb3SAlexander Kornienko OS.write_escaped(Var) << "\" equal to \""; 48092987fb3SAlexander Kornienko OS.write_escaped(Value) << "\""; 48192987fb3SAlexander Kornienko } else { 48292987fb3SAlexander Kornienko OS << "uses incorrect expression \""; 48392987fb3SAlexander Kornienko OS.write_escaped(Var) << "\""; 48492987fb3SAlexander Kornienko } 48592987fb3SAlexander Kornienko } else { 48692987fb3SAlexander Kornienko StringMap<StringRef>::const_iterator it = VariableTable.find(Var); 487e0ef65abSDaniel Dunbar 488e0ef65abSDaniel Dunbar // Check for undefined variable 
references. 489e0ef65abSDaniel Dunbar if (it == VariableTable.end()) { 490e0ef65abSDaniel Dunbar OS << "uses undefined variable \""; 49192987fb3SAlexander Kornienko OS.write_escaped(Var) << "\""; 492e0ef65abSDaniel Dunbar } else { 493e0ef65abSDaniel Dunbar OS << "with variable \""; 494e0ef65abSDaniel Dunbar OS.write_escaped(Var) << "\" equal to \""; 495e0ef65abSDaniel Dunbar OS.write_escaped(it->second) << "\""; 496e0ef65abSDaniel Dunbar } 49792987fb3SAlexander Kornienko } 498e0ef65abSDaniel Dunbar 49903b80a40SChris Lattner SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 50003b80a40SChris Lattner OS.str()); 501e0ef65abSDaniel Dunbar } 502e0ef65abSDaniel Dunbar } 503fd29d886SDaniel Dunbar 504fd29d886SDaniel Dunbar // Attempt to find the closest/best fuzzy match. Usually an error happens 505fd29d886SDaniel Dunbar // because some string in the output didn't exactly match. In these cases, we 506fd29d886SDaniel Dunbar // would like to show the user a best guess at what "should have" matched, to 507fd29d886SDaniel Dunbar // save them having to actually check the input manually. 508fd29d886SDaniel Dunbar size_t NumLinesForward = 0; 509fd29d886SDaniel Dunbar size_t Best = StringRef::npos; 510fd29d886SDaniel Dunbar double BestQuality = 0; 511fd29d886SDaniel Dunbar 512fd29d886SDaniel Dunbar // Use an arbitrary 4k limit on how far we will search. 5132bf486ebSDan Gohman for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) { 514fd29d886SDaniel Dunbar if (Buffer[i] == '\n') 515fd29d886SDaniel Dunbar ++NumLinesForward; 516fd29d886SDaniel Dunbar 517df22bbf7SDan Gohman // Patterns have leading whitespace stripped, so skip whitespace when 518df22bbf7SDan Gohman // looking for something which looks like a pattern. 
519df22bbf7SDan Gohman if (Buffer[i] == ' ' || Buffer[i] == '\t') 520df22bbf7SDan Gohman continue; 521df22bbf7SDan Gohman 522fd29d886SDaniel Dunbar // Compute the "quality" of this match as an arbitrary combination of the 523fd29d886SDaniel Dunbar // match distance and the number of lines skipped to get to this match. 524fd29d886SDaniel Dunbar unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable); 525fd29d886SDaniel Dunbar double Quality = Distance + (NumLinesForward / 100.); 526fd29d886SDaniel Dunbar 527fd29d886SDaniel Dunbar if (Quality < BestQuality || Best == StringRef::npos) { 528fd29d886SDaniel Dunbar Best = i; 529fd29d886SDaniel Dunbar BestQuality = Quality; 530fd29d886SDaniel Dunbar } 531fd29d886SDaniel Dunbar } 532fd29d886SDaniel Dunbar 533fd29d886SDaniel Dunbar // Print the "possible intended match here" line if we found something 534c069cc8eSDaniel Dunbar // reasonable and not equal to what we showed in the "scanning from here" 535c069cc8eSDaniel Dunbar // line. 536c069cc8eSDaniel Dunbar if (Best && Best != StringRef::npos && BestQuality < 50) { 537fd29d886SDaniel Dunbar SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best), 53803b80a40SChris Lattner SourceMgr::DK_Note, "possible intended match here"); 539fd29d886SDaniel Dunbar 540fd29d886SDaniel Dunbar // FIXME: If we wanted to be really friendly we would show why the match 541fd29d886SDaniel Dunbar // failed, as it can be hard to spot simple one character differences. 542fd29d886SDaniel Dunbar } 543e0ef65abSDaniel Dunbar } 54474d50731SChris Lattner 54581e5cd9eSAdrian Prantl size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) { 546061d2baaSEli Bendersky // Offset keeps track of the current offset within the input Str 547061d2baaSEli Bendersky size_t Offset = 0; 548061d2baaSEli Bendersky // [...] 
Nesting depth 549061d2baaSEli Bendersky size_t BracketDepth = 0; 550061d2baaSEli Bendersky 551061d2baaSEli Bendersky while (!Str.empty()) { 552061d2baaSEli Bendersky if (Str.startswith("]]") && BracketDepth == 0) 553061d2baaSEli Bendersky return Offset; 554061d2baaSEli Bendersky if (Str[0] == '\\') { 555061d2baaSEli Bendersky // Backslash escapes the next char within regexes, so skip them both. 556061d2baaSEli Bendersky Str = Str.substr(2); 557061d2baaSEli Bendersky Offset += 2; 558061d2baaSEli Bendersky } else { 559061d2baaSEli Bendersky switch (Str[0]) { 560061d2baaSEli Bendersky default: 561061d2baaSEli Bendersky break; 562061d2baaSEli Bendersky case '[': 563061d2baaSEli Bendersky BracketDepth++; 564061d2baaSEli Bendersky break; 565061d2baaSEli Bendersky case ']': 56681e5cd9eSAdrian Prantl if (BracketDepth == 0) { 56781e5cd9eSAdrian Prantl SM.PrintMessage(SMLoc::getFromPointer(Str.data()), 56881e5cd9eSAdrian Prantl SourceMgr::DK_Error, 56981e5cd9eSAdrian Prantl "missing closing \"]\" for regex variable"); 57081e5cd9eSAdrian Prantl exit(1); 57181e5cd9eSAdrian Prantl } 572061d2baaSEli Bendersky BracketDepth--; 573061d2baaSEli Bendersky break; 574061d2baaSEli Bendersky } 575061d2baaSEli Bendersky Str = Str.substr(1); 576061d2baaSEli Bendersky Offset++; 577061d2baaSEli Bendersky } 578061d2baaSEli Bendersky } 579061d2baaSEli Bendersky 580061d2baaSEli Bendersky return StringRef::npos; 581061d2baaSEli Bendersky } 582061d2baaSEli Bendersky 583061d2baaSEli Bendersky 58474d50731SChris Lattner //===----------------------------------------------------------------------===// 58574d50731SChris Lattner // Check Strings. 58674d50731SChris Lattner //===----------------------------------------------------------------------===// 5873b40b445SChris Lattner 5883b40b445SChris Lattner /// CheckString - This is a check that we found in the input file. 5893b40b445SChris Lattner struct CheckString { 5903b40b445SChris Lattner /// Pat - The pattern to match. 
5913b40b445SChris Lattner Pattern Pat; 59226cccfe1SChris Lattner 59313df4626SMatt Arsenault /// Prefix - Which prefix name this check matched. 59413df4626SMatt Arsenault StringRef Prefix; 59513df4626SMatt Arsenault 59626cccfe1SChris Lattner /// Loc - The location in the match file that the check string was specified. 59726cccfe1SChris Lattner SMLoc Loc; 59826cccfe1SChris Lattner 59938820972SMatt Arsenault /// CheckTy - Specify what kind of check this is. e.g. CHECK-NEXT: directive, 60038820972SMatt Arsenault /// as opposed to a CHECK: directive. 60138820972SMatt Arsenault Check::CheckType CheckTy; 602f8bd2e5bSStephen Lin 60391a1b2c9SMichael Liao /// DagNotStrings - These are all of the strings that are disallowed from 604236d2d5eSChris Lattner /// occurring between this match string and the previous one (or start of 605236d2d5eSChris Lattner /// file). 60691a1b2c9SMichael Liao std::vector<Pattern> DagNotStrings; 607236d2d5eSChris Lattner 60813df4626SMatt Arsenault 60913df4626SMatt Arsenault CheckString(const Pattern &P, 61013df4626SMatt Arsenault StringRef S, 61113df4626SMatt Arsenault SMLoc L, 61213df4626SMatt Arsenault Check::CheckType Ty) 61313df4626SMatt Arsenault : Pat(P), Prefix(S), Loc(L), CheckTy(Ty) {} 614dcc7d48dSMichael Liao 61591a1b2c9SMichael Liao /// Check - Match check string and its "not strings" and/or "dag strings". 616e93a3a08SStephen Lin size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode, 617f8bd2e5bSStephen Lin size_t &MatchLen, StringMap<StringRef> &VariableTable) const; 618dcc7d48dSMichael Liao 619dcc7d48dSMichael Liao /// CheckNext - Verify there is a single line in the given buffer. 620dcc7d48dSMichael Liao bool CheckNext(const SourceMgr &SM, StringRef Buffer) const; 621dcc7d48dSMichael Liao 62201ac1707SDuncan P. N. Exon Smith /// CheckSame - Verify there is no newline in the given buffer. 62301ac1707SDuncan P. N. Exon Smith bool CheckSame(const SourceMgr &SM, StringRef Buffer) const; 62401ac1707SDuncan P. N. 
Exon Smith 625dcc7d48dSMichael Liao /// CheckNot - Verify there's no "not strings" in the given buffer. 626dcc7d48dSMichael Liao bool CheckNot(const SourceMgr &SM, StringRef Buffer, 62791a1b2c9SMichael Liao const std::vector<const Pattern *> &NotStrings, 62891a1b2c9SMichael Liao StringMap<StringRef> &VariableTable) const; 62991a1b2c9SMichael Liao 63091a1b2c9SMichael Liao /// CheckDag - Match "dag strings" and their mixed "not strings". 63191a1b2c9SMichael Liao size_t CheckDag(const SourceMgr &SM, StringRef Buffer, 63291a1b2c9SMichael Liao std::vector<const Pattern *> &NotStrings, 633dcc7d48dSMichael Liao StringMap<StringRef> &VariableTable) const; 63426cccfe1SChris Lattner }; 63526cccfe1SChris Lattner 6365ea04c38SGuy Benyei /// Canonicalize whitespaces in the input file. Line endings are replaced 6375ea04c38SGuy Benyei /// with UNIX-style '\n'. 6385ea04c38SGuy Benyei /// 6395ea04c38SGuy Benyei /// \param PreserveHorizontal Don't squash consecutive horizontal whitespace 6405ea04c38SGuy Benyei /// characters to a single space. 6411961f14cSDavid Blaikie static std::unique_ptr<MemoryBuffer> 6421961f14cSDavid Blaikie CanonicalizeInputFile(std::unique_ptr<MemoryBuffer> MB, 6435ea04c38SGuy Benyei bool PreserveHorizontal) { 6440e45d24aSChris Lattner SmallString<128> NewFile; 645a2f8fc5aSChris Lattner NewFile.reserve(MB->getBufferSize()); 646a2f8fc5aSChris Lattner 647a2f8fc5aSChris Lattner for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd(); 648a2f8fc5aSChris Lattner Ptr != End; ++Ptr) { 649fd781bf0SNAKAMURA Takumi // Eliminate trailing dosish \r. 650fd781bf0SNAKAMURA Takumi if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') { 651fd781bf0SNAKAMURA Takumi continue; 652fd781bf0SNAKAMURA Takumi } 653fd781bf0SNAKAMURA Takumi 6545ea04c38SGuy Benyei // If current char is not a horizontal whitespace or if horizontal 6555ea04c38SGuy Benyei // whitespace canonicalization is disabled, dump it to output as is. 
6565ea04c38SGuy Benyei if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) { 657a2f8fc5aSChris Lattner NewFile.push_back(*Ptr); 658a2f8fc5aSChris Lattner continue; 659a2f8fc5aSChris Lattner } 660a2f8fc5aSChris Lattner 661a2f8fc5aSChris Lattner // Otherwise, add one space and advance over neighboring space. 662a2f8fc5aSChris Lattner NewFile.push_back(' '); 663a2f8fc5aSChris Lattner while (Ptr+1 != End && 664a2f8fc5aSChris Lattner (Ptr[1] == ' ' || Ptr[1] == '\t')) 665a2f8fc5aSChris Lattner ++Ptr; 666a2f8fc5aSChris Lattner } 667a2f8fc5aSChris Lattner 6681961f14cSDavid Blaikie return std::unique_ptr<MemoryBuffer>( 6691961f14cSDavid Blaikie MemoryBuffer::getMemBufferCopy(NewFile.str(), MB->getBufferIdentifier())); 670a2f8fc5aSChris Lattner } 671a2f8fc5aSChris Lattner 67238820972SMatt Arsenault static bool IsPartOfWord(char c) { 67338820972SMatt Arsenault return (isalnum(c) || c == '-' || c == '_'); 67438820972SMatt Arsenault } 67538820972SMatt Arsenault 67613df4626SMatt Arsenault // Get the size of the prefix extension. 67713df4626SMatt Arsenault static size_t CheckTypeSize(Check::CheckType Ty) { 67813df4626SMatt Arsenault switch (Ty) { 67913df4626SMatt Arsenault case Check::CheckNone: 68013df4626SMatt Arsenault return 0; 68113df4626SMatt Arsenault 68213df4626SMatt Arsenault case Check::CheckPlain: 68313df4626SMatt Arsenault return sizeof(":") - 1; 68413df4626SMatt Arsenault 68513df4626SMatt Arsenault case Check::CheckNext: 68613df4626SMatt Arsenault return sizeof("-NEXT:") - 1; 68713df4626SMatt Arsenault 68801ac1707SDuncan P. N. Exon Smith case Check::CheckSame: 68901ac1707SDuncan P. N. Exon Smith return sizeof("-SAME:") - 1; 69001ac1707SDuncan P. N. 
Exon Smith 69113df4626SMatt Arsenault case Check::CheckNot: 69213df4626SMatt Arsenault return sizeof("-NOT:") - 1; 69313df4626SMatt Arsenault 69413df4626SMatt Arsenault case Check::CheckDAG: 69513df4626SMatt Arsenault return sizeof("-DAG:") - 1; 69613df4626SMatt Arsenault 69713df4626SMatt Arsenault case Check::CheckLabel: 69813df4626SMatt Arsenault return sizeof("-LABEL:") - 1; 69913df4626SMatt Arsenault 70013df4626SMatt Arsenault case Check::CheckEOF: 70113df4626SMatt Arsenault llvm_unreachable("Should not be using EOF size"); 70213df4626SMatt Arsenault } 70313df4626SMatt Arsenault 70413df4626SMatt Arsenault llvm_unreachable("Bad check type"); 70513df4626SMatt Arsenault } 70613df4626SMatt Arsenault 70713df4626SMatt Arsenault static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) { 708c4d2d471SMatt Arsenault char NextChar = Buffer[Prefix.size()]; 70938820972SMatt Arsenault 71038820972SMatt Arsenault // Verify that the : is present after the prefix. 71113df4626SMatt Arsenault if (NextChar == ':') 71238820972SMatt Arsenault return Check::CheckPlain; 71338820972SMatt Arsenault 71413df4626SMatt Arsenault if (NextChar != '-') 71538820972SMatt Arsenault return Check::CheckNone; 71638820972SMatt Arsenault 717c4d2d471SMatt Arsenault StringRef Rest = Buffer.drop_front(Prefix.size() + 1); 71813df4626SMatt Arsenault if (Rest.startswith("NEXT:")) 71938820972SMatt Arsenault return Check::CheckNext; 72038820972SMatt Arsenault 72101ac1707SDuncan P. N. Exon Smith if (Rest.startswith("SAME:")) 72201ac1707SDuncan P. N. Exon Smith return Check::CheckSame; 72301ac1707SDuncan P. N. 
Exon Smith 72413df4626SMatt Arsenault if (Rest.startswith("NOT:")) 72538820972SMatt Arsenault return Check::CheckNot; 72638820972SMatt Arsenault 72713df4626SMatt Arsenault if (Rest.startswith("DAG:")) 72838820972SMatt Arsenault return Check::CheckDAG; 72938820972SMatt Arsenault 73013df4626SMatt Arsenault if (Rest.startswith("LABEL:")) 73138820972SMatt Arsenault return Check::CheckLabel; 73213df4626SMatt Arsenault 73313df4626SMatt Arsenault return Check::CheckNone; 73438820972SMatt Arsenault } 73538820972SMatt Arsenault 73613df4626SMatt Arsenault // From the given position, find the next character after the word. 73713df4626SMatt Arsenault static size_t SkipWord(StringRef Str, size_t Loc) { 73813df4626SMatt Arsenault while (Loc < Str.size() && IsPartOfWord(Str[Loc])) 73913df4626SMatt Arsenault ++Loc; 74013df4626SMatt Arsenault return Loc; 74113df4626SMatt Arsenault } 74213df4626SMatt Arsenault 74313df4626SMatt Arsenault // Try to find the first match in buffer for any prefix. If a valid match is 74413df4626SMatt Arsenault // found, return that prefix and set its type and location. If there are almost 74513df4626SMatt Arsenault // matches (e.g. the actual prefix string is found, but is not an actual check 74613df4626SMatt Arsenault // string), but no valid match, return an empty string and set the position to 74713df4626SMatt Arsenault // resume searching from. If no partial matches are found, return an empty 74813df4626SMatt Arsenault // string and the location will be StringRef::npos. If one prefix is a substring 74913df4626SMatt Arsenault // of another, the maximal match should be found. e.g. if "A" and "AA" are 75013df4626SMatt Arsenault // prefixes then AA-CHECK: should match the second one. 
75113df4626SMatt Arsenault static StringRef FindFirstCandidateMatch(StringRef &Buffer, 75213df4626SMatt Arsenault Check::CheckType &CheckTy, 75313df4626SMatt Arsenault size_t &CheckLoc) { 75413df4626SMatt Arsenault StringRef FirstPrefix; 75513df4626SMatt Arsenault size_t FirstLoc = StringRef::npos; 75613df4626SMatt Arsenault size_t SearchLoc = StringRef::npos; 75713df4626SMatt Arsenault Check::CheckType FirstTy = Check::CheckNone; 75813df4626SMatt Arsenault 75913df4626SMatt Arsenault CheckTy = Check::CheckNone; 76013df4626SMatt Arsenault CheckLoc = StringRef::npos; 76113df4626SMatt Arsenault 7628f870499SBenjamin Kramer for (StringRef Prefix : CheckPrefixes) { 76313df4626SMatt Arsenault size_t PrefixLoc = Buffer.find(Prefix); 76413df4626SMatt Arsenault 76513df4626SMatt Arsenault if (PrefixLoc == StringRef::npos) 76613df4626SMatt Arsenault continue; 76713df4626SMatt Arsenault 76813df4626SMatt Arsenault // Track where we are searching for invalid prefixes that look almost right. 76913df4626SMatt Arsenault // We need to only advance to the first partial match on the next attempt 77013df4626SMatt Arsenault // since a partial match could be a substring of a later, valid prefix. 77113df4626SMatt Arsenault // Need to skip to the end of the word, otherwise we could end up 77213df4626SMatt Arsenault // matching a prefix in a substring later. 77313df4626SMatt Arsenault if (PrefixLoc < SearchLoc) 77413df4626SMatt Arsenault SearchLoc = SkipWord(Buffer, PrefixLoc); 77513df4626SMatt Arsenault 77613df4626SMatt Arsenault // We only want to find the first match to avoid skipping some. 77713df4626SMatt Arsenault if (PrefixLoc > FirstLoc) 77813df4626SMatt Arsenault continue; 779a7181a1bSAlexey Samsonov // If one matching check-prefix is a prefix of another, choose the 780a7181a1bSAlexey Samsonov // longer one. 
781a7181a1bSAlexey Samsonov if (PrefixLoc == FirstLoc && Prefix.size() < FirstPrefix.size()) 782a7181a1bSAlexey Samsonov continue; 78313df4626SMatt Arsenault 78413df4626SMatt Arsenault StringRef Rest = Buffer.drop_front(PrefixLoc); 78513df4626SMatt Arsenault // Make sure we have actually found the prefix, and not a word containing 78613df4626SMatt Arsenault // it. This should also prevent matching the wrong prefix when one is a 78713df4626SMatt Arsenault // substring of another. 78813df4626SMatt Arsenault if (PrefixLoc != 0 && IsPartOfWord(Buffer[PrefixLoc - 1])) 78943b5f572SDaniel Sanders FirstTy = Check::CheckNone; 79043b5f572SDaniel Sanders else 79143b5f572SDaniel Sanders FirstTy = FindCheckType(Rest, Prefix); 79213df4626SMatt Arsenault 79313df4626SMatt Arsenault FirstLoc = PrefixLoc; 794a7181a1bSAlexey Samsonov FirstPrefix = Prefix; 79513df4626SMatt Arsenault } 79613df4626SMatt Arsenault 797a7181a1bSAlexey Samsonov // If the first prefix is invalid, we should continue the search after it. 798a7181a1bSAlexey Samsonov if (FirstTy == Check::CheckNone) { 79913df4626SMatt Arsenault CheckLoc = SearchLoc; 800a7181a1bSAlexey Samsonov return ""; 801a7181a1bSAlexey Samsonov } 802a7181a1bSAlexey Samsonov 80313df4626SMatt Arsenault CheckTy = FirstTy; 80413df4626SMatt Arsenault CheckLoc = FirstLoc; 80513df4626SMatt Arsenault return FirstPrefix; 80613df4626SMatt Arsenault } 80713df4626SMatt Arsenault 80813df4626SMatt Arsenault static StringRef FindFirstMatchingPrefix(StringRef &Buffer, 80913df4626SMatt Arsenault unsigned &LineNumber, 81013df4626SMatt Arsenault Check::CheckType &CheckTy, 81113df4626SMatt Arsenault size_t &CheckLoc) { 81213df4626SMatt Arsenault while (!Buffer.empty()) { 81313df4626SMatt Arsenault StringRef Prefix = FindFirstCandidateMatch(Buffer, CheckTy, CheckLoc); 81413df4626SMatt Arsenault // If we found a real match, we are done. 
81513df4626SMatt Arsenault if (!Prefix.empty()) { 81613df4626SMatt Arsenault LineNumber += Buffer.substr(0, CheckLoc).count('\n'); 81713df4626SMatt Arsenault return Prefix; 81813df4626SMatt Arsenault } 81913df4626SMatt Arsenault 82013df4626SMatt Arsenault // We didn't find any almost matches either, we are also done. 82113df4626SMatt Arsenault if (CheckLoc == StringRef::npos) 82213df4626SMatt Arsenault return StringRef(); 82313df4626SMatt Arsenault 82413df4626SMatt Arsenault LineNumber += Buffer.substr(0, CheckLoc + 1).count('\n'); 82513df4626SMatt Arsenault 82613df4626SMatt Arsenault // Advance to the last possible match we found and try again. 82713df4626SMatt Arsenault Buffer = Buffer.drop_front(CheckLoc + 1); 82813df4626SMatt Arsenault } 82913df4626SMatt Arsenault 83013df4626SMatt Arsenault return StringRef(); 83138820972SMatt Arsenault } 832ee3c74fbSChris Lattner 833ee3c74fbSChris Lattner /// ReadCheckFile - Read the check file, which specifies the sequence of 834ee3c74fbSChris Lattner /// expected strings. The strings are added to the CheckStrings vector. 83543d50d4aSEli Bendersky /// Returns true in case of an error, false otherwise. 836ee3c74fbSChris Lattner static bool ReadCheckFile(SourceMgr &SM, 83726cccfe1SChris Lattner std::vector<CheckString> &CheckStrings) { 838adf21f2aSRafael Espindola ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr = 839adf21f2aSRafael Espindola MemoryBuffer::getFileOrSTDIN(CheckFilename); 840adf21f2aSRafael Espindola if (std::error_code EC = FileOrErr.getError()) { 841adf21f2aSRafael Espindola errs() << "Could not open check file '" << CheckFilename 842adf21f2aSRafael Espindola << "': " << EC.message() << '\n'; 843ee3c74fbSChris Lattner return true; 844ee3c74fbSChris Lattner } 845a2f8fc5aSChris Lattner 846a2f8fc5aSChris Lattner // If we want to canonicalize whitespace, strip excess whitespace from the 8475ea04c38SGuy Benyei // buffer containing the CHECK lines. Remove DOS style line endings. 
8483560ff2cSRafael Espindola std::unique_ptr<MemoryBuffer> F = CanonicalizeInputFile( 8493560ff2cSRafael Espindola std::move(FileOrErr.get()), NoCanonicalizeWhiteSpace); 850ee3c74fbSChris Lattner 85110f10cedSChris Lattner // Find all instances of CheckPrefix followed by : in the file. 852caa5fc0cSChris Lattner StringRef Buffer = F->getBuffer(); 85356ccdbbdSAlexander Kornienko 8541961f14cSDavid Blaikie SM.AddNewSourceBuffer(std::move(F), SMLoc()); 8551961f14cSDavid Blaikie 85656ccdbbdSAlexander Kornienko std::vector<Pattern> ImplicitNegativeChecks; 85756ccdbbdSAlexander Kornienko for (const auto &PatternString : ImplicitCheckNot) { 85856ccdbbdSAlexander Kornienko // Create a buffer with fake command line content in order to display the 85956ccdbbdSAlexander Kornienko // command line option responsible for the specific implicit CHECK-NOT. 860*ff43d69dSDavid Blaikie std::string Prefix = (Twine("-") + ImplicitCheckNot.ArgStr + "='").str(); 86156ccdbbdSAlexander Kornienko std::string Suffix = "'"; 8623560ff2cSRafael Espindola std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy( 8633560ff2cSRafael Espindola Prefix + PatternString + Suffix, "command line"); 8643560ff2cSRafael Espindola 86556ccdbbdSAlexander Kornienko StringRef PatternInBuffer = 86656ccdbbdSAlexander Kornienko CmdLine->getBuffer().substr(Prefix.size(), PatternString.size()); 8671961f14cSDavid Blaikie SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc()); 86856ccdbbdSAlexander Kornienko 86956ccdbbdSAlexander Kornienko ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot)); 87056ccdbbdSAlexander Kornienko ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer, 87156ccdbbdSAlexander Kornienko "IMPLICIT-CHECK", SM, 0); 87256ccdbbdSAlexander Kornienko } 87356ccdbbdSAlexander Kornienko 87456ccdbbdSAlexander Kornienko 87556ccdbbdSAlexander Kornienko std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks; 876236d2d5eSChris Lattner 87743d50d4aSEli Bendersky // LineNumber keeps track of 
the line on which CheckPrefix instances are 87843d50d4aSEli Bendersky // found. 87992987fb3SAlexander Kornienko unsigned LineNumber = 1; 88092987fb3SAlexander Kornienko 881ee3c74fbSChris Lattner while (1) { 88213df4626SMatt Arsenault Check::CheckType CheckTy; 88313df4626SMatt Arsenault size_t PrefixLoc; 88413df4626SMatt Arsenault 88513df4626SMatt Arsenault // See if a prefix occurs in the memory buffer. 88613df4626SMatt Arsenault StringRef UsedPrefix = FindFirstMatchingPrefix(Buffer, 88713df4626SMatt Arsenault LineNumber, 88813df4626SMatt Arsenault CheckTy, 88913df4626SMatt Arsenault PrefixLoc); 89013df4626SMatt Arsenault if (UsedPrefix.empty()) 891ee3c74fbSChris Lattner break; 892ee3c74fbSChris Lattner 89313df4626SMatt Arsenault Buffer = Buffer.drop_front(PrefixLoc); 89492987fb3SAlexander Kornienko 89513df4626SMatt Arsenault // Location to use for error messages. 89613df4626SMatt Arsenault const char *UsedPrefixStart = Buffer.data() + (PrefixLoc == 0 ? 0 : 1); 89792987fb3SAlexander Kornienko 89813df4626SMatt Arsenault // PrefixLoc is to the start of the prefix. Skip to the end. 89913df4626SMatt Arsenault Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy)); 90010f10cedSChris Lattner 90138820972SMatt Arsenault // Okay, we found the prefix, yay. Remember the rest of the line, but ignore 90238820972SMatt Arsenault // leading and trailing whitespace. 903236d2d5eSChris Lattner Buffer = Buffer.substr(Buffer.find_first_not_of(" \t")); 904ee3c74fbSChris Lattner 905ee3c74fbSChris Lattner // Scan ahead to the end of line. 906caa5fc0cSChris Lattner size_t EOL = Buffer.find_first_of("\n\r"); 907ee3c74fbSChris Lattner 908838fb09aSDan Gohman // Remember the location of the start of the pattern, for diagnostics. 909838fb09aSDan Gohman SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data()); 910838fb09aSDan Gohman 91174d50731SChris Lattner // Parse the pattern. 
91238820972SMatt Arsenault Pattern P(CheckTy); 91313df4626SMatt Arsenault if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber)) 914ee3c74fbSChris Lattner return true; 915ee3c74fbSChris Lattner 916f8bd2e5bSStephen Lin // Verify that CHECK-LABEL lines do not define or use variables 91738820972SMatt Arsenault if ((CheckTy == Check::CheckLabel) && P.hasVariable()) { 91813df4626SMatt Arsenault SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart), 919f8bd2e5bSStephen Lin SourceMgr::DK_Error, 92013df4626SMatt Arsenault "found '" + UsedPrefix + "-LABEL:'" 92113df4626SMatt Arsenault " with variable definition or use"); 922f8bd2e5bSStephen Lin return true; 923f8bd2e5bSStephen Lin } 924f8bd2e5bSStephen Lin 925236d2d5eSChris Lattner Buffer = Buffer.substr(EOL); 92674d50731SChris Lattner 927da108b4eSChris Lattner // Verify that CHECK-NEXT lines have at least one CHECK line before them. 92801ac1707SDuncan P. N. Exon Smith if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame) && 92901ac1707SDuncan P. N. Exon Smith CheckStrings.empty()) { 93001ac1707SDuncan P. N. Exon Smith StringRef Type = CheckTy == Check::CheckNext ? "NEXT" : "SAME"; 93113df4626SMatt Arsenault SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart), 93203b80a40SChris Lattner SourceMgr::DK_Error, 93301ac1707SDuncan P. N. Exon Smith "found '" + UsedPrefix + "-" + Type + "' without previous '" 93413df4626SMatt Arsenault + UsedPrefix + ": line"); 935da108b4eSChris Lattner return true; 936da108b4eSChris Lattner } 937da108b4eSChris Lattner 93891a1b2c9SMichael Liao // Handle CHECK-DAG/-NOT. 93938820972SMatt Arsenault if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) { 94091a1b2c9SMichael Liao DagNotMatches.push_back(P); 94174d50731SChris Lattner continue; 94274d50731SChris Lattner } 94374d50731SChris Lattner 944ee3c74fbSChris Lattner // Okay, add the string we captured to the output vector and move on. 
945f5e2fc47SBenjamin Kramer CheckStrings.emplace_back(P, UsedPrefix, PatternLoc, CheckTy); 94691a1b2c9SMichael Liao std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); 94756ccdbbdSAlexander Kornienko DagNotMatches = ImplicitNegativeChecks; 948ee3c74fbSChris Lattner } 949ee3c74fbSChris Lattner 95013df4626SMatt Arsenault // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first 95113df4626SMatt Arsenault // prefix as a filler for the error message. 95291a1b2c9SMichael Liao if (!DagNotMatches.empty()) { 953f5e2fc47SBenjamin Kramer CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(), 954eba55822SJakob Stoklund Olesen SMLoc::getFromPointer(Buffer.data()), 955f5e2fc47SBenjamin Kramer Check::CheckEOF); 95691a1b2c9SMichael Liao std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); 957eba55822SJakob Stoklund Olesen } 958eba55822SJakob Stoklund Olesen 959ee3c74fbSChris Lattner if (CheckStrings.empty()) { 96013df4626SMatt Arsenault errs() << "error: no check strings found with prefix" 96113df4626SMatt Arsenault << (CheckPrefixes.size() > 1 ? 
"es " : " "); 9623e3ef2f2SChris Bieneman prefix_iterator I = CheckPrefixes.begin(); 9633e3ef2f2SChris Bieneman prefix_iterator E = CheckPrefixes.end(); 9643e3ef2f2SChris Bieneman if (I != E) { 9653e3ef2f2SChris Bieneman errs() << "\'" << *I << ":'"; 9663e3ef2f2SChris Bieneman ++I; 96713df4626SMatt Arsenault } 9683e3ef2f2SChris Bieneman for (; I != E; ++I) 9693e3ef2f2SChris Bieneman errs() << ", \'" << *I << ":'"; 97013df4626SMatt Arsenault 97113df4626SMatt Arsenault errs() << '\n'; 972ee3c74fbSChris Lattner return true; 973ee3c74fbSChris Lattner } 974ee3c74fbSChris Lattner 975ee3c74fbSChris Lattner return false; 976ee3c74fbSChris Lattner } 977ee3c74fbSChris Lattner 9783c76c523SCraig Topper static void PrintCheckFailed(const SourceMgr &SM, SMLoc Loc, 97991a1b2c9SMichael Liao const Pattern &Pat, StringRef Buffer, 980e0ef65abSDaniel Dunbar StringMap<StringRef> &VariableTable) { 981da108b4eSChris Lattner // Otherwise, we have an error, emit an error message. 98291a1b2c9SMichael Liao SM.PrintMessage(Loc, SourceMgr::DK_Error, 98303b80a40SChris Lattner "expected string not found in input"); 984da108b4eSChris Lattner 985da108b4eSChris Lattner // Print the "scanning from here" line. If the current position is at the 986da108b4eSChris Lattner // end of a line, advance to the start of the next line. 987caa5fc0cSChris Lattner Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r")); 988da108b4eSChris Lattner 98903b80a40SChris Lattner SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 99003b80a40SChris Lattner "scanning from here"); 991e0ef65abSDaniel Dunbar 992e0ef65abSDaniel Dunbar // Allow the pattern to print additional information if desired. 
99391a1b2c9SMichael Liao Pat.PrintFailureInfo(SM, Buffer, VariableTable); 99491a1b2c9SMichael Liao } 99591a1b2c9SMichael Liao 99691a1b2c9SMichael Liao static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr, 99791a1b2c9SMichael Liao StringRef Buffer, 99891a1b2c9SMichael Liao StringMap<StringRef> &VariableTable) { 99991a1b2c9SMichael Liao PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable); 1000da108b4eSChris Lattner } 1001da108b4eSChris Lattner 100237183584SChris Lattner /// CountNumNewlinesBetween - Count the number of newlines in the specified 100337183584SChris Lattner /// range. 1004592fe880SRichard Smith static unsigned CountNumNewlinesBetween(StringRef Range, 1005592fe880SRichard Smith const char *&FirstNewLine) { 1006da108b4eSChris Lattner unsigned NumNewLines = 0; 100737183584SChris Lattner while (1) { 1008da108b4eSChris Lattner // Scan for newline. 100937183584SChris Lattner Range = Range.substr(Range.find_first_of("\n\r")); 101037183584SChris Lattner if (Range.empty()) return NumNewLines; 1011da108b4eSChris Lattner 1012da108b4eSChris Lattner ++NumNewLines; 1013da108b4eSChris Lattner 1014da108b4eSChris Lattner // Handle \n\r and \r\n as a single newline. 
101537183584SChris Lattner if (Range.size() > 1 && 101637183584SChris Lattner (Range[1] == '\n' || Range[1] == '\r') && 101737183584SChris Lattner (Range[0] != Range[1])) 101837183584SChris Lattner Range = Range.substr(1); 101937183584SChris Lattner Range = Range.substr(1); 1020592fe880SRichard Smith 1021592fe880SRichard Smith if (NumNewLines == 1) 1022592fe880SRichard Smith FirstNewLine = Range.begin(); 1023da108b4eSChris Lattner } 1024da108b4eSChris Lattner } 1025da108b4eSChris Lattner 1026dcc7d48dSMichael Liao size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer, 1027e93a3a08SStephen Lin bool IsLabelScanMode, size_t &MatchLen, 1028dcc7d48dSMichael Liao StringMap<StringRef> &VariableTable) const { 102991a1b2c9SMichael Liao size_t LastPos = 0; 103091a1b2c9SMichael Liao std::vector<const Pattern *> NotStrings; 103191a1b2c9SMichael Liao 1032e93a3a08SStephen Lin // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL 1033e93a3a08SStephen Lin // bounds; we have not processed variable definitions within the bounded block 1034e93a3a08SStephen Lin // yet so cannot handle any final CHECK-DAG yet; this is handled when going 1035e93a3a08SStephen Lin // over the block again (including the last CHECK-LABEL) in normal mode. 1036e93a3a08SStephen Lin if (!IsLabelScanMode) { 103791a1b2c9SMichael Liao // Match "dag strings" (with mixed "not strings" if any). 103891a1b2c9SMichael Liao LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable); 103991a1b2c9SMichael Liao if (LastPos == StringRef::npos) 104091a1b2c9SMichael Liao return StringRef::npos; 1041e93a3a08SStephen Lin } 104291a1b2c9SMichael Liao 104391a1b2c9SMichael Liao // Match itself from the last position after matching CHECK-DAG. 
104491a1b2c9SMichael Liao StringRef MatchBuffer = Buffer.substr(LastPos); 104591a1b2c9SMichael Liao size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable); 1046dcc7d48dSMichael Liao if (MatchPos == StringRef::npos) { 104791a1b2c9SMichael Liao PrintCheckFailed(SM, *this, MatchBuffer, VariableTable); 1048dcc7d48dSMichael Liao return StringRef::npos; 1049dcc7d48dSMichael Liao } 1050dcc7d48dSMichael Liao 1051e93a3a08SStephen Lin // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT 1052e93a3a08SStephen Lin // or CHECK-NOT 1053e93a3a08SStephen Lin if (!IsLabelScanMode) { 105491a1b2c9SMichael Liao StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); 1055dcc7d48dSMichael Liao 1056dcc7d48dSMichael Liao // If this check is a "CHECK-NEXT", verify that the previous match was on 1057dcc7d48dSMichael Liao // the previous line (i.e. that there is one newline between them). 1058dcc7d48dSMichael Liao if (CheckNext(SM, SkippedRegion)) 1059dcc7d48dSMichael Liao return StringRef::npos; 1060dcc7d48dSMichael Liao 106101ac1707SDuncan P. N. Exon Smith // If this check is a "CHECK-SAME", verify that the previous match was on 106201ac1707SDuncan P. N. Exon Smith // the same line (i.e. that there is no newline between them). 106301ac1707SDuncan P. N. Exon Smith if (CheckSame(SM, SkippedRegion)) 106401ac1707SDuncan P. N. Exon Smith return StringRef::npos; 106501ac1707SDuncan P. N. Exon Smith 1066dcc7d48dSMichael Liao // If this match had "not strings", verify that they don't exist in the 1067dcc7d48dSMichael Liao // skipped region. 
106891a1b2c9SMichael Liao if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) 1069dcc7d48dSMichael Liao return StringRef::npos; 1070f8bd2e5bSStephen Lin } 1071dcc7d48dSMichael Liao 10727dfb92b9SMehdi Amini return LastPos + MatchPos; 1073dcc7d48dSMichael Liao } 1074dcc7d48dSMichael Liao 1075dcc7d48dSMichael Liao bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const { 107638820972SMatt Arsenault if (CheckTy != Check::CheckNext) 1077dcc7d48dSMichael Liao return false; 1078dcc7d48dSMichael Liao 1079dcc7d48dSMichael Liao // Count the number of newlines between the previous match and this one. 1080dcc7d48dSMichael Liao assert(Buffer.data() != 1081dcc7d48dSMichael Liao SM.getMemoryBuffer( 1082dcc7d48dSMichael Liao SM.FindBufferContainingLoc( 1083dcc7d48dSMichael Liao SMLoc::getFromPointer(Buffer.data())))->getBufferStart() && 1084dcc7d48dSMichael Liao "CHECK-NEXT can't be the first check in a file"); 1085dcc7d48dSMichael Liao 108666f09ad0SCraig Topper const char *FirstNewLine = nullptr; 1087592fe880SRichard Smith unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 1088dcc7d48dSMichael Liao 1089dcc7d48dSMichael Liao if (NumNewLines == 0) { 109013df4626SMatt Arsenault SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix + 1091dcc7d48dSMichael Liao "-NEXT: is on the same line as previous match"); 1092dcc7d48dSMichael Liao SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), 1093dcc7d48dSMichael Liao SourceMgr::DK_Note, "'next' match was here"); 1094dcc7d48dSMichael Liao SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1095dcc7d48dSMichael Liao "previous match ended here"); 1096dcc7d48dSMichael Liao return true; 1097dcc7d48dSMichael Liao } 1098dcc7d48dSMichael Liao 1099dcc7d48dSMichael Liao if (NumNewLines != 1) { 110013df4626SMatt Arsenault SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix + 1101dcc7d48dSMichael Liao "-NEXT: is not on the line after the previous match"); 1102dcc7d48dSMichael Liao 
SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), 1103dcc7d48dSMichael Liao SourceMgr::DK_Note, "'next' match was here"); 1104dcc7d48dSMichael Liao SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1105dcc7d48dSMichael Liao "previous match ended here"); 1106592fe880SRichard Smith SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note, 1107592fe880SRichard Smith "non-matching line after previous match is here"); 1108dcc7d48dSMichael Liao return true; 1109dcc7d48dSMichael Liao } 1110dcc7d48dSMichael Liao 1111dcc7d48dSMichael Liao return false; 1112dcc7d48dSMichael Liao } 1113dcc7d48dSMichael Liao 111401ac1707SDuncan P. N. Exon Smith bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const { 111501ac1707SDuncan P. N. Exon Smith if (CheckTy != Check::CheckSame) 111601ac1707SDuncan P. N. Exon Smith return false; 111701ac1707SDuncan P. N. Exon Smith 111801ac1707SDuncan P. N. Exon Smith // Count the number of newlines between the previous match and this one. 111901ac1707SDuncan P. N. Exon Smith assert(Buffer.data() != 112001ac1707SDuncan P. N. Exon Smith SM.getMemoryBuffer(SM.FindBufferContainingLoc( 112101ac1707SDuncan P. N. Exon Smith SMLoc::getFromPointer(Buffer.data()))) 112201ac1707SDuncan P. N. Exon Smith ->getBufferStart() && 112301ac1707SDuncan P. N. Exon Smith "CHECK-SAME can't be the first check in a file"); 112401ac1707SDuncan P. N. Exon Smith 112501ac1707SDuncan P. N. Exon Smith const char *FirstNewLine = nullptr; 112601ac1707SDuncan P. N. Exon Smith unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 112701ac1707SDuncan P. N. Exon Smith 112801ac1707SDuncan P. N. Exon Smith if (NumNewLines != 0) { 112901ac1707SDuncan P. N. Exon Smith SM.PrintMessage(Loc, SourceMgr::DK_Error, 113001ac1707SDuncan P. N. Exon Smith Prefix + 113101ac1707SDuncan P. N. Exon Smith "-SAME: is not on the same line as the previous match"); 113201ac1707SDuncan P. N. 
Exon Smith SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 113301ac1707SDuncan P. N. Exon Smith "'next' match was here"); 113401ac1707SDuncan P. N. Exon Smith SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 113501ac1707SDuncan P. N. Exon Smith "previous match ended here"); 113601ac1707SDuncan P. N. Exon Smith return true; 113701ac1707SDuncan P. N. Exon Smith } 113801ac1707SDuncan P. N. Exon Smith 113901ac1707SDuncan P. N. Exon Smith return false; 114001ac1707SDuncan P. N. Exon Smith } 114101ac1707SDuncan P. N. Exon Smith 1142dcc7d48dSMichael Liao bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer, 114391a1b2c9SMichael Liao const std::vector<const Pattern *> &NotStrings, 1144dcc7d48dSMichael Liao StringMap<StringRef> &VariableTable) const { 11458f870499SBenjamin Kramer for (const Pattern *Pat : NotStrings) { 114638820972SMatt Arsenault assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!"); 114791a1b2c9SMichael Liao 1148dcc7d48dSMichael Liao size_t MatchLen = 0; 114991a1b2c9SMichael Liao size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable); 1150dcc7d48dSMichael Liao 1151dcc7d48dSMichael Liao if (Pos == StringRef::npos) continue; 1152dcc7d48dSMichael Liao 1153dcc7d48dSMichael Liao SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()+Pos), 1154dcc7d48dSMichael Liao SourceMgr::DK_Error, 115513df4626SMatt Arsenault Prefix + "-NOT: string occurred!"); 115691a1b2c9SMichael Liao SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note, 115713df4626SMatt Arsenault Prefix + "-NOT: pattern specified here"); 1158dcc7d48dSMichael Liao return true; 1159dcc7d48dSMichael Liao } 1160dcc7d48dSMichael Liao 1161dcc7d48dSMichael Liao return false; 1162dcc7d48dSMichael Liao } 1163dcc7d48dSMichael Liao 116491a1b2c9SMichael Liao size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer, 116591a1b2c9SMichael Liao std::vector<const Pattern *> &NotStrings, 116691a1b2c9SMichael Liao StringMap<StringRef> 
&VariableTable) const { 116791a1b2c9SMichael Liao if (DagNotStrings.empty()) 116891a1b2c9SMichael Liao return 0; 116991a1b2c9SMichael Liao 117091a1b2c9SMichael Liao size_t LastPos = 0; 117191a1b2c9SMichael Liao size_t StartPos = LastPos; 117291a1b2c9SMichael Liao 11738f870499SBenjamin Kramer for (const Pattern &Pat : DagNotStrings) { 117438820972SMatt Arsenault assert((Pat.getCheckTy() == Check::CheckDAG || 117538820972SMatt Arsenault Pat.getCheckTy() == Check::CheckNot) && 117691a1b2c9SMichael Liao "Invalid CHECK-DAG or CHECK-NOT!"); 117791a1b2c9SMichael Liao 117838820972SMatt Arsenault if (Pat.getCheckTy() == Check::CheckNot) { 117991a1b2c9SMichael Liao NotStrings.push_back(&Pat); 118091a1b2c9SMichael Liao continue; 118191a1b2c9SMichael Liao } 118291a1b2c9SMichael Liao 118338820972SMatt Arsenault assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!"); 118491a1b2c9SMichael Liao 118591a1b2c9SMichael Liao size_t MatchLen = 0, MatchPos; 118691a1b2c9SMichael Liao 118791a1b2c9SMichael Liao // CHECK-DAG always matches from the start. 118891a1b2c9SMichael Liao StringRef MatchBuffer = Buffer.substr(StartPos); 118991a1b2c9SMichael Liao MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable); 119091a1b2c9SMichael Liao // With a group of CHECK-DAGs, a single mismatching means the match on 119191a1b2c9SMichael Liao // that group of CHECK-DAGs fails immediately. 119291a1b2c9SMichael Liao if (MatchPos == StringRef::npos) { 119391a1b2c9SMichael Liao PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable); 119491a1b2c9SMichael Liao return StringRef::npos; 119591a1b2c9SMichael Liao } 119691a1b2c9SMichael Liao // Re-calc it as the offset relative to the start of the original string. 119791a1b2c9SMichael Liao MatchPos += StartPos; 119891a1b2c9SMichael Liao 119991a1b2c9SMichael Liao if (!NotStrings.empty()) { 120091a1b2c9SMichael Liao if (MatchPos < LastPos) { 120191a1b2c9SMichael Liao // Reordered? 
120291a1b2c9SMichael Liao SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos), 120391a1b2c9SMichael Liao SourceMgr::DK_Error, 120413df4626SMatt Arsenault Prefix + "-DAG: found a match of CHECK-DAG" 120591a1b2c9SMichael Liao " reordering across a CHECK-NOT"); 120691a1b2c9SMichael Liao SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos), 120791a1b2c9SMichael Liao SourceMgr::DK_Note, 120813df4626SMatt Arsenault Prefix + "-DAG: the farthest match of CHECK-DAG" 120991a1b2c9SMichael Liao " is found here"); 121091a1b2c9SMichael Liao SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note, 121113df4626SMatt Arsenault Prefix + "-NOT: the crossed pattern specified" 121291a1b2c9SMichael Liao " here"); 121391a1b2c9SMichael Liao SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note, 121413df4626SMatt Arsenault Prefix + "-DAG: the reordered pattern specified" 121591a1b2c9SMichael Liao " here"); 121691a1b2c9SMichael Liao return StringRef::npos; 121791a1b2c9SMichael Liao } 121891a1b2c9SMichael Liao // All subsequent CHECK-DAGs should be matched from the farthest 121991a1b2c9SMichael Liao // position of all precedent CHECK-DAGs (including this one.) 122091a1b2c9SMichael Liao StartPos = LastPos; 122191a1b2c9SMichael Liao // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to 122291a1b2c9SMichael Liao // CHECK-DAG, verify that there's no 'not' strings occurred in that 122391a1b2c9SMichael Liao // region. 122491a1b2c9SMichael Liao StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); 1225cf708c32STim Northover if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) 122691a1b2c9SMichael Liao return StringRef::npos; 122791a1b2c9SMichael Liao // Clear "not strings". 122891a1b2c9SMichael Liao NotStrings.clear(); 122991a1b2c9SMichael Liao } 123091a1b2c9SMichael Liao 123191a1b2c9SMichael Liao // Update the last position with CHECK-DAG matches. 
123291a1b2c9SMichael Liao LastPos = std::max(MatchPos + MatchLen, LastPos); 123391a1b2c9SMichael Liao } 123491a1b2c9SMichael Liao 123591a1b2c9SMichael Liao return LastPos; 123691a1b2c9SMichael Liao } 123791a1b2c9SMichael Liao 123813df4626SMatt Arsenault // A check prefix must contain only alphanumeric, hyphens and underscores. 123913df4626SMatt Arsenault static bool ValidateCheckPrefix(StringRef CheckPrefix) { 124013df4626SMatt Arsenault Regex Validator("^[a-zA-Z0-9_-]*$"); 124113df4626SMatt Arsenault return Validator.match(CheckPrefix); 124213df4626SMatt Arsenault } 124313df4626SMatt Arsenault 124413df4626SMatt Arsenault static bool ValidateCheckPrefixes() { 124513df4626SMatt Arsenault StringSet<> PrefixSet; 124613df4626SMatt Arsenault 12478f870499SBenjamin Kramer for (StringRef Prefix : CheckPrefixes) { 124824412b14SEli Bendersky // Reject empty prefixes. 124924412b14SEli Bendersky if (Prefix == "") 125024412b14SEli Bendersky return false; 125124412b14SEli Bendersky 12520356975cSDavid Blaikie if (!PrefixSet.insert(Prefix).second) 125313df4626SMatt Arsenault return false; 125413df4626SMatt Arsenault 125513df4626SMatt Arsenault if (!ValidateCheckPrefix(Prefix)) 125613df4626SMatt Arsenault return false; 125713df4626SMatt Arsenault } 125813df4626SMatt Arsenault 125913df4626SMatt Arsenault return true; 126013df4626SMatt Arsenault } 126113df4626SMatt Arsenault 126213df4626SMatt Arsenault // I don't think there's a way to specify an initial value for cl::list, 126313df4626SMatt Arsenault // so if nothing was specified, add the default 126413df4626SMatt Arsenault static void AddCheckPrefixIfNeeded() { 126513df4626SMatt Arsenault if (CheckPrefixes.empty()) 126613df4626SMatt Arsenault CheckPrefixes.push_back("CHECK"); 1267c2735158SRui Ueyama } 1268c2735158SRui Ueyama 1269ee3c74fbSChris Lattner int main(int argc, char **argv) { 1270ee3c74fbSChris Lattner sys::PrintStackTraceOnErrorSignal(); 1271ee3c74fbSChris Lattner PrettyStackTraceProgram X(argc, argv); 1272ee3c74fbSChris 
Lattner cl::ParseCommandLineOptions(argc, argv); 1273ee3c74fbSChris Lattner 127413df4626SMatt Arsenault if (!ValidateCheckPrefixes()) { 127513df4626SMatt Arsenault errs() << "Supplied check-prefix is invalid! Prefixes must be unique and " 127613df4626SMatt Arsenault "start with a letter and contain only alphanumeric characters, " 127713df4626SMatt Arsenault "hyphens and underscores\n"; 1278c2735158SRui Ueyama return 2; 1279c2735158SRui Ueyama } 1280c2735158SRui Ueyama 128113df4626SMatt Arsenault AddCheckPrefixIfNeeded(); 128213df4626SMatt Arsenault 1283ee3c74fbSChris Lattner SourceMgr SM; 1284ee3c74fbSChris Lattner 1285ee3c74fbSChris Lattner // Read the expected strings from the check file. 128626cccfe1SChris Lattner std::vector<CheckString> CheckStrings; 1287ee3c74fbSChris Lattner if (ReadCheckFile(SM, CheckStrings)) 1288ee3c74fbSChris Lattner return 2; 1289ee3c74fbSChris Lattner 1290ee3c74fbSChris Lattner // Open the file to check and add it to SourceMgr. 1291adf21f2aSRafael Espindola ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr = 1292adf21f2aSRafael Espindola MemoryBuffer::getFileOrSTDIN(InputFilename); 1293adf21f2aSRafael Espindola if (std::error_code EC = FileOrErr.getError()) { 1294adf21f2aSRafael Espindola errs() << "Could not open input file '" << InputFilename 1295adf21f2aSRafael Espindola << "': " << EC.message() << '\n'; 12968e1c6477SEli Bendersky return 2; 1297ee3c74fbSChris Lattner } 12983f6481d0SRafael Espindola std::unique_ptr<MemoryBuffer> &File = FileOrErr.get(); 12992c3e5cdfSChris Lattner 13001b9f936fSJustin Bogner if (File->getBufferSize() == 0 && !AllowEmptyInput) { 1301b692bed7SChris Lattner errs() << "FileCheck error: '" << InputFilename << "' is empty.\n"; 13028e1c6477SEli Bendersky return 2; 1303b692bed7SChris Lattner } 1304b692bed7SChris Lattner 13052c3e5cdfSChris Lattner // Remove duplicate spaces in the input file if requested. 13065ea04c38SGuy Benyei // Remove DOS style line endings. 
13071961f14cSDavid Blaikie std::unique_ptr<MemoryBuffer> F = 1308ce5dd1acSRafael Espindola CanonicalizeInputFile(std::move(File), NoCanonicalizeWhiteSpace); 13092c3e5cdfSChris Lattner 1310ee3c74fbSChris Lattner // Check that we have all of the expected strings, in order, in the input 1311ee3c74fbSChris Lattner // file. 1312caa5fc0cSChris Lattner StringRef Buffer = F->getBuffer(); 1313ee3c74fbSChris Lattner 13141961f14cSDavid Blaikie SM.AddNewSourceBuffer(std::move(F), SMLoc()); 13151961f14cSDavid Blaikie 13161961f14cSDavid Blaikie /// VariableTable - This holds all the current filecheck variables. 13171961f14cSDavid Blaikie StringMap<StringRef> VariableTable; 13181961f14cSDavid Blaikie 1319f8bd2e5bSStephen Lin bool hasError = false; 1320ee3c74fbSChris Lattner 1321f8bd2e5bSStephen Lin unsigned i = 0, j = 0, e = CheckStrings.size(); 1322ee3c74fbSChris Lattner 1323f8bd2e5bSStephen Lin while (true) { 1324f8bd2e5bSStephen Lin StringRef CheckRegion; 1325f8bd2e5bSStephen Lin if (j == e) { 1326f8bd2e5bSStephen Lin CheckRegion = Buffer; 1327f8bd2e5bSStephen Lin } else { 1328f8bd2e5bSStephen Lin const CheckString &CheckLabelStr = CheckStrings[j]; 132938820972SMatt Arsenault if (CheckLabelStr.CheckTy != Check::CheckLabel) { 1330f8bd2e5bSStephen Lin ++j; 1331f8bd2e5bSStephen Lin continue; 1332da108b4eSChris Lattner } 1333da108b4eSChris Lattner 1334f8bd2e5bSStephen Lin // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG 1335f8bd2e5bSStephen Lin size_t MatchLabelLen = 0; 1336e93a3a08SStephen Lin size_t MatchLabelPos = CheckLabelStr.Check(SM, Buffer, true, 1337f8bd2e5bSStephen Lin MatchLabelLen, VariableTable); 1338f8bd2e5bSStephen Lin if (MatchLabelPos == StringRef::npos) { 1339f8bd2e5bSStephen Lin hasError = true; 1340f8bd2e5bSStephen Lin break; 1341f8bd2e5bSStephen Lin } 1342f8bd2e5bSStephen Lin 1343f8bd2e5bSStephen Lin CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen); 1344f8bd2e5bSStephen Lin Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen); 
1345f8bd2e5bSStephen Lin ++j; 1346f8bd2e5bSStephen Lin } 1347f8bd2e5bSStephen Lin 1348f8bd2e5bSStephen Lin for ( ; i != j; ++i) { 1349f8bd2e5bSStephen Lin const CheckString &CheckStr = CheckStrings[i]; 1350f8bd2e5bSStephen Lin 1351f8bd2e5bSStephen Lin // Check each string within the scanned region, including a second check 1352f8bd2e5bSStephen Lin // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG) 1353f8bd2e5bSStephen Lin size_t MatchLen = 0; 1354e93a3a08SStephen Lin size_t MatchPos = CheckStr.Check(SM, CheckRegion, false, MatchLen, 1355f8bd2e5bSStephen Lin VariableTable); 1356f8bd2e5bSStephen Lin 1357f8bd2e5bSStephen Lin if (MatchPos == StringRef::npos) { 1358f8bd2e5bSStephen Lin hasError = true; 1359f8bd2e5bSStephen Lin i = j; 1360f8bd2e5bSStephen Lin break; 1361f8bd2e5bSStephen Lin } 1362f8bd2e5bSStephen Lin 1363f8bd2e5bSStephen Lin CheckRegion = CheckRegion.substr(MatchPos + MatchLen); 1364f8bd2e5bSStephen Lin } 1365f8bd2e5bSStephen Lin 1366f8bd2e5bSStephen Lin if (j == e) 1367f8bd2e5bSStephen Lin break; 1368f8bd2e5bSStephen Lin } 1369f8bd2e5bSStephen Lin 1370f8bd2e5bSStephen Lin return hasError ? 1 : 0; 1371ee3c74fbSChris Lattner } 1372