//===- FileCheck.cpp - Check that File's Contents match what is expected --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// FileCheck does a line-by-line check of a file that validates whether it
// contains the expected content.  This is useful for regression tests etc.
//
// This program exits with an error status of 2 on error, exit status of 0 if
// the file matched the expected contents, and exit status of 1 if it did not
// contain the expected contents.
16ee3c74fbSChris Lattner // 17ee3c74fbSChris Lattner //===----------------------------------------------------------------------===// 18ee3c74fbSChris Lattner 19ee3c74fbSChris Lattner #include "llvm/Support/CommandLine.h" 20ee3c74fbSChris Lattner #include "llvm/Support/MemoryBuffer.h" 21ee3c74fbSChris Lattner #include "llvm/Support/PrettyStackTrace.h" 22f08d2db9SChris Lattner #include "llvm/Support/Regex.h" 23ee3c74fbSChris Lattner #include "llvm/Support/SourceMgr.h" 24ee3c74fbSChris Lattner #include "llvm/Support/raw_ostream.h" 25ee3c74fbSChris Lattner #include "llvm/System/Signals.h" 268879e06dSChris Lattner #include "llvm/ADT/StringMap.h" 278879e06dSChris Lattner #include <algorithm> 28ee3c74fbSChris Lattner using namespace llvm; 29ee3c74fbSChris Lattner 30ee3c74fbSChris Lattner static cl::opt<std::string> 31ee3c74fbSChris Lattner CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required); 32ee3c74fbSChris Lattner 33ee3c74fbSChris Lattner static cl::opt<std::string> 34ee3c74fbSChris Lattner InputFilename("input-file", cl::desc("File to check (defaults to stdin)"), 35ee3c74fbSChris Lattner cl::init("-"), cl::value_desc("filename")); 36ee3c74fbSChris Lattner 37ee3c74fbSChris Lattner static cl::opt<std::string> 38ee3c74fbSChris Lattner CheckPrefix("check-prefix", cl::init("CHECK"), 39ee3c74fbSChris Lattner cl::desc("Prefix to use from check file (defaults to 'CHECK')")); 40ee3c74fbSChris Lattner 412c3e5cdfSChris Lattner static cl::opt<bool> 422c3e5cdfSChris Lattner NoCanonicalizeWhiteSpace("strict-whitespace", 432c3e5cdfSChris Lattner cl::desc("Do not treat all horizontal whitespace as equivalent")); 442c3e5cdfSChris Lattner 4574d50731SChris Lattner //===----------------------------------------------------------------------===// 4674d50731SChris Lattner // Pattern Handling Code. 
4774d50731SChris Lattner //===----------------------------------------------------------------------===// 4874d50731SChris Lattner 493b40b445SChris Lattner class Pattern { 500a4c44bdSChris Lattner SMLoc PatternLoc; 510a4c44bdSChris Lattner 52b16ab0c4SChris Lattner /// FixedStr - If non-empty, this pattern is a fixed string match with the 53b16ab0c4SChris Lattner /// specified fixed string. 54221460e0SChris Lattner StringRef FixedStr; 55b16ab0c4SChris Lattner 56b16ab0c4SChris Lattner /// RegEx - If non-empty, this is a regex pattern. 57b16ab0c4SChris Lattner std::string RegExStr; 588879e06dSChris Lattner 598879e06dSChris Lattner /// VariableUses - Entries in this vector map to uses of a variable in the 608879e06dSChris Lattner /// pattern, e.g. "foo[[bar]]baz". In this case, the RegExStr will contain 618879e06dSChris Lattner /// "foobaz" and we'll get an entry in this vector that tells us to insert the 628879e06dSChris Lattner /// value of bar at offset 3. 638879e06dSChris Lattner std::vector<std::pair<StringRef, unsigned> > VariableUses; 648879e06dSChris Lattner 658879e06dSChris Lattner /// VariableDefs - Entries in this vector map to definitions of a variable in 668879e06dSChris Lattner /// the pattern, e.g. "foo[[bar:.*]]baz". In this case, the RegExStr will 678879e06dSChris Lattner /// contain "foo(.*)baz" and VariableDefs will contain the pair "bar",1. The 688879e06dSChris Lattner /// index indicates what parenthesized value captures the variable value. 698879e06dSChris Lattner std::vector<std::pair<StringRef, unsigned> > VariableDefs; 708879e06dSChris Lattner 713b40b445SChris Lattner public: 723b40b445SChris Lattner 7374d50731SChris Lattner Pattern() { } 7474d50731SChris Lattner 7574d50731SChris Lattner bool ParsePattern(StringRef PatternStr, SourceMgr &SM); 763b40b445SChris Lattner 773b40b445SChris Lattner /// Match - Match the pattern string against the input buffer Buffer. 
This 783b40b445SChris Lattner /// returns the position that is matched or npos if there is no match. If 793b40b445SChris Lattner /// there is a match, the size of the matched string is returned in MatchLen. 808879e06dSChris Lattner /// 818879e06dSChris Lattner /// The VariableTable StringMap provides the current values of filecheck 828879e06dSChris Lattner /// variables and is updated if this match defines new values. 838879e06dSChris Lattner size_t Match(StringRef Buffer, size_t &MatchLen, 848879e06dSChris Lattner StringMap<StringRef> &VariableTable) const; 85b16ab0c4SChris Lattner 86b16ab0c4SChris Lattner private: 878879e06dSChris Lattner static void AddFixedStringToRegEx(StringRef FixedStr, std::string &TheStr); 888879e06dSChris Lattner bool AddRegExToRegEx(StringRef RegExStr, unsigned &CurParen, SourceMgr &SM); 893b40b445SChris Lattner }; 903b40b445SChris Lattner 918879e06dSChris Lattner 9274d50731SChris Lattner bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) { 930a4c44bdSChris Lattner PatternLoc = SMLoc::getFromPointer(PatternStr.data()); 940a4c44bdSChris Lattner 9574d50731SChris Lattner // Ignore trailing whitespace. 9674d50731SChris Lattner while (!PatternStr.empty() && 9774d50731SChris Lattner (PatternStr.back() == ' ' || PatternStr.back() == '\t')) 9874d50731SChris Lattner PatternStr = PatternStr.substr(0, PatternStr.size()-1); 9974d50731SChris Lattner 10074d50731SChris Lattner // Check that there is something on the line. 10174d50731SChris Lattner if (PatternStr.empty()) { 1020a4c44bdSChris Lattner SM.PrintMessage(PatternLoc, "found empty check string with prefix '" + 1030a4c44bdSChris Lattner CheckPrefix+":'", "error"); 10474d50731SChris Lattner return true; 10574d50731SChris Lattner } 10674d50731SChris Lattner 107221460e0SChris Lattner // Check to see if this is a fixed string, or if it has regex pieces. 
1088879e06dSChris Lattner if (PatternStr.size() < 2 || 1098879e06dSChris Lattner (PatternStr.find("{{") == StringRef::npos && 1108879e06dSChris Lattner PatternStr.find("[[") == StringRef::npos)) { 111221460e0SChris Lattner FixedStr = PatternStr; 112221460e0SChris Lattner return false; 113221460e0SChris Lattner } 114221460e0SChris Lattner 1158879e06dSChris Lattner // Paren value #0 is for the fully matched string. Any new parenthesized 1168879e06dSChris Lattner // values add from their. 1178879e06dSChris Lattner unsigned CurParen = 1; 1188879e06dSChris Lattner 119b16ab0c4SChris Lattner // Otherwise, there is at least one regex piece. Build up the regex pattern 120b16ab0c4SChris Lattner // by escaping scary characters in fixed strings, building up one big regex. 121f08d2db9SChris Lattner while (!PatternStr.empty()) { 1228879e06dSChris Lattner // RegEx matches. 1238879e06dSChris Lattner if (PatternStr.size() >= 2 && 1248879e06dSChris Lattner PatternStr[0] == '{' && PatternStr[1] == '{') { 12574d50731SChris Lattner 126f08d2db9SChris Lattner // Otherwise, this is the start of a regex match. Scan for the }}. 127f08d2db9SChris Lattner size_t End = PatternStr.find("}}"); 128f08d2db9SChris Lattner if (End == StringRef::npos) { 129f08d2db9SChris Lattner SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 130f08d2db9SChris Lattner "found start of regex string with no end '}}'", "error"); 131f08d2db9SChris Lattner return true; 132f08d2db9SChris Lattner } 133f08d2db9SChris Lattner 1348879e06dSChris Lattner if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM)) 1358879e06dSChris Lattner return true; 1368879e06dSChris Lattner PatternStr = PatternStr.substr(End+2); 1378879e06dSChris Lattner continue; 1388879e06dSChris Lattner } 1398879e06dSChris Lattner 1408879e06dSChris Lattner // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .* 1418879e06dSChris Lattner // (or some other regex) and assigns it to the FileCheck variable 'foo'. 
The 1428879e06dSChris Lattner // second form is [[foo]] which is a reference to foo. The variable name 143*57cb733bSDaniel Dunbar // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject 1448879e06dSChris Lattner // it. This is to catch some common errors. 1458879e06dSChris Lattner if (PatternStr.size() >= 2 && 1468879e06dSChris Lattner PatternStr[0] == '[' && PatternStr[1] == '[') { 1478879e06dSChris Lattner // Verify that it is terminated properly. 1488879e06dSChris Lattner size_t End = PatternStr.find("]]"); 1498879e06dSChris Lattner if (End == StringRef::npos) { 1508879e06dSChris Lattner SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 1518879e06dSChris Lattner "invalid named regex reference, no ]] found", "error"); 152f08d2db9SChris Lattner return true; 153f08d2db9SChris Lattner } 154f08d2db9SChris Lattner 1558879e06dSChris Lattner StringRef MatchStr = PatternStr.substr(2, End-2); 156f08d2db9SChris Lattner PatternStr = PatternStr.substr(End+2); 1578879e06dSChris Lattner 1588879e06dSChris Lattner // Get the regex name (e.g. "foo"). 1598879e06dSChris Lattner size_t NameEnd = MatchStr.find(':'); 1608879e06dSChris Lattner StringRef Name = MatchStr.substr(0, NameEnd); 1618879e06dSChris Lattner 1628879e06dSChris Lattner if (Name.empty()) { 1638879e06dSChris Lattner SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 1648879e06dSChris Lattner "invalid name in named regex: empty name", "error"); 1658879e06dSChris Lattner return true; 1668879e06dSChris Lattner } 1678879e06dSChris Lattner 1688879e06dSChris Lattner // Verify that the name is well formed. 
1698879e06dSChris Lattner for (unsigned i = 0, e = Name.size(); i != e; ++i) 170*57cb733bSDaniel Dunbar if (Name[i] != '_' && 171*57cb733bSDaniel Dunbar (Name[i] < 'a' || Name[i] > 'z') && 1728879e06dSChris Lattner (Name[i] < 'A' || Name[i] > 'Z') && 1738879e06dSChris Lattner (Name[i] < '0' || Name[i] > '9')) { 1748879e06dSChris Lattner SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i), 1758879e06dSChris Lattner "invalid name in named regex", "error"); 1768879e06dSChris Lattner return true; 1778879e06dSChris Lattner } 1788879e06dSChris Lattner 1798879e06dSChris Lattner // Name can't start with a digit. 1808879e06dSChris Lattner if (isdigit(Name[0])) { 1818879e06dSChris Lattner SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 1828879e06dSChris Lattner "invalid name in named regex", "error"); 1838879e06dSChris Lattner return true; 1848879e06dSChris Lattner } 1858879e06dSChris Lattner 1868879e06dSChris Lattner // Handle [[foo]]. 1878879e06dSChris Lattner if (NameEnd == StringRef::npos) { 1888879e06dSChris Lattner VariableUses.push_back(std::make_pair(Name, RegExStr.size())); 1898879e06dSChris Lattner continue; 1908879e06dSChris Lattner } 1918879e06dSChris Lattner 1928879e06dSChris Lattner // Handle [[foo:.*]]. 1938879e06dSChris Lattner VariableDefs.push_back(std::make_pair(Name, CurParen)); 1948879e06dSChris Lattner RegExStr += '('; 1958879e06dSChris Lattner ++CurParen; 1968879e06dSChris Lattner 1978879e06dSChris Lattner if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM)) 1988879e06dSChris Lattner return true; 1998879e06dSChris Lattner 2008879e06dSChris Lattner RegExStr += ')'; 2018879e06dSChris Lattner } 2028879e06dSChris Lattner 2038879e06dSChris Lattner // Handle fixed string matches. 2048879e06dSChris Lattner // Find the end, which is the start of the next regex. 
2058879e06dSChris Lattner size_t FixedMatchEnd = PatternStr.find("{{"); 2068879e06dSChris Lattner FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[[")); 2078879e06dSChris Lattner AddFixedStringToRegEx(PatternStr.substr(0, FixedMatchEnd), RegExStr); 2088879e06dSChris Lattner PatternStr = PatternStr.substr(FixedMatchEnd); 2098879e06dSChris Lattner continue; 210f08d2db9SChris Lattner } 211f08d2db9SChris Lattner 21274d50731SChris Lattner return false; 21374d50731SChris Lattner } 21474d50731SChris Lattner 2158879e06dSChris Lattner void Pattern::AddFixedStringToRegEx(StringRef FixedStr, std::string &TheStr) { 216b16ab0c4SChris Lattner // Add the characters from FixedStr to the regex, escaping as needed. This 217b16ab0c4SChris Lattner // avoids "leaning toothpicks" in common patterns. 218b16ab0c4SChris Lattner for (unsigned i = 0, e = FixedStr.size(); i != e; ++i) { 219b16ab0c4SChris Lattner switch (FixedStr[i]) { 220b16ab0c4SChris Lattner // These are the special characters matched in "p_ere_exp". 221b16ab0c4SChris Lattner case '(': 222b16ab0c4SChris Lattner case ')': 223b16ab0c4SChris Lattner case '^': 224b16ab0c4SChris Lattner case '$': 225b16ab0c4SChris Lattner case '|': 226b16ab0c4SChris Lattner case '*': 227b16ab0c4SChris Lattner case '+': 228b16ab0c4SChris Lattner case '?': 229b16ab0c4SChris Lattner case '.': 230b16ab0c4SChris Lattner case '[': 231b16ab0c4SChris Lattner case '\\': 232b16ab0c4SChris Lattner case '{': 2338879e06dSChris Lattner TheStr += '\\'; 234b16ab0c4SChris Lattner // FALL THROUGH. 
235b16ab0c4SChris Lattner default: 2368879e06dSChris Lattner TheStr += FixedStr[i]; 237b16ab0c4SChris Lattner break; 238b16ab0c4SChris Lattner } 239b16ab0c4SChris Lattner } 240b16ab0c4SChris Lattner } 241b16ab0c4SChris Lattner 2428879e06dSChris Lattner bool Pattern::AddRegExToRegEx(StringRef RegexStr, unsigned &CurParen, 2438879e06dSChris Lattner SourceMgr &SM) { 2448879e06dSChris Lattner Regex R(RegexStr); 2458879e06dSChris Lattner std::string Error; 2468879e06dSChris Lattner if (!R.isValid(Error)) { 2478879e06dSChris Lattner SM.PrintMessage(SMLoc::getFromPointer(RegexStr.data()), 2488879e06dSChris Lattner "invalid regex: " + Error, "error"); 2498879e06dSChris Lattner return true; 2508879e06dSChris Lattner } 2518879e06dSChris Lattner 2528879e06dSChris Lattner RegExStr += RegexStr.str(); 2538879e06dSChris Lattner CurParen += R.getNumMatches(); 2548879e06dSChris Lattner return false; 2558879e06dSChris Lattner } 256b16ab0c4SChris Lattner 257f08d2db9SChris Lattner /// Match - Match the pattern string against the input buffer Buffer. This 258f08d2db9SChris Lattner /// returns the position that is matched or npos if there is no match. If 259f08d2db9SChris Lattner /// there is a match, the size of the matched string is returned in MatchLen. 2608879e06dSChris Lattner size_t Pattern::Match(StringRef Buffer, size_t &MatchLen, 2618879e06dSChris Lattner StringMap<StringRef> &VariableTable) const { 262221460e0SChris Lattner // If this is a fixed string pattern, just match it now. 263221460e0SChris Lattner if (!FixedStr.empty()) { 264221460e0SChris Lattner MatchLen = FixedStr.size(); 265221460e0SChris Lattner return Buffer.find(FixedStr); 266221460e0SChris Lattner } 267221460e0SChris Lattner 268b16ab0c4SChris Lattner // Regex match. 2698879e06dSChris Lattner 2708879e06dSChris Lattner // If there are variable uses, we need to create a temporary string with the 2718879e06dSChris Lattner // actual value. 
2728879e06dSChris Lattner StringRef RegExToMatch = RegExStr; 2738879e06dSChris Lattner std::string TmpStr; 2748879e06dSChris Lattner if (!VariableUses.empty()) { 2758879e06dSChris Lattner TmpStr = RegExStr; 2768879e06dSChris Lattner 2778879e06dSChris Lattner unsigned InsertOffset = 0; 2788879e06dSChris Lattner for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) { 2798879e06dSChris Lattner // Look up the value and escape it so that we can plop it into the regex. 2808879e06dSChris Lattner std::string Value; 2818879e06dSChris Lattner AddFixedStringToRegEx(VariableTable[VariableUses[i].first], Value); 2828879e06dSChris Lattner 2838879e06dSChris Lattner // Plop it into the regex at the adjusted offset. 2848879e06dSChris Lattner TmpStr.insert(TmpStr.begin()+VariableUses[i].second+InsertOffset, 2858879e06dSChris Lattner Value.begin(), Value.end()); 2868879e06dSChris Lattner InsertOffset += Value.size(); 2878879e06dSChris Lattner } 2888879e06dSChris Lattner 2898879e06dSChris Lattner // Match the newly constructed regex. 2908879e06dSChris Lattner RegExToMatch = TmpStr; 2918879e06dSChris Lattner } 2928879e06dSChris Lattner 2938879e06dSChris Lattner 294b16ab0c4SChris Lattner SmallVector<StringRef, 4> MatchInfo; 2958879e06dSChris Lattner if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo)) 296f08d2db9SChris Lattner return StringRef::npos; 297b16ab0c4SChris Lattner 298b16ab0c4SChris Lattner // Successful regex match. 299b16ab0c4SChris Lattner assert(!MatchInfo.empty() && "Didn't get any match"); 300b16ab0c4SChris Lattner StringRef FullMatch = MatchInfo[0]; 301b16ab0c4SChris Lattner 3028879e06dSChris Lattner // If this defines any variables, remember their values. 
3038879e06dSChris Lattner for (unsigned i = 0, e = VariableDefs.size(); i != e; ++i) { 3048879e06dSChris Lattner assert(VariableDefs[i].second < MatchInfo.size() && 3058879e06dSChris Lattner "Internal paren error"); 3068879e06dSChris Lattner VariableTable[VariableDefs[i].first] = MatchInfo[VariableDefs[i].second]; 3070a4c44bdSChris Lattner } 3080a4c44bdSChris Lattner 309b16ab0c4SChris Lattner MatchLen = FullMatch.size(); 310b16ab0c4SChris Lattner return FullMatch.data()-Buffer.data(); 311f08d2db9SChris Lattner } 312f08d2db9SChris Lattner 31374d50731SChris Lattner 31474d50731SChris Lattner //===----------------------------------------------------------------------===// 31574d50731SChris Lattner // Check Strings. 31674d50731SChris Lattner //===----------------------------------------------------------------------===// 3173b40b445SChris Lattner 3183b40b445SChris Lattner /// CheckString - This is a check that we found in the input file. 3193b40b445SChris Lattner struct CheckString { 3203b40b445SChris Lattner /// Pat - The pattern to match. 3213b40b445SChris Lattner Pattern Pat; 32226cccfe1SChris Lattner 32326cccfe1SChris Lattner /// Loc - The location in the match file that the check string was specified. 32426cccfe1SChris Lattner SMLoc Loc; 32526cccfe1SChris Lattner 326da108b4eSChris Lattner /// IsCheckNext - This is true if this is a CHECK-NEXT: directive (as opposed 327da108b4eSChris Lattner /// to a CHECK: directive. 328da108b4eSChris Lattner bool IsCheckNext; 329da108b4eSChris Lattner 330236d2d5eSChris Lattner /// NotStrings - These are all of the strings that are disallowed from 331236d2d5eSChris Lattner /// occurring between this match string and the previous one (or start of 332236d2d5eSChris Lattner /// file). 
33374d50731SChris Lattner std::vector<std::pair<SMLoc, Pattern> > NotStrings; 334236d2d5eSChris Lattner 3353b40b445SChris Lattner CheckString(const Pattern &P, SMLoc L, bool isCheckNext) 3363b40b445SChris Lattner : Pat(P), Loc(L), IsCheckNext(isCheckNext) {} 33726cccfe1SChris Lattner }; 33826cccfe1SChris Lattner 339a2f8fc5aSChris Lattner /// CanonicalizeInputFile - Remove duplicate horizontal space from the specified 340a2f8fc5aSChris Lattner /// memory buffer, free it, and return a new one. 341a2f8fc5aSChris Lattner static MemoryBuffer *CanonicalizeInputFile(MemoryBuffer *MB) { 342a2f8fc5aSChris Lattner SmallVector<char, 16> NewFile; 343a2f8fc5aSChris Lattner NewFile.reserve(MB->getBufferSize()); 344a2f8fc5aSChris Lattner 345a2f8fc5aSChris Lattner for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd(); 346a2f8fc5aSChris Lattner Ptr != End; ++Ptr) { 347a2f8fc5aSChris Lattner // If C is not a horizontal whitespace, skip it. 348a2f8fc5aSChris Lattner if (*Ptr != ' ' && *Ptr != '\t') { 349a2f8fc5aSChris Lattner NewFile.push_back(*Ptr); 350a2f8fc5aSChris Lattner continue; 351a2f8fc5aSChris Lattner } 352a2f8fc5aSChris Lattner 353a2f8fc5aSChris Lattner // Otherwise, add one space and advance over neighboring space. 354a2f8fc5aSChris Lattner NewFile.push_back(' '); 355a2f8fc5aSChris Lattner while (Ptr+1 != End && 356a2f8fc5aSChris Lattner (Ptr[1] == ' ' || Ptr[1] == '\t')) 357a2f8fc5aSChris Lattner ++Ptr; 358a2f8fc5aSChris Lattner } 359a2f8fc5aSChris Lattner 360a2f8fc5aSChris Lattner // Free the old buffer and return a new one. 
361a2f8fc5aSChris Lattner MemoryBuffer *MB2 = 362a2f8fc5aSChris Lattner MemoryBuffer::getMemBufferCopy(NewFile.data(), 363a2f8fc5aSChris Lattner NewFile.data() + NewFile.size(), 364a2f8fc5aSChris Lattner MB->getBufferIdentifier()); 365a2f8fc5aSChris Lattner 366a2f8fc5aSChris Lattner delete MB; 367a2f8fc5aSChris Lattner return MB2; 368a2f8fc5aSChris Lattner } 369a2f8fc5aSChris Lattner 370ee3c74fbSChris Lattner 371ee3c74fbSChris Lattner /// ReadCheckFile - Read the check file, which specifies the sequence of 372ee3c74fbSChris Lattner /// expected strings. The strings are added to the CheckStrings vector. 373ee3c74fbSChris Lattner static bool ReadCheckFile(SourceMgr &SM, 37426cccfe1SChris Lattner std::vector<CheckString> &CheckStrings) { 375ee3c74fbSChris Lattner // Open the check file, and tell SourceMgr about it. 376ee3c74fbSChris Lattner std::string ErrorStr; 377ee3c74fbSChris Lattner MemoryBuffer *F = 378ee3c74fbSChris Lattner MemoryBuffer::getFileOrSTDIN(CheckFilename.c_str(), &ErrorStr); 379ee3c74fbSChris Lattner if (F == 0) { 380ee3c74fbSChris Lattner errs() << "Could not open check file '" << CheckFilename << "': " 381ee3c74fbSChris Lattner << ErrorStr << '\n'; 382ee3c74fbSChris Lattner return true; 383ee3c74fbSChris Lattner } 384a2f8fc5aSChris Lattner 385a2f8fc5aSChris Lattner // If we want to canonicalize whitespace, strip excess whitespace from the 386a2f8fc5aSChris Lattner // buffer containing the CHECK lines. 387a2f8fc5aSChris Lattner if (!NoCanonicalizeWhiteSpace) 388a2f8fc5aSChris Lattner F = CanonicalizeInputFile(F); 389a2f8fc5aSChris Lattner 390ee3c74fbSChris Lattner SM.AddNewSourceBuffer(F, SMLoc()); 391ee3c74fbSChris Lattner 39210f10cedSChris Lattner // Find all instances of CheckPrefix followed by : in the file. 
393caa5fc0cSChris Lattner StringRef Buffer = F->getBuffer(); 394ee3c74fbSChris Lattner 39574d50731SChris Lattner std::vector<std::pair<SMLoc, Pattern> > NotMatches; 396236d2d5eSChris Lattner 397ee3c74fbSChris Lattner while (1) { 398ee3c74fbSChris Lattner // See if Prefix occurs in the memory buffer. 399caa5fc0cSChris Lattner Buffer = Buffer.substr(Buffer.find(CheckPrefix)); 400ee3c74fbSChris Lattner 401ee3c74fbSChris Lattner // If we didn't find a match, we're done. 402caa5fc0cSChris Lattner if (Buffer.empty()) 403ee3c74fbSChris Lattner break; 404ee3c74fbSChris Lattner 405caa5fc0cSChris Lattner const char *CheckPrefixStart = Buffer.data(); 406da108b4eSChris Lattner 407da108b4eSChris Lattner // When we find a check prefix, keep track of whether we find CHECK: or 408da108b4eSChris Lattner // CHECK-NEXT: 409236d2d5eSChris Lattner bool IsCheckNext = false, IsCheckNot = false; 410da108b4eSChris Lattner 41110f10cedSChris Lattner // Verify that the : is present after the prefix. 412caa5fc0cSChris Lattner if (Buffer[CheckPrefix.size()] == ':') { 413caa5fc0cSChris Lattner Buffer = Buffer.substr(CheckPrefix.size()+1); 414caa5fc0cSChris Lattner } else if (Buffer.size() > CheckPrefix.size()+6 && 415caa5fc0cSChris Lattner memcmp(Buffer.data()+CheckPrefix.size(), "-NEXT:", 6) == 0) { 416caa5fc0cSChris Lattner Buffer = Buffer.substr(CheckPrefix.size()+7); 417da108b4eSChris Lattner IsCheckNext = true; 418236d2d5eSChris Lattner } else if (Buffer.size() > CheckPrefix.size()+5 && 419236d2d5eSChris Lattner memcmp(Buffer.data()+CheckPrefix.size(), "-NOT:", 5) == 0) { 420236d2d5eSChris Lattner Buffer = Buffer.substr(CheckPrefix.size()+6); 421236d2d5eSChris Lattner IsCheckNot = true; 422da108b4eSChris Lattner } else { 423caa5fc0cSChris Lattner Buffer = Buffer.substr(1); 42410f10cedSChris Lattner continue; 42510f10cedSChris Lattner } 42610f10cedSChris Lattner 427ee3c74fbSChris Lattner // Okay, we found the prefix, yay. 
Remember the rest of the line, but 428ee3c74fbSChris Lattner // ignore leading and trailing whitespace. 429236d2d5eSChris Lattner Buffer = Buffer.substr(Buffer.find_first_not_of(" \t")); 430ee3c74fbSChris Lattner 431ee3c74fbSChris Lattner // Scan ahead to the end of line. 432caa5fc0cSChris Lattner size_t EOL = Buffer.find_first_of("\n\r"); 433ee3c74fbSChris Lattner 43474d50731SChris Lattner // Parse the pattern. 43574d50731SChris Lattner Pattern P; 43674d50731SChris Lattner if (P.ParsePattern(Buffer.substr(0, EOL), SM)) 437ee3c74fbSChris Lattner return true; 438ee3c74fbSChris Lattner 439236d2d5eSChris Lattner Buffer = Buffer.substr(EOL); 44074d50731SChris Lattner 441236d2d5eSChris Lattner 442da108b4eSChris Lattner // Verify that CHECK-NEXT lines have at least one CHECK line before them. 443da108b4eSChris Lattner if (IsCheckNext && CheckStrings.empty()) { 444da108b4eSChris Lattner SM.PrintMessage(SMLoc::getFromPointer(CheckPrefixStart), 445da108b4eSChris Lattner "found '"+CheckPrefix+"-NEXT:' without previous '"+ 446da108b4eSChris Lattner CheckPrefix+ ": line", "error"); 447da108b4eSChris Lattner return true; 448da108b4eSChris Lattner } 449da108b4eSChris Lattner 45074d50731SChris Lattner // Handle CHECK-NOT. 45174d50731SChris Lattner if (IsCheckNot) { 45274d50731SChris Lattner NotMatches.push_back(std::make_pair(SMLoc::getFromPointer(Buffer.data()), 45374d50731SChris Lattner P)); 45474d50731SChris Lattner continue; 45574d50731SChris Lattner } 45674d50731SChris Lattner 4573b40b445SChris Lattner 458ee3c74fbSChris Lattner // Okay, add the string we captured to the output vector and move on. 
4593b40b445SChris Lattner CheckStrings.push_back(CheckString(P, 460caa5fc0cSChris Lattner SMLoc::getFromPointer(Buffer.data()), 461da108b4eSChris Lattner IsCheckNext)); 462236d2d5eSChris Lattner std::swap(NotMatches, CheckStrings.back().NotStrings); 463ee3c74fbSChris Lattner } 464ee3c74fbSChris Lattner 465ee3c74fbSChris Lattner if (CheckStrings.empty()) { 46610f10cedSChris Lattner errs() << "error: no check strings found with prefix '" << CheckPrefix 46710f10cedSChris Lattner << ":'\n"; 468ee3c74fbSChris Lattner return true; 469ee3c74fbSChris Lattner } 470ee3c74fbSChris Lattner 471236d2d5eSChris Lattner if (!NotMatches.empty()) { 472236d2d5eSChris Lattner errs() << "error: '" << CheckPrefix 473236d2d5eSChris Lattner << "-NOT:' not supported after last check line.\n"; 474236d2d5eSChris Lattner return true; 475236d2d5eSChris Lattner } 476236d2d5eSChris Lattner 477ee3c74fbSChris Lattner return false; 478ee3c74fbSChris Lattner } 479ee3c74fbSChris Lattner 480da108b4eSChris Lattner static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr, 481caa5fc0cSChris Lattner StringRef Buffer) { 482da108b4eSChris Lattner // Otherwise, we have an error, emit an error message. 483da108b4eSChris Lattner SM.PrintMessage(CheckStr.Loc, "expected string not found in input", 484da108b4eSChris Lattner "error"); 485da108b4eSChris Lattner 486da108b4eSChris Lattner // Print the "scanning from here" line. If the current position is at the 487da108b4eSChris Lattner // end of a line, advance to the start of the next line. 488caa5fc0cSChris Lattner Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r")); 489da108b4eSChris Lattner 490caa5fc0cSChris Lattner SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), "scanning from here", 491da108b4eSChris Lattner "note"); 492da108b4eSChris Lattner } 493da108b4eSChris Lattner 49437183584SChris Lattner /// CountNumNewlinesBetween - Count the number of newlines in the specified 49537183584SChris Lattner /// range. 
49637183584SChris Lattner static unsigned CountNumNewlinesBetween(StringRef Range) { 497da108b4eSChris Lattner unsigned NumNewLines = 0; 49837183584SChris Lattner while (1) { 499da108b4eSChris Lattner // Scan for newline. 50037183584SChris Lattner Range = Range.substr(Range.find_first_of("\n\r")); 50137183584SChris Lattner if (Range.empty()) return NumNewLines; 502da108b4eSChris Lattner 503da108b4eSChris Lattner ++NumNewLines; 504da108b4eSChris Lattner 505da108b4eSChris Lattner // Handle \n\r and \r\n as a single newline. 50637183584SChris Lattner if (Range.size() > 1 && 50737183584SChris Lattner (Range[1] == '\n' || Range[1] == '\r') && 50837183584SChris Lattner (Range[0] != Range[1])) 50937183584SChris Lattner Range = Range.substr(1); 51037183584SChris Lattner Range = Range.substr(1); 511da108b4eSChris Lattner } 512da108b4eSChris Lattner } 513da108b4eSChris Lattner 514ee3c74fbSChris Lattner int main(int argc, char **argv) { 515ee3c74fbSChris Lattner sys::PrintStackTraceOnErrorSignal(); 516ee3c74fbSChris Lattner PrettyStackTraceProgram X(argc, argv); 517ee3c74fbSChris Lattner cl::ParseCommandLineOptions(argc, argv); 518ee3c74fbSChris Lattner 519ee3c74fbSChris Lattner SourceMgr SM; 520ee3c74fbSChris Lattner 521ee3c74fbSChris Lattner // Read the expected strings from the check file. 52226cccfe1SChris Lattner std::vector<CheckString> CheckStrings; 523ee3c74fbSChris Lattner if (ReadCheckFile(SM, CheckStrings)) 524ee3c74fbSChris Lattner return 2; 525ee3c74fbSChris Lattner 526ee3c74fbSChris Lattner // Open the file to check and add it to SourceMgr. 
527ee3c74fbSChris Lattner std::string ErrorStr; 528ee3c74fbSChris Lattner MemoryBuffer *F = 529ee3c74fbSChris Lattner MemoryBuffer::getFileOrSTDIN(InputFilename.c_str(), &ErrorStr); 530ee3c74fbSChris Lattner if (F == 0) { 531ee3c74fbSChris Lattner errs() << "Could not open input file '" << InputFilename << "': " 532ee3c74fbSChris Lattner << ErrorStr << '\n'; 533ee3c74fbSChris Lattner return true; 534ee3c74fbSChris Lattner } 5352c3e5cdfSChris Lattner 5362c3e5cdfSChris Lattner // Remove duplicate spaces in the input file if requested. 5372c3e5cdfSChris Lattner if (!NoCanonicalizeWhiteSpace) 5382c3e5cdfSChris Lattner F = CanonicalizeInputFile(F); 5392c3e5cdfSChris Lattner 540ee3c74fbSChris Lattner SM.AddNewSourceBuffer(F, SMLoc()); 541ee3c74fbSChris Lattner 5428879e06dSChris Lattner /// VariableTable - This holds all the current filecheck variables. 5438879e06dSChris Lattner StringMap<StringRef> VariableTable; 5448879e06dSChris Lattner 545ee3c74fbSChris Lattner // Check that we have all of the expected strings, in order, in the input 546ee3c74fbSChris Lattner // file. 547caa5fc0cSChris Lattner StringRef Buffer = F->getBuffer(); 548ee3c74fbSChris Lattner 549236d2d5eSChris Lattner const char *LastMatch = Buffer.data(); 550236d2d5eSChris Lattner 551ee3c74fbSChris Lattner for (unsigned StrNo = 0, e = CheckStrings.size(); StrNo != e; ++StrNo) { 55226cccfe1SChris Lattner const CheckString &CheckStr = CheckStrings[StrNo]; 553ee3c74fbSChris Lattner 554caa5fc0cSChris Lattner StringRef SearchFrom = Buffer; 555caa5fc0cSChris Lattner 556ee3c74fbSChris Lattner // Find StrNo in the file. 5573b40b445SChris Lattner size_t MatchLen = 0; 5588879e06dSChris Lattner Buffer = Buffer.substr(CheckStr.Pat.Match(Buffer, MatchLen, VariableTable)); 559ee3c74fbSChris Lattner 560da108b4eSChris Lattner // If we didn't find a match, reject the input. 
561caa5fc0cSChris Lattner if (Buffer.empty()) { 562caa5fc0cSChris Lattner PrintCheckFailed(SM, CheckStr, SearchFrom); 563da108b4eSChris Lattner return 1; 564ee3c74fbSChris Lattner } 565ee3c74fbSChris Lattner 56637183584SChris Lattner StringRef SkippedRegion(LastMatch, Buffer.data()-LastMatch); 56737183584SChris Lattner 568da108b4eSChris Lattner // If this check is a "CHECK-NEXT", verify that the previous match was on 569da108b4eSChris Lattner // the previous line (i.e. that there is one newline between them). 570da108b4eSChris Lattner if (CheckStr.IsCheckNext) { 571da108b4eSChris Lattner // Count the number of newlines between the previous match and this one. 572236d2d5eSChris Lattner assert(LastMatch != F->getBufferStart() && 573236d2d5eSChris Lattner "CHECK-NEXT can't be the first check in a file"); 574da108b4eSChris Lattner 57537183584SChris Lattner unsigned NumNewLines = CountNumNewlinesBetween(SkippedRegion); 576da108b4eSChris Lattner if (NumNewLines == 0) { 577107c21eaSChris Lattner SM.PrintMessage(CheckStr.Loc, 578da108b4eSChris Lattner CheckPrefix+"-NEXT: is on the same line as previous match", 579ee3c74fbSChris Lattner "error"); 580caa5fc0cSChris Lattner SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), 581107c21eaSChris Lattner "'next' match was here", "note"); 582da108b4eSChris Lattner SM.PrintMessage(SMLoc::getFromPointer(LastMatch), 583da108b4eSChris Lattner "previous match was here", "note"); 584aedd8185SChris Lattner return 1; 585ee3c74fbSChris Lattner } 586ee3c74fbSChris Lattner 587da108b4eSChris Lattner if (NumNewLines != 1) { 588107c21eaSChris Lattner SM.PrintMessage(CheckStr.Loc, 589da108b4eSChris Lattner CheckPrefix+ 590da108b4eSChris Lattner "-NEXT: is not on the line after the previous match", 591da108b4eSChris Lattner "error"); 592caa5fc0cSChris Lattner SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), 593107c21eaSChris Lattner "'next' match was here", "note"); 594da108b4eSChris Lattner 
SM.PrintMessage(SMLoc::getFromPointer(LastMatch), 595da108b4eSChris Lattner "previous match was here", "note"); 596da108b4eSChris Lattner return 1; 597da108b4eSChris Lattner } 598da108b4eSChris Lattner } 599da108b4eSChris Lattner 600236d2d5eSChris Lattner // If this match had "not strings", verify that they don't exist in the 601236d2d5eSChris Lattner // skipped region. 6028879e06dSChris Lattner for (unsigned ChunkNo = 0, e = CheckStr.NotStrings.size(); 6038879e06dSChris Lattner ChunkNo != e; ++ChunkNo) { 60474d50731SChris Lattner size_t MatchLen = 0; 6058879e06dSChris Lattner size_t Pos = CheckStr.NotStrings[ChunkNo].second.Match(SkippedRegion, 6068879e06dSChris Lattner MatchLen, 6078879e06dSChris Lattner VariableTable); 608236d2d5eSChris Lattner if (Pos == StringRef::npos) continue; 609236d2d5eSChris Lattner 610236d2d5eSChris Lattner SM.PrintMessage(SMLoc::getFromPointer(LastMatch+Pos), 611236d2d5eSChris Lattner CheckPrefix+"-NOT: string occurred!", "error"); 612f08d2db9SChris Lattner SM.PrintMessage(CheckStr.NotStrings[ChunkNo].first, 613236d2d5eSChris Lattner CheckPrefix+"-NOT: pattern specified here", "note"); 614236d2d5eSChris Lattner return 1; 615236d2d5eSChris Lattner } 616236d2d5eSChris Lattner 617236d2d5eSChris Lattner 618b9f2bf46SChris Lattner // Otherwise, everything is good. Step over the matched text and remember 619b9f2bf46SChris Lattner // the position after the match as the end of the last match. 6203b40b445SChris Lattner Buffer = Buffer.substr(MatchLen); 621b9f2bf46SChris Lattner LastMatch = Buffer.data(); 622da108b4eSChris Lattner } 623da108b4eSChris Lattner 624ee3c74fbSChris Lattner return 0; 625ee3c74fbSChris Lattner } 626