1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // FileCheck does a line-by line check of a file that validates whether it 11 // contains the expected content. This is useful for regression tests etc. 12 // 13 // This program exits with an error status of 2 on error, exit status of 0 if 14 // the file matched the expected contents, and exit status of 1 if it did not 15 // contain the expected contents. 16 // 17 //===----------------------------------------------------------------------===// 18 19 #include "llvm/ADT/SmallString.h" 20 #include "llvm/ADT/StringExtras.h" 21 #include "llvm/ADT/StringMap.h" 22 #include "llvm/ADT/StringSet.h" 23 #include "llvm/Support/CommandLine.h" 24 #include "llvm/Support/MemoryBuffer.h" 25 #include "llvm/Support/PrettyStackTrace.h" 26 #include "llvm/Support/Regex.h" 27 #include "llvm/Support/Signals.h" 28 #include "llvm/Support/SourceMgr.h" 29 #include "llvm/Support/raw_ostream.h" 30 #include <algorithm> 31 #include <cctype> 32 #include <map> 33 #include <string> 34 #include <system_error> 35 #include <vector> 36 using namespace llvm; 37 38 static cl::opt<std::string> 39 CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required); 40 41 static cl::opt<std::string> 42 InputFilename("input-file", cl::desc("File to check (defaults to stdin)"), 43 cl::init("-"), cl::value_desc("filename")); 44 45 static cl::list<std::string> CheckPrefixes( 46 "check-prefix", 47 cl::desc("Prefix to use from check file (defaults to 'CHECK')")); 48 static cl::alias CheckPrefixesAlias( 49 "check-prefixes", cl::aliasopt(CheckPrefixes), cl::CommaSeparated, 50 cl::NotHidden, 51 cl::desc( 52 "Alias for -check-prefix permitting multiple comma separated values")); 53 54 static cl::opt<bool> NoCanonicalizeWhiteSpace( 55 "strict-whitespace", 56 cl::desc("Do not treat all horizontal whitespace as equivalent")); 57 58 static cl::list<std::string> ImplicitCheckNot( 59 "implicit-check-not", 60 cl::desc("Add an implicit negative check with this pattern to every\n" 61 "positive check. This can be used to ensure that no instances of\n" 62 "this pattern occur which are not matched by a positive pattern"), 63 cl::value_desc("pattern")); 64 65 static cl::opt<bool> AllowEmptyInput( 66 "allow-empty", cl::init(false), 67 cl::desc("Allow the input file to be empty. This is useful when making\n" 68 "checks that some error message does not occur, for example.")); 69 70 static cl::opt<bool> MatchFullLines( 71 "match-full-lines", cl::init(false), 72 cl::desc("Require all positive matches to cover an entire input line.\n" 73 "Allows leading and trailing whitespace if --strict-whitespace\n" 74 "is not also passed.")); 75 76 typedef cl::list<std::string>::const_iterator prefix_iterator; 77 78 //===----------------------------------------------------------------------===// 79 // Pattern Handling Code. 80 //===----------------------------------------------------------------------===// 81 82 namespace Check { 83 enum CheckType { 84 CheckNone = 0, 85 CheckPlain, 86 CheckNext, 87 CheckSame, 88 CheckNot, 89 CheckDAG, 90 CheckLabel, 91 92 /// Indicates the pattern only matches the end of file. This is used for 93 /// trailing CHECK-NOTs. 94 CheckEOF, 95 96 /// Marks when parsing found a -NOT check combined with another CHECK suffix. 97 CheckBadNot 98 }; 99 } 100 101 class Pattern { 102 SMLoc PatternLoc; 103 104 /// A fixed string to match as the pattern or empty if this pattern requires 105 /// a regex match. 106 StringRef FixedStr; 107 108 /// A regex string to match as the pattern or empty if this pattern requires 109 /// a fixed string to match. 110 std::string RegExStr; 111 112 /// Entries in this vector map to uses of a variable in the pattern, e.g. 113 /// "foo[[bar]]baz". In this case, the RegExStr will contain "foobaz" and 114 /// we'll get an entry in this vector that tells us to insert the value of 115 /// bar at offset 3. 116 std::vector<std::pair<StringRef, unsigned>> VariableUses; 117 118 /// Maps definitions of variables to their parenthesized capture numbers. 119 /// 120 /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 121 /// 1. 122 std::map<StringRef, unsigned> VariableDefs; 123 124 Check::CheckType CheckTy; 125 126 /// Contains the number of line this pattern is in. 127 unsigned LineNumber; 128 129 public: 130 explicit Pattern(Check::CheckType Ty) : CheckTy(Ty) {} 131 132 /// Returns the location in source code. 133 SMLoc getLoc() const { return PatternLoc; } 134 135 bool ParsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM, 136 unsigned LineNumber); 137 size_t Match(StringRef Buffer, size_t &MatchLen, 138 StringMap<StringRef> &VariableTable) const; 139 void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer, 140 const StringMap<StringRef> &VariableTable) const; 141 142 bool hasVariable() const { 143 return !(VariableUses.empty() && VariableDefs.empty()); 144 } 145 146 Check::CheckType getCheckTy() const { return CheckTy; } 147 148 private: 149 bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM); 150 void AddBackrefToRegEx(unsigned BackrefNum); 151 unsigned 152 ComputeMatchDistance(StringRef Buffer, 153 const StringMap<StringRef> &VariableTable) const; 154 bool EvaluateExpression(StringRef Expr, std::string &Value) const; 155 size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM); 156 }; 157 158 /// Parses the given string into the Pattern. 159 /// 160 /// \p Prefix provides which prefix is being matched, \p SM provides the 161 /// SourceMgr used for error reports, and \p LineNumber is the line number in 162 /// the input file from which the pattern string was read. Returns true in 163 /// case of an error, false otherwise. 164 bool Pattern::ParsePattern(StringRef PatternStr, StringRef Prefix, 165 SourceMgr &SM, unsigned LineNumber) { 166 bool MatchFullLinesHere = MatchFullLines && CheckTy != Check::CheckNot; 167 168 this->LineNumber = LineNumber; 169 PatternLoc = SMLoc::getFromPointer(PatternStr.data()); 170 171 // Ignore trailing whitespace. 172 while (!PatternStr.empty() && 173 (PatternStr.back() == ' ' || PatternStr.back() == '\t')) 174 PatternStr = PatternStr.substr(0, PatternStr.size() - 1); 175 176 // Check that there is something on the line. 177 if (PatternStr.empty()) { 178 SM.PrintMessage(PatternLoc, SourceMgr::DK_Error, 179 "found empty check string with prefix '" + Prefix + ":'"); 180 return true; 181 } 182 183 // Check to see if this is a fixed string, or if it has regex pieces. 184 if (!MatchFullLinesHere && 185 (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos && 186 PatternStr.find("[[") == StringRef::npos))) { 187 FixedStr = PatternStr; 188 return false; 189 } 190 191 if (MatchFullLinesHere) { 192 RegExStr += '^'; 193 if (!NoCanonicalizeWhiteSpace) 194 RegExStr += " *"; 195 } 196 197 // Paren value #0 is for the fully matched string. Any new parenthesized 198 // values add from there. 199 unsigned CurParen = 1; 200 201 // Otherwise, there is at least one regex piece. Build up the regex pattern 202 // by escaping scary characters in fixed strings, building up one big regex. 203 while (!PatternStr.empty()) { 204 // RegEx matches. 205 if (PatternStr.startswith("{{")) { 206 // This is the start of a regex match. Scan for the }}. 207 size_t End = PatternStr.find("}}"); 208 if (End == StringRef::npos) { 209 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 210 SourceMgr::DK_Error, 211 "found start of regex string with no end '}}'"); 212 return true; 213 } 214 215 // Enclose {{}} patterns in parens just like [[]] even though we're not 216 // capturing the result for any purpose. This is required in case the 217 // expression contains an alternation like: CHECK: abc{{x|z}}def. We 218 // want this to turn into: "abc(x|z)def" not "abcx|zdef". 219 RegExStr += '('; 220 ++CurParen; 221 222 if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM)) 223 return true; 224 RegExStr += ')'; 225 226 PatternStr = PatternStr.substr(End + 2); 227 continue; 228 } 229 230 // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .* 231 // (or some other regex) and assigns it to the FileCheck variable 'foo'. The 232 // second form is [[foo]] which is a reference to foo. The variable name 233 // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject 234 // it. This is to catch some common errors. 235 if (PatternStr.startswith("[[")) { 236 // Find the closing bracket pair ending the match. End is going to be an 237 // offset relative to the beginning of the match string. 238 size_t End = FindRegexVarEnd(PatternStr.substr(2), SM); 239 240 if (End == StringRef::npos) { 241 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 242 SourceMgr::DK_Error, 243 "invalid named regex reference, no ]] found"); 244 return true; 245 } 246 247 StringRef MatchStr = PatternStr.substr(2, End); 248 PatternStr = PatternStr.substr(End + 4); 249 250 // Get the regex name (e.g. "foo"). 251 size_t NameEnd = MatchStr.find(':'); 252 StringRef Name = MatchStr.substr(0, NameEnd); 253 254 if (Name.empty()) { 255 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 256 "invalid name in named regex: empty name"); 257 return true; 258 } 259 260 // Verify that the name/expression is well formed. FileCheck currently 261 // supports @LINE, @LINE+number, @LINE-number expressions. The check here 262 // is relaxed, more strict check is performed in \c EvaluateExpression. 263 bool IsExpression = false; 264 for (unsigned i = 0, e = Name.size(); i != e; ++i) { 265 if (i == 0 && Name[i] == '@') { 266 if (NameEnd != StringRef::npos) { 267 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 268 SourceMgr::DK_Error, 269 "invalid name in named regex definition"); 270 return true; 271 } 272 IsExpression = true; 273 continue; 274 } 275 if (Name[i] != '_' && !isalnum(Name[i]) && 276 (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) { 277 SM.PrintMessage(SMLoc::getFromPointer(Name.data() + i), 278 SourceMgr::DK_Error, "invalid name in named regex"); 279 return true; 280 } 281 } 282 283 // Name can't start with a digit. 284 if (isdigit(static_cast<unsigned char>(Name[0]))) { 285 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 286 "invalid name in named regex"); 287 return true; 288 } 289 290 // Handle [[foo]]. 291 if (NameEnd == StringRef::npos) { 292 // Handle variables that were defined earlier on the same line by 293 // emitting a backreference. 294 if (VariableDefs.find(Name) != VariableDefs.end()) { 295 unsigned VarParenNum = VariableDefs[Name]; 296 if (VarParenNum < 1 || VarParenNum > 9) { 297 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 298 SourceMgr::DK_Error, 299 "Can't back-reference more than 9 variables"); 300 return true; 301 } 302 AddBackrefToRegEx(VarParenNum); 303 } else { 304 VariableUses.push_back(std::make_pair(Name, RegExStr.size())); 305 } 306 continue; 307 } 308 309 // Handle [[foo:.*]]. 310 VariableDefs[Name] = CurParen; 311 RegExStr += '('; 312 ++CurParen; 313 314 if (AddRegExToRegEx(MatchStr.substr(NameEnd + 1), CurParen, SM)) 315 return true; 316 317 RegExStr += ')'; 318 } 319 320 // Handle fixed string matches. 321 // Find the end, which is the start of the next regex. 322 size_t FixedMatchEnd = PatternStr.find("{{"); 323 FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[[")); 324 RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd)); 325 PatternStr = PatternStr.substr(FixedMatchEnd); 326 } 327 328 if (MatchFullLinesHere) { 329 if (!NoCanonicalizeWhiteSpace) 330 RegExStr += " *"; 331 RegExStr += '$'; 332 } 333 334 return false; 335 } 336 337 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) { 338 Regex R(RS); 339 std::string Error; 340 if (!R.isValid(Error)) { 341 SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error, 342 "invalid regex: " + Error); 343 return true; 344 } 345 346 RegExStr += RS.str(); 347 CurParen += R.getNumMatches(); 348 return false; 349 } 350 351 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) { 352 assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number"); 353 std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum); 354 RegExStr += Backref; 355 } 356 357 /// Evaluates expression and stores the result to \p Value. 358 /// 359 /// Returns true on success and false when the expression has invalid syntax. 360 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const { 361 // The only supported expression is @LINE([\+-]\d+)? 362 if (!Expr.startswith("@LINE")) 363 return false; 364 Expr = Expr.substr(StringRef("@LINE").size()); 365 int Offset = 0; 366 if (!Expr.empty()) { 367 if (Expr[0] == '+') 368 Expr = Expr.substr(1); 369 else if (Expr[0] != '-') 370 return false; 371 if (Expr.getAsInteger(10, Offset)) 372 return false; 373 } 374 Value = llvm::itostr(LineNumber + Offset); 375 return true; 376 } 377 378 /// Matches the pattern string against the input buffer \p Buffer 379 /// 380 /// This returns the position that is matched or npos if there is no match. If 381 /// there is a match, the size of the matched string is returned in \p 382 /// MatchLen. 383 /// 384 /// The \p VariableTable StringMap provides the current values of filecheck 385 /// variables and is updated if this match defines new values. 386 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen, 387 StringMap<StringRef> &VariableTable) const { 388 // If this is the EOF pattern, match it immediately. 389 if (CheckTy == Check::CheckEOF) { 390 MatchLen = 0; 391 return Buffer.size(); 392 } 393 394 // If this is a fixed string pattern, just match it now. 395 if (!FixedStr.empty()) { 396 MatchLen = FixedStr.size(); 397 return Buffer.find(FixedStr); 398 } 399 400 // Regex match. 401 402 // If there are variable uses, we need to create a temporary string with the 403 // actual value. 404 StringRef RegExToMatch = RegExStr; 405 std::string TmpStr; 406 if (!VariableUses.empty()) { 407 TmpStr = RegExStr; 408 409 unsigned InsertOffset = 0; 410 for (const auto &VariableUse : VariableUses) { 411 std::string Value; 412 413 if (VariableUse.first[0] == '@') { 414 if (!EvaluateExpression(VariableUse.first, Value)) 415 return StringRef::npos; 416 } else { 417 StringMap<StringRef>::iterator it = 418 VariableTable.find(VariableUse.first); 419 // If the variable is undefined, return an error. 420 if (it == VariableTable.end()) 421 return StringRef::npos; 422 423 // Look up the value and escape it so that we can put it into the regex. 424 Value += Regex::escape(it->second); 425 } 426 427 // Plop it into the regex at the adjusted offset. 428 TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset, 429 Value.begin(), Value.end()); 430 InsertOffset += Value.size(); 431 } 432 433 // Match the newly constructed regex. 434 RegExToMatch = TmpStr; 435 } 436 437 SmallVector<StringRef, 4> MatchInfo; 438 if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo)) 439 return StringRef::npos; 440 441 // Successful regex match. 442 assert(!MatchInfo.empty() && "Didn't get any match"); 443 StringRef FullMatch = MatchInfo[0]; 444 445 // If this defines any variables, remember their values. 446 for (const auto &VariableDef : VariableDefs) { 447 assert(VariableDef.second < MatchInfo.size() && "Internal paren error"); 448 VariableTable[VariableDef.first] = MatchInfo[VariableDef.second]; 449 } 450 451 MatchLen = FullMatch.size(); 452 return FullMatch.data() - Buffer.data(); 453 } 454 455 456 /// Computes an arbitrary estimate for the quality of matching this pattern at 457 /// the start of \p Buffer; a distance of zero should correspond to a perfect 458 /// match. 459 unsigned 460 Pattern::ComputeMatchDistance(StringRef Buffer, 461 const StringMap<StringRef> &VariableTable) const { 462 // Just compute the number of matching characters. For regular expressions, we 463 // just compare against the regex itself and hope for the best. 464 // 465 // FIXME: One easy improvement here is have the regex lib generate a single 466 // example regular expression which matches, and use that as the example 467 // string. 468 StringRef ExampleString(FixedStr); 469 if (ExampleString.empty()) 470 ExampleString = RegExStr; 471 472 // Only compare up to the first line in the buffer, or the string size. 473 StringRef BufferPrefix = Buffer.substr(0, ExampleString.size()); 474 BufferPrefix = BufferPrefix.split('\n').first; 475 return BufferPrefix.edit_distance(ExampleString); 476 } 477 478 /// Prints additional information about a failure to match involving this 479 /// pattern. 480 void Pattern::PrintFailureInfo( 481 const SourceMgr &SM, StringRef Buffer, 482 const StringMap<StringRef> &VariableTable) const { 483 // If this was a regular expression using variables, print the current 484 // variable values. 485 if (!VariableUses.empty()) { 486 for (const auto &VariableUse : VariableUses) { 487 SmallString<256> Msg; 488 raw_svector_ostream OS(Msg); 489 StringRef Var = VariableUse.first; 490 if (Var[0] == '@') { 491 std::string Value; 492 if (EvaluateExpression(Var, Value)) { 493 OS << "with expression \""; 494 OS.write_escaped(Var) << "\" equal to \""; 495 OS.write_escaped(Value) << "\""; 496 } else { 497 OS << "uses incorrect expression \""; 498 OS.write_escaped(Var) << "\""; 499 } 500 } else { 501 StringMap<StringRef>::const_iterator it = VariableTable.find(Var); 502 503 // Check for undefined variable references. 504 if (it == VariableTable.end()) { 505 OS << "uses undefined variable \""; 506 OS.write_escaped(Var) << "\""; 507 } else { 508 OS << "with variable \""; 509 OS.write_escaped(Var) << "\" equal to \""; 510 OS.write_escaped(it->second) << "\""; 511 } 512 } 513 514 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 515 OS.str()); 516 } 517 } 518 519 // Attempt to find the closest/best fuzzy match. Usually an error happens 520 // because some string in the output didn't exactly match. In these cases, we 521 // would like to show the user a best guess at what "should have" matched, to 522 // save them having to actually check the input manually. 523 size_t NumLinesForward = 0; 524 size_t Best = StringRef::npos; 525 double BestQuality = 0; 526 527 // Use an arbitrary 4k limit on how far we will search. 528 for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) { 529 if (Buffer[i] == '\n') 530 ++NumLinesForward; 531 532 // Patterns have leading whitespace stripped, so skip whitespace when 533 // looking for something which looks like a pattern. 534 if (Buffer[i] == ' ' || Buffer[i] == '\t') 535 continue; 536 537 // Compute the "quality" of this match as an arbitrary combination of the 538 // match distance and the number of lines skipped to get to this match. 539 unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable); 540 double Quality = Distance + (NumLinesForward / 100.); 541 542 if (Quality < BestQuality || Best == StringRef::npos) { 543 Best = i; 544 BestQuality = Quality; 545 } 546 } 547 548 // Print the "possible intended match here" line if we found something 549 // reasonable and not equal to what we showed in the "scanning from here" 550 // line. 551 if (Best && Best != StringRef::npos && BestQuality < 50) { 552 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best), 553 SourceMgr::DK_Note, "possible intended match here"); 554 555 // FIXME: If we wanted to be really friendly we would show why the match 556 // failed, as it can be hard to spot simple one character differences. 557 } 558 } 559 560 /// Finds the closing sequence of a regex variable usage or definition. 561 /// 562 /// \p Str has to point in the beginning of the definition (right after the 563 /// opening sequence). Returns the offset of the closing sequence within Str, 564 /// or npos if it was not found. 565 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) { 566 // Offset keeps track of the current offset within the input Str 567 size_t Offset = 0; 568 // [...] Nesting depth 569 size_t BracketDepth = 0; 570 571 while (!Str.empty()) { 572 if (Str.startswith("]]") && BracketDepth == 0) 573 return Offset; 574 if (Str[0] == '\\') { 575 // Backslash escapes the next char within regexes, so skip them both. 576 Str = Str.substr(2); 577 Offset += 2; 578 } else { 579 switch (Str[0]) { 580 default: 581 break; 582 case '[': 583 BracketDepth++; 584 break; 585 case ']': 586 if (BracketDepth == 0) { 587 SM.PrintMessage(SMLoc::getFromPointer(Str.data()), 588 SourceMgr::DK_Error, 589 "missing closing \"]\" for regex variable"); 590 exit(1); 591 } 592 BracketDepth--; 593 break; 594 } 595 Str = Str.substr(1); 596 Offset++; 597 } 598 } 599 600 return StringRef::npos; 601 } 602 603 //===----------------------------------------------------------------------===// 604 // Check Strings. 605 //===----------------------------------------------------------------------===// 606 607 /// A check that we found in the input file. 608 struct CheckString { 609 /// The pattern to match. 610 Pattern Pat; 611 612 /// Which prefix name this check matched. 613 StringRef Prefix; 614 615 /// The location in the match file that the check string was specified. 616 SMLoc Loc; 617 618 /// All of the strings that are disallowed from occurring between this match 619 /// string and the previous one (or start of file). 620 std::vector<Pattern> DagNotStrings; 621 622 CheckString(const Pattern &P, StringRef S, SMLoc L) 623 : Pat(P), Prefix(S), Loc(L) {} 624 625 size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode, 626 size_t &MatchLen, StringMap<StringRef> &VariableTable) const; 627 628 bool CheckNext(const SourceMgr &SM, StringRef Buffer) const; 629 bool CheckSame(const SourceMgr &SM, StringRef Buffer) const; 630 bool CheckNot(const SourceMgr &SM, StringRef Buffer, 631 const std::vector<const Pattern *> &NotStrings, 632 StringMap<StringRef> &VariableTable) const; 633 size_t CheckDag(const SourceMgr &SM, StringRef Buffer, 634 std::vector<const Pattern *> &NotStrings, 635 StringMap<StringRef> &VariableTable) const; 636 }; 637 638 /// Canonicalize whitespaces in the file. Line endings are replaced with 639 /// UNIX-style '\n'. 640 static StringRef CanonicalizeFile(MemoryBuffer &MB, 641 SmallVectorImpl<char> &OutputBuffer) { 642 OutputBuffer.reserve(MB.getBufferSize()); 643 644 for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd(); 645 Ptr != End; ++Ptr) { 646 // Eliminate trailing dosish \r. 647 if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') { 648 continue; 649 } 650 651 // If current char is not a horizontal whitespace or if horizontal 652 // whitespace canonicalization is disabled, dump it to output as is. 653 if (NoCanonicalizeWhiteSpace || (*Ptr != ' ' && *Ptr != '\t')) { 654 OutputBuffer.push_back(*Ptr); 655 continue; 656 } 657 658 // Otherwise, add one space and advance over neighboring space. 659 OutputBuffer.push_back(' '); 660 while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t')) 661 ++Ptr; 662 } 663 664 // Add a null byte and then return all but that byte. 665 OutputBuffer.push_back('\0'); 666 return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1); 667 } 668 669 static bool IsPartOfWord(char c) { 670 return (isalnum(c) || c == '-' || c == '_'); 671 } 672 673 // Get the size of the prefix extension. 674 static size_t CheckTypeSize(Check::CheckType Ty) { 675 switch (Ty) { 676 case Check::CheckNone: 677 case Check::CheckBadNot: 678 return 0; 679 680 case Check::CheckPlain: 681 return sizeof(":") - 1; 682 683 case Check::CheckNext: 684 return sizeof("-NEXT:") - 1; 685 686 case Check::CheckSame: 687 return sizeof("-SAME:") - 1; 688 689 case Check::CheckNot: 690 return sizeof("-NOT:") - 1; 691 692 case Check::CheckDAG: 693 return sizeof("-DAG:") - 1; 694 695 case Check::CheckLabel: 696 return sizeof("-LABEL:") - 1; 697 698 case Check::CheckEOF: 699 llvm_unreachable("Should not be using EOF size"); 700 } 701 702 llvm_unreachable("Bad check type"); 703 } 704 705 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) { 706 char NextChar = Buffer[Prefix.size()]; 707 708 // Verify that the : is present after the prefix. 709 if (NextChar == ':') 710 return Check::CheckPlain; 711 712 if (NextChar != '-') 713 return Check::CheckNone; 714 715 StringRef Rest = Buffer.drop_front(Prefix.size() + 1); 716 if (Rest.startswith("NEXT:")) 717 return Check::CheckNext; 718 719 if (Rest.startswith("SAME:")) 720 return Check::CheckSame; 721 722 if (Rest.startswith("NOT:")) 723 return Check::CheckNot; 724 725 if (Rest.startswith("DAG:")) 726 return Check::CheckDAG; 727 728 if (Rest.startswith("LABEL:")) 729 return Check::CheckLabel; 730 731 // You can't combine -NOT with another suffix. 732 if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") || 733 Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") || 734 Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:")) 735 return Check::CheckBadNot; 736 737 return Check::CheckNone; 738 } 739 740 // From the given position, find the next character after the word. 741 static size_t SkipWord(StringRef Str, size_t Loc) { 742 while (Loc < Str.size() && IsPartOfWord(Str[Loc])) 743 ++Loc; 744 return Loc; 745 } 746 747 // Try to find the first match in buffer for any prefix. If a valid match is 748 // found, return that prefix and set its type and location. If there are almost 749 // matches (e.g. the actual prefix string is found, but is not an actual check 750 // string), but no valid match, return an empty string and set the position to 751 // resume searching from. If no partial matches are found, return an empty 752 // string and the location will be StringRef::npos. If one prefix is a substring 753 // of another, the maximal match should be found. e.g. if "A" and "AA" are 754 // prefixes then AA-CHECK: should match the second one. 755 static StringRef FindFirstCandidateMatch(StringRef &Buffer, 756 Check::CheckType &CheckTy, 757 size_t &CheckLoc) { 758 StringRef FirstPrefix; 759 size_t FirstLoc = StringRef::npos; 760 size_t SearchLoc = StringRef::npos; 761 Check::CheckType FirstTy = Check::CheckNone; 762 763 CheckTy = Check::CheckNone; 764 CheckLoc = StringRef::npos; 765 766 for (StringRef Prefix : CheckPrefixes) { 767 size_t PrefixLoc = Buffer.find(Prefix); 768 769 if (PrefixLoc == StringRef::npos) 770 continue; 771 772 // Track where we are searching for invalid prefixes that look almost right. 773 // We need to only advance to the first partial match on the next attempt 774 // since a partial match could be a substring of a later, valid prefix. 775 // Need to skip to the end of the word, otherwise we could end up 776 // matching a prefix in a substring later. 777 if (PrefixLoc < SearchLoc) 778 SearchLoc = SkipWord(Buffer, PrefixLoc); 779 780 // We only want to find the first match to avoid skipping some. 781 if (PrefixLoc > FirstLoc) 782 continue; 783 // If one matching check-prefix is a prefix of another, choose the 784 // longer one. 785 if (PrefixLoc == FirstLoc && Prefix.size() < FirstPrefix.size()) 786 continue; 787 788 StringRef Rest = Buffer.drop_front(PrefixLoc); 789 // Make sure we have actually found the prefix, and not a word containing 790 // it. This should also prevent matching the wrong prefix when one is a 791 // substring of another. 792 if (PrefixLoc != 0 && IsPartOfWord(Buffer[PrefixLoc - 1])) 793 FirstTy = Check::CheckNone; 794 else 795 FirstTy = FindCheckType(Rest, Prefix); 796 797 FirstLoc = PrefixLoc; 798 FirstPrefix = Prefix; 799 } 800 801 // If the first prefix is invalid, we should continue the search after it. 802 if (FirstTy == Check::CheckNone) { 803 CheckLoc = SearchLoc; 804 return ""; 805 } 806 807 CheckTy = FirstTy; 808 CheckLoc = FirstLoc; 809 return FirstPrefix; 810 } 811 812 static StringRef FindFirstMatchingPrefix(StringRef &Buffer, 813 unsigned &LineNumber, 814 Check::CheckType &CheckTy, 815 size_t &CheckLoc) { 816 while (!Buffer.empty()) { 817 StringRef Prefix = FindFirstCandidateMatch(Buffer, CheckTy, CheckLoc); 818 // If we found a real match, we are done. 819 if (!Prefix.empty()) { 820 LineNumber += Buffer.substr(0, CheckLoc).count('\n'); 821 return Prefix; 822 } 823 824 // We didn't find any almost matches either, we are also done. 825 if (CheckLoc == StringRef::npos) 826 return StringRef(); 827 828 LineNumber += Buffer.substr(0, CheckLoc + 1).count('\n'); 829 830 // Advance to the last possible match we found and try again. 831 Buffer = Buffer.drop_front(CheckLoc + 1); 832 } 833 834 return StringRef(); 835 } 836 837 /// Read the check file, which specifies the sequence of expected strings. 838 /// 839 /// The strings are added to the CheckStrings vector. Returns true in case of 840 /// an error, false otherwise. 841 static bool ReadCheckFile(SourceMgr &SM, StringRef Buffer, 842 std::vector<CheckString> &CheckStrings) { 843 std::vector<Pattern> ImplicitNegativeChecks; 844 for (const auto &PatternString : ImplicitCheckNot) { 845 // Create a buffer with fake command line content in order to display the 846 // command line option responsible for the specific implicit CHECK-NOT. 847 std::string Prefix = (Twine("-") + ImplicitCheckNot.ArgStr + "='").str(); 848 std::string Suffix = "'"; 849 std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy( 850 Prefix + PatternString + Suffix, "command line"); 851 852 StringRef PatternInBuffer = 853 CmdLine->getBuffer().substr(Prefix.size(), PatternString.size()); 854 SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc()); 855 856 ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot)); 857 ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer, 858 "IMPLICIT-CHECK", SM, 0); 859 } 860 861 std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks; 862 863 // LineNumber keeps track of the line on which CheckPrefix instances are 864 // found. 865 unsigned LineNumber = 1; 866 867 while (1) { 868 Check::CheckType CheckTy; 869 size_t PrefixLoc; 870 871 // See if a prefix occurs in the memory buffer. 872 StringRef UsedPrefix = 873 FindFirstMatchingPrefix(Buffer, LineNumber, CheckTy, PrefixLoc); 874 if (UsedPrefix.empty()) 875 break; 876 877 Buffer = Buffer.drop_front(PrefixLoc); 878 879 // Location to use for error messages. 880 const char *UsedPrefixStart = Buffer.data() + (PrefixLoc == 0 ? 0 : 1); 881 882 // PrefixLoc is to the start of the prefix. Skip to the end. 883 Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy)); 884 885 // Complain about useful-looking but unsupported suffixes. 886 if (CheckTy == Check::CheckBadNot) { 887 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error, 888 "unsupported -NOT combo on prefix '" + UsedPrefix + "'"); 889 return true; 890 } 891 892 // Okay, we found the prefix, yay. Remember the rest of the line, but ignore 893 // leading and trailing whitespace. 894 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t")); 895 896 // Scan ahead to the end of line. 897 size_t EOL = Buffer.find_first_of("\n\r"); 898 899 // Remember the location of the start of the pattern, for diagnostics. 900 SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data()); 901 902 // Parse the pattern. 903 Pattern P(CheckTy); 904 if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber)) 905 return true; 906 907 // Verify that CHECK-LABEL lines do not define or use variables 908 if ((CheckTy == Check::CheckLabel) && P.hasVariable()) { 909 SM.PrintMessage( 910 SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error, 911 "found '" + UsedPrefix + "-LABEL:'" 912 " with variable definition or use"); 913 return true; 914 } 915 916 Buffer = Buffer.substr(EOL); 917 918 // Verify that CHECK-NEXT lines have at least one CHECK line before them. 919 if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame) && 920 CheckStrings.empty()) { 921 StringRef Type = CheckTy == Check::CheckNext ? "NEXT" : "SAME"; 922 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart), 923 SourceMgr::DK_Error, 924 "found '" + UsedPrefix + "-" + Type + 925 "' without previous '" + UsedPrefix + ": line"); 926 return true; 927 } 928 929 // Handle CHECK-DAG/-NOT. 930 if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) { 931 DagNotMatches.push_back(P); 932 continue; 933 } 934 935 // Okay, add the string we captured to the output vector and move on. 936 CheckStrings.emplace_back(P, UsedPrefix, PatternLoc); 937 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); 938 DagNotMatches = ImplicitNegativeChecks; 939 } 940 941 // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first 942 // prefix as a filler for the error message. 943 if (!DagNotMatches.empty()) { 944 CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(), 945 SMLoc::getFromPointer(Buffer.data())); 946 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); 947 } 948 949 if (CheckStrings.empty()) { 950 errs() << "error: no check strings found with prefix" 951 << (CheckPrefixes.size() > 1 ? "es " : " "); 952 prefix_iterator I = CheckPrefixes.begin(); 953 prefix_iterator E = CheckPrefixes.end(); 954 if (I != E) { 955 errs() << "\'" << *I << ":'"; 956 ++I; 957 } 958 for (; I != E; ++I) 959 errs() << ", \'" << *I << ":'"; 960 961 errs() << '\n'; 962 return true; 963 } 964 965 return false; 966 } 967 968 static void PrintCheckFailed(const SourceMgr &SM, SMLoc Loc, const Pattern &Pat, 969 StringRef Buffer, 970 StringMap<StringRef> &VariableTable) { 971 // Otherwise, we have an error, emit an error message. 972 SM.PrintMessage(Loc, SourceMgr::DK_Error, 973 "expected string not found in input"); 974 975 // Print the "scanning from here" line. If the current position is at the 976 // end of a line, advance to the start of the next line. 977 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r")); 978 979 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 980 "scanning from here"); 981 982 // Allow the pattern to print additional information if desired. 983 Pat.PrintFailureInfo(SM, Buffer, VariableTable); 984 } 985 986 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr, 987 StringRef Buffer, 988 StringMap<StringRef> &VariableTable) { 989 PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable); 990 } 991 992 /// Count the number of newlines in the specified range. 993 static unsigned CountNumNewlinesBetween(StringRef Range, 994 const char *&FirstNewLine) { 995 unsigned NumNewLines = 0; 996 while (1) { 997 // Scan for newline. 998 Range = Range.substr(Range.find_first_of("\n\r")); 999 if (Range.empty()) 1000 return NumNewLines; 1001 1002 ++NumNewLines; 1003 1004 // Handle \n\r and \r\n as a single newline. 1005 if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') && 1006 (Range[0] != Range[1])) 1007 Range = Range.substr(1); 1008 Range = Range.substr(1); 1009 1010 if (NumNewLines == 1) 1011 FirstNewLine = Range.begin(); 1012 } 1013 } 1014 1015 /// Match check string and its "not strings" and/or "dag strings". 1016 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer, 1017 bool IsLabelScanMode, size_t &MatchLen, 1018 StringMap<StringRef> &VariableTable) const { 1019 size_t LastPos = 0; 1020 std::vector<const Pattern *> NotStrings; 1021 1022 // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL 1023 // bounds; we have not processed variable definitions within the bounded block 1024 // yet so cannot handle any final CHECK-DAG yet; this is handled when going 1025 // over the block again (including the last CHECK-LABEL) in normal mode. 1026 if (!IsLabelScanMode) { 1027 // Match "dag strings" (with mixed "not strings" if any). 1028 LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable); 1029 if (LastPos == StringRef::npos) 1030 return StringRef::npos; 1031 } 1032 1033 // Match itself from the last position after matching CHECK-DAG. 1034 StringRef MatchBuffer = Buffer.substr(LastPos); 1035 size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable); 1036 if (MatchPos == StringRef::npos) { 1037 PrintCheckFailed(SM, *this, MatchBuffer, VariableTable); 1038 return StringRef::npos; 1039 } 1040 1041 // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT 1042 // or CHECK-NOT 1043 if (!IsLabelScanMode) { 1044 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); 1045 1046 // If this check is a "CHECK-NEXT", verify that the previous match was on 1047 // the previous line (i.e. that there is one newline between them). 1048 if (CheckNext(SM, SkippedRegion)) 1049 return StringRef::npos; 1050 1051 // If this check is a "CHECK-SAME", verify that the previous match was on 1052 // the same line (i.e. that there is no newline between them). 1053 if (CheckSame(SM, SkippedRegion)) 1054 return StringRef::npos; 1055 1056 // If this match had "not strings", verify that they don't exist in the 1057 // skipped region. 1058 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) 1059 return StringRef::npos; 1060 } 1061 1062 return LastPos + MatchPos; 1063 } 1064 1065 /// Verify there is a single line in the given buffer. 1066 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const { 1067 if (Pat.getCheckTy() != Check::CheckNext) 1068 return false; 1069 1070 // Count the number of newlines between the previous match and this one. 1071 assert(Buffer.data() != 1072 SM.getMemoryBuffer(SM.FindBufferContainingLoc( 1073 SMLoc::getFromPointer(Buffer.data()))) 1074 ->getBufferStart() && 1075 "CHECK-NEXT can't be the first check in a file"); 1076 1077 const char *FirstNewLine = nullptr; 1078 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 1079 1080 if (NumNewLines == 0) { 1081 SM.PrintMessage(Loc, SourceMgr::DK_Error, 1082 Prefix + "-NEXT: is on the same line as previous match"); 1083 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 1084 "'next' match was here"); 1085 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1086 "previous match ended here"); 1087 return true; 1088 } 1089 1090 if (NumNewLines != 1) { 1091 SM.PrintMessage(Loc, SourceMgr::DK_Error, 1092 Prefix + 1093 "-NEXT: is not on the line after the previous match"); 1094 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 1095 "'next' match was here"); 1096 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1097 "previous match ended here"); 1098 SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note, 1099 "non-matching line after previous match is here"); 1100 return true; 1101 } 1102 1103 return false; 1104 } 1105 1106 /// Verify there is no newline in the given buffer. 1107 bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const { 1108 if (Pat.getCheckTy() != Check::CheckSame) 1109 return false; 1110 1111 // Count the number of newlines between the previous match and this one. 1112 assert(Buffer.data() != 1113 SM.getMemoryBuffer(SM.FindBufferContainingLoc( 1114 SMLoc::getFromPointer(Buffer.data()))) 1115 ->getBufferStart() && 1116 "CHECK-SAME can't be the first check in a file"); 1117 1118 const char *FirstNewLine = nullptr; 1119 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 1120 1121 if (NumNewLines != 0) { 1122 SM.PrintMessage(Loc, SourceMgr::DK_Error, 1123 Prefix + 1124 "-SAME: is not on the same line as the previous match"); 1125 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 1126 "'next' match was here"); 1127 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1128 "previous match ended here"); 1129 return true; 1130 } 1131 1132 return false; 1133 } 1134 1135 /// Verify there's no "not strings" in the given buffer. 1136 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer, 1137 const std::vector<const Pattern *> &NotStrings, 1138 StringMap<StringRef> &VariableTable) const { 1139 for (const Pattern *Pat : NotStrings) { 1140 assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!"); 1141 1142 size_t MatchLen = 0; 1143 size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable); 1144 1145 if (Pos == StringRef::npos) 1146 continue; 1147 1148 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Pos), 1149 SourceMgr::DK_Error, Prefix + "-NOT: string occurred!"); 1150 SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note, 1151 Prefix + "-NOT: pattern specified here"); 1152 return true; 1153 } 1154 1155 return false; 1156 } 1157 1158 /// Match "dag strings" and their mixed "not strings". 1159 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer, 1160 std::vector<const Pattern *> &NotStrings, 1161 StringMap<StringRef> &VariableTable) const { 1162 if (DagNotStrings.empty()) 1163 return 0; 1164 1165 size_t LastPos = 0; 1166 size_t StartPos = LastPos; 1167 1168 for (const Pattern &Pat : DagNotStrings) { 1169 assert((Pat.getCheckTy() == Check::CheckDAG || 1170 Pat.getCheckTy() == Check::CheckNot) && 1171 "Invalid CHECK-DAG or CHECK-NOT!"); 1172 1173 if (Pat.getCheckTy() == Check::CheckNot) { 1174 NotStrings.push_back(&Pat); 1175 continue; 1176 } 1177 1178 assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!"); 1179 1180 size_t MatchLen = 0, MatchPos; 1181 1182 // CHECK-DAG always matches from the start. 1183 StringRef MatchBuffer = Buffer.substr(StartPos); 1184 MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable); 1185 // With a group of CHECK-DAGs, a single mismatching means the match on 1186 // that group of CHECK-DAGs fails immediately. 1187 if (MatchPos == StringRef::npos) { 1188 PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable); 1189 return StringRef::npos; 1190 } 1191 // Re-calc it as the offset relative to the start of the original string. 1192 MatchPos += StartPos; 1193 1194 if (!NotStrings.empty()) { 1195 if (MatchPos < LastPos) { 1196 // Reordered? 1197 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos), 1198 SourceMgr::DK_Error, 1199 Prefix + "-DAG: found a match of CHECK-DAG" 1200 " reordering across a CHECK-NOT"); 1201 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos), 1202 SourceMgr::DK_Note, 1203 Prefix + "-DAG: the farthest match of CHECK-DAG" 1204 " is found here"); 1205 SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note, 1206 Prefix + "-NOT: the crossed pattern specified" 1207 " here"); 1208 SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note, 1209 Prefix + "-DAG: the reordered pattern specified" 1210 " here"); 1211 return StringRef::npos; 1212 } 1213 // All subsequent CHECK-DAGs should be matched from the farthest 1214 // position of all precedent CHECK-DAGs (including this one.) 1215 StartPos = LastPos; 1216 // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to 1217 // CHECK-DAG, verify that there's no 'not' strings occurred in that 1218 // region. 1219 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); 1220 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) 1221 return StringRef::npos; 1222 // Clear "not strings". 1223 NotStrings.clear(); 1224 } 1225 1226 // Update the last position with CHECK-DAG matches. 1227 LastPos = std::max(MatchPos + MatchLen, LastPos); 1228 } 1229 1230 return LastPos; 1231 } 1232 1233 // A check prefix must contain only alphanumeric, hyphens and underscores. 1234 static bool ValidateCheckPrefix(StringRef CheckPrefix) { 1235 Regex Validator("^[a-zA-Z0-9_-]*$"); 1236 return Validator.match(CheckPrefix); 1237 } 1238 1239 static bool ValidateCheckPrefixes() { 1240 StringSet<> PrefixSet; 1241 1242 for (StringRef Prefix : CheckPrefixes) { 1243 // Reject empty prefixes. 1244 if (Prefix == "") 1245 return false; 1246 1247 if (!PrefixSet.insert(Prefix).second) 1248 return false; 1249 1250 if (!ValidateCheckPrefix(Prefix)) 1251 return false; 1252 } 1253 1254 return true; 1255 } 1256 1257 // I don't think there's a way to specify an initial value for cl::list, 1258 // so if nothing was specified, add the default 1259 static void AddCheckPrefixIfNeeded() { 1260 if (CheckPrefixes.empty()) 1261 CheckPrefixes.push_back("CHECK"); 1262 } 1263 1264 static void DumpCommandLine(int argc, char **argv) { 1265 errs() << "FileCheck command line: "; 1266 for (int I = 0; I < argc; I++) 1267 errs() << " " << argv[I]; 1268 errs() << "\n"; 1269 } 1270 1271 /// Check the input to FileCheck provided in the \p Buffer against the \p 1272 /// CheckStrings read from the check file. 1273 /// 1274 /// Returns false if the input fails to satisfy the checks. 1275 bool CheckInput(SourceMgr &SM, StringRef Buffer, 1276 ArrayRef<CheckString> CheckStrings) { 1277 bool ChecksFailed = false; 1278 1279 /// VariableTable - This holds all the current filecheck variables. 1280 StringMap<StringRef> VariableTable; 1281 1282 unsigned i = 0, j = 0, e = CheckStrings.size(); 1283 while (true) { 1284 StringRef CheckRegion; 1285 if (j == e) { 1286 CheckRegion = Buffer; 1287 } else { 1288 const CheckString &CheckLabelStr = CheckStrings[j]; 1289 if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) { 1290 ++j; 1291 continue; 1292 } 1293 1294 // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG 1295 size_t MatchLabelLen = 0; 1296 size_t MatchLabelPos = 1297 CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, VariableTable); 1298 if (MatchLabelPos == StringRef::npos) 1299 // Immediately bail of CHECK-LABEL fails, nothing else we can do. 1300 return false; 1301 1302 CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen); 1303 Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen); 1304 ++j; 1305 } 1306 1307 for (; i != j; ++i) { 1308 const CheckString &CheckStr = CheckStrings[i]; 1309 1310 // Check each string within the scanned region, including a second check 1311 // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG) 1312 size_t MatchLen = 0; 1313 size_t MatchPos = 1314 CheckStr.Check(SM, CheckRegion, false, MatchLen, VariableTable); 1315 1316 if (MatchPos == StringRef::npos) { 1317 ChecksFailed = true; 1318 i = j; 1319 break; 1320 } 1321 1322 CheckRegion = CheckRegion.substr(MatchPos + MatchLen); 1323 } 1324 1325 if (j == e) 1326 break; 1327 } 1328 1329 // Success if no checks failed. 1330 return !ChecksFailed; 1331 } 1332 1333 int main(int argc, char **argv) { 1334 sys::PrintStackTraceOnErrorSignal(argv[0]); 1335 PrettyStackTraceProgram X(argc, argv); 1336 cl::ParseCommandLineOptions(argc, argv); 1337 1338 if (!ValidateCheckPrefixes()) { 1339 errs() << "Supplied check-prefix is invalid! Prefixes must be unique and " 1340 "start with a letter and contain only alphanumeric characters, " 1341 "hyphens and underscores\n"; 1342 return 2; 1343 } 1344 1345 AddCheckPrefixIfNeeded(); 1346 1347 SourceMgr SM; 1348 1349 // Read the expected strings from the check file. 1350 ErrorOr<std::unique_ptr<MemoryBuffer>> CheckFileOrErr = 1351 MemoryBuffer::getFileOrSTDIN(CheckFilename); 1352 if (std::error_code EC = CheckFileOrErr.getError()) { 1353 errs() << "Could not open check file '" << CheckFilename 1354 << "': " << EC.message() << '\n'; 1355 return 2; 1356 } 1357 MemoryBuffer &CheckFile = *CheckFileOrErr.get(); 1358 1359 SmallString<4096> CheckFileBuffer; 1360 StringRef CheckFileText = CanonicalizeFile(CheckFile, CheckFileBuffer); 1361 1362 SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer( 1363 CheckFileText, CheckFile.getBufferIdentifier()), 1364 SMLoc()); 1365 1366 std::vector<CheckString> CheckStrings; 1367 if (ReadCheckFile(SM, CheckFileText, CheckStrings)) 1368 return 2; 1369 1370 // Open the file to check and add it to SourceMgr. 1371 ErrorOr<std::unique_ptr<MemoryBuffer>> InputFileOrErr = 1372 MemoryBuffer::getFileOrSTDIN(InputFilename); 1373 if (std::error_code EC = InputFileOrErr.getError()) { 1374 errs() << "Could not open input file '" << InputFilename 1375 << "': " << EC.message() << '\n'; 1376 return 2; 1377 } 1378 MemoryBuffer &InputFile = *InputFileOrErr.get(); 1379 1380 if (InputFile.getBufferSize() == 0 && !AllowEmptyInput) { 1381 errs() << "FileCheck error: '" << InputFilename << "' is empty.\n"; 1382 DumpCommandLine(argc, argv); 1383 return 2; 1384 } 1385 1386 SmallString<4096> InputFileBuffer; 1387 StringRef InputFileText = CanonicalizeFile(InputFile, InputFileBuffer); 1388 1389 SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer( 1390 InputFileText, InputFile.getBufferIdentifier()), 1391 SMLoc()); 1392 1393 return CheckInput(SM, InputFileText, CheckStrings) ? EXIT_SUCCESS : 1; 1394 } 1395