1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // FileCheck does a line-by line check of a file that validates whether it 11 // contains the expected content. This is useful for regression tests etc. 12 // 13 // This program exits with an error status of 2 on error, exit status of 0 if 14 // the file matched the expected contents, and exit status of 1 if it did not 15 // contain the expected contents. 16 // 17 //===----------------------------------------------------------------------===// 18 19 #include "llvm/ADT/SmallString.h" 20 #include "llvm/ADT/StringExtras.h" 21 #include "llvm/ADT/StringMap.h" 22 #include "llvm/ADT/StringSet.h" 23 #include "llvm/Support/CommandLine.h" 24 #include "llvm/Support/MemoryBuffer.h" 25 #include "llvm/Support/PrettyStackTrace.h" 26 #include "llvm/Support/Regex.h" 27 #include "llvm/Support/Signals.h" 28 #include "llvm/Support/SourceMgr.h" 29 #include "llvm/Support/raw_ostream.h" 30 #include <algorithm> 31 #include <cctype> 32 #include <map> 33 #include <string> 34 #include <system_error> 35 #include <vector> 36 using namespace llvm; 37 38 static cl::opt<std::string> 39 CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required); 40 41 static cl::opt<std::string> 42 InputFilename("input-file", cl::desc("File to check (defaults to stdin)"), 43 cl::init("-"), cl::value_desc("filename")); 44 45 static cl::list<std::string> 46 CheckPrefixes("check-prefix", 47 cl::desc("Prefix to use from check file (defaults to 'CHECK')")); 48 static cl::alias CheckPrefixesAlias( 49 "check-prefixes", cl::aliasopt(CheckPrefixes), cl::CommaSeparated, 50 cl::NotHidden, 51 cl::desc( 52 "Alias for -check-prefix permitting multiple comma separated values")); 53 54 static cl::opt<bool> 55 NoCanonicalizeWhiteSpace("strict-whitespace", 56 cl::desc("Do not treat all horizontal whitespace as equivalent")); 57 58 static cl::list<std::string> ImplicitCheckNot( 59 "implicit-check-not", 60 cl::desc("Add an implicit negative check with this pattern to every\n" 61 "positive check. This can be used to ensure that no instances of\n" 62 "this pattern occur which are not matched by a positive pattern"), 63 cl::value_desc("pattern")); 64 65 static cl::opt<bool> AllowEmptyInput( 66 "allow-empty", cl::init(false), 67 cl::desc("Allow the input file to be empty. This is useful when making\n" 68 "checks that some error message does not occur, for example.")); 69 70 static cl::opt<bool> MatchFullLines( 71 "match-full-lines", cl::init(false), 72 cl::desc("Require all positive matches to cover an entire input line.\n" 73 "Allows leading and trailing whitespace if --strict-whitespace\n" 74 "is not also passed.")); 75 76 typedef cl::list<std::string>::const_iterator prefix_iterator; 77 78 //===----------------------------------------------------------------------===// 79 // Pattern Handling Code. 80 //===----------------------------------------------------------------------===// 81 82 namespace Check { 83 enum CheckType { 84 CheckNone = 0, 85 CheckPlain, 86 CheckNext, 87 CheckSame, 88 CheckNot, 89 CheckDAG, 90 CheckLabel, 91 92 /// MatchEOF - When set, this pattern only matches the end of file. This is 93 /// used for trailing CHECK-NOTs. 94 CheckEOF, 95 /// CheckBadNot - Found -NOT combined with another CHECK suffix. 96 CheckBadNot 97 }; 98 } 99 100 class Pattern { 101 SMLoc PatternLoc; 102 103 /// FixedStr - If non-empty, this pattern is a fixed string match with the 104 /// specified fixed string. 105 StringRef FixedStr; 106 107 /// RegEx - If non-empty, this is a regex pattern. 108 std::string RegExStr; 109 110 /// VariableUses - Entries in this vector map to uses of a variable in the 111 /// pattern, e.g. "foo[[bar]]baz". In this case, the RegExStr will contain 112 /// "foobaz" and we'll get an entry in this vector that tells us to insert the 113 /// value of bar at offset 3. 114 std::vector<std::pair<StringRef, unsigned> > VariableUses; 115 116 /// VariableDefs - Maps definitions of variables to their parenthesized 117 /// capture numbers. 118 /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1. 119 std::map<StringRef, unsigned> VariableDefs; 120 121 Check::CheckType CheckTy; 122 123 /// \brief Contains the number of line this pattern is in. 124 unsigned LineNumber; 125 126 public: 127 explicit Pattern(Check::CheckType Ty) : CheckTy(Ty) {} 128 129 /// getLoc - Return the location in source code. 130 SMLoc getLoc() const { return PatternLoc; } 131 132 /// ParsePattern - Parse the given string into the Pattern. Prefix provides 133 /// which prefix is being matched, SM provides the SourceMgr used for error 134 /// reports, and LineNumber is the line number in the input file from which 135 /// the pattern string was read. Returns true in case of an error, false 136 /// otherwise. 137 bool ParsePattern(StringRef PatternStr, 138 StringRef Prefix, 139 SourceMgr &SM, 140 unsigned LineNumber); 141 142 /// Match - Match the pattern string against the input buffer Buffer. This 143 /// returns the position that is matched or npos if there is no match. If 144 /// there is a match, the size of the matched string is returned in MatchLen. 145 /// 146 /// The VariableTable StringMap provides the current values of filecheck 147 /// variables and is updated if this match defines new values. 148 size_t Match(StringRef Buffer, size_t &MatchLen, 149 StringMap<StringRef> &VariableTable) const; 150 151 /// PrintFailureInfo - Print additional information about a failure to match 152 /// involving this pattern. 153 void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer, 154 const StringMap<StringRef> &VariableTable) const; 155 156 bool hasVariable() const { return !(VariableUses.empty() && 157 VariableDefs.empty()); } 158 159 Check::CheckType getCheckTy() const { return CheckTy; } 160 161 private: 162 bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM); 163 void AddBackrefToRegEx(unsigned BackrefNum); 164 165 /// ComputeMatchDistance - Compute an arbitrary estimate for the quality of 166 /// matching this pattern at the start of \arg Buffer; a distance of zero 167 /// should correspond to a perfect match. 168 unsigned ComputeMatchDistance(StringRef Buffer, 169 const StringMap<StringRef> &VariableTable) const; 170 171 /// \brief Evaluates expression and stores the result to \p Value. 172 /// \return true on success. false when the expression has invalid syntax. 173 bool EvaluateExpression(StringRef Expr, std::string &Value) const; 174 175 /// \brief Finds the closing sequence of a regex variable usage or 176 /// definition. Str has to point in the beginning of the definition 177 /// (right after the opening sequence). 178 /// \return offset of the closing sequence within Str, or npos if it was not 179 /// found. 180 size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM); 181 }; 182 183 184 bool Pattern::ParsePattern(StringRef PatternStr, 185 StringRef Prefix, 186 SourceMgr &SM, 187 unsigned LineNumber) { 188 bool MatchFullLinesHere = MatchFullLines && CheckTy != Check::CheckNot; 189 190 this->LineNumber = LineNumber; 191 PatternLoc = SMLoc::getFromPointer(PatternStr.data()); 192 193 // Ignore trailing whitespace. 194 while (!PatternStr.empty() && 195 (PatternStr.back() == ' ' || PatternStr.back() == '\t')) 196 PatternStr = PatternStr.substr(0, PatternStr.size()-1); 197 198 // Check that there is something on the line. 199 if (PatternStr.empty()) { 200 SM.PrintMessage(PatternLoc, SourceMgr::DK_Error, 201 "found empty check string with prefix '" + 202 Prefix + ":'"); 203 return true; 204 } 205 206 // Check to see if this is a fixed string, or if it has regex pieces. 207 if (!MatchFullLinesHere && 208 (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos && 209 PatternStr.find("[[") == StringRef::npos))) { 210 FixedStr = PatternStr; 211 return false; 212 } 213 214 if (MatchFullLinesHere) { 215 RegExStr += '^'; 216 if (!NoCanonicalizeWhiteSpace) 217 RegExStr += " *"; 218 } 219 220 // Paren value #0 is for the fully matched string. Any new parenthesized 221 // values add from there. 222 unsigned CurParen = 1; 223 224 // Otherwise, there is at least one regex piece. Build up the regex pattern 225 // by escaping scary characters in fixed strings, building up one big regex. 226 while (!PatternStr.empty()) { 227 // RegEx matches. 228 if (PatternStr.startswith("{{")) { 229 // This is the start of a regex match. Scan for the }}. 230 size_t End = PatternStr.find("}}"); 231 if (End == StringRef::npos) { 232 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 233 SourceMgr::DK_Error, 234 "found start of regex string with no end '}}'"); 235 return true; 236 } 237 238 // Enclose {{}} patterns in parens just like [[]] even though we're not 239 // capturing the result for any purpose. This is required in case the 240 // expression contains an alternation like: CHECK: abc{{x|z}}def. We 241 // want this to turn into: "abc(x|z)def" not "abcx|zdef". 242 RegExStr += '('; 243 ++CurParen; 244 245 if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM)) 246 return true; 247 RegExStr += ')'; 248 249 PatternStr = PatternStr.substr(End+2); 250 continue; 251 } 252 253 // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .* 254 // (or some other regex) and assigns it to the FileCheck variable 'foo'. The 255 // second form is [[foo]] which is a reference to foo. The variable name 256 // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject 257 // it. This is to catch some common errors. 258 if (PatternStr.startswith("[[")) { 259 // Find the closing bracket pair ending the match. End is going to be an 260 // offset relative to the beginning of the match string. 261 size_t End = FindRegexVarEnd(PatternStr.substr(2), SM); 262 263 if (End == StringRef::npos) { 264 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 265 SourceMgr::DK_Error, 266 "invalid named regex reference, no ]] found"); 267 return true; 268 } 269 270 StringRef MatchStr = PatternStr.substr(2, End); 271 PatternStr = PatternStr.substr(End+4); 272 273 // Get the regex name (e.g. "foo"). 274 size_t NameEnd = MatchStr.find(':'); 275 StringRef Name = MatchStr.substr(0, NameEnd); 276 277 if (Name.empty()) { 278 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 279 "invalid name in named regex: empty name"); 280 return true; 281 } 282 283 // Verify that the name/expression is well formed. FileCheck currently 284 // supports @LINE, @LINE+number, @LINE-number expressions. The check here 285 // is relaxed, more strict check is performed in \c EvaluateExpression. 286 bool IsExpression = false; 287 for (unsigned i = 0, e = Name.size(); i != e; ++i) { 288 if (i == 0 && Name[i] == '@') { 289 if (NameEnd != StringRef::npos) { 290 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 291 SourceMgr::DK_Error, 292 "invalid name in named regex definition"); 293 return true; 294 } 295 IsExpression = true; 296 continue; 297 } 298 if (Name[i] != '_' && !isalnum(Name[i]) && 299 (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) { 300 SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i), 301 SourceMgr::DK_Error, "invalid name in named regex"); 302 return true; 303 } 304 } 305 306 // Name can't start with a digit. 307 if (isdigit(static_cast<unsigned char>(Name[0]))) { 308 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 309 "invalid name in named regex"); 310 return true; 311 } 312 313 // Handle [[foo]]. 314 if (NameEnd == StringRef::npos) { 315 // Handle variables that were defined earlier on the same line by 316 // emitting a backreference. 317 if (VariableDefs.find(Name) != VariableDefs.end()) { 318 unsigned VarParenNum = VariableDefs[Name]; 319 if (VarParenNum < 1 || VarParenNum > 9) { 320 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 321 SourceMgr::DK_Error, 322 "Can't back-reference more than 9 variables"); 323 return true; 324 } 325 AddBackrefToRegEx(VarParenNum); 326 } else { 327 VariableUses.push_back(std::make_pair(Name, RegExStr.size())); 328 } 329 continue; 330 } 331 332 // Handle [[foo:.*]]. 333 VariableDefs[Name] = CurParen; 334 RegExStr += '('; 335 ++CurParen; 336 337 if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM)) 338 return true; 339 340 RegExStr += ')'; 341 } 342 343 // Handle fixed string matches. 344 // Find the end, which is the start of the next regex. 345 size_t FixedMatchEnd = PatternStr.find("{{"); 346 FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[[")); 347 RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd)); 348 PatternStr = PatternStr.substr(FixedMatchEnd); 349 } 350 351 if (MatchFullLinesHere) { 352 if (!NoCanonicalizeWhiteSpace) 353 RegExStr += " *"; 354 RegExStr += '$'; 355 } 356 357 return false; 358 } 359 360 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, 361 SourceMgr &SM) { 362 Regex R(RS); 363 std::string Error; 364 if (!R.isValid(Error)) { 365 SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error, 366 "invalid regex: " + Error); 367 return true; 368 } 369 370 RegExStr += RS.str(); 371 CurParen += R.getNumMatches(); 372 return false; 373 } 374 375 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) { 376 assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number"); 377 std::string Backref = std::string("\\") + 378 std::string(1, '0' + BackrefNum); 379 RegExStr += Backref; 380 } 381 382 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const { 383 // The only supported expression is @LINE([\+-]\d+)? 384 if (!Expr.startswith("@LINE")) 385 return false; 386 Expr = Expr.substr(StringRef("@LINE").size()); 387 int Offset = 0; 388 if (!Expr.empty()) { 389 if (Expr[0] == '+') 390 Expr = Expr.substr(1); 391 else if (Expr[0] != '-') 392 return false; 393 if (Expr.getAsInteger(10, Offset)) 394 return false; 395 } 396 Value = llvm::itostr(LineNumber + Offset); 397 return true; 398 } 399 400 /// Match - Match the pattern string against the input buffer Buffer. This 401 /// returns the position that is matched or npos if there is no match. If 402 /// there is a match, the size of the matched string is returned in MatchLen. 403 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen, 404 StringMap<StringRef> &VariableTable) const { 405 // If this is the EOF pattern, match it immediately. 406 if (CheckTy == Check::CheckEOF) { 407 MatchLen = 0; 408 return Buffer.size(); 409 } 410 411 // If this is a fixed string pattern, just match it now. 412 if (!FixedStr.empty()) { 413 MatchLen = FixedStr.size(); 414 return Buffer.find(FixedStr); 415 } 416 417 // Regex match. 418 419 // If there are variable uses, we need to create a temporary string with the 420 // actual value. 421 StringRef RegExToMatch = RegExStr; 422 std::string TmpStr; 423 if (!VariableUses.empty()) { 424 TmpStr = RegExStr; 425 426 unsigned InsertOffset = 0; 427 for (const auto &VariableUse : VariableUses) { 428 std::string Value; 429 430 if (VariableUse.first[0] == '@') { 431 if (!EvaluateExpression(VariableUse.first, Value)) 432 return StringRef::npos; 433 } else { 434 StringMap<StringRef>::iterator it = 435 VariableTable.find(VariableUse.first); 436 // If the variable is undefined, return an error. 437 if (it == VariableTable.end()) 438 return StringRef::npos; 439 440 // Look up the value and escape it so that we can put it into the regex. 441 Value += Regex::escape(it->second); 442 } 443 444 // Plop it into the regex at the adjusted offset. 445 TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset, 446 Value.begin(), Value.end()); 447 InsertOffset += Value.size(); 448 } 449 450 // Match the newly constructed regex. 451 RegExToMatch = TmpStr; 452 } 453 454 455 SmallVector<StringRef, 4> MatchInfo; 456 if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo)) 457 return StringRef::npos; 458 459 // Successful regex match. 460 assert(!MatchInfo.empty() && "Didn't get any match"); 461 StringRef FullMatch = MatchInfo[0]; 462 463 // If this defines any variables, remember their values. 464 for (const auto &VariableDef : VariableDefs) { 465 assert(VariableDef.second < MatchInfo.size() && "Internal paren error"); 466 VariableTable[VariableDef.first] = MatchInfo[VariableDef.second]; 467 } 468 469 MatchLen = FullMatch.size(); 470 return FullMatch.data()-Buffer.data(); 471 } 472 473 unsigned Pattern::ComputeMatchDistance(StringRef Buffer, 474 const StringMap<StringRef> &VariableTable) const { 475 // Just compute the number of matching characters. For regular expressions, we 476 // just compare against the regex itself and hope for the best. 477 // 478 // FIXME: One easy improvement here is have the regex lib generate a single 479 // example regular expression which matches, and use that as the example 480 // string. 481 StringRef ExampleString(FixedStr); 482 if (ExampleString.empty()) 483 ExampleString = RegExStr; 484 485 // Only compare up to the first line in the buffer, or the string size. 486 StringRef BufferPrefix = Buffer.substr(0, ExampleString.size()); 487 BufferPrefix = BufferPrefix.split('\n').first; 488 return BufferPrefix.edit_distance(ExampleString); 489 } 490 491 void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer, 492 const StringMap<StringRef> &VariableTable) const{ 493 // If this was a regular expression using variables, print the current 494 // variable values. 495 if (!VariableUses.empty()) { 496 for (const auto &VariableUse : VariableUses) { 497 SmallString<256> Msg; 498 raw_svector_ostream OS(Msg); 499 StringRef Var = VariableUse.first; 500 if (Var[0] == '@') { 501 std::string Value; 502 if (EvaluateExpression(Var, Value)) { 503 OS << "with expression \""; 504 OS.write_escaped(Var) << "\" equal to \""; 505 OS.write_escaped(Value) << "\""; 506 } else { 507 OS << "uses incorrect expression \""; 508 OS.write_escaped(Var) << "\""; 509 } 510 } else { 511 StringMap<StringRef>::const_iterator it = VariableTable.find(Var); 512 513 // Check for undefined variable references. 514 if (it == VariableTable.end()) { 515 OS << "uses undefined variable \""; 516 OS.write_escaped(Var) << "\""; 517 } else { 518 OS << "with variable \""; 519 OS.write_escaped(Var) << "\" equal to \""; 520 OS.write_escaped(it->second) << "\""; 521 } 522 } 523 524 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 525 OS.str()); 526 } 527 } 528 529 // Attempt to find the closest/best fuzzy match. Usually an error happens 530 // because some string in the output didn't exactly match. In these cases, we 531 // would like to show the user a best guess at what "should have" matched, to 532 // save them having to actually check the input manually. 533 size_t NumLinesForward = 0; 534 size_t Best = StringRef::npos; 535 double BestQuality = 0; 536 537 // Use an arbitrary 4k limit on how far we will search. 538 for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) { 539 if (Buffer[i] == '\n') 540 ++NumLinesForward; 541 542 // Patterns have leading whitespace stripped, so skip whitespace when 543 // looking for something which looks like a pattern. 544 if (Buffer[i] == ' ' || Buffer[i] == '\t') 545 continue; 546 547 // Compute the "quality" of this match as an arbitrary combination of the 548 // match distance and the number of lines skipped to get to this match. 549 unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable); 550 double Quality = Distance + (NumLinesForward / 100.); 551 552 if (Quality < BestQuality || Best == StringRef::npos) { 553 Best = i; 554 BestQuality = Quality; 555 } 556 } 557 558 // Print the "possible intended match here" line if we found something 559 // reasonable and not equal to what we showed in the "scanning from here" 560 // line. 561 if (Best && Best != StringRef::npos && BestQuality < 50) { 562 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best), 563 SourceMgr::DK_Note, "possible intended match here"); 564 565 // FIXME: If we wanted to be really friendly we would show why the match 566 // failed, as it can be hard to spot simple one character differences. 567 } 568 } 569 570 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) { 571 // Offset keeps track of the current offset within the input Str 572 size_t Offset = 0; 573 // [...] Nesting depth 574 size_t BracketDepth = 0; 575 576 while (!Str.empty()) { 577 if (Str.startswith("]]") && BracketDepth == 0) 578 return Offset; 579 if (Str[0] == '\\') { 580 // Backslash escapes the next char within regexes, so skip them both. 581 Str = Str.substr(2); 582 Offset += 2; 583 } else { 584 switch (Str[0]) { 585 default: 586 break; 587 case '[': 588 BracketDepth++; 589 break; 590 case ']': 591 if (BracketDepth == 0) { 592 SM.PrintMessage(SMLoc::getFromPointer(Str.data()), 593 SourceMgr::DK_Error, 594 "missing closing \"]\" for regex variable"); 595 exit(1); 596 } 597 BracketDepth--; 598 break; 599 } 600 Str = Str.substr(1); 601 Offset++; 602 } 603 } 604 605 return StringRef::npos; 606 } 607 608 609 //===----------------------------------------------------------------------===// 610 // Check Strings. 611 //===----------------------------------------------------------------------===// 612 613 /// CheckString - This is a check that we found in the input file. 614 struct CheckString { 615 /// Pat - The pattern to match. 616 Pattern Pat; 617 618 /// Prefix - Which prefix name this check matched. 619 StringRef Prefix; 620 621 /// Loc - The location in the match file that the check string was specified. 622 SMLoc Loc; 623 624 /// CheckTy - Specify what kind of check this is. e.g. CHECK-NEXT: directive, 625 /// as opposed to a CHECK: directive. 626 // Check::CheckType CheckTy; 627 628 /// DagNotStrings - These are all of the strings that are disallowed from 629 /// occurring between this match string and the previous one (or start of 630 /// file). 631 std::vector<Pattern> DagNotStrings; 632 633 CheckString(const Pattern &P, StringRef S, SMLoc L) 634 : Pat(P), Prefix(S), Loc(L) {} 635 636 /// Check - Match check string and its "not strings" and/or "dag strings". 637 size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode, 638 size_t &MatchLen, StringMap<StringRef> &VariableTable) const; 639 640 /// CheckNext - Verify there is a single line in the given buffer. 641 bool CheckNext(const SourceMgr &SM, StringRef Buffer) const; 642 643 /// CheckSame - Verify there is no newline in the given buffer. 644 bool CheckSame(const SourceMgr &SM, StringRef Buffer) const; 645 646 /// CheckNot - Verify there's no "not strings" in the given buffer. 647 bool CheckNot(const SourceMgr &SM, StringRef Buffer, 648 const std::vector<const Pattern *> &NotStrings, 649 StringMap<StringRef> &VariableTable) const; 650 651 /// CheckDag - Match "dag strings" and their mixed "not strings". 652 size_t CheckDag(const SourceMgr &SM, StringRef Buffer, 653 std::vector<const Pattern *> &NotStrings, 654 StringMap<StringRef> &VariableTable) const; 655 }; 656 657 /// Canonicalize whitespaces in the file. Line endings are replaced with 658 /// UNIX-style '\n'. 659 /// 660 /// \param PreserveHorizontal Don't squash consecutive horizontal whitespace 661 /// characters to a single space. 662 static StringRef CanonicalizeFile(MemoryBuffer &MB, bool PreserveHorizontal, 663 SmallVectorImpl<char> &OutputBuffer) { 664 OutputBuffer.reserve(MB.getBufferSize()); 665 666 for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd(); 667 Ptr != End; ++Ptr) { 668 // Eliminate trailing dosish \r. 669 if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') { 670 continue; 671 } 672 673 // If current char is not a horizontal whitespace or if horizontal 674 // whitespace canonicalization is disabled, dump it to output as is. 675 if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) { 676 OutputBuffer.push_back(*Ptr); 677 continue; 678 } 679 680 // Otherwise, add one space and advance over neighboring space. 681 OutputBuffer.push_back(' '); 682 while (Ptr+1 != End && 683 (Ptr[1] == ' ' || Ptr[1] == '\t')) 684 ++Ptr; 685 } 686 687 // Add a null byte and then return all but that byte. 688 OutputBuffer.push_back('\0'); 689 return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1); 690 } 691 692 static bool IsPartOfWord(char c) { 693 return (isalnum(c) || c == '-' || c == '_'); 694 } 695 696 // Get the size of the prefix extension. 697 static size_t CheckTypeSize(Check::CheckType Ty) { 698 switch (Ty) { 699 case Check::CheckNone: 700 case Check::CheckBadNot: 701 return 0; 702 703 case Check::CheckPlain: 704 return sizeof(":") - 1; 705 706 case Check::CheckNext: 707 return sizeof("-NEXT:") - 1; 708 709 case Check::CheckSame: 710 return sizeof("-SAME:") - 1; 711 712 case Check::CheckNot: 713 return sizeof("-NOT:") - 1; 714 715 case Check::CheckDAG: 716 return sizeof("-DAG:") - 1; 717 718 case Check::CheckLabel: 719 return sizeof("-LABEL:") - 1; 720 721 case Check::CheckEOF: 722 llvm_unreachable("Should not be using EOF size"); 723 } 724 725 llvm_unreachable("Bad check type"); 726 } 727 728 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) { 729 char NextChar = Buffer[Prefix.size()]; 730 731 // Verify that the : is present after the prefix. 732 if (NextChar == ':') 733 return Check::CheckPlain; 734 735 if (NextChar != '-') 736 return Check::CheckNone; 737 738 StringRef Rest = Buffer.drop_front(Prefix.size() + 1); 739 if (Rest.startswith("NEXT:")) 740 return Check::CheckNext; 741 742 if (Rest.startswith("SAME:")) 743 return Check::CheckSame; 744 745 if (Rest.startswith("NOT:")) 746 return Check::CheckNot; 747 748 if (Rest.startswith("DAG:")) 749 return Check::CheckDAG; 750 751 if (Rest.startswith("LABEL:")) 752 return Check::CheckLabel; 753 754 // You can't combine -NOT with another suffix. 755 if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") || 756 Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") || 757 Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:")) 758 return Check::CheckBadNot; 759 760 return Check::CheckNone; 761 } 762 763 // From the given position, find the next character after the word. 764 static size_t SkipWord(StringRef Str, size_t Loc) { 765 while (Loc < Str.size() && IsPartOfWord(Str[Loc])) 766 ++Loc; 767 return Loc; 768 } 769 770 // Try to find the first match in buffer for any prefix. If a valid match is 771 // found, return that prefix and set its type and location. If there are almost 772 // matches (e.g. the actual prefix string is found, but is not an actual check 773 // string), but no valid match, return an empty string and set the position to 774 // resume searching from. If no partial matches are found, return an empty 775 // string and the location will be StringRef::npos. If one prefix is a substring 776 // of another, the maximal match should be found. e.g. if "A" and "AA" are 777 // prefixes then AA-CHECK: should match the second one. 778 static StringRef FindFirstCandidateMatch(StringRef &Buffer, 779 Check::CheckType &CheckTy, 780 size_t &CheckLoc) { 781 StringRef FirstPrefix; 782 size_t FirstLoc = StringRef::npos; 783 size_t SearchLoc = StringRef::npos; 784 Check::CheckType FirstTy = Check::CheckNone; 785 786 CheckTy = Check::CheckNone; 787 CheckLoc = StringRef::npos; 788 789 for (StringRef Prefix : CheckPrefixes) { 790 size_t PrefixLoc = Buffer.find(Prefix); 791 792 if (PrefixLoc == StringRef::npos) 793 continue; 794 795 // Track where we are searching for invalid prefixes that look almost right. 796 // We need to only advance to the first partial match on the next attempt 797 // since a partial match could be a substring of a later, valid prefix. 798 // Need to skip to the end of the word, otherwise we could end up 799 // matching a prefix in a substring later. 800 if (PrefixLoc < SearchLoc) 801 SearchLoc = SkipWord(Buffer, PrefixLoc); 802 803 // We only want to find the first match to avoid skipping some. 804 if (PrefixLoc > FirstLoc) 805 continue; 806 // If one matching check-prefix is a prefix of another, choose the 807 // longer one. 808 if (PrefixLoc == FirstLoc && Prefix.size() < FirstPrefix.size()) 809 continue; 810 811 StringRef Rest = Buffer.drop_front(PrefixLoc); 812 // Make sure we have actually found the prefix, and not a word containing 813 // it. This should also prevent matching the wrong prefix when one is a 814 // substring of another. 815 if (PrefixLoc != 0 && IsPartOfWord(Buffer[PrefixLoc - 1])) 816 FirstTy = Check::CheckNone; 817 else 818 FirstTy = FindCheckType(Rest, Prefix); 819 820 FirstLoc = PrefixLoc; 821 FirstPrefix = Prefix; 822 } 823 824 // If the first prefix is invalid, we should continue the search after it. 825 if (FirstTy == Check::CheckNone) { 826 CheckLoc = SearchLoc; 827 return ""; 828 } 829 830 CheckTy = FirstTy; 831 CheckLoc = FirstLoc; 832 return FirstPrefix; 833 } 834 835 static StringRef FindFirstMatchingPrefix(StringRef &Buffer, 836 unsigned &LineNumber, 837 Check::CheckType &CheckTy, 838 size_t &CheckLoc) { 839 while (!Buffer.empty()) { 840 StringRef Prefix = FindFirstCandidateMatch(Buffer, CheckTy, CheckLoc); 841 // If we found a real match, we are done. 842 if (!Prefix.empty()) { 843 LineNumber += Buffer.substr(0, CheckLoc).count('\n'); 844 return Prefix; 845 } 846 847 // We didn't find any almost matches either, we are also done. 848 if (CheckLoc == StringRef::npos) 849 return StringRef(); 850 851 LineNumber += Buffer.substr(0, CheckLoc + 1).count('\n'); 852 853 // Advance to the last possible match we found and try again. 854 Buffer = Buffer.drop_front(CheckLoc + 1); 855 } 856 857 return StringRef(); 858 } 859 860 /// ReadCheckFile - Read the check file, which specifies the sequence of 861 /// expected strings. The strings are added to the CheckStrings vector. 862 /// Returns true in case of an error, false otherwise. 863 static bool ReadCheckFile(SourceMgr &SM, StringRef Buffer, 864 std::vector<CheckString> &CheckStrings) { 865 std::vector<Pattern> ImplicitNegativeChecks; 866 for (const auto &PatternString : ImplicitCheckNot) { 867 // Create a buffer with fake command line content in order to display the 868 // command line option responsible for the specific implicit CHECK-NOT. 869 std::string Prefix = (Twine("-") + ImplicitCheckNot.ArgStr + "='").str(); 870 std::string Suffix = "'"; 871 std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy( 872 Prefix + PatternString + Suffix, "command line"); 873 874 StringRef PatternInBuffer = 875 CmdLine->getBuffer().substr(Prefix.size(), PatternString.size()); 876 SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc()); 877 878 ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot)); 879 ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer, 880 "IMPLICIT-CHECK", SM, 0); 881 } 882 883 884 std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks; 885 886 // LineNumber keeps track of the line on which CheckPrefix instances are 887 // found. 888 unsigned LineNumber = 1; 889 890 while (1) { 891 Check::CheckType CheckTy; 892 size_t PrefixLoc; 893 894 // See if a prefix occurs in the memory buffer. 895 StringRef UsedPrefix = FindFirstMatchingPrefix(Buffer, 896 LineNumber, 897 CheckTy, 898 PrefixLoc); 899 if (UsedPrefix.empty()) 900 break; 901 902 Buffer = Buffer.drop_front(PrefixLoc); 903 904 // Location to use for error messages. 905 const char *UsedPrefixStart = Buffer.data() + (PrefixLoc == 0 ? 0 : 1); 906 907 // PrefixLoc is to the start of the prefix. Skip to the end. 908 Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy)); 909 910 // Complain about useful-looking but unsupported suffixes. 911 if (CheckTy == Check::CheckBadNot) { 912 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), 913 SourceMgr::DK_Error, 914 "unsupported -NOT combo on prefix '" + UsedPrefix + "'"); 915 return true; 916 } 917 918 // Okay, we found the prefix, yay. Remember the rest of the line, but ignore 919 // leading and trailing whitespace. 920 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t")); 921 922 // Scan ahead to the end of line. 923 size_t EOL = Buffer.find_first_of("\n\r"); 924 925 // Remember the location of the start of the pattern, for diagnostics. 926 SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data()); 927 928 // Parse the pattern. 929 Pattern P(CheckTy); 930 if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber)) 931 return true; 932 933 // Verify that CHECK-LABEL lines do not define or use variables 934 if ((CheckTy == Check::CheckLabel) && P.hasVariable()) { 935 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart), 936 SourceMgr::DK_Error, 937 "found '" + UsedPrefix + "-LABEL:'" 938 " with variable definition or use"); 939 return true; 940 } 941 942 Buffer = Buffer.substr(EOL); 943 944 // Verify that CHECK-NEXT lines have at least one CHECK line before them. 945 if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame) && 946 CheckStrings.empty()) { 947 StringRef Type = CheckTy == Check::CheckNext ? "NEXT" : "SAME"; 948 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart), 949 SourceMgr::DK_Error, 950 "found '" + UsedPrefix + "-" + Type + "' without previous '" 951 + UsedPrefix + ": line"); 952 return true; 953 } 954 955 // Handle CHECK-DAG/-NOT. 956 if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) { 957 DagNotMatches.push_back(P); 958 continue; 959 } 960 961 // Okay, add the string we captured to the output vector and move on. 962 CheckStrings.emplace_back(P, UsedPrefix, PatternLoc); 963 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); 964 DagNotMatches = ImplicitNegativeChecks; 965 } 966 967 // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first 968 // prefix as a filler for the error message. 969 if (!DagNotMatches.empty()) { 970 CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(), 971 SMLoc::getFromPointer(Buffer.data())); 972 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); 973 } 974 975 if (CheckStrings.empty()) { 976 errs() << "error: no check strings found with prefix" 977 << (CheckPrefixes.size() > 1 ? "es " : " "); 978 prefix_iterator I = CheckPrefixes.begin(); 979 prefix_iterator E = CheckPrefixes.end(); 980 if (I != E) { 981 errs() << "\'" << *I << ":'"; 982 ++I; 983 } 984 for (; I != E; ++I) 985 errs() << ", \'" << *I << ":'"; 986 987 errs() << '\n'; 988 return true; 989 } 990 991 return false; 992 } 993 994 static void PrintCheckFailed(const SourceMgr &SM, SMLoc Loc, 995 const Pattern &Pat, StringRef Buffer, 996 StringMap<StringRef> &VariableTable) { 997 // Otherwise, we have an error, emit an error message. 998 SM.PrintMessage(Loc, SourceMgr::DK_Error, 999 "expected string not found in input"); 1000 1001 // Print the "scanning from here" line. If the current position is at the 1002 // end of a line, advance to the start of the next line. 1003 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r")); 1004 1005 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1006 "scanning from here"); 1007 1008 // Allow the pattern to print additional information if desired. 1009 Pat.PrintFailureInfo(SM, Buffer, VariableTable); 1010 } 1011 1012 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr, 1013 StringRef Buffer, 1014 StringMap<StringRef> &VariableTable) { 1015 PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable); 1016 } 1017 1018 /// CountNumNewlinesBetween - Count the number of newlines in the specified 1019 /// range. 1020 static unsigned CountNumNewlinesBetween(StringRef Range, 1021 const char *&FirstNewLine) { 1022 unsigned NumNewLines = 0; 1023 while (1) { 1024 // Scan for newline. 1025 Range = Range.substr(Range.find_first_of("\n\r")); 1026 if (Range.empty()) return NumNewLines; 1027 1028 ++NumNewLines; 1029 1030 // Handle \n\r and \r\n as a single newline. 1031 if (Range.size() > 1 && 1032 (Range[1] == '\n' || Range[1] == '\r') && 1033 (Range[0] != Range[1])) 1034 Range = Range.substr(1); 1035 Range = Range.substr(1); 1036 1037 if (NumNewLines == 1) 1038 FirstNewLine = Range.begin(); 1039 } 1040 } 1041 1042 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer, 1043 bool IsLabelScanMode, size_t &MatchLen, 1044 StringMap<StringRef> &VariableTable) const { 1045 size_t LastPos = 0; 1046 std::vector<const Pattern *> NotStrings; 1047 1048 // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL 1049 // bounds; we have not processed variable definitions within the bounded block 1050 // yet so cannot handle any final CHECK-DAG yet; this is handled when going 1051 // over the block again (including the last CHECK-LABEL) in normal mode. 1052 if (!IsLabelScanMode) { 1053 // Match "dag strings" (with mixed "not strings" if any). 1054 LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable); 1055 if (LastPos == StringRef::npos) 1056 return StringRef::npos; 1057 } 1058 1059 // Match itself from the last position after matching CHECK-DAG. 1060 StringRef MatchBuffer = Buffer.substr(LastPos); 1061 size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable); 1062 if (MatchPos == StringRef::npos) { 1063 PrintCheckFailed(SM, *this, MatchBuffer, VariableTable); 1064 return StringRef::npos; 1065 } 1066 1067 // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT 1068 // or CHECK-NOT 1069 if (!IsLabelScanMode) { 1070 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); 1071 1072 // If this check is a "CHECK-NEXT", verify that the previous match was on 1073 // the previous line (i.e. that there is one newline between them). 1074 if (CheckNext(SM, SkippedRegion)) 1075 return StringRef::npos; 1076 1077 // If this check is a "CHECK-SAME", verify that the previous match was on 1078 // the same line (i.e. that there is no newline between them). 1079 if (CheckSame(SM, SkippedRegion)) 1080 return StringRef::npos; 1081 1082 // If this match had "not strings", verify that they don't exist in the 1083 // skipped region. 1084 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) 1085 return StringRef::npos; 1086 } 1087 1088 return LastPos + MatchPos; 1089 } 1090 1091 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const { 1092 if (Pat.getCheckTy() != Check::CheckNext) 1093 return false; 1094 1095 // Count the number of newlines between the previous match and this one. 1096 assert(Buffer.data() != 1097 SM.getMemoryBuffer( 1098 SM.FindBufferContainingLoc( 1099 SMLoc::getFromPointer(Buffer.data())))->getBufferStart() && 1100 "CHECK-NEXT can't be the first check in a file"); 1101 1102 const char *FirstNewLine = nullptr; 1103 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 1104 1105 if (NumNewLines == 0) { 1106 SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix + 1107 "-NEXT: is on the same line as previous match"); 1108 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), 1109 SourceMgr::DK_Note, "'next' match was here"); 1110 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1111 "previous match ended here"); 1112 return true; 1113 } 1114 1115 if (NumNewLines != 1) { 1116 SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix + 1117 "-NEXT: is not on the line after the previous match"); 1118 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), 1119 SourceMgr::DK_Note, "'next' match was here"); 1120 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1121 "previous match ended here"); 1122 SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note, 1123 "non-matching line after previous match is here"); 1124 return true; 1125 } 1126 1127 return false; 1128 } 1129 1130 bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const { 1131 if (Pat.getCheckTy() != Check::CheckSame) 1132 return false; 1133 1134 // Count the number of newlines between the previous match and this one. 1135 assert(Buffer.data() != 1136 SM.getMemoryBuffer(SM.FindBufferContainingLoc( 1137 SMLoc::getFromPointer(Buffer.data()))) 1138 ->getBufferStart() && 1139 "CHECK-SAME can't be the first check in a file"); 1140 1141 const char *FirstNewLine = nullptr; 1142 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 1143 1144 if (NumNewLines != 0) { 1145 SM.PrintMessage(Loc, SourceMgr::DK_Error, 1146 Prefix + 1147 "-SAME: is not on the same line as the previous match"); 1148 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 1149 "'next' match was here"); 1150 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1151 "previous match ended here"); 1152 return true; 1153 } 1154 1155 return false; 1156 } 1157 1158 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer, 1159 const std::vector<const Pattern *> &NotStrings, 1160 StringMap<StringRef> &VariableTable) const { 1161 for (const Pattern *Pat : NotStrings) { 1162 assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!"); 1163 1164 size_t MatchLen = 0; 1165 size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable); 1166 1167 if (Pos == StringRef::npos) continue; 1168 1169 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()+Pos), 1170 SourceMgr::DK_Error, 1171 Prefix + "-NOT: string occurred!"); 1172 SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note, 1173 Prefix + "-NOT: pattern specified here"); 1174 return true; 1175 } 1176 1177 return false; 1178 } 1179 1180 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer, 1181 std::vector<const Pattern *> &NotStrings, 1182 StringMap<StringRef> &VariableTable) const { 1183 if (DagNotStrings.empty()) 1184 return 0; 1185 1186 size_t LastPos = 0; 1187 size_t StartPos = LastPos; 1188 1189 for (const Pattern &Pat : DagNotStrings) { 1190 assert((Pat.getCheckTy() == Check::CheckDAG || 1191 Pat.getCheckTy() == Check::CheckNot) && 1192 "Invalid CHECK-DAG or CHECK-NOT!"); 1193 1194 if (Pat.getCheckTy() == Check::CheckNot) { 1195 NotStrings.push_back(&Pat); 1196 continue; 1197 } 1198 1199 assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!"); 1200 1201 size_t MatchLen = 0, MatchPos; 1202 1203 // CHECK-DAG always matches from the start. 1204 StringRef MatchBuffer = Buffer.substr(StartPos); 1205 MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable); 1206 // With a group of CHECK-DAGs, a single mismatching means the match on 1207 // that group of CHECK-DAGs fails immediately. 1208 if (MatchPos == StringRef::npos) { 1209 PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable); 1210 return StringRef::npos; 1211 } 1212 // Re-calc it as the offset relative to the start of the original string. 1213 MatchPos += StartPos; 1214 1215 if (!NotStrings.empty()) { 1216 if (MatchPos < LastPos) { 1217 // Reordered? 1218 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos), 1219 SourceMgr::DK_Error, 1220 Prefix + "-DAG: found a match of CHECK-DAG" 1221 " reordering across a CHECK-NOT"); 1222 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos), 1223 SourceMgr::DK_Note, 1224 Prefix + "-DAG: the farthest match of CHECK-DAG" 1225 " is found here"); 1226 SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note, 1227 Prefix + "-NOT: the crossed pattern specified" 1228 " here"); 1229 SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note, 1230 Prefix + "-DAG: the reordered pattern specified" 1231 " here"); 1232 return StringRef::npos; 1233 } 1234 // All subsequent CHECK-DAGs should be matched from the farthest 1235 // position of all precedent CHECK-DAGs (including this one.) 1236 StartPos = LastPos; 1237 // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to 1238 // CHECK-DAG, verify that there's no 'not' strings occurred in that 1239 // region. 1240 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); 1241 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) 1242 return StringRef::npos; 1243 // Clear "not strings". 1244 NotStrings.clear(); 1245 } 1246 1247 // Update the last position with CHECK-DAG matches. 1248 LastPos = std::max(MatchPos + MatchLen, LastPos); 1249 } 1250 1251 return LastPos; 1252 } 1253 1254 // A check prefix must contain only alphanumeric, hyphens and underscores. 1255 static bool ValidateCheckPrefix(StringRef CheckPrefix) { 1256 Regex Validator("^[a-zA-Z0-9_-]*$"); 1257 return Validator.match(CheckPrefix); 1258 } 1259 1260 static bool ValidateCheckPrefixes() { 1261 StringSet<> PrefixSet; 1262 1263 for (StringRef Prefix : CheckPrefixes) { 1264 // Reject empty prefixes. 1265 if (Prefix == "") 1266 return false; 1267 1268 if (!PrefixSet.insert(Prefix).second) 1269 return false; 1270 1271 if (!ValidateCheckPrefix(Prefix)) 1272 return false; 1273 } 1274 1275 return true; 1276 } 1277 1278 // I don't think there's a way to specify an initial value for cl::list, 1279 // so if nothing was specified, add the default 1280 static void AddCheckPrefixIfNeeded() { 1281 if (CheckPrefixes.empty()) 1282 CheckPrefixes.push_back("CHECK"); 1283 } 1284 1285 static void DumpCommandLine(int argc, char **argv) { 1286 errs() << "FileCheck command line: "; 1287 for (int I = 0; I < argc; I++) 1288 errs() << " " << argv[I]; 1289 errs() << "\n"; 1290 } 1291 1292 /// Check the input to FileCheck provided in the \p Buffer against the \p 1293 /// CheckStrings read from the check file. 1294 /// 1295 /// Returns false if the input fails to satisfy the checks. 1296 bool CheckInput(SourceMgr &SM, StringRef Buffer, 1297 ArrayRef<CheckString> CheckStrings) { 1298 bool ChecksFailed = false; 1299 1300 /// VariableTable - This holds all the current filecheck variables. 1301 StringMap<StringRef> VariableTable; 1302 1303 unsigned i = 0, j = 0, e = CheckStrings.size(); 1304 while (true) { 1305 StringRef CheckRegion; 1306 if (j == e) { 1307 CheckRegion = Buffer; 1308 } else { 1309 const CheckString &CheckLabelStr = CheckStrings[j]; 1310 if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) { 1311 ++j; 1312 continue; 1313 } 1314 1315 // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG 1316 size_t MatchLabelLen = 0; 1317 size_t MatchLabelPos = CheckLabelStr.Check(SM, Buffer, true, 1318 MatchLabelLen, VariableTable); 1319 if (MatchLabelPos == StringRef::npos) 1320 // Immediately bail of CHECK-LABEL fails, nothing else we can do. 1321 return false; 1322 1323 CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen); 1324 Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen); 1325 ++j; 1326 } 1327 1328 for ( ; i != j; ++i) { 1329 const CheckString &CheckStr = CheckStrings[i]; 1330 1331 // Check each string within the scanned region, including a second check 1332 // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG) 1333 size_t MatchLen = 0; 1334 size_t MatchPos = CheckStr.Check(SM, CheckRegion, false, MatchLen, 1335 VariableTable); 1336 1337 if (MatchPos == StringRef::npos) { 1338 ChecksFailed = true; 1339 i = j; 1340 break; 1341 } 1342 1343 CheckRegion = CheckRegion.substr(MatchPos + MatchLen); 1344 } 1345 1346 if (j == e) 1347 break; 1348 } 1349 1350 // Success if no checks failed. 1351 return !ChecksFailed; 1352 } 1353 1354 int main(int argc, char **argv) { 1355 sys::PrintStackTraceOnErrorSignal(argv[0]); 1356 PrettyStackTraceProgram X(argc, argv); 1357 cl::ParseCommandLineOptions(argc, argv); 1358 1359 if (!ValidateCheckPrefixes()) { 1360 errs() << "Supplied check-prefix is invalid! Prefixes must be unique and " 1361 "start with a letter and contain only alphanumeric characters, " 1362 "hyphens and underscores\n"; 1363 return 2; 1364 } 1365 1366 AddCheckPrefixIfNeeded(); 1367 1368 SourceMgr SM; 1369 1370 // Read the expected strings from the check file. 1371 ErrorOr<std::unique_ptr<MemoryBuffer>> CheckFileOrErr = 1372 MemoryBuffer::getFileOrSTDIN(CheckFilename); 1373 if (std::error_code EC = CheckFileOrErr.getError()) { 1374 errs() << "Could not open check file '" << CheckFilename 1375 << "': " << EC.message() << '\n'; 1376 return 2; 1377 } 1378 MemoryBuffer &CheckFile = *CheckFileOrErr.get(); 1379 1380 SmallString<4096> CheckFileBuffer; 1381 StringRef CheckFileText = 1382 CanonicalizeFile(CheckFile, NoCanonicalizeWhiteSpace, CheckFileBuffer); 1383 1384 SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer( 1385 CheckFileText, CheckFile.getBufferIdentifier()), 1386 SMLoc()); 1387 1388 std::vector<CheckString> CheckStrings; 1389 if (ReadCheckFile(SM, CheckFileText, CheckStrings)) 1390 return 2; 1391 1392 1393 // Open the file to check and add it to SourceMgr. 1394 ErrorOr<std::unique_ptr<MemoryBuffer>> InputFileOrErr = 1395 MemoryBuffer::getFileOrSTDIN(InputFilename); 1396 if (std::error_code EC = InputFileOrErr.getError()) { 1397 errs() << "Could not open input file '" << InputFilename 1398 << "': " << EC.message() << '\n'; 1399 return 2; 1400 } 1401 MemoryBuffer &InputFile = *InputFileOrErr.get(); 1402 1403 if (InputFile.getBufferSize() == 0 && !AllowEmptyInput) { 1404 errs() << "FileCheck error: '" << InputFilename << "' is empty.\n"; 1405 DumpCommandLine(argc, argv); 1406 return 2; 1407 } 1408 1409 SmallString<4096> InputFileBuffer; 1410 StringRef InputFileText = 1411 CanonicalizeFile(InputFile, NoCanonicalizeWhiteSpace, InputFileBuffer); 1412 1413 SM.AddNewSourceBuffer( 1414 MemoryBuffer::getMemBuffer(InputFileText, InputFile.getBufferIdentifier()), SMLoc()); 1415 1416 return CheckInput(SM, InputFileText, CheckStrings) ? EXIT_SUCCESS : 1; 1417 } 1418