//===- FileCheck.cpp - Check that File's Contents match what is expected --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// FileCheck does a line-by-line check of a file that validates whether it
// contains the expected content.  This is useful for regression tests etc.
//
// This program exits with an error status of 2 on error, exit status of 0 if
// the file matched the expected contents, and exit status of 1 if it did not
// contain the expected contents.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cctype>
#include <map>
#include <string>
#include <system_error>
#include <vector>
using namespace llvm;

// Required positional argument: the file that contains the check directives.
static cl::opt<std::string>
    CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);

// -input-file: the file to verify.  The initial value is "-", which the help
// text describes as standard input.
static cl::opt<std::string>
    InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
                  cl::init("-"), cl::value_desc("filename"));

// -check-prefix: a list option, so it may be given more than once.  The help
// text names 'CHECK' as the default; this declaration itself sets no initial
// value.
static cl::list<std::string> CheckPrefixes(
    "check-prefix",
    cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
// -check-prefixes: alias of -check-prefix that also accepts a comma separated
// list of values.
static cl::alias CheckPrefixesAlias(
    "check-prefixes", cl::aliasopt(CheckPrefixes), cl::CommaSeparated,
    cl::NotHidden,
    cl::desc(
        "Alias for -check-prefix permitting multiple comma separated values"));
53 54 static cl::opt<bool> NoCanonicalizeWhiteSpace( 55 "strict-whitespace", 56 cl::desc("Do not treat all horizontal whitespace as equivalent")); 57 58 static cl::list<std::string> ImplicitCheckNot( 59 "implicit-check-not", 60 cl::desc("Add an implicit negative check with this pattern to every\n" 61 "positive check. This can be used to ensure that no instances of\n" 62 "this pattern occur which are not matched by a positive pattern"), 63 cl::value_desc("pattern")); 64 65 static cl::opt<bool> AllowEmptyInput( 66 "allow-empty", cl::init(false), 67 cl::desc("Allow the input file to be empty. This is useful when making\n" 68 "checks that some error message does not occur, for example.")); 69 70 static cl::opt<bool> MatchFullLines( 71 "match-full-lines", cl::init(false), 72 cl::desc("Require all positive matches to cover an entire input line.\n" 73 "Allows leading and trailing whitespace if --strict-whitespace\n" 74 "is not also passed.")); 75 76 typedef cl::list<std::string>::const_iterator prefix_iterator; 77 78 //===----------------------------------------------------------------------===// 79 // Pattern Handling Code. 80 //===----------------------------------------------------------------------===// 81 82 namespace Check { 83 enum CheckType { 84 CheckNone = 0, 85 CheckPlain, 86 CheckNext, 87 CheckSame, 88 CheckNot, 89 CheckDAG, 90 CheckLabel, 91 92 /// Indicates the pattern only matches the end of file. This is used for 93 /// trailing CHECK-NOTs. 94 CheckEOF, 95 96 /// Marks when parsing found a -NOT check combined with another CHECK suffix. 97 CheckBadNot 98 }; 99 } 100 101 class Pattern { 102 SMLoc PatternLoc; 103 104 /// A fixed string to match as the pattern or empty if this pattern requires 105 /// a regex match. 106 StringRef FixedStr; 107 108 /// A regex string to match as the pattern or empty if this pattern requires 109 /// a fixed string to match. 110 std::string RegExStr; 111 112 /// Entries in this vector map to uses of a variable in the pattern, e.g. 
113 /// "foo[[bar]]baz". In this case, the RegExStr will contain "foobaz" and 114 /// we'll get an entry in this vector that tells us to insert the value of 115 /// bar at offset 3. 116 std::vector<std::pair<StringRef, unsigned>> VariableUses; 117 118 /// Maps definitions of variables to their parenthesized capture numbers. 119 /// 120 /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 121 /// 1. 122 std::map<StringRef, unsigned> VariableDefs; 123 124 Check::CheckType CheckTy; 125 126 /// Contains the number of line this pattern is in. 127 unsigned LineNumber; 128 129 public: 130 explicit Pattern(Check::CheckType Ty) : CheckTy(Ty) {} 131 132 /// Returns the location in source code. 133 SMLoc getLoc() const { return PatternLoc; } 134 135 bool ParsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM, 136 unsigned LineNumber); 137 size_t Match(StringRef Buffer, size_t &MatchLen, 138 StringMap<StringRef> &VariableTable) const; 139 void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer, 140 const StringMap<StringRef> &VariableTable) const; 141 142 bool hasVariable() const { 143 return !(VariableUses.empty() && VariableDefs.empty()); 144 } 145 146 Check::CheckType getCheckTy() const { return CheckTy; } 147 148 private: 149 bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM); 150 void AddBackrefToRegEx(unsigned BackrefNum); 151 unsigned 152 ComputeMatchDistance(StringRef Buffer, 153 const StringMap<StringRef> &VariableTable) const; 154 bool EvaluateExpression(StringRef Expr, std::string &Value) const; 155 size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM); 156 }; 157 158 /// Parses the given string into the Pattern. 159 /// 160 /// \p Prefix provides which prefix is being matched, \p SM provides the 161 /// SourceMgr used for error reports, and \p LineNumber is the line number in 162 /// the input file from which the pattern string was read. Returns true in 163 /// case of an error, false otherwise. 
164 bool Pattern::ParsePattern(StringRef PatternStr, StringRef Prefix, 165 SourceMgr &SM, unsigned LineNumber) { 166 bool MatchFullLinesHere = MatchFullLines && CheckTy != Check::CheckNot; 167 168 this->LineNumber = LineNumber; 169 PatternLoc = SMLoc::getFromPointer(PatternStr.data()); 170 171 // Ignore trailing whitespace. 172 while (!PatternStr.empty() && 173 (PatternStr.back() == ' ' || PatternStr.back() == '\t')) 174 PatternStr = PatternStr.substr(0, PatternStr.size() - 1); 175 176 // Check that there is something on the line. 177 if (PatternStr.empty()) { 178 SM.PrintMessage(PatternLoc, SourceMgr::DK_Error, 179 "found empty check string with prefix '" + Prefix + ":'"); 180 return true; 181 } 182 183 // Check to see if this is a fixed string, or if it has regex pieces. 184 if (!MatchFullLinesHere && 185 (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos && 186 PatternStr.find("[[") == StringRef::npos))) { 187 FixedStr = PatternStr; 188 return false; 189 } 190 191 if (MatchFullLinesHere) { 192 RegExStr += '^'; 193 if (!NoCanonicalizeWhiteSpace) 194 RegExStr += " *"; 195 } 196 197 // Paren value #0 is for the fully matched string. Any new parenthesized 198 // values add from there. 199 unsigned CurParen = 1; 200 201 // Otherwise, there is at least one regex piece. Build up the regex pattern 202 // by escaping scary characters in fixed strings, building up one big regex. 203 while (!PatternStr.empty()) { 204 // RegEx matches. 205 if (PatternStr.startswith("{{")) { 206 // This is the start of a regex match. Scan for the }}. 207 size_t End = PatternStr.find("}}"); 208 if (End == StringRef::npos) { 209 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 210 SourceMgr::DK_Error, 211 "found start of regex string with no end '}}'"); 212 return true; 213 } 214 215 // Enclose {{}} patterns in parens just like [[]] even though we're not 216 // capturing the result for any purpose. 
This is required in case the 217 // expression contains an alternation like: CHECK: abc{{x|z}}def. We 218 // want this to turn into: "abc(x|z)def" not "abcx|zdef". 219 RegExStr += '('; 220 ++CurParen; 221 222 if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM)) 223 return true; 224 RegExStr += ')'; 225 226 PatternStr = PatternStr.substr(End + 2); 227 continue; 228 } 229 230 // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .* 231 // (or some other regex) and assigns it to the FileCheck variable 'foo'. The 232 // second form is [[foo]] which is a reference to foo. The variable name 233 // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject 234 // it. This is to catch some common errors. 235 if (PatternStr.startswith("[[")) { 236 // Find the closing bracket pair ending the match. End is going to be an 237 // offset relative to the beginning of the match string. 238 size_t End = FindRegexVarEnd(PatternStr.substr(2), SM); 239 240 if (End == StringRef::npos) { 241 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 242 SourceMgr::DK_Error, 243 "invalid named regex reference, no ]] found"); 244 return true; 245 } 246 247 StringRef MatchStr = PatternStr.substr(2, End); 248 PatternStr = PatternStr.substr(End + 4); 249 250 // Get the regex name (e.g. "foo"). 251 size_t NameEnd = MatchStr.find(':'); 252 StringRef Name = MatchStr.substr(0, NameEnd); 253 254 if (Name.empty()) { 255 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 256 "invalid name in named regex: empty name"); 257 return true; 258 } 259 260 // Verify that the name/expression is well formed. FileCheck currently 261 // supports @LINE, @LINE+number, @LINE-number expressions. The check here 262 // is relaxed, more strict check is performed in \c EvaluateExpression. 
263 bool IsExpression = false; 264 for (unsigned i = 0, e = Name.size(); i != e; ++i) { 265 if (i == 0 && Name[i] == '@') { 266 if (NameEnd != StringRef::npos) { 267 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 268 SourceMgr::DK_Error, 269 "invalid name in named regex definition"); 270 return true; 271 } 272 IsExpression = true; 273 continue; 274 } 275 if (Name[i] != '_' && !isalnum(Name[i]) && 276 (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) { 277 SM.PrintMessage(SMLoc::getFromPointer(Name.data() + i), 278 SourceMgr::DK_Error, "invalid name in named regex"); 279 return true; 280 } 281 } 282 283 // Name can't start with a digit. 284 if (isdigit(static_cast<unsigned char>(Name[0]))) { 285 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 286 "invalid name in named regex"); 287 return true; 288 } 289 290 // Handle [[foo]]. 291 if (NameEnd == StringRef::npos) { 292 // Handle variables that were defined earlier on the same line by 293 // emitting a backreference. 294 if (VariableDefs.find(Name) != VariableDefs.end()) { 295 unsigned VarParenNum = VariableDefs[Name]; 296 if (VarParenNum < 1 || VarParenNum > 9) { 297 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 298 SourceMgr::DK_Error, 299 "Can't back-reference more than 9 variables"); 300 return true; 301 } 302 AddBackrefToRegEx(VarParenNum); 303 } else { 304 VariableUses.push_back(std::make_pair(Name, RegExStr.size())); 305 } 306 continue; 307 } 308 309 // Handle [[foo:.*]]. 310 VariableDefs[Name] = CurParen; 311 RegExStr += '('; 312 ++CurParen; 313 314 if (AddRegExToRegEx(MatchStr.substr(NameEnd + 1), CurParen, SM)) 315 return true; 316 317 RegExStr += ')'; 318 } 319 320 // Handle fixed string matches. 321 // Find the end, which is the start of the next regex. 
322 size_t FixedMatchEnd = PatternStr.find("{{"); 323 FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[[")); 324 RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd)); 325 PatternStr = PatternStr.substr(FixedMatchEnd); 326 } 327 328 if (MatchFullLinesHere) { 329 if (!NoCanonicalizeWhiteSpace) 330 RegExStr += " *"; 331 RegExStr += '$'; 332 } 333 334 return false; 335 } 336 337 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) { 338 Regex R(RS); 339 std::string Error; 340 if (!R.isValid(Error)) { 341 SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error, 342 "invalid regex: " + Error); 343 return true; 344 } 345 346 RegExStr += RS.str(); 347 CurParen += R.getNumMatches(); 348 return false; 349 } 350 351 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) { 352 assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number"); 353 std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum); 354 RegExStr += Backref; 355 } 356 357 /// Evaluates expression and stores the result to \p Value. 358 /// 359 /// Returns true on success and false when the expression has invalid syntax. 360 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const { 361 // The only supported expression is @LINE([\+-]\d+)? 362 if (!Expr.startswith("@LINE")) 363 return false; 364 Expr = Expr.substr(StringRef("@LINE").size()); 365 int Offset = 0; 366 if (!Expr.empty()) { 367 if (Expr[0] == '+') 368 Expr = Expr.substr(1); 369 else if (Expr[0] != '-') 370 return false; 371 if (Expr.getAsInteger(10, Offset)) 372 return false; 373 } 374 Value = llvm::itostr(LineNumber + Offset); 375 return true; 376 } 377 378 /// Matches the pattern string against the input buffer \p Buffer 379 /// 380 /// This returns the position that is matched or npos if there is no match. If 381 /// there is a match, the size of the matched string is returned in \p 382 /// MatchLen. 
383 /// 384 /// The \p VariableTable StringMap provides the current values of filecheck 385 /// variables and is updated if this match defines new values. 386 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen, 387 StringMap<StringRef> &VariableTable) const { 388 // If this is the EOF pattern, match it immediately. 389 if (CheckTy == Check::CheckEOF) { 390 MatchLen = 0; 391 return Buffer.size(); 392 } 393 394 // If this is a fixed string pattern, just match it now. 395 if (!FixedStr.empty()) { 396 MatchLen = FixedStr.size(); 397 return Buffer.find(FixedStr); 398 } 399 400 // Regex match. 401 402 // If there are variable uses, we need to create a temporary string with the 403 // actual value. 404 StringRef RegExToMatch = RegExStr; 405 std::string TmpStr; 406 if (!VariableUses.empty()) { 407 TmpStr = RegExStr; 408 409 unsigned InsertOffset = 0; 410 for (const auto &VariableUse : VariableUses) { 411 std::string Value; 412 413 if (VariableUse.first[0] == '@') { 414 if (!EvaluateExpression(VariableUse.first, Value)) 415 return StringRef::npos; 416 } else { 417 StringMap<StringRef>::iterator it = 418 VariableTable.find(VariableUse.first); 419 // If the variable is undefined, return an error. 420 if (it == VariableTable.end()) 421 return StringRef::npos; 422 423 // Look up the value and escape it so that we can put it into the regex. 424 Value += Regex::escape(it->second); 425 } 426 427 // Plop it into the regex at the adjusted offset. 428 TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset, 429 Value.begin(), Value.end()); 430 InsertOffset += Value.size(); 431 } 432 433 // Match the newly constructed regex. 434 RegExToMatch = TmpStr; 435 } 436 437 SmallVector<StringRef, 4> MatchInfo; 438 if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo)) 439 return StringRef::npos; 440 441 // Successful regex match. 
442 assert(!MatchInfo.empty() && "Didn't get any match"); 443 StringRef FullMatch = MatchInfo[0]; 444 445 // If this defines any variables, remember their values. 446 for (const auto &VariableDef : VariableDefs) { 447 assert(VariableDef.second < MatchInfo.size() && "Internal paren error"); 448 VariableTable[VariableDef.first] = MatchInfo[VariableDef.second]; 449 } 450 451 MatchLen = FullMatch.size(); 452 return FullMatch.data() - Buffer.data(); 453 } 454 455 456 /// Computes an arbitrary estimate for the quality of matching this pattern at 457 /// the start of \p Buffer; a distance of zero should correspond to a perfect 458 /// match. 459 unsigned 460 Pattern::ComputeMatchDistance(StringRef Buffer, 461 const StringMap<StringRef> &VariableTable) const { 462 // Just compute the number of matching characters. For regular expressions, we 463 // just compare against the regex itself and hope for the best. 464 // 465 // FIXME: One easy improvement here is have the regex lib generate a single 466 // example regular expression which matches, and use that as the example 467 // string. 468 StringRef ExampleString(FixedStr); 469 if (ExampleString.empty()) 470 ExampleString = RegExStr; 471 472 // Only compare up to the first line in the buffer, or the string size. 473 StringRef BufferPrefix = Buffer.substr(0, ExampleString.size()); 474 BufferPrefix = BufferPrefix.split('\n').first; 475 return BufferPrefix.edit_distance(ExampleString); 476 } 477 478 /// Prints additional information about a failure to match involving this 479 /// pattern. 480 void Pattern::PrintFailureInfo( 481 const SourceMgr &SM, StringRef Buffer, 482 const StringMap<StringRef> &VariableTable) const { 483 // If this was a regular expression using variables, print the current 484 // variable values. 
485 if (!VariableUses.empty()) { 486 for (const auto &VariableUse : VariableUses) { 487 SmallString<256> Msg; 488 raw_svector_ostream OS(Msg); 489 StringRef Var = VariableUse.first; 490 if (Var[0] == '@') { 491 std::string Value; 492 if (EvaluateExpression(Var, Value)) { 493 OS << "with expression \""; 494 OS.write_escaped(Var) << "\" equal to \""; 495 OS.write_escaped(Value) << "\""; 496 } else { 497 OS << "uses incorrect expression \""; 498 OS.write_escaped(Var) << "\""; 499 } 500 } else { 501 StringMap<StringRef>::const_iterator it = VariableTable.find(Var); 502 503 // Check for undefined variable references. 504 if (it == VariableTable.end()) { 505 OS << "uses undefined variable \""; 506 OS.write_escaped(Var) << "\""; 507 } else { 508 OS << "with variable \""; 509 OS.write_escaped(Var) << "\" equal to \""; 510 OS.write_escaped(it->second) << "\""; 511 } 512 } 513 514 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 515 OS.str()); 516 } 517 } 518 519 // Attempt to find the closest/best fuzzy match. Usually an error happens 520 // because some string in the output didn't exactly match. In these cases, we 521 // would like to show the user a best guess at what "should have" matched, to 522 // save them having to actually check the input manually. 523 size_t NumLinesForward = 0; 524 size_t Best = StringRef::npos; 525 double BestQuality = 0; 526 527 // Use an arbitrary 4k limit on how far we will search. 528 for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) { 529 if (Buffer[i] == '\n') 530 ++NumLinesForward; 531 532 // Patterns have leading whitespace stripped, so skip whitespace when 533 // looking for something which looks like a pattern. 534 if (Buffer[i] == ' ' || Buffer[i] == '\t') 535 continue; 536 537 // Compute the "quality" of this match as an arbitrary combination of the 538 // match distance and the number of lines skipped to get to this match. 
539 unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable); 540 double Quality = Distance + (NumLinesForward / 100.); 541 542 if (Quality < BestQuality || Best == StringRef::npos) { 543 Best = i; 544 BestQuality = Quality; 545 } 546 } 547 548 // Print the "possible intended match here" line if we found something 549 // reasonable and not equal to what we showed in the "scanning from here" 550 // line. 551 if (Best && Best != StringRef::npos && BestQuality < 50) { 552 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best), 553 SourceMgr::DK_Note, "possible intended match here"); 554 555 // FIXME: If we wanted to be really friendly we would show why the match 556 // failed, as it can be hard to spot simple one character differences. 557 } 558 } 559 560 /// Finds the closing sequence of a regex variable usage or definition. 561 /// 562 /// \p Str has to point in the beginning of the definition (right after the 563 /// opening sequence). Returns the offset of the closing sequence within Str, 564 /// or npos if it was not found. 565 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) { 566 // Offset keeps track of the current offset within the input Str 567 size_t Offset = 0; 568 // [...] Nesting depth 569 size_t BracketDepth = 0; 570 571 while (!Str.empty()) { 572 if (Str.startswith("]]") && BracketDepth == 0) 573 return Offset; 574 if (Str[0] == '\\') { 575 // Backslash escapes the next char within regexes, so skip them both. 
576 Str = Str.substr(2); 577 Offset += 2; 578 } else { 579 switch (Str[0]) { 580 default: 581 break; 582 case '[': 583 BracketDepth++; 584 break; 585 case ']': 586 if (BracketDepth == 0) { 587 SM.PrintMessage(SMLoc::getFromPointer(Str.data()), 588 SourceMgr::DK_Error, 589 "missing closing \"]\" for regex variable"); 590 exit(1); 591 } 592 BracketDepth--; 593 break; 594 } 595 Str = Str.substr(1); 596 Offset++; 597 } 598 } 599 600 return StringRef::npos; 601 } 602 603 //===----------------------------------------------------------------------===// 604 // Check Strings. 605 //===----------------------------------------------------------------------===// 606 607 /// A check that we found in the input file. 608 struct CheckString { 609 /// The pattern to match. 610 Pattern Pat; 611 612 /// Which prefix name this check matched. 613 StringRef Prefix; 614 615 /// The location in the match file that the check string was specified. 616 SMLoc Loc; 617 618 /// All of the strings that are disallowed from occurring between this match 619 /// string and the previous one (or start of file). 620 std::vector<Pattern> DagNotStrings; 621 622 CheckString(const Pattern &P, StringRef S, SMLoc L) 623 : Pat(P), Prefix(S), Loc(L) {} 624 625 size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode, 626 size_t &MatchLen, StringMap<StringRef> &VariableTable) const; 627 628 bool CheckNext(const SourceMgr &SM, StringRef Buffer) const; 629 bool CheckSame(const SourceMgr &SM, StringRef Buffer) const; 630 bool CheckNot(const SourceMgr &SM, StringRef Buffer, 631 const std::vector<const Pattern *> &NotStrings, 632 StringMap<StringRef> &VariableTable) const; 633 size_t CheckDag(const SourceMgr &SM, StringRef Buffer, 634 std::vector<const Pattern *> &NotStrings, 635 StringMap<StringRef> &VariableTable) const; 636 }; 637 638 /// Canonicalize whitespaces in the file. Line endings are replaced with 639 /// UNIX-style '\n'. 
640 /// 641 /// \param PreserveHorizontal Don't squash consecutive horizontal whitespace 642 /// characters to a single space. 643 static StringRef CanonicalizeFile(MemoryBuffer &MB, bool PreserveHorizontal, 644 SmallVectorImpl<char> &OutputBuffer) { 645 OutputBuffer.reserve(MB.getBufferSize()); 646 647 for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd(); 648 Ptr != End; ++Ptr) { 649 // Eliminate trailing dosish \r. 650 if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') { 651 continue; 652 } 653 654 // If current char is not a horizontal whitespace or if horizontal 655 // whitespace canonicalization is disabled, dump it to output as is. 656 if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) { 657 OutputBuffer.push_back(*Ptr); 658 continue; 659 } 660 661 // Otherwise, add one space and advance over neighboring space. 662 OutputBuffer.push_back(' '); 663 while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t')) 664 ++Ptr; 665 } 666 667 // Add a null byte and then return all but that byte. 668 OutputBuffer.push_back('\0'); 669 return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1); 670 } 671 672 static bool IsPartOfWord(char c) { 673 return (isalnum(c) || c == '-' || c == '_'); 674 } 675 676 // Get the size of the prefix extension. 
677 static size_t CheckTypeSize(Check::CheckType Ty) { 678 switch (Ty) { 679 case Check::CheckNone: 680 case Check::CheckBadNot: 681 return 0; 682 683 case Check::CheckPlain: 684 return sizeof(":") - 1; 685 686 case Check::CheckNext: 687 return sizeof("-NEXT:") - 1; 688 689 case Check::CheckSame: 690 return sizeof("-SAME:") - 1; 691 692 case Check::CheckNot: 693 return sizeof("-NOT:") - 1; 694 695 case Check::CheckDAG: 696 return sizeof("-DAG:") - 1; 697 698 case Check::CheckLabel: 699 return sizeof("-LABEL:") - 1; 700 701 case Check::CheckEOF: 702 llvm_unreachable("Should not be using EOF size"); 703 } 704 705 llvm_unreachable("Bad check type"); 706 } 707 708 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) { 709 char NextChar = Buffer[Prefix.size()]; 710 711 // Verify that the : is present after the prefix. 712 if (NextChar == ':') 713 return Check::CheckPlain; 714 715 if (NextChar != '-') 716 return Check::CheckNone; 717 718 StringRef Rest = Buffer.drop_front(Prefix.size() + 1); 719 if (Rest.startswith("NEXT:")) 720 return Check::CheckNext; 721 722 if (Rest.startswith("SAME:")) 723 return Check::CheckSame; 724 725 if (Rest.startswith("NOT:")) 726 return Check::CheckNot; 727 728 if (Rest.startswith("DAG:")) 729 return Check::CheckDAG; 730 731 if (Rest.startswith("LABEL:")) 732 return Check::CheckLabel; 733 734 // You can't combine -NOT with another suffix. 735 if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") || 736 Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") || 737 Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:")) 738 return Check::CheckBadNot; 739 740 return Check::CheckNone; 741 } 742 743 // From the given position, find the next character after the word. 744 static size_t SkipWord(StringRef Str, size_t Loc) { 745 while (Loc < Str.size() && IsPartOfWord(Str[Loc])) 746 ++Loc; 747 return Loc; 748 } 749 750 // Try to find the first match in buffer for any prefix. 
If a valid match is 751 // found, return that prefix and set its type and location. If there are almost 752 // matches (e.g. the actual prefix string is found, but is not an actual check 753 // string), but no valid match, return an empty string and set the position to 754 // resume searching from. If no partial matches are found, return an empty 755 // string and the location will be StringRef::npos. If one prefix is a substring 756 // of another, the maximal match should be found. e.g. if "A" and "AA" are 757 // prefixes then AA-CHECK: should match the second one. 758 static StringRef FindFirstCandidateMatch(StringRef &Buffer, 759 Check::CheckType &CheckTy, 760 size_t &CheckLoc) { 761 StringRef FirstPrefix; 762 size_t FirstLoc = StringRef::npos; 763 size_t SearchLoc = StringRef::npos; 764 Check::CheckType FirstTy = Check::CheckNone; 765 766 CheckTy = Check::CheckNone; 767 CheckLoc = StringRef::npos; 768 769 for (StringRef Prefix : CheckPrefixes) { 770 size_t PrefixLoc = Buffer.find(Prefix); 771 772 if (PrefixLoc == StringRef::npos) 773 continue; 774 775 // Track where we are searching for invalid prefixes that look almost right. 776 // We need to only advance to the first partial match on the next attempt 777 // since a partial match could be a substring of a later, valid prefix. 778 // Need to skip to the end of the word, otherwise we could end up 779 // matching a prefix in a substring later. 780 if (PrefixLoc < SearchLoc) 781 SearchLoc = SkipWord(Buffer, PrefixLoc); 782 783 // We only want to find the first match to avoid skipping some. 784 if (PrefixLoc > FirstLoc) 785 continue; 786 // If one matching check-prefix is a prefix of another, choose the 787 // longer one. 788 if (PrefixLoc == FirstLoc && Prefix.size() < FirstPrefix.size()) 789 continue; 790 791 StringRef Rest = Buffer.drop_front(PrefixLoc); 792 // Make sure we have actually found the prefix, and not a word containing 793 // it. 
This should also prevent matching the wrong prefix when one is a 794 // substring of another. 795 if (PrefixLoc != 0 && IsPartOfWord(Buffer[PrefixLoc - 1])) 796 FirstTy = Check::CheckNone; 797 else 798 FirstTy = FindCheckType(Rest, Prefix); 799 800 FirstLoc = PrefixLoc; 801 FirstPrefix = Prefix; 802 } 803 804 // If the first prefix is invalid, we should continue the search after it. 805 if (FirstTy == Check::CheckNone) { 806 CheckLoc = SearchLoc; 807 return ""; 808 } 809 810 CheckTy = FirstTy; 811 CheckLoc = FirstLoc; 812 return FirstPrefix; 813 } 814 815 static StringRef FindFirstMatchingPrefix(StringRef &Buffer, 816 unsigned &LineNumber, 817 Check::CheckType &CheckTy, 818 size_t &CheckLoc) { 819 while (!Buffer.empty()) { 820 StringRef Prefix = FindFirstCandidateMatch(Buffer, CheckTy, CheckLoc); 821 // If we found a real match, we are done. 822 if (!Prefix.empty()) { 823 LineNumber += Buffer.substr(0, CheckLoc).count('\n'); 824 return Prefix; 825 } 826 827 // We didn't find any almost matches either, we are also done. 828 if (CheckLoc == StringRef::npos) 829 return StringRef(); 830 831 LineNumber += Buffer.substr(0, CheckLoc + 1).count('\n'); 832 833 // Advance to the last possible match we found and try again. 834 Buffer = Buffer.drop_front(CheckLoc + 1); 835 } 836 837 return StringRef(); 838 } 839 840 /// Read the check file, which specifies the sequence of expected strings. 841 /// 842 /// The strings are added to the CheckStrings vector. Returns true in case of 843 /// an error, false otherwise. 844 static bool ReadCheckFile(SourceMgr &SM, StringRef Buffer, 845 std::vector<CheckString> &CheckStrings) { 846 std::vector<Pattern> ImplicitNegativeChecks; 847 for (const auto &PatternString : ImplicitCheckNot) { 848 // Create a buffer with fake command line content in order to display the 849 // command line option responsible for the specific implicit CHECK-NOT. 
850 std::string Prefix = (Twine("-") + ImplicitCheckNot.ArgStr + "='").str(); 851 std::string Suffix = "'"; 852 std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy( 853 Prefix + PatternString + Suffix, "command line"); 854 855 StringRef PatternInBuffer = 856 CmdLine->getBuffer().substr(Prefix.size(), PatternString.size()); 857 SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc()); 858 859 ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot)); 860 ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer, 861 "IMPLICIT-CHECK", SM, 0); 862 } 863 864 std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks; 865 866 // LineNumber keeps track of the line on which CheckPrefix instances are 867 // found. 868 unsigned LineNumber = 1; 869 870 while (1) { 871 Check::CheckType CheckTy; 872 size_t PrefixLoc; 873 874 // See if a prefix occurs in the memory buffer. 875 StringRef UsedPrefix = 876 FindFirstMatchingPrefix(Buffer, LineNumber, CheckTy, PrefixLoc); 877 if (UsedPrefix.empty()) 878 break; 879 880 Buffer = Buffer.drop_front(PrefixLoc); 881 882 // Location to use for error messages. 883 const char *UsedPrefixStart = Buffer.data() + (PrefixLoc == 0 ? 0 : 1); 884 885 // PrefixLoc is to the start of the prefix. Skip to the end. 886 Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy)); 887 888 // Complain about useful-looking but unsupported suffixes. 889 if (CheckTy == Check::CheckBadNot) { 890 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error, 891 "unsupported -NOT combo on prefix '" + UsedPrefix + "'"); 892 return true; 893 } 894 895 // Okay, we found the prefix, yay. Remember the rest of the line, but ignore 896 // leading and trailing whitespace. 897 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t")); 898 899 // Scan ahead to the end of line. 900 size_t EOL = Buffer.find_first_of("\n\r"); 901 902 // Remember the location of the start of the pattern, for diagnostics. 
903 SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data()); 904 905 // Parse the pattern. 906 Pattern P(CheckTy); 907 if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber)) 908 return true; 909 910 // Verify that CHECK-LABEL lines do not define or use variables 911 if ((CheckTy == Check::CheckLabel) && P.hasVariable()) { 912 SM.PrintMessage( 913 SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error, 914 "found '" + UsedPrefix + "-LABEL:'" 915 " with variable definition or use"); 916 return true; 917 } 918 919 Buffer = Buffer.substr(EOL); 920 921 // Verify that CHECK-NEXT lines have at least one CHECK line before them. 922 if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame) && 923 CheckStrings.empty()) { 924 StringRef Type = CheckTy == Check::CheckNext ? "NEXT" : "SAME"; 925 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart), 926 SourceMgr::DK_Error, 927 "found '" + UsedPrefix + "-" + Type + 928 "' without previous '" + UsedPrefix + ": line"); 929 return true; 930 } 931 932 // Handle CHECK-DAG/-NOT. 933 if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) { 934 DagNotMatches.push_back(P); 935 continue; 936 } 937 938 // Okay, add the string we captured to the output vector and move on. 939 CheckStrings.emplace_back(P, UsedPrefix, PatternLoc); 940 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); 941 DagNotMatches = ImplicitNegativeChecks; 942 } 943 944 // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first 945 // prefix as a filler for the error message. 946 if (!DagNotMatches.empty()) { 947 CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(), 948 SMLoc::getFromPointer(Buffer.data())); 949 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); 950 } 951 952 if (CheckStrings.empty()) { 953 errs() << "error: no check strings found with prefix" 954 << (CheckPrefixes.size() > 1 ? 
"es " : " "); 955 prefix_iterator I = CheckPrefixes.begin(); 956 prefix_iterator E = CheckPrefixes.end(); 957 if (I != E) { 958 errs() << "\'" << *I << ":'"; 959 ++I; 960 } 961 for (; I != E; ++I) 962 errs() << ", \'" << *I << ":'"; 963 964 errs() << '\n'; 965 return true; 966 } 967 968 return false; 969 } 970 971 static void PrintCheckFailed(const SourceMgr &SM, SMLoc Loc, const Pattern &Pat, 972 StringRef Buffer, 973 StringMap<StringRef> &VariableTable) { 974 // Otherwise, we have an error, emit an error message. 975 SM.PrintMessage(Loc, SourceMgr::DK_Error, 976 "expected string not found in input"); 977 978 // Print the "scanning from here" line. If the current position is at the 979 // end of a line, advance to the start of the next line. 980 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r")); 981 982 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 983 "scanning from here"); 984 985 // Allow the pattern to print additional information if desired. 986 Pat.PrintFailureInfo(SM, Buffer, VariableTable); 987 } 988 989 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr, 990 StringRef Buffer, 991 StringMap<StringRef> &VariableTable) { 992 PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable); 993 } 994 995 /// Count the number of newlines in the specified range. 996 static unsigned CountNumNewlinesBetween(StringRef Range, 997 const char *&FirstNewLine) { 998 unsigned NumNewLines = 0; 999 while (1) { 1000 // Scan for newline. 1001 Range = Range.substr(Range.find_first_of("\n\r")); 1002 if (Range.empty()) 1003 return NumNewLines; 1004 1005 ++NumNewLines; 1006 1007 // Handle \n\r and \r\n as a single newline. 
1008 if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') && 1009 (Range[0] != Range[1])) 1010 Range = Range.substr(1); 1011 Range = Range.substr(1); 1012 1013 if (NumNewLines == 1) 1014 FirstNewLine = Range.begin(); 1015 } 1016 } 1017 1018 /// Match check string and its "not strings" and/or "dag strings". 1019 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer, 1020 bool IsLabelScanMode, size_t &MatchLen, 1021 StringMap<StringRef> &VariableTable) const { 1022 size_t LastPos = 0; 1023 std::vector<const Pattern *> NotStrings; 1024 1025 // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL 1026 // bounds; we have not processed variable definitions within the bounded block 1027 // yet so cannot handle any final CHECK-DAG yet; this is handled when going 1028 // over the block again (including the last CHECK-LABEL) in normal mode. 1029 if (!IsLabelScanMode) { 1030 // Match "dag strings" (with mixed "not strings" if any). 1031 LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable); 1032 if (LastPos == StringRef::npos) 1033 return StringRef::npos; 1034 } 1035 1036 // Match itself from the last position after matching CHECK-DAG. 1037 StringRef MatchBuffer = Buffer.substr(LastPos); 1038 size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable); 1039 if (MatchPos == StringRef::npos) { 1040 PrintCheckFailed(SM, *this, MatchBuffer, VariableTable); 1041 return StringRef::npos; 1042 } 1043 1044 // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT 1045 // or CHECK-NOT 1046 if (!IsLabelScanMode) { 1047 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); 1048 1049 // If this check is a "CHECK-NEXT", verify that the previous match was on 1050 // the previous line (i.e. that there is one newline between them). 1051 if (CheckNext(SM, SkippedRegion)) 1052 return StringRef::npos; 1053 1054 // If this check is a "CHECK-SAME", verify that the previous match was on 1055 // the same line (i.e. 
that there is no newline between them). 1056 if (CheckSame(SM, SkippedRegion)) 1057 return StringRef::npos; 1058 1059 // If this match had "not strings", verify that they don't exist in the 1060 // skipped region. 1061 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) 1062 return StringRef::npos; 1063 } 1064 1065 return LastPos + MatchPos; 1066 } 1067 1068 /// Verify there is a single line in the given buffer. 1069 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const { 1070 if (Pat.getCheckTy() != Check::CheckNext) 1071 return false; 1072 1073 // Count the number of newlines between the previous match and this one. 1074 assert(Buffer.data() != 1075 SM.getMemoryBuffer(SM.FindBufferContainingLoc( 1076 SMLoc::getFromPointer(Buffer.data()))) 1077 ->getBufferStart() && 1078 "CHECK-NEXT can't be the first check in a file"); 1079 1080 const char *FirstNewLine = nullptr; 1081 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 1082 1083 if (NumNewLines == 0) { 1084 SM.PrintMessage(Loc, SourceMgr::DK_Error, 1085 Prefix + "-NEXT: is on the same line as previous match"); 1086 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 1087 "'next' match was here"); 1088 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1089 "previous match ended here"); 1090 return true; 1091 } 1092 1093 if (NumNewLines != 1) { 1094 SM.PrintMessage(Loc, SourceMgr::DK_Error, 1095 Prefix + 1096 "-NEXT: is not on the line after the previous match"); 1097 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 1098 "'next' match was here"); 1099 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1100 "previous match ended here"); 1101 SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note, 1102 "non-matching line after previous match is here"); 1103 return true; 1104 } 1105 1106 return false; 1107 } 1108 1109 /// Verify there is no newline in the given 
buffer. 1110 bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const { 1111 if (Pat.getCheckTy() != Check::CheckSame) 1112 return false; 1113 1114 // Count the number of newlines between the previous match and this one. 1115 assert(Buffer.data() != 1116 SM.getMemoryBuffer(SM.FindBufferContainingLoc( 1117 SMLoc::getFromPointer(Buffer.data()))) 1118 ->getBufferStart() && 1119 "CHECK-SAME can't be the first check in a file"); 1120 1121 const char *FirstNewLine = nullptr; 1122 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 1123 1124 if (NumNewLines != 0) { 1125 SM.PrintMessage(Loc, SourceMgr::DK_Error, 1126 Prefix + 1127 "-SAME: is not on the same line as the previous match"); 1128 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 1129 "'next' match was here"); 1130 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1131 "previous match ended here"); 1132 return true; 1133 } 1134 1135 return false; 1136 } 1137 1138 /// Verify there's no "not strings" in the given buffer. 1139 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer, 1140 const std::vector<const Pattern *> &NotStrings, 1141 StringMap<StringRef> &VariableTable) const { 1142 for (const Pattern *Pat : NotStrings) { 1143 assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!"); 1144 1145 size_t MatchLen = 0; 1146 size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable); 1147 1148 if (Pos == StringRef::npos) 1149 continue; 1150 1151 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Pos), 1152 SourceMgr::DK_Error, Prefix + "-NOT: string occurred!"); 1153 SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note, 1154 Prefix + "-NOT: pattern specified here"); 1155 return true; 1156 } 1157 1158 return false; 1159 } 1160 1161 /// Match "dag strings" and their mixed "not strings". 
1162 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer, 1163 std::vector<const Pattern *> &NotStrings, 1164 StringMap<StringRef> &VariableTable) const { 1165 if (DagNotStrings.empty()) 1166 return 0; 1167 1168 size_t LastPos = 0; 1169 size_t StartPos = LastPos; 1170 1171 for (const Pattern &Pat : DagNotStrings) { 1172 assert((Pat.getCheckTy() == Check::CheckDAG || 1173 Pat.getCheckTy() == Check::CheckNot) && 1174 "Invalid CHECK-DAG or CHECK-NOT!"); 1175 1176 if (Pat.getCheckTy() == Check::CheckNot) { 1177 NotStrings.push_back(&Pat); 1178 continue; 1179 } 1180 1181 assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!"); 1182 1183 size_t MatchLen = 0, MatchPos; 1184 1185 // CHECK-DAG always matches from the start. 1186 StringRef MatchBuffer = Buffer.substr(StartPos); 1187 MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable); 1188 // With a group of CHECK-DAGs, a single mismatching means the match on 1189 // that group of CHECK-DAGs fails immediately. 1190 if (MatchPos == StringRef::npos) { 1191 PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable); 1192 return StringRef::npos; 1193 } 1194 // Re-calc it as the offset relative to the start of the original string. 1195 MatchPos += StartPos; 1196 1197 if (!NotStrings.empty()) { 1198 if (MatchPos < LastPos) { 1199 // Reordered? 
1200 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos), 1201 SourceMgr::DK_Error, 1202 Prefix + "-DAG: found a match of CHECK-DAG" 1203 " reordering across a CHECK-NOT"); 1204 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos), 1205 SourceMgr::DK_Note, 1206 Prefix + "-DAG: the farthest match of CHECK-DAG" 1207 " is found here"); 1208 SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note, 1209 Prefix + "-NOT: the crossed pattern specified" 1210 " here"); 1211 SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note, 1212 Prefix + "-DAG: the reordered pattern specified" 1213 " here"); 1214 return StringRef::npos; 1215 } 1216 // All subsequent CHECK-DAGs should be matched from the farthest 1217 // position of all precedent CHECK-DAGs (including this one.) 1218 StartPos = LastPos; 1219 // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to 1220 // CHECK-DAG, verify that there's no 'not' strings occurred in that 1221 // region. 1222 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); 1223 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) 1224 return StringRef::npos; 1225 // Clear "not strings". 1226 NotStrings.clear(); 1227 } 1228 1229 // Update the last position with CHECK-DAG matches. 1230 LastPos = std::max(MatchPos + MatchLen, LastPos); 1231 } 1232 1233 return LastPos; 1234 } 1235 1236 // A check prefix must contain only alphanumeric, hyphens and underscores. 1237 static bool ValidateCheckPrefix(StringRef CheckPrefix) { 1238 Regex Validator("^[a-zA-Z0-9_-]*$"); 1239 return Validator.match(CheckPrefix); 1240 } 1241 1242 static bool ValidateCheckPrefixes() { 1243 StringSet<> PrefixSet; 1244 1245 for (StringRef Prefix : CheckPrefixes) { 1246 // Reject empty prefixes. 
1247 if (Prefix == "") 1248 return false; 1249 1250 if (!PrefixSet.insert(Prefix).second) 1251 return false; 1252 1253 if (!ValidateCheckPrefix(Prefix)) 1254 return false; 1255 } 1256 1257 return true; 1258 } 1259 1260 // I don't think there's a way to specify an initial value for cl::list, 1261 // so if nothing was specified, add the default 1262 static void AddCheckPrefixIfNeeded() { 1263 if (CheckPrefixes.empty()) 1264 CheckPrefixes.push_back("CHECK"); 1265 } 1266 1267 static void DumpCommandLine(int argc, char **argv) { 1268 errs() << "FileCheck command line: "; 1269 for (int I = 0; I < argc; I++) 1270 errs() << " " << argv[I]; 1271 errs() << "\n"; 1272 } 1273 1274 /// Check the input to FileCheck provided in the \p Buffer against the \p 1275 /// CheckStrings read from the check file. 1276 /// 1277 /// Returns false if the input fails to satisfy the checks. 1278 bool CheckInput(SourceMgr &SM, StringRef Buffer, 1279 ArrayRef<CheckString> CheckStrings) { 1280 bool ChecksFailed = false; 1281 1282 /// VariableTable - This holds all the current filecheck variables. 1283 StringMap<StringRef> VariableTable; 1284 1285 unsigned i = 0, j = 0, e = CheckStrings.size(); 1286 while (true) { 1287 StringRef CheckRegion; 1288 if (j == e) { 1289 CheckRegion = Buffer; 1290 } else { 1291 const CheckString &CheckLabelStr = CheckStrings[j]; 1292 if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) { 1293 ++j; 1294 continue; 1295 } 1296 1297 // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG 1298 size_t MatchLabelLen = 0; 1299 size_t MatchLabelPos = 1300 CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, VariableTable); 1301 if (MatchLabelPos == StringRef::npos) 1302 // Immediately bail of CHECK-LABEL fails, nothing else we can do. 
1303 return false; 1304 1305 CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen); 1306 Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen); 1307 ++j; 1308 } 1309 1310 for (; i != j; ++i) { 1311 const CheckString &CheckStr = CheckStrings[i]; 1312 1313 // Check each string within the scanned region, including a second check 1314 // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG) 1315 size_t MatchLen = 0; 1316 size_t MatchPos = 1317 CheckStr.Check(SM, CheckRegion, false, MatchLen, VariableTable); 1318 1319 if (MatchPos == StringRef::npos) { 1320 ChecksFailed = true; 1321 i = j; 1322 break; 1323 } 1324 1325 CheckRegion = CheckRegion.substr(MatchPos + MatchLen); 1326 } 1327 1328 if (j == e) 1329 break; 1330 } 1331 1332 // Success if no checks failed. 1333 return !ChecksFailed; 1334 } 1335 1336 int main(int argc, char **argv) { 1337 sys::PrintStackTraceOnErrorSignal(argv[0]); 1338 PrettyStackTraceProgram X(argc, argv); 1339 cl::ParseCommandLineOptions(argc, argv); 1340 1341 if (!ValidateCheckPrefixes()) { 1342 errs() << "Supplied check-prefix is invalid! Prefixes must be unique and " 1343 "start with a letter and contain only alphanumeric characters, " 1344 "hyphens and underscores\n"; 1345 return 2; 1346 } 1347 1348 AddCheckPrefixIfNeeded(); 1349 1350 SourceMgr SM; 1351 1352 // Read the expected strings from the check file. 
1353 ErrorOr<std::unique_ptr<MemoryBuffer>> CheckFileOrErr = 1354 MemoryBuffer::getFileOrSTDIN(CheckFilename); 1355 if (std::error_code EC = CheckFileOrErr.getError()) { 1356 errs() << "Could not open check file '" << CheckFilename 1357 << "': " << EC.message() << '\n'; 1358 return 2; 1359 } 1360 MemoryBuffer &CheckFile = *CheckFileOrErr.get(); 1361 1362 SmallString<4096> CheckFileBuffer; 1363 StringRef CheckFileText = 1364 CanonicalizeFile(CheckFile, NoCanonicalizeWhiteSpace, CheckFileBuffer); 1365 1366 SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer( 1367 CheckFileText, CheckFile.getBufferIdentifier()), 1368 SMLoc()); 1369 1370 std::vector<CheckString> CheckStrings; 1371 if (ReadCheckFile(SM, CheckFileText, CheckStrings)) 1372 return 2; 1373 1374 // Open the file to check and add it to SourceMgr. 1375 ErrorOr<std::unique_ptr<MemoryBuffer>> InputFileOrErr = 1376 MemoryBuffer::getFileOrSTDIN(InputFilename); 1377 if (std::error_code EC = InputFileOrErr.getError()) { 1378 errs() << "Could not open input file '" << InputFilename 1379 << "': " << EC.message() << '\n'; 1380 return 2; 1381 } 1382 MemoryBuffer &InputFile = *InputFileOrErr.get(); 1383 1384 if (InputFile.getBufferSize() == 0 && !AllowEmptyInput) { 1385 errs() << "FileCheck error: '" << InputFilename << "' is empty.\n"; 1386 DumpCommandLine(argc, argv); 1387 return 2; 1388 } 1389 1390 SmallString<4096> InputFileBuffer; 1391 StringRef InputFileText = 1392 CanonicalizeFile(InputFile, NoCanonicalizeWhiteSpace, InputFileBuffer); 1393 1394 SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer( 1395 InputFileText, InputFile.getBufferIdentifier()), 1396 SMLoc()); 1397 1398 return CheckInput(SM, InputFileText, CheckStrings) ? EXIT_SUCCESS : 1; 1399 } 1400