1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // FileCheck does a line-by line check of a file that validates whether it 11 // contains the expected content. This is useful for regression tests etc. 12 // 13 // This program exits with an exit status of 2 on error, exit status of 0 if 14 // the file matched the expected contents, and exit status of 1 if it did not 15 // contain the expected contents. 16 // 17 //===----------------------------------------------------------------------===// 18 19 #include "llvm/ADT/SmallString.h" 20 #include "llvm/ADT/StringExtras.h" 21 #include "llvm/ADT/StringMap.h" 22 #include "llvm/ADT/StringSet.h" 23 #include "llvm/Support/CommandLine.h" 24 #include "llvm/Support/InitLLVM.h" 25 #include "llvm/Support/MemoryBuffer.h" 26 #include "llvm/Support/Regex.h" 27 #include "llvm/Support/SourceMgr.h" 28 #include "llvm/Support/raw_ostream.h" 29 #include <algorithm> 30 #include <cctype> 31 #include <map> 32 #include <string> 33 #include <system_error> 34 #include <vector> 35 using namespace llvm; 36 37 static cl::opt<std::string> 38 CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required); 39 40 static cl::opt<std::string> 41 InputFilename("input-file", cl::desc("File to check (defaults to stdin)"), 42 cl::init("-"), cl::value_desc("filename")); 43 44 static cl::list<std::string> CheckPrefixes( 45 "check-prefix", 46 cl::desc("Prefix to use from check file (defaults to 'CHECK')")); 47 static cl::alias CheckPrefixesAlias( 48 "check-prefixes", cl::aliasopt(CheckPrefixes), cl::CommaSeparated, 49 cl::NotHidden, 50 cl::desc( 51 "Alias for -check-prefix permitting multiple comma separated values")); 52 53 static cl::opt<bool> NoCanonicalizeWhiteSpace( 54 "strict-whitespace", 55 cl::desc("Do not treat all horizontal whitespace as equivalent")); 56 57 static cl::list<std::string> ImplicitCheckNot( 58 "implicit-check-not", 59 cl::desc("Add an implicit negative check with this pattern to every\n" 60 "positive check. This can be used to ensure that no instances of\n" 61 "this pattern occur which are not matched by a positive pattern"), 62 cl::value_desc("pattern")); 63 64 static cl::list<std::string> GlobalDefines("D", cl::Prefix, 65 cl::desc("Define a variable to be used in capture patterns."), 66 cl::value_desc("VAR=VALUE")); 67 68 static cl::opt<bool> AllowEmptyInput( 69 "allow-empty", cl::init(false), 70 cl::desc("Allow the input file to be empty. This is useful when making\n" 71 "checks that some error message does not occur, for example.")); 72 73 static cl::opt<bool> MatchFullLines( 74 "match-full-lines", cl::init(false), 75 cl::desc("Require all positive matches to cover an entire input line.\n" 76 "Allows leading and trailing whitespace if --strict-whitespace\n" 77 "is not also passed.")); 78 79 static cl::opt<bool> EnableVarScope( 80 "enable-var-scope", cl::init(false), 81 cl::desc("Enables scope for regex variables. Variables with names that\n" 82 "do not start with '$' will be reset at the beginning of\n" 83 "each CHECK-LABEL block.")); 84 85 typedef cl::list<std::string>::const_iterator prefix_iterator; 86 87 //===----------------------------------------------------------------------===// 88 // Pattern Handling Code. 89 //===----------------------------------------------------------------------===// 90 91 namespace Check { 92 enum CheckType { 93 CheckNone = 0, 94 CheckPlain, 95 CheckNext, 96 CheckSame, 97 CheckNot, 98 CheckDAG, 99 CheckLabel, 100 101 /// Indicates the pattern only matches the end of file. This is used for 102 /// trailing CHECK-NOTs. 103 CheckEOF, 104 105 /// Marks when parsing found a -NOT check combined with another CHECK suffix. 106 CheckBadNot 107 }; 108 } 109 110 class Pattern { 111 SMLoc PatternLoc; 112 113 /// A fixed string to match as the pattern or empty if this pattern requires 114 /// a regex match. 115 StringRef FixedStr; 116 117 /// A regex string to match as the pattern or empty if this pattern requires 118 /// a fixed string to match. 119 std::string RegExStr; 120 121 /// Entries in this vector map to uses of a variable in the pattern, e.g. 122 /// "foo[[bar]]baz". In this case, the RegExStr will contain "foobaz" and 123 /// we'll get an entry in this vector that tells us to insert the value of 124 /// bar at offset 3. 125 std::vector<std::pair<StringRef, unsigned>> VariableUses; 126 127 /// Maps definitions of variables to their parenthesized capture numbers. 128 /// 129 /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 130 /// 1. 131 std::map<StringRef, unsigned> VariableDefs; 132 133 Check::CheckType CheckTy; 134 135 /// Contains the number of line this pattern is in. 136 unsigned LineNumber; 137 138 public: 139 explicit Pattern(Check::CheckType Ty) : CheckTy(Ty) {} 140 141 /// Returns the location in source code. 142 SMLoc getLoc() const { return PatternLoc; } 143 144 bool ParsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM, 145 unsigned LineNumber); 146 size_t Match(StringRef Buffer, size_t &MatchLen, 147 StringMap<StringRef> &VariableTable) const; 148 void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer, 149 const StringMap<StringRef> &VariableTable) const; 150 151 bool hasVariable() const { 152 return !(VariableUses.empty() && VariableDefs.empty()); 153 } 154 155 Check::CheckType getCheckTy() const { return CheckTy; } 156 157 private: 158 bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM); 159 void AddBackrefToRegEx(unsigned BackrefNum); 160 unsigned 161 ComputeMatchDistance(StringRef Buffer, 162 const StringMap<StringRef> &VariableTable) const; 163 bool EvaluateExpression(StringRef Expr, std::string &Value) const; 164 size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM); 165 }; 166 167 /// Parses the given string into the Pattern. 168 /// 169 /// \p Prefix provides which prefix is being matched, \p SM provides the 170 /// SourceMgr used for error reports, and \p LineNumber is the line number in 171 /// the input file from which the pattern string was read. Returns true in 172 /// case of an error, false otherwise. 173 bool Pattern::ParsePattern(StringRef PatternStr, StringRef Prefix, 174 SourceMgr &SM, unsigned LineNumber) { 175 bool MatchFullLinesHere = MatchFullLines && CheckTy != Check::CheckNot; 176 177 this->LineNumber = LineNumber; 178 PatternLoc = SMLoc::getFromPointer(PatternStr.data()); 179 180 if (!(NoCanonicalizeWhiteSpace && MatchFullLines)) 181 // Ignore trailing whitespace. 182 while (!PatternStr.empty() && 183 (PatternStr.back() == ' ' || PatternStr.back() == '\t')) 184 PatternStr = PatternStr.substr(0, PatternStr.size() - 1); 185 186 // Check that there is something on the line. 187 if (PatternStr.empty()) { 188 SM.PrintMessage(PatternLoc, SourceMgr::DK_Error, 189 "found empty check string with prefix '" + Prefix + ":'"); 190 return true; 191 } 192 193 // Check to see if this is a fixed string, or if it has regex pieces. 194 if (!MatchFullLinesHere && 195 (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos && 196 PatternStr.find("[[") == StringRef::npos))) { 197 FixedStr = PatternStr; 198 return false; 199 } 200 201 if (MatchFullLinesHere) { 202 RegExStr += '^'; 203 if (!NoCanonicalizeWhiteSpace) 204 RegExStr += " *"; 205 } 206 207 // Paren value #0 is for the fully matched string. Any new parenthesized 208 // values add from there. 209 unsigned CurParen = 1; 210 211 // Otherwise, there is at least one regex piece. Build up the regex pattern 212 // by escaping scary characters in fixed strings, building up one big regex. 213 while (!PatternStr.empty()) { 214 // RegEx matches. 215 if (PatternStr.startswith("{{")) { 216 // This is the start of a regex match. Scan for the }}. 217 size_t End = PatternStr.find("}}"); 218 if (End == StringRef::npos) { 219 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 220 SourceMgr::DK_Error, 221 "found start of regex string with no end '}}'"); 222 return true; 223 } 224 225 // Enclose {{}} patterns in parens just like [[]] even though we're not 226 // capturing the result for any purpose. This is required in case the 227 // expression contains an alternation like: CHECK: abc{{x|z}}def. We 228 // want this to turn into: "abc(x|z)def" not "abcx|zdef". 229 RegExStr += '('; 230 ++CurParen; 231 232 if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM)) 233 return true; 234 RegExStr += ')'; 235 236 PatternStr = PatternStr.substr(End + 2); 237 continue; 238 } 239 240 // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .* 241 // (or some other regex) and assigns it to the FileCheck variable 'foo'. The 242 // second form is [[foo]] which is a reference to foo. The variable name 243 // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject 244 // it. This is to catch some common errors. 245 if (PatternStr.startswith("[[")) { 246 // Find the closing bracket pair ending the match. End is going to be an 247 // offset relative to the beginning of the match string. 248 size_t End = FindRegexVarEnd(PatternStr.substr(2), SM); 249 250 if (End == StringRef::npos) { 251 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 252 SourceMgr::DK_Error, 253 "invalid named regex reference, no ]] found"); 254 return true; 255 } 256 257 StringRef MatchStr = PatternStr.substr(2, End); 258 PatternStr = PatternStr.substr(End + 4); 259 260 // Get the regex name (e.g. "foo"). 261 size_t NameEnd = MatchStr.find(':'); 262 StringRef Name = MatchStr.substr(0, NameEnd); 263 264 if (Name.empty()) { 265 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 266 "invalid name in named regex: empty name"); 267 return true; 268 } 269 270 // Verify that the name/expression is well formed. FileCheck currently 271 // supports @LINE, @LINE+number, @LINE-number expressions. The check here 272 // is relaxed, more strict check is performed in \c EvaluateExpression. 273 bool IsExpression = false; 274 for (unsigned i = 0, e = Name.size(); i != e; ++i) { 275 if (i == 0) { 276 if (Name[i] == '$') // Global vars start with '$' 277 continue; 278 if (Name[i] == '@') { 279 if (NameEnd != StringRef::npos) { 280 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 281 SourceMgr::DK_Error, 282 "invalid name in named regex definition"); 283 return true; 284 } 285 IsExpression = true; 286 continue; 287 } 288 } 289 if (Name[i] != '_' && !isalnum(Name[i]) && 290 (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) { 291 SM.PrintMessage(SMLoc::getFromPointer(Name.data() + i), 292 SourceMgr::DK_Error, "invalid name in named regex"); 293 return true; 294 } 295 } 296 297 // Name can't start with a digit. 298 if (isdigit(static_cast<unsigned char>(Name[0]))) { 299 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 300 "invalid name in named regex"); 301 return true; 302 } 303 304 // Handle [[foo]]. 305 if (NameEnd == StringRef::npos) { 306 // Handle variables that were defined earlier on the same line by 307 // emitting a backreference. 308 if (VariableDefs.find(Name) != VariableDefs.end()) { 309 unsigned VarParenNum = VariableDefs[Name]; 310 if (VarParenNum < 1 || VarParenNum > 9) { 311 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 312 SourceMgr::DK_Error, 313 "Can't back-reference more than 9 variables"); 314 return true; 315 } 316 AddBackrefToRegEx(VarParenNum); 317 } else { 318 VariableUses.push_back(std::make_pair(Name, RegExStr.size())); 319 } 320 continue; 321 } 322 323 // Handle [[foo:.*]]. 324 VariableDefs[Name] = CurParen; 325 RegExStr += '('; 326 ++CurParen; 327 328 if (AddRegExToRegEx(MatchStr.substr(NameEnd + 1), CurParen, SM)) 329 return true; 330 331 RegExStr += ')'; 332 } 333 334 // Handle fixed string matches. 335 // Find the end, which is the start of the next regex. 336 size_t FixedMatchEnd = PatternStr.find("{{"); 337 FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[[")); 338 RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd)); 339 PatternStr = PatternStr.substr(FixedMatchEnd); 340 } 341 342 if (MatchFullLinesHere) { 343 if (!NoCanonicalizeWhiteSpace) 344 RegExStr += " *"; 345 RegExStr += '$'; 346 } 347 348 return false; 349 } 350 351 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) { 352 Regex R(RS); 353 std::string Error; 354 if (!R.isValid(Error)) { 355 SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error, 356 "invalid regex: " + Error); 357 return true; 358 } 359 360 RegExStr += RS.str(); 361 CurParen += R.getNumMatches(); 362 return false; 363 } 364 365 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) { 366 assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number"); 367 std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum); 368 RegExStr += Backref; 369 } 370 371 /// Evaluates expression and stores the result to \p Value. 372 /// 373 /// Returns true on success and false when the expression has invalid syntax. 374 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const { 375 // The only supported expression is @LINE([\+-]\d+)? 376 if (!Expr.startswith("@LINE")) 377 return false; 378 Expr = Expr.substr(StringRef("@LINE").size()); 379 int Offset = 0; 380 if (!Expr.empty()) { 381 if (Expr[0] == '+') 382 Expr = Expr.substr(1); 383 else if (Expr[0] != '-') 384 return false; 385 if (Expr.getAsInteger(10, Offset)) 386 return false; 387 } 388 Value = llvm::itostr(LineNumber + Offset); 389 return true; 390 } 391 392 /// Matches the pattern string against the input buffer \p Buffer 393 /// 394 /// This returns the position that is matched or npos if there is no match. If 395 /// there is a match, the size of the matched string is returned in \p 396 /// MatchLen. 397 /// 398 /// The \p VariableTable StringMap provides the current values of filecheck 399 /// variables and is updated if this match defines new values. 400 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen, 401 StringMap<StringRef> &VariableTable) const { 402 // If this is the EOF pattern, match it immediately. 403 if (CheckTy == Check::CheckEOF) { 404 MatchLen = 0; 405 return Buffer.size(); 406 } 407 408 // If this is a fixed string pattern, just match it now. 409 if (!FixedStr.empty()) { 410 MatchLen = FixedStr.size(); 411 return Buffer.find(FixedStr); 412 } 413 414 // Regex match. 415 416 // If there are variable uses, we need to create a temporary string with the 417 // actual value. 418 StringRef RegExToMatch = RegExStr; 419 std::string TmpStr; 420 if (!VariableUses.empty()) { 421 TmpStr = RegExStr; 422 423 unsigned InsertOffset = 0; 424 for (const auto &VariableUse : VariableUses) { 425 std::string Value; 426 427 if (VariableUse.first[0] == '@') { 428 if (!EvaluateExpression(VariableUse.first, Value)) 429 return StringRef::npos; 430 } else { 431 StringMap<StringRef>::iterator it = 432 VariableTable.find(VariableUse.first); 433 // If the variable is undefined, return an error. 434 if (it == VariableTable.end()) 435 return StringRef::npos; 436 437 // Look up the value and escape it so that we can put it into the regex. 438 Value += Regex::escape(it->second); 439 } 440 441 // Plop it into the regex at the adjusted offset. 442 TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset, 443 Value.begin(), Value.end()); 444 InsertOffset += Value.size(); 445 } 446 447 // Match the newly constructed regex. 448 RegExToMatch = TmpStr; 449 } 450 451 SmallVector<StringRef, 4> MatchInfo; 452 if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo)) 453 return StringRef::npos; 454 455 // Successful regex match. 456 assert(!MatchInfo.empty() && "Didn't get any match"); 457 StringRef FullMatch = MatchInfo[0]; 458 459 // If this defines any variables, remember their values. 460 for (const auto &VariableDef : VariableDefs) { 461 assert(VariableDef.second < MatchInfo.size() && "Internal paren error"); 462 VariableTable[VariableDef.first] = MatchInfo[VariableDef.second]; 463 } 464 465 MatchLen = FullMatch.size(); 466 return FullMatch.data() - Buffer.data(); 467 } 468 469 470 /// Computes an arbitrary estimate for the quality of matching this pattern at 471 /// the start of \p Buffer; a distance of zero should correspond to a perfect 472 /// match. 473 unsigned 474 Pattern::ComputeMatchDistance(StringRef Buffer, 475 const StringMap<StringRef> &VariableTable) const { 476 // Just compute the number of matching characters. For regular expressions, we 477 // just compare against the regex itself and hope for the best. 478 // 479 // FIXME: One easy improvement here is have the regex lib generate a single 480 // example regular expression which matches, and use that as the example 481 // string. 482 StringRef ExampleString(FixedStr); 483 if (ExampleString.empty()) 484 ExampleString = RegExStr; 485 486 // Only compare up to the first line in the buffer, or the string size. 487 StringRef BufferPrefix = Buffer.substr(0, ExampleString.size()); 488 BufferPrefix = BufferPrefix.split('\n').first; 489 return BufferPrefix.edit_distance(ExampleString); 490 } 491 492 /// Prints additional information about a failure to match involving this 493 /// pattern. 494 void Pattern::PrintFailureInfo( 495 const SourceMgr &SM, StringRef Buffer, 496 const StringMap<StringRef> &VariableTable) const { 497 // If this was a regular expression using variables, print the current 498 // variable values. 499 if (!VariableUses.empty()) { 500 for (const auto &VariableUse : VariableUses) { 501 SmallString<256> Msg; 502 raw_svector_ostream OS(Msg); 503 StringRef Var = VariableUse.first; 504 if (Var[0] == '@') { 505 std::string Value; 506 if (EvaluateExpression(Var, Value)) { 507 OS << "with expression \""; 508 OS.write_escaped(Var) << "\" equal to \""; 509 OS.write_escaped(Value) << "\""; 510 } else { 511 OS << "uses incorrect expression \""; 512 OS.write_escaped(Var) << "\""; 513 } 514 } else { 515 StringMap<StringRef>::const_iterator it = VariableTable.find(Var); 516 517 // Check for undefined variable references. 518 if (it == VariableTable.end()) { 519 OS << "uses undefined variable \""; 520 OS.write_escaped(Var) << "\""; 521 } else { 522 OS << "with variable \""; 523 OS.write_escaped(Var) << "\" equal to \""; 524 OS.write_escaped(it->second) << "\""; 525 } 526 } 527 528 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 529 OS.str()); 530 } 531 } 532 533 // Attempt to find the closest/best fuzzy match. Usually an error happens 534 // because some string in the output didn't exactly match. In these cases, we 535 // would like to show the user a best guess at what "should have" matched, to 536 // save them having to actually check the input manually. 537 size_t NumLinesForward = 0; 538 size_t Best = StringRef::npos; 539 double BestQuality = 0; 540 541 // Use an arbitrary 4k limit on how far we will search. 542 for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) { 543 if (Buffer[i] == '\n') 544 ++NumLinesForward; 545 546 // Patterns have leading whitespace stripped, so skip whitespace when 547 // looking for something which looks like a pattern. 548 if (Buffer[i] == ' ' || Buffer[i] == '\t') 549 continue; 550 551 // Compute the "quality" of this match as an arbitrary combination of the 552 // match distance and the number of lines skipped to get to this match. 553 unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable); 554 double Quality = Distance + (NumLinesForward / 100.); 555 556 if (Quality < BestQuality || Best == StringRef::npos) { 557 Best = i; 558 BestQuality = Quality; 559 } 560 } 561 562 // Print the "possible intended match here" line if we found something 563 // reasonable and not equal to what we showed in the "scanning from here" 564 // line. 565 if (Best && Best != StringRef::npos && BestQuality < 50) { 566 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best), 567 SourceMgr::DK_Note, "possible intended match here"); 568 569 // FIXME: If we wanted to be really friendly we would show why the match 570 // failed, as it can be hard to spot simple one character differences. 571 } 572 } 573 574 /// Finds the closing sequence of a regex variable usage or definition. 575 /// 576 /// \p Str has to point in the beginning of the definition (right after the 577 /// opening sequence). Returns the offset of the closing sequence within Str, 578 /// or npos if it was not found. 579 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) { 580 // Offset keeps track of the current offset within the input Str 581 size_t Offset = 0; 582 // [...] Nesting depth 583 size_t BracketDepth = 0; 584 585 while (!Str.empty()) { 586 if (Str.startswith("]]") && BracketDepth == 0) 587 return Offset; 588 if (Str[0] == '\\') { 589 // Backslash escapes the next char within regexes, so skip them both. 590 Str = Str.substr(2); 591 Offset += 2; 592 } else { 593 switch (Str[0]) { 594 default: 595 break; 596 case '[': 597 BracketDepth++; 598 break; 599 case ']': 600 if (BracketDepth == 0) { 601 SM.PrintMessage(SMLoc::getFromPointer(Str.data()), 602 SourceMgr::DK_Error, 603 "missing closing \"]\" for regex variable"); 604 exit(1); 605 } 606 BracketDepth--; 607 break; 608 } 609 Str = Str.substr(1); 610 Offset++; 611 } 612 } 613 614 return StringRef::npos; 615 } 616 617 //===----------------------------------------------------------------------===// 618 // Check Strings. 619 //===----------------------------------------------------------------------===// 620 621 /// A check that we found in the input file. 622 struct CheckString { 623 /// The pattern to match. 624 Pattern Pat; 625 626 /// Which prefix name this check matched. 627 StringRef Prefix; 628 629 /// The location in the match file that the check string was specified. 630 SMLoc Loc; 631 632 /// All of the strings that are disallowed from occurring between this match 633 /// string and the previous one (or start of file). 634 std::vector<Pattern> DagNotStrings; 635 636 CheckString(const Pattern &P, StringRef S, SMLoc L) 637 : Pat(P), Prefix(S), Loc(L) {} 638 639 size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode, 640 size_t &MatchLen, StringMap<StringRef> &VariableTable) const; 641 642 bool CheckNext(const SourceMgr &SM, StringRef Buffer) const; 643 bool CheckSame(const SourceMgr &SM, StringRef Buffer) const; 644 bool CheckNot(const SourceMgr &SM, StringRef Buffer, 645 const std::vector<const Pattern *> &NotStrings, 646 StringMap<StringRef> &VariableTable) const; 647 size_t CheckDag(const SourceMgr &SM, StringRef Buffer, 648 std::vector<const Pattern *> &NotStrings, 649 StringMap<StringRef> &VariableTable) const; 650 }; 651 652 /// Canonicalize whitespaces in the file. Line endings are replaced with 653 /// UNIX-style '\n'. 654 static StringRef CanonicalizeFile(MemoryBuffer &MB, 655 SmallVectorImpl<char> &OutputBuffer) { 656 OutputBuffer.reserve(MB.getBufferSize()); 657 658 for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd(); 659 Ptr != End; ++Ptr) { 660 // Eliminate trailing dosish \r. 661 if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') { 662 continue; 663 } 664 665 // If current char is not a horizontal whitespace or if horizontal 666 // whitespace canonicalization is disabled, dump it to output as is. 667 if (NoCanonicalizeWhiteSpace || (*Ptr != ' ' && *Ptr != '\t')) { 668 OutputBuffer.push_back(*Ptr); 669 continue; 670 } 671 672 // Otherwise, add one space and advance over neighboring space. 673 OutputBuffer.push_back(' '); 674 while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t')) 675 ++Ptr; 676 } 677 678 // Add a null byte and then return all but that byte. 679 OutputBuffer.push_back('\0'); 680 return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1); 681 } 682 683 static bool IsPartOfWord(char c) { 684 return (isalnum(c) || c == '-' || c == '_'); 685 } 686 687 // Get the size of the prefix extension. 688 static size_t CheckTypeSize(Check::CheckType Ty) { 689 switch (Ty) { 690 case Check::CheckNone: 691 case Check::CheckBadNot: 692 return 0; 693 694 case Check::CheckPlain: 695 return sizeof(":") - 1; 696 697 case Check::CheckNext: 698 return sizeof("-NEXT:") - 1; 699 700 case Check::CheckSame: 701 return sizeof("-SAME:") - 1; 702 703 case Check::CheckNot: 704 return sizeof("-NOT:") - 1; 705 706 case Check::CheckDAG: 707 return sizeof("-DAG:") - 1; 708 709 case Check::CheckLabel: 710 return sizeof("-LABEL:") - 1; 711 712 case Check::CheckEOF: 713 llvm_unreachable("Should not be using EOF size"); 714 } 715 716 llvm_unreachable("Bad check type"); 717 } 718 719 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) { 720 if (Buffer.size() <= Prefix.size()) 721 return Check::CheckNone; 722 723 char NextChar = Buffer[Prefix.size()]; 724 725 // Verify that the : is present after the prefix. 726 if (NextChar == ':') 727 return Check::CheckPlain; 728 729 if (NextChar != '-') 730 return Check::CheckNone; 731 732 StringRef Rest = Buffer.drop_front(Prefix.size() + 1); 733 if (Rest.startswith("NEXT:")) 734 return Check::CheckNext; 735 736 if (Rest.startswith("SAME:")) 737 return Check::CheckSame; 738 739 if (Rest.startswith("NOT:")) 740 return Check::CheckNot; 741 742 if (Rest.startswith("DAG:")) 743 return Check::CheckDAG; 744 745 if (Rest.startswith("LABEL:")) 746 return Check::CheckLabel; 747 748 // You can't combine -NOT with another suffix. 749 if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") || 750 Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") || 751 Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:")) 752 return Check::CheckBadNot; 753 754 return Check::CheckNone; 755 } 756 757 // From the given position, find the next character after the word. 758 static size_t SkipWord(StringRef Str, size_t Loc) { 759 while (Loc < Str.size() && IsPartOfWord(Str[Loc])) 760 ++Loc; 761 return Loc; 762 } 763 764 /// Search the buffer for the first prefix in the prefix regular expression. 765 /// 766 /// This searches the buffer using the provided regular expression, however it 767 /// enforces constraints beyond that: 768 /// 1) The found prefix must not be a suffix of something that looks like 769 /// a valid prefix. 770 /// 2) The found prefix must be followed by a valid check type suffix using \c 771 /// FindCheckType above. 772 /// 773 /// The first match of the regular expression to satisfy these two is returned, 774 /// otherwise an empty StringRef is returned to indicate failure. 775 /// 776 /// If this routine returns a valid prefix, it will also shrink \p Buffer to 777 /// start at the beginning of the returned prefix, increment \p LineNumber for 778 /// each new line consumed from \p Buffer, and set \p CheckTy to the type of 779 /// check found by examining the suffix. 780 /// 781 /// If no valid prefix is found, the state of Buffer, LineNumber, and CheckTy 782 /// is unspecified. 783 static StringRef FindFirstMatchingPrefix(Regex &PrefixRE, StringRef &Buffer, 784 unsigned &LineNumber, 785 Check::CheckType &CheckTy) { 786 SmallVector<StringRef, 2> Matches; 787 788 while (!Buffer.empty()) { 789 // Find the first (longest) match using the RE. 790 if (!PrefixRE.match(Buffer, &Matches)) 791 // No match at all, bail. 792 return StringRef(); 793 794 StringRef Prefix = Matches[0]; 795 Matches.clear(); 796 797 assert(Prefix.data() >= Buffer.data() && 798 Prefix.data() < Buffer.data() + Buffer.size() && 799 "Prefix doesn't start inside of buffer!"); 800 size_t Loc = Prefix.data() - Buffer.data(); 801 StringRef Skipped = Buffer.substr(0, Loc); 802 Buffer = Buffer.drop_front(Loc); 803 LineNumber += Skipped.count('\n'); 804 805 // Check that the matched prefix isn't a suffix of some other check-like 806 // word. 807 // FIXME: This is a very ad-hoc check. it would be better handled in some 808 // other way. Among other things it seems hard to distinguish between 809 // intentional and unintentional uses of this feature. 810 if (Skipped.empty() || !IsPartOfWord(Skipped.back())) { 811 // Now extract the type. 812 CheckTy = FindCheckType(Buffer, Prefix); 813 814 // If we've found a valid check type for this prefix, we're done. 815 if (CheckTy != Check::CheckNone) 816 return Prefix; 817 } 818 819 // If we didn't successfully find a prefix, we need to skip this invalid 820 // prefix and continue scanning. We directly skip the prefix that was 821 // matched and any additional parts of that check-like word. 822 Buffer = Buffer.drop_front(SkipWord(Buffer, Prefix.size())); 823 } 824 825 // We ran out of buffer while skipping partial matches so give up. 826 return StringRef(); 827 } 828 829 /// Read the check file, which specifies the sequence of expected strings. 830 /// 831 /// The strings are added to the CheckStrings vector. Returns true in case of 832 /// an error, false otherwise. 833 static bool ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE, 834 std::vector<CheckString> &CheckStrings) { 835 std::vector<Pattern> ImplicitNegativeChecks; 836 for (const auto &PatternString : ImplicitCheckNot) { 837 // Create a buffer with fake command line content in order to display the 838 // command line option responsible for the specific implicit CHECK-NOT. 839 std::string Prefix = (Twine("-") + ImplicitCheckNot.ArgStr + "='").str(); 840 std::string Suffix = "'"; 841 std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy( 842 Prefix + PatternString + Suffix, "command line"); 843 844 StringRef PatternInBuffer = 845 CmdLine->getBuffer().substr(Prefix.size(), PatternString.size()); 846 SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc()); 847 848 ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot)); 849 ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer, 850 "IMPLICIT-CHECK", SM, 0); 851 } 852 853 std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks; 854 855 // LineNumber keeps track of the line on which CheckPrefix instances are 856 // found. 857 unsigned LineNumber = 1; 858 859 while (1) { 860 Check::CheckType CheckTy; 861 862 // See if a prefix occurs in the memory buffer. 863 StringRef UsedPrefix = FindFirstMatchingPrefix(PrefixRE, Buffer, LineNumber, 864 CheckTy); 865 if (UsedPrefix.empty()) 866 break; 867 assert(UsedPrefix.data() == Buffer.data() && 868 "Failed to move Buffer's start forward, or pointed prefix outside " 869 "of the buffer!"); 870 871 // Location to use for error messages. 872 const char *UsedPrefixStart = UsedPrefix.data(); 873 874 // Skip the buffer to the end. 875 Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy)); 876 877 // Complain about useful-looking but unsupported suffixes. 878 if (CheckTy == Check::CheckBadNot) { 879 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error, 880 "unsupported -NOT combo on prefix '" + UsedPrefix + "'"); 881 return true; 882 } 883 884 // Okay, we found the prefix, yay. Remember the rest of the line, but ignore 885 // leading whitespace. 886 if (!(NoCanonicalizeWhiteSpace && MatchFullLines)) 887 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t")); 888 889 // Scan ahead to the end of line. 890 size_t EOL = Buffer.find_first_of("\n\r"); 891 892 // Remember the location of the start of the pattern, for diagnostics. 893 SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data()); 894 895 // Parse the pattern. 896 Pattern P(CheckTy); 897 if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber)) 898 return true; 899 900 // Verify that CHECK-LABEL lines do not define or use variables 901 if ((CheckTy == Check::CheckLabel) && P.hasVariable()) { 902 SM.PrintMessage( 903 SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error, 904 "found '" + UsedPrefix + "-LABEL:'" 905 " with variable definition or use"); 906 return true; 907 } 908 909 Buffer = Buffer.substr(EOL); 910 911 // Verify that CHECK-NEXT lines have at least one CHECK line before them. 912 if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame) && 913 CheckStrings.empty()) { 914 StringRef Type = CheckTy == Check::CheckNext ? "NEXT" : "SAME"; 915 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart), 916 SourceMgr::DK_Error, 917 "found '" + UsedPrefix + "-" + Type + 918 "' without previous '" + UsedPrefix + ": line"); 919 return true; 920 } 921 922 // Handle CHECK-DAG/-NOT. 923 if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) { 924 DagNotMatches.push_back(P); 925 continue; 926 } 927 928 // Okay, add the string we captured to the output vector and move on. 929 CheckStrings.emplace_back(P, UsedPrefix, PatternLoc); 930 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); 931 DagNotMatches = ImplicitNegativeChecks; 932 } 933 934 // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first 935 // prefix as a filler for the error message. 936 if (!DagNotMatches.empty()) { 937 CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(), 938 SMLoc::getFromPointer(Buffer.data())); 939 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); 940 } 941 942 if (CheckStrings.empty()) { 943 errs() << "error: no check strings found with prefix" 944 << (CheckPrefixes.size() > 1 ? "es " : " "); 945 prefix_iterator I = CheckPrefixes.begin(); 946 prefix_iterator E = CheckPrefixes.end(); 947 if (I != E) { 948 errs() << "\'" << *I << ":'"; 949 ++I; 950 } 951 for (; I != E; ++I) 952 errs() << ", \'" << *I << ":'"; 953 954 errs() << '\n'; 955 return true; 956 } 957 958 return false; 959 } 960 961 static void PrintCheckFailed(const SourceMgr &SM, SMLoc Loc, const Pattern &Pat, 962 StringRef Buffer, 963 StringMap<StringRef> &VariableTable) { 964 // Otherwise, we have an error, emit an error message. 965 SM.PrintMessage(Loc, SourceMgr::DK_Error, 966 "expected string not found in input"); 967 968 // Print the "scanning from here" line. If the current position is at the 969 // end of a line, advance to the start of the next line. 970 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r")); 971 972 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 973 "scanning from here"); 974 975 // Allow the pattern to print additional information if desired. 976 Pat.PrintFailureInfo(SM, Buffer, VariableTable); 977 } 978 979 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr, 980 StringRef Buffer, 981 StringMap<StringRef> &VariableTable) { 982 PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable); 983 } 984 985 /// Count the number of newlines in the specified range. 986 static unsigned CountNumNewlinesBetween(StringRef Range, 987 const char *&FirstNewLine) { 988 unsigned NumNewLines = 0; 989 while (1) { 990 // Scan for newline. 991 Range = Range.substr(Range.find_first_of("\n\r")); 992 if (Range.empty()) 993 return NumNewLines; 994 995 ++NumNewLines; 996 997 // Handle \n\r and \r\n as a single newline. 998 if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') && 999 (Range[0] != Range[1])) 1000 Range = Range.substr(1); 1001 Range = Range.substr(1); 1002 1003 if (NumNewLines == 1) 1004 FirstNewLine = Range.begin(); 1005 } 1006 } 1007 1008 /// Match check string and its "not strings" and/or "dag strings". 1009 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer, 1010 bool IsLabelScanMode, size_t &MatchLen, 1011 StringMap<StringRef> &VariableTable) const { 1012 size_t LastPos = 0; 1013 std::vector<const Pattern *> NotStrings; 1014 1015 // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL 1016 // bounds; we have not processed variable definitions within the bounded block 1017 // yet so cannot handle any final CHECK-DAG yet; this is handled when going 1018 // over the block again (including the last CHECK-LABEL) in normal mode. 1019 if (!IsLabelScanMode) { 1020 // Match "dag strings" (with mixed "not strings" if any). 1021 LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable); 1022 if (LastPos == StringRef::npos) 1023 return StringRef::npos; 1024 } 1025 1026 // Match itself from the last position after matching CHECK-DAG. 1027 StringRef MatchBuffer = Buffer.substr(LastPos); 1028 size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable); 1029 if (MatchPos == StringRef::npos) { 1030 PrintCheckFailed(SM, *this, MatchBuffer, VariableTable); 1031 return StringRef::npos; 1032 } 1033 1034 // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT 1035 // or CHECK-NOT 1036 if (!IsLabelScanMode) { 1037 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); 1038 1039 // If this check is a "CHECK-NEXT", verify that the previous match was on 1040 // the previous line (i.e. that there is one newline between them). 1041 if (CheckNext(SM, SkippedRegion)) 1042 return StringRef::npos; 1043 1044 // If this check is a "CHECK-SAME", verify that the previous match was on 1045 // the same line (i.e. that there is no newline between them). 1046 if (CheckSame(SM, SkippedRegion)) 1047 return StringRef::npos; 1048 1049 // If this match had "not strings", verify that they don't exist in the 1050 // skipped region. 1051 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) 1052 return StringRef::npos; 1053 } 1054 1055 return LastPos + MatchPos; 1056 } 1057 1058 /// Verify there is a single line in the given buffer. 1059 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const { 1060 if (Pat.getCheckTy() != Check::CheckNext) 1061 return false; 1062 1063 // Count the number of newlines between the previous match and this one. 1064 assert(Buffer.data() != 1065 SM.getMemoryBuffer(SM.FindBufferContainingLoc( 1066 SMLoc::getFromPointer(Buffer.data()))) 1067 ->getBufferStart() && 1068 "CHECK-NEXT can't be the first check in a file"); 1069 1070 const char *FirstNewLine = nullptr; 1071 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 1072 1073 if (NumNewLines == 0) { 1074 SM.PrintMessage(Loc, SourceMgr::DK_Error, 1075 Prefix + "-NEXT: is on the same line as previous match"); 1076 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 1077 "'next' match was here"); 1078 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1079 "previous match ended here"); 1080 return true; 1081 } 1082 1083 if (NumNewLines != 1) { 1084 SM.PrintMessage(Loc, SourceMgr::DK_Error, 1085 Prefix + 1086 "-NEXT: is not on the line after the previous match"); 1087 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 1088 "'next' match was here"); 1089 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1090 "previous match ended here"); 1091 SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note, 1092 "non-matching line after previous match is here"); 1093 return true; 1094 } 1095 1096 return false; 1097 } 1098 1099 /// Verify there is no newline in the given buffer. 1100 bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const { 1101 if (Pat.getCheckTy() != Check::CheckSame) 1102 return false; 1103 1104 // Count the number of newlines between the previous match and this one. 1105 assert(Buffer.data() != 1106 SM.getMemoryBuffer(SM.FindBufferContainingLoc( 1107 SMLoc::getFromPointer(Buffer.data()))) 1108 ->getBufferStart() && 1109 "CHECK-SAME can't be the first check in a file"); 1110 1111 const char *FirstNewLine = nullptr; 1112 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 1113 1114 if (NumNewLines != 0) { 1115 SM.PrintMessage(Loc, SourceMgr::DK_Error, 1116 Prefix + 1117 "-SAME: is not on the same line as the previous match"); 1118 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 1119 "'next' match was here"); 1120 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1121 "previous match ended here"); 1122 return true; 1123 } 1124 1125 return false; 1126 } 1127 1128 /// Verify there's no "not strings" in the given buffer. 1129 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer, 1130 const std::vector<const Pattern *> &NotStrings, 1131 StringMap<StringRef> &VariableTable) const { 1132 for (const Pattern *Pat : NotStrings) { 1133 assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!"); 1134 1135 size_t MatchLen = 0; 1136 size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable); 1137 1138 if (Pos == StringRef::npos) 1139 continue; 1140 1141 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Pos), 1142 SourceMgr::DK_Error, Prefix + "-NOT: string occurred!"); 1143 SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note, 1144 Prefix + "-NOT: pattern specified here"); 1145 return true; 1146 } 1147 1148 return false; 1149 } 1150 1151 /// Match "dag strings" and their mixed "not strings". 1152 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer, 1153 std::vector<const Pattern *> &NotStrings, 1154 StringMap<StringRef> &VariableTable) const { 1155 if (DagNotStrings.empty()) 1156 return 0; 1157 1158 size_t LastPos = 0; 1159 size_t StartPos = LastPos; 1160 1161 for (const Pattern &Pat : DagNotStrings) { 1162 assert((Pat.getCheckTy() == Check::CheckDAG || 1163 Pat.getCheckTy() == Check::CheckNot) && 1164 "Invalid CHECK-DAG or CHECK-NOT!"); 1165 1166 if (Pat.getCheckTy() == Check::CheckNot) { 1167 NotStrings.push_back(&Pat); 1168 continue; 1169 } 1170 1171 assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!"); 1172 1173 size_t MatchLen = 0, MatchPos; 1174 1175 // CHECK-DAG always matches from the start. 1176 StringRef MatchBuffer = Buffer.substr(StartPos); 1177 MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable); 1178 // With a group of CHECK-DAGs, a single mismatching means the match on 1179 // that group of CHECK-DAGs fails immediately. 1180 if (MatchPos == StringRef::npos) { 1181 PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable); 1182 return StringRef::npos; 1183 } 1184 // Re-calc it as the offset relative to the start of the original string. 1185 MatchPos += StartPos; 1186 1187 if (!NotStrings.empty()) { 1188 if (MatchPos < LastPos) { 1189 // Reordered? 1190 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos), 1191 SourceMgr::DK_Error, 1192 Prefix + "-DAG: found a match of CHECK-DAG" 1193 " reordering across a CHECK-NOT"); 1194 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos), 1195 SourceMgr::DK_Note, 1196 Prefix + "-DAG: the farthest match of CHECK-DAG" 1197 " is found here"); 1198 SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note, 1199 Prefix + "-NOT: the crossed pattern specified" 1200 " here"); 1201 SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note, 1202 Prefix + "-DAG: the reordered pattern specified" 1203 " here"); 1204 return StringRef::npos; 1205 } 1206 // All subsequent CHECK-DAGs should be matched from the farthest 1207 // position of all precedent CHECK-DAGs (including this one.) 1208 StartPos = LastPos; 1209 // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to 1210 // CHECK-DAG, verify that there's no 'not' strings occurred in that 1211 // region. 1212 StringRef SkippedRegion = Buffer.slice(LastPos, MatchPos); 1213 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) 1214 return StringRef::npos; 1215 // Clear "not strings". 1216 NotStrings.clear(); 1217 } 1218 1219 // Update the last position with CHECK-DAG matches. 1220 LastPos = std::max(MatchPos + MatchLen, LastPos); 1221 } 1222 1223 return LastPos; 1224 } 1225 1226 // A check prefix must contain only alphanumeric, hyphens and underscores. 1227 static bool ValidateCheckPrefix(StringRef CheckPrefix) { 1228 Regex Validator("^[a-zA-Z0-9_-]*$"); 1229 return Validator.match(CheckPrefix); 1230 } 1231 1232 static bool ValidateCheckPrefixes() { 1233 StringSet<> PrefixSet; 1234 1235 for (StringRef Prefix : CheckPrefixes) { 1236 // Reject empty prefixes. 1237 if (Prefix == "") 1238 return false; 1239 1240 if (!PrefixSet.insert(Prefix).second) 1241 return false; 1242 1243 if (!ValidateCheckPrefix(Prefix)) 1244 return false; 1245 } 1246 1247 return true; 1248 } 1249 1250 // Combines the check prefixes into a single regex so that we can efficiently 1251 // scan for any of the set. 1252 // 1253 // The semantics are that the longest-match wins which matches our regex 1254 // library. 1255 static Regex buildCheckPrefixRegex() { 1256 // I don't think there's a way to specify an initial value for cl::list, 1257 // so if nothing was specified, add the default 1258 if (CheckPrefixes.empty()) 1259 CheckPrefixes.push_back("CHECK"); 1260 1261 // We already validated the contents of CheckPrefixes so just concatenate 1262 // them as alternatives. 1263 SmallString<32> PrefixRegexStr; 1264 for (StringRef Prefix : CheckPrefixes) { 1265 if (Prefix != CheckPrefixes.front()) 1266 PrefixRegexStr.push_back('|'); 1267 1268 PrefixRegexStr.append(Prefix); 1269 } 1270 1271 return Regex(PrefixRegexStr); 1272 } 1273 1274 static void DumpCommandLine(int argc, char **argv) { 1275 errs() << "FileCheck command line: "; 1276 for (int I = 0; I < argc; I++) 1277 errs() << " " << argv[I]; 1278 errs() << "\n"; 1279 } 1280 1281 // Remove local variables from \p VariableTable. Global variables 1282 // (start with '$') are preserved. 1283 static void ClearLocalVars(StringMap<StringRef> &VariableTable) { 1284 SmallVector<StringRef, 16> LocalVars; 1285 for (const auto &Var : VariableTable) 1286 if (Var.first()[0] != '$') 1287 LocalVars.push_back(Var.first()); 1288 1289 for (const auto &Var : LocalVars) 1290 VariableTable.erase(Var); 1291 } 1292 1293 /// Check the input to FileCheck provided in the \p Buffer against the \p 1294 /// CheckStrings read from the check file. 1295 /// 1296 /// Returns false if the input fails to satisfy the checks. 1297 bool CheckInput(SourceMgr &SM, StringRef Buffer, 1298 ArrayRef<CheckString> CheckStrings) { 1299 bool ChecksFailed = false; 1300 1301 /// VariableTable - This holds all the current filecheck variables. 1302 StringMap<StringRef> VariableTable; 1303 1304 for (const auto& Def : GlobalDefines) 1305 VariableTable.insert(StringRef(Def).split('=')); 1306 1307 unsigned i = 0, j = 0, e = CheckStrings.size(); 1308 while (true) { 1309 StringRef CheckRegion; 1310 if (j == e) { 1311 CheckRegion = Buffer; 1312 } else { 1313 const CheckString &CheckLabelStr = CheckStrings[j]; 1314 if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) { 1315 ++j; 1316 continue; 1317 } 1318 1319 // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG 1320 size_t MatchLabelLen = 0; 1321 size_t MatchLabelPos = 1322 CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, VariableTable); 1323 if (MatchLabelPos == StringRef::npos) 1324 // Immediately bail of CHECK-LABEL fails, nothing else we can do. 1325 return false; 1326 1327 CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen); 1328 Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen); 1329 ++j; 1330 } 1331 1332 if (EnableVarScope) 1333 ClearLocalVars(VariableTable); 1334 1335 for (; i != j; ++i) { 1336 const CheckString &CheckStr = CheckStrings[i]; 1337 1338 // Check each string within the scanned region, including a second check 1339 // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG) 1340 size_t MatchLen = 0; 1341 size_t MatchPos = 1342 CheckStr.Check(SM, CheckRegion, false, MatchLen, VariableTable); 1343 1344 if (MatchPos == StringRef::npos) { 1345 ChecksFailed = true; 1346 i = j; 1347 break; 1348 } 1349 1350 CheckRegion = CheckRegion.substr(MatchPos + MatchLen); 1351 } 1352 1353 if (j == e) 1354 break; 1355 } 1356 1357 // Success if no checks failed. 1358 return !ChecksFailed; 1359 } 1360 1361 int main(int argc, char **argv) { 1362 InitLLVM X(argc, argv); 1363 cl::ParseCommandLineOptions(argc, argv); 1364 1365 if (!ValidateCheckPrefixes()) { 1366 errs() << "Supplied check-prefix is invalid! Prefixes must be unique and " 1367 "start with a letter and contain only alphanumeric characters, " 1368 "hyphens and underscores\n"; 1369 return 2; 1370 } 1371 1372 Regex PrefixRE = buildCheckPrefixRegex(); 1373 std::string REError; 1374 if (!PrefixRE.isValid(REError)) { 1375 errs() << "Unable to combine check-prefix strings into a prefix regular " 1376 "expression! This is likely a bug in FileCheck's verification of " 1377 "the check-prefix strings. Regular expression parsing failed " 1378 "with the following error: " 1379 << REError << "\n"; 1380 return 2; 1381 } 1382 1383 SourceMgr SM; 1384 1385 // Read the expected strings from the check file. 1386 ErrorOr<std::unique_ptr<MemoryBuffer>> CheckFileOrErr = 1387 MemoryBuffer::getFileOrSTDIN(CheckFilename); 1388 if (std::error_code EC = CheckFileOrErr.getError()) { 1389 errs() << "Could not open check file '" << CheckFilename 1390 << "': " << EC.message() << '\n'; 1391 return 2; 1392 } 1393 MemoryBuffer &CheckFile = *CheckFileOrErr.get(); 1394 1395 SmallString<4096> CheckFileBuffer; 1396 StringRef CheckFileText = CanonicalizeFile(CheckFile, CheckFileBuffer); 1397 1398 SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer( 1399 CheckFileText, CheckFile.getBufferIdentifier()), 1400 SMLoc()); 1401 1402 std::vector<CheckString> CheckStrings; 1403 if (ReadCheckFile(SM, CheckFileText, PrefixRE, CheckStrings)) 1404 return 2; 1405 1406 // Open the file to check and add it to SourceMgr. 1407 ErrorOr<std::unique_ptr<MemoryBuffer>> InputFileOrErr = 1408 MemoryBuffer::getFileOrSTDIN(InputFilename); 1409 if (std::error_code EC = InputFileOrErr.getError()) { 1410 errs() << "Could not open input file '" << InputFilename 1411 << "': " << EC.message() << '\n'; 1412 return 2; 1413 } 1414 MemoryBuffer &InputFile = *InputFileOrErr.get(); 1415 1416 if (InputFile.getBufferSize() == 0 && !AllowEmptyInput) { 1417 errs() << "FileCheck error: '" << InputFilename << "' is empty.\n"; 1418 DumpCommandLine(argc, argv); 1419 return 2; 1420 } 1421 1422 SmallString<4096> InputFileBuffer; 1423 StringRef InputFileText = CanonicalizeFile(InputFile, InputFileBuffer); 1424 1425 SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer( 1426 InputFileText, InputFile.getBufferIdentifier()), 1427 SMLoc()); 1428 1429 return CheckInput(SM, InputFileText, CheckStrings) ? EXIT_SUCCESS : 1; 1430 } 1431