1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // FileCheck does a line-by line check of a file that validates whether it 11 // contains the expected content. This is useful for regression tests etc. 12 // 13 // This program exits with an exit status of 2 on error, exit status of 0 if 14 // the file matched the expected contents, and exit status of 1 if it did not 15 // contain the expected contents. 16 // 17 //===----------------------------------------------------------------------===// 18 19 #include "llvm/ADT/SmallString.h" 20 #include "llvm/ADT/StringExtras.h" 21 #include "llvm/ADT/StringMap.h" 22 #include "llvm/ADT/StringSet.h" 23 #include "llvm/Support/CommandLine.h" 24 #include "llvm/Support/InitLLVM.h" 25 #include "llvm/Support/MemoryBuffer.h" 26 #include "llvm/Support/Regex.h" 27 #include "llvm/Support/SourceMgr.h" 28 #include "llvm/Support/raw_ostream.h" 29 #include <algorithm> 30 #include <cctype> 31 #include <list> 32 #include <map> 33 #include <string> 34 #include <system_error> 35 #include <vector> 36 using namespace llvm; 37 38 static cl::opt<std::string> 39 CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required); 40 41 static cl::opt<std::string> 42 InputFilename("input-file", cl::desc("File to check (defaults to stdin)"), 43 cl::init("-"), cl::value_desc("filename")); 44 45 static cl::list<std::string> CheckPrefixes( 46 "check-prefix", 47 cl::desc("Prefix to use from check file (defaults to 'CHECK')")); 48 static cl::alias CheckPrefixesAlias( 49 "check-prefixes", cl::aliasopt(CheckPrefixes), cl::CommaSeparated, 50 cl::NotHidden, 51 cl::desc( 52 "Alias for -check-prefix permitting multiple comma separated values")); 53 54 static cl::opt<bool> NoCanonicalizeWhiteSpace( 55 "strict-whitespace", 56 cl::desc("Do not treat all horizontal whitespace as equivalent")); 57 58 static cl::list<std::string> ImplicitCheckNot( 59 "implicit-check-not", 60 cl::desc("Add an implicit negative check with this pattern to every\n" 61 "positive check. This can be used to ensure that no instances of\n" 62 "this pattern occur which are not matched by a positive pattern"), 63 cl::value_desc("pattern")); 64 65 static cl::list<std::string> GlobalDefines("D", cl::Prefix, 66 cl::desc("Define a variable to be used in capture patterns."), 67 cl::value_desc("VAR=VALUE")); 68 69 static cl::opt<bool> AllowEmptyInput( 70 "allow-empty", cl::init(false), 71 cl::desc("Allow the input file to be empty. This is useful when making\n" 72 "checks that some error message does not occur, for example.")); 73 74 static cl::opt<bool> MatchFullLines( 75 "match-full-lines", cl::init(false), 76 cl::desc("Require all positive matches to cover an entire input line.\n" 77 "Allows leading and trailing whitespace if --strict-whitespace\n" 78 "is not also passed.")); 79 80 static cl::opt<bool> EnableVarScope( 81 "enable-var-scope", cl::init(false), 82 cl::desc("Enables scope for regex variables. Variables with names that\n" 83 "do not start with '$' will be reset at the beginning of\n" 84 "each CHECK-LABEL block.")); 85 86 static cl::opt<bool> AllowDeprecatedDagOverlap( 87 "allow-deprecated-dag-overlap", cl::init(false), 88 cl::desc("Enable overlapping among matches in a group of consecutive\n" 89 "CHECK-DAG directives. This option is deprecated and is only\n" 90 "provided for convenience as old tests are migrated to the new\n" 91 "non-overlapping CHECK-DAG implementation.\n")); 92 93 static cl::opt<bool> Verbose("v", cl::init(false), 94 cl::desc("Print directive pattern matches.\n")); 95 96 static cl::opt<bool> VerboseVerbose( 97 "vv", cl::init(false), 98 cl::desc("Print information helpful in diagnosing internal FileCheck\n" 99 "issues. Implies -v.\n")); 100 101 typedef cl::list<std::string>::const_iterator prefix_iterator; 102 103 //===----------------------------------------------------------------------===// 104 // Pattern Handling Code. 105 //===----------------------------------------------------------------------===// 106 107 namespace Check { 108 enum CheckType { 109 CheckNone = 0, 110 CheckPlain, 111 CheckNext, 112 CheckSame, 113 CheckNot, 114 CheckDAG, 115 CheckLabel, 116 CheckEmpty, 117 118 /// Indicates the pattern only matches the end of file. This is used for 119 /// trailing CHECK-NOTs. 120 CheckEOF, 121 122 /// Marks when parsing found a -NOT check combined with another CHECK suffix. 123 CheckBadNot 124 }; 125 } 126 127 class Pattern { 128 SMLoc PatternLoc; 129 130 /// A fixed string to match as the pattern or empty if this pattern requires 131 /// a regex match. 132 StringRef FixedStr; 133 134 /// A regex string to match as the pattern or empty if this pattern requires 135 /// a fixed string to match. 136 std::string RegExStr; 137 138 /// Entries in this vector map to uses of a variable in the pattern, e.g. 139 /// "foo[[bar]]baz". In this case, the RegExStr will contain "foobaz" and 140 /// we'll get an entry in this vector that tells us to insert the value of 141 /// bar at offset 3. 142 std::vector<std::pair<StringRef, unsigned>> VariableUses; 143 144 /// Maps definitions of variables to their parenthesized capture numbers. 145 /// 146 /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 147 /// 1. 148 std::map<StringRef, unsigned> VariableDefs; 149 150 Check::CheckType CheckTy; 151 152 /// Contains the number of line this pattern is in. 153 unsigned LineNumber; 154 155 public: 156 explicit Pattern(Check::CheckType Ty) : CheckTy(Ty) {} 157 158 /// Returns the location in source code. 159 SMLoc getLoc() const { return PatternLoc; } 160 161 bool ParsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM, 162 unsigned LineNumber); 163 size_t Match(StringRef Buffer, size_t &MatchLen, 164 StringMap<StringRef> &VariableTable) const; 165 void PrintVariableUses(const SourceMgr &SM, StringRef Buffer, 166 const StringMap<StringRef> &VariableTable, 167 SMRange MatchRange = None) const; 168 void PrintFuzzyMatch(const SourceMgr &SM, StringRef Buffer, 169 const StringMap<StringRef> &VariableTable) const; 170 171 bool hasVariable() const { 172 return !(VariableUses.empty() && VariableDefs.empty()); 173 } 174 175 Check::CheckType getCheckTy() const { return CheckTy; } 176 177 private: 178 bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM); 179 void AddBackrefToRegEx(unsigned BackrefNum); 180 unsigned 181 ComputeMatchDistance(StringRef Buffer, 182 const StringMap<StringRef> &VariableTable) const; 183 bool EvaluateExpression(StringRef Expr, std::string &Value) const; 184 size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM); 185 }; 186 187 /// Parses the given string into the Pattern. 188 /// 189 /// \p Prefix provides which prefix is being matched, \p SM provides the 190 /// SourceMgr used for error reports, and \p LineNumber is the line number in 191 /// the input file from which the pattern string was read. Returns true in 192 /// case of an error, false otherwise. 193 bool Pattern::ParsePattern(StringRef PatternStr, StringRef Prefix, 194 SourceMgr &SM, unsigned LineNumber) { 195 bool MatchFullLinesHere = MatchFullLines && CheckTy != Check::CheckNot; 196 197 this->LineNumber = LineNumber; 198 PatternLoc = SMLoc::getFromPointer(PatternStr.data()); 199 200 if (!(NoCanonicalizeWhiteSpace && MatchFullLines)) 201 // Ignore trailing whitespace. 202 while (!PatternStr.empty() && 203 (PatternStr.back() == ' ' || PatternStr.back() == '\t')) 204 PatternStr = PatternStr.substr(0, PatternStr.size() - 1); 205 206 // Check that there is something on the line. 207 if (PatternStr.empty() && CheckTy != Check::CheckEmpty) { 208 SM.PrintMessage(PatternLoc, SourceMgr::DK_Error, 209 "found empty check string with prefix '" + Prefix + ":'"); 210 return true; 211 } 212 213 if (!PatternStr.empty() && CheckTy == Check::CheckEmpty) { 214 SM.PrintMessage( 215 PatternLoc, SourceMgr::DK_Error, 216 "found non-empty check string for empty check with prefix '" + Prefix + 217 ":'"); 218 return true; 219 } 220 221 if (CheckTy == Check::CheckEmpty) { 222 RegExStr = "(\n$)"; 223 return false; 224 } 225 226 // Check to see if this is a fixed string, or if it has regex pieces. 227 if (!MatchFullLinesHere && 228 (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos && 229 PatternStr.find("[[") == StringRef::npos))) { 230 FixedStr = PatternStr; 231 return false; 232 } 233 234 if (MatchFullLinesHere) { 235 RegExStr += '^'; 236 if (!NoCanonicalizeWhiteSpace) 237 RegExStr += " *"; 238 } 239 240 // Paren value #0 is for the fully matched string. Any new parenthesized 241 // values add from there. 242 unsigned CurParen = 1; 243 244 // Otherwise, there is at least one regex piece. Build up the regex pattern 245 // by escaping scary characters in fixed strings, building up one big regex. 246 while (!PatternStr.empty()) { 247 // RegEx matches. 248 if (PatternStr.startswith("{{")) { 249 // This is the start of a regex match. Scan for the }}. 250 size_t End = PatternStr.find("}}"); 251 if (End == StringRef::npos) { 252 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 253 SourceMgr::DK_Error, 254 "found start of regex string with no end '}}'"); 255 return true; 256 } 257 258 // Enclose {{}} patterns in parens just like [[]] even though we're not 259 // capturing the result for any purpose. This is required in case the 260 // expression contains an alternation like: CHECK: abc{{x|z}}def. We 261 // want this to turn into: "abc(x|z)def" not "abcx|zdef". 262 RegExStr += '('; 263 ++CurParen; 264 265 if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM)) 266 return true; 267 RegExStr += ')'; 268 269 PatternStr = PatternStr.substr(End + 2); 270 continue; 271 } 272 273 // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .* 274 // (or some other regex) and assigns it to the FileCheck variable 'foo'. The 275 // second form is [[foo]] which is a reference to foo. The variable name 276 // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject 277 // it. This is to catch some common errors. 278 if (PatternStr.startswith("[[")) { 279 // Find the closing bracket pair ending the match. End is going to be an 280 // offset relative to the beginning of the match string. 281 size_t End = FindRegexVarEnd(PatternStr.substr(2), SM); 282 283 if (End == StringRef::npos) { 284 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 285 SourceMgr::DK_Error, 286 "invalid named regex reference, no ]] found"); 287 return true; 288 } 289 290 StringRef MatchStr = PatternStr.substr(2, End); 291 PatternStr = PatternStr.substr(End + 4); 292 293 // Get the regex name (e.g. "foo"). 294 size_t NameEnd = MatchStr.find(':'); 295 StringRef Name = MatchStr.substr(0, NameEnd); 296 297 if (Name.empty()) { 298 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 299 "invalid name in named regex: empty name"); 300 return true; 301 } 302 303 // Verify that the name/expression is well formed. FileCheck currently 304 // supports @LINE, @LINE+number, @LINE-number expressions. The check here 305 // is relaxed, more strict check is performed in \c EvaluateExpression. 306 bool IsExpression = false; 307 for (unsigned i = 0, e = Name.size(); i != e; ++i) { 308 if (i == 0) { 309 if (Name[i] == '$') // Global vars start with '$' 310 continue; 311 if (Name[i] == '@') { 312 if (NameEnd != StringRef::npos) { 313 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 314 SourceMgr::DK_Error, 315 "invalid name in named regex definition"); 316 return true; 317 } 318 IsExpression = true; 319 continue; 320 } 321 } 322 if (Name[i] != '_' && !isalnum(Name[i]) && 323 (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) { 324 SM.PrintMessage(SMLoc::getFromPointer(Name.data() + i), 325 SourceMgr::DK_Error, "invalid name in named regex"); 326 return true; 327 } 328 } 329 330 // Name can't start with a digit. 331 if (isdigit(static_cast<unsigned char>(Name[0]))) { 332 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 333 "invalid name in named regex"); 334 return true; 335 } 336 337 // Handle [[foo]]. 338 if (NameEnd == StringRef::npos) { 339 // Handle variables that were defined earlier on the same line by 340 // emitting a backreference. 341 if (VariableDefs.find(Name) != VariableDefs.end()) { 342 unsigned VarParenNum = VariableDefs[Name]; 343 if (VarParenNum < 1 || VarParenNum > 9) { 344 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 345 SourceMgr::DK_Error, 346 "Can't back-reference more than 9 variables"); 347 return true; 348 } 349 AddBackrefToRegEx(VarParenNum); 350 } else { 351 VariableUses.push_back(std::make_pair(Name, RegExStr.size())); 352 } 353 continue; 354 } 355 356 // Handle [[foo:.*]]. 357 VariableDefs[Name] = CurParen; 358 RegExStr += '('; 359 ++CurParen; 360 361 if (AddRegExToRegEx(MatchStr.substr(NameEnd + 1), CurParen, SM)) 362 return true; 363 364 RegExStr += ')'; 365 } 366 367 // Handle fixed string matches. 368 // Find the end, which is the start of the next regex. 369 size_t FixedMatchEnd = PatternStr.find("{{"); 370 FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[[")); 371 RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd)); 372 PatternStr = PatternStr.substr(FixedMatchEnd); 373 } 374 375 if (MatchFullLinesHere) { 376 if (!NoCanonicalizeWhiteSpace) 377 RegExStr += " *"; 378 RegExStr += '$'; 379 } 380 381 return false; 382 } 383 384 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) { 385 Regex R(RS); 386 std::string Error; 387 if (!R.isValid(Error)) { 388 SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error, 389 "invalid regex: " + Error); 390 return true; 391 } 392 393 RegExStr += RS.str(); 394 CurParen += R.getNumMatches(); 395 return false; 396 } 397 398 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) { 399 assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number"); 400 std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum); 401 RegExStr += Backref; 402 } 403 404 /// Evaluates expression and stores the result to \p Value. 405 /// 406 /// Returns true on success and false when the expression has invalid syntax. 407 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const { 408 // The only supported expression is @LINE([\+-]\d+)? 409 if (!Expr.startswith("@LINE")) 410 return false; 411 Expr = Expr.substr(StringRef("@LINE").size()); 412 int Offset = 0; 413 if (!Expr.empty()) { 414 if (Expr[0] == '+') 415 Expr = Expr.substr(1); 416 else if (Expr[0] != '-') 417 return false; 418 if (Expr.getAsInteger(10, Offset)) 419 return false; 420 } 421 Value = llvm::itostr(LineNumber + Offset); 422 return true; 423 } 424 425 /// Matches the pattern string against the input buffer \p Buffer 426 /// 427 /// This returns the position that is matched or npos if there is no match. If 428 /// there is a match, the size of the matched string is returned in \p 429 /// MatchLen. 430 /// 431 /// The \p VariableTable StringMap provides the current values of filecheck 432 /// variables and is updated if this match defines new values. 433 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen, 434 StringMap<StringRef> &VariableTable) const { 435 // If this is the EOF pattern, match it immediately. 436 if (CheckTy == Check::CheckEOF) { 437 MatchLen = 0; 438 return Buffer.size(); 439 } 440 441 // If this is a fixed string pattern, just match it now. 442 if (!FixedStr.empty()) { 443 MatchLen = FixedStr.size(); 444 return Buffer.find(FixedStr); 445 } 446 447 // Regex match. 448 449 // If there are variable uses, we need to create a temporary string with the 450 // actual value. 451 StringRef RegExToMatch = RegExStr; 452 std::string TmpStr; 453 if (!VariableUses.empty()) { 454 TmpStr = RegExStr; 455 456 unsigned InsertOffset = 0; 457 for (const auto &VariableUse : VariableUses) { 458 std::string Value; 459 460 if (VariableUse.first[0] == '@') { 461 if (!EvaluateExpression(VariableUse.first, Value)) 462 return StringRef::npos; 463 } else { 464 StringMap<StringRef>::iterator it = 465 VariableTable.find(VariableUse.first); 466 // If the variable is undefined, return an error. 467 if (it == VariableTable.end()) 468 return StringRef::npos; 469 470 // Look up the value and escape it so that we can put it into the regex. 471 Value += Regex::escape(it->second); 472 } 473 474 // Plop it into the regex at the adjusted offset. 475 TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset, 476 Value.begin(), Value.end()); 477 InsertOffset += Value.size(); 478 } 479 480 // Match the newly constructed regex. 481 RegExToMatch = TmpStr; 482 } 483 484 SmallVector<StringRef, 4> MatchInfo; 485 if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo)) 486 return StringRef::npos; 487 488 // Successful regex match. 489 assert(!MatchInfo.empty() && "Didn't get any match"); 490 StringRef FullMatch = MatchInfo[0]; 491 492 // If this defines any variables, remember their values. 493 for (const auto &VariableDef : VariableDefs) { 494 assert(VariableDef.second < MatchInfo.size() && "Internal paren error"); 495 VariableTable[VariableDef.first] = MatchInfo[VariableDef.second]; 496 } 497 498 // Like CHECK-NEXT, CHECK-EMPTY's match range is considered to start after 499 // the required preceding newline, which is consumed by the pattern in the 500 // case of CHECK-EMPTY but not CHECK-NEXT. 501 size_t MatchStartSkip = CheckTy == Check::CheckEmpty; 502 MatchLen = FullMatch.size() - MatchStartSkip; 503 return FullMatch.data() - Buffer.data() + MatchStartSkip; 504 } 505 506 507 /// Computes an arbitrary estimate for the quality of matching this pattern at 508 /// the start of \p Buffer; a distance of zero should correspond to a perfect 509 /// match. 510 unsigned 511 Pattern::ComputeMatchDistance(StringRef Buffer, 512 const StringMap<StringRef> &VariableTable) const { 513 // Just compute the number of matching characters. For regular expressions, we 514 // just compare against the regex itself and hope for the best. 515 // 516 // FIXME: One easy improvement here is have the regex lib generate a single 517 // example regular expression which matches, and use that as the example 518 // string. 519 StringRef ExampleString(FixedStr); 520 if (ExampleString.empty()) 521 ExampleString = RegExStr; 522 523 // Only compare up to the first line in the buffer, or the string size. 524 StringRef BufferPrefix = Buffer.substr(0, ExampleString.size()); 525 BufferPrefix = BufferPrefix.split('\n').first; 526 return BufferPrefix.edit_distance(ExampleString); 527 } 528 529 void Pattern::PrintVariableUses(const SourceMgr &SM, StringRef Buffer, 530 const StringMap<StringRef> &VariableTable, 531 SMRange MatchRange) const { 532 // If this was a regular expression using variables, print the current 533 // variable values. 534 if (!VariableUses.empty()) { 535 for (const auto &VariableUse : VariableUses) { 536 SmallString<256> Msg; 537 raw_svector_ostream OS(Msg); 538 StringRef Var = VariableUse.first; 539 if (Var[0] == '@') { 540 std::string Value; 541 if (EvaluateExpression(Var, Value)) { 542 OS << "with expression \""; 543 OS.write_escaped(Var) << "\" equal to \""; 544 OS.write_escaped(Value) << "\""; 545 } else { 546 OS << "uses incorrect expression \""; 547 OS.write_escaped(Var) << "\""; 548 } 549 } else { 550 StringMap<StringRef>::const_iterator it = VariableTable.find(Var); 551 552 // Check for undefined variable references. 553 if (it == VariableTable.end()) { 554 OS << "uses undefined variable \""; 555 OS.write_escaped(Var) << "\""; 556 } else { 557 OS << "with variable \""; 558 OS.write_escaped(Var) << "\" equal to \""; 559 OS.write_escaped(it->second) << "\""; 560 } 561 } 562 563 if (MatchRange.isValid()) 564 SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, OS.str(), 565 {MatchRange}); 566 else 567 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), 568 SourceMgr::DK_Note, OS.str()); 569 } 570 } 571 } 572 573 void Pattern::PrintFuzzyMatch( 574 const SourceMgr &SM, StringRef Buffer, 575 const StringMap<StringRef> &VariableTable) const { 576 // Attempt to find the closest/best fuzzy match. Usually an error happens 577 // because some string in the output didn't exactly match. In these cases, we 578 // would like to show the user a best guess at what "should have" matched, to 579 // save them having to actually check the input manually. 580 size_t NumLinesForward = 0; 581 size_t Best = StringRef::npos; 582 double BestQuality = 0; 583 584 // Use an arbitrary 4k limit on how far we will search. 585 for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) { 586 if (Buffer[i] == '\n') 587 ++NumLinesForward; 588 589 // Patterns have leading whitespace stripped, so skip whitespace when 590 // looking for something which looks like a pattern. 591 if (Buffer[i] == ' ' || Buffer[i] == '\t') 592 continue; 593 594 // Compute the "quality" of this match as an arbitrary combination of the 595 // match distance and the number of lines skipped to get to this match. 596 unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable); 597 double Quality = Distance + (NumLinesForward / 100.); 598 599 if (Quality < BestQuality || Best == StringRef::npos) { 600 Best = i; 601 BestQuality = Quality; 602 } 603 } 604 605 // Print the "possible intended match here" line if we found something 606 // reasonable and not equal to what we showed in the "scanning from here" 607 // line. 608 if (Best && Best != StringRef::npos && BestQuality < 50) { 609 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best), 610 SourceMgr::DK_Note, "possible intended match here"); 611 612 // FIXME: If we wanted to be really friendly we would show why the match 613 // failed, as it can be hard to spot simple one character differences. 614 } 615 } 616 617 /// Finds the closing sequence of a regex variable usage or definition. 618 /// 619 /// \p Str has to point in the beginning of the definition (right after the 620 /// opening sequence). Returns the offset of the closing sequence within Str, 621 /// or npos if it was not found. 622 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) { 623 // Offset keeps track of the current offset within the input Str 624 size_t Offset = 0; 625 // [...] Nesting depth 626 size_t BracketDepth = 0; 627 628 while (!Str.empty()) { 629 if (Str.startswith("]]") && BracketDepth == 0) 630 return Offset; 631 if (Str[0] == '\\') { 632 // Backslash escapes the next char within regexes, so skip them both. 633 Str = Str.substr(2); 634 Offset += 2; 635 } else { 636 switch (Str[0]) { 637 default: 638 break; 639 case '[': 640 BracketDepth++; 641 break; 642 case ']': 643 if (BracketDepth == 0) { 644 SM.PrintMessage(SMLoc::getFromPointer(Str.data()), 645 SourceMgr::DK_Error, 646 "missing closing \"]\" for regex variable"); 647 exit(1); 648 } 649 BracketDepth--; 650 break; 651 } 652 Str = Str.substr(1); 653 Offset++; 654 } 655 } 656 657 return StringRef::npos; 658 } 659 660 //===----------------------------------------------------------------------===// 661 // Check Strings. 662 //===----------------------------------------------------------------------===// 663 664 /// A check that we found in the input file. 665 struct CheckString { 666 /// The pattern to match. 667 Pattern Pat; 668 669 /// Which prefix name this check matched. 670 StringRef Prefix; 671 672 /// The location in the match file that the check string was specified. 673 SMLoc Loc; 674 675 /// All of the strings that are disallowed from occurring between this match 676 /// string and the previous one (or start of file). 677 std::vector<Pattern> DagNotStrings; 678 679 CheckString(const Pattern &P, StringRef S, SMLoc L) 680 : Pat(P), Prefix(S), Loc(L) {} 681 682 size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode, 683 size_t &MatchLen, StringMap<StringRef> &VariableTable) const; 684 685 bool CheckNext(const SourceMgr &SM, StringRef Buffer) const; 686 bool CheckSame(const SourceMgr &SM, StringRef Buffer) const; 687 bool CheckNot(const SourceMgr &SM, StringRef Buffer, 688 const std::vector<const Pattern *> &NotStrings, 689 StringMap<StringRef> &VariableTable) const; 690 size_t CheckDag(const SourceMgr &SM, StringRef Buffer, 691 std::vector<const Pattern *> &NotStrings, 692 StringMap<StringRef> &VariableTable) const; 693 }; 694 695 /// Canonicalize whitespaces in the file. Line endings are replaced with 696 /// UNIX-style '\n'. 697 static StringRef CanonicalizeFile(MemoryBuffer &MB, 698 SmallVectorImpl<char> &OutputBuffer) { 699 OutputBuffer.reserve(MB.getBufferSize()); 700 701 for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd(); 702 Ptr != End; ++Ptr) { 703 // Eliminate trailing dosish \r. 704 if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') { 705 continue; 706 } 707 708 // If current char is not a horizontal whitespace or if horizontal 709 // whitespace canonicalization is disabled, dump it to output as is. 710 if (NoCanonicalizeWhiteSpace || (*Ptr != ' ' && *Ptr != '\t')) { 711 OutputBuffer.push_back(*Ptr); 712 continue; 713 } 714 715 // Otherwise, add one space and advance over neighboring space. 716 OutputBuffer.push_back(' '); 717 while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t')) 718 ++Ptr; 719 } 720 721 // Add a null byte and then return all but that byte. 722 OutputBuffer.push_back('\0'); 723 return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1); 724 } 725 726 static bool IsPartOfWord(char c) { 727 return (isalnum(c) || c == '-' || c == '_'); 728 } 729 730 // Get the size of the prefix extension. 731 static size_t CheckTypeSize(Check::CheckType Ty) { 732 switch (Ty) { 733 case Check::CheckNone: 734 case Check::CheckBadNot: 735 return 0; 736 737 case Check::CheckPlain: 738 return sizeof(":") - 1; 739 740 case Check::CheckNext: 741 return sizeof("-NEXT:") - 1; 742 743 case Check::CheckSame: 744 return sizeof("-SAME:") - 1; 745 746 case Check::CheckNot: 747 return sizeof("-NOT:") - 1; 748 749 case Check::CheckDAG: 750 return sizeof("-DAG:") - 1; 751 752 case Check::CheckLabel: 753 return sizeof("-LABEL:") - 1; 754 755 case Check::CheckEmpty: 756 return sizeof("-EMPTY:") - 1; 757 758 case Check::CheckEOF: 759 llvm_unreachable("Should not be using EOF size"); 760 } 761 762 llvm_unreachable("Bad check type"); 763 } 764 765 // Get a description of the type. 766 static std::string CheckTypeName(StringRef Prefix, Check::CheckType Ty) { 767 switch (Ty) { 768 case Check::CheckNone: 769 return "invalid"; 770 case Check::CheckPlain: 771 return Prefix; 772 case Check::CheckNext: 773 return Prefix.str() + "-NEXT"; 774 case Check::CheckSame: 775 return Prefix.str() + "-SAME"; 776 case Check::CheckNot: 777 return Prefix.str() + "-NOT"; 778 case Check::CheckDAG: 779 return Prefix.str() + "-DAG"; 780 case Check::CheckLabel: 781 return Prefix.str() + "-LABEL"; 782 case Check::CheckEmpty: 783 return Prefix.str() + "-EMPTY"; 784 case Check::CheckEOF: 785 return "implicit EOF"; 786 case Check::CheckBadNot: 787 return "bad NOT"; 788 } 789 llvm_unreachable("unknown CheckType"); 790 } 791 792 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) { 793 if (Buffer.size() <= Prefix.size()) 794 return Check::CheckNone; 795 796 char NextChar = Buffer[Prefix.size()]; 797 798 // Verify that the : is present after the prefix. 799 if (NextChar == ':') 800 return Check::CheckPlain; 801 802 if (NextChar != '-') 803 return Check::CheckNone; 804 805 StringRef Rest = Buffer.drop_front(Prefix.size() + 1); 806 if (Rest.startswith("NEXT:")) 807 return Check::CheckNext; 808 809 if (Rest.startswith("SAME:")) 810 return Check::CheckSame; 811 812 if (Rest.startswith("NOT:")) 813 return Check::CheckNot; 814 815 if (Rest.startswith("DAG:")) 816 return Check::CheckDAG; 817 818 if (Rest.startswith("LABEL:")) 819 return Check::CheckLabel; 820 821 if (Rest.startswith("EMPTY:")) 822 return Check::CheckEmpty; 823 824 // You can't combine -NOT with another suffix. 825 if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") || 826 Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") || 827 Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:") || 828 Rest.startswith("EMPTY-NOT:") || Rest.startswith("NOT-EMPTY:")) 829 return Check::CheckBadNot; 830 831 return Check::CheckNone; 832 } 833 834 // From the given position, find the next character after the word. 835 static size_t SkipWord(StringRef Str, size_t Loc) { 836 while (Loc < Str.size() && IsPartOfWord(Str[Loc])) 837 ++Loc; 838 return Loc; 839 } 840 841 /// Search the buffer for the first prefix in the prefix regular expression. 842 /// 843 /// This searches the buffer using the provided regular expression, however it 844 /// enforces constraints beyond that: 845 /// 1) The found prefix must not be a suffix of something that looks like 846 /// a valid prefix. 847 /// 2) The found prefix must be followed by a valid check type suffix using \c 848 /// FindCheckType above. 849 /// 850 /// The first match of the regular expression to satisfy these two is returned, 851 /// otherwise an empty StringRef is returned to indicate failure. 852 /// 853 /// If this routine returns a valid prefix, it will also shrink \p Buffer to 854 /// start at the beginning of the returned prefix, increment \p LineNumber for 855 /// each new line consumed from \p Buffer, and set \p CheckTy to the type of 856 /// check found by examining the suffix. 857 /// 858 /// If no valid prefix is found, the state of Buffer, LineNumber, and CheckTy 859 /// is unspecified. 860 static StringRef FindFirstMatchingPrefix(Regex &PrefixRE, StringRef &Buffer, 861 unsigned &LineNumber, 862 Check::CheckType &CheckTy) { 863 SmallVector<StringRef, 2> Matches; 864 865 while (!Buffer.empty()) { 866 // Find the first (longest) match using the RE. 867 if (!PrefixRE.match(Buffer, &Matches)) 868 // No match at all, bail. 869 return StringRef(); 870 871 StringRef Prefix = Matches[0]; 872 Matches.clear(); 873 874 assert(Prefix.data() >= Buffer.data() && 875 Prefix.data() < Buffer.data() + Buffer.size() && 876 "Prefix doesn't start inside of buffer!"); 877 size_t Loc = Prefix.data() - Buffer.data(); 878 StringRef Skipped = Buffer.substr(0, Loc); 879 Buffer = Buffer.drop_front(Loc); 880 LineNumber += Skipped.count('\n'); 881 882 // Check that the matched prefix isn't a suffix of some other check-like 883 // word. 884 // FIXME: This is a very ad-hoc check. it would be better handled in some 885 // other way. Among other things it seems hard to distinguish between 886 // intentional and unintentional uses of this feature. 887 if (Skipped.empty() || !IsPartOfWord(Skipped.back())) { 888 // Now extract the type. 889 CheckTy = FindCheckType(Buffer, Prefix); 890 891 // If we've found a valid check type for this prefix, we're done. 892 if (CheckTy != Check::CheckNone) 893 return Prefix; 894 } 895 896 // If we didn't successfully find a prefix, we need to skip this invalid 897 // prefix and continue scanning. We directly skip the prefix that was 898 // matched and any additional parts of that check-like word. 899 Buffer = Buffer.drop_front(SkipWord(Buffer, Prefix.size())); 900 } 901 902 // We ran out of buffer while skipping partial matches so give up. 903 return StringRef(); 904 } 905 906 /// Read the check file, which specifies the sequence of expected strings. 907 /// 908 /// The strings are added to the CheckStrings vector. Returns true in case of 909 /// an error, false otherwise. 910 static bool ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE, 911 std::vector<CheckString> &CheckStrings) { 912 std::vector<Pattern> ImplicitNegativeChecks; 913 for (const auto &PatternString : ImplicitCheckNot) { 914 // Create a buffer with fake command line content in order to display the 915 // command line option responsible for the specific implicit CHECK-NOT. 916 std::string Prefix = (Twine("-") + ImplicitCheckNot.ArgStr + "='").str(); 917 std::string Suffix = "'"; 918 std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy( 919 Prefix + PatternString + Suffix, "command line"); 920 921 StringRef PatternInBuffer = 922 CmdLine->getBuffer().substr(Prefix.size(), PatternString.size()); 923 SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc()); 924 925 ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot)); 926 ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer, 927 "IMPLICIT-CHECK", SM, 0); 928 } 929 930 std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks; 931 932 // LineNumber keeps track of the line on which CheckPrefix instances are 933 // found. 934 unsigned LineNumber = 1; 935 936 while (1) { 937 Check::CheckType CheckTy; 938 939 // See if a prefix occurs in the memory buffer. 940 StringRef UsedPrefix = FindFirstMatchingPrefix(PrefixRE, Buffer, LineNumber, 941 CheckTy); 942 if (UsedPrefix.empty()) 943 break; 944 assert(UsedPrefix.data() == Buffer.data() && 945 "Failed to move Buffer's start forward, or pointed prefix outside " 946 "of the buffer!"); 947 948 // Location to use for error messages. 949 const char *UsedPrefixStart = UsedPrefix.data(); 950 951 // Skip the buffer to the end. 952 Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy)); 953 954 // Complain about useful-looking but unsupported suffixes. 955 if (CheckTy == Check::CheckBadNot) { 956 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error, 957 "unsupported -NOT combo on prefix '" + UsedPrefix + "'"); 958 return true; 959 } 960 961 // Okay, we found the prefix, yay. Remember the rest of the line, but ignore 962 // leading whitespace. 963 if (!(NoCanonicalizeWhiteSpace && MatchFullLines)) 964 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t")); 965 966 // Scan ahead to the end of line. 967 size_t EOL = Buffer.find_first_of("\n\r"); 968 969 // Remember the location of the start of the pattern, for diagnostics. 970 SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data()); 971 972 // Parse the pattern. 973 Pattern P(CheckTy); 974 if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber)) 975 return true; 976 977 // Verify that CHECK-LABEL lines do not define or use variables 978 if ((CheckTy == Check::CheckLabel) && P.hasVariable()) { 979 SM.PrintMessage( 980 SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error, 981 "found '" + UsedPrefix + "-LABEL:'" 982 " with variable definition or use"); 983 return true; 984 } 985 986 Buffer = Buffer.substr(EOL); 987 988 // Verify that CHECK-NEXT/SAME/EMPTY lines have at least one CHECK line before them. 989 if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame || 990 CheckTy == Check::CheckEmpty) && 991 CheckStrings.empty()) { 992 StringRef Type = CheckTy == Check::CheckNext 993 ? "NEXT" 994 : CheckTy == Check::CheckEmpty ? "EMPTY" : "SAME"; 995 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart), 996 SourceMgr::DK_Error, 997 "found '" + UsedPrefix + "-" + Type + 998 "' without previous '" + UsedPrefix + ": line"); 999 return true; 1000 } 1001 1002 // Handle CHECK-DAG/-NOT. 1003 if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) { 1004 DagNotMatches.push_back(P); 1005 continue; 1006 } 1007 1008 // Okay, add the string we captured to the output vector and move on. 1009 CheckStrings.emplace_back(P, UsedPrefix, PatternLoc); 1010 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); 1011 DagNotMatches = ImplicitNegativeChecks; 1012 } 1013 1014 // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first 1015 // prefix as a filler for the error message. 1016 if (!DagNotMatches.empty()) { 1017 CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(), 1018 SMLoc::getFromPointer(Buffer.data())); 1019 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); 1020 } 1021 1022 if (CheckStrings.empty()) { 1023 errs() << "error: no check strings found with prefix" 1024 << (CheckPrefixes.size() > 1 ? "es " : " "); 1025 prefix_iterator I = CheckPrefixes.begin(); 1026 prefix_iterator E = CheckPrefixes.end(); 1027 if (I != E) { 1028 errs() << "\'" << *I << ":'"; 1029 ++I; 1030 } 1031 for (; I != E; ++I) 1032 errs() << ", \'" << *I << ":'"; 1033 1034 errs() << '\n'; 1035 return true; 1036 } 1037 1038 return false; 1039 } 1040 1041 static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM, 1042 StringRef Prefix, SMLoc Loc, const Pattern &Pat, 1043 StringRef Buffer, StringMap<StringRef> &VariableTable, 1044 size_t MatchPos, size_t MatchLen) { 1045 if (ExpectedMatch) { 1046 if (!Verbose) 1047 return; 1048 if (!VerboseVerbose && Pat.getCheckTy() == Check::CheckEOF) 1049 return; 1050 } 1051 SMLoc MatchStart = SMLoc::getFromPointer(Buffer.data() + MatchPos); 1052 SMLoc MatchEnd = SMLoc::getFromPointer(Buffer.data() + MatchPos + MatchLen); 1053 SMRange MatchRange(MatchStart, MatchEnd); 1054 SM.PrintMessage( 1055 Loc, ExpectedMatch ? SourceMgr::DK_Remark : SourceMgr::DK_Error, 1056 CheckTypeName(Prefix, Pat.getCheckTy()) + ": " + 1057 (ExpectedMatch ? "expected" : "excluded") + 1058 " string found in input"); 1059 SM.PrintMessage(MatchStart, SourceMgr::DK_Note, "found here", {MatchRange}); 1060 Pat.PrintVariableUses(SM, Buffer, VariableTable, MatchRange); 1061 } 1062 1063 static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM, 1064 const CheckString &CheckStr, StringRef Buffer, 1065 StringMap<StringRef> &VariableTable, size_t MatchPos, 1066 size_t MatchLen) { 1067 PrintMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat, 1068 Buffer, VariableTable, MatchPos, MatchLen); 1069 } 1070 1071 static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM, 1072 StringRef Prefix, SMLoc Loc, const Pattern &Pat, 1073 StringRef Buffer, 1074 StringMap<StringRef> &VariableTable) { 1075 if (!ExpectedMatch && !VerboseVerbose) 1076 return; 1077 1078 // Otherwise, we have an error, emit an error message. 1079 SM.PrintMessage(Loc, 1080 ExpectedMatch ? SourceMgr::DK_Error : SourceMgr::DK_Remark, 1081 CheckTypeName(Prefix, Pat.getCheckTy()) + ": " + 1082 (ExpectedMatch ? "expected" : "excluded") + 1083 " string not found in input"); 1084 1085 // Print the "scanning from here" line. If the current position is at the 1086 // end of a line, advance to the start of the next line. 1087 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r")); 1088 1089 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1090 "scanning from here"); 1091 1092 // Allow the pattern to print additional information if desired. 1093 Pat.PrintVariableUses(SM, Buffer, VariableTable); 1094 if (ExpectedMatch) 1095 Pat.PrintFuzzyMatch(SM, Buffer, VariableTable); 1096 } 1097 1098 static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM, 1099 const CheckString &CheckStr, StringRef Buffer, 1100 StringMap<StringRef> &VariableTable) { 1101 PrintNoMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat, 1102 Buffer, VariableTable); 1103 } 1104 1105 /// Count the number of newlines in the specified range. 1106 static unsigned CountNumNewlinesBetween(StringRef Range, 1107 const char *&FirstNewLine) { 1108 unsigned NumNewLines = 0; 1109 while (1) { 1110 // Scan for newline. 1111 Range = Range.substr(Range.find_first_of("\n\r")); 1112 if (Range.empty()) 1113 return NumNewLines; 1114 1115 ++NumNewLines; 1116 1117 // Handle \n\r and \r\n as a single newline. 1118 if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') && 1119 (Range[0] != Range[1])) 1120 Range = Range.substr(1); 1121 Range = Range.substr(1); 1122 1123 if (NumNewLines == 1) 1124 FirstNewLine = Range.begin(); 1125 } 1126 } 1127 1128 /// Match check string and its "not strings" and/or "dag strings". 1129 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer, 1130 bool IsLabelScanMode, size_t &MatchLen, 1131 StringMap<StringRef> &VariableTable) const { 1132 size_t LastPos = 0; 1133 std::vector<const Pattern *> NotStrings; 1134 1135 // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL 1136 // bounds; we have not processed variable definitions within the bounded block 1137 // yet so cannot handle any final CHECK-DAG yet; this is handled when going 1138 // over the block again (including the last CHECK-LABEL) in normal mode. 1139 if (!IsLabelScanMode) { 1140 // Match "dag strings" (with mixed "not strings" if any). 1141 LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable); 1142 if (LastPos == StringRef::npos) 1143 return StringRef::npos; 1144 } 1145 1146 // Match itself from the last position after matching CHECK-DAG. 1147 StringRef MatchBuffer = Buffer.substr(LastPos); 1148 size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable); 1149 if (MatchPos == StringRef::npos) { 1150 PrintNoMatch(true, SM, *this, MatchBuffer, VariableTable); 1151 return StringRef::npos; 1152 } 1153 PrintMatch(true, SM, *this, MatchBuffer, VariableTable, MatchPos, MatchLen); 1154 1155 // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT 1156 // or CHECK-NOT 1157 if (!IsLabelScanMode) { 1158 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); 1159 1160 // If this check is a "CHECK-NEXT", verify that the previous match was on 1161 // the previous line (i.e. that there is one newline between them). 1162 if (CheckNext(SM, SkippedRegion)) 1163 return StringRef::npos; 1164 1165 // If this check is a "CHECK-SAME", verify that the previous match was on 1166 // the same line (i.e. that there is no newline between them). 1167 if (CheckSame(SM, SkippedRegion)) 1168 return StringRef::npos; 1169 1170 // If this match had "not strings", verify that they don't exist in the 1171 // skipped region. 1172 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) 1173 return StringRef::npos; 1174 } 1175 1176 return LastPos + MatchPos; 1177 } 1178 1179 /// Verify there is a single line in the given buffer. 1180 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const { 1181 if (Pat.getCheckTy() != Check::CheckNext && 1182 Pat.getCheckTy() != Check::CheckEmpty) 1183 return false; 1184 1185 Twine CheckName = 1186 Prefix + 1187 Twine(Pat.getCheckTy() == Check::CheckEmpty ? "-EMPTY" : "-NEXT"); 1188 1189 // Count the number of newlines between the previous match and this one. 1190 assert(Buffer.data() != 1191 SM.getMemoryBuffer(SM.FindBufferContainingLoc( 1192 SMLoc::getFromPointer(Buffer.data()))) 1193 ->getBufferStart() && 1194 "CHECK-NEXT and CHECK-EMPTY can't be the first check in a file"); 1195 1196 const char *FirstNewLine = nullptr; 1197 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 1198 1199 if (NumNewLines == 0) { 1200 SM.PrintMessage(Loc, SourceMgr::DK_Error, 1201 CheckName + ": is on the same line as previous match"); 1202 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 1203 "'next' match was here"); 1204 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1205 "previous match ended here"); 1206 return true; 1207 } 1208 1209 if (NumNewLines != 1) { 1210 SM.PrintMessage(Loc, SourceMgr::DK_Error, 1211 CheckName + 1212 ": is not on the line after the previous match"); 1213 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 1214 "'next' match was here"); 1215 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1216 "previous match ended here"); 1217 SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note, 1218 "non-matching line after previous match is here"); 1219 return true; 1220 } 1221 1222 return false; 1223 } 1224 1225 /// Verify there is no newline in the given buffer. 1226 bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const { 1227 if (Pat.getCheckTy() != Check::CheckSame) 1228 return false; 1229 1230 // Count the number of newlines between the previous match and this one. 1231 assert(Buffer.data() != 1232 SM.getMemoryBuffer(SM.FindBufferContainingLoc( 1233 SMLoc::getFromPointer(Buffer.data()))) 1234 ->getBufferStart() && 1235 "CHECK-SAME can't be the first check in a file"); 1236 1237 const char *FirstNewLine = nullptr; 1238 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 1239 1240 if (NumNewLines != 0) { 1241 SM.PrintMessage(Loc, SourceMgr::DK_Error, 1242 Prefix + 1243 "-SAME: is not on the same line as the previous match"); 1244 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 1245 "'next' match was here"); 1246 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1247 "previous match ended here"); 1248 return true; 1249 } 1250 1251 return false; 1252 } 1253 1254 /// Verify there's no "not strings" in the given buffer. 1255 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer, 1256 const std::vector<const Pattern *> &NotStrings, 1257 StringMap<StringRef> &VariableTable) const { 1258 for (const Pattern *Pat : NotStrings) { 1259 assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!"); 1260 1261 size_t MatchLen = 0; 1262 size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable); 1263 1264 if (Pos == StringRef::npos) { 1265 PrintNoMatch(false, SM, Prefix, Pat->getLoc(), *Pat, Buffer, 1266 VariableTable); 1267 continue; 1268 } 1269 1270 PrintMatch(false, SM, Prefix, Pat->getLoc(), *Pat, Buffer, VariableTable, 1271 Pos, MatchLen); 1272 1273 return true; 1274 } 1275 1276 return false; 1277 } 1278 1279 /// Match "dag strings" and their mixed "not strings". 1280 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer, 1281 std::vector<const Pattern *> &NotStrings, 1282 StringMap<StringRef> &VariableTable) const { 1283 if (DagNotStrings.empty()) 1284 return 0; 1285 1286 // The start of the search range. 1287 size_t StartPos = 0; 1288 1289 struct MatchRange { 1290 size_t Pos; 1291 size_t End; 1292 }; 1293 // A sorted list of ranges for non-overlapping CHECK-DAG matches. Match 1294 // ranges are erased from this list once they are no longer in the search 1295 // range. 1296 std::list<MatchRange> MatchRanges; 1297 1298 // We need PatItr and PatEnd later for detecting the end of a CHECK-DAG 1299 // group, so we don't use a range-based for loop here. 1300 for (auto PatItr = DagNotStrings.begin(), PatEnd = DagNotStrings.end(); 1301 PatItr != PatEnd; ++PatItr) { 1302 const Pattern &Pat = *PatItr; 1303 assert((Pat.getCheckTy() == Check::CheckDAG || 1304 Pat.getCheckTy() == Check::CheckNot) && 1305 "Invalid CHECK-DAG or CHECK-NOT!"); 1306 1307 if (Pat.getCheckTy() == Check::CheckNot) { 1308 NotStrings.push_back(&Pat); 1309 continue; 1310 } 1311 1312 assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!"); 1313 1314 // CHECK-DAG always matches from the start. 1315 size_t MatchLen = 0, MatchPos = StartPos; 1316 1317 // Search for a match that doesn't overlap a previous match in this 1318 // CHECK-DAG group. 1319 for (auto MI = MatchRanges.begin(), ME = MatchRanges.end(); true; ++MI) { 1320 StringRef MatchBuffer = Buffer.substr(MatchPos); 1321 size_t MatchPosBuf = Pat.Match(MatchBuffer, MatchLen, VariableTable); 1322 // With a group of CHECK-DAGs, a single mismatching means the match on 1323 // that group of CHECK-DAGs fails immediately. 1324 if (MatchPosBuf == StringRef::npos) { 1325 PrintNoMatch(true, SM, Prefix, Pat.getLoc(), Pat, MatchBuffer, 1326 VariableTable); 1327 return StringRef::npos; 1328 } 1329 // Re-calc it as the offset relative to the start of the original string. 1330 MatchPos += MatchPosBuf; 1331 if (VerboseVerbose) 1332 PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, Buffer, VariableTable, 1333 MatchPos, MatchLen); 1334 MatchRange M{MatchPos, MatchPos + MatchLen}; 1335 if (AllowDeprecatedDagOverlap) { 1336 // We don't need to track all matches in this mode, so we just maintain 1337 // one match range that encompasses the current CHECK-DAG group's 1338 // matches. 1339 if (MatchRanges.empty()) 1340 MatchRanges.insert(MatchRanges.end(), M); 1341 else { 1342 auto Block = MatchRanges.begin(); 1343 Block->Pos = std::min(Block->Pos, M.Pos); 1344 Block->End = std::max(Block->End, M.End); 1345 } 1346 break; 1347 } 1348 // Iterate previous matches until overlapping match or insertion point. 1349 bool Overlap = false; 1350 for (; MI != ME; ++MI) { 1351 if (M.Pos < MI->End) { 1352 // !Overlap => New match has no overlap and is before this old match. 1353 // Overlap => New match overlaps this old match. 1354 Overlap = MI->Pos < M.End; 1355 break; 1356 } 1357 } 1358 if (!Overlap) { 1359 // Insert non-overlapping match into list. 1360 MatchRanges.insert(MI, M); 1361 break; 1362 } 1363 if (VerboseVerbose) { 1364 SMLoc OldStart = SMLoc::getFromPointer(Buffer.data() + MI->Pos); 1365 SMLoc OldEnd = SMLoc::getFromPointer(Buffer.data() + MI->End); 1366 SMRange OldRange(OldStart, OldEnd); 1367 SM.PrintMessage(OldStart, SourceMgr::DK_Note, 1368 "match discarded, overlaps earlier DAG match here", 1369 {OldRange}); 1370 } 1371 MatchPos = MI->End; 1372 } 1373 if (!VerboseVerbose) 1374 PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, Buffer, VariableTable, 1375 MatchPos, MatchLen); 1376 1377 // Handle the end of a CHECK-DAG group. 1378 if (std::next(PatItr) == PatEnd || 1379 std::next(PatItr)->getCheckTy() == Check::CheckNot) { 1380 if (!NotStrings.empty()) { 1381 // If there are CHECK-NOTs between two CHECK-DAGs or from CHECK to 1382 // CHECK-DAG, verify that there are no 'not' strings occurred in that 1383 // region. 1384 StringRef SkippedRegion = 1385 Buffer.slice(StartPos, MatchRanges.begin()->Pos); 1386 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) 1387 return StringRef::npos; 1388 // Clear "not strings". 1389 NotStrings.clear(); 1390 } 1391 // All subsequent CHECK-DAGs and CHECK-NOTs should be matched from the 1392 // end of this CHECK-DAG group's match range. 1393 StartPos = MatchRanges.rbegin()->End; 1394 // Don't waste time checking for (impossible) overlaps before that. 1395 MatchRanges.clear(); 1396 } 1397 } 1398 1399 return StartPos; 1400 } 1401 1402 // A check prefix must contain only alphanumeric, hyphens and underscores. 1403 static bool ValidateCheckPrefix(StringRef CheckPrefix) { 1404 Regex Validator("^[a-zA-Z0-9_-]*$"); 1405 return Validator.match(CheckPrefix); 1406 } 1407 1408 static bool ValidateCheckPrefixes() { 1409 StringSet<> PrefixSet; 1410 1411 for (StringRef Prefix : CheckPrefixes) { 1412 // Reject empty prefixes. 1413 if (Prefix == "") 1414 return false; 1415 1416 if (!PrefixSet.insert(Prefix).second) 1417 return false; 1418 1419 if (!ValidateCheckPrefix(Prefix)) 1420 return false; 1421 } 1422 1423 return true; 1424 } 1425 1426 // Combines the check prefixes into a single regex so that we can efficiently 1427 // scan for any of the set. 1428 // 1429 // The semantics are that the longest-match wins which matches our regex 1430 // library. 1431 static Regex buildCheckPrefixRegex() { 1432 // I don't think there's a way to specify an initial value for cl::list, 1433 // so if nothing was specified, add the default 1434 if (CheckPrefixes.empty()) 1435 CheckPrefixes.push_back("CHECK"); 1436 1437 // We already validated the contents of CheckPrefixes so just concatenate 1438 // them as alternatives. 1439 SmallString<32> PrefixRegexStr; 1440 for (StringRef Prefix : CheckPrefixes) { 1441 if (Prefix != CheckPrefixes.front()) 1442 PrefixRegexStr.push_back('|'); 1443 1444 PrefixRegexStr.append(Prefix); 1445 } 1446 1447 return Regex(PrefixRegexStr); 1448 } 1449 1450 static void DumpCommandLine(int argc, char **argv) { 1451 errs() << "FileCheck command line: "; 1452 for (int I = 0; I < argc; I++) 1453 errs() << " " << argv[I]; 1454 errs() << "\n"; 1455 } 1456 1457 // Remove local variables from \p VariableTable. Global variables 1458 // (start with '$') are preserved. 1459 static void ClearLocalVars(StringMap<StringRef> &VariableTable) { 1460 SmallVector<StringRef, 16> LocalVars; 1461 for (const auto &Var : VariableTable) 1462 if (Var.first()[0] != '$') 1463 LocalVars.push_back(Var.first()); 1464 1465 for (const auto &Var : LocalVars) 1466 VariableTable.erase(Var); 1467 } 1468 1469 /// Check the input to FileCheck provided in the \p Buffer against the \p 1470 /// CheckStrings read from the check file. 1471 /// 1472 /// Returns false if the input fails to satisfy the checks. 1473 bool CheckInput(SourceMgr &SM, StringRef Buffer, 1474 ArrayRef<CheckString> CheckStrings) { 1475 bool ChecksFailed = false; 1476 1477 /// VariableTable - This holds all the current filecheck variables. 1478 StringMap<StringRef> VariableTable; 1479 1480 for (const auto& Def : GlobalDefines) 1481 VariableTable.insert(StringRef(Def).split('=')); 1482 1483 unsigned i = 0, j = 0, e = CheckStrings.size(); 1484 while (true) { 1485 StringRef CheckRegion; 1486 if (j == e) { 1487 CheckRegion = Buffer; 1488 } else { 1489 const CheckString &CheckLabelStr = CheckStrings[j]; 1490 if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) { 1491 ++j; 1492 continue; 1493 } 1494 1495 // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG 1496 size_t MatchLabelLen = 0; 1497 size_t MatchLabelPos = 1498 CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, VariableTable); 1499 if (MatchLabelPos == StringRef::npos) 1500 // Immediately bail of CHECK-LABEL fails, nothing else we can do. 1501 return false; 1502 1503 CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen); 1504 Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen); 1505 ++j; 1506 } 1507 1508 if (EnableVarScope) 1509 ClearLocalVars(VariableTable); 1510 1511 for (; i != j; ++i) { 1512 const CheckString &CheckStr = CheckStrings[i]; 1513 1514 // Check each string within the scanned region, including a second check 1515 // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG) 1516 size_t MatchLen = 0; 1517 size_t MatchPos = 1518 CheckStr.Check(SM, CheckRegion, false, MatchLen, VariableTable); 1519 1520 if (MatchPos == StringRef::npos) { 1521 ChecksFailed = true; 1522 i = j; 1523 break; 1524 } 1525 1526 CheckRegion = CheckRegion.substr(MatchPos + MatchLen); 1527 } 1528 1529 if (j == e) 1530 break; 1531 } 1532 1533 // Success if no checks failed. 1534 return !ChecksFailed; 1535 } 1536 1537 int main(int argc, char **argv) { 1538 InitLLVM X(argc, argv); 1539 cl::ParseCommandLineOptions(argc, argv); 1540 1541 if (!ValidateCheckPrefixes()) { 1542 errs() << "Supplied check-prefix is invalid! Prefixes must be unique and " 1543 "start with a letter and contain only alphanumeric characters, " 1544 "hyphens and underscores\n"; 1545 return 2; 1546 } 1547 1548 Regex PrefixRE = buildCheckPrefixRegex(); 1549 std::string REError; 1550 if (!PrefixRE.isValid(REError)) { 1551 errs() << "Unable to combine check-prefix strings into a prefix regular " 1552 "expression! This is likely a bug in FileCheck's verification of " 1553 "the check-prefix strings. Regular expression parsing failed " 1554 "with the following error: " 1555 << REError << "\n"; 1556 return 2; 1557 } 1558 1559 if (VerboseVerbose) 1560 Verbose = true; 1561 1562 SourceMgr SM; 1563 1564 // Read the expected strings from the check file. 1565 ErrorOr<std::unique_ptr<MemoryBuffer>> CheckFileOrErr = 1566 MemoryBuffer::getFileOrSTDIN(CheckFilename); 1567 if (std::error_code EC = CheckFileOrErr.getError()) { 1568 errs() << "Could not open check file '" << CheckFilename 1569 << "': " << EC.message() << '\n'; 1570 return 2; 1571 } 1572 MemoryBuffer &CheckFile = *CheckFileOrErr.get(); 1573 1574 SmallString<4096> CheckFileBuffer; 1575 StringRef CheckFileText = CanonicalizeFile(CheckFile, CheckFileBuffer); 1576 1577 SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer( 1578 CheckFileText, CheckFile.getBufferIdentifier()), 1579 SMLoc()); 1580 1581 std::vector<CheckString> CheckStrings; 1582 if (ReadCheckFile(SM, CheckFileText, PrefixRE, CheckStrings)) 1583 return 2; 1584 1585 // Open the file to check and add it to SourceMgr. 1586 ErrorOr<std::unique_ptr<MemoryBuffer>> InputFileOrErr = 1587 MemoryBuffer::getFileOrSTDIN(InputFilename); 1588 if (std::error_code EC = InputFileOrErr.getError()) { 1589 errs() << "Could not open input file '" << InputFilename 1590 << "': " << EC.message() << '\n'; 1591 return 2; 1592 } 1593 MemoryBuffer &InputFile = *InputFileOrErr.get(); 1594 1595 if (InputFile.getBufferSize() == 0 && !AllowEmptyInput) { 1596 errs() << "FileCheck error: '" << InputFilename << "' is empty.\n"; 1597 DumpCommandLine(argc, argv); 1598 return 2; 1599 } 1600 1601 SmallString<4096> InputFileBuffer; 1602 StringRef InputFileText = CanonicalizeFile(InputFile, InputFileBuffer); 1603 1604 SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer( 1605 InputFileText, InputFile.getBufferIdentifier()), 1606 SMLoc()); 1607 1608 return CheckInput(SM, InputFileText, CheckStrings) ? EXIT_SUCCESS : 1; 1609 } 1610