1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // FileCheck does a line-by line check of a file that validates whether it
11 // contains the expected content.  This is useful for regression tests etc.
12 //
13 // This program exits with an exit status of 2 on error, exit status of 0 if
14 // the file matched the expected contents, and exit status of 1 if it did not
15 // contain the expected contents.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringMap.h"
22 #include "llvm/ADT/StringSet.h"
23 #include "llvm/Support/CommandLine.h"
24 #include "llvm/Support/InitLLVM.h"
25 #include "llvm/Support/MemoryBuffer.h"
26 #include "llvm/Support/Regex.h"
27 #include "llvm/Support/SourceMgr.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <algorithm>
30 #include <cctype>
31 #include <list>
32 #include <map>
33 #include <string>
34 #include <system_error>
35 #include <vector>
36 using namespace llvm;
37 
38 static cl::opt<std::string>
39     CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
40 
41 static cl::opt<std::string>
42     InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
43                   cl::init("-"), cl::value_desc("filename"));
44 
45 static cl::list<std::string> CheckPrefixes(
46     "check-prefix",
47     cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
48 static cl::alias CheckPrefixesAlias(
49     "check-prefixes", cl::aliasopt(CheckPrefixes), cl::CommaSeparated,
50     cl::NotHidden,
51     cl::desc(
52         "Alias for -check-prefix permitting multiple comma separated values"));
53 
54 static cl::opt<bool> NoCanonicalizeWhiteSpace(
55     "strict-whitespace",
56     cl::desc("Do not treat all horizontal whitespace as equivalent"));
57 
58 static cl::list<std::string> ImplicitCheckNot(
59     "implicit-check-not",
60     cl::desc("Add an implicit negative check with this pattern to every\n"
61              "positive check. This can be used to ensure that no instances of\n"
62              "this pattern occur which are not matched by a positive pattern"),
63     cl::value_desc("pattern"));
64 
65 static cl::list<std::string> GlobalDefines("D", cl::Prefix,
66     cl::desc("Define a variable to be used in capture patterns."),
67     cl::value_desc("VAR=VALUE"));
68 
69 static cl::opt<bool> AllowEmptyInput(
70     "allow-empty", cl::init(false),
71     cl::desc("Allow the input file to be empty. This is useful when making\n"
72              "checks that some error message does not occur, for example."));
73 
74 static cl::opt<bool> MatchFullLines(
75     "match-full-lines", cl::init(false),
76     cl::desc("Require all positive matches to cover an entire input line.\n"
77              "Allows leading and trailing whitespace if --strict-whitespace\n"
78              "is not also passed."));
79 
80 static cl::opt<bool> EnableVarScope(
81     "enable-var-scope", cl::init(false),
82     cl::desc("Enables scope for regex variables. Variables with names that\n"
83              "do not start with '$' will be reset at the beginning of\n"
84              "each CHECK-LABEL block."));
85 
86 static cl::opt<bool> AllowDeprecatedDagOverlap(
87     "allow-deprecated-dag-overlap", cl::init(false),
88     cl::desc("Enable overlapping among matches in a group of consecutive\n"
89              "CHECK-DAG directives.  This option is deprecated and is only\n"
90              "provided for convenience as old tests are migrated to the new\n"
91              "non-overlapping CHECK-DAG implementation.\n"));
92 
93 static cl::opt<bool> Verbose("v", cl::init(false),
94                              cl::desc("Print directive pattern matches.\n"));
95 
96 static cl::opt<bool> VerboseVerbose(
97     "vv", cl::init(false),
98     cl::desc("Print information helpful in diagnosing internal FileCheck\n"
99              "issues.  Implies -v.\n"));
100 
101 typedef cl::list<std::string>::const_iterator prefix_iterator;
102 
103 //===----------------------------------------------------------------------===//
104 // Pattern Handling Code.
105 //===----------------------------------------------------------------------===//
106 
107 namespace Check {
108 enum CheckType {
109   CheckNone = 0,
110   CheckPlain,
111   CheckNext,
112   CheckSame,
113   CheckNot,
114   CheckDAG,
115   CheckLabel,
116   CheckEmpty,
117 
118   /// Indicates the pattern only matches the end of file. This is used for
119   /// trailing CHECK-NOTs.
120   CheckEOF,
121 
122   /// Marks when parsing found a -NOT check combined with another CHECK suffix.
123   CheckBadNot
124 };
125 }
126 
127 class Pattern {
128   SMLoc PatternLoc;
129 
130   /// A fixed string to match as the pattern or empty if this pattern requires
131   /// a regex match.
132   StringRef FixedStr;
133 
134   /// A regex string to match as the pattern or empty if this pattern requires
135   /// a fixed string to match.
136   std::string RegExStr;
137 
138   /// Entries in this vector map to uses of a variable in the pattern, e.g.
139   /// "foo[[bar]]baz".  In this case, the RegExStr will contain "foobaz" and
140   /// we'll get an entry in this vector that tells us to insert the value of
141   /// bar at offset 3.
142   std::vector<std::pair<StringRef, unsigned>> VariableUses;
143 
144   /// Maps definitions of variables to their parenthesized capture numbers.
145   ///
146   /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to
147   /// 1.
148   std::map<StringRef, unsigned> VariableDefs;
149 
150   Check::CheckType CheckTy;
151 
152   /// Contains the number of line this pattern is in.
153   unsigned LineNumber;
154 
155 public:
156   explicit Pattern(Check::CheckType Ty) : CheckTy(Ty) {}
157 
158   /// Returns the location in source code.
159   SMLoc getLoc() const { return PatternLoc; }
160 
161   bool ParsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM,
162                     unsigned LineNumber);
163   size_t Match(StringRef Buffer, size_t &MatchLen,
164                StringMap<StringRef> &VariableTable) const;
165   void PrintVariableUses(const SourceMgr &SM, StringRef Buffer,
166                          const StringMap<StringRef> &VariableTable,
167                          SMRange MatchRange = None) const;
168   void PrintFuzzyMatch(const SourceMgr &SM, StringRef Buffer,
169                        const StringMap<StringRef> &VariableTable) const;
170 
171   bool hasVariable() const {
172     return !(VariableUses.empty() && VariableDefs.empty());
173   }
174 
175   Check::CheckType getCheckTy() const { return CheckTy; }
176 
177 private:
178   bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
179   void AddBackrefToRegEx(unsigned BackrefNum);
180   unsigned
181   ComputeMatchDistance(StringRef Buffer,
182                        const StringMap<StringRef> &VariableTable) const;
183   bool EvaluateExpression(StringRef Expr, std::string &Value) const;
184   size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
185 };
186 
187 /// Parses the given string into the Pattern.
188 ///
189 /// \p Prefix provides which prefix is being matched, \p SM provides the
190 /// SourceMgr used for error reports, and \p LineNumber is the line number in
191 /// the input file from which the pattern string was read. Returns true in
192 /// case of an error, false otherwise.
193 bool Pattern::ParsePattern(StringRef PatternStr, StringRef Prefix,
194                            SourceMgr &SM, unsigned LineNumber) {
195   bool MatchFullLinesHere = MatchFullLines && CheckTy != Check::CheckNot;
196 
197   this->LineNumber = LineNumber;
198   PatternLoc = SMLoc::getFromPointer(PatternStr.data());
199 
200   if (!(NoCanonicalizeWhiteSpace && MatchFullLines))
201     // Ignore trailing whitespace.
202     while (!PatternStr.empty() &&
203            (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
204       PatternStr = PatternStr.substr(0, PatternStr.size() - 1);
205 
206   // Check that there is something on the line.
207   if (PatternStr.empty() && CheckTy != Check::CheckEmpty) {
208     SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
209                     "found empty check string with prefix '" + Prefix + ":'");
210     return true;
211   }
212 
213   if (!PatternStr.empty() && CheckTy == Check::CheckEmpty) {
214     SM.PrintMessage(
215         PatternLoc, SourceMgr::DK_Error,
216         "found non-empty check string for empty check with prefix '" + Prefix +
217             ":'");
218     return true;
219   }
220 
221   if (CheckTy == Check::CheckEmpty) {
222     RegExStr = "(\n$)";
223     return false;
224   }
225 
226   // Check to see if this is a fixed string, or if it has regex pieces.
227   if (!MatchFullLinesHere &&
228       (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos &&
229                                  PatternStr.find("[[") == StringRef::npos))) {
230     FixedStr = PatternStr;
231     return false;
232   }
233 
234   if (MatchFullLinesHere) {
235     RegExStr += '^';
236     if (!NoCanonicalizeWhiteSpace)
237       RegExStr += " *";
238   }
239 
240   // Paren value #0 is for the fully matched string.  Any new parenthesized
241   // values add from there.
242   unsigned CurParen = 1;
243 
244   // Otherwise, there is at least one regex piece.  Build up the regex pattern
245   // by escaping scary characters in fixed strings, building up one big regex.
246   while (!PatternStr.empty()) {
247     // RegEx matches.
248     if (PatternStr.startswith("{{")) {
249       // This is the start of a regex match.  Scan for the }}.
250       size_t End = PatternStr.find("}}");
251       if (End == StringRef::npos) {
252         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
253                         SourceMgr::DK_Error,
254                         "found start of regex string with no end '}}'");
255         return true;
256       }
257 
258       // Enclose {{}} patterns in parens just like [[]] even though we're not
259       // capturing the result for any purpose.  This is required in case the
260       // expression contains an alternation like: CHECK:  abc{{x|z}}def.  We
261       // want this to turn into: "abc(x|z)def" not "abcx|zdef".
262       RegExStr += '(';
263       ++CurParen;
264 
265       if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM))
266         return true;
267       RegExStr += ')';
268 
269       PatternStr = PatternStr.substr(End + 2);
270       continue;
271     }
272 
273     // Named RegEx matches.  These are of two forms: [[foo:.*]] which matches .*
274     // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
275     // second form is [[foo]] which is a reference to foo.  The variable name
276     // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
277     // it.  This is to catch some common errors.
278     if (PatternStr.startswith("[[")) {
279       // Find the closing bracket pair ending the match.  End is going to be an
280       // offset relative to the beginning of the match string.
281       size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
282 
283       if (End == StringRef::npos) {
284         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
285                         SourceMgr::DK_Error,
286                         "invalid named regex reference, no ]] found");
287         return true;
288       }
289 
290       StringRef MatchStr = PatternStr.substr(2, End);
291       PatternStr = PatternStr.substr(End + 4);
292 
293       // Get the regex name (e.g. "foo").
294       size_t NameEnd = MatchStr.find(':');
295       StringRef Name = MatchStr.substr(0, NameEnd);
296 
297       if (Name.empty()) {
298         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
299                         "invalid name in named regex: empty name");
300         return true;
301       }
302 
303       // Verify that the name/expression is well formed. FileCheck currently
304       // supports @LINE, @LINE+number, @LINE-number expressions. The check here
305       // is relaxed, more strict check is performed in \c EvaluateExpression.
306       bool IsExpression = false;
307       for (unsigned i = 0, e = Name.size(); i != e; ++i) {
308         if (i == 0) {
309           if (Name[i] == '$')  // Global vars start with '$'
310             continue;
311           if (Name[i] == '@') {
312             if (NameEnd != StringRef::npos) {
313               SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
314                               SourceMgr::DK_Error,
315                               "invalid name in named regex definition");
316               return true;
317             }
318             IsExpression = true;
319             continue;
320           }
321         }
322         if (Name[i] != '_' && !isalnum(Name[i]) &&
323             (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
324           SM.PrintMessage(SMLoc::getFromPointer(Name.data() + i),
325                           SourceMgr::DK_Error, "invalid name in named regex");
326           return true;
327         }
328       }
329 
330       // Name can't start with a digit.
331       if (isdigit(static_cast<unsigned char>(Name[0]))) {
332         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
333                         "invalid name in named regex");
334         return true;
335       }
336 
337       // Handle [[foo]].
338       if (NameEnd == StringRef::npos) {
339         // Handle variables that were defined earlier on the same line by
340         // emitting a backreference.
341         if (VariableDefs.find(Name) != VariableDefs.end()) {
342           unsigned VarParenNum = VariableDefs[Name];
343           if (VarParenNum < 1 || VarParenNum > 9) {
344             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
345                             SourceMgr::DK_Error,
346                             "Can't back-reference more than 9 variables");
347             return true;
348           }
349           AddBackrefToRegEx(VarParenNum);
350         } else {
351           VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
352         }
353         continue;
354       }
355 
356       // Handle [[foo:.*]].
357       VariableDefs[Name] = CurParen;
358       RegExStr += '(';
359       ++CurParen;
360 
361       if (AddRegExToRegEx(MatchStr.substr(NameEnd + 1), CurParen, SM))
362         return true;
363 
364       RegExStr += ')';
365     }
366 
367     // Handle fixed string matches.
368     // Find the end, which is the start of the next regex.
369     size_t FixedMatchEnd = PatternStr.find("{{");
370     FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
371     RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
372     PatternStr = PatternStr.substr(FixedMatchEnd);
373   }
374 
375   if (MatchFullLinesHere) {
376     if (!NoCanonicalizeWhiteSpace)
377       RegExStr += " *";
378     RegExStr += '$';
379   }
380 
381   return false;
382 }
383 
384 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) {
385   Regex R(RS);
386   std::string Error;
387   if (!R.isValid(Error)) {
388     SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
389                     "invalid regex: " + Error);
390     return true;
391   }
392 
393   RegExStr += RS.str();
394   CurParen += R.getNumMatches();
395   return false;
396 }
397 
398 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
399   assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
400   std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum);
401   RegExStr += Backref;
402 }
403 
404 /// Evaluates expression and stores the result to \p Value.
405 ///
406 /// Returns true on success and false when the expression has invalid syntax.
407 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
408   // The only supported expression is @LINE([\+-]\d+)?
409   if (!Expr.startswith("@LINE"))
410     return false;
411   Expr = Expr.substr(StringRef("@LINE").size());
412   int Offset = 0;
413   if (!Expr.empty()) {
414     if (Expr[0] == '+')
415       Expr = Expr.substr(1);
416     else if (Expr[0] != '-')
417       return false;
418     if (Expr.getAsInteger(10, Offset))
419       return false;
420   }
421   Value = llvm::itostr(LineNumber + Offset);
422   return true;
423 }
424 
425 /// Matches the pattern string against the input buffer \p Buffer
426 ///
427 /// This returns the position that is matched or npos if there is no match. If
428 /// there is a match, the size of the matched string is returned in \p
429 /// MatchLen.
430 ///
431 /// The \p VariableTable StringMap provides the current values of filecheck
432 /// variables and is updated if this match defines new values.
433 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
434                       StringMap<StringRef> &VariableTable) const {
435   // If this is the EOF pattern, match it immediately.
436   if (CheckTy == Check::CheckEOF) {
437     MatchLen = 0;
438     return Buffer.size();
439   }
440 
441   // If this is a fixed string pattern, just match it now.
442   if (!FixedStr.empty()) {
443     MatchLen = FixedStr.size();
444     return Buffer.find(FixedStr);
445   }
446 
447   // Regex match.
448 
449   // If there are variable uses, we need to create a temporary string with the
450   // actual value.
451   StringRef RegExToMatch = RegExStr;
452   std::string TmpStr;
453   if (!VariableUses.empty()) {
454     TmpStr = RegExStr;
455 
456     unsigned InsertOffset = 0;
457     for (const auto &VariableUse : VariableUses) {
458       std::string Value;
459 
460       if (VariableUse.first[0] == '@') {
461         if (!EvaluateExpression(VariableUse.first, Value))
462           return StringRef::npos;
463       } else {
464         StringMap<StringRef>::iterator it =
465             VariableTable.find(VariableUse.first);
466         // If the variable is undefined, return an error.
467         if (it == VariableTable.end())
468           return StringRef::npos;
469 
470         // Look up the value and escape it so that we can put it into the regex.
471         Value += Regex::escape(it->second);
472       }
473 
474       // Plop it into the regex at the adjusted offset.
475       TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset,
476                     Value.begin(), Value.end());
477       InsertOffset += Value.size();
478     }
479 
480     // Match the newly constructed regex.
481     RegExToMatch = TmpStr;
482   }
483 
484   SmallVector<StringRef, 4> MatchInfo;
485   if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
486     return StringRef::npos;
487 
488   // Successful regex match.
489   assert(!MatchInfo.empty() && "Didn't get any match");
490   StringRef FullMatch = MatchInfo[0];
491 
492   // If this defines any variables, remember their values.
493   for (const auto &VariableDef : VariableDefs) {
494     assert(VariableDef.second < MatchInfo.size() && "Internal paren error");
495     VariableTable[VariableDef.first] = MatchInfo[VariableDef.second];
496   }
497 
498   // Like CHECK-NEXT, CHECK-EMPTY's match range is considered to start after
499   // the required preceding newline, which is consumed by the pattern in the
500   // case of CHECK-EMPTY but not CHECK-NEXT.
501   size_t MatchStartSkip = CheckTy == Check::CheckEmpty;
502   MatchLen = FullMatch.size() - MatchStartSkip;
503   return FullMatch.data() - Buffer.data() + MatchStartSkip;
504 }
505 
506 
507 /// Computes an arbitrary estimate for the quality of matching this pattern at
508 /// the start of \p Buffer; a distance of zero should correspond to a perfect
509 /// match.
510 unsigned
511 Pattern::ComputeMatchDistance(StringRef Buffer,
512                               const StringMap<StringRef> &VariableTable) const {
513   // Just compute the number of matching characters. For regular expressions, we
514   // just compare against the regex itself and hope for the best.
515   //
516   // FIXME: One easy improvement here is have the regex lib generate a single
517   // example regular expression which matches, and use that as the example
518   // string.
519   StringRef ExampleString(FixedStr);
520   if (ExampleString.empty())
521     ExampleString = RegExStr;
522 
523   // Only compare up to the first line in the buffer, or the string size.
524   StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
525   BufferPrefix = BufferPrefix.split('\n').first;
526   return BufferPrefix.edit_distance(ExampleString);
527 }
528 
529 void Pattern::PrintVariableUses(const SourceMgr &SM, StringRef Buffer,
530                                 const StringMap<StringRef> &VariableTable,
531                                 SMRange MatchRange) const {
532   // If this was a regular expression using variables, print the current
533   // variable values.
534   if (!VariableUses.empty()) {
535     for (const auto &VariableUse : VariableUses) {
536       SmallString<256> Msg;
537       raw_svector_ostream OS(Msg);
538       StringRef Var = VariableUse.first;
539       if (Var[0] == '@') {
540         std::string Value;
541         if (EvaluateExpression(Var, Value)) {
542           OS << "with expression \"";
543           OS.write_escaped(Var) << "\" equal to \"";
544           OS.write_escaped(Value) << "\"";
545         } else {
546           OS << "uses incorrect expression \"";
547           OS.write_escaped(Var) << "\"";
548         }
549       } else {
550         StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
551 
552         // Check for undefined variable references.
553         if (it == VariableTable.end()) {
554           OS << "uses undefined variable \"";
555           OS.write_escaped(Var) << "\"";
556         } else {
557           OS << "with variable \"";
558           OS.write_escaped(Var) << "\" equal to \"";
559           OS.write_escaped(it->second) << "\"";
560         }
561       }
562 
563       if (MatchRange.isValid())
564         SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, OS.str(),
565                         {MatchRange});
566       else
567         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()),
568                         SourceMgr::DK_Note, OS.str());
569     }
570   }
571 }
572 
573 void Pattern::PrintFuzzyMatch(
574     const SourceMgr &SM, StringRef Buffer,
575     const StringMap<StringRef> &VariableTable) const {
576   // Attempt to find the closest/best fuzzy match.  Usually an error happens
577   // because some string in the output didn't exactly match. In these cases, we
578   // would like to show the user a best guess at what "should have" matched, to
579   // save them having to actually check the input manually.
580   size_t NumLinesForward = 0;
581   size_t Best = StringRef::npos;
582   double BestQuality = 0;
583 
584   // Use an arbitrary 4k limit on how far we will search.
585   for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
586     if (Buffer[i] == '\n')
587       ++NumLinesForward;
588 
589     // Patterns have leading whitespace stripped, so skip whitespace when
590     // looking for something which looks like a pattern.
591     if (Buffer[i] == ' ' || Buffer[i] == '\t')
592       continue;
593 
594     // Compute the "quality" of this match as an arbitrary combination of the
595     // match distance and the number of lines skipped to get to this match.
596     unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
597     double Quality = Distance + (NumLinesForward / 100.);
598 
599     if (Quality < BestQuality || Best == StringRef::npos) {
600       Best = i;
601       BestQuality = Quality;
602     }
603   }
604 
605   // Print the "possible intended match here" line if we found something
606   // reasonable and not equal to what we showed in the "scanning from here"
607   // line.
608   if (Best && Best != StringRef::npos && BestQuality < 50) {
609     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
610                     SourceMgr::DK_Note, "possible intended match here");
611 
612     // FIXME: If we wanted to be really friendly we would show why the match
613     // failed, as it can be hard to spot simple one character differences.
614   }
615 }
616 
617 /// Finds the closing sequence of a regex variable usage or definition.
618 ///
619 /// \p Str has to point in the beginning of the definition (right after the
620 /// opening sequence). Returns the offset of the closing sequence within Str,
621 /// or npos if it was not found.
622 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
623   // Offset keeps track of the current offset within the input Str
624   size_t Offset = 0;
625   // [...] Nesting depth
626   size_t BracketDepth = 0;
627 
628   while (!Str.empty()) {
629     if (Str.startswith("]]") && BracketDepth == 0)
630       return Offset;
631     if (Str[0] == '\\') {
632       // Backslash escapes the next char within regexes, so skip them both.
633       Str = Str.substr(2);
634       Offset += 2;
635     } else {
636       switch (Str[0]) {
637       default:
638         break;
639       case '[':
640         BracketDepth++;
641         break;
642       case ']':
643         if (BracketDepth == 0) {
644           SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
645                           SourceMgr::DK_Error,
646                           "missing closing \"]\" for regex variable");
647           exit(1);
648         }
649         BracketDepth--;
650         break;
651       }
652       Str = Str.substr(1);
653       Offset++;
654     }
655   }
656 
657   return StringRef::npos;
658 }
659 
660 //===----------------------------------------------------------------------===//
661 // Check Strings.
662 //===----------------------------------------------------------------------===//
663 
664 /// A check that we found in the input file.
665 struct CheckString {
666   /// The pattern to match.
667   Pattern Pat;
668 
669   /// Which prefix name this check matched.
670   StringRef Prefix;
671 
672   /// The location in the match file that the check string was specified.
673   SMLoc Loc;
674 
675   /// All of the strings that are disallowed from occurring between this match
676   /// string and the previous one (or start of file).
677   std::vector<Pattern> DagNotStrings;
678 
679   CheckString(const Pattern &P, StringRef S, SMLoc L)
680       : Pat(P), Prefix(S), Loc(L) {}
681 
682   size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
683                size_t &MatchLen, StringMap<StringRef> &VariableTable) const;
684 
685   bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
686   bool CheckSame(const SourceMgr &SM, StringRef Buffer) const;
687   bool CheckNot(const SourceMgr &SM, StringRef Buffer,
688                 const std::vector<const Pattern *> &NotStrings,
689                 StringMap<StringRef> &VariableTable) const;
690   size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
691                   std::vector<const Pattern *> &NotStrings,
692                   StringMap<StringRef> &VariableTable) const;
693 };
694 
695 /// Canonicalize whitespaces in the file. Line endings are replaced with
696 /// UNIX-style '\n'.
697 static StringRef CanonicalizeFile(MemoryBuffer &MB,
698                                   SmallVectorImpl<char> &OutputBuffer) {
699   OutputBuffer.reserve(MB.getBufferSize());
700 
701   for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd();
702        Ptr != End; ++Ptr) {
703     // Eliminate trailing dosish \r.
704     if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
705       continue;
706     }
707 
708     // If current char is not a horizontal whitespace or if horizontal
709     // whitespace canonicalization is disabled, dump it to output as is.
710     if (NoCanonicalizeWhiteSpace || (*Ptr != ' ' && *Ptr != '\t')) {
711       OutputBuffer.push_back(*Ptr);
712       continue;
713     }
714 
715     // Otherwise, add one space and advance over neighboring space.
716     OutputBuffer.push_back(' ');
717     while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t'))
718       ++Ptr;
719   }
720 
721   // Add a null byte and then return all but that byte.
722   OutputBuffer.push_back('\0');
723   return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1);
724 }
725 
726 static bool IsPartOfWord(char c) {
727   return (isalnum(c) || c == '-' || c == '_');
728 }
729 
730 // Get the size of the prefix extension.
731 static size_t CheckTypeSize(Check::CheckType Ty) {
732   switch (Ty) {
733   case Check::CheckNone:
734   case Check::CheckBadNot:
735     return 0;
736 
737   case Check::CheckPlain:
738     return sizeof(":") - 1;
739 
740   case Check::CheckNext:
741     return sizeof("-NEXT:") - 1;
742 
743   case Check::CheckSame:
744     return sizeof("-SAME:") - 1;
745 
746   case Check::CheckNot:
747     return sizeof("-NOT:") - 1;
748 
749   case Check::CheckDAG:
750     return sizeof("-DAG:") - 1;
751 
752   case Check::CheckLabel:
753     return sizeof("-LABEL:") - 1;
754 
755   case Check::CheckEmpty:
756     return sizeof("-EMPTY:") - 1;
757 
758   case Check::CheckEOF:
759     llvm_unreachable("Should not be using EOF size");
760   }
761 
762   llvm_unreachable("Bad check type");
763 }
764 
765 // Get a description of the type.
766 static std::string CheckTypeName(StringRef Prefix, Check::CheckType Ty) {
767   switch (Ty) {
768   case Check::CheckNone:
769     return "invalid";
770   case Check::CheckPlain:
771     return Prefix;
772   case Check::CheckNext:
773     return Prefix.str() + "-NEXT";
774   case Check::CheckSame:
775     return Prefix.str() + "-SAME";
776   case Check::CheckNot:
777     return Prefix.str() + "-NOT";
778   case Check::CheckDAG:
779     return Prefix.str() + "-DAG";
780   case Check::CheckLabel:
781     return Prefix.str() + "-LABEL";
782   case Check::CheckEmpty:
783     return Prefix.str() + "-EMPTY";
784   case Check::CheckEOF:
785     return "implicit EOF";
786   case Check::CheckBadNot:
787     return "bad NOT";
788   }
789   llvm_unreachable("unknown CheckType");
790 }
791 
792 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) {
793   if (Buffer.size() <= Prefix.size())
794     return Check::CheckNone;
795 
796   char NextChar = Buffer[Prefix.size()];
797 
798   // Verify that the : is present after the prefix.
799   if (NextChar == ':')
800     return Check::CheckPlain;
801 
802   if (NextChar != '-')
803     return Check::CheckNone;
804 
805   StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
806   if (Rest.startswith("NEXT:"))
807     return Check::CheckNext;
808 
809   if (Rest.startswith("SAME:"))
810     return Check::CheckSame;
811 
812   if (Rest.startswith("NOT:"))
813     return Check::CheckNot;
814 
815   if (Rest.startswith("DAG:"))
816     return Check::CheckDAG;
817 
818   if (Rest.startswith("LABEL:"))
819     return Check::CheckLabel;
820 
821   if (Rest.startswith("EMPTY:"))
822     return Check::CheckEmpty;
823 
824   // You can't combine -NOT with another suffix.
825   if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") ||
826       Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") ||
827       Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:") ||
828       Rest.startswith("EMPTY-NOT:") || Rest.startswith("NOT-EMPTY:"))
829     return Check::CheckBadNot;
830 
831   return Check::CheckNone;
832 }
833 
834 // From the given position, find the next character after the word.
835 static size_t SkipWord(StringRef Str, size_t Loc) {
836   while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
837     ++Loc;
838   return Loc;
839 }
840 
841 /// Search the buffer for the first prefix in the prefix regular expression.
842 ///
843 /// This searches the buffer using the provided regular expression, however it
844 /// enforces constraints beyond that:
845 /// 1) The found prefix must not be a suffix of something that looks like
846 ///    a valid prefix.
847 /// 2) The found prefix must be followed by a valid check type suffix using \c
848 ///    FindCheckType above.
849 ///
850 /// The first match of the regular expression to satisfy these two is returned,
851 /// otherwise an empty StringRef is returned to indicate failure.
852 ///
853 /// If this routine returns a valid prefix, it will also shrink \p Buffer to
854 /// start at the beginning of the returned prefix, increment \p LineNumber for
855 /// each new line consumed from \p Buffer, and set \p CheckTy to the type of
856 /// check found by examining the suffix.
857 ///
858 /// If no valid prefix is found, the state of Buffer, LineNumber, and CheckTy
859 /// is unspecified.
860 static StringRef FindFirstMatchingPrefix(Regex &PrefixRE, StringRef &Buffer,
861                                          unsigned &LineNumber,
862                                          Check::CheckType &CheckTy) {
863   SmallVector<StringRef, 2> Matches;
864 
865   while (!Buffer.empty()) {
866     // Find the first (longest) match using the RE.
867     if (!PrefixRE.match(Buffer, &Matches))
868       // No match at all, bail.
869       return StringRef();
870 
871     StringRef Prefix = Matches[0];
872     Matches.clear();
873 
874     assert(Prefix.data() >= Buffer.data() &&
875            Prefix.data() < Buffer.data() + Buffer.size() &&
876            "Prefix doesn't start inside of buffer!");
877     size_t Loc = Prefix.data() - Buffer.data();
878     StringRef Skipped = Buffer.substr(0, Loc);
879     Buffer = Buffer.drop_front(Loc);
880     LineNumber += Skipped.count('\n');
881 
882     // Check that the matched prefix isn't a suffix of some other check-like
883     // word.
884     // FIXME: This is a very ad-hoc check. it would be better handled in some
885     // other way. Among other things it seems hard to distinguish between
886     // intentional and unintentional uses of this feature.
887     if (Skipped.empty() || !IsPartOfWord(Skipped.back())) {
888       // Now extract the type.
889       CheckTy = FindCheckType(Buffer, Prefix);
890 
891       // If we've found a valid check type for this prefix, we're done.
892       if (CheckTy != Check::CheckNone)
893         return Prefix;
894     }
895 
896     // If we didn't successfully find a prefix, we need to skip this invalid
897     // prefix and continue scanning. We directly skip the prefix that was
898     // matched and any additional parts of that check-like word.
899     Buffer = Buffer.drop_front(SkipWord(Buffer, Prefix.size()));
900   }
901 
902   // We ran out of buffer while skipping partial matches so give up.
903   return StringRef();
904 }
905 
906 /// Read the check file, which specifies the sequence of expected strings.
907 ///
908 /// The strings are added to the CheckStrings vector. Returns true in case of
909 /// an error, false otherwise.
910 static bool ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE,
911                           std::vector<CheckString> &CheckStrings) {
912   std::vector<Pattern> ImplicitNegativeChecks;
913   for (const auto &PatternString : ImplicitCheckNot) {
914     // Create a buffer with fake command line content in order to display the
915     // command line option responsible for the specific implicit CHECK-NOT.
916     std::string Prefix = (Twine("-") + ImplicitCheckNot.ArgStr + "='").str();
917     std::string Suffix = "'";
918     std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy(
919         Prefix + PatternString + Suffix, "command line");
920 
921     StringRef PatternInBuffer =
922         CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
923     SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
924 
925     ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot));
926     ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
927                                                "IMPLICIT-CHECK", SM, 0);
928   }
929 
930   std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;
931 
932   // LineNumber keeps track of the line on which CheckPrefix instances are
933   // found.
934   unsigned LineNumber = 1;
935 
936   while (1) {
937     Check::CheckType CheckTy;
938 
939     // See if a prefix occurs in the memory buffer.
940     StringRef UsedPrefix = FindFirstMatchingPrefix(PrefixRE, Buffer, LineNumber,
941                                                    CheckTy);
942     if (UsedPrefix.empty())
943       break;
944     assert(UsedPrefix.data() == Buffer.data() &&
945            "Failed to move Buffer's start forward, or pointed prefix outside "
946            "of the buffer!");
947 
948     // Location to use for error messages.
949     const char *UsedPrefixStart = UsedPrefix.data();
950 
951     // Skip the buffer to the end.
952     Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy));
953 
954     // Complain about useful-looking but unsupported suffixes.
955     if (CheckTy == Check::CheckBadNot) {
956       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error,
957                       "unsupported -NOT combo on prefix '" + UsedPrefix + "'");
958       return true;
959     }
960 
961     // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
962     // leading whitespace.
963     if (!(NoCanonicalizeWhiteSpace && MatchFullLines))
964       Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
965 
966     // Scan ahead to the end of line.
967     size_t EOL = Buffer.find_first_of("\n\r");
968 
969     // Remember the location of the start of the pattern, for diagnostics.
970     SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
971 
972     // Parse the pattern.
973     Pattern P(CheckTy);
974     if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber))
975       return true;
976 
977     // Verify that CHECK-LABEL lines do not define or use variables
978     if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
979       SM.PrintMessage(
980           SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error,
981           "found '" + UsedPrefix + "-LABEL:'"
982                                    " with variable definition or use");
983       return true;
984     }
985 
986     Buffer = Buffer.substr(EOL);
987 
988     // Verify that CHECK-NEXT/SAME/EMPTY lines have at least one CHECK line before them.
989     if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame ||
990          CheckTy == Check::CheckEmpty) &&
991         CheckStrings.empty()) {
992       StringRef Type = CheckTy == Check::CheckNext
993                            ? "NEXT"
994                            : CheckTy == Check::CheckEmpty ? "EMPTY" : "SAME";
995       SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
996                       SourceMgr::DK_Error,
997                       "found '" + UsedPrefix + "-" + Type +
998                           "' without previous '" + UsedPrefix + ": line");
999       return true;
1000     }
1001 
1002     // Handle CHECK-DAG/-NOT.
1003     if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
1004       DagNotMatches.push_back(P);
1005       continue;
1006     }
1007 
1008     // Okay, add the string we captured to the output vector and move on.
1009     CheckStrings.emplace_back(P, UsedPrefix, PatternLoc);
1010     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
1011     DagNotMatches = ImplicitNegativeChecks;
1012   }
1013 
1014   // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
1015   // prefix as a filler for the error message.
1016   if (!DagNotMatches.empty()) {
1017     CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(),
1018                               SMLoc::getFromPointer(Buffer.data()));
1019     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
1020   }
1021 
1022   if (CheckStrings.empty()) {
1023     errs() << "error: no check strings found with prefix"
1024            << (CheckPrefixes.size() > 1 ? "es " : " ");
1025     prefix_iterator I = CheckPrefixes.begin();
1026     prefix_iterator E = CheckPrefixes.end();
1027     if (I != E) {
1028       errs() << "\'" << *I << ":'";
1029       ++I;
1030     }
1031     for (; I != E; ++I)
1032       errs() << ", \'" << *I << ":'";
1033 
1034     errs() << '\n';
1035     return true;
1036   }
1037 
1038   return false;
1039 }
1040 
1041 static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM,
1042                        StringRef Prefix, SMLoc Loc, const Pattern &Pat,
1043                        StringRef Buffer, StringMap<StringRef> &VariableTable,
1044                        size_t MatchPos, size_t MatchLen) {
1045   if (ExpectedMatch) {
1046     if (!Verbose)
1047       return;
1048     if (!VerboseVerbose && Pat.getCheckTy() == Check::CheckEOF)
1049       return;
1050   }
1051   SMLoc MatchStart = SMLoc::getFromPointer(Buffer.data() + MatchPos);
1052   SMLoc MatchEnd = SMLoc::getFromPointer(Buffer.data() + MatchPos + MatchLen);
1053   SMRange MatchRange(MatchStart, MatchEnd);
1054   SM.PrintMessage(
1055       Loc, ExpectedMatch ? SourceMgr::DK_Remark : SourceMgr::DK_Error,
1056       CheckTypeName(Prefix, Pat.getCheckTy()) + ": " +
1057           (ExpectedMatch ? "expected" : "excluded") +
1058           " string found in input");
1059   SM.PrintMessage(MatchStart, SourceMgr::DK_Note, "found here", {MatchRange});
1060   Pat.PrintVariableUses(SM, Buffer, VariableTable, MatchRange);
1061 }
1062 
1063 static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM,
1064                        const CheckString &CheckStr, StringRef Buffer,
1065                        StringMap<StringRef> &VariableTable, size_t MatchPos,
1066                        size_t MatchLen) {
1067   PrintMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat,
1068              Buffer, VariableTable, MatchPos, MatchLen);
1069 }
1070 
1071 static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM,
1072                          StringRef Prefix, SMLoc Loc, const Pattern &Pat,
1073                          StringRef Buffer,
1074                          StringMap<StringRef> &VariableTable) {
1075   if (!ExpectedMatch && !VerboseVerbose)
1076     return;
1077 
1078   // Otherwise, we have an error, emit an error message.
1079   SM.PrintMessage(Loc,
1080                   ExpectedMatch ? SourceMgr::DK_Error : SourceMgr::DK_Remark,
1081                   CheckTypeName(Prefix, Pat.getCheckTy()) + ": " +
1082                       (ExpectedMatch ? "expected" : "excluded") +
1083                       " string not found in input");
1084 
1085   // Print the "scanning from here" line.  If the current position is at the
1086   // end of a line, advance to the start of the next line.
1087   Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
1088 
1089   SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1090                   "scanning from here");
1091 
1092   // Allow the pattern to print additional information if desired.
1093   Pat.PrintVariableUses(SM, Buffer, VariableTable);
1094   if (ExpectedMatch)
1095     Pat.PrintFuzzyMatch(SM, Buffer, VariableTable);
1096 }
1097 
1098 static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM,
1099                          const CheckString &CheckStr, StringRef Buffer,
1100                          StringMap<StringRef> &VariableTable) {
1101   PrintNoMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat,
1102                Buffer, VariableTable);
1103 }
1104 
1105 /// Count the number of newlines in the specified range.
1106 static unsigned CountNumNewlinesBetween(StringRef Range,
1107                                         const char *&FirstNewLine) {
1108   unsigned NumNewLines = 0;
1109   while (1) {
1110     // Scan for newline.
1111     Range = Range.substr(Range.find_first_of("\n\r"));
1112     if (Range.empty())
1113       return NumNewLines;
1114 
1115     ++NumNewLines;
1116 
1117     // Handle \n\r and \r\n as a single newline.
1118     if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') &&
1119         (Range[0] != Range[1]))
1120       Range = Range.substr(1);
1121     Range = Range.substr(1);
1122 
1123     if (NumNewLines == 1)
1124       FirstNewLine = Range.begin();
1125   }
1126 }
1127 
1128 /// Match check string and its "not strings" and/or "dag strings".
1129 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
1130                           bool IsLabelScanMode, size_t &MatchLen,
1131                           StringMap<StringRef> &VariableTable) const {
1132   size_t LastPos = 0;
1133   std::vector<const Pattern *> NotStrings;
1134 
1135   // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
1136   // bounds; we have not processed variable definitions within the bounded block
1137   // yet so cannot handle any final CHECK-DAG yet; this is handled when going
1138   // over the block again (including the last CHECK-LABEL) in normal mode.
1139   if (!IsLabelScanMode) {
1140     // Match "dag strings" (with mixed "not strings" if any).
1141     LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable);
1142     if (LastPos == StringRef::npos)
1143       return StringRef::npos;
1144   }
1145 
1146   // Match itself from the last position after matching CHECK-DAG.
1147   StringRef MatchBuffer = Buffer.substr(LastPos);
1148   size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1149   if (MatchPos == StringRef::npos) {
1150     PrintNoMatch(true, SM, *this, MatchBuffer, VariableTable);
1151     return StringRef::npos;
1152   }
1153   PrintMatch(true, SM, *this, MatchBuffer, VariableTable, MatchPos, MatchLen);
1154 
1155   // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
1156   // or CHECK-NOT
1157   if (!IsLabelScanMode) {
1158     StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1159 
1160     // If this check is a "CHECK-NEXT", verify that the previous match was on
1161     // the previous line (i.e. that there is one newline between them).
1162     if (CheckNext(SM, SkippedRegion))
1163       return StringRef::npos;
1164 
1165     // If this check is a "CHECK-SAME", verify that the previous match was on
1166     // the same line (i.e. that there is no newline between them).
1167     if (CheckSame(SM, SkippedRegion))
1168       return StringRef::npos;
1169 
1170     // If this match had "not strings", verify that they don't exist in the
1171     // skipped region.
1172     if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1173       return StringRef::npos;
1174   }
1175 
1176   return LastPos + MatchPos;
1177 }
1178 
1179 /// Verify there is a single line in the given buffer.
1180 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
1181   if (Pat.getCheckTy() != Check::CheckNext &&
1182       Pat.getCheckTy() != Check::CheckEmpty)
1183     return false;
1184 
1185   Twine CheckName =
1186       Prefix +
1187       Twine(Pat.getCheckTy() == Check::CheckEmpty ? "-EMPTY" : "-NEXT");
1188 
1189   // Count the number of newlines between the previous match and this one.
1190   assert(Buffer.data() !=
1191              SM.getMemoryBuffer(SM.FindBufferContainingLoc(
1192                                     SMLoc::getFromPointer(Buffer.data())))
1193                  ->getBufferStart() &&
1194          "CHECK-NEXT and CHECK-EMPTY can't be the first check in a file");
1195 
1196   const char *FirstNewLine = nullptr;
1197   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1198 
1199   if (NumNewLines == 0) {
1200     SM.PrintMessage(Loc, SourceMgr::DK_Error,
1201                     CheckName + ": is on the same line as previous match");
1202     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1203                     "'next' match was here");
1204     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1205                     "previous match ended here");
1206     return true;
1207   }
1208 
1209   if (NumNewLines != 1) {
1210     SM.PrintMessage(Loc, SourceMgr::DK_Error,
1211                     CheckName +
1212                         ": is not on the line after the previous match");
1213     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1214                     "'next' match was here");
1215     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1216                     "previous match ended here");
1217     SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
1218                     "non-matching line after previous match is here");
1219     return true;
1220   }
1221 
1222   return false;
1223 }
1224 
1225 /// Verify there is no newline in the given buffer.
1226 bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
1227   if (Pat.getCheckTy() != Check::CheckSame)
1228     return false;
1229 
1230   // Count the number of newlines between the previous match and this one.
1231   assert(Buffer.data() !=
1232              SM.getMemoryBuffer(SM.FindBufferContainingLoc(
1233                                     SMLoc::getFromPointer(Buffer.data())))
1234                  ->getBufferStart() &&
1235          "CHECK-SAME can't be the first check in a file");
1236 
1237   const char *FirstNewLine = nullptr;
1238   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1239 
1240   if (NumNewLines != 0) {
1241     SM.PrintMessage(Loc, SourceMgr::DK_Error,
1242                     Prefix +
1243                         "-SAME: is not on the same line as the previous match");
1244     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1245                     "'next' match was here");
1246     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1247                     "previous match ended here");
1248     return true;
1249   }
1250 
1251   return false;
1252 }
1253 
1254 /// Verify there's no "not strings" in the given buffer.
1255 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
1256                            const std::vector<const Pattern *> &NotStrings,
1257                            StringMap<StringRef> &VariableTable) const {
1258   for (const Pattern *Pat : NotStrings) {
1259     assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
1260 
1261     size_t MatchLen = 0;
1262     size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
1263 
1264     if (Pos == StringRef::npos) {
1265       PrintNoMatch(false, SM, Prefix, Pat->getLoc(), *Pat, Buffer,
1266                    VariableTable);
1267       continue;
1268     }
1269 
1270     PrintMatch(false, SM, Prefix, Pat->getLoc(), *Pat, Buffer, VariableTable,
1271                Pos, MatchLen);
1272 
1273     return true;
1274   }
1275 
1276   return false;
1277 }
1278 
1279 /// Match "dag strings" and their mixed "not strings".
1280 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
1281                              std::vector<const Pattern *> &NotStrings,
1282                              StringMap<StringRef> &VariableTable) const {
1283   if (DagNotStrings.empty())
1284     return 0;
1285 
1286   size_t LastPos = 0;
1287   size_t StartPos = LastPos;
1288 
1289   // A sorted list of ranges for non-overlapping dag matches.
1290   struct Match {
1291     size_t Pos;
1292     size_t End;
1293   };
1294   std::list<Match> Matches;
1295 
1296   for (const Pattern &Pat : DagNotStrings) {
1297     assert((Pat.getCheckTy() == Check::CheckDAG ||
1298             Pat.getCheckTy() == Check::CheckNot) &&
1299            "Invalid CHECK-DAG or CHECK-NOT!");
1300 
1301     if (Pat.getCheckTy() == Check::CheckNot) {
1302       NotStrings.push_back(&Pat);
1303       continue;
1304     }
1305 
1306     assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
1307 
1308     // CHECK-DAG always matches from the start.
1309     size_t MatchLen = 0, MatchPos = StartPos;
1310 
1311     // Search for a match that doesn't overlap a previous match in this
1312     // CHECK-DAG group.
1313     for (auto MI = Matches.begin(), ME = Matches.end(); true; ++MI) {
1314       StringRef MatchBuffer = Buffer.substr(MatchPos);
1315       size_t MatchPosBuf = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1316       // With a group of CHECK-DAGs, a single mismatching means the match on
1317       // that group of CHECK-DAGs fails immediately.
1318       if (MatchPosBuf == StringRef::npos) {
1319         PrintNoMatch(true, SM, Prefix, Pat.getLoc(), Pat, MatchBuffer,
1320                      VariableTable);
1321         return StringRef::npos;
1322       }
1323       // Re-calc it as the offset relative to the start of the original string.
1324       MatchPos += MatchPosBuf;
1325       if (VerboseVerbose)
1326         PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, Buffer, VariableTable,
1327                    MatchPos, MatchLen);
1328       if (AllowDeprecatedDagOverlap)
1329         break;
1330       // Iterate previous matches until overlapping match or insertion point.
1331       Match M{MatchPos, MatchPos + MatchLen};
1332       bool Overlap = false;
1333       for (; MI != ME; ++MI) {
1334         if (M.Pos < MI->End) {
1335           // !Overlap => New match has no overlap and is before this old match.
1336           // Overlap => New match overlaps this old match.
1337           Overlap = MI->Pos < M.End;
1338           break;
1339         }
1340       }
1341       if (!Overlap) {
1342         // Insert non-overlapping match into list.
1343         Matches.insert(MI, M);
1344         break;
1345       }
1346       if (VerboseVerbose) {
1347         SMLoc OldStart = SMLoc::getFromPointer(Buffer.data() + MI->Pos);
1348         SMLoc OldEnd = SMLoc::getFromPointer(Buffer.data() + MI->End);
1349         SMRange OldRange(OldStart, OldEnd);
1350         SM.PrintMessage(OldStart, SourceMgr::DK_Note,
1351                         "match discarded, overlaps earlier DAG match here",
1352                         {OldRange});
1353       }
1354       MatchPos = MI->End;
1355     }
1356     if (!VerboseVerbose)
1357       PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, Buffer, VariableTable,
1358                  MatchPos, MatchLen);
1359 
1360     if (!NotStrings.empty()) {
1361       if (MatchPos < LastPos) {
1362         // Reordered?
1363         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos),
1364                         SourceMgr::DK_Error,
1365                         Prefix + "-DAG: found a match of CHECK-DAG"
1366                                  " reordering across a CHECK-NOT");
1367         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos),
1368                         SourceMgr::DK_Note,
1369                         Prefix + "-DAG: the farthest match of CHECK-DAG"
1370                                  " is found here");
1371         SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note,
1372                         Prefix + "-NOT: the crossed pattern specified"
1373                                  " here");
1374         SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note,
1375                         Prefix + "-DAG: the reordered pattern specified"
1376                                  " here");
1377         return StringRef::npos;
1378       }
1379       // All subsequent CHECK-DAGs should be matched from the farthest
1380       // position of all precedent CHECK-DAGs (not including this one).
1381       StartPos = LastPos;
1382       // Don't waste time checking for (impossible) overlaps before that.
1383       Matches.clear();
1384       Matches.push_back(Match{MatchPos, MatchPos + MatchLen});
1385       // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to
1386       // CHECK-DAG, verify that there's no 'not' strings occurred in that
1387       // region.
1388       StringRef SkippedRegion = Buffer.slice(LastPos, MatchPos);
1389       if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1390         return StringRef::npos;
1391       // Clear "not strings".
1392       NotStrings.clear();
1393     }
1394 
1395     // Update the last position with CHECK-DAG matches.
1396     LastPos = std::max(MatchPos + MatchLen, LastPos);
1397   }
1398 
1399   return LastPos;
1400 }
1401 
1402 // A check prefix must contain only alphanumeric, hyphens and underscores.
1403 static bool ValidateCheckPrefix(StringRef CheckPrefix) {
1404   Regex Validator("^[a-zA-Z0-9_-]*$");
1405   return Validator.match(CheckPrefix);
1406 }
1407 
1408 static bool ValidateCheckPrefixes() {
1409   StringSet<> PrefixSet;
1410 
1411   for (StringRef Prefix : CheckPrefixes) {
1412     // Reject empty prefixes.
1413     if (Prefix == "")
1414       return false;
1415 
1416     if (!PrefixSet.insert(Prefix).second)
1417       return false;
1418 
1419     if (!ValidateCheckPrefix(Prefix))
1420       return false;
1421   }
1422 
1423   return true;
1424 }
1425 
1426 // Combines the check prefixes into a single regex so that we can efficiently
1427 // scan for any of the set.
1428 //
1429 // The semantics are that the longest-match wins which matches our regex
1430 // library.
1431 static Regex buildCheckPrefixRegex() {
1432   // I don't think there's a way to specify an initial value for cl::list,
1433   // so if nothing was specified, add the default
1434   if (CheckPrefixes.empty())
1435     CheckPrefixes.push_back("CHECK");
1436 
1437   // We already validated the contents of CheckPrefixes so just concatenate
1438   // them as alternatives.
1439   SmallString<32> PrefixRegexStr;
1440   for (StringRef Prefix : CheckPrefixes) {
1441     if (Prefix != CheckPrefixes.front())
1442       PrefixRegexStr.push_back('|');
1443 
1444     PrefixRegexStr.append(Prefix);
1445   }
1446 
1447   return Regex(PrefixRegexStr);
1448 }
1449 
1450 static void DumpCommandLine(int argc, char **argv) {
1451   errs() << "FileCheck command line: ";
1452   for (int I = 0; I < argc; I++)
1453     errs() << " " << argv[I];
1454   errs() << "\n";
1455 }
1456 
1457 // Remove local variables from \p VariableTable. Global variables
1458 // (start with '$') are preserved.
1459 static void ClearLocalVars(StringMap<StringRef> &VariableTable) {
1460   SmallVector<StringRef, 16> LocalVars;
1461   for (const auto &Var : VariableTable)
1462     if (Var.first()[0] != '$')
1463       LocalVars.push_back(Var.first());
1464 
1465   for (const auto &Var : LocalVars)
1466     VariableTable.erase(Var);
1467 }
1468 
1469 /// Check the input to FileCheck provided in the \p Buffer against the \p
1470 /// CheckStrings read from the check file.
1471 ///
1472 /// Returns false if the input fails to satisfy the checks.
1473 bool CheckInput(SourceMgr &SM, StringRef Buffer,
1474                 ArrayRef<CheckString> CheckStrings) {
1475   bool ChecksFailed = false;
1476 
1477   /// VariableTable - This holds all the current filecheck variables.
1478   StringMap<StringRef> VariableTable;
1479 
1480   for (const auto& Def : GlobalDefines)
1481     VariableTable.insert(StringRef(Def).split('='));
1482 
1483   unsigned i = 0, j = 0, e = CheckStrings.size();
1484   while (true) {
1485     StringRef CheckRegion;
1486     if (j == e) {
1487       CheckRegion = Buffer;
1488     } else {
1489       const CheckString &CheckLabelStr = CheckStrings[j];
1490       if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) {
1491         ++j;
1492         continue;
1493       }
1494 
1495       // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
1496       size_t MatchLabelLen = 0;
1497       size_t MatchLabelPos =
1498           CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, VariableTable);
1499       if (MatchLabelPos == StringRef::npos)
1500         // Immediately bail of CHECK-LABEL fails, nothing else we can do.
1501         return false;
1502 
1503       CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
1504       Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
1505       ++j;
1506     }
1507 
1508     if (EnableVarScope)
1509       ClearLocalVars(VariableTable);
1510 
1511     for (; i != j; ++i) {
1512       const CheckString &CheckStr = CheckStrings[i];
1513 
1514       // Check each string within the scanned region, including a second check
1515       // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
1516       size_t MatchLen = 0;
1517       size_t MatchPos =
1518           CheckStr.Check(SM, CheckRegion, false, MatchLen, VariableTable);
1519 
1520       if (MatchPos == StringRef::npos) {
1521         ChecksFailed = true;
1522         i = j;
1523         break;
1524       }
1525 
1526       CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
1527     }
1528 
1529     if (j == e)
1530       break;
1531   }
1532 
1533   // Success if no checks failed.
1534   return !ChecksFailed;
1535 }
1536 
1537 int main(int argc, char **argv) {
1538   InitLLVM X(argc, argv);
1539   cl::ParseCommandLineOptions(argc, argv);
1540 
1541   if (!ValidateCheckPrefixes()) {
1542     errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
1543               "start with a letter and contain only alphanumeric characters, "
1544               "hyphens and underscores\n";
1545     return 2;
1546   }
1547 
1548   Regex PrefixRE = buildCheckPrefixRegex();
1549   std::string REError;
1550   if (!PrefixRE.isValid(REError)) {
1551     errs() << "Unable to combine check-prefix strings into a prefix regular "
1552               "expression! This is likely a bug in FileCheck's verification of "
1553               "the check-prefix strings. Regular expression parsing failed "
1554               "with the following error: "
1555            << REError << "\n";
1556     return 2;
1557   }
1558 
1559   if (VerboseVerbose)
1560     Verbose = true;
1561 
1562   SourceMgr SM;
1563 
1564   // Read the expected strings from the check file.
1565   ErrorOr<std::unique_ptr<MemoryBuffer>> CheckFileOrErr =
1566       MemoryBuffer::getFileOrSTDIN(CheckFilename);
1567   if (std::error_code EC = CheckFileOrErr.getError()) {
1568     errs() << "Could not open check file '" << CheckFilename
1569            << "': " << EC.message() << '\n';
1570     return 2;
1571   }
1572   MemoryBuffer &CheckFile = *CheckFileOrErr.get();
1573 
1574   SmallString<4096> CheckFileBuffer;
1575   StringRef CheckFileText = CanonicalizeFile(CheckFile, CheckFileBuffer);
1576 
1577   SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(
1578                             CheckFileText, CheckFile.getBufferIdentifier()),
1579                         SMLoc());
1580 
1581   std::vector<CheckString> CheckStrings;
1582   if (ReadCheckFile(SM, CheckFileText, PrefixRE, CheckStrings))
1583     return 2;
1584 
1585   // Open the file to check and add it to SourceMgr.
1586   ErrorOr<std::unique_ptr<MemoryBuffer>> InputFileOrErr =
1587       MemoryBuffer::getFileOrSTDIN(InputFilename);
1588   if (std::error_code EC = InputFileOrErr.getError()) {
1589     errs() << "Could not open input file '" << InputFilename
1590            << "': " << EC.message() << '\n';
1591     return 2;
1592   }
1593   MemoryBuffer &InputFile = *InputFileOrErr.get();
1594 
1595   if (InputFile.getBufferSize() == 0 && !AllowEmptyInput) {
1596     errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
1597     DumpCommandLine(argc, argv);
1598     return 2;
1599   }
1600 
1601   SmallString<4096> InputFileBuffer;
1602   StringRef InputFileText = CanonicalizeFile(InputFile, InputFileBuffer);
1603 
1604   SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(
1605                             InputFileText, InputFile.getBufferIdentifier()),
1606                         SMLoc());
1607 
1608   return CheckInput(SM, InputFileText, CheckStrings) ? EXIT_SUCCESS : 1;
1609 }
1610