1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
// FileCheck does a line-by-line check of a file that validates whether it
// contains the expected content.  This is useful for regression tests etc.
12 //
13 // This program exits with an exit status of 2 on error, exit status of 0 if
14 // the file matched the expected contents, and exit status of 1 if it did not
15 // contain the expected contents.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringMap.h"
22 #include "llvm/ADT/StringSet.h"
23 #include "llvm/Support/CommandLine.h"
24 #include "llvm/Support/InitLLVM.h"
25 #include "llvm/Support/MemoryBuffer.h"
26 #include "llvm/Support/Regex.h"
27 #include "llvm/Support/SourceMgr.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <algorithm>
30 #include <cctype>
31 #include <map>
32 #include <string>
33 #include <system_error>
34 #include <vector>
35 using namespace llvm;
36 
37 static cl::opt<std::string>
38     CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
39 
40 static cl::opt<std::string>
41     InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
42                   cl::init("-"), cl::value_desc("filename"));
43 
44 static cl::list<std::string> CheckPrefixes(
45     "check-prefix",
46     cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
47 static cl::alias CheckPrefixesAlias(
48     "check-prefixes", cl::aliasopt(CheckPrefixes), cl::CommaSeparated,
49     cl::NotHidden,
50     cl::desc(
51         "Alias for -check-prefix permitting multiple comma separated values"));
52 
53 static cl::opt<bool> NoCanonicalizeWhiteSpace(
54     "strict-whitespace",
55     cl::desc("Do not treat all horizontal whitespace as equivalent"));
56 
57 static cl::list<std::string> ImplicitCheckNot(
58     "implicit-check-not",
59     cl::desc("Add an implicit negative check with this pattern to every\n"
60              "positive check. This can be used to ensure that no instances of\n"
61              "this pattern occur which are not matched by a positive pattern"),
62     cl::value_desc("pattern"));
63 
64 static cl::list<std::string> GlobalDefines("D", cl::Prefix,
65     cl::desc("Define a variable to be used in capture patterns."),
66     cl::value_desc("VAR=VALUE"));
67 
68 static cl::opt<bool> AllowEmptyInput(
69     "allow-empty", cl::init(false),
70     cl::desc("Allow the input file to be empty. This is useful when making\n"
71              "checks that some error message does not occur, for example."));
72 
73 static cl::opt<bool> MatchFullLines(
74     "match-full-lines", cl::init(false),
75     cl::desc("Require all positive matches to cover an entire input line.\n"
76              "Allows leading and trailing whitespace if --strict-whitespace\n"
77              "is not also passed."));
78 
79 static cl::opt<bool> EnableVarScope(
80     "enable-var-scope", cl::init(false),
81     cl::desc("Enables scope for regex variables. Variables with names that\n"
82              "do not start with '$' will be reset at the beginning of\n"
83              "each CHECK-LABEL block."));
84 
85 typedef cl::list<std::string>::const_iterator prefix_iterator;
86 
87 //===----------------------------------------------------------------------===//
88 // Pattern Handling Code.
89 //===----------------------------------------------------------------------===//
90 
namespace Check {
/// The kind of check directive a pattern belongs to.
enum CheckType {
  /// No valid check directive.
  CheckNone = 0,

  /// "PREFIX:" directive.
  CheckPlain,

  /// "PREFIX-NEXT:" directive.
  CheckNext,

  /// "PREFIX-SAME:" directive.
  CheckSame,

  /// "PREFIX-NOT:" directive.
  CheckNot,

  /// "PREFIX-DAG:" directive.
  CheckDAG,

  /// "PREFIX-LABEL:" directive.
  CheckLabel,

  /// Indicates the pattern only matches the end of file. This is used for
  /// trailing CHECK-NOTs.
  CheckEOF,

  /// Marks when parsing found a -NOT check combined with another CHECK suffix.
  CheckBadNot
};
} // end namespace Check
109 
110 class Pattern {
111   SMLoc PatternLoc;
112 
113   /// A fixed string to match as the pattern or empty if this pattern requires
114   /// a regex match.
115   StringRef FixedStr;
116 
117   /// A regex string to match as the pattern or empty if this pattern requires
118   /// a fixed string to match.
119   std::string RegExStr;
120 
121   /// Entries in this vector map to uses of a variable in the pattern, e.g.
122   /// "foo[[bar]]baz".  In this case, the RegExStr will contain "foobaz" and
123   /// we'll get an entry in this vector that tells us to insert the value of
124   /// bar at offset 3.
125   std::vector<std::pair<StringRef, unsigned>> VariableUses;
126 
127   /// Maps definitions of variables to their parenthesized capture numbers.
128   ///
129   /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to
130   /// 1.
131   std::map<StringRef, unsigned> VariableDefs;
132 
133   Check::CheckType CheckTy;
134 
135   /// Contains the number of line this pattern is in.
136   unsigned LineNumber;
137 
138 public:
139   explicit Pattern(Check::CheckType Ty) : CheckTy(Ty) {}
140 
141   /// Returns the location in source code.
142   SMLoc getLoc() const { return PatternLoc; }
143 
144   bool ParsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM,
145                     unsigned LineNumber);
146   size_t Match(StringRef Buffer, size_t &MatchLen,
147                StringMap<StringRef> &VariableTable) const;
148   void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
149                         const StringMap<StringRef> &VariableTable) const;
150 
151   bool hasVariable() const {
152     return !(VariableUses.empty() && VariableDefs.empty());
153   }
154 
155   Check::CheckType getCheckTy() const { return CheckTy; }
156 
157 private:
158   bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
159   void AddBackrefToRegEx(unsigned BackrefNum);
160   unsigned
161   ComputeMatchDistance(StringRef Buffer,
162                        const StringMap<StringRef> &VariableTable) const;
163   bool EvaluateExpression(StringRef Expr, std::string &Value) const;
164   size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
165 };
166 
167 /// Parses the given string into the Pattern.
168 ///
169 /// \p Prefix provides which prefix is being matched, \p SM provides the
170 /// SourceMgr used for error reports, and \p LineNumber is the line number in
171 /// the input file from which the pattern string was read. Returns true in
172 /// case of an error, false otherwise.
173 bool Pattern::ParsePattern(StringRef PatternStr, StringRef Prefix,
174                            SourceMgr &SM, unsigned LineNumber) {
175   bool MatchFullLinesHere = MatchFullLines && CheckTy != Check::CheckNot;
176 
177   this->LineNumber = LineNumber;
178   PatternLoc = SMLoc::getFromPointer(PatternStr.data());
179 
180   if (!(NoCanonicalizeWhiteSpace && MatchFullLines))
181     // Ignore trailing whitespace.
182     while (!PatternStr.empty() &&
183            (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
184       PatternStr = PatternStr.substr(0, PatternStr.size() - 1);
185 
186   // Check that there is something on the line.
187   if (PatternStr.empty()) {
188     SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
189                     "found empty check string with prefix '" + Prefix + ":'");
190     return true;
191   }
192 
193   // Check to see if this is a fixed string, or if it has regex pieces.
194   if (!MatchFullLinesHere &&
195       (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos &&
196                                  PatternStr.find("[[") == StringRef::npos))) {
197     FixedStr = PatternStr;
198     return false;
199   }
200 
201   if (MatchFullLinesHere) {
202     RegExStr += '^';
203     if (!NoCanonicalizeWhiteSpace)
204       RegExStr += " *";
205   }
206 
207   // Paren value #0 is for the fully matched string.  Any new parenthesized
208   // values add from there.
209   unsigned CurParen = 1;
210 
211   // Otherwise, there is at least one regex piece.  Build up the regex pattern
212   // by escaping scary characters in fixed strings, building up one big regex.
213   while (!PatternStr.empty()) {
214     // RegEx matches.
215     if (PatternStr.startswith("{{")) {
216       // This is the start of a regex match.  Scan for the }}.
217       size_t End = PatternStr.find("}}");
218       if (End == StringRef::npos) {
219         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
220                         SourceMgr::DK_Error,
221                         "found start of regex string with no end '}}'");
222         return true;
223       }
224 
225       // Enclose {{}} patterns in parens just like [[]] even though we're not
226       // capturing the result for any purpose.  This is required in case the
227       // expression contains an alternation like: CHECK:  abc{{x|z}}def.  We
228       // want this to turn into: "abc(x|z)def" not "abcx|zdef".
229       RegExStr += '(';
230       ++CurParen;
231 
232       if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM))
233         return true;
234       RegExStr += ')';
235 
236       PatternStr = PatternStr.substr(End + 2);
237       continue;
238     }
239 
240     // Named RegEx matches.  These are of two forms: [[foo:.*]] which matches .*
241     // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
242     // second form is [[foo]] which is a reference to foo.  The variable name
243     // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
244     // it.  This is to catch some common errors.
245     if (PatternStr.startswith("[[")) {
246       // Find the closing bracket pair ending the match.  End is going to be an
247       // offset relative to the beginning of the match string.
248       size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
249 
250       if (End == StringRef::npos) {
251         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
252                         SourceMgr::DK_Error,
253                         "invalid named regex reference, no ]] found");
254         return true;
255       }
256 
257       StringRef MatchStr = PatternStr.substr(2, End);
258       PatternStr = PatternStr.substr(End + 4);
259 
260       // Get the regex name (e.g. "foo").
261       size_t NameEnd = MatchStr.find(':');
262       StringRef Name = MatchStr.substr(0, NameEnd);
263 
264       if (Name.empty()) {
265         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
266                         "invalid name in named regex: empty name");
267         return true;
268       }
269 
270       // Verify that the name/expression is well formed. FileCheck currently
271       // supports @LINE, @LINE+number, @LINE-number expressions. The check here
272       // is relaxed, more strict check is performed in \c EvaluateExpression.
273       bool IsExpression = false;
274       for (unsigned i = 0, e = Name.size(); i != e; ++i) {
275         if (i == 0) {
276           if (Name[i] == '$')  // Global vars start with '$'
277             continue;
278           if (Name[i] == '@') {
279             if (NameEnd != StringRef::npos) {
280               SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
281                               SourceMgr::DK_Error,
282                               "invalid name in named regex definition");
283               return true;
284             }
285             IsExpression = true;
286             continue;
287           }
288         }
289         if (Name[i] != '_' && !isalnum(Name[i]) &&
290             (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
291           SM.PrintMessage(SMLoc::getFromPointer(Name.data() + i),
292                           SourceMgr::DK_Error, "invalid name in named regex");
293           return true;
294         }
295       }
296 
297       // Name can't start with a digit.
298       if (isdigit(static_cast<unsigned char>(Name[0]))) {
299         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
300                         "invalid name in named regex");
301         return true;
302       }
303 
304       // Handle [[foo]].
305       if (NameEnd == StringRef::npos) {
306         // Handle variables that were defined earlier on the same line by
307         // emitting a backreference.
308         if (VariableDefs.find(Name) != VariableDefs.end()) {
309           unsigned VarParenNum = VariableDefs[Name];
310           if (VarParenNum < 1 || VarParenNum > 9) {
311             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
312                             SourceMgr::DK_Error,
313                             "Can't back-reference more than 9 variables");
314             return true;
315           }
316           AddBackrefToRegEx(VarParenNum);
317         } else {
318           VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
319         }
320         continue;
321       }
322 
323       // Handle [[foo:.*]].
324       VariableDefs[Name] = CurParen;
325       RegExStr += '(';
326       ++CurParen;
327 
328       if (AddRegExToRegEx(MatchStr.substr(NameEnd + 1), CurParen, SM))
329         return true;
330 
331       RegExStr += ')';
332     }
333 
334     // Handle fixed string matches.
335     // Find the end, which is the start of the next regex.
336     size_t FixedMatchEnd = PatternStr.find("{{");
337     FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
338     RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
339     PatternStr = PatternStr.substr(FixedMatchEnd);
340   }
341 
342   if (MatchFullLinesHere) {
343     if (!NoCanonicalizeWhiteSpace)
344       RegExStr += " *";
345     RegExStr += '$';
346   }
347 
348   return false;
349 }
350 
351 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) {
352   Regex R(RS);
353   std::string Error;
354   if (!R.isValid(Error)) {
355     SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
356                     "invalid regex: " + Error);
357     return true;
358   }
359 
360   RegExStr += RS.str();
361   CurParen += R.getNumMatches();
362   return false;
363 }
364 
365 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
366   assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
367   std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum);
368   RegExStr += Backref;
369 }
370 
371 /// Evaluates expression and stores the result to \p Value.
372 ///
373 /// Returns true on success and false when the expression has invalid syntax.
374 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
375   // The only supported expression is @LINE([\+-]\d+)?
376   if (!Expr.startswith("@LINE"))
377     return false;
378   Expr = Expr.substr(StringRef("@LINE").size());
379   int Offset = 0;
380   if (!Expr.empty()) {
381     if (Expr[0] == '+')
382       Expr = Expr.substr(1);
383     else if (Expr[0] != '-')
384       return false;
385     if (Expr.getAsInteger(10, Offset))
386       return false;
387   }
388   Value = llvm::itostr(LineNumber + Offset);
389   return true;
390 }
391 
392 /// Matches the pattern string against the input buffer \p Buffer
393 ///
394 /// This returns the position that is matched or npos if there is no match. If
395 /// there is a match, the size of the matched string is returned in \p
396 /// MatchLen.
397 ///
398 /// The \p VariableTable StringMap provides the current values of filecheck
399 /// variables and is updated if this match defines new values.
400 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
401                       StringMap<StringRef> &VariableTable) const {
402   // If this is the EOF pattern, match it immediately.
403   if (CheckTy == Check::CheckEOF) {
404     MatchLen = 0;
405     return Buffer.size();
406   }
407 
408   // If this is a fixed string pattern, just match it now.
409   if (!FixedStr.empty()) {
410     MatchLen = FixedStr.size();
411     return Buffer.find(FixedStr);
412   }
413 
414   // Regex match.
415 
416   // If there are variable uses, we need to create a temporary string with the
417   // actual value.
418   StringRef RegExToMatch = RegExStr;
419   std::string TmpStr;
420   if (!VariableUses.empty()) {
421     TmpStr = RegExStr;
422 
423     unsigned InsertOffset = 0;
424     for (const auto &VariableUse : VariableUses) {
425       std::string Value;
426 
427       if (VariableUse.first[0] == '@') {
428         if (!EvaluateExpression(VariableUse.first, Value))
429           return StringRef::npos;
430       } else {
431         StringMap<StringRef>::iterator it =
432             VariableTable.find(VariableUse.first);
433         // If the variable is undefined, return an error.
434         if (it == VariableTable.end())
435           return StringRef::npos;
436 
437         // Look up the value and escape it so that we can put it into the regex.
438         Value += Regex::escape(it->second);
439       }
440 
441       // Plop it into the regex at the adjusted offset.
442       TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset,
443                     Value.begin(), Value.end());
444       InsertOffset += Value.size();
445     }
446 
447     // Match the newly constructed regex.
448     RegExToMatch = TmpStr;
449   }
450 
451   SmallVector<StringRef, 4> MatchInfo;
452   if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
453     return StringRef::npos;
454 
455   // Successful regex match.
456   assert(!MatchInfo.empty() && "Didn't get any match");
457   StringRef FullMatch = MatchInfo[0];
458 
459   // If this defines any variables, remember their values.
460   for (const auto &VariableDef : VariableDefs) {
461     assert(VariableDef.second < MatchInfo.size() && "Internal paren error");
462     VariableTable[VariableDef.first] = MatchInfo[VariableDef.second];
463   }
464 
465   MatchLen = FullMatch.size();
466   return FullMatch.data() - Buffer.data();
467 }
468 
469 
470 /// Computes an arbitrary estimate for the quality of matching this pattern at
471 /// the start of \p Buffer; a distance of zero should correspond to a perfect
472 /// match.
473 unsigned
474 Pattern::ComputeMatchDistance(StringRef Buffer,
475                               const StringMap<StringRef> &VariableTable) const {
476   // Just compute the number of matching characters. For regular expressions, we
477   // just compare against the regex itself and hope for the best.
478   //
479   // FIXME: One easy improvement here is have the regex lib generate a single
480   // example regular expression which matches, and use that as the example
481   // string.
482   StringRef ExampleString(FixedStr);
483   if (ExampleString.empty())
484     ExampleString = RegExStr;
485 
486   // Only compare up to the first line in the buffer, or the string size.
487   StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
488   BufferPrefix = BufferPrefix.split('\n').first;
489   return BufferPrefix.edit_distance(ExampleString);
490 }
491 
492 /// Prints additional information about a failure to match involving this
493 /// pattern.
494 void Pattern::PrintFailureInfo(
495     const SourceMgr &SM, StringRef Buffer,
496     const StringMap<StringRef> &VariableTable) const {
497   // If this was a regular expression using variables, print the current
498   // variable values.
499   if (!VariableUses.empty()) {
500     for (const auto &VariableUse : VariableUses) {
501       SmallString<256> Msg;
502       raw_svector_ostream OS(Msg);
503       StringRef Var = VariableUse.first;
504       if (Var[0] == '@') {
505         std::string Value;
506         if (EvaluateExpression(Var, Value)) {
507           OS << "with expression \"";
508           OS.write_escaped(Var) << "\" equal to \"";
509           OS.write_escaped(Value) << "\"";
510         } else {
511           OS << "uses incorrect expression \"";
512           OS.write_escaped(Var) << "\"";
513         }
514       } else {
515         StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
516 
517         // Check for undefined variable references.
518         if (it == VariableTable.end()) {
519           OS << "uses undefined variable \"";
520           OS.write_escaped(Var) << "\"";
521         } else {
522           OS << "with variable \"";
523           OS.write_escaped(Var) << "\" equal to \"";
524           OS.write_escaped(it->second) << "\"";
525         }
526       }
527 
528       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
529                       OS.str());
530     }
531   }
532 
533   // Attempt to find the closest/best fuzzy match.  Usually an error happens
534   // because some string in the output didn't exactly match. In these cases, we
535   // would like to show the user a best guess at what "should have" matched, to
536   // save them having to actually check the input manually.
537   size_t NumLinesForward = 0;
538   size_t Best = StringRef::npos;
539   double BestQuality = 0;
540 
541   // Use an arbitrary 4k limit on how far we will search.
542   for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
543     if (Buffer[i] == '\n')
544       ++NumLinesForward;
545 
546     // Patterns have leading whitespace stripped, so skip whitespace when
547     // looking for something which looks like a pattern.
548     if (Buffer[i] == ' ' || Buffer[i] == '\t')
549       continue;
550 
551     // Compute the "quality" of this match as an arbitrary combination of the
552     // match distance and the number of lines skipped to get to this match.
553     unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
554     double Quality = Distance + (NumLinesForward / 100.);
555 
556     if (Quality < BestQuality || Best == StringRef::npos) {
557       Best = i;
558       BestQuality = Quality;
559     }
560   }
561 
562   // Print the "possible intended match here" line if we found something
563   // reasonable and not equal to what we showed in the "scanning from here"
564   // line.
565   if (Best && Best != StringRef::npos && BestQuality < 50) {
566     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
567                     SourceMgr::DK_Note, "possible intended match here");
568 
569     // FIXME: If we wanted to be really friendly we would show why the match
570     // failed, as it can be hard to spot simple one character differences.
571   }
572 }
573 
574 /// Finds the closing sequence of a regex variable usage or definition.
575 ///
576 /// \p Str has to point in the beginning of the definition (right after the
577 /// opening sequence). Returns the offset of the closing sequence within Str,
578 /// or npos if it was not found.
579 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
580   // Offset keeps track of the current offset within the input Str
581   size_t Offset = 0;
582   // [...] Nesting depth
583   size_t BracketDepth = 0;
584 
585   while (!Str.empty()) {
586     if (Str.startswith("]]") && BracketDepth == 0)
587       return Offset;
588     if (Str[0] == '\\') {
589       // Backslash escapes the next char within regexes, so skip them both.
590       Str = Str.substr(2);
591       Offset += 2;
592     } else {
593       switch (Str[0]) {
594       default:
595         break;
596       case '[':
597         BracketDepth++;
598         break;
599       case ']':
600         if (BracketDepth == 0) {
601           SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
602                           SourceMgr::DK_Error,
603                           "missing closing \"]\" for regex variable");
604           exit(1);
605         }
606         BracketDepth--;
607         break;
608       }
609       Str = Str.substr(1);
610       Offset++;
611     }
612   }
613 
614   return StringRef::npos;
615 }
616 
617 //===----------------------------------------------------------------------===//
618 // Check Strings.
619 //===----------------------------------------------------------------------===//
620 
621 /// A check that we found in the input file.
622 struct CheckString {
623   /// The pattern to match.
624   Pattern Pat;
625 
626   /// Which prefix name this check matched.
627   StringRef Prefix;
628 
629   /// The location in the match file that the check string was specified.
630   SMLoc Loc;
631 
632   /// All of the strings that are disallowed from occurring between this match
633   /// string and the previous one (or start of file).
634   std::vector<Pattern> DagNotStrings;
635 
636   CheckString(const Pattern &P, StringRef S, SMLoc L)
637       : Pat(P), Prefix(S), Loc(L) {}
638 
639   size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
640                size_t &MatchLen, StringMap<StringRef> &VariableTable) const;
641 
642   bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
643   bool CheckSame(const SourceMgr &SM, StringRef Buffer) const;
644   bool CheckNot(const SourceMgr &SM, StringRef Buffer,
645                 const std::vector<const Pattern *> &NotStrings,
646                 StringMap<StringRef> &VariableTable) const;
647   size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
648                   std::vector<const Pattern *> &NotStrings,
649                   StringMap<StringRef> &VariableTable) const;
650 };
651 
652 /// Canonicalize whitespaces in the file. Line endings are replaced with
653 /// UNIX-style '\n'.
654 static StringRef CanonicalizeFile(MemoryBuffer &MB,
655                                   SmallVectorImpl<char> &OutputBuffer) {
656   OutputBuffer.reserve(MB.getBufferSize());
657 
658   for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd();
659        Ptr != End; ++Ptr) {
660     // Eliminate trailing dosish \r.
661     if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
662       continue;
663     }
664 
665     // If current char is not a horizontal whitespace or if horizontal
666     // whitespace canonicalization is disabled, dump it to output as is.
667     if (NoCanonicalizeWhiteSpace || (*Ptr != ' ' && *Ptr != '\t')) {
668       OutputBuffer.push_back(*Ptr);
669       continue;
670     }
671 
672     // Otherwise, add one space and advance over neighboring space.
673     OutputBuffer.push_back(' ');
674     while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t'))
675       ++Ptr;
676   }
677 
678   // Add a null byte and then return all but that byte.
679   OutputBuffer.push_back('\0');
680   return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1);
681 }
682 
/// Returns true if \p c can be part of a check-prefix-like word: an
/// alphanumeric character, '-' or '_'.
static bool IsPartOfWord(char c) {
  // isalnum() requires a value representable as unsigned char (or EOF), and
  // plain char may be negative, so convert first.
  return isalnum(static_cast<unsigned char>(c)) || c == '-' || c == '_';
}
686 
687 // Get the size of the prefix extension.
688 static size_t CheckTypeSize(Check::CheckType Ty) {
689   switch (Ty) {
690   case Check::CheckNone:
691   case Check::CheckBadNot:
692     return 0;
693 
694   case Check::CheckPlain:
695     return sizeof(":") - 1;
696 
697   case Check::CheckNext:
698     return sizeof("-NEXT:") - 1;
699 
700   case Check::CheckSame:
701     return sizeof("-SAME:") - 1;
702 
703   case Check::CheckNot:
704     return sizeof("-NOT:") - 1;
705 
706   case Check::CheckDAG:
707     return sizeof("-DAG:") - 1;
708 
709   case Check::CheckLabel:
710     return sizeof("-LABEL:") - 1;
711 
712   case Check::CheckEOF:
713     llvm_unreachable("Should not be using EOF size");
714   }
715 
716   llvm_unreachable("Bad check type");
717 }
718 
719 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) {
720   if (Buffer.size() <= Prefix.size())
721     return Check::CheckNone;
722 
723   char NextChar = Buffer[Prefix.size()];
724 
725   // Verify that the : is present after the prefix.
726   if (NextChar == ':')
727     return Check::CheckPlain;
728 
729   if (NextChar != '-')
730     return Check::CheckNone;
731 
732   StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
733   if (Rest.startswith("NEXT:"))
734     return Check::CheckNext;
735 
736   if (Rest.startswith("SAME:"))
737     return Check::CheckSame;
738 
739   if (Rest.startswith("NOT:"))
740     return Check::CheckNot;
741 
742   if (Rest.startswith("DAG:"))
743     return Check::CheckDAG;
744 
745   if (Rest.startswith("LABEL:"))
746     return Check::CheckLabel;
747 
748   // You can't combine -NOT with another suffix.
749   if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") ||
750       Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") ||
751       Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:"))
752     return Check::CheckBadNot;
753 
754   return Check::CheckNone;
755 }
756 
757 // From the given position, find the next character after the word.
758 static size_t SkipWord(StringRef Str, size_t Loc) {
759   while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
760     ++Loc;
761   return Loc;
762 }
763 
764 /// Search the buffer for the first prefix in the prefix regular expression.
765 ///
766 /// This searches the buffer using the provided regular expression, however it
767 /// enforces constraints beyond that:
768 /// 1) The found prefix must not be a suffix of something that looks like
769 ///    a valid prefix.
770 /// 2) The found prefix must be followed by a valid check type suffix using \c
771 ///    FindCheckType above.
772 ///
773 /// The first match of the regular expression to satisfy these two is returned,
774 /// otherwise an empty StringRef is returned to indicate failure.
775 ///
776 /// If this routine returns a valid prefix, it will also shrink \p Buffer to
777 /// start at the beginning of the returned prefix, increment \p LineNumber for
778 /// each new line consumed from \p Buffer, and set \p CheckTy to the type of
779 /// check found by examining the suffix.
780 ///
781 /// If no valid prefix is found, the state of Buffer, LineNumber, and CheckTy
782 /// is unspecified.
783 static StringRef FindFirstMatchingPrefix(Regex &PrefixRE, StringRef &Buffer,
784                                          unsigned &LineNumber,
785                                          Check::CheckType &CheckTy) {
786   SmallVector<StringRef, 2> Matches;
787 
788   while (!Buffer.empty()) {
789     // Find the first (longest) match using the RE.
790     if (!PrefixRE.match(Buffer, &Matches))
791       // No match at all, bail.
792       return StringRef();
793 
794     StringRef Prefix = Matches[0];
795     Matches.clear();
796 
797     assert(Prefix.data() >= Buffer.data() &&
798            Prefix.data() < Buffer.data() + Buffer.size() &&
799            "Prefix doesn't start inside of buffer!");
800     size_t Loc = Prefix.data() - Buffer.data();
801     StringRef Skipped = Buffer.substr(0, Loc);
802     Buffer = Buffer.drop_front(Loc);
803     LineNumber += Skipped.count('\n');
804 
805     // Check that the matched prefix isn't a suffix of some other check-like
806     // word.
807     // FIXME: This is a very ad-hoc check. it would be better handled in some
808     // other way. Among other things it seems hard to distinguish between
809     // intentional and unintentional uses of this feature.
810     if (Skipped.empty() || !IsPartOfWord(Skipped.back())) {
811       // Now extract the type.
812       CheckTy = FindCheckType(Buffer, Prefix);
813 
814       // If we've found a valid check type for this prefix, we're done.
815       if (CheckTy != Check::CheckNone)
816         return Prefix;
817     }
818 
819     // If we didn't successfully find a prefix, we need to skip this invalid
820     // prefix and continue scanning. We directly skip the prefix that was
821     // matched and any additional parts of that check-like word.
822     Buffer = Buffer.drop_front(SkipWord(Buffer, Prefix.size()));
823   }
824 
825   // We ran out of buffer while skipping partial matches so give up.
826   return StringRef();
827 }
828 
829 /// Read the check file, which specifies the sequence of expected strings.
830 ///
831 /// The strings are added to the CheckStrings vector. Returns true in case of
832 /// an error, false otherwise.
833 static bool ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE,
834                           std::vector<CheckString> &CheckStrings) {
835   std::vector<Pattern> ImplicitNegativeChecks;
836   for (const auto &PatternString : ImplicitCheckNot) {
837     // Create a buffer with fake command line content in order to display the
838     // command line option responsible for the specific implicit CHECK-NOT.
839     std::string Prefix = (Twine("-") + ImplicitCheckNot.ArgStr + "='").str();
840     std::string Suffix = "'";
841     std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy(
842         Prefix + PatternString + Suffix, "command line");
843 
844     StringRef PatternInBuffer =
845         CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
846     SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
847 
848     ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot));
849     ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
850                                                "IMPLICIT-CHECK", SM, 0);
851   }
852 
853   std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;
854 
855   // LineNumber keeps track of the line on which CheckPrefix instances are
856   // found.
857   unsigned LineNumber = 1;
858 
859   while (1) {
860     Check::CheckType CheckTy;
861 
862     // See if a prefix occurs in the memory buffer.
863     StringRef UsedPrefix = FindFirstMatchingPrefix(PrefixRE, Buffer, LineNumber,
864                                                    CheckTy);
865     if (UsedPrefix.empty())
866       break;
867     assert(UsedPrefix.data() == Buffer.data() &&
868            "Failed to move Buffer's start forward, or pointed prefix outside "
869            "of the buffer!");
870 
871     // Location to use for error messages.
872     const char *UsedPrefixStart = UsedPrefix.data();
873 
874     // Skip the buffer to the end.
875     Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy));
876 
877     // Complain about useful-looking but unsupported suffixes.
878     if (CheckTy == Check::CheckBadNot) {
879       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error,
880                       "unsupported -NOT combo on prefix '" + UsedPrefix + "'");
881       return true;
882     }
883 
884     // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
885     // leading whitespace.
886     if (!(NoCanonicalizeWhiteSpace && MatchFullLines))
887       Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
888 
889     // Scan ahead to the end of line.
890     size_t EOL = Buffer.find_first_of("\n\r");
891 
892     // Remember the location of the start of the pattern, for diagnostics.
893     SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
894 
895     // Parse the pattern.
896     Pattern P(CheckTy);
897     if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber))
898       return true;
899 
900     // Verify that CHECK-LABEL lines do not define or use variables
901     if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
902       SM.PrintMessage(
903           SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error,
904           "found '" + UsedPrefix + "-LABEL:'"
905                                    " with variable definition or use");
906       return true;
907     }
908 
909     Buffer = Buffer.substr(EOL);
910 
911     // Verify that CHECK-NEXT lines have at least one CHECK line before them.
912     if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame) &&
913         CheckStrings.empty()) {
914       StringRef Type = CheckTy == Check::CheckNext ? "NEXT" : "SAME";
915       SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
916                       SourceMgr::DK_Error,
917                       "found '" + UsedPrefix + "-" + Type +
918                           "' without previous '" + UsedPrefix + ": line");
919       return true;
920     }
921 
922     // Handle CHECK-DAG/-NOT.
923     if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
924       DagNotMatches.push_back(P);
925       continue;
926     }
927 
928     // Okay, add the string we captured to the output vector and move on.
929     CheckStrings.emplace_back(P, UsedPrefix, PatternLoc);
930     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
931     DagNotMatches = ImplicitNegativeChecks;
932   }
933 
934   // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
935   // prefix as a filler for the error message.
936   if (!DagNotMatches.empty()) {
937     CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(),
938                               SMLoc::getFromPointer(Buffer.data()));
939     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
940   }
941 
942   if (CheckStrings.empty()) {
943     errs() << "error: no check strings found with prefix"
944            << (CheckPrefixes.size() > 1 ? "es " : " ");
945     prefix_iterator I = CheckPrefixes.begin();
946     prefix_iterator E = CheckPrefixes.end();
947     if (I != E) {
948       errs() << "\'" << *I << ":'";
949       ++I;
950     }
951     for (; I != E; ++I)
952       errs() << ", \'" << *I << ":'";
953 
954     errs() << '\n';
955     return true;
956   }
957 
958   return false;
959 }
960 
961 static void PrintCheckFailed(const SourceMgr &SM, SMLoc Loc, const Pattern &Pat,
962                              StringRef Buffer,
963                              StringMap<StringRef> &VariableTable) {
964   // Otherwise, we have an error, emit an error message.
965   SM.PrintMessage(Loc, SourceMgr::DK_Error,
966                   "expected string not found in input");
967 
968   // Print the "scanning from here" line.  If the current position is at the
969   // end of a line, advance to the start of the next line.
970   Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
971 
972   SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
973                   "scanning from here");
974 
975   // Allow the pattern to print additional information if desired.
976   Pat.PrintFailureInfo(SM, Buffer, VariableTable);
977 }
978 
979 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
980                              StringRef Buffer,
981                              StringMap<StringRef> &VariableTable) {
982   PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable);
983 }
984 
985 /// Count the number of newlines in the specified range.
986 static unsigned CountNumNewlinesBetween(StringRef Range,
987                                         const char *&FirstNewLine) {
988   unsigned NumNewLines = 0;
989   while (1) {
990     // Scan for newline.
991     Range = Range.substr(Range.find_first_of("\n\r"));
992     if (Range.empty())
993       return NumNewLines;
994 
995     ++NumNewLines;
996 
997     // Handle \n\r and \r\n as a single newline.
998     if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') &&
999         (Range[0] != Range[1]))
1000       Range = Range.substr(1);
1001     Range = Range.substr(1);
1002 
1003     if (NumNewLines == 1)
1004       FirstNewLine = Range.begin();
1005   }
1006 }
1007 
1008 /// Match check string and its "not strings" and/or "dag strings".
1009 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
1010                           bool IsLabelScanMode, size_t &MatchLen,
1011                           StringMap<StringRef> &VariableTable) const {
1012   size_t LastPos = 0;
1013   std::vector<const Pattern *> NotStrings;
1014 
1015   // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
1016   // bounds; we have not processed variable definitions within the bounded block
1017   // yet so cannot handle any final CHECK-DAG yet; this is handled when going
1018   // over the block again (including the last CHECK-LABEL) in normal mode.
1019   if (!IsLabelScanMode) {
1020     // Match "dag strings" (with mixed "not strings" if any).
1021     LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable);
1022     if (LastPos == StringRef::npos)
1023       return StringRef::npos;
1024   }
1025 
1026   // Match itself from the last position after matching CHECK-DAG.
1027   StringRef MatchBuffer = Buffer.substr(LastPos);
1028   size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1029   if (MatchPos == StringRef::npos) {
1030     PrintCheckFailed(SM, *this, MatchBuffer, VariableTable);
1031     return StringRef::npos;
1032   }
1033 
1034   // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
1035   // or CHECK-NOT
1036   if (!IsLabelScanMode) {
1037     StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1038 
1039     // If this check is a "CHECK-NEXT", verify that the previous match was on
1040     // the previous line (i.e. that there is one newline between them).
1041     if (CheckNext(SM, SkippedRegion))
1042       return StringRef::npos;
1043 
1044     // If this check is a "CHECK-SAME", verify that the previous match was on
1045     // the same line (i.e. that there is no newline between them).
1046     if (CheckSame(SM, SkippedRegion))
1047       return StringRef::npos;
1048 
1049     // If this match had "not strings", verify that they don't exist in the
1050     // skipped region.
1051     if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1052       return StringRef::npos;
1053   }
1054 
1055   return LastPos + MatchPos;
1056 }
1057 
1058 /// Verify there is a single line in the given buffer.
1059 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
1060   if (Pat.getCheckTy() != Check::CheckNext)
1061     return false;
1062 
1063   // Count the number of newlines between the previous match and this one.
1064   assert(Buffer.data() !=
1065              SM.getMemoryBuffer(SM.FindBufferContainingLoc(
1066                                     SMLoc::getFromPointer(Buffer.data())))
1067                  ->getBufferStart() &&
1068          "CHECK-NEXT can't be the first check in a file");
1069 
1070   const char *FirstNewLine = nullptr;
1071   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1072 
1073   if (NumNewLines == 0) {
1074     SM.PrintMessage(Loc, SourceMgr::DK_Error,
1075                     Prefix + "-NEXT: is on the same line as previous match");
1076     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1077                     "'next' match was here");
1078     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1079                     "previous match ended here");
1080     return true;
1081   }
1082 
1083   if (NumNewLines != 1) {
1084     SM.PrintMessage(Loc, SourceMgr::DK_Error,
1085                     Prefix +
1086                         "-NEXT: is not on the line after the previous match");
1087     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1088                     "'next' match was here");
1089     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1090                     "previous match ended here");
1091     SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
1092                     "non-matching line after previous match is here");
1093     return true;
1094   }
1095 
1096   return false;
1097 }
1098 
1099 /// Verify there is no newline in the given buffer.
1100 bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
1101   if (Pat.getCheckTy() != Check::CheckSame)
1102     return false;
1103 
1104   // Count the number of newlines between the previous match and this one.
1105   assert(Buffer.data() !=
1106              SM.getMemoryBuffer(SM.FindBufferContainingLoc(
1107                                     SMLoc::getFromPointer(Buffer.data())))
1108                  ->getBufferStart() &&
1109          "CHECK-SAME can't be the first check in a file");
1110 
1111   const char *FirstNewLine = nullptr;
1112   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1113 
1114   if (NumNewLines != 0) {
1115     SM.PrintMessage(Loc, SourceMgr::DK_Error,
1116                     Prefix +
1117                         "-SAME: is not on the same line as the previous match");
1118     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1119                     "'next' match was here");
1120     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1121                     "previous match ended here");
1122     return true;
1123   }
1124 
1125   return false;
1126 }
1127 
1128 /// Verify there's no "not strings" in the given buffer.
1129 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
1130                            const std::vector<const Pattern *> &NotStrings,
1131                            StringMap<StringRef> &VariableTable) const {
1132   for (const Pattern *Pat : NotStrings) {
1133     assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
1134 
1135     size_t MatchLen = 0;
1136     size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
1137 
1138     if (Pos == StringRef::npos)
1139       continue;
1140 
1141     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Pos),
1142                     SourceMgr::DK_Error, Prefix + "-NOT: string occurred!");
1143     SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note,
1144                     Prefix + "-NOT: pattern specified here");
1145     return true;
1146   }
1147 
1148   return false;
1149 }
1150 
1151 /// Match "dag strings" and their mixed "not strings".
1152 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
1153                              std::vector<const Pattern *> &NotStrings,
1154                              StringMap<StringRef> &VariableTable) const {
1155   if (DagNotStrings.empty())
1156     return 0;
1157 
1158   size_t LastPos = 0;
1159   size_t StartPos = LastPos;
1160 
1161   for (const Pattern &Pat : DagNotStrings) {
1162     assert((Pat.getCheckTy() == Check::CheckDAG ||
1163             Pat.getCheckTy() == Check::CheckNot) &&
1164            "Invalid CHECK-DAG or CHECK-NOT!");
1165 
1166     if (Pat.getCheckTy() == Check::CheckNot) {
1167       NotStrings.push_back(&Pat);
1168       continue;
1169     }
1170 
1171     assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
1172 
1173     size_t MatchLen = 0, MatchPos;
1174 
1175     // CHECK-DAG always matches from the start.
1176     StringRef MatchBuffer = Buffer.substr(StartPos);
1177     MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1178     // With a group of CHECK-DAGs, a single mismatching means the match on
1179     // that group of CHECK-DAGs fails immediately.
1180     if (MatchPos == StringRef::npos) {
1181       PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable);
1182       return StringRef::npos;
1183     }
1184     // Re-calc it as the offset relative to the start of the original string.
1185     MatchPos += StartPos;
1186 
1187     if (!NotStrings.empty()) {
1188       if (MatchPos < LastPos) {
1189         // Reordered?
1190         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos),
1191                         SourceMgr::DK_Error,
1192                         Prefix + "-DAG: found a match of CHECK-DAG"
1193                                  " reordering across a CHECK-NOT");
1194         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos),
1195                         SourceMgr::DK_Note,
1196                         Prefix + "-DAG: the farthest match of CHECK-DAG"
1197                                  " is found here");
1198         SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note,
1199                         Prefix + "-NOT: the crossed pattern specified"
1200                                  " here");
1201         SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note,
1202                         Prefix + "-DAG: the reordered pattern specified"
1203                                  " here");
1204         return StringRef::npos;
1205       }
1206       // All subsequent CHECK-DAGs should be matched from the farthest
1207       // position of all precedent CHECK-DAGs (including this one.)
1208       StartPos = LastPos;
1209       // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to
1210       // CHECK-DAG, verify that there's no 'not' strings occurred in that
1211       // region.
1212       StringRef SkippedRegion = Buffer.slice(LastPos, MatchPos);
1213       if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1214         return StringRef::npos;
1215       // Clear "not strings".
1216       NotStrings.clear();
1217     }
1218 
1219     // Update the last position with CHECK-DAG matches.
1220     LastPos = std::max(MatchPos + MatchLen, LastPos);
1221   }
1222 
1223   return LastPos;
1224 }
1225 
1226 // A check prefix must contain only alphanumeric, hyphens and underscores.
1227 static bool ValidateCheckPrefix(StringRef CheckPrefix) {
1228   Regex Validator("^[a-zA-Z0-9_-]*$");
1229   return Validator.match(CheckPrefix);
1230 }
1231 
1232 static bool ValidateCheckPrefixes() {
1233   StringSet<> PrefixSet;
1234 
1235   for (StringRef Prefix : CheckPrefixes) {
1236     // Reject empty prefixes.
1237     if (Prefix == "")
1238       return false;
1239 
1240     if (!PrefixSet.insert(Prefix).second)
1241       return false;
1242 
1243     if (!ValidateCheckPrefix(Prefix))
1244       return false;
1245   }
1246 
1247   return true;
1248 }
1249 
1250 // Combines the check prefixes into a single regex so that we can efficiently
1251 // scan for any of the set.
1252 //
1253 // The semantics are that the longest-match wins which matches our regex
1254 // library.
1255 static Regex buildCheckPrefixRegex() {
1256   // I don't think there's a way to specify an initial value for cl::list,
1257   // so if nothing was specified, add the default
1258   if (CheckPrefixes.empty())
1259     CheckPrefixes.push_back("CHECK");
1260 
1261   // We already validated the contents of CheckPrefixes so just concatenate
1262   // them as alternatives.
1263   SmallString<32> PrefixRegexStr;
1264   for (StringRef Prefix : CheckPrefixes) {
1265     if (Prefix != CheckPrefixes.front())
1266       PrefixRegexStr.push_back('|');
1267 
1268     PrefixRegexStr.append(Prefix);
1269   }
1270 
1271   return Regex(PrefixRegexStr);
1272 }
1273 
1274 static void DumpCommandLine(int argc, char **argv) {
1275   errs() << "FileCheck command line: ";
1276   for (int I = 0; I < argc; I++)
1277     errs() << " " << argv[I];
1278   errs() << "\n";
1279 }
1280 
1281 // Remove local variables from \p VariableTable. Global variables
1282 // (start with '$') are preserved.
1283 static void ClearLocalVars(StringMap<StringRef> &VariableTable) {
1284   SmallVector<StringRef, 16> LocalVars;
1285   for (const auto &Var : VariableTable)
1286     if (Var.first()[0] != '$')
1287       LocalVars.push_back(Var.first());
1288 
1289   for (const auto &Var : LocalVars)
1290     VariableTable.erase(Var);
1291 }
1292 
1293 /// Check the input to FileCheck provided in the \p Buffer against the \p
1294 /// CheckStrings read from the check file.
1295 ///
1296 /// Returns false if the input fails to satisfy the checks.
1297 bool CheckInput(SourceMgr &SM, StringRef Buffer,
1298                 ArrayRef<CheckString> CheckStrings) {
1299   bool ChecksFailed = false;
1300 
1301   /// VariableTable - This holds all the current filecheck variables.
1302   StringMap<StringRef> VariableTable;
1303 
1304   for (const auto& Def : GlobalDefines)
1305     VariableTable.insert(StringRef(Def).split('='));
1306 
1307   unsigned i = 0, j = 0, e = CheckStrings.size();
1308   while (true) {
1309     StringRef CheckRegion;
1310     if (j == e) {
1311       CheckRegion = Buffer;
1312     } else {
1313       const CheckString &CheckLabelStr = CheckStrings[j];
1314       if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) {
1315         ++j;
1316         continue;
1317       }
1318 
1319       // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
1320       size_t MatchLabelLen = 0;
1321       size_t MatchLabelPos =
1322           CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, VariableTable);
1323       if (MatchLabelPos == StringRef::npos)
1324         // Immediately bail of CHECK-LABEL fails, nothing else we can do.
1325         return false;
1326 
1327       CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
1328       Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
1329       ++j;
1330     }
1331 
1332     if (EnableVarScope)
1333       ClearLocalVars(VariableTable);
1334 
1335     for (; i != j; ++i) {
1336       const CheckString &CheckStr = CheckStrings[i];
1337 
1338       // Check each string within the scanned region, including a second check
1339       // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
1340       size_t MatchLen = 0;
1341       size_t MatchPos =
1342           CheckStr.Check(SM, CheckRegion, false, MatchLen, VariableTable);
1343 
1344       if (MatchPos == StringRef::npos) {
1345         ChecksFailed = true;
1346         i = j;
1347         break;
1348       }
1349 
1350       CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
1351     }
1352 
1353     if (j == e)
1354       break;
1355   }
1356 
1357   // Success if no checks failed.
1358   return !ChecksFailed;
1359 }
1360 
1361 int main(int argc, char **argv) {
1362   InitLLVM X(argc, argv);
1363   cl::ParseCommandLineOptions(argc, argv);
1364 
1365   if (!ValidateCheckPrefixes()) {
1366     errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
1367               "start with a letter and contain only alphanumeric characters, "
1368               "hyphens and underscores\n";
1369     return 2;
1370   }
1371 
1372   Regex PrefixRE = buildCheckPrefixRegex();
1373   std::string REError;
1374   if (!PrefixRE.isValid(REError)) {
1375     errs() << "Unable to combine check-prefix strings into a prefix regular "
1376               "expression! This is likely a bug in FileCheck's verification of "
1377               "the check-prefix strings. Regular expression parsing failed "
1378               "with the following error: "
1379            << REError << "\n";
1380     return 2;
1381   }
1382 
1383   SourceMgr SM;
1384 
1385   // Read the expected strings from the check file.
1386   ErrorOr<std::unique_ptr<MemoryBuffer>> CheckFileOrErr =
1387       MemoryBuffer::getFileOrSTDIN(CheckFilename);
1388   if (std::error_code EC = CheckFileOrErr.getError()) {
1389     errs() << "Could not open check file '" << CheckFilename
1390            << "': " << EC.message() << '\n';
1391     return 2;
1392   }
1393   MemoryBuffer &CheckFile = *CheckFileOrErr.get();
1394 
1395   SmallString<4096> CheckFileBuffer;
1396   StringRef CheckFileText = CanonicalizeFile(CheckFile, CheckFileBuffer);
1397 
1398   SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(
1399                             CheckFileText, CheckFile.getBufferIdentifier()),
1400                         SMLoc());
1401 
1402   std::vector<CheckString> CheckStrings;
1403   if (ReadCheckFile(SM, CheckFileText, PrefixRE, CheckStrings))
1404     return 2;
1405 
1406   // Open the file to check and add it to SourceMgr.
1407   ErrorOr<std::unique_ptr<MemoryBuffer>> InputFileOrErr =
1408       MemoryBuffer::getFileOrSTDIN(InputFilename);
1409   if (std::error_code EC = InputFileOrErr.getError()) {
1410     errs() << "Could not open input file '" << InputFilename
1411            << "': " << EC.message() << '\n';
1412     return 2;
1413   }
1414   MemoryBuffer &InputFile = *InputFileOrErr.get();
1415 
1416   if (InputFile.getBufferSize() == 0 && !AllowEmptyInput) {
1417     errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
1418     DumpCommandLine(argc, argv);
1419     return 2;
1420   }
1421 
1422   SmallString<4096> InputFileBuffer;
1423   StringRef InputFileText = CanonicalizeFile(InputFile, InputFileBuffer);
1424 
1425   SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(
1426                             InputFileText, InputFile.getBufferIdentifier()),
1427                         SMLoc());
1428 
1429   return CheckInput(SM, InputFileText, CheckStrings) ? EXIT_SUCCESS : 1;
1430 }
1431