1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // FileCheck does a line-by line check of a file that validates whether it
11 // contains the expected content.  This is useful for regression tests etc.
12 //
13 // This program exits with an error status of 2 on error, exit status of 0 if
14 // the file matched the expected contents, and exit status of 1 if it did not
15 // contain the expected contents.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringMap.h"
22 #include "llvm/ADT/StringSet.h"
23 #include "llvm/Support/CommandLine.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Support/PrettyStackTrace.h"
26 #include "llvm/Support/Regex.h"
27 #include "llvm/Support/Signals.h"
28 #include "llvm/Support/SourceMgr.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include <algorithm>
31 #include <cctype>
32 #include <map>
33 #include <string>
34 #include <system_error>
35 #include <vector>
36 using namespace llvm;
37 
38 static cl::opt<std::string>
39     CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
40 
41 static cl::opt<std::string>
42     InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
43                   cl::init("-"), cl::value_desc("filename"));
44 
45 static cl::list<std::string> CheckPrefixes(
46     "check-prefix",
47     cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
48 static cl::alias CheckPrefixesAlias(
49     "check-prefixes", cl::aliasopt(CheckPrefixes), cl::CommaSeparated,
50     cl::NotHidden,
51     cl::desc(
52         "Alias for -check-prefix permitting multiple comma separated values"));
53 
54 static cl::opt<bool> NoCanonicalizeWhiteSpace(
55     "strict-whitespace",
56     cl::desc("Do not treat all horizontal whitespace as equivalent"));
57 
58 static cl::list<std::string> ImplicitCheckNot(
59     "implicit-check-not",
60     cl::desc("Add an implicit negative check with this pattern to every\n"
61              "positive check. This can be used to ensure that no instances of\n"
62              "this pattern occur which are not matched by a positive pattern"),
63     cl::value_desc("pattern"));
64 
65 static cl::opt<bool> AllowEmptyInput(
66     "allow-empty", cl::init(false),
67     cl::desc("Allow the input file to be empty. This is useful when making\n"
68              "checks that some error message does not occur, for example."));
69 
70 static cl::opt<bool> MatchFullLines(
71     "match-full-lines", cl::init(false),
72     cl::desc("Require all positive matches to cover an entire input line.\n"
73              "Allows leading and trailing whitespace if --strict-whitespace\n"
74              "is not also passed."));
75 
76 typedef cl::list<std::string>::const_iterator prefix_iterator;
77 
78 //===----------------------------------------------------------------------===//
79 // Pattern Handling Code.
80 //===----------------------------------------------------------------------===//
81 
82 namespace Check {
83 enum CheckType {
84   CheckNone = 0,
85   CheckPlain,
86   CheckNext,
87   CheckSame,
88   CheckNot,
89   CheckDAG,
90   CheckLabel,
91 
92   /// Indicates the pattern only matches the end of file. This is used for
93   /// trailing CHECK-NOTs.
94   CheckEOF,
95 
96   /// Marks when parsing found a -NOT check combined with another CHECK suffix.
97   CheckBadNot
98 };
99 }
100 
101 class Pattern {
102   SMLoc PatternLoc;
103 
104   /// A fixed string to match as the pattern or empty if this pattern requires
105   /// a regex match.
106   StringRef FixedStr;
107 
108   /// A regex string to match as the pattern or empty if this pattern requires
109   /// a fixed string to match.
110   std::string RegExStr;
111 
112   /// Entries in this vector map to uses of a variable in the pattern, e.g.
113   /// "foo[[bar]]baz".  In this case, the RegExStr will contain "foobaz" and
114   /// we'll get an entry in this vector that tells us to insert the value of
115   /// bar at offset 3.
116   std::vector<std::pair<StringRef, unsigned>> VariableUses;
117 
118   /// Maps definitions of variables to their parenthesized capture numbers.
119   ///
120   /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to
121   /// 1.
122   std::map<StringRef, unsigned> VariableDefs;
123 
124   Check::CheckType CheckTy;
125 
126   /// Contains the number of line this pattern is in.
127   unsigned LineNumber;
128 
129 public:
130   explicit Pattern(Check::CheckType Ty) : CheckTy(Ty) {}
131 
132   /// Returns the location in source code.
133   SMLoc getLoc() const { return PatternLoc; }
134 
135   bool ParsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM,
136                     unsigned LineNumber);
137   size_t Match(StringRef Buffer, size_t &MatchLen,
138                StringMap<StringRef> &VariableTable) const;
139   void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
140                         const StringMap<StringRef> &VariableTable) const;
141 
142   bool hasVariable() const {
143     return !(VariableUses.empty() && VariableDefs.empty());
144   }
145 
146   Check::CheckType getCheckTy() const { return CheckTy; }
147 
148 private:
149   bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
150   void AddBackrefToRegEx(unsigned BackrefNum);
151   unsigned
152   ComputeMatchDistance(StringRef Buffer,
153                        const StringMap<StringRef> &VariableTable) const;
154   bool EvaluateExpression(StringRef Expr, std::string &Value) const;
155   size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
156 };
157 
158 /// Parses the given string into the Pattern.
159 ///
160 /// \p Prefix provides which prefix is being matched, \p SM provides the
161 /// SourceMgr used for error reports, and \p LineNumber is the line number in
162 /// the input file from which the pattern string was read. Returns true in
163 /// case of an error, false otherwise.
164 bool Pattern::ParsePattern(StringRef PatternStr, StringRef Prefix,
165                            SourceMgr &SM, unsigned LineNumber) {
166   bool MatchFullLinesHere = MatchFullLines && CheckTy != Check::CheckNot;
167 
168   this->LineNumber = LineNumber;
169   PatternLoc = SMLoc::getFromPointer(PatternStr.data());
170 
171   // Ignore trailing whitespace.
172   while (!PatternStr.empty() &&
173          (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
174     PatternStr = PatternStr.substr(0, PatternStr.size() - 1);
175 
176   // Check that there is something on the line.
177   if (PatternStr.empty()) {
178     SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
179                     "found empty check string with prefix '" + Prefix + ":'");
180     return true;
181   }
182 
183   // Check to see if this is a fixed string, or if it has regex pieces.
184   if (!MatchFullLinesHere &&
185       (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos &&
186                                  PatternStr.find("[[") == StringRef::npos))) {
187     FixedStr = PatternStr;
188     return false;
189   }
190 
191   if (MatchFullLinesHere) {
192     RegExStr += '^';
193     if (!NoCanonicalizeWhiteSpace)
194       RegExStr += " *";
195   }
196 
197   // Paren value #0 is for the fully matched string.  Any new parenthesized
198   // values add from there.
199   unsigned CurParen = 1;
200 
201   // Otherwise, there is at least one regex piece.  Build up the regex pattern
202   // by escaping scary characters in fixed strings, building up one big regex.
203   while (!PatternStr.empty()) {
204     // RegEx matches.
205     if (PatternStr.startswith("{{")) {
206       // This is the start of a regex match.  Scan for the }}.
207       size_t End = PatternStr.find("}}");
208       if (End == StringRef::npos) {
209         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
210                         SourceMgr::DK_Error,
211                         "found start of regex string with no end '}}'");
212         return true;
213       }
214 
215       // Enclose {{}} patterns in parens just like [[]] even though we're not
216       // capturing the result for any purpose.  This is required in case the
217       // expression contains an alternation like: CHECK:  abc{{x|z}}def.  We
218       // want this to turn into: "abc(x|z)def" not "abcx|zdef".
219       RegExStr += '(';
220       ++CurParen;
221 
222       if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM))
223         return true;
224       RegExStr += ')';
225 
226       PatternStr = PatternStr.substr(End + 2);
227       continue;
228     }
229 
230     // Named RegEx matches.  These are of two forms: [[foo:.*]] which matches .*
231     // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
232     // second form is [[foo]] which is a reference to foo.  The variable name
233     // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
234     // it.  This is to catch some common errors.
235     if (PatternStr.startswith("[[")) {
236       // Find the closing bracket pair ending the match.  End is going to be an
237       // offset relative to the beginning of the match string.
238       size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
239 
240       if (End == StringRef::npos) {
241         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
242                         SourceMgr::DK_Error,
243                         "invalid named regex reference, no ]] found");
244         return true;
245       }
246 
247       StringRef MatchStr = PatternStr.substr(2, End);
248       PatternStr = PatternStr.substr(End + 4);
249 
250       // Get the regex name (e.g. "foo").
251       size_t NameEnd = MatchStr.find(':');
252       StringRef Name = MatchStr.substr(0, NameEnd);
253 
254       if (Name.empty()) {
255         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
256                         "invalid name in named regex: empty name");
257         return true;
258       }
259 
260       // Verify that the name/expression is well formed. FileCheck currently
261       // supports @LINE, @LINE+number, @LINE-number expressions. The check here
262       // is relaxed, more strict check is performed in \c EvaluateExpression.
263       bool IsExpression = false;
264       for (unsigned i = 0, e = Name.size(); i != e; ++i) {
265         if (i == 0 && Name[i] == '@') {
266           if (NameEnd != StringRef::npos) {
267             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
268                             SourceMgr::DK_Error,
269                             "invalid name in named regex definition");
270             return true;
271           }
272           IsExpression = true;
273           continue;
274         }
275         if (Name[i] != '_' && !isalnum(Name[i]) &&
276             (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
277           SM.PrintMessage(SMLoc::getFromPointer(Name.data() + i),
278                           SourceMgr::DK_Error, "invalid name in named regex");
279           return true;
280         }
281       }
282 
283       // Name can't start with a digit.
284       if (isdigit(static_cast<unsigned char>(Name[0]))) {
285         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
286                         "invalid name in named regex");
287         return true;
288       }
289 
290       // Handle [[foo]].
291       if (NameEnd == StringRef::npos) {
292         // Handle variables that were defined earlier on the same line by
293         // emitting a backreference.
294         if (VariableDefs.find(Name) != VariableDefs.end()) {
295           unsigned VarParenNum = VariableDefs[Name];
296           if (VarParenNum < 1 || VarParenNum > 9) {
297             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
298                             SourceMgr::DK_Error,
299                             "Can't back-reference more than 9 variables");
300             return true;
301           }
302           AddBackrefToRegEx(VarParenNum);
303         } else {
304           VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
305         }
306         continue;
307       }
308 
309       // Handle [[foo:.*]].
310       VariableDefs[Name] = CurParen;
311       RegExStr += '(';
312       ++CurParen;
313 
314       if (AddRegExToRegEx(MatchStr.substr(NameEnd + 1), CurParen, SM))
315         return true;
316 
317       RegExStr += ')';
318     }
319 
320     // Handle fixed string matches.
321     // Find the end, which is the start of the next regex.
322     size_t FixedMatchEnd = PatternStr.find("{{");
323     FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
324     RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
325     PatternStr = PatternStr.substr(FixedMatchEnd);
326   }
327 
328   if (MatchFullLinesHere) {
329     if (!NoCanonicalizeWhiteSpace)
330       RegExStr += " *";
331     RegExStr += '$';
332   }
333 
334   return false;
335 }
336 
337 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) {
338   Regex R(RS);
339   std::string Error;
340   if (!R.isValid(Error)) {
341     SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
342                     "invalid regex: " + Error);
343     return true;
344   }
345 
346   RegExStr += RS.str();
347   CurParen += R.getNumMatches();
348   return false;
349 }
350 
351 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
352   assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
353   std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum);
354   RegExStr += Backref;
355 }
356 
357 /// Evaluates expression and stores the result to \p Value.
358 ///
359 /// Returns true on success and false when the expression has invalid syntax.
360 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
361   // The only supported expression is @LINE([\+-]\d+)?
362   if (!Expr.startswith("@LINE"))
363     return false;
364   Expr = Expr.substr(StringRef("@LINE").size());
365   int Offset = 0;
366   if (!Expr.empty()) {
367     if (Expr[0] == '+')
368       Expr = Expr.substr(1);
369     else if (Expr[0] != '-')
370       return false;
371     if (Expr.getAsInteger(10, Offset))
372       return false;
373   }
374   Value = llvm::itostr(LineNumber + Offset);
375   return true;
376 }
377 
378 /// Matches the pattern string against the input buffer \p Buffer
379 ///
380 /// This returns the position that is matched or npos if there is no match. If
381 /// there is a match, the size of the matched string is returned in \p
382 /// MatchLen.
383 ///
384 /// The \p VariableTable StringMap provides the current values of filecheck
385 /// variables and is updated if this match defines new values.
386 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
387                       StringMap<StringRef> &VariableTable) const {
388   // If this is the EOF pattern, match it immediately.
389   if (CheckTy == Check::CheckEOF) {
390     MatchLen = 0;
391     return Buffer.size();
392   }
393 
394   // If this is a fixed string pattern, just match it now.
395   if (!FixedStr.empty()) {
396     MatchLen = FixedStr.size();
397     return Buffer.find(FixedStr);
398   }
399 
400   // Regex match.
401 
402   // If there are variable uses, we need to create a temporary string with the
403   // actual value.
404   StringRef RegExToMatch = RegExStr;
405   std::string TmpStr;
406   if (!VariableUses.empty()) {
407     TmpStr = RegExStr;
408 
409     unsigned InsertOffset = 0;
410     for (const auto &VariableUse : VariableUses) {
411       std::string Value;
412 
413       if (VariableUse.first[0] == '@') {
414         if (!EvaluateExpression(VariableUse.first, Value))
415           return StringRef::npos;
416       } else {
417         StringMap<StringRef>::iterator it =
418             VariableTable.find(VariableUse.first);
419         // If the variable is undefined, return an error.
420         if (it == VariableTable.end())
421           return StringRef::npos;
422 
423         // Look up the value and escape it so that we can put it into the regex.
424         Value += Regex::escape(it->second);
425       }
426 
427       // Plop it into the regex at the adjusted offset.
428       TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset,
429                     Value.begin(), Value.end());
430       InsertOffset += Value.size();
431     }
432 
433     // Match the newly constructed regex.
434     RegExToMatch = TmpStr;
435   }
436 
437   SmallVector<StringRef, 4> MatchInfo;
438   if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
439     return StringRef::npos;
440 
441   // Successful regex match.
442   assert(!MatchInfo.empty() && "Didn't get any match");
443   StringRef FullMatch = MatchInfo[0];
444 
445   // If this defines any variables, remember their values.
446   for (const auto &VariableDef : VariableDefs) {
447     assert(VariableDef.second < MatchInfo.size() && "Internal paren error");
448     VariableTable[VariableDef.first] = MatchInfo[VariableDef.second];
449   }
450 
451   MatchLen = FullMatch.size();
452   return FullMatch.data() - Buffer.data();
453 }
454 
455 
456 /// Computes an arbitrary estimate for the quality of matching this pattern at
457 /// the start of \p Buffer; a distance of zero should correspond to a perfect
458 /// match.
459 unsigned
460 Pattern::ComputeMatchDistance(StringRef Buffer,
461                               const StringMap<StringRef> &VariableTable) const {
462   // Just compute the number of matching characters. For regular expressions, we
463   // just compare against the regex itself and hope for the best.
464   //
465   // FIXME: One easy improvement here is have the regex lib generate a single
466   // example regular expression which matches, and use that as the example
467   // string.
468   StringRef ExampleString(FixedStr);
469   if (ExampleString.empty())
470     ExampleString = RegExStr;
471 
472   // Only compare up to the first line in the buffer, or the string size.
473   StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
474   BufferPrefix = BufferPrefix.split('\n').first;
475   return BufferPrefix.edit_distance(ExampleString);
476 }
477 
478 /// Prints additional information about a failure to match involving this
479 /// pattern.
480 void Pattern::PrintFailureInfo(
481     const SourceMgr &SM, StringRef Buffer,
482     const StringMap<StringRef> &VariableTable) const {
483   // If this was a regular expression using variables, print the current
484   // variable values.
485   if (!VariableUses.empty()) {
486     for (const auto &VariableUse : VariableUses) {
487       SmallString<256> Msg;
488       raw_svector_ostream OS(Msg);
489       StringRef Var = VariableUse.first;
490       if (Var[0] == '@') {
491         std::string Value;
492         if (EvaluateExpression(Var, Value)) {
493           OS << "with expression \"";
494           OS.write_escaped(Var) << "\" equal to \"";
495           OS.write_escaped(Value) << "\"";
496         } else {
497           OS << "uses incorrect expression \"";
498           OS.write_escaped(Var) << "\"";
499         }
500       } else {
501         StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
502 
503         // Check for undefined variable references.
504         if (it == VariableTable.end()) {
505           OS << "uses undefined variable \"";
506           OS.write_escaped(Var) << "\"";
507         } else {
508           OS << "with variable \"";
509           OS.write_escaped(Var) << "\" equal to \"";
510           OS.write_escaped(it->second) << "\"";
511         }
512       }
513 
514       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
515                       OS.str());
516     }
517   }
518 
519   // Attempt to find the closest/best fuzzy match.  Usually an error happens
520   // because some string in the output didn't exactly match. In these cases, we
521   // would like to show the user a best guess at what "should have" matched, to
522   // save them having to actually check the input manually.
523   size_t NumLinesForward = 0;
524   size_t Best = StringRef::npos;
525   double BestQuality = 0;
526 
527   // Use an arbitrary 4k limit on how far we will search.
528   for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
529     if (Buffer[i] == '\n')
530       ++NumLinesForward;
531 
532     // Patterns have leading whitespace stripped, so skip whitespace when
533     // looking for something which looks like a pattern.
534     if (Buffer[i] == ' ' || Buffer[i] == '\t')
535       continue;
536 
537     // Compute the "quality" of this match as an arbitrary combination of the
538     // match distance and the number of lines skipped to get to this match.
539     unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
540     double Quality = Distance + (NumLinesForward / 100.);
541 
542     if (Quality < BestQuality || Best == StringRef::npos) {
543       Best = i;
544       BestQuality = Quality;
545     }
546   }
547 
548   // Print the "possible intended match here" line if we found something
549   // reasonable and not equal to what we showed in the "scanning from here"
550   // line.
551   if (Best && Best != StringRef::npos && BestQuality < 50) {
552     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
553                     SourceMgr::DK_Note, "possible intended match here");
554 
555     // FIXME: If we wanted to be really friendly we would show why the match
556     // failed, as it can be hard to spot simple one character differences.
557   }
558 }
559 
560 /// Finds the closing sequence of a regex variable usage or definition.
561 ///
562 /// \p Str has to point in the beginning of the definition (right after the
563 /// opening sequence). Returns the offset of the closing sequence within Str,
564 /// or npos if it was not found.
565 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
566   // Offset keeps track of the current offset within the input Str
567   size_t Offset = 0;
568   // [...] Nesting depth
569   size_t BracketDepth = 0;
570 
571   while (!Str.empty()) {
572     if (Str.startswith("]]") && BracketDepth == 0)
573       return Offset;
574     if (Str[0] == '\\') {
575       // Backslash escapes the next char within regexes, so skip them both.
576       Str = Str.substr(2);
577       Offset += 2;
578     } else {
579       switch (Str[0]) {
580       default:
581         break;
582       case '[':
583         BracketDepth++;
584         break;
585       case ']':
586         if (BracketDepth == 0) {
587           SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
588                           SourceMgr::DK_Error,
589                           "missing closing \"]\" for regex variable");
590           exit(1);
591         }
592         BracketDepth--;
593         break;
594       }
595       Str = Str.substr(1);
596       Offset++;
597     }
598   }
599 
600   return StringRef::npos;
601 }
602 
603 //===----------------------------------------------------------------------===//
604 // Check Strings.
605 //===----------------------------------------------------------------------===//
606 
607 /// A check that we found in the input file.
608 struct CheckString {
609   /// The pattern to match.
610   Pattern Pat;
611 
612   /// Which prefix name this check matched.
613   StringRef Prefix;
614 
615   /// The location in the match file that the check string was specified.
616   SMLoc Loc;
617 
618   /// All of the strings that are disallowed from occurring between this match
619   /// string and the previous one (or start of file).
620   std::vector<Pattern> DagNotStrings;
621 
622   CheckString(const Pattern &P, StringRef S, SMLoc L)
623       : Pat(P), Prefix(S), Loc(L) {}
624 
625   size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
626                size_t &MatchLen, StringMap<StringRef> &VariableTable) const;
627 
628   bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
629   bool CheckSame(const SourceMgr &SM, StringRef Buffer) const;
630   bool CheckNot(const SourceMgr &SM, StringRef Buffer,
631                 const std::vector<const Pattern *> &NotStrings,
632                 StringMap<StringRef> &VariableTable) const;
633   size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
634                   std::vector<const Pattern *> &NotStrings,
635                   StringMap<StringRef> &VariableTable) const;
636 };
637 
638 /// Canonicalize whitespaces in the file. Line endings are replaced with
639 /// UNIX-style '\n'.
640 static StringRef CanonicalizeFile(MemoryBuffer &MB,
641                                   SmallVectorImpl<char> &OutputBuffer) {
642   OutputBuffer.reserve(MB.getBufferSize());
643 
644   for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd();
645        Ptr != End; ++Ptr) {
646     // Eliminate trailing dosish \r.
647     if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
648       continue;
649     }
650 
651     // If current char is not a horizontal whitespace or if horizontal
652     // whitespace canonicalization is disabled, dump it to output as is.
653     if (NoCanonicalizeWhiteSpace || (*Ptr != ' ' && *Ptr != '\t')) {
654       OutputBuffer.push_back(*Ptr);
655       continue;
656     }
657 
658     // Otherwise, add one space and advance over neighboring space.
659     OutputBuffer.push_back(' ');
660     while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t'))
661       ++Ptr;
662   }
663 
664   // Add a null byte and then return all but that byte.
665   OutputBuffer.push_back('\0');
666   return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1);
667 }
668 
669 static bool IsPartOfWord(char c) {
670   return (isalnum(c) || c == '-' || c == '_');
671 }
672 
673 // Get the size of the prefix extension.
674 static size_t CheckTypeSize(Check::CheckType Ty) {
675   switch (Ty) {
676   case Check::CheckNone:
677   case Check::CheckBadNot:
678     return 0;
679 
680   case Check::CheckPlain:
681     return sizeof(":") - 1;
682 
683   case Check::CheckNext:
684     return sizeof("-NEXT:") - 1;
685 
686   case Check::CheckSame:
687     return sizeof("-SAME:") - 1;
688 
689   case Check::CheckNot:
690     return sizeof("-NOT:") - 1;
691 
692   case Check::CheckDAG:
693     return sizeof("-DAG:") - 1;
694 
695   case Check::CheckLabel:
696     return sizeof("-LABEL:") - 1;
697 
698   case Check::CheckEOF:
699     llvm_unreachable("Should not be using EOF size");
700   }
701 
702   llvm_unreachable("Bad check type");
703 }
704 
705 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) {
706   char NextChar = Buffer[Prefix.size()];
707 
708   // Verify that the : is present after the prefix.
709   if (NextChar == ':')
710     return Check::CheckPlain;
711 
712   if (NextChar != '-')
713     return Check::CheckNone;
714 
715   StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
716   if (Rest.startswith("NEXT:"))
717     return Check::CheckNext;
718 
719   if (Rest.startswith("SAME:"))
720     return Check::CheckSame;
721 
722   if (Rest.startswith("NOT:"))
723     return Check::CheckNot;
724 
725   if (Rest.startswith("DAG:"))
726     return Check::CheckDAG;
727 
728   if (Rest.startswith("LABEL:"))
729     return Check::CheckLabel;
730 
731   // You can't combine -NOT with another suffix.
732   if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") ||
733       Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") ||
734       Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:"))
735     return Check::CheckBadNot;
736 
737   return Check::CheckNone;
738 }
739 
740 // From the given position, find the next character after the word.
741 static size_t SkipWord(StringRef Str, size_t Loc) {
742   while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
743     ++Loc;
744   return Loc;
745 }
746 
747 // Try to find the first match in buffer for any prefix. If a valid match is
748 // found, return that prefix and set its type and location.  If there are almost
749 // matches (e.g. the actual prefix string is found, but is not an actual check
750 // string), but no valid match, return an empty string and set the position to
751 // resume searching from. If no partial matches are found, return an empty
752 // string and the location will be StringRef::npos. If one prefix is a substring
753 // of another, the maximal match should be found. e.g. if "A" and "AA" are
754 // prefixes then AA-CHECK: should match the second one.
755 static StringRef FindFirstCandidateMatch(StringRef &Buffer,
756                                          Check::CheckType &CheckTy,
757                                          size_t &CheckLoc) {
758   StringRef FirstPrefix;
759   size_t FirstLoc = StringRef::npos;
760   size_t SearchLoc = StringRef::npos;
761   Check::CheckType FirstTy = Check::CheckNone;
762 
763   CheckTy = Check::CheckNone;
764   CheckLoc = StringRef::npos;
765 
766   for (StringRef Prefix : CheckPrefixes) {
767     size_t PrefixLoc = Buffer.find(Prefix);
768 
769     if (PrefixLoc == StringRef::npos)
770       continue;
771 
772     // Track where we are searching for invalid prefixes that look almost right.
773     // We need to only advance to the first partial match on the next attempt
774     // since a partial match could be a substring of a later, valid prefix.
775     // Need to skip to the end of the word, otherwise we could end up
776     // matching a prefix in a substring later.
777     if (PrefixLoc < SearchLoc)
778       SearchLoc = SkipWord(Buffer, PrefixLoc);
779 
780     // We only want to find the first match to avoid skipping some.
781     if (PrefixLoc > FirstLoc)
782       continue;
783     // If one matching check-prefix is a prefix of another, choose the
784     // longer one.
785     if (PrefixLoc == FirstLoc && Prefix.size() < FirstPrefix.size())
786       continue;
787 
788     StringRef Rest = Buffer.drop_front(PrefixLoc);
789     // Make sure we have actually found the prefix, and not a word containing
790     // it. This should also prevent matching the wrong prefix when one is a
791     // substring of another.
792     if (PrefixLoc != 0 && IsPartOfWord(Buffer[PrefixLoc - 1]))
793       FirstTy = Check::CheckNone;
794     else
795       FirstTy = FindCheckType(Rest, Prefix);
796 
797     FirstLoc = PrefixLoc;
798     FirstPrefix = Prefix;
799   }
800 
801   // If the first prefix is invalid, we should continue the search after it.
802   if (FirstTy == Check::CheckNone) {
803     CheckLoc = SearchLoc;
804     return "";
805   }
806 
807   CheckTy = FirstTy;
808   CheckLoc = FirstLoc;
809   return FirstPrefix;
810 }
811 
812 static StringRef FindFirstMatchingPrefix(StringRef &Buffer,
813                                          unsigned &LineNumber,
814                                          Check::CheckType &CheckTy,
815                                          size_t &CheckLoc) {
816   while (!Buffer.empty()) {
817     StringRef Prefix = FindFirstCandidateMatch(Buffer, CheckTy, CheckLoc);
818     // If we found a real match, we are done.
819     if (!Prefix.empty()) {
820       LineNumber += Buffer.substr(0, CheckLoc).count('\n');
821       return Prefix;
822     }
823 
824     // We didn't find any almost matches either, we are also done.
825     if (CheckLoc == StringRef::npos)
826       return StringRef();
827 
828     LineNumber += Buffer.substr(0, CheckLoc + 1).count('\n');
829 
830     // Advance to the last possible match we found and try again.
831     Buffer = Buffer.drop_front(CheckLoc + 1);
832   }
833 
834   return StringRef();
835 }
836 
837 /// Read the check file, which specifies the sequence of expected strings.
838 ///
839 /// The strings are added to the CheckStrings vector. Returns true in case of
840 /// an error, false otherwise.
841 static bool ReadCheckFile(SourceMgr &SM, StringRef Buffer,
842                           std::vector<CheckString> &CheckStrings) {
843   std::vector<Pattern> ImplicitNegativeChecks;
844   for (const auto &PatternString : ImplicitCheckNot) {
845     // Create a buffer with fake command line content in order to display the
846     // command line option responsible for the specific implicit CHECK-NOT.
847     std::string Prefix = (Twine("-") + ImplicitCheckNot.ArgStr + "='").str();
848     std::string Suffix = "'";
849     std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy(
850         Prefix + PatternString + Suffix, "command line");
851 
852     StringRef PatternInBuffer =
853         CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
854     SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
855 
856     ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot));
857     ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
858                                                "IMPLICIT-CHECK", SM, 0);
859   }
860 
861   std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;
862 
863   // LineNumber keeps track of the line on which CheckPrefix instances are
864   // found.
865   unsigned LineNumber = 1;
866 
867   while (1) {
868     Check::CheckType CheckTy;
869     size_t PrefixLoc;
870 
871     // See if a prefix occurs in the memory buffer.
872     StringRef UsedPrefix =
873         FindFirstMatchingPrefix(Buffer, LineNumber, CheckTy, PrefixLoc);
874     if (UsedPrefix.empty())
875       break;
876 
877     Buffer = Buffer.drop_front(PrefixLoc);
878 
879     // Location to use for error messages.
880     const char *UsedPrefixStart = Buffer.data() + (PrefixLoc == 0 ? 0 : 1);
881 
882     // PrefixLoc is to the start of the prefix. Skip to the end.
883     Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy));
884 
885     // Complain about useful-looking but unsupported suffixes.
886     if (CheckTy == Check::CheckBadNot) {
887       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error,
888                       "unsupported -NOT combo on prefix '" + UsedPrefix + "'");
889       return true;
890     }
891 
892     // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
893     // leading and trailing whitespace.
894     Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
895 
896     // Scan ahead to the end of line.
897     size_t EOL = Buffer.find_first_of("\n\r");
898 
899     // Remember the location of the start of the pattern, for diagnostics.
900     SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
901 
902     // Parse the pattern.
903     Pattern P(CheckTy);
904     if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber))
905       return true;
906 
907     // Verify that CHECK-LABEL lines do not define or use variables
908     if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
909       SM.PrintMessage(
910           SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error,
911           "found '" + UsedPrefix + "-LABEL:'"
912                                    " with variable definition or use");
913       return true;
914     }
915 
916     Buffer = Buffer.substr(EOL);
917 
918     // Verify that CHECK-NEXT lines have at least one CHECK line before them.
919     if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame) &&
920         CheckStrings.empty()) {
921       StringRef Type = CheckTy == Check::CheckNext ? "NEXT" : "SAME";
922       SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
923                       SourceMgr::DK_Error,
924                       "found '" + UsedPrefix + "-" + Type +
925                           "' without previous '" + UsedPrefix + ": line");
926       return true;
927     }
928 
929     // Handle CHECK-DAG/-NOT.
930     if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
931       DagNotMatches.push_back(P);
932       continue;
933     }
934 
935     // Okay, add the string we captured to the output vector and move on.
936     CheckStrings.emplace_back(P, UsedPrefix, PatternLoc);
937     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
938     DagNotMatches = ImplicitNegativeChecks;
939   }
940 
941   // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
942   // prefix as a filler for the error message.
943   if (!DagNotMatches.empty()) {
944     CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(),
945                               SMLoc::getFromPointer(Buffer.data()));
946     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
947   }
948 
949   if (CheckStrings.empty()) {
950     errs() << "error: no check strings found with prefix"
951            << (CheckPrefixes.size() > 1 ? "es " : " ");
952     prefix_iterator I = CheckPrefixes.begin();
953     prefix_iterator E = CheckPrefixes.end();
954     if (I != E) {
955       errs() << "\'" << *I << ":'";
956       ++I;
957     }
958     for (; I != E; ++I)
959       errs() << ", \'" << *I << ":'";
960 
961     errs() << '\n';
962     return true;
963   }
964 
965   return false;
966 }
967 
968 static void PrintCheckFailed(const SourceMgr &SM, SMLoc Loc, const Pattern &Pat,
969                              StringRef Buffer,
970                              StringMap<StringRef> &VariableTable) {
971   // Otherwise, we have an error, emit an error message.
972   SM.PrintMessage(Loc, SourceMgr::DK_Error,
973                   "expected string not found in input");
974 
975   // Print the "scanning from here" line.  If the current position is at the
976   // end of a line, advance to the start of the next line.
977   Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
978 
979   SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
980                   "scanning from here");
981 
982   // Allow the pattern to print additional information if desired.
983   Pat.PrintFailureInfo(SM, Buffer, VariableTable);
984 }
985 
986 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
987                              StringRef Buffer,
988                              StringMap<StringRef> &VariableTable) {
989   PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable);
990 }
991 
992 /// Count the number of newlines in the specified range.
993 static unsigned CountNumNewlinesBetween(StringRef Range,
994                                         const char *&FirstNewLine) {
995   unsigned NumNewLines = 0;
996   while (1) {
997     // Scan for newline.
998     Range = Range.substr(Range.find_first_of("\n\r"));
999     if (Range.empty())
1000       return NumNewLines;
1001 
1002     ++NumNewLines;
1003 
1004     // Handle \n\r and \r\n as a single newline.
1005     if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') &&
1006         (Range[0] != Range[1]))
1007       Range = Range.substr(1);
1008     Range = Range.substr(1);
1009 
1010     if (NumNewLines == 1)
1011       FirstNewLine = Range.begin();
1012   }
1013 }
1014 
1015 /// Match check string and its "not strings" and/or "dag strings".
1016 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
1017                           bool IsLabelScanMode, size_t &MatchLen,
1018                           StringMap<StringRef> &VariableTable) const {
1019   size_t LastPos = 0;
1020   std::vector<const Pattern *> NotStrings;
1021 
1022   // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
1023   // bounds; we have not processed variable definitions within the bounded block
1024   // yet so cannot handle any final CHECK-DAG yet; this is handled when going
1025   // over the block again (including the last CHECK-LABEL) in normal mode.
1026   if (!IsLabelScanMode) {
1027     // Match "dag strings" (with mixed "not strings" if any).
1028     LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable);
1029     if (LastPos == StringRef::npos)
1030       return StringRef::npos;
1031   }
1032 
1033   // Match itself from the last position after matching CHECK-DAG.
1034   StringRef MatchBuffer = Buffer.substr(LastPos);
1035   size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1036   if (MatchPos == StringRef::npos) {
1037     PrintCheckFailed(SM, *this, MatchBuffer, VariableTable);
1038     return StringRef::npos;
1039   }
1040 
1041   // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
1042   // or CHECK-NOT
1043   if (!IsLabelScanMode) {
1044     StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1045 
1046     // If this check is a "CHECK-NEXT", verify that the previous match was on
1047     // the previous line (i.e. that there is one newline between them).
1048     if (CheckNext(SM, SkippedRegion))
1049       return StringRef::npos;
1050 
1051     // If this check is a "CHECK-SAME", verify that the previous match was on
1052     // the same line (i.e. that there is no newline between them).
1053     if (CheckSame(SM, SkippedRegion))
1054       return StringRef::npos;
1055 
1056     // If this match had "not strings", verify that they don't exist in the
1057     // skipped region.
1058     if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1059       return StringRef::npos;
1060   }
1061 
1062   return LastPos + MatchPos;
1063 }
1064 
1065 /// Verify there is a single line in the given buffer.
1066 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
1067   if (Pat.getCheckTy() != Check::CheckNext)
1068     return false;
1069 
1070   // Count the number of newlines between the previous match and this one.
1071   assert(Buffer.data() !=
1072              SM.getMemoryBuffer(SM.FindBufferContainingLoc(
1073                                     SMLoc::getFromPointer(Buffer.data())))
1074                  ->getBufferStart() &&
1075          "CHECK-NEXT can't be the first check in a file");
1076 
1077   const char *FirstNewLine = nullptr;
1078   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1079 
1080   if (NumNewLines == 0) {
1081     SM.PrintMessage(Loc, SourceMgr::DK_Error,
1082                     Prefix + "-NEXT: is on the same line as previous match");
1083     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1084                     "'next' match was here");
1085     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1086                     "previous match ended here");
1087     return true;
1088   }
1089 
1090   if (NumNewLines != 1) {
1091     SM.PrintMessage(Loc, SourceMgr::DK_Error,
1092                     Prefix +
1093                         "-NEXT: is not on the line after the previous match");
1094     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1095                     "'next' match was here");
1096     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1097                     "previous match ended here");
1098     SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
1099                     "non-matching line after previous match is here");
1100     return true;
1101   }
1102 
1103   return false;
1104 }
1105 
1106 /// Verify there is no newline in the given buffer.
1107 bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
1108   if (Pat.getCheckTy() != Check::CheckSame)
1109     return false;
1110 
1111   // Count the number of newlines between the previous match and this one.
1112   assert(Buffer.data() !=
1113              SM.getMemoryBuffer(SM.FindBufferContainingLoc(
1114                                     SMLoc::getFromPointer(Buffer.data())))
1115                  ->getBufferStart() &&
1116          "CHECK-SAME can't be the first check in a file");
1117 
1118   const char *FirstNewLine = nullptr;
1119   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1120 
1121   if (NumNewLines != 0) {
1122     SM.PrintMessage(Loc, SourceMgr::DK_Error,
1123                     Prefix +
1124                         "-SAME: is not on the same line as the previous match");
1125     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1126                     "'next' match was here");
1127     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1128                     "previous match ended here");
1129     return true;
1130   }
1131 
1132   return false;
1133 }
1134 
1135 /// Verify there's no "not strings" in the given buffer.
1136 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
1137                            const std::vector<const Pattern *> &NotStrings,
1138                            StringMap<StringRef> &VariableTable) const {
1139   for (const Pattern *Pat : NotStrings) {
1140     assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
1141 
1142     size_t MatchLen = 0;
1143     size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
1144 
1145     if (Pos == StringRef::npos)
1146       continue;
1147 
1148     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Pos),
1149                     SourceMgr::DK_Error, Prefix + "-NOT: string occurred!");
1150     SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note,
1151                     Prefix + "-NOT: pattern specified here");
1152     return true;
1153   }
1154 
1155   return false;
1156 }
1157 
1158 /// Match "dag strings" and their mixed "not strings".
1159 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
1160                              std::vector<const Pattern *> &NotStrings,
1161                              StringMap<StringRef> &VariableTable) const {
1162   if (DagNotStrings.empty())
1163     return 0;
1164 
1165   size_t LastPos = 0;
1166   size_t StartPos = LastPos;
1167 
1168   for (const Pattern &Pat : DagNotStrings) {
1169     assert((Pat.getCheckTy() == Check::CheckDAG ||
1170             Pat.getCheckTy() == Check::CheckNot) &&
1171            "Invalid CHECK-DAG or CHECK-NOT!");
1172 
1173     if (Pat.getCheckTy() == Check::CheckNot) {
1174       NotStrings.push_back(&Pat);
1175       continue;
1176     }
1177 
1178     assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
1179 
1180     size_t MatchLen = 0, MatchPos;
1181 
1182     // CHECK-DAG always matches from the start.
1183     StringRef MatchBuffer = Buffer.substr(StartPos);
1184     MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1185     // With a group of CHECK-DAGs, a single mismatching means the match on
1186     // that group of CHECK-DAGs fails immediately.
1187     if (MatchPos == StringRef::npos) {
1188       PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable);
1189       return StringRef::npos;
1190     }
1191     // Re-calc it as the offset relative to the start of the original string.
1192     MatchPos += StartPos;
1193 
1194     if (!NotStrings.empty()) {
1195       if (MatchPos < LastPos) {
1196         // Reordered?
1197         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos),
1198                         SourceMgr::DK_Error,
1199                         Prefix + "-DAG: found a match of CHECK-DAG"
1200                                  " reordering across a CHECK-NOT");
1201         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos),
1202                         SourceMgr::DK_Note,
1203                         Prefix + "-DAG: the farthest match of CHECK-DAG"
1204                                  " is found here");
1205         SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note,
1206                         Prefix + "-NOT: the crossed pattern specified"
1207                                  " here");
1208         SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note,
1209                         Prefix + "-DAG: the reordered pattern specified"
1210                                  " here");
1211         return StringRef::npos;
1212       }
1213       // All subsequent CHECK-DAGs should be matched from the farthest
1214       // position of all precedent CHECK-DAGs (including this one.)
1215       StartPos = LastPos;
1216       // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to
1217       // CHECK-DAG, verify that there's no 'not' strings occurred in that
1218       // region.
1219       StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1220       if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1221         return StringRef::npos;
1222       // Clear "not strings".
1223       NotStrings.clear();
1224     }
1225 
1226     // Update the last position with CHECK-DAG matches.
1227     LastPos = std::max(MatchPos + MatchLen, LastPos);
1228   }
1229 
1230   return LastPos;
1231 }
1232 
1233 // A check prefix must contain only alphanumeric, hyphens and underscores.
1234 static bool ValidateCheckPrefix(StringRef CheckPrefix) {
1235   Regex Validator("^[a-zA-Z0-9_-]*$");
1236   return Validator.match(CheckPrefix);
1237 }
1238 
1239 static bool ValidateCheckPrefixes() {
1240   StringSet<> PrefixSet;
1241 
1242   for (StringRef Prefix : CheckPrefixes) {
1243     // Reject empty prefixes.
1244     if (Prefix == "")
1245       return false;
1246 
1247     if (!PrefixSet.insert(Prefix).second)
1248       return false;
1249 
1250     if (!ValidateCheckPrefix(Prefix))
1251       return false;
1252   }
1253 
1254   return true;
1255 }
1256 
1257 // I don't think there's a way to specify an initial value for cl::list,
1258 // so if nothing was specified, add the default
1259 static void AddCheckPrefixIfNeeded() {
1260   if (CheckPrefixes.empty())
1261     CheckPrefixes.push_back("CHECK");
1262 }
1263 
1264 static void DumpCommandLine(int argc, char **argv) {
1265   errs() << "FileCheck command line: ";
1266   for (int I = 0; I < argc; I++)
1267     errs() << " " << argv[I];
1268   errs() << "\n";
1269 }
1270 
1271 /// Check the input to FileCheck provided in the \p Buffer against the \p
1272 /// CheckStrings read from the check file.
1273 ///
1274 /// Returns false if the input fails to satisfy the checks.
1275 bool CheckInput(SourceMgr &SM, StringRef Buffer,
1276                 ArrayRef<CheckString> CheckStrings) {
1277   bool ChecksFailed = false;
1278 
1279   /// VariableTable - This holds all the current filecheck variables.
1280   StringMap<StringRef> VariableTable;
1281 
1282   unsigned i = 0, j = 0, e = CheckStrings.size();
1283   while (true) {
1284     StringRef CheckRegion;
1285     if (j == e) {
1286       CheckRegion = Buffer;
1287     } else {
1288       const CheckString &CheckLabelStr = CheckStrings[j];
1289       if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) {
1290         ++j;
1291         continue;
1292       }
1293 
1294       // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
1295       size_t MatchLabelLen = 0;
1296       size_t MatchLabelPos =
1297           CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, VariableTable);
1298       if (MatchLabelPos == StringRef::npos)
1299         // Immediately bail of CHECK-LABEL fails, nothing else we can do.
1300         return false;
1301 
1302       CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
1303       Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
1304       ++j;
1305     }
1306 
1307     for (; i != j; ++i) {
1308       const CheckString &CheckStr = CheckStrings[i];
1309 
1310       // Check each string within the scanned region, including a second check
1311       // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
1312       size_t MatchLen = 0;
1313       size_t MatchPos =
1314           CheckStr.Check(SM, CheckRegion, false, MatchLen, VariableTable);
1315 
1316       if (MatchPos == StringRef::npos) {
1317         ChecksFailed = true;
1318         i = j;
1319         break;
1320       }
1321 
1322       CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
1323     }
1324 
1325     if (j == e)
1326       break;
1327   }
1328 
1329   // Success if no checks failed.
1330   return !ChecksFailed;
1331 }
1332 
1333 int main(int argc, char **argv) {
1334   sys::PrintStackTraceOnErrorSignal(argv[0]);
1335   PrettyStackTraceProgram X(argc, argv);
1336   cl::ParseCommandLineOptions(argc, argv);
1337 
1338   if (!ValidateCheckPrefixes()) {
1339     errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
1340               "start with a letter and contain only alphanumeric characters, "
1341               "hyphens and underscores\n";
1342     return 2;
1343   }
1344 
1345   AddCheckPrefixIfNeeded();
1346 
1347   SourceMgr SM;
1348 
1349   // Read the expected strings from the check file.
1350   ErrorOr<std::unique_ptr<MemoryBuffer>> CheckFileOrErr =
1351       MemoryBuffer::getFileOrSTDIN(CheckFilename);
1352   if (std::error_code EC = CheckFileOrErr.getError()) {
1353     errs() << "Could not open check file '" << CheckFilename
1354            << "': " << EC.message() << '\n';
1355     return 2;
1356   }
1357   MemoryBuffer &CheckFile = *CheckFileOrErr.get();
1358 
1359   SmallString<4096> CheckFileBuffer;
1360   StringRef CheckFileText = CanonicalizeFile(CheckFile, CheckFileBuffer);
1361 
1362   SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(
1363                             CheckFileText, CheckFile.getBufferIdentifier()),
1364                         SMLoc());
1365 
1366   std::vector<CheckString> CheckStrings;
1367   if (ReadCheckFile(SM, CheckFileText, CheckStrings))
1368     return 2;
1369 
1370   // Open the file to check and add it to SourceMgr.
1371   ErrorOr<std::unique_ptr<MemoryBuffer>> InputFileOrErr =
1372       MemoryBuffer::getFileOrSTDIN(InputFilename);
1373   if (std::error_code EC = InputFileOrErr.getError()) {
1374     errs() << "Could not open input file '" << InputFilename
1375            << "': " << EC.message() << '\n';
1376     return 2;
1377   }
1378   MemoryBuffer &InputFile = *InputFileOrErr.get();
1379 
1380   if (InputFile.getBufferSize() == 0 && !AllowEmptyInput) {
1381     errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
1382     DumpCommandLine(argc, argv);
1383     return 2;
1384   }
1385 
1386   SmallString<4096> InputFileBuffer;
1387   StringRef InputFileText = CanonicalizeFile(InputFile, InputFileBuffer);
1388 
1389   SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(
1390                             InputFileText, InputFile.getBufferIdentifier()),
1391                         SMLoc());
1392 
1393   return CheckInput(SM, InputFileText, CheckStrings) ? EXIT_SUCCESS : 1;
1394 }
1395