1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // FileCheck does a line-by line check of a file that validates whether it
11 // contains the expected content.  This is useful for regression tests etc.
12 //
13 // This program exits with an error status of 2 on error, exit status of 0 if
14 // the file matched the expected contents, and exit status of 1 if it did not
15 // contain the expected contents.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringMap.h"
22 #include "llvm/ADT/StringSet.h"
23 #include "llvm/Support/CommandLine.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Support/PrettyStackTrace.h"
26 #include "llvm/Support/Regex.h"
27 #include "llvm/Support/Signals.h"
28 #include "llvm/Support/SourceMgr.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include <algorithm>
31 #include <cctype>
32 #include <map>
33 #include <string>
34 #include <system_error>
35 #include <vector>
36 using namespace llvm;
37 
38 static cl::opt<std::string>
39 CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
40 
41 static cl::opt<std::string>
42 InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
43               cl::init("-"), cl::value_desc("filename"));
44 
45 static cl::list<std::string>
46 CheckPrefixes("check-prefix",
47               cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
48 static cl::alias CheckPrefixesAlias(
49     "check-prefixes", cl::aliasopt(CheckPrefixes), cl::CommaSeparated,
50     cl::NotHidden,
51     cl::desc(
52         "Alias for -check-prefix permitting multiple comma separated values"));
53 
54 static cl::opt<bool>
55 NoCanonicalizeWhiteSpace("strict-whitespace",
56               cl::desc("Do not treat all horizontal whitespace as equivalent"));
57 
58 static cl::list<std::string> ImplicitCheckNot(
59     "implicit-check-not",
60     cl::desc("Add an implicit negative check with this pattern to every\n"
61              "positive check. This can be used to ensure that no instances of\n"
62              "this pattern occur which are not matched by a positive pattern"),
63     cl::value_desc("pattern"));
64 
65 static cl::opt<bool> AllowEmptyInput(
66     "allow-empty", cl::init(false),
67     cl::desc("Allow the input file to be empty. This is useful when making\n"
68              "checks that some error message does not occur, for example."));
69 
70 static cl::opt<bool> MatchFullLines(
71     "match-full-lines", cl::init(false),
72     cl::desc("Require all positive matches to cover an entire input line.\n"
73              "Allows leading and trailing whitespace if --strict-whitespace\n"
74              "is not also passed."));
75 
76 typedef cl::list<std::string>::const_iterator prefix_iterator;
77 
78 //===----------------------------------------------------------------------===//
79 // Pattern Handling Code.
80 //===----------------------------------------------------------------------===//
81 
82 namespace Check {
83   enum CheckType {
84     CheckNone = 0,
85     CheckPlain,
86     CheckNext,
87     CheckSame,
88     CheckNot,
89     CheckDAG,
90     CheckLabel,
91 
92     /// MatchEOF - When set, this pattern only matches the end of file. This is
93     /// used for trailing CHECK-NOTs.
94     CheckEOF,
95     /// CheckBadNot - Found -NOT combined with another CHECK suffix.
96     CheckBadNot
97   };
98 }
99 
100 class Pattern {
101   SMLoc PatternLoc;
102 
103   /// FixedStr - If non-empty, this pattern is a fixed string match with the
104   /// specified fixed string.
105   StringRef FixedStr;
106 
107   /// RegEx - If non-empty, this is a regex pattern.
108   std::string RegExStr;
109 
110   /// VariableUses - Entries in this vector map to uses of a variable in the
111   /// pattern, e.g. "foo[[bar]]baz".  In this case, the RegExStr will contain
112   /// "foobaz" and we'll get an entry in this vector that tells us to insert the
113   /// value of bar at offset 3.
114   std::vector<std::pair<StringRef, unsigned> > VariableUses;
115 
116   /// VariableDefs - Maps definitions of variables to their parenthesized
117   /// capture numbers.
118   /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1.
119   std::map<StringRef, unsigned> VariableDefs;
120 
121   Check::CheckType CheckTy;
122 
123   /// \brief Contains the number of line this pattern is in.
124   unsigned LineNumber;
125 
126 public:
127   explicit Pattern(Check::CheckType Ty) : CheckTy(Ty) {}
128 
129   /// getLoc - Return the location in source code.
130   SMLoc getLoc() const { return PatternLoc; }
131 
132   /// ParsePattern - Parse the given string into the Pattern. Prefix provides
133   /// which prefix is being matched, SM provides the SourceMgr used for error
134   /// reports, and LineNumber is the line number in the input file from which
135   /// the pattern string was read.  Returns true in case of an error, false
136   /// otherwise.
137   bool ParsePattern(StringRef PatternStr,
138                     StringRef Prefix,
139                     SourceMgr &SM,
140                     unsigned LineNumber);
141 
142   /// Match - Match the pattern string against the input buffer Buffer.  This
143   /// returns the position that is matched or npos if there is no match.  If
144   /// there is a match, the size of the matched string is returned in MatchLen.
145   ///
146   /// The VariableTable StringMap provides the current values of filecheck
147   /// variables and is updated if this match defines new values.
148   size_t Match(StringRef Buffer, size_t &MatchLen,
149                StringMap<StringRef> &VariableTable) const;
150 
151   /// PrintFailureInfo - Print additional information about a failure to match
152   /// involving this pattern.
153   void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
154                         const StringMap<StringRef> &VariableTable) const;
155 
156   bool hasVariable() const { return !(VariableUses.empty() &&
157                                       VariableDefs.empty()); }
158 
159   Check::CheckType getCheckTy() const { return CheckTy; }
160 
161 private:
162   bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
163   void AddBackrefToRegEx(unsigned BackrefNum);
164 
165   /// ComputeMatchDistance - Compute an arbitrary estimate for the quality of
166   /// matching this pattern at the start of \arg Buffer; a distance of zero
167   /// should correspond to a perfect match.
168   unsigned ComputeMatchDistance(StringRef Buffer,
169                                const StringMap<StringRef> &VariableTable) const;
170 
171   /// \brief Evaluates expression and stores the result to \p Value.
172   /// \return true on success. false when the expression has invalid syntax.
173   bool EvaluateExpression(StringRef Expr, std::string &Value) const;
174 
175   /// \brief Finds the closing sequence of a regex variable usage or
176   /// definition. Str has to point in the beginning of the definition
177   /// (right after the opening sequence).
178   /// \return offset of the closing sequence within Str, or npos if it was not
179   /// found.
180   size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
181 };
182 
183 
184 bool Pattern::ParsePattern(StringRef PatternStr,
185                            StringRef Prefix,
186                            SourceMgr &SM,
187                            unsigned LineNumber) {
188   bool MatchFullLinesHere = MatchFullLines && CheckTy != Check::CheckNot;
189 
190   this->LineNumber = LineNumber;
191   PatternLoc = SMLoc::getFromPointer(PatternStr.data());
192 
193   // Ignore trailing whitespace.
194   while (!PatternStr.empty() &&
195          (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
196     PatternStr = PatternStr.substr(0, PatternStr.size()-1);
197 
198   // Check that there is something on the line.
199   if (PatternStr.empty()) {
200     SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
201                     "found empty check string with prefix '" +
202                     Prefix + ":'");
203     return true;
204   }
205 
206   // Check to see if this is a fixed string, or if it has regex pieces.
207   if (!MatchFullLinesHere &&
208       (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos &&
209                                  PatternStr.find("[[") == StringRef::npos))) {
210     FixedStr = PatternStr;
211     return false;
212   }
213 
214   if (MatchFullLinesHere) {
215     RegExStr += '^';
216     if (!NoCanonicalizeWhiteSpace)
217       RegExStr += " *";
218   }
219 
220   // Paren value #0 is for the fully matched string.  Any new parenthesized
221   // values add from there.
222   unsigned CurParen = 1;
223 
224   // Otherwise, there is at least one regex piece.  Build up the regex pattern
225   // by escaping scary characters in fixed strings, building up one big regex.
226   while (!PatternStr.empty()) {
227     // RegEx matches.
228     if (PatternStr.startswith("{{")) {
229       // This is the start of a regex match.  Scan for the }}.
230       size_t End = PatternStr.find("}}");
231       if (End == StringRef::npos) {
232         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
233                         SourceMgr::DK_Error,
234                         "found start of regex string with no end '}}'");
235         return true;
236       }
237 
238       // Enclose {{}} patterns in parens just like [[]] even though we're not
239       // capturing the result for any purpose.  This is required in case the
240       // expression contains an alternation like: CHECK:  abc{{x|z}}def.  We
241       // want this to turn into: "abc(x|z)def" not "abcx|zdef".
242       RegExStr += '(';
243       ++CurParen;
244 
245       if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM))
246         return true;
247       RegExStr += ')';
248 
249       PatternStr = PatternStr.substr(End+2);
250       continue;
251     }
252 
253     // Named RegEx matches.  These are of two forms: [[foo:.*]] which matches .*
254     // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
255     // second form is [[foo]] which is a reference to foo.  The variable name
256     // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
257     // it.  This is to catch some common errors.
258     if (PatternStr.startswith("[[")) {
259       // Find the closing bracket pair ending the match.  End is going to be an
260       // offset relative to the beginning of the match string.
261       size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
262 
263       if (End == StringRef::npos) {
264         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
265                         SourceMgr::DK_Error,
266                         "invalid named regex reference, no ]] found");
267         return true;
268       }
269 
270       StringRef MatchStr = PatternStr.substr(2, End);
271       PatternStr = PatternStr.substr(End+4);
272 
273       // Get the regex name (e.g. "foo").
274       size_t NameEnd = MatchStr.find(':');
275       StringRef Name = MatchStr.substr(0, NameEnd);
276 
277       if (Name.empty()) {
278         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
279                         "invalid name in named regex: empty name");
280         return true;
281       }
282 
283       // Verify that the name/expression is well formed. FileCheck currently
284       // supports @LINE, @LINE+number, @LINE-number expressions. The check here
285       // is relaxed, more strict check is performed in \c EvaluateExpression.
286       bool IsExpression = false;
287       for (unsigned i = 0, e = Name.size(); i != e; ++i) {
288         if (i == 0 && Name[i] == '@') {
289           if (NameEnd != StringRef::npos) {
290             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
291                             SourceMgr::DK_Error,
292                             "invalid name in named regex definition");
293             return true;
294           }
295           IsExpression = true;
296           continue;
297         }
298         if (Name[i] != '_' && !isalnum(Name[i]) &&
299             (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
300           SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i),
301                           SourceMgr::DK_Error, "invalid name in named regex");
302           return true;
303         }
304       }
305 
306       // Name can't start with a digit.
307       if (isdigit(static_cast<unsigned char>(Name[0]))) {
308         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
309                         "invalid name in named regex");
310         return true;
311       }
312 
313       // Handle [[foo]].
314       if (NameEnd == StringRef::npos) {
315         // Handle variables that were defined earlier on the same line by
316         // emitting a backreference.
317         if (VariableDefs.find(Name) != VariableDefs.end()) {
318           unsigned VarParenNum = VariableDefs[Name];
319           if (VarParenNum < 1 || VarParenNum > 9) {
320             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
321                             SourceMgr::DK_Error,
322                             "Can't back-reference more than 9 variables");
323             return true;
324           }
325           AddBackrefToRegEx(VarParenNum);
326         } else {
327           VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
328         }
329         continue;
330       }
331 
332       // Handle [[foo:.*]].
333       VariableDefs[Name] = CurParen;
334       RegExStr += '(';
335       ++CurParen;
336 
337       if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM))
338         return true;
339 
340       RegExStr += ')';
341     }
342 
343     // Handle fixed string matches.
344     // Find the end, which is the start of the next regex.
345     size_t FixedMatchEnd = PatternStr.find("{{");
346     FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
347     RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
348     PatternStr = PatternStr.substr(FixedMatchEnd);
349   }
350 
351   if (MatchFullLinesHere) {
352     if (!NoCanonicalizeWhiteSpace)
353       RegExStr += " *";
354     RegExStr += '$';
355   }
356 
357   return false;
358 }
359 
360 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen,
361                               SourceMgr &SM) {
362   Regex R(RS);
363   std::string Error;
364   if (!R.isValid(Error)) {
365     SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
366                     "invalid regex: " + Error);
367     return true;
368   }
369 
370   RegExStr += RS.str();
371   CurParen += R.getNumMatches();
372   return false;
373 }
374 
375 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
376   assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
377   std::string Backref = std::string("\\") +
378                         std::string(1, '0' + BackrefNum);
379   RegExStr += Backref;
380 }
381 
382 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
383   // The only supported expression is @LINE([\+-]\d+)?
384   if (!Expr.startswith("@LINE"))
385     return false;
386   Expr = Expr.substr(StringRef("@LINE").size());
387   int Offset = 0;
388   if (!Expr.empty()) {
389     if (Expr[0] == '+')
390       Expr = Expr.substr(1);
391     else if (Expr[0] != '-')
392       return false;
393     if (Expr.getAsInteger(10, Offset))
394       return false;
395   }
396   Value = llvm::itostr(LineNumber + Offset);
397   return true;
398 }
399 
400 /// Match - Match the pattern string against the input buffer Buffer.  This
401 /// returns the position that is matched or npos if there is no match.  If
402 /// there is a match, the size of the matched string is returned in MatchLen.
403 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
404                       StringMap<StringRef> &VariableTable) const {
405   // If this is the EOF pattern, match it immediately.
406   if (CheckTy == Check::CheckEOF) {
407     MatchLen = 0;
408     return Buffer.size();
409   }
410 
411   // If this is a fixed string pattern, just match it now.
412   if (!FixedStr.empty()) {
413     MatchLen = FixedStr.size();
414     return Buffer.find(FixedStr);
415   }
416 
417   // Regex match.
418 
419   // If there are variable uses, we need to create a temporary string with the
420   // actual value.
421   StringRef RegExToMatch = RegExStr;
422   std::string TmpStr;
423   if (!VariableUses.empty()) {
424     TmpStr = RegExStr;
425 
426     unsigned InsertOffset = 0;
427     for (const auto &VariableUse : VariableUses) {
428       std::string Value;
429 
430       if (VariableUse.first[0] == '@') {
431         if (!EvaluateExpression(VariableUse.first, Value))
432           return StringRef::npos;
433       } else {
434         StringMap<StringRef>::iterator it =
435             VariableTable.find(VariableUse.first);
436         // If the variable is undefined, return an error.
437         if (it == VariableTable.end())
438           return StringRef::npos;
439 
440         // Look up the value and escape it so that we can put it into the regex.
441         Value += Regex::escape(it->second);
442       }
443 
444       // Plop it into the regex at the adjusted offset.
445       TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset,
446                     Value.begin(), Value.end());
447       InsertOffset += Value.size();
448     }
449 
450     // Match the newly constructed regex.
451     RegExToMatch = TmpStr;
452   }
453 
454 
455   SmallVector<StringRef, 4> MatchInfo;
456   if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
457     return StringRef::npos;
458 
459   // Successful regex match.
460   assert(!MatchInfo.empty() && "Didn't get any match");
461   StringRef FullMatch = MatchInfo[0];
462 
463   // If this defines any variables, remember their values.
464   for (const auto &VariableDef : VariableDefs) {
465     assert(VariableDef.second < MatchInfo.size() && "Internal paren error");
466     VariableTable[VariableDef.first] = MatchInfo[VariableDef.second];
467   }
468 
469   MatchLen = FullMatch.size();
470   return FullMatch.data()-Buffer.data();
471 }
472 
473 unsigned Pattern::ComputeMatchDistance(StringRef Buffer,
474                               const StringMap<StringRef> &VariableTable) const {
475   // Just compute the number of matching characters. For regular expressions, we
476   // just compare against the regex itself and hope for the best.
477   //
478   // FIXME: One easy improvement here is have the regex lib generate a single
479   // example regular expression which matches, and use that as the example
480   // string.
481   StringRef ExampleString(FixedStr);
482   if (ExampleString.empty())
483     ExampleString = RegExStr;
484 
485   // Only compare up to the first line in the buffer, or the string size.
486   StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
487   BufferPrefix = BufferPrefix.split('\n').first;
488   return BufferPrefix.edit_distance(ExampleString);
489 }
490 
491 void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
492                                const StringMap<StringRef> &VariableTable) const{
493   // If this was a regular expression using variables, print the current
494   // variable values.
495   if (!VariableUses.empty()) {
496     for (const auto &VariableUse : VariableUses) {
497       SmallString<256> Msg;
498       raw_svector_ostream OS(Msg);
499       StringRef Var = VariableUse.first;
500       if (Var[0] == '@') {
501         std::string Value;
502         if (EvaluateExpression(Var, Value)) {
503           OS << "with expression \"";
504           OS.write_escaped(Var) << "\" equal to \"";
505           OS.write_escaped(Value) << "\"";
506         } else {
507           OS << "uses incorrect expression \"";
508           OS.write_escaped(Var) << "\"";
509         }
510       } else {
511         StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
512 
513         // Check for undefined variable references.
514         if (it == VariableTable.end()) {
515           OS << "uses undefined variable \"";
516           OS.write_escaped(Var) << "\"";
517         } else {
518           OS << "with variable \"";
519           OS.write_escaped(Var) << "\" equal to \"";
520           OS.write_escaped(it->second) << "\"";
521         }
522       }
523 
524       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
525                       OS.str());
526     }
527   }
528 
529   // Attempt to find the closest/best fuzzy match.  Usually an error happens
530   // because some string in the output didn't exactly match. In these cases, we
531   // would like to show the user a best guess at what "should have" matched, to
532   // save them having to actually check the input manually.
533   size_t NumLinesForward = 0;
534   size_t Best = StringRef::npos;
535   double BestQuality = 0;
536 
537   // Use an arbitrary 4k limit on how far we will search.
538   for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
539     if (Buffer[i] == '\n')
540       ++NumLinesForward;
541 
542     // Patterns have leading whitespace stripped, so skip whitespace when
543     // looking for something which looks like a pattern.
544     if (Buffer[i] == ' ' || Buffer[i] == '\t')
545       continue;
546 
547     // Compute the "quality" of this match as an arbitrary combination of the
548     // match distance and the number of lines skipped to get to this match.
549     unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
550     double Quality = Distance + (NumLinesForward / 100.);
551 
552     if (Quality < BestQuality || Best == StringRef::npos) {
553       Best = i;
554       BestQuality = Quality;
555     }
556   }
557 
558   // Print the "possible intended match here" line if we found something
559   // reasonable and not equal to what we showed in the "scanning from here"
560   // line.
561   if (Best && Best != StringRef::npos && BestQuality < 50) {
562       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
563                       SourceMgr::DK_Note, "possible intended match here");
564 
565     // FIXME: If we wanted to be really friendly we would show why the match
566     // failed, as it can be hard to spot simple one character differences.
567   }
568 }
569 
570 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
571   // Offset keeps track of the current offset within the input Str
572   size_t Offset = 0;
573   // [...] Nesting depth
574   size_t BracketDepth = 0;
575 
576   while (!Str.empty()) {
577     if (Str.startswith("]]") && BracketDepth == 0)
578       return Offset;
579     if (Str[0] == '\\') {
580       // Backslash escapes the next char within regexes, so skip them both.
581       Str = Str.substr(2);
582       Offset += 2;
583     } else {
584       switch (Str[0]) {
585         default:
586           break;
587         case '[':
588           BracketDepth++;
589           break;
590         case ']':
591           if (BracketDepth == 0) {
592             SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
593                             SourceMgr::DK_Error,
594                             "missing closing \"]\" for regex variable");
595             exit(1);
596           }
597           BracketDepth--;
598           break;
599       }
600       Str = Str.substr(1);
601       Offset++;
602     }
603   }
604 
605   return StringRef::npos;
606 }
607 
608 
609 //===----------------------------------------------------------------------===//
610 // Check Strings.
611 //===----------------------------------------------------------------------===//
612 
613 /// CheckString - This is a check that we found in the input file.
614 struct CheckString {
615   /// Pat - The pattern to match.
616   Pattern Pat;
617 
618   /// Prefix - Which prefix name this check matched.
619   StringRef Prefix;
620 
621   /// Loc - The location in the match file that the check string was specified.
622   SMLoc Loc;
623 
624   /// CheckTy - Specify what kind of check this is. e.g. CHECK-NEXT: directive,
625   /// as opposed to a CHECK: directive.
626   //  Check::CheckType CheckTy;
627 
628   /// DagNotStrings - These are all of the strings that are disallowed from
629   /// occurring between this match string and the previous one (or start of
630   /// file).
631   std::vector<Pattern> DagNotStrings;
632 
633   CheckString(const Pattern &P, StringRef S, SMLoc L)
634       : Pat(P), Prefix(S), Loc(L) {}
635 
636   /// Check - Match check string and its "not strings" and/or "dag strings".
637   size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
638                size_t &MatchLen, StringMap<StringRef> &VariableTable) const;
639 
640   /// CheckNext - Verify there is a single line in the given buffer.
641   bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
642 
643   /// CheckSame - Verify there is no newline in the given buffer.
644   bool CheckSame(const SourceMgr &SM, StringRef Buffer) const;
645 
646   /// CheckNot - Verify there's no "not strings" in the given buffer.
647   bool CheckNot(const SourceMgr &SM, StringRef Buffer,
648                 const std::vector<const Pattern *> &NotStrings,
649                 StringMap<StringRef> &VariableTable) const;
650 
651   /// CheckDag - Match "dag strings" and their mixed "not strings".
652   size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
653                   std::vector<const Pattern *> &NotStrings,
654                   StringMap<StringRef> &VariableTable) const;
655 };
656 
657 /// Canonicalize whitespaces in the file. Line endings are replaced with
658 /// UNIX-style '\n'.
659 ///
660 /// \param PreserveHorizontal Don't squash consecutive horizontal whitespace
661 /// characters to a single space.
662 static StringRef CanonicalizeFile(MemoryBuffer &MB, bool PreserveHorizontal,
663                                   SmallVectorImpl<char> &OutputBuffer) {
664   OutputBuffer.reserve(MB.getBufferSize());
665 
666   for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd();
667        Ptr != End; ++Ptr) {
668     // Eliminate trailing dosish \r.
669     if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
670       continue;
671     }
672 
673     // If current char is not a horizontal whitespace or if horizontal
674     // whitespace canonicalization is disabled, dump it to output as is.
675     if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) {
676       OutputBuffer.push_back(*Ptr);
677       continue;
678     }
679 
680     // Otherwise, add one space and advance over neighboring space.
681     OutputBuffer.push_back(' ');
682     while (Ptr+1 != End &&
683            (Ptr[1] == ' ' || Ptr[1] == '\t'))
684       ++Ptr;
685   }
686 
687   // Add a null byte and then return all but that byte.
688   OutputBuffer.push_back('\0');
689   return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1);
690 }
691 
692 static bool IsPartOfWord(char c) {
693   return (isalnum(c) || c == '-' || c == '_');
694 }
695 
696 // Get the size of the prefix extension.
697 static size_t CheckTypeSize(Check::CheckType Ty) {
698   switch (Ty) {
699   case Check::CheckNone:
700   case Check::CheckBadNot:
701     return 0;
702 
703   case Check::CheckPlain:
704     return sizeof(":") - 1;
705 
706   case Check::CheckNext:
707     return sizeof("-NEXT:") - 1;
708 
709   case Check::CheckSame:
710     return sizeof("-SAME:") - 1;
711 
712   case Check::CheckNot:
713     return sizeof("-NOT:") - 1;
714 
715   case Check::CheckDAG:
716     return sizeof("-DAG:") - 1;
717 
718   case Check::CheckLabel:
719     return sizeof("-LABEL:") - 1;
720 
721   case Check::CheckEOF:
722     llvm_unreachable("Should not be using EOF size");
723   }
724 
725   llvm_unreachable("Bad check type");
726 }
727 
728 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) {
729   char NextChar = Buffer[Prefix.size()];
730 
731   // Verify that the : is present after the prefix.
732   if (NextChar == ':')
733     return Check::CheckPlain;
734 
735   if (NextChar != '-')
736     return Check::CheckNone;
737 
738   StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
739   if (Rest.startswith("NEXT:"))
740     return Check::CheckNext;
741 
742   if (Rest.startswith("SAME:"))
743     return Check::CheckSame;
744 
745   if (Rest.startswith("NOT:"))
746     return Check::CheckNot;
747 
748   if (Rest.startswith("DAG:"))
749     return Check::CheckDAG;
750 
751   if (Rest.startswith("LABEL:"))
752     return Check::CheckLabel;
753 
754   // You can't combine -NOT with another suffix.
755   if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") ||
756       Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") ||
757       Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:"))
758     return Check::CheckBadNot;
759 
760   return Check::CheckNone;
761 }
762 
763 // From the given position, find the next character after the word.
764 static size_t SkipWord(StringRef Str, size_t Loc) {
765   while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
766     ++Loc;
767   return Loc;
768 }
769 
770 // Try to find the first match in buffer for any prefix. If a valid match is
771 // found, return that prefix and set its type and location.  If there are almost
772 // matches (e.g. the actual prefix string is found, but is not an actual check
773 // string), but no valid match, return an empty string and set the position to
774 // resume searching from. If no partial matches are found, return an empty
775 // string and the location will be StringRef::npos. If one prefix is a substring
776 // of another, the maximal match should be found. e.g. if "A" and "AA" are
777 // prefixes then AA-CHECK: should match the second one.
778 static StringRef FindFirstCandidateMatch(StringRef &Buffer,
779                                          Check::CheckType &CheckTy,
780                                          size_t &CheckLoc) {
781   StringRef FirstPrefix;
782   size_t FirstLoc = StringRef::npos;
783   size_t SearchLoc = StringRef::npos;
784   Check::CheckType FirstTy = Check::CheckNone;
785 
786   CheckTy = Check::CheckNone;
787   CheckLoc = StringRef::npos;
788 
789   for (StringRef Prefix : CheckPrefixes) {
790     size_t PrefixLoc = Buffer.find(Prefix);
791 
792     if (PrefixLoc == StringRef::npos)
793       continue;
794 
795     // Track where we are searching for invalid prefixes that look almost right.
796     // We need to only advance to the first partial match on the next attempt
797     // since a partial match could be a substring of a later, valid prefix.
798     // Need to skip to the end of the word, otherwise we could end up
799     // matching a prefix in a substring later.
800     if (PrefixLoc < SearchLoc)
801       SearchLoc = SkipWord(Buffer, PrefixLoc);
802 
803     // We only want to find the first match to avoid skipping some.
804     if (PrefixLoc > FirstLoc)
805       continue;
806     // If one matching check-prefix is a prefix of another, choose the
807     // longer one.
808     if (PrefixLoc == FirstLoc && Prefix.size() < FirstPrefix.size())
809       continue;
810 
811     StringRef Rest = Buffer.drop_front(PrefixLoc);
812     // Make sure we have actually found the prefix, and not a word containing
813     // it. This should also prevent matching the wrong prefix when one is a
814     // substring of another.
815     if (PrefixLoc != 0 && IsPartOfWord(Buffer[PrefixLoc - 1]))
816       FirstTy = Check::CheckNone;
817     else
818       FirstTy = FindCheckType(Rest, Prefix);
819 
820     FirstLoc = PrefixLoc;
821     FirstPrefix = Prefix;
822   }
823 
824   // If the first prefix is invalid, we should continue the search after it.
825   if (FirstTy == Check::CheckNone) {
826     CheckLoc = SearchLoc;
827     return "";
828   }
829 
830   CheckTy = FirstTy;
831   CheckLoc = FirstLoc;
832   return FirstPrefix;
833 }
834 
835 static StringRef FindFirstMatchingPrefix(StringRef &Buffer,
836                                          unsigned &LineNumber,
837                                          Check::CheckType &CheckTy,
838                                          size_t &CheckLoc) {
839   while (!Buffer.empty()) {
840     StringRef Prefix = FindFirstCandidateMatch(Buffer, CheckTy, CheckLoc);
841     // If we found a real match, we are done.
842     if (!Prefix.empty()) {
843       LineNumber += Buffer.substr(0, CheckLoc).count('\n');
844       return Prefix;
845     }
846 
847     // We didn't find any almost matches either, we are also done.
848     if (CheckLoc == StringRef::npos)
849       return StringRef();
850 
851     LineNumber += Buffer.substr(0, CheckLoc + 1).count('\n');
852 
853     // Advance to the last possible match we found and try again.
854     Buffer = Buffer.drop_front(CheckLoc + 1);
855   }
856 
857   return StringRef();
858 }
859 
860 /// ReadCheckFile - Read the check file, which specifies the sequence of
861 /// expected strings.  The strings are added to the CheckStrings vector.
862 /// Returns true in case of an error, false otherwise.
863 static bool ReadCheckFile(SourceMgr &SM, StringRef Buffer,
864                           std::vector<CheckString> &CheckStrings) {
865   std::vector<Pattern> ImplicitNegativeChecks;
866   for (const auto &PatternString : ImplicitCheckNot) {
867     // Create a buffer with fake command line content in order to display the
868     // command line option responsible for the specific implicit CHECK-NOT.
869     std::string Prefix = (Twine("-") + ImplicitCheckNot.ArgStr + "='").str();
870     std::string Suffix = "'";
871     std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy(
872         Prefix + PatternString + Suffix, "command line");
873 
874     StringRef PatternInBuffer =
875         CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
876     SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
877 
878     ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot));
879     ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
880                                                "IMPLICIT-CHECK", SM, 0);
881   }
882 
883 
884   std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;
885 
886   // LineNumber keeps track of the line on which CheckPrefix instances are
887   // found.
888   unsigned LineNumber = 1;
889 
890   while (1) {
891     Check::CheckType CheckTy;
892     size_t PrefixLoc;
893 
894     // See if a prefix occurs in the memory buffer.
895     StringRef UsedPrefix = FindFirstMatchingPrefix(Buffer,
896                                                    LineNumber,
897                                                    CheckTy,
898                                                    PrefixLoc);
899     if (UsedPrefix.empty())
900       break;
901 
902     Buffer = Buffer.drop_front(PrefixLoc);
903 
904     // Location to use for error messages.
905     const char *UsedPrefixStart = Buffer.data() + (PrefixLoc == 0 ? 0 : 1);
906 
907     // PrefixLoc is to the start of the prefix. Skip to the end.
908     Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy));
909 
910     // Complain about useful-looking but unsupported suffixes.
911     if (CheckTy == Check::CheckBadNot) {
912       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()),
913                       SourceMgr::DK_Error,
914                       "unsupported -NOT combo on prefix '" + UsedPrefix + "'");
915       return true;
916     }
917 
918     // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
919     // leading and trailing whitespace.
920     Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
921 
922     // Scan ahead to the end of line.
923     size_t EOL = Buffer.find_first_of("\n\r");
924 
925     // Remember the location of the start of the pattern, for diagnostics.
926     SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
927 
928     // Parse the pattern.
929     Pattern P(CheckTy);
930     if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber))
931       return true;
932 
933     // Verify that CHECK-LABEL lines do not define or use variables
934     if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
935       SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
936                       SourceMgr::DK_Error,
937                       "found '" + UsedPrefix + "-LABEL:'"
938                       " with variable definition or use");
939       return true;
940     }
941 
942     Buffer = Buffer.substr(EOL);
943 
944     // Verify that CHECK-NEXT lines have at least one CHECK line before them.
945     if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame) &&
946         CheckStrings.empty()) {
947       StringRef Type = CheckTy == Check::CheckNext ? "NEXT" : "SAME";
948       SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
949                       SourceMgr::DK_Error,
950                       "found '" + UsedPrefix + "-" + Type + "' without previous '"
951                       + UsedPrefix + ": line");
952       return true;
953     }
954 
955     // Handle CHECK-DAG/-NOT.
956     if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
957       DagNotMatches.push_back(P);
958       continue;
959     }
960 
961     // Okay, add the string we captured to the output vector and move on.
962     CheckStrings.emplace_back(P, UsedPrefix, PatternLoc);
963     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
964     DagNotMatches = ImplicitNegativeChecks;
965   }
966 
967   // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
968   // prefix as a filler for the error message.
969   if (!DagNotMatches.empty()) {
970     CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(),
971                               SMLoc::getFromPointer(Buffer.data()));
972     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
973   }
974 
975   if (CheckStrings.empty()) {
976     errs() << "error: no check strings found with prefix"
977            << (CheckPrefixes.size() > 1 ? "es " : " ");
978     prefix_iterator I = CheckPrefixes.begin();
979     prefix_iterator E = CheckPrefixes.end();
980     if (I != E) {
981       errs() << "\'" << *I << ":'";
982       ++I;
983     }
984     for (; I != E; ++I)
985       errs() << ", \'" << *I << ":'";
986 
987     errs() << '\n';
988     return true;
989   }
990 
991   return false;
992 }
993 
994 static void PrintCheckFailed(const SourceMgr &SM, SMLoc Loc,
995                              const Pattern &Pat, StringRef Buffer,
996                              StringMap<StringRef> &VariableTable) {
997   // Otherwise, we have an error, emit an error message.
998   SM.PrintMessage(Loc, SourceMgr::DK_Error,
999                   "expected string not found in input");
1000 
1001   // Print the "scanning from here" line.  If the current position is at the
1002   // end of a line, advance to the start of the next line.
1003   Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
1004 
1005   SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1006                   "scanning from here");
1007 
1008   // Allow the pattern to print additional information if desired.
1009   Pat.PrintFailureInfo(SM, Buffer, VariableTable);
1010 }
1011 
1012 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
1013                              StringRef Buffer,
1014                              StringMap<StringRef> &VariableTable) {
1015   PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable);
1016 }
1017 
1018 /// CountNumNewlinesBetween - Count the number of newlines in the specified
1019 /// range.
1020 static unsigned CountNumNewlinesBetween(StringRef Range,
1021                                         const char *&FirstNewLine) {
1022   unsigned NumNewLines = 0;
1023   while (1) {
1024     // Scan for newline.
1025     Range = Range.substr(Range.find_first_of("\n\r"));
1026     if (Range.empty()) return NumNewLines;
1027 
1028     ++NumNewLines;
1029 
1030     // Handle \n\r and \r\n as a single newline.
1031     if (Range.size() > 1 &&
1032         (Range[1] == '\n' || Range[1] == '\r') &&
1033         (Range[0] != Range[1]))
1034       Range = Range.substr(1);
1035     Range = Range.substr(1);
1036 
1037     if (NumNewLines == 1)
1038       FirstNewLine = Range.begin();
1039   }
1040 }
1041 
1042 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
1043                           bool IsLabelScanMode, size_t &MatchLen,
1044                           StringMap<StringRef> &VariableTable) const {
1045   size_t LastPos = 0;
1046   std::vector<const Pattern *> NotStrings;
1047 
1048   // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
1049   // bounds; we have not processed variable definitions within the bounded block
1050   // yet so cannot handle any final CHECK-DAG yet; this is handled when going
1051   // over the block again (including the last CHECK-LABEL) in normal mode.
1052   if (!IsLabelScanMode) {
1053     // Match "dag strings" (with mixed "not strings" if any).
1054     LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable);
1055     if (LastPos == StringRef::npos)
1056       return StringRef::npos;
1057   }
1058 
1059   // Match itself from the last position after matching CHECK-DAG.
1060   StringRef MatchBuffer = Buffer.substr(LastPos);
1061   size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1062   if (MatchPos == StringRef::npos) {
1063     PrintCheckFailed(SM, *this, MatchBuffer, VariableTable);
1064     return StringRef::npos;
1065   }
1066 
1067   // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
1068   // or CHECK-NOT
1069   if (!IsLabelScanMode) {
1070     StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1071 
1072     // If this check is a "CHECK-NEXT", verify that the previous match was on
1073     // the previous line (i.e. that there is one newline between them).
1074     if (CheckNext(SM, SkippedRegion))
1075       return StringRef::npos;
1076 
1077     // If this check is a "CHECK-SAME", verify that the previous match was on
1078     // the same line (i.e. that there is no newline between them).
1079     if (CheckSame(SM, SkippedRegion))
1080       return StringRef::npos;
1081 
1082     // If this match had "not strings", verify that they don't exist in the
1083     // skipped region.
1084     if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1085       return StringRef::npos;
1086   }
1087 
1088   return LastPos + MatchPos;
1089 }
1090 
1091 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
1092   if (Pat.getCheckTy() != Check::CheckNext)
1093     return false;
1094 
1095   // Count the number of newlines between the previous match and this one.
1096   assert(Buffer.data() !=
1097          SM.getMemoryBuffer(
1098            SM.FindBufferContainingLoc(
1099              SMLoc::getFromPointer(Buffer.data())))->getBufferStart() &&
1100          "CHECK-NEXT can't be the first check in a file");
1101 
1102   const char *FirstNewLine = nullptr;
1103   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1104 
1105   if (NumNewLines == 0) {
1106     SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
1107                     "-NEXT: is on the same line as previous match");
1108     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
1109                     SourceMgr::DK_Note, "'next' match was here");
1110     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1111                     "previous match ended here");
1112     return true;
1113   }
1114 
1115   if (NumNewLines != 1) {
1116     SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
1117                     "-NEXT: is not on the line after the previous match");
1118     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
1119                     SourceMgr::DK_Note, "'next' match was here");
1120     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1121                     "previous match ended here");
1122     SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
1123                     "non-matching line after previous match is here");
1124     return true;
1125   }
1126 
1127   return false;
1128 }
1129 
1130 bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
1131   if (Pat.getCheckTy() != Check::CheckSame)
1132     return false;
1133 
1134   // Count the number of newlines between the previous match and this one.
1135   assert(Buffer.data() !=
1136              SM.getMemoryBuffer(SM.FindBufferContainingLoc(
1137                                     SMLoc::getFromPointer(Buffer.data())))
1138                  ->getBufferStart() &&
1139          "CHECK-SAME can't be the first check in a file");
1140 
1141   const char *FirstNewLine = nullptr;
1142   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1143 
1144   if (NumNewLines != 0) {
1145     SM.PrintMessage(Loc, SourceMgr::DK_Error,
1146                     Prefix +
1147                         "-SAME: is not on the same line as the previous match");
1148     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1149                     "'next' match was here");
1150     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1151                     "previous match ended here");
1152     return true;
1153   }
1154 
1155   return false;
1156 }
1157 
1158 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
1159                            const std::vector<const Pattern *> &NotStrings,
1160                            StringMap<StringRef> &VariableTable) const {
1161   for (const Pattern *Pat : NotStrings) {
1162     assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
1163 
1164     size_t MatchLen = 0;
1165     size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
1166 
1167     if (Pos == StringRef::npos) continue;
1168 
1169     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()+Pos),
1170                     SourceMgr::DK_Error,
1171                     Prefix + "-NOT: string occurred!");
1172     SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note,
1173                     Prefix + "-NOT: pattern specified here");
1174     return true;
1175   }
1176 
1177   return false;
1178 }
1179 
1180 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
1181                              std::vector<const Pattern *> &NotStrings,
1182                              StringMap<StringRef> &VariableTable) const {
1183   if (DagNotStrings.empty())
1184     return 0;
1185 
1186   size_t LastPos = 0;
1187   size_t StartPos = LastPos;
1188 
1189   for (const Pattern &Pat : DagNotStrings) {
1190     assert((Pat.getCheckTy() == Check::CheckDAG ||
1191             Pat.getCheckTy() == Check::CheckNot) &&
1192            "Invalid CHECK-DAG or CHECK-NOT!");
1193 
1194     if (Pat.getCheckTy() == Check::CheckNot) {
1195       NotStrings.push_back(&Pat);
1196       continue;
1197     }
1198 
1199     assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
1200 
1201     size_t MatchLen = 0, MatchPos;
1202 
1203     // CHECK-DAG always matches from the start.
1204     StringRef MatchBuffer = Buffer.substr(StartPos);
1205     MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1206     // With a group of CHECK-DAGs, a single mismatching means the match on
1207     // that group of CHECK-DAGs fails immediately.
1208     if (MatchPos == StringRef::npos) {
1209       PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable);
1210       return StringRef::npos;
1211     }
1212     // Re-calc it as the offset relative to the start of the original string.
1213     MatchPos += StartPos;
1214 
1215     if (!NotStrings.empty()) {
1216       if (MatchPos < LastPos) {
1217         // Reordered?
1218         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos),
1219                         SourceMgr::DK_Error,
1220                         Prefix + "-DAG: found a match of CHECK-DAG"
1221                         " reordering across a CHECK-NOT");
1222         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos),
1223                         SourceMgr::DK_Note,
1224                         Prefix + "-DAG: the farthest match of CHECK-DAG"
1225                         " is found here");
1226         SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note,
1227                         Prefix + "-NOT: the crossed pattern specified"
1228                         " here");
1229         SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note,
1230                         Prefix + "-DAG: the reordered pattern specified"
1231                         " here");
1232         return StringRef::npos;
1233       }
1234       // All subsequent CHECK-DAGs should be matched from the farthest
1235       // position of all precedent CHECK-DAGs (including this one.)
1236       StartPos = LastPos;
1237       // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to
1238       // CHECK-DAG, verify that there's no 'not' strings occurred in that
1239       // region.
1240       StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1241       if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1242         return StringRef::npos;
1243       // Clear "not strings".
1244       NotStrings.clear();
1245     }
1246 
1247     // Update the last position with CHECK-DAG matches.
1248     LastPos = std::max(MatchPos + MatchLen, LastPos);
1249   }
1250 
1251   return LastPos;
1252 }
1253 
1254 // A check prefix must contain only alphanumeric, hyphens and underscores.
1255 static bool ValidateCheckPrefix(StringRef CheckPrefix) {
1256   Regex Validator("^[a-zA-Z0-9_-]*$");
1257   return Validator.match(CheckPrefix);
1258 }
1259 
1260 static bool ValidateCheckPrefixes() {
1261   StringSet<> PrefixSet;
1262 
1263   for (StringRef Prefix : CheckPrefixes) {
1264     // Reject empty prefixes.
1265     if (Prefix == "")
1266       return false;
1267 
1268     if (!PrefixSet.insert(Prefix).second)
1269       return false;
1270 
1271     if (!ValidateCheckPrefix(Prefix))
1272       return false;
1273   }
1274 
1275   return true;
1276 }
1277 
1278 // I don't think there's a way to specify an initial value for cl::list,
1279 // so if nothing was specified, add the default
1280 static void AddCheckPrefixIfNeeded() {
1281   if (CheckPrefixes.empty())
1282     CheckPrefixes.push_back("CHECK");
1283 }
1284 
1285 static void DumpCommandLine(int argc, char **argv) {
1286   errs() << "FileCheck command line: ";
1287   for (int I = 0; I < argc; I++)
1288     errs() << " " << argv[I];
1289   errs() << "\n";
1290 }
1291 
1292 /// Check the input to FileCheck provided in the \p Buffer against the \p
1293 /// CheckStrings read from the check file.
1294 ///
1295 /// Returns false if the input fails to satisfy the checks.
1296 bool CheckInput(SourceMgr &SM, StringRef Buffer,
1297                 ArrayRef<CheckString> CheckStrings) {
1298   bool ChecksFailed = false;
1299 
1300   /// VariableTable - This holds all the current filecheck variables.
1301   StringMap<StringRef> VariableTable;
1302 
1303   unsigned i = 0, j = 0, e = CheckStrings.size();
1304   while (true) {
1305     StringRef CheckRegion;
1306     if (j == e) {
1307       CheckRegion = Buffer;
1308     } else {
1309       const CheckString &CheckLabelStr = CheckStrings[j];
1310       if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) {
1311         ++j;
1312         continue;
1313       }
1314 
1315       // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
1316       size_t MatchLabelLen = 0;
1317       size_t MatchLabelPos = CheckLabelStr.Check(SM, Buffer, true,
1318                                                  MatchLabelLen, VariableTable);
1319       if (MatchLabelPos == StringRef::npos)
1320         // Immediately bail of CHECK-LABEL fails, nothing else we can do.
1321         return false;
1322 
1323       CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
1324       Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
1325       ++j;
1326     }
1327 
1328     for ( ; i != j; ++i) {
1329       const CheckString &CheckStr = CheckStrings[i];
1330 
1331       // Check each string within the scanned region, including a second check
1332       // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
1333       size_t MatchLen = 0;
1334       size_t MatchPos = CheckStr.Check(SM, CheckRegion, false, MatchLen,
1335                                        VariableTable);
1336 
1337       if (MatchPos == StringRef::npos) {
1338         ChecksFailed = true;
1339         i = j;
1340         break;
1341       }
1342 
1343       CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
1344     }
1345 
1346     if (j == e)
1347       break;
1348   }
1349 
1350   // Success if no checks failed.
1351   return !ChecksFailed;
1352 }
1353 
1354 int main(int argc, char **argv) {
1355   sys::PrintStackTraceOnErrorSignal(argv[0]);
1356   PrettyStackTraceProgram X(argc, argv);
1357   cl::ParseCommandLineOptions(argc, argv);
1358 
1359   if (!ValidateCheckPrefixes()) {
1360     errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
1361               "start with a letter and contain only alphanumeric characters, "
1362               "hyphens and underscores\n";
1363     return 2;
1364   }
1365 
1366   AddCheckPrefixIfNeeded();
1367 
1368   SourceMgr SM;
1369 
1370   // Read the expected strings from the check file.
1371   ErrorOr<std::unique_ptr<MemoryBuffer>> CheckFileOrErr =
1372       MemoryBuffer::getFileOrSTDIN(CheckFilename);
1373   if (std::error_code EC = CheckFileOrErr.getError()) {
1374     errs() << "Could not open check file '" << CheckFilename
1375            << "': " << EC.message() << '\n';
1376     return 2;
1377   }
1378   MemoryBuffer &CheckFile = *CheckFileOrErr.get();
1379 
1380   SmallString<4096> CheckFileBuffer;
1381   StringRef CheckFileText =
1382       CanonicalizeFile(CheckFile, NoCanonicalizeWhiteSpace, CheckFileBuffer);
1383 
1384   SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(
1385                             CheckFileText, CheckFile.getBufferIdentifier()),
1386                         SMLoc());
1387 
1388   std::vector<CheckString> CheckStrings;
1389   if (ReadCheckFile(SM, CheckFileText, CheckStrings))
1390     return 2;
1391 
1392 
1393   // Open the file to check and add it to SourceMgr.
1394   ErrorOr<std::unique_ptr<MemoryBuffer>> InputFileOrErr =
1395       MemoryBuffer::getFileOrSTDIN(InputFilename);
1396   if (std::error_code EC = InputFileOrErr.getError()) {
1397     errs() << "Could not open input file '" << InputFilename
1398            << "': " << EC.message() << '\n';
1399     return 2;
1400   }
1401   MemoryBuffer &InputFile = *InputFileOrErr.get();
1402 
1403   if (InputFile.getBufferSize() == 0 && !AllowEmptyInput) {
1404     errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
1405     DumpCommandLine(argc, argv);
1406     return 2;
1407   }
1408 
1409   SmallString<4096> InputFileBuffer;
1410   StringRef InputFileText =
1411       CanonicalizeFile(InputFile, NoCanonicalizeWhiteSpace, InputFileBuffer);
1412 
1413   SM.AddNewSourceBuffer(
1414       MemoryBuffer::getMemBuffer(InputFileText, InputFile.getBufferIdentifier()), SMLoc());
1415 
1416   return CheckInput(SM, InputFileText, CheckStrings) ? EXIT_SUCCESS : 1;
1417 }
1418