1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
// FileCheck does a line-by-line check of a file that validates whether it
11 // contains the expected content.  This is useful for regression tests etc.
12 //
13 // This program exits with an error status of 2 on error, exit status of 0 if
14 // the file matched the expected contents, and exit status of 1 if it did not
15 // contain the expected contents.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringMap.h"
22 #include "llvm/ADT/StringSet.h"
23 #include "llvm/Support/CommandLine.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Support/PrettyStackTrace.h"
26 #include "llvm/Support/Regex.h"
27 #include "llvm/Support/Signals.h"
28 #include "llvm/Support/SourceMgr.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include <algorithm>
31 #include <cctype>
32 #include <map>
33 #include <string>
34 #include <system_error>
35 #include <vector>
36 using namespace llvm;
37 
/// Required positional argument naming the check file.
static cl::opt<std::string>
    CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);

/// -input-file: name of the file to check. Initialized to "-", which the
/// option description documents as meaning stdin.
static cl::opt<std::string>
    InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
                  cl::init("-"), cl::value_desc("filename"));

/// -check-prefix: the list of directive prefixes to look for in the check
/// file. The -check-prefixes alias feeds the same list and additionally
/// accepts several comma separated values in one argument.
static cl::list<std::string> CheckPrefixes(
    "check-prefix",
    cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
static cl::alias CheckPrefixesAlias(
    "check-prefixes", cl::aliasopt(CheckPrefixes), cl::CommaSeparated,
    cl::NotHidden,
    cl::desc(
        "Alias for -check-prefix permitting multiple comma separated values"));

/// -strict-whitespace: when set, horizontal whitespace is not canonicalized.
/// Pattern::ParsePattern consults this flag to decide whether full-line
/// patterns tolerate leading and trailing spaces.
static cl::opt<bool> NoCanonicalizeWhiteSpace(
    "strict-whitespace",
    cl::desc("Do not treat all horizontal whitespace as equivalent"));

/// -implicit-check-not: patterns that ReadCheckFile parses as CheckNot
/// patterns, one per occurrence of the option.
static cl::list<std::string> ImplicitCheckNot(
    "implicit-check-not",
    cl::desc("Add an implicit negative check with this pattern to every\n"
             "positive check. This can be used to ensure that no instances of\n"
             "this pattern occur which are not matched by a positive pattern"),
    cl::value_desc("pattern"));

/// -allow-empty: off by default; permits an empty input file.
static cl::opt<bool> AllowEmptyInput(
    "allow-empty", cl::init(false),
    cl::desc("Allow the input file to be empty. This is useful when making\n"
             "checks that some error message does not occur, for example."));

/// -match-full-lines: off by default. When set, Pattern::ParsePattern anchors
/// every pattern except CHECK-NOT ones with '^' and '$'.
static cl::opt<bool> MatchFullLines(
    "match-full-lines", cl::init(false),
    cl::desc("Require all positive matches to cover an entire input line.\n"
             "Allows leading and trailing whitespace if --strict-whitespace\n"
             "is not also passed."));
75 
76 typedef cl::list<std::string>::const_iterator prefix_iterator;
77 
78 //===----------------------------------------------------------------------===//
79 // Pattern Handling Code.
80 //===----------------------------------------------------------------------===//
81 
/// Kinds of check directive recognized in the check file.
namespace Check {
enum CheckType {
  /// No valid directive was recognized.
  CheckNone = 0,
  /// Directive of the form "PREFIX:".
  CheckPlain,
  /// Directive of the form "PREFIX-NEXT:".
  CheckNext,
  /// Directive of the form "PREFIX-SAME:".
  CheckSame,
  /// Directive of the form "PREFIX-NOT:".
  CheckNot,
  /// Directive of the form "PREFIX-DAG:".
  CheckDAG,
  /// Directive of the form "PREFIX-LABEL:".
  CheckLabel,

  /// Indicates the pattern only matches the end of file. This is used for
  /// trailing CHECK-NOTs.
  CheckEOF,

  /// Marks when parsing found a -NOT check combined with another CHECK suffix.
  CheckBadNot
};
}
100 
101 class Pattern {
102   SMLoc PatternLoc;
103 
104   /// A fixed string to match as the pattern or empty if this pattern requires
105   /// a regex match.
106   StringRef FixedStr;
107 
108   /// A regex string to match as the pattern or empty if this pattern requires
109   /// a fixed string to match.
110   std::string RegExStr;
111 
112   /// Entries in this vector map to uses of a variable in the pattern, e.g.
113   /// "foo[[bar]]baz".  In this case, the RegExStr will contain "foobaz" and
114   /// we'll get an entry in this vector that tells us to insert the value of
115   /// bar at offset 3.
116   std::vector<std::pair<StringRef, unsigned>> VariableUses;
117 
118   /// Maps definitions of variables to their parenthesized capture numbers.
119   ///
120   /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to
121   /// 1.
122   std::map<StringRef, unsigned> VariableDefs;
123 
124   Check::CheckType CheckTy;
125 
126   /// Contains the number of line this pattern is in.
127   unsigned LineNumber;
128 
129 public:
130   explicit Pattern(Check::CheckType Ty) : CheckTy(Ty) {}
131 
132   /// Returns the location in source code.
133   SMLoc getLoc() const { return PatternLoc; }
134 
135   bool ParsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM,
136                     unsigned LineNumber);
137   size_t Match(StringRef Buffer, size_t &MatchLen,
138                StringMap<StringRef> &VariableTable) const;
139   void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
140                         const StringMap<StringRef> &VariableTable) const;
141 
142   bool hasVariable() const {
143     return !(VariableUses.empty() && VariableDefs.empty());
144   }
145 
146   Check::CheckType getCheckTy() const { return CheckTy; }
147 
148 private:
149   bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
150   void AddBackrefToRegEx(unsigned BackrefNum);
151   unsigned
152   ComputeMatchDistance(StringRef Buffer,
153                        const StringMap<StringRef> &VariableTable) const;
154   bool EvaluateExpression(StringRef Expr, std::string &Value) const;
155   size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
156 };
157 
158 /// Parses the given string into the Pattern.
159 ///
160 /// \p Prefix provides which prefix is being matched, \p SM provides the
161 /// SourceMgr used for error reports, and \p LineNumber is the line number in
162 /// the input file from which the pattern string was read. Returns true in
163 /// case of an error, false otherwise.
164 bool Pattern::ParsePattern(StringRef PatternStr, StringRef Prefix,
165                            SourceMgr &SM, unsigned LineNumber) {
166   bool MatchFullLinesHere = MatchFullLines && CheckTy != Check::CheckNot;
167 
168   this->LineNumber = LineNumber;
169   PatternLoc = SMLoc::getFromPointer(PatternStr.data());
170 
171   // Ignore trailing whitespace.
172   while (!PatternStr.empty() &&
173          (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
174     PatternStr = PatternStr.substr(0, PatternStr.size() - 1);
175 
176   // Check that there is something on the line.
177   if (PatternStr.empty()) {
178     SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
179                     "found empty check string with prefix '" + Prefix + ":'");
180     return true;
181   }
182 
183   // Check to see if this is a fixed string, or if it has regex pieces.
184   if (!MatchFullLinesHere &&
185       (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos &&
186                                  PatternStr.find("[[") == StringRef::npos))) {
187     FixedStr = PatternStr;
188     return false;
189   }
190 
191   if (MatchFullLinesHere) {
192     RegExStr += '^';
193     if (!NoCanonicalizeWhiteSpace)
194       RegExStr += " *";
195   }
196 
197   // Paren value #0 is for the fully matched string.  Any new parenthesized
198   // values add from there.
199   unsigned CurParen = 1;
200 
201   // Otherwise, there is at least one regex piece.  Build up the regex pattern
202   // by escaping scary characters in fixed strings, building up one big regex.
203   while (!PatternStr.empty()) {
204     // RegEx matches.
205     if (PatternStr.startswith("{{")) {
206       // This is the start of a regex match.  Scan for the }}.
207       size_t End = PatternStr.find("}}");
208       if (End == StringRef::npos) {
209         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
210                         SourceMgr::DK_Error,
211                         "found start of regex string with no end '}}'");
212         return true;
213       }
214 
215       // Enclose {{}} patterns in parens just like [[]] even though we're not
216       // capturing the result for any purpose.  This is required in case the
217       // expression contains an alternation like: CHECK:  abc{{x|z}}def.  We
218       // want this to turn into: "abc(x|z)def" not "abcx|zdef".
219       RegExStr += '(';
220       ++CurParen;
221 
222       if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM))
223         return true;
224       RegExStr += ')';
225 
226       PatternStr = PatternStr.substr(End + 2);
227       continue;
228     }
229 
230     // Named RegEx matches.  These are of two forms: [[foo:.*]] which matches .*
231     // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
232     // second form is [[foo]] which is a reference to foo.  The variable name
233     // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
234     // it.  This is to catch some common errors.
235     if (PatternStr.startswith("[[")) {
236       // Find the closing bracket pair ending the match.  End is going to be an
237       // offset relative to the beginning of the match string.
238       size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
239 
240       if (End == StringRef::npos) {
241         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
242                         SourceMgr::DK_Error,
243                         "invalid named regex reference, no ]] found");
244         return true;
245       }
246 
247       StringRef MatchStr = PatternStr.substr(2, End);
248       PatternStr = PatternStr.substr(End + 4);
249 
250       // Get the regex name (e.g. "foo").
251       size_t NameEnd = MatchStr.find(':');
252       StringRef Name = MatchStr.substr(0, NameEnd);
253 
254       if (Name.empty()) {
255         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
256                         "invalid name in named regex: empty name");
257         return true;
258       }
259 
260       // Verify that the name/expression is well formed. FileCheck currently
261       // supports @LINE, @LINE+number, @LINE-number expressions. The check here
262       // is relaxed, more strict check is performed in \c EvaluateExpression.
263       bool IsExpression = false;
264       for (unsigned i = 0, e = Name.size(); i != e; ++i) {
265         if (i == 0 && Name[i] == '@') {
266           if (NameEnd != StringRef::npos) {
267             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
268                             SourceMgr::DK_Error,
269                             "invalid name in named regex definition");
270             return true;
271           }
272           IsExpression = true;
273           continue;
274         }
275         if (Name[i] != '_' && !isalnum(Name[i]) &&
276             (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
277           SM.PrintMessage(SMLoc::getFromPointer(Name.data() + i),
278                           SourceMgr::DK_Error, "invalid name in named regex");
279           return true;
280         }
281       }
282 
283       // Name can't start with a digit.
284       if (isdigit(static_cast<unsigned char>(Name[0]))) {
285         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
286                         "invalid name in named regex");
287         return true;
288       }
289 
290       // Handle [[foo]].
291       if (NameEnd == StringRef::npos) {
292         // Handle variables that were defined earlier on the same line by
293         // emitting a backreference.
294         if (VariableDefs.find(Name) != VariableDefs.end()) {
295           unsigned VarParenNum = VariableDefs[Name];
296           if (VarParenNum < 1 || VarParenNum > 9) {
297             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
298                             SourceMgr::DK_Error,
299                             "Can't back-reference more than 9 variables");
300             return true;
301           }
302           AddBackrefToRegEx(VarParenNum);
303         } else {
304           VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
305         }
306         continue;
307       }
308 
309       // Handle [[foo:.*]].
310       VariableDefs[Name] = CurParen;
311       RegExStr += '(';
312       ++CurParen;
313 
314       if (AddRegExToRegEx(MatchStr.substr(NameEnd + 1), CurParen, SM))
315         return true;
316 
317       RegExStr += ')';
318     }
319 
320     // Handle fixed string matches.
321     // Find the end, which is the start of the next regex.
322     size_t FixedMatchEnd = PatternStr.find("{{");
323     FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
324     RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
325     PatternStr = PatternStr.substr(FixedMatchEnd);
326   }
327 
328   if (MatchFullLinesHere) {
329     if (!NoCanonicalizeWhiteSpace)
330       RegExStr += " *";
331     RegExStr += '$';
332   }
333 
334   return false;
335 }
336 
337 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) {
338   Regex R(RS);
339   std::string Error;
340   if (!R.isValid(Error)) {
341     SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
342                     "invalid regex: " + Error);
343     return true;
344   }
345 
346   RegExStr += RS.str();
347   CurParen += R.getNumMatches();
348   return false;
349 }
350 
351 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
352   assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
353   std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum);
354   RegExStr += Backref;
355 }
356 
357 /// Evaluates expression and stores the result to \p Value.
358 ///
359 /// Returns true on success and false when the expression has invalid syntax.
360 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
361   // The only supported expression is @LINE([\+-]\d+)?
362   if (!Expr.startswith("@LINE"))
363     return false;
364   Expr = Expr.substr(StringRef("@LINE").size());
365   int Offset = 0;
366   if (!Expr.empty()) {
367     if (Expr[0] == '+')
368       Expr = Expr.substr(1);
369     else if (Expr[0] != '-')
370       return false;
371     if (Expr.getAsInteger(10, Offset))
372       return false;
373   }
374   Value = llvm::itostr(LineNumber + Offset);
375   return true;
376 }
377 
378 /// Matches the pattern string against the input buffer \p Buffer
379 ///
380 /// This returns the position that is matched or npos if there is no match. If
381 /// there is a match, the size of the matched string is returned in \p
382 /// MatchLen.
383 ///
384 /// The \p VariableTable StringMap provides the current values of filecheck
385 /// variables and is updated if this match defines new values.
386 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
387                       StringMap<StringRef> &VariableTable) const {
388   // If this is the EOF pattern, match it immediately.
389   if (CheckTy == Check::CheckEOF) {
390     MatchLen = 0;
391     return Buffer.size();
392   }
393 
394   // If this is a fixed string pattern, just match it now.
395   if (!FixedStr.empty()) {
396     MatchLen = FixedStr.size();
397     return Buffer.find(FixedStr);
398   }
399 
400   // Regex match.
401 
402   // If there are variable uses, we need to create a temporary string with the
403   // actual value.
404   StringRef RegExToMatch = RegExStr;
405   std::string TmpStr;
406   if (!VariableUses.empty()) {
407     TmpStr = RegExStr;
408 
409     unsigned InsertOffset = 0;
410     for (const auto &VariableUse : VariableUses) {
411       std::string Value;
412 
413       if (VariableUse.first[0] == '@') {
414         if (!EvaluateExpression(VariableUse.first, Value))
415           return StringRef::npos;
416       } else {
417         StringMap<StringRef>::iterator it =
418             VariableTable.find(VariableUse.first);
419         // If the variable is undefined, return an error.
420         if (it == VariableTable.end())
421           return StringRef::npos;
422 
423         // Look up the value and escape it so that we can put it into the regex.
424         Value += Regex::escape(it->second);
425       }
426 
427       // Plop it into the regex at the adjusted offset.
428       TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset,
429                     Value.begin(), Value.end());
430       InsertOffset += Value.size();
431     }
432 
433     // Match the newly constructed regex.
434     RegExToMatch = TmpStr;
435   }
436 
437   SmallVector<StringRef, 4> MatchInfo;
438   if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
439     return StringRef::npos;
440 
441   // Successful regex match.
442   assert(!MatchInfo.empty() && "Didn't get any match");
443   StringRef FullMatch = MatchInfo[0];
444 
445   // If this defines any variables, remember their values.
446   for (const auto &VariableDef : VariableDefs) {
447     assert(VariableDef.second < MatchInfo.size() && "Internal paren error");
448     VariableTable[VariableDef.first] = MatchInfo[VariableDef.second];
449   }
450 
451   MatchLen = FullMatch.size();
452   return FullMatch.data() - Buffer.data();
453 }
454 
455 
456 /// Computes an arbitrary estimate for the quality of matching this pattern at
457 /// the start of \p Buffer; a distance of zero should correspond to a perfect
458 /// match.
459 unsigned
460 Pattern::ComputeMatchDistance(StringRef Buffer,
461                               const StringMap<StringRef> &VariableTable) const {
462   // Just compute the number of matching characters. For regular expressions, we
463   // just compare against the regex itself and hope for the best.
464   //
465   // FIXME: One easy improvement here is have the regex lib generate a single
466   // example regular expression which matches, and use that as the example
467   // string.
468   StringRef ExampleString(FixedStr);
469   if (ExampleString.empty())
470     ExampleString = RegExStr;
471 
472   // Only compare up to the first line in the buffer, or the string size.
473   StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
474   BufferPrefix = BufferPrefix.split('\n').first;
475   return BufferPrefix.edit_distance(ExampleString);
476 }
477 
478 /// Prints additional information about a failure to match involving this
479 /// pattern.
480 void Pattern::PrintFailureInfo(
481     const SourceMgr &SM, StringRef Buffer,
482     const StringMap<StringRef> &VariableTable) const {
483   // If this was a regular expression using variables, print the current
484   // variable values.
485   if (!VariableUses.empty()) {
486     for (const auto &VariableUse : VariableUses) {
487       SmallString<256> Msg;
488       raw_svector_ostream OS(Msg);
489       StringRef Var = VariableUse.first;
490       if (Var[0] == '@') {
491         std::string Value;
492         if (EvaluateExpression(Var, Value)) {
493           OS << "with expression \"";
494           OS.write_escaped(Var) << "\" equal to \"";
495           OS.write_escaped(Value) << "\"";
496         } else {
497           OS << "uses incorrect expression \"";
498           OS.write_escaped(Var) << "\"";
499         }
500       } else {
501         StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
502 
503         // Check for undefined variable references.
504         if (it == VariableTable.end()) {
505           OS << "uses undefined variable \"";
506           OS.write_escaped(Var) << "\"";
507         } else {
508           OS << "with variable \"";
509           OS.write_escaped(Var) << "\" equal to \"";
510           OS.write_escaped(it->second) << "\"";
511         }
512       }
513 
514       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
515                       OS.str());
516     }
517   }
518 
519   // Attempt to find the closest/best fuzzy match.  Usually an error happens
520   // because some string in the output didn't exactly match. In these cases, we
521   // would like to show the user a best guess at what "should have" matched, to
522   // save them having to actually check the input manually.
523   size_t NumLinesForward = 0;
524   size_t Best = StringRef::npos;
525   double BestQuality = 0;
526 
527   // Use an arbitrary 4k limit on how far we will search.
528   for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
529     if (Buffer[i] == '\n')
530       ++NumLinesForward;
531 
532     // Patterns have leading whitespace stripped, so skip whitespace when
533     // looking for something which looks like a pattern.
534     if (Buffer[i] == ' ' || Buffer[i] == '\t')
535       continue;
536 
537     // Compute the "quality" of this match as an arbitrary combination of the
538     // match distance and the number of lines skipped to get to this match.
539     unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
540     double Quality = Distance + (NumLinesForward / 100.);
541 
542     if (Quality < BestQuality || Best == StringRef::npos) {
543       Best = i;
544       BestQuality = Quality;
545     }
546   }
547 
548   // Print the "possible intended match here" line if we found something
549   // reasonable and not equal to what we showed in the "scanning from here"
550   // line.
551   if (Best && Best != StringRef::npos && BestQuality < 50) {
552     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
553                     SourceMgr::DK_Note, "possible intended match here");
554 
555     // FIXME: If we wanted to be really friendly we would show why the match
556     // failed, as it can be hard to spot simple one character differences.
557   }
558 }
559 
560 /// Finds the closing sequence of a regex variable usage or definition.
561 ///
562 /// \p Str has to point in the beginning of the definition (right after the
563 /// opening sequence). Returns the offset of the closing sequence within Str,
564 /// or npos if it was not found.
565 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
566   // Offset keeps track of the current offset within the input Str
567   size_t Offset = 0;
568   // [...] Nesting depth
569   size_t BracketDepth = 0;
570 
571   while (!Str.empty()) {
572     if (Str.startswith("]]") && BracketDepth == 0)
573       return Offset;
574     if (Str[0] == '\\') {
575       // Backslash escapes the next char within regexes, so skip them both.
576       Str = Str.substr(2);
577       Offset += 2;
578     } else {
579       switch (Str[0]) {
580       default:
581         break;
582       case '[':
583         BracketDepth++;
584         break;
585       case ']':
586         if (BracketDepth == 0) {
587           SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
588                           SourceMgr::DK_Error,
589                           "missing closing \"]\" for regex variable");
590           exit(1);
591         }
592         BracketDepth--;
593         break;
594       }
595       Str = Str.substr(1);
596       Offset++;
597     }
598   }
599 
600   return StringRef::npos;
601 }
602 
603 //===----------------------------------------------------------------------===//
604 // Check Strings.
605 //===----------------------------------------------------------------------===//
606 
/// A check that we found in the input file.
///
/// Bundles the parsed pattern with the prefix and location it came from and
/// with the patterns collected in DagNotStrings. The member functions below
/// are only declared here; their definitions are not part of this struct.
struct CheckString {
  /// The pattern to match.
  Pattern Pat;

  /// Which prefix name this check matched.
  StringRef Prefix;

  /// The location in the match file that the check string was specified.
  SMLoc Loc;

  /// All of the strings that are disallowed from occurring between this match
  /// string and the previous one (or start of file).
  std::vector<Pattern> DagNotStrings;

  /// Stores a copy of \p P together with the prefix \p S and location \p L.
  CheckString(const Pattern &P, StringRef S, SMLoc L)
      : Pat(P), Prefix(S), Loc(L) {}

  /// Presumably matches this check string against \p Buffer, returning the
  /// match position and storing its length in \p MatchLen - verify against
  /// the definition.
  size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
               size_t &MatchLen, StringMap<StringRef> &VariableTable) const;

  // NOTE(review): the helpers below are named after the directive kinds in
  // Check::CheckType; their exact contracts and the meaning of their return
  // values are defined elsewhere - confirm there before relying on them.
  bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
  bool CheckSame(const SourceMgr &SM, StringRef Buffer) const;
  bool CheckNot(const SourceMgr &SM, StringRef Buffer,
                const std::vector<const Pattern *> &NotStrings,
                StringMap<StringRef> &VariableTable) const;
  size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
                  std::vector<const Pattern *> &NotStrings,
                  StringMap<StringRef> &VariableTable) const;
};
637 
638 /// Canonicalize whitespaces in the file. Line endings are replaced with
639 /// UNIX-style '\n'.
640 ///
641 /// \param PreserveHorizontal Don't squash consecutive horizontal whitespace
642 /// characters to a single space.
643 static StringRef CanonicalizeFile(MemoryBuffer &MB, bool PreserveHorizontal,
644                                   SmallVectorImpl<char> &OutputBuffer) {
645   OutputBuffer.reserve(MB.getBufferSize());
646 
647   for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd();
648        Ptr != End; ++Ptr) {
649     // Eliminate trailing dosish \r.
650     if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
651       continue;
652     }
653 
654     // If current char is not a horizontal whitespace or if horizontal
655     // whitespace canonicalization is disabled, dump it to output as is.
656     if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) {
657       OutputBuffer.push_back(*Ptr);
658       continue;
659     }
660 
661     // Otherwise, add one space and advance over neighboring space.
662     OutputBuffer.push_back(' ');
663     while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t'))
664       ++Ptr;
665   }
666 
667   // Add a null byte and then return all but that byte.
668   OutputBuffer.push_back('\0');
669   return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1);
670 }
671 
/// Returns true if \p c can be part of a word: an alphanumeric character, a
/// hyphen or an underscore.
static bool IsPartOfWord(char c) {
  // The <cctype> classifiers require a value representable as unsigned char;
  // passing a negative plain char is undefined behavior.
  return isalnum(static_cast<unsigned char>(c)) || c == '-' || c == '_';
}
675 
676 // Get the size of the prefix extension.
677 static size_t CheckTypeSize(Check::CheckType Ty) {
678   switch (Ty) {
679   case Check::CheckNone:
680   case Check::CheckBadNot:
681     return 0;
682 
683   case Check::CheckPlain:
684     return sizeof(":") - 1;
685 
686   case Check::CheckNext:
687     return sizeof("-NEXT:") - 1;
688 
689   case Check::CheckSame:
690     return sizeof("-SAME:") - 1;
691 
692   case Check::CheckNot:
693     return sizeof("-NOT:") - 1;
694 
695   case Check::CheckDAG:
696     return sizeof("-DAG:") - 1;
697 
698   case Check::CheckLabel:
699     return sizeof("-LABEL:") - 1;
700 
701   case Check::CheckEOF:
702     llvm_unreachable("Should not be using EOF size");
703   }
704 
705   llvm_unreachable("Bad check type");
706 }
707 
708 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) {
709   char NextChar = Buffer[Prefix.size()];
710 
711   // Verify that the : is present after the prefix.
712   if (NextChar == ':')
713     return Check::CheckPlain;
714 
715   if (NextChar != '-')
716     return Check::CheckNone;
717 
718   StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
719   if (Rest.startswith("NEXT:"))
720     return Check::CheckNext;
721 
722   if (Rest.startswith("SAME:"))
723     return Check::CheckSame;
724 
725   if (Rest.startswith("NOT:"))
726     return Check::CheckNot;
727 
728   if (Rest.startswith("DAG:"))
729     return Check::CheckDAG;
730 
731   if (Rest.startswith("LABEL:"))
732     return Check::CheckLabel;
733 
734   // You can't combine -NOT with another suffix.
735   if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") ||
736       Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") ||
737       Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:"))
738     return Check::CheckBadNot;
739 
740   return Check::CheckNone;
741 }
742 
743 // From the given position, find the next character after the word.
744 static size_t SkipWord(StringRef Str, size_t Loc) {
745   while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
746     ++Loc;
747   return Loc;
748 }
749 
750 // Try to find the first match in buffer for any prefix. If a valid match is
751 // found, return that prefix and set its type and location.  If there are almost
752 // matches (e.g. the actual prefix string is found, but is not an actual check
753 // string), but no valid match, return an empty string and set the position to
754 // resume searching from. If no partial matches are found, return an empty
755 // string and the location will be StringRef::npos. If one prefix is a substring
756 // of another, the maximal match should be found. e.g. if "A" and "AA" are
757 // prefixes then AA-CHECK: should match the second one.
758 static StringRef FindFirstCandidateMatch(StringRef &Buffer,
759                                          Check::CheckType &CheckTy,
760                                          size_t &CheckLoc) {
761   StringRef FirstPrefix;
762   size_t FirstLoc = StringRef::npos;
763   size_t SearchLoc = StringRef::npos;
764   Check::CheckType FirstTy = Check::CheckNone;
765 
766   CheckTy = Check::CheckNone;
767   CheckLoc = StringRef::npos;
768 
769   for (StringRef Prefix : CheckPrefixes) {
770     size_t PrefixLoc = Buffer.find(Prefix);
771 
772     if (PrefixLoc == StringRef::npos)
773       continue;
774 
775     // Track where we are searching for invalid prefixes that look almost right.
776     // We need to only advance to the first partial match on the next attempt
777     // since a partial match could be a substring of a later, valid prefix.
778     // Need to skip to the end of the word, otherwise we could end up
779     // matching a prefix in a substring later.
780     if (PrefixLoc < SearchLoc)
781       SearchLoc = SkipWord(Buffer, PrefixLoc);
782 
783     // We only want to find the first match to avoid skipping some.
784     if (PrefixLoc > FirstLoc)
785       continue;
786     // If one matching check-prefix is a prefix of another, choose the
787     // longer one.
788     if (PrefixLoc == FirstLoc && Prefix.size() < FirstPrefix.size())
789       continue;
790 
791     StringRef Rest = Buffer.drop_front(PrefixLoc);
792     // Make sure we have actually found the prefix, and not a word containing
793     // it. This should also prevent matching the wrong prefix when one is a
794     // substring of another.
795     if (PrefixLoc != 0 && IsPartOfWord(Buffer[PrefixLoc - 1]))
796       FirstTy = Check::CheckNone;
797     else
798       FirstTy = FindCheckType(Rest, Prefix);
799 
800     FirstLoc = PrefixLoc;
801     FirstPrefix = Prefix;
802   }
803 
804   // If the first prefix is invalid, we should continue the search after it.
805   if (FirstTy == Check::CheckNone) {
806     CheckLoc = SearchLoc;
807     return "";
808   }
809 
810   CheckTy = FirstTy;
811   CheckLoc = FirstLoc;
812   return FirstPrefix;
813 }
814 
815 static StringRef FindFirstMatchingPrefix(StringRef &Buffer,
816                                          unsigned &LineNumber,
817                                          Check::CheckType &CheckTy,
818                                          size_t &CheckLoc) {
819   while (!Buffer.empty()) {
820     StringRef Prefix = FindFirstCandidateMatch(Buffer, CheckTy, CheckLoc);
821     // If we found a real match, we are done.
822     if (!Prefix.empty()) {
823       LineNumber += Buffer.substr(0, CheckLoc).count('\n');
824       return Prefix;
825     }
826 
827     // We didn't find any almost matches either, we are also done.
828     if (CheckLoc == StringRef::npos)
829       return StringRef();
830 
831     LineNumber += Buffer.substr(0, CheckLoc + 1).count('\n');
832 
833     // Advance to the last possible match we found and try again.
834     Buffer = Buffer.drop_front(CheckLoc + 1);
835   }
836 
837   return StringRef();
838 }
839 
840 /// Read the check file, which specifies the sequence of expected strings.
841 ///
842 /// The strings are added to the CheckStrings vector. Returns true in case of
843 /// an error, false otherwise.
844 static bool ReadCheckFile(SourceMgr &SM, StringRef Buffer,
845                           std::vector<CheckString> &CheckStrings) {
846   std::vector<Pattern> ImplicitNegativeChecks;
847   for (const auto &PatternString : ImplicitCheckNot) {
848     // Create a buffer with fake command line content in order to display the
849     // command line option responsible for the specific implicit CHECK-NOT.
850     std::string Prefix = (Twine("-") + ImplicitCheckNot.ArgStr + "='").str();
851     std::string Suffix = "'";
852     std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy(
853         Prefix + PatternString + Suffix, "command line");
854 
855     StringRef PatternInBuffer =
856         CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
857     SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
858 
859     ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot));
860     ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
861                                                "IMPLICIT-CHECK", SM, 0);
862   }
863 
864   std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;
865 
866   // LineNumber keeps track of the line on which CheckPrefix instances are
867   // found.
868   unsigned LineNumber = 1;
869 
870   while (1) {
871     Check::CheckType CheckTy;
872     size_t PrefixLoc;
873 
874     // See if a prefix occurs in the memory buffer.
875     StringRef UsedPrefix =
876         FindFirstMatchingPrefix(Buffer, LineNumber, CheckTy, PrefixLoc);
877     if (UsedPrefix.empty())
878       break;
879 
880     Buffer = Buffer.drop_front(PrefixLoc);
881 
882     // Location to use for error messages.
883     const char *UsedPrefixStart = Buffer.data() + (PrefixLoc == 0 ? 0 : 1);
884 
885     // PrefixLoc is to the start of the prefix. Skip to the end.
886     Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy));
887 
888     // Complain about useful-looking but unsupported suffixes.
889     if (CheckTy == Check::CheckBadNot) {
890       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error,
891                       "unsupported -NOT combo on prefix '" + UsedPrefix + "'");
892       return true;
893     }
894 
895     // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
896     // leading and trailing whitespace.
897     Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
898 
899     // Scan ahead to the end of line.
900     size_t EOL = Buffer.find_first_of("\n\r");
901 
902     // Remember the location of the start of the pattern, for diagnostics.
903     SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
904 
905     // Parse the pattern.
906     Pattern P(CheckTy);
907     if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber))
908       return true;
909 
910     // Verify that CHECK-LABEL lines do not define or use variables
911     if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
912       SM.PrintMessage(
913           SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error,
914           "found '" + UsedPrefix + "-LABEL:'"
915                                    " with variable definition or use");
916       return true;
917     }
918 
919     Buffer = Buffer.substr(EOL);
920 
921     // Verify that CHECK-NEXT lines have at least one CHECK line before them.
922     if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame) &&
923         CheckStrings.empty()) {
924       StringRef Type = CheckTy == Check::CheckNext ? "NEXT" : "SAME";
925       SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
926                       SourceMgr::DK_Error,
927                       "found '" + UsedPrefix + "-" + Type +
928                           "' without previous '" + UsedPrefix + ": line");
929       return true;
930     }
931 
932     // Handle CHECK-DAG/-NOT.
933     if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
934       DagNotMatches.push_back(P);
935       continue;
936     }
937 
938     // Okay, add the string we captured to the output vector and move on.
939     CheckStrings.emplace_back(P, UsedPrefix, PatternLoc);
940     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
941     DagNotMatches = ImplicitNegativeChecks;
942   }
943 
944   // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
945   // prefix as a filler for the error message.
946   if (!DagNotMatches.empty()) {
947     CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(),
948                               SMLoc::getFromPointer(Buffer.data()));
949     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
950   }
951 
952   if (CheckStrings.empty()) {
953     errs() << "error: no check strings found with prefix"
954            << (CheckPrefixes.size() > 1 ? "es " : " ");
955     prefix_iterator I = CheckPrefixes.begin();
956     prefix_iterator E = CheckPrefixes.end();
957     if (I != E) {
958       errs() << "\'" << *I << ":'";
959       ++I;
960     }
961     for (; I != E; ++I)
962       errs() << ", \'" << *I << ":'";
963 
964     errs() << '\n';
965     return true;
966   }
967 
968   return false;
969 }
970 
971 static void PrintCheckFailed(const SourceMgr &SM, SMLoc Loc, const Pattern &Pat,
972                              StringRef Buffer,
973                              StringMap<StringRef> &VariableTable) {
974   // Otherwise, we have an error, emit an error message.
975   SM.PrintMessage(Loc, SourceMgr::DK_Error,
976                   "expected string not found in input");
977 
978   // Print the "scanning from here" line.  If the current position is at the
979   // end of a line, advance to the start of the next line.
980   Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
981 
982   SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
983                   "scanning from here");
984 
985   // Allow the pattern to print additional information if desired.
986   Pat.PrintFailureInfo(SM, Buffer, VariableTable);
987 }
988 
989 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
990                              StringRef Buffer,
991                              StringMap<StringRef> &VariableTable) {
992   PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable);
993 }
994 
995 /// Count the number of newlines in the specified range.
996 static unsigned CountNumNewlinesBetween(StringRef Range,
997                                         const char *&FirstNewLine) {
998   unsigned NumNewLines = 0;
999   while (1) {
1000     // Scan for newline.
1001     Range = Range.substr(Range.find_first_of("\n\r"));
1002     if (Range.empty())
1003       return NumNewLines;
1004 
1005     ++NumNewLines;
1006 
1007     // Handle \n\r and \r\n as a single newline.
1008     if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') &&
1009         (Range[0] != Range[1]))
1010       Range = Range.substr(1);
1011     Range = Range.substr(1);
1012 
1013     if (NumNewLines == 1)
1014       FirstNewLine = Range.begin();
1015   }
1016 }
1017 
1018 /// Match check string and its "not strings" and/or "dag strings".
1019 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
1020                           bool IsLabelScanMode, size_t &MatchLen,
1021                           StringMap<StringRef> &VariableTable) const {
1022   size_t LastPos = 0;
1023   std::vector<const Pattern *> NotStrings;
1024 
1025   // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
1026   // bounds; we have not processed variable definitions within the bounded block
1027   // yet so cannot handle any final CHECK-DAG yet; this is handled when going
1028   // over the block again (including the last CHECK-LABEL) in normal mode.
1029   if (!IsLabelScanMode) {
1030     // Match "dag strings" (with mixed "not strings" if any).
1031     LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable);
1032     if (LastPos == StringRef::npos)
1033       return StringRef::npos;
1034   }
1035 
1036   // Match itself from the last position after matching CHECK-DAG.
1037   StringRef MatchBuffer = Buffer.substr(LastPos);
1038   size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1039   if (MatchPos == StringRef::npos) {
1040     PrintCheckFailed(SM, *this, MatchBuffer, VariableTable);
1041     return StringRef::npos;
1042   }
1043 
1044   // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
1045   // or CHECK-NOT
1046   if (!IsLabelScanMode) {
1047     StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1048 
1049     // If this check is a "CHECK-NEXT", verify that the previous match was on
1050     // the previous line (i.e. that there is one newline between them).
1051     if (CheckNext(SM, SkippedRegion))
1052       return StringRef::npos;
1053 
1054     // If this check is a "CHECK-SAME", verify that the previous match was on
1055     // the same line (i.e. that there is no newline between them).
1056     if (CheckSame(SM, SkippedRegion))
1057       return StringRef::npos;
1058 
1059     // If this match had "not strings", verify that they don't exist in the
1060     // skipped region.
1061     if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1062       return StringRef::npos;
1063   }
1064 
1065   return LastPos + MatchPos;
1066 }
1067 
1068 /// Verify there is a single line in the given buffer.
1069 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
1070   if (Pat.getCheckTy() != Check::CheckNext)
1071     return false;
1072 
1073   // Count the number of newlines between the previous match and this one.
1074   assert(Buffer.data() !=
1075              SM.getMemoryBuffer(SM.FindBufferContainingLoc(
1076                                     SMLoc::getFromPointer(Buffer.data())))
1077                  ->getBufferStart() &&
1078          "CHECK-NEXT can't be the first check in a file");
1079 
1080   const char *FirstNewLine = nullptr;
1081   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1082 
1083   if (NumNewLines == 0) {
1084     SM.PrintMessage(Loc, SourceMgr::DK_Error,
1085                     Prefix + "-NEXT: is on the same line as previous match");
1086     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1087                     "'next' match was here");
1088     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1089                     "previous match ended here");
1090     return true;
1091   }
1092 
1093   if (NumNewLines != 1) {
1094     SM.PrintMessage(Loc, SourceMgr::DK_Error,
1095                     Prefix +
1096                         "-NEXT: is not on the line after the previous match");
1097     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1098                     "'next' match was here");
1099     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1100                     "previous match ended here");
1101     SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
1102                     "non-matching line after previous match is here");
1103     return true;
1104   }
1105 
1106   return false;
1107 }
1108 
1109 /// Verify there is no newline in the given buffer.
1110 bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
1111   if (Pat.getCheckTy() != Check::CheckSame)
1112     return false;
1113 
1114   // Count the number of newlines between the previous match and this one.
1115   assert(Buffer.data() !=
1116              SM.getMemoryBuffer(SM.FindBufferContainingLoc(
1117                                     SMLoc::getFromPointer(Buffer.data())))
1118                  ->getBufferStart() &&
1119          "CHECK-SAME can't be the first check in a file");
1120 
1121   const char *FirstNewLine = nullptr;
1122   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1123 
1124   if (NumNewLines != 0) {
1125     SM.PrintMessage(Loc, SourceMgr::DK_Error,
1126                     Prefix +
1127                         "-SAME: is not on the same line as the previous match");
1128     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1129                     "'next' match was here");
1130     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1131                     "previous match ended here");
1132     return true;
1133   }
1134 
1135   return false;
1136 }
1137 
1138 /// Verify there's no "not strings" in the given buffer.
1139 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
1140                            const std::vector<const Pattern *> &NotStrings,
1141                            StringMap<StringRef> &VariableTable) const {
1142   for (const Pattern *Pat : NotStrings) {
1143     assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
1144 
1145     size_t MatchLen = 0;
1146     size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
1147 
1148     if (Pos == StringRef::npos)
1149       continue;
1150 
1151     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Pos),
1152                     SourceMgr::DK_Error, Prefix + "-NOT: string occurred!");
1153     SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note,
1154                     Prefix + "-NOT: pattern specified here");
1155     return true;
1156   }
1157 
1158   return false;
1159 }
1160 
1161 /// Match "dag strings" and their mixed "not strings".
1162 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
1163                              std::vector<const Pattern *> &NotStrings,
1164                              StringMap<StringRef> &VariableTable) const {
1165   if (DagNotStrings.empty())
1166     return 0;
1167 
1168   size_t LastPos = 0;
1169   size_t StartPos = LastPos;
1170 
1171   for (const Pattern &Pat : DagNotStrings) {
1172     assert((Pat.getCheckTy() == Check::CheckDAG ||
1173             Pat.getCheckTy() == Check::CheckNot) &&
1174            "Invalid CHECK-DAG or CHECK-NOT!");
1175 
1176     if (Pat.getCheckTy() == Check::CheckNot) {
1177       NotStrings.push_back(&Pat);
1178       continue;
1179     }
1180 
1181     assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
1182 
1183     size_t MatchLen = 0, MatchPos;
1184 
1185     // CHECK-DAG always matches from the start.
1186     StringRef MatchBuffer = Buffer.substr(StartPos);
1187     MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1188     // With a group of CHECK-DAGs, a single mismatching means the match on
1189     // that group of CHECK-DAGs fails immediately.
1190     if (MatchPos == StringRef::npos) {
1191       PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable);
1192       return StringRef::npos;
1193     }
1194     // Re-calc it as the offset relative to the start of the original string.
1195     MatchPos += StartPos;
1196 
1197     if (!NotStrings.empty()) {
1198       if (MatchPos < LastPos) {
1199         // Reordered?
1200         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos),
1201                         SourceMgr::DK_Error,
1202                         Prefix + "-DAG: found a match of CHECK-DAG"
1203                                  " reordering across a CHECK-NOT");
1204         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos),
1205                         SourceMgr::DK_Note,
1206                         Prefix + "-DAG: the farthest match of CHECK-DAG"
1207                                  " is found here");
1208         SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note,
1209                         Prefix + "-NOT: the crossed pattern specified"
1210                                  " here");
1211         SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note,
1212                         Prefix + "-DAG: the reordered pattern specified"
1213                                  " here");
1214         return StringRef::npos;
1215       }
1216       // All subsequent CHECK-DAGs should be matched from the farthest
1217       // position of all precedent CHECK-DAGs (including this one.)
1218       StartPos = LastPos;
1219       // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to
1220       // CHECK-DAG, verify that there's no 'not' strings occurred in that
1221       // region.
1222       StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1223       if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1224         return StringRef::npos;
1225       // Clear "not strings".
1226       NotStrings.clear();
1227     }
1228 
1229     // Update the last position with CHECK-DAG matches.
1230     LastPos = std::max(MatchPos + MatchLen, LastPos);
1231   }
1232 
1233   return LastPos;
1234 }
1235 
1236 // A check prefix must contain only alphanumeric, hyphens and underscores.
1237 static bool ValidateCheckPrefix(StringRef CheckPrefix) {
1238   Regex Validator("^[a-zA-Z0-9_-]*$");
1239   return Validator.match(CheckPrefix);
1240 }
1241 
1242 static bool ValidateCheckPrefixes() {
1243   StringSet<> PrefixSet;
1244 
1245   for (StringRef Prefix : CheckPrefixes) {
1246     // Reject empty prefixes.
1247     if (Prefix == "")
1248       return false;
1249 
1250     if (!PrefixSet.insert(Prefix).second)
1251       return false;
1252 
1253     if (!ValidateCheckPrefix(Prefix))
1254       return false;
1255   }
1256 
1257   return true;
1258 }
1259 
1260 // I don't think there's a way to specify an initial value for cl::list,
1261 // so if nothing was specified, add the default
1262 static void AddCheckPrefixIfNeeded() {
1263   if (CheckPrefixes.empty())
1264     CheckPrefixes.push_back("CHECK");
1265 }
1266 
1267 static void DumpCommandLine(int argc, char **argv) {
1268   errs() << "FileCheck command line: ";
1269   for (int I = 0; I < argc; I++)
1270     errs() << " " << argv[I];
1271   errs() << "\n";
1272 }
1273 
1274 /// Check the input to FileCheck provided in the \p Buffer against the \p
1275 /// CheckStrings read from the check file.
1276 ///
1277 /// Returns false if the input fails to satisfy the checks.
1278 bool CheckInput(SourceMgr &SM, StringRef Buffer,
1279                 ArrayRef<CheckString> CheckStrings) {
1280   bool ChecksFailed = false;
1281 
1282   /// VariableTable - This holds all the current filecheck variables.
1283   StringMap<StringRef> VariableTable;
1284 
1285   unsigned i = 0, j = 0, e = CheckStrings.size();
1286   while (true) {
1287     StringRef CheckRegion;
1288     if (j == e) {
1289       CheckRegion = Buffer;
1290     } else {
1291       const CheckString &CheckLabelStr = CheckStrings[j];
1292       if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) {
1293         ++j;
1294         continue;
1295       }
1296 
1297       // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
1298       size_t MatchLabelLen = 0;
1299       size_t MatchLabelPos =
1300           CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, VariableTable);
1301       if (MatchLabelPos == StringRef::npos)
1302         // Immediately bail of CHECK-LABEL fails, nothing else we can do.
1303         return false;
1304 
1305       CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
1306       Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
1307       ++j;
1308     }
1309 
1310     for (; i != j; ++i) {
1311       const CheckString &CheckStr = CheckStrings[i];
1312 
1313       // Check each string within the scanned region, including a second check
1314       // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
1315       size_t MatchLen = 0;
1316       size_t MatchPos =
1317           CheckStr.Check(SM, CheckRegion, false, MatchLen, VariableTable);
1318 
1319       if (MatchPos == StringRef::npos) {
1320         ChecksFailed = true;
1321         i = j;
1322         break;
1323       }
1324 
1325       CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
1326     }
1327 
1328     if (j == e)
1329       break;
1330   }
1331 
1332   // Success if no checks failed.
1333   return !ChecksFailed;
1334 }
1335 
1336 int main(int argc, char **argv) {
1337   sys::PrintStackTraceOnErrorSignal(argv[0]);
1338   PrettyStackTraceProgram X(argc, argv);
1339   cl::ParseCommandLineOptions(argc, argv);
1340 
1341   if (!ValidateCheckPrefixes()) {
1342     errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
1343               "start with a letter and contain only alphanumeric characters, "
1344               "hyphens and underscores\n";
1345     return 2;
1346   }
1347 
1348   AddCheckPrefixIfNeeded();
1349 
1350   SourceMgr SM;
1351 
1352   // Read the expected strings from the check file.
1353   ErrorOr<std::unique_ptr<MemoryBuffer>> CheckFileOrErr =
1354       MemoryBuffer::getFileOrSTDIN(CheckFilename);
1355   if (std::error_code EC = CheckFileOrErr.getError()) {
1356     errs() << "Could not open check file '" << CheckFilename
1357            << "': " << EC.message() << '\n';
1358     return 2;
1359   }
1360   MemoryBuffer &CheckFile = *CheckFileOrErr.get();
1361 
1362   SmallString<4096> CheckFileBuffer;
1363   StringRef CheckFileText =
1364       CanonicalizeFile(CheckFile, NoCanonicalizeWhiteSpace, CheckFileBuffer);
1365 
1366   SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(
1367                             CheckFileText, CheckFile.getBufferIdentifier()),
1368                         SMLoc());
1369 
1370   std::vector<CheckString> CheckStrings;
1371   if (ReadCheckFile(SM, CheckFileText, CheckStrings))
1372     return 2;
1373 
1374   // Open the file to check and add it to SourceMgr.
1375   ErrorOr<std::unique_ptr<MemoryBuffer>> InputFileOrErr =
1376       MemoryBuffer::getFileOrSTDIN(InputFilename);
1377   if (std::error_code EC = InputFileOrErr.getError()) {
1378     errs() << "Could not open input file '" << InputFilename
1379            << "': " << EC.message() << '\n';
1380     return 2;
1381   }
1382   MemoryBuffer &InputFile = *InputFileOrErr.get();
1383 
1384   if (InputFile.getBufferSize() == 0 && !AllowEmptyInput) {
1385     errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
1386     DumpCommandLine(argc, argv);
1387     return 2;
1388   }
1389 
1390   SmallString<4096> InputFileBuffer;
1391   StringRef InputFileText =
1392       CanonicalizeFile(InputFile, NoCanonicalizeWhiteSpace, InputFileBuffer);
1393 
1394   SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(
1395                             InputFileText, InputFile.getBufferIdentifier()),
1396                         SMLoc());
1397 
1398   return CheckInput(SM, InputFileText, CheckStrings) ? EXIT_SUCCESS : 1;
1399 }
1400