1 //===- TokensTest.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/Tooling/Syntax/Tokens.h"
10 #include "clang/AST/ASTConsumer.h"
11 #include "clang/AST/Expr.h"
12 #include "clang/Basic/Diagnostic.h"
13 #include "clang/Basic/DiagnosticIDs.h"
14 #include "clang/Basic/DiagnosticOptions.h"
15 #include "clang/Basic/FileManager.h"
16 #include "clang/Basic/FileSystemOptions.h"
17 #include "clang/Basic/LLVM.h"
18 #include "clang/Basic/LangOptions.h"
19 #include "clang/Basic/SourceLocation.h"
20 #include "clang/Basic/SourceManager.h"
21 #include "clang/Basic/TokenKinds.def"
22 #include "clang/Basic/TokenKinds.h"
23 #include "clang/Frontend/CompilerInstance.h"
24 #include "clang/Frontend/FrontendAction.h"
25 #include "clang/Frontend/Utils.h"
26 #include "clang/Lex/Lexer.h"
27 #include "clang/Lex/PreprocessorOptions.h"
28 #include "clang/Lex/Token.h"
29 #include "clang/Tooling/Tooling.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/IntrusiveRefCntPtr.h"
32 #include "llvm/ADT/None.h"
33 #include "llvm/ADT/Optional.h"
34 #include "llvm/ADT/STLExtras.h"
35 #include "llvm/ADT/StringRef.h"
36 #include "llvm/Support/FormatVariadic.h"
37 #include "llvm/Support/MemoryBuffer.h"
38 #include "llvm/Support/VirtualFileSystem.h"
39 #include "llvm/Support/raw_os_ostream.h"
40 #include "llvm/Support/raw_ostream.h"
41 #include "llvm/Testing/Support/Annotations.h"
42 #include "llvm/Testing/Support/SupportHelpers.h"
43 #include <cassert>
44 #include <cstdlib>
45 #include <gmock/gmock.h>
46 #include <gtest/gtest.h>
47 #include <memory>
48 #include <ostream>
49 #include <string>
50 
51 using namespace clang;
52 using namespace clang::syntax;
53 
54 using llvm::ValueIs;
55 using ::testing::AllOf;
56 using ::testing::Contains;
57 using ::testing::ElementsAre;
58 using ::testing::Field;
59 using ::testing::Matcher;
60 using ::testing::Not;
61 using ::testing::StartsWith;
62 
63 namespace {
64 // Checks the passed ArrayRef<T> has the same begin() and end() iterators as the
65 // argument.
66 MATCHER_P(SameRange, A, "") {
67   return A.begin() == arg.begin() && A.end() == arg.end();
68 }
69 
70 Matcher<TokenBuffer::Expansion>
71 IsExpansion(Matcher<llvm::ArrayRef<syntax::Token>> Spelled,
72             Matcher<llvm::ArrayRef<syntax::Token>> Expanded) {
73   return AllOf(Field(&TokenBuffer::Expansion::Spelled, Spelled),
74                Field(&TokenBuffer::Expansion::Expanded, Expanded));
75 }
76 // Matchers for syntax::Token.
77 MATCHER_P(Kind, K, "") { return arg.kind() == K; }
78 MATCHER_P2(HasText, Text, SourceMgr, "") {
79   return arg.text(*SourceMgr) == Text;
80 }
81 /// Checks the start and end location of a token are equal to SourceRng.
82 MATCHER_P(RangeIs, SourceRng, "") {
83   return arg.location() == SourceRng.first &&
84          arg.endLocation() == SourceRng.second;
85 }
86 
87 class TokenCollectorTest : public ::testing::Test {
88 public:
89   /// Run the clang frontend, collect the preprocessed tokens from the frontend
90   /// invocation and store them in this->Buffer.
91   /// This also clears SourceManager before running the compiler.
92   void recordTokens(llvm::StringRef Code) {
93     class RecordTokens : public ASTFrontendAction {
94     public:
95       explicit RecordTokens(TokenBuffer &Result) : Result(Result) {}
96 
97       bool BeginSourceFileAction(CompilerInstance &CI) override {
98         assert(!Collector && "expected only a single call to BeginSourceFile");
99         Collector.emplace(CI.getPreprocessor());
100         return true;
101       }
102       void EndSourceFileAction() override {
103         assert(Collector && "BeginSourceFileAction was never called");
104         Result = std::move(*Collector).consume();
105       }
106 
107       std::unique_ptr<ASTConsumer>
108       CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override {
109         return std::make_unique<ASTConsumer>();
110       }
111 
112     private:
113       TokenBuffer &Result;
114       llvm::Optional<TokenCollector> Collector;
115     };
116 
117     constexpr const char *FileName = "./input.cpp";
118     FS->addFile(FileName, time_t(), llvm::MemoryBuffer::getMemBufferCopy(""));
119     // Prepare to run a compiler.
120     if (!Diags->getClient())
121       Diags->setClient(new IgnoringDiagConsumer);
122     std::vector<const char *> Args = {"tok-test", "-std=c++03", "-fsyntax-only",
123                                       FileName};
124     auto CI = createInvocationFromCommandLine(Args, Diags, FS);
125     assert(CI);
126     CI->getFrontendOpts().DisableFree = false;
127     CI->getPreprocessorOpts().addRemappedFile(
128         FileName, llvm::MemoryBuffer::getMemBufferCopy(Code).release());
129     CompilerInstance Compiler;
130     Compiler.setInvocation(std::move(CI));
131     Compiler.setDiagnostics(Diags.get());
132     Compiler.setFileManager(FileMgr.get());
133     Compiler.setSourceManager(SourceMgr.get());
134 
135     this->Buffer = TokenBuffer(*SourceMgr);
136     RecordTokens Recorder(this->Buffer);
137     ASSERT_TRUE(Compiler.ExecuteAction(Recorder))
138         << "failed to run the frontend";
139   }
140 
141   /// Record the tokens and return a test dump of the resulting buffer.
142   std::string collectAndDump(llvm::StringRef Code) {
143     recordTokens(Code);
144     return Buffer.dumpForTests();
145   }
146 
147   // Adds a file to the test VFS.
148   void addFile(llvm::StringRef Path, llvm::StringRef Contents) {
149     if (!FS->addFile(Path, time_t(),
150                      llvm::MemoryBuffer::getMemBufferCopy(Contents))) {
151       ADD_FAILURE() << "could not add a file to VFS: " << Path;
152     }
153   }
154 
155   /// Add a new file, run syntax::tokenize() on it and return the results.
156   std::vector<syntax::Token> tokenize(llvm::StringRef Text) {
157     // FIXME: pass proper LangOptions.
158     return syntax::tokenize(
159         SourceMgr->createFileID(llvm::MemoryBuffer::getMemBufferCopy(Text)),
160         *SourceMgr, LangOptions());
161   }
162 
163   // Specialized versions of matchers that hide the SourceManager from clients.
164   Matcher<syntax::Token> HasText(std::string Text) const {
165     return ::HasText(Text, SourceMgr.get());
166   }
167   Matcher<syntax::Token> RangeIs(llvm::Annotations::Range R) const {
168     std::pair<SourceLocation, SourceLocation> Ls;
169     Ls.first = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID())
170                    .getLocWithOffset(R.Begin);
171     Ls.second = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID())
172                     .getLocWithOffset(R.End);
173     return ::RangeIs(Ls);
174   }
175 
176   /// Finds a subrange in O(n * m).
177   template <class T, class U, class Eq>
178   llvm::ArrayRef<T> findSubrange(llvm::ArrayRef<U> Subrange,
179                                  llvm::ArrayRef<T> Range, Eq F) {
180     for (auto Begin = Range.begin(); Begin < Range.end(); ++Begin) {
181       auto It = Begin;
182       for (auto ItSub = Subrange.begin();
183            ItSub != Subrange.end() && It != Range.end(); ++ItSub, ++It) {
184         if (!F(*ItSub, *It))
185           goto continue_outer;
186       }
187       return llvm::makeArrayRef(Begin, It);
188     continue_outer:;
189     }
190     return llvm::makeArrayRef(Range.end(), Range.end());
191   }
192 
193   /// Finds a subrange in \p Tokens that match the tokens specified in \p Query.
194   /// The match should be unique. \p Query is a whitespace-separated list of
195   /// tokens to search for.
196   llvm::ArrayRef<syntax::Token>
197   findTokenRange(llvm::StringRef Query, llvm::ArrayRef<syntax::Token> Tokens) {
198     llvm::SmallVector<llvm::StringRef, 8> QueryTokens;
199     Query.split(QueryTokens, ' ', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
200     if (QueryTokens.empty()) {
201       ADD_FAILURE() << "will not look for an empty list of tokens";
202       std::abort();
203     }
204     // An equality test for search.
205     auto TextMatches = [this](llvm::StringRef Q, const syntax::Token &T) {
206       return Q == T.text(*SourceMgr);
207     };
208     // Find a match.
209     auto Found =
210         findSubrange(llvm::makeArrayRef(QueryTokens), Tokens, TextMatches);
211     if (Found.begin() == Tokens.end()) {
212       ADD_FAILURE() << "could not find the subrange for " << Query;
213       std::abort();
214     }
215     // Check that the match is unique.
216     if (findSubrange(llvm::makeArrayRef(QueryTokens),
217                      llvm::makeArrayRef(Found.end(), Tokens.end()), TextMatches)
218             .begin() != Tokens.end()) {
219       ADD_FAILURE() << "match is not unique for " << Query;
220       std::abort();
221     }
222     return Found;
223   };
224 
225   // Specialized versions of findTokenRange for expanded and spelled tokens.
226   llvm::ArrayRef<syntax::Token> findExpanded(llvm::StringRef Query) {
227     return findTokenRange(Query, Buffer.expandedTokens());
228   }
229   llvm::ArrayRef<syntax::Token> findSpelled(llvm::StringRef Query,
230                                             FileID File = FileID()) {
231     if (!File.isValid())
232       File = SourceMgr->getMainFileID();
233     return findTokenRange(Query, Buffer.spelledTokens(File));
234   }
235 
236   // Data fields.
237   llvm::IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
238       new DiagnosticsEngine(new DiagnosticIDs, new DiagnosticOptions);
239   IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS =
240       new llvm::vfs::InMemoryFileSystem;
241   llvm::IntrusiveRefCntPtr<FileManager> FileMgr =
242       new FileManager(FileSystemOptions(), FS);
243   llvm::IntrusiveRefCntPtr<SourceManager> SourceMgr =
244       new SourceManager(*Diags, *FileMgr);
245   /// Contains last result of calling recordTokens().
246   TokenBuffer Buffer = TokenBuffer(*SourceMgr);
247 };
248 
249 TEST_F(TokenCollectorTest, RawMode) {
250   EXPECT_THAT(tokenize("int main() {}"),
251               ElementsAre(Kind(tok::kw_int),
252                           AllOf(HasText("main"), Kind(tok::identifier)),
253                           Kind(tok::l_paren), Kind(tok::r_paren),
254                           Kind(tok::l_brace), Kind(tok::r_brace)));
255   // Comments are ignored for now.
256   EXPECT_THAT(tokenize("/* foo */int a; // more comments"),
257               ElementsAre(Kind(tok::kw_int),
258                           AllOf(HasText("a"), Kind(tok::identifier)),
259                           Kind(tok::semi)));
260 }
261 
262 TEST_F(TokenCollectorTest, Basic) {
263   std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = {
264       {"int main() {}",
265        R"(expanded tokens:
266   int main ( ) { }
267 file './input.cpp'
268   spelled tokens:
269     int main ( ) { }
270   no mappings.
271 )"},
272       // All kinds of whitespace are ignored.
273       {"\t\n  int\t\n  main\t\n  (\t\n  )\t\n{\t\n  }\t\n",
274        R"(expanded tokens:
275   int main ( ) { }
276 file './input.cpp'
277   spelled tokens:
278     int main ( ) { }
279   no mappings.
280 )"},
281       // Annotation tokens are ignored.
282       {R"cpp(
283         #pragma GCC visibility push (public)
284         #pragma GCC visibility pop
285       )cpp",
286        R"(expanded tokens:
287   <empty>
288 file './input.cpp'
289   spelled tokens:
290     # pragma GCC visibility push ( public ) # pragma GCC visibility pop
291   mappings:
292     ['#'_0, '<eof>'_13) => ['<eof>'_0, '<eof>'_0)
293 )"},
294       // Empty files should not crash.
295       {R"cpp()cpp", R"(expanded tokens:
296   <empty>
297 file './input.cpp'
298   spelled tokens:
299     <empty>
300   no mappings.
301 )"},
302       // Should not crash on errors inside '#define' directives. Error is that
303       // stringification (#B) does not refer to a macro parameter.
304       {
305           R"cpp(
306 a
307 #define MACRO() A #B
308 )cpp",
309           R"(expanded tokens:
310   a
311 file './input.cpp'
312   spelled tokens:
313     a # define MACRO ( ) A # B
314   mappings:
315     ['#'_1, '<eof>'_9) => ['<eof>'_1, '<eof>'_1)
316 )"}};
317   for (auto &Test : TestCases)
318     EXPECT_EQ(collectAndDump(Test.first), Test.second)
319         << collectAndDump(Test.first);
320 }
321 
322 TEST_F(TokenCollectorTest, Locations) {
323   // Check locations of the tokens.
324   llvm::Annotations Code(R"cpp(
325     $r1[[int]] $r2[[a]] $r3[[=]] $r4[["foo bar baz"]] $r5[[;]]
326   )cpp");
327   recordTokens(Code.code());
328   // Check expanded tokens.
329   EXPECT_THAT(
330       Buffer.expandedTokens(),
331       ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))),
332                   AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))),
333                   AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))),
334                   AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))),
335                   AllOf(Kind(tok::semi), RangeIs(Code.range("r5"))),
336                   Kind(tok::eof)));
337   // Check spelled tokens.
338   EXPECT_THAT(
339       Buffer.spelledTokens(SourceMgr->getMainFileID()),
340       ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))),
341                   AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))),
342                   AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))),
343                   AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))),
344                   AllOf(Kind(tok::semi), RangeIs(Code.range("r5")))));
345 }
346 
347 TEST_F(TokenCollectorTest, MacroDirectives) {
348   // Macro directives are not stored anywhere at the moment.
349   std::string Code = R"cpp(
350     #define FOO a
351     #include "unresolved_file.h"
352     #undef FOO
353     #ifdef X
354     #else
355     #endif
356     #ifndef Y
357     #endif
358     #if 1
359     #elif 2
360     #else
361     #endif
362     #pragma once
363     #pragma something lalala
364 
365     int a;
366   )cpp";
367   std::string Expected =
368       "expanded tokens:\n"
369       "  int a ;\n"
370       "file './input.cpp'\n"
371       "  spelled tokens:\n"
372       "    # define FOO a # include \"unresolved_file.h\" # undef FOO "
373       "# ifdef X # else # endif # ifndef Y # endif # if 1 # elif 2 # else "
374       "# endif # pragma once # pragma something lalala int a ;\n"
375       "  mappings:\n"
376       "    ['#'_0, 'int'_39) => ['int'_0, 'int'_0)\n";
377   EXPECT_EQ(collectAndDump(Code), Expected);
378 }
379 
380 TEST_F(TokenCollectorTest, MacroReplacements) {
381   std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = {
382       // A simple object-like macro.
383       {R"cpp(
384     #define INT int const
385     INT a;
386   )cpp",
387        R"(expanded tokens:
388   int const a ;
389 file './input.cpp'
390   spelled tokens:
391     # define INT int const INT a ;
392   mappings:
393     ['#'_0, 'INT'_5) => ['int'_0, 'int'_0)
394     ['INT'_5, 'a'_6) => ['int'_0, 'a'_2)
395 )"},
396       // A simple function-like macro.
397       {R"cpp(
398     #define INT(a) const int
399     INT(10+10) a;
400   )cpp",
401        R"(expanded tokens:
402   const int a ;
403 file './input.cpp'
404   spelled tokens:
405     # define INT ( a ) const int INT ( 10 + 10 ) a ;
406   mappings:
407     ['#'_0, 'INT'_8) => ['const'_0, 'const'_0)
408     ['INT'_8, 'a'_14) => ['const'_0, 'a'_2)
409 )"},
410       // Recursive macro replacements.
411       {R"cpp(
412     #define ID(X) X
413     #define INT int const
414     ID(ID(INT)) a;
415   )cpp",
416        R"(expanded tokens:
417   int const a ;
418 file './input.cpp'
419   spelled tokens:
420     # define ID ( X ) X # define INT int const ID ( ID ( INT ) ) a ;
421   mappings:
422     ['#'_0, 'ID'_12) => ['int'_0, 'int'_0)
423     ['ID'_12, 'a'_19) => ['int'_0, 'a'_2)
424 )"},
425       // A little more complicated recursive macro replacements.
426       {R"cpp(
427     #define ADD(X, Y) X+Y
428     #define MULT(X, Y) X*Y
429 
430     int a = ADD(MULT(1,2), MULT(3,ADD(4,5)));
431   )cpp",
432        "expanded tokens:\n"
433        "  int a = 1 * 2 + 3 * 4 + 5 ;\n"
434        "file './input.cpp'\n"
435        "  spelled tokens:\n"
436        "    # define ADD ( X , Y ) X + Y # define MULT ( X , Y ) X * Y int "
437        "a = ADD ( MULT ( 1 , 2 ) , MULT ( 3 , ADD ( 4 , 5 ) ) ) ;\n"
438        "  mappings:\n"
439        "    ['#'_0, 'int'_22) => ['int'_0, 'int'_0)\n"
440        "    ['ADD'_25, ';'_46) => ['1'_3, ';'_12)\n"},
441       // Empty macro replacement.
442       // FIXME: the #define directives should not be glued together.
443       {R"cpp(
444     #define EMPTY
445     #define EMPTY_FUNC(X)
446     EMPTY
447     EMPTY_FUNC(1+2+3)
448     )cpp",
449        R"(expanded tokens:
450   <empty>
451 file './input.cpp'
452   spelled tokens:
453     # define EMPTY # define EMPTY_FUNC ( X ) EMPTY EMPTY_FUNC ( 1 + 2 + 3 )
454   mappings:
455     ['#'_0, 'EMPTY'_9) => ['<eof>'_0, '<eof>'_0)
456     ['EMPTY'_9, 'EMPTY_FUNC'_10) => ['<eof>'_0, '<eof>'_0)
457     ['EMPTY_FUNC'_10, '<eof>'_18) => ['<eof>'_0, '<eof>'_0)
458 )"},
459       // File ends with a macro replacement.
460       {R"cpp(
461     #define FOO 10+10;
462     int a = FOO
463     )cpp",
464        R"(expanded tokens:
465   int a = 10 + 10 ;
466 file './input.cpp'
467   spelled tokens:
468     # define FOO 10 + 10 ; int a = FOO
469   mappings:
470     ['#'_0, 'int'_7) => ['int'_0, 'int'_0)
471     ['FOO'_10, '<eof>'_11) => ['10'_3, '<eof>'_7)
472 )"}};
473 
474   for (auto &Test : TestCases)
475     EXPECT_EQ(Test.second, collectAndDump(Test.first))
476         << collectAndDump(Test.first);
477 }
478 
479 TEST_F(TokenCollectorTest, SpecialTokens) {
480   // Tokens coming from concatenations.
481   recordTokens(R"cpp(
482     #define CONCAT(a, b) a ## b
483     int a = CONCAT(1, 2);
484   )cpp");
485   EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()),
486               Contains(HasText("12")));
487   // Multi-line tokens with slashes at the end.
488   recordTokens("i\\\nn\\\nt");
489   EXPECT_THAT(Buffer.expandedTokens(),
490               ElementsAre(AllOf(Kind(tok::kw_int), HasText("i\\\nn\\\nt")),
491                           Kind(tok::eof)));
492   // FIXME: test tokens with digraphs and UCN identifiers.
493 }
494 
495 TEST_F(TokenCollectorTest, LateBoundTokens) {
496   // The parser eventually breaks the first '>>' into two tokens ('>' and '>'),
497   // but we choose to record them as a single token (for now).
498   llvm::Annotations Code(R"cpp(
499     template <class T>
500     struct foo { int a; };
501     int bar = foo<foo<int$br[[>>]]().a;
502     int baz = 10 $op[[>>]] 2;
503   )cpp");
504   recordTokens(Code.code());
505   EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()),
506               AllOf(Contains(AllOf(Kind(tok::greatergreater),
507                                    RangeIs(Code.range("br")))),
508                     Contains(AllOf(Kind(tok::greatergreater),
509                                    RangeIs(Code.range("op"))))));
510 }
511 
512 TEST_F(TokenCollectorTest, DelayedParsing) {
513   llvm::StringLiteral Code = R"cpp(
514     struct Foo {
515       int method() {
516         // Parser will visit method bodies and initializers multiple times, but
517         // TokenBuffer should only record the first walk over the tokens;
518         return 100;
519       }
520       int a = 10;
521 
522       struct Subclass {
523         void foo() {
524           Foo().method();
525         }
526       };
527     };
528   )cpp";
529   std::string ExpectedTokens =
530       "expanded tokens:\n"
531       "  struct Foo { int method ( ) { return 100 ; } int a = 10 ; struct "
532       "Subclass { void foo ( ) { Foo ( ) . method ( ) ; } } ; } ;\n";
533   EXPECT_THAT(collectAndDump(Code), StartsWith(ExpectedTokens));
534 }
535 
536 TEST_F(TokenCollectorTest, MultiFile) {
537   addFile("./foo.h", R"cpp(
538     #define ADD(X, Y) X+Y
539     int a = 100;
540     #include "bar.h"
541   )cpp");
542   addFile("./bar.h", R"cpp(
543     int b = ADD(1, 2);
544     #define MULT(X, Y) X*Y
545   )cpp");
546   llvm::StringLiteral Code = R"cpp(
547     #include "foo.h"
548     int c = ADD(1, MULT(2,3));
549   )cpp";
550 
551   std::string Expected = R"(expanded tokens:
552   int a = 100 ; int b = 1 + 2 ; int c = 1 + 2 * 3 ;
553 file './input.cpp'
554   spelled tokens:
555     # include "foo.h" int c = ADD ( 1 , MULT ( 2 , 3 ) ) ;
556   mappings:
557     ['#'_0, 'int'_3) => ['int'_12, 'int'_12)
558     ['ADD'_6, ';'_17) => ['1'_15, ';'_20)
559 file './foo.h'
560   spelled tokens:
561     # define ADD ( X , Y ) X + Y int a = 100 ; # include "bar.h"
562   mappings:
563     ['#'_0, 'int'_11) => ['int'_0, 'int'_0)
564     ['#'_16, '<eof>'_19) => ['int'_5, 'int'_5)
565 file './bar.h'
566   spelled tokens:
567     int b = ADD ( 1 , 2 ) ; # define MULT ( X , Y ) X * Y
568   mappings:
569     ['ADD'_3, ';'_9) => ['1'_8, ';'_11)
570     ['#'_10, '<eof>'_21) => ['int'_12, 'int'_12)
571 )";
572 
573   EXPECT_EQ(Expected, collectAndDump(Code))
574       << "input: " << Code << "\nresults: " << collectAndDump(Code);
575 }
576 
577 class TokenBufferTest : public TokenCollectorTest {};
578 
579 TEST_F(TokenBufferTest, SpelledByExpanded) {
580   recordTokens(R"cpp(
581     a1 a2 a3 b1 b2
582   )cpp");
583 
584   // Sanity check: expanded and spelled tokens are stored separately.
585   EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2"))));
586   // Searching for subranges of expanded tokens should give the corresponding
587   // spelled ones.
588   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 b1 b2")),
589               ValueIs(SameRange(findSpelled("a1 a2 a3 b1 b2"))));
590   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
591               ValueIs(SameRange(findSpelled("a1 a2 a3"))));
592   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
593               ValueIs(SameRange(findSpelled("b1 b2"))));
594 
595   // Test search on simple macro expansions.
596   recordTokens(R"cpp(
597     #define A a1 a2 a3
598     #define B b1 b2
599 
600     A split B
601   )cpp");
602   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")),
603               ValueIs(SameRange(findSpelled("A split B"))));
604   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
605               ValueIs(SameRange(findSpelled("A split").drop_back())));
606   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
607               ValueIs(SameRange(findSpelled("split B").drop_front())));
608   // Ranges not fully covering macro invocations should fail.
609   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None);
610   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("b2")), llvm::None);
611   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1 b2")),
612             llvm::None);
613 
614   // Recursive macro invocations.
615   recordTokens(R"cpp(
616     #define ID(x) x
617     #define B b1 b2
618 
619     ID(ID(ID(a1) a2 a3)) split ID(B)
620   )cpp");
621 
622   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
623               ValueIs(SameRange(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) )"))));
624   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
625               ValueIs(SameRange(findSpelled("ID ( B )"))));
626   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")),
627               ValueIs(SameRange(findSpelled(
628                   "ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )"))));
629   // Ranges crossing macro call boundaries.
630   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1")),
631             llvm::None);
632   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1")),
633             llvm::None);
634   // FIXME: next two examples should map to macro arguments, but currently they
635   //        fail.
636   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2")), llvm::None);
637   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None);
638 
639   // Empty macro expansions.
640   recordTokens(R"cpp(
641     #define EMPTY
642     #define ID(X) X
643 
644     EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1
645     EMPTY EMPTY ID(4 5 6) split2
646     ID(7 8 9) EMPTY EMPTY
647   )cpp");
648   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("1 2 3")),
649               ValueIs(SameRange(findSpelled("ID ( 1 2 3 )"))));
650   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("4 5 6")),
651               ValueIs(SameRange(findSpelled("ID ( 4 5 6 )"))));
652   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("7 8 9")),
653               ValueIs(SameRange(findSpelled("ID ( 7 8 9 )"))));
654 
655   // Empty mappings coming from various directives.
656   recordTokens(R"cpp(
657     #define ID(X) X
658     ID(1)
659     #pragma lalala
660     not_mapped
661   )cpp");
662   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("not_mapped")),
663               ValueIs(SameRange(findSpelled("not_mapped"))));
664 }
665 
666 TEST_F(TokenBufferTest, ExpansionStartingAt) {
667   // Object-like macro expansions.
668   recordTokens(R"cpp(
669     #define FOO 3+4
670     int a = FOO 1;
671     int b = FOO 2;
672   )cpp");
673 
674   llvm::ArrayRef<syntax::Token> Foo1 = findSpelled("FOO 1").drop_back();
675   EXPECT_THAT(
676       Buffer.expansionStartingAt(Foo1.data()),
677       ValueIs(IsExpansion(SameRange(Foo1),
678                           SameRange(findExpanded("3 + 4 1").drop_back()))));
679 
680   llvm::ArrayRef<syntax::Token> Foo2 = findSpelled("FOO 2").drop_back();
681   EXPECT_THAT(
682       Buffer.expansionStartingAt(Foo2.data()),
683       ValueIs(IsExpansion(SameRange(Foo2),
684                           SameRange(findExpanded("3 + 4 2").drop_back()))));
685 
686   // Function-like macro expansions.
687   recordTokens(R"cpp(
688     #define ID(X) X
689     int a = ID(1+2+3);
690     int b = ID(ID(2+3+4));
691   )cpp");
692 
693   llvm::ArrayRef<syntax::Token> ID1 = findSpelled("ID ( 1 + 2 + 3 )");
694   EXPECT_THAT(Buffer.expansionStartingAt(&ID1.front()),
695               ValueIs(IsExpansion(SameRange(ID1),
696                                   SameRange(findExpanded("1 + 2 + 3")))));
697   // Only the first spelled token should be found.
698   for (const auto &T : ID1.drop_front())
699     EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None);
700 
701   llvm::ArrayRef<syntax::Token> ID2 = findSpelled("ID ( ID ( 2 + 3 + 4 ) )");
702   EXPECT_THAT(Buffer.expansionStartingAt(&ID2.front()),
703               ValueIs(IsExpansion(SameRange(ID2),
704                                   SameRange(findExpanded("2 + 3 + 4")))));
705   // Only the first spelled token should be found.
706   for (const auto &T : ID2.drop_front())
707     EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None);
708 
709   // PP directives.
710   recordTokens(R"cpp(
711 #define FOO 1
712 int a = FOO;
713 #pragma once
714 int b = 1;
715   )cpp");
716 
717   llvm::ArrayRef<syntax::Token> DefineFoo = findSpelled("# define FOO 1");
718   EXPECT_THAT(
719       Buffer.expansionStartingAt(&DefineFoo.front()),
720       ValueIs(IsExpansion(SameRange(DefineFoo),
721                           SameRange(findExpanded("int a").take_front(0)))));
722   // Only the first spelled token should be found.
723   for (const auto &T : DefineFoo.drop_front())
724     EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None);
725 
726   llvm::ArrayRef<syntax::Token> PragmaOnce = findSpelled("# pragma once");
727   EXPECT_THAT(
728       Buffer.expansionStartingAt(&PragmaOnce.front()),
729       ValueIs(IsExpansion(SameRange(PragmaOnce),
730                           SameRange(findExpanded("int b").take_front(0)))));
731   // Only the first spelled token should be found.
732   for (const auto &T : PragmaOnce.drop_front())
733     EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None);
734 }
735 
736 TEST_F(TokenBufferTest, TokensToFileRange) {
737   addFile("./foo.h", "token_from_header");
738   llvm::Annotations Code(R"cpp(
739     #define FOO token_from_expansion
740     #include "./foo.h"
741     $all[[$i[[int]] a = FOO;]]
742   )cpp");
743   recordTokens(Code.code());
744 
745   auto &SM = *SourceMgr;
746 
747   // Two simple examples.
748   auto Int = findExpanded("int").front();
749   auto Semi = findExpanded(";").front();
750   EXPECT_EQ(Int.range(SM), FileRange(SM.getMainFileID(), Code.range("i").Begin,
751                                      Code.range("i").End));
752   EXPECT_EQ(syntax::Token::range(SM, Int, Semi),
753             FileRange(SM.getMainFileID(), Code.range("all").Begin,
754                       Code.range("all").End));
755   // We don't test assertion failures because death tests are slow.
756 }
757 
758 TEST_F(TokenBufferTest, macroExpansions) {
759   llvm::Annotations Code(R"cpp(
760     #define FOO B
761     #define FOO2 BA
762     #define CALL(X) int X
763     #define G CALL(FOO2)
764     int B;
765     $macro[[FOO]];
766     $macro[[CALL]](A);
767     $macro[[G]];
768   )cpp");
769   recordTokens(Code.code());
770   auto &SM = *SourceMgr;
771   auto Expansions = Buffer.macroExpansions(SM.getMainFileID());
772   std::vector<FileRange> ExpectedMacroRanges;
773   for (auto Range : Code.ranges("macro"))
774     ExpectedMacroRanges.push_back(
775         FileRange(SM.getMainFileID(), Range.Begin, Range.End));
776   std::vector<FileRange> ActualMacroRanges;
777   for (auto Expansion : Expansions)
778     ActualMacroRanges.push_back(Expansion->range(SM));
779   EXPECT_EQ(ExpectedMacroRanges, ActualMacroRanges);
780 }
781 } // namespace
782