1 //===- TokensTest.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/Tooling/Syntax/Tokens.h"
10 #include "clang/AST/ASTConsumer.h"
11 #include "clang/AST/Expr.h"
12 #include "clang/Basic/Diagnostic.h"
13 #include "clang/Basic/DiagnosticIDs.h"
14 #include "clang/Basic/DiagnosticOptions.h"
15 #include "clang/Basic/FileManager.h"
16 #include "clang/Basic/FileSystemOptions.h"
17 #include "clang/Basic/LLVM.h"
18 #include "clang/Basic/LangOptions.h"
19 #include "clang/Basic/SourceLocation.h"
20 #include "clang/Basic/SourceManager.h"
21 #include "clang/Basic/TokenKinds.def"
22 #include "clang/Basic/TokenKinds.h"
23 #include "clang/Frontend/CompilerInstance.h"
24 #include "clang/Frontend/FrontendAction.h"
25 #include "clang/Frontend/Utils.h"
26 #include "clang/Lex/Lexer.h"
27 #include "clang/Lex/PreprocessorOptions.h"
28 #include "clang/Lex/Token.h"
29 #include "clang/Tooling/Tooling.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/IntrusiveRefCntPtr.h"
32 #include "llvm/ADT/None.h"
33 #include "llvm/ADT/Optional.h"
34 #include "llvm/ADT/STLExtras.h"
35 #include "llvm/ADT/StringRef.h"
36 #include "llvm/Support/FormatVariadic.h"
37 #include "llvm/Support/MemoryBuffer.h"
38 #include "llvm/Support/VirtualFileSystem.h"
39 #include "llvm/Support/raw_os_ostream.h"
40 #include "llvm/Support/raw_ostream.h"
41 #include "llvm/Testing/Support/Annotations.h"
42 #include "llvm/Testing/Support/SupportHelpers.h"
43 #include <cassert>
44 #include <cstdlib>
45 #include <gmock/gmock.h>
46 #include <gtest/gtest.h>
47 #include <memory>
48 #include <ostream>
49 #include <string>
50 
51 using namespace clang;
52 using namespace clang::syntax;
53 
54 using llvm::ValueIs;
55 using ::testing::AllOf;
56 using ::testing::Contains;
57 using ::testing::ElementsAre;
58 using ::testing::Matcher;
59 using ::testing::Not;
60 using ::testing::StartsWith;
61 
62 namespace {
63 // Checks the passed ArrayRef<T> has the same begin() and end() iterators as the
64 // argument.
65 MATCHER_P(SameRange, A, "") {
66   return A.begin() == arg.begin() && A.end() == arg.end();
67 }
68 // Matchers for syntax::Token.
69 MATCHER_P(Kind, K, "") { return arg.kind() == K; }
70 MATCHER_P2(HasText, Text, SourceMgr, "") {
71   return arg.text(*SourceMgr) == Text;
72 }
73 /// Checks the start and end location of a token are equal to SourceRng.
74 MATCHER_P(RangeIs, SourceRng, "") {
75   return arg.location() == SourceRng.first &&
76          arg.endLocation() == SourceRng.second;
77 }
78 
79 class TokenCollectorTest : public ::testing::Test {
80 public:
81   /// Run the clang frontend, collect the preprocessed tokens from the frontend
82   /// invocation and store them in this->Buffer.
83   /// This also clears SourceManager before running the compiler.
84   void recordTokens(llvm::StringRef Code) {
85     class RecordTokens : public ASTFrontendAction {
86     public:
87       explicit RecordTokens(TokenBuffer &Result) : Result(Result) {}
88 
89       bool BeginSourceFileAction(CompilerInstance &CI) override {
90         assert(!Collector && "expected only a single call to BeginSourceFile");
91         Collector.emplace(CI.getPreprocessor());
92         return true;
93       }
94       void EndSourceFileAction() override {
95         assert(Collector && "BeginSourceFileAction was never called");
96         Result = std::move(*Collector).consume();
97       }
98 
99       std::unique_ptr<ASTConsumer>
100       CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override {
101         return llvm::make_unique<ASTConsumer>();
102       }
103 
104     private:
105       TokenBuffer &Result;
106       llvm::Optional<TokenCollector> Collector;
107     };
108 
109     constexpr const char *FileName = "./input.cpp";
110     FS->addFile(FileName, time_t(), llvm::MemoryBuffer::getMemBufferCopy(""));
111     // Prepare to run a compiler.
112     if (!Diags->getClient())
113       Diags->setClient(new IgnoringDiagConsumer);
114     std::vector<const char *> Args = {"tok-test", "-std=c++03", "-fsyntax-only",
115                                       FileName};
116     auto CI = createInvocationFromCommandLine(Args, Diags, FS);
117     assert(CI);
118     CI->getFrontendOpts().DisableFree = false;
119     CI->getPreprocessorOpts().addRemappedFile(
120         FileName, llvm::MemoryBuffer::getMemBufferCopy(Code).release());
121     CompilerInstance Compiler;
122     Compiler.setInvocation(std::move(CI));
123     Compiler.setDiagnostics(Diags.get());
124     Compiler.setFileManager(FileMgr.get());
125     Compiler.setSourceManager(SourceMgr.get());
126 
127     this->Buffer = TokenBuffer(*SourceMgr);
128     RecordTokens Recorder(this->Buffer);
129     ASSERT_TRUE(Compiler.ExecuteAction(Recorder))
130         << "failed to run the frontend";
131   }
132 
133   /// Record the tokens and return a test dump of the resulting buffer.
134   std::string collectAndDump(llvm::StringRef Code) {
135     recordTokens(Code);
136     return Buffer.dumpForTests();
137   }
138 
139   // Adds a file to the test VFS.
140   void addFile(llvm::StringRef Path, llvm::StringRef Contents) {
141     if (!FS->addFile(Path, time_t(),
142                      llvm::MemoryBuffer::getMemBufferCopy(Contents))) {
143       ADD_FAILURE() << "could not add a file to VFS: " << Path;
144     }
145   }
146 
147   /// Add a new file, run syntax::tokenize() on it and return the results.
148   std::vector<syntax::Token> tokenize(llvm::StringRef Text) {
149     // FIXME: pass proper LangOptions.
150     return syntax::tokenize(
151         SourceMgr->createFileID(llvm::MemoryBuffer::getMemBufferCopy(Text)),
152         *SourceMgr, LangOptions());
153   }
154 
155   // Specialized versions of matchers that hide the SourceManager from clients.
156   Matcher<syntax::Token> HasText(std::string Text) const {
157     return ::HasText(Text, SourceMgr.get());
158   }
159   Matcher<syntax::Token> RangeIs(llvm::Annotations::Range R) const {
160     std::pair<SourceLocation, SourceLocation> Ls;
161     Ls.first = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID())
162                    .getLocWithOffset(R.Begin);
163     Ls.second = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID())
164                     .getLocWithOffset(R.End);
165     return ::RangeIs(Ls);
166   }
167 
168   /// Finds a subrange in O(n * m).
169   template <class T, class U, class Eq>
170   llvm::ArrayRef<T> findSubrange(llvm::ArrayRef<U> Subrange,
171                                  llvm::ArrayRef<T> Range, Eq F) {
172     for (auto Begin = Range.begin(); Begin < Range.end(); ++Begin) {
173       auto It = Begin;
174       for (auto ItSub = Subrange.begin();
175            ItSub != Subrange.end() && It != Range.end(); ++ItSub, ++It) {
176         if (!F(*ItSub, *It))
177           goto continue_outer;
178       }
179       return llvm::makeArrayRef(Begin, It);
180     continue_outer:;
181     }
182     return llvm::makeArrayRef(Range.end(), Range.end());
183   }
184 
185   /// Finds a subrange in \p Tokens that match the tokens specified in \p Query.
186   /// The match should be unique. \p Query is a whitespace-separated list of
187   /// tokens to search for.
188   llvm::ArrayRef<syntax::Token>
189   findTokenRange(llvm::StringRef Query, llvm::ArrayRef<syntax::Token> Tokens) {
190     llvm::SmallVector<llvm::StringRef, 8> QueryTokens;
191     Query.split(QueryTokens, ' ', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
192     if (QueryTokens.empty()) {
193       ADD_FAILURE() << "will not look for an empty list of tokens";
194       std::abort();
195     }
196     // An equality test for search.
197     auto TextMatches = [this](llvm::StringRef Q, const syntax::Token &T) {
198       return Q == T.text(*SourceMgr);
199     };
200     // Find a match.
201     auto Found =
202         findSubrange(llvm::makeArrayRef(QueryTokens), Tokens, TextMatches);
203     if (Found.begin() == Tokens.end()) {
204       ADD_FAILURE() << "could not find the subrange for " << Query;
205       std::abort();
206     }
207     // Check that the match is unique.
208     if (findSubrange(llvm::makeArrayRef(QueryTokens),
209                      llvm::makeArrayRef(Found.end(), Tokens.end()), TextMatches)
210             .begin() != Tokens.end()) {
211       ADD_FAILURE() << "match is not unique for " << Query;
212       std::abort();
213     }
214     return Found;
215   };
216 
217   // Specialized versions of findTokenRange for expanded and spelled tokens.
218   llvm::ArrayRef<syntax::Token> findExpanded(llvm::StringRef Query) {
219     return findTokenRange(Query, Buffer.expandedTokens());
220   }
221   llvm::ArrayRef<syntax::Token> findSpelled(llvm::StringRef Query,
222                                             FileID File = FileID()) {
223     if (!File.isValid())
224       File = SourceMgr->getMainFileID();
225     return findTokenRange(Query, Buffer.spelledTokens(File));
226   }
227 
228   // Data fields.
229   llvm::IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
230       new DiagnosticsEngine(new DiagnosticIDs, new DiagnosticOptions);
231   IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS =
232       new llvm::vfs::InMemoryFileSystem;
233   llvm::IntrusiveRefCntPtr<FileManager> FileMgr =
234       new FileManager(FileSystemOptions(), FS);
235   llvm::IntrusiveRefCntPtr<SourceManager> SourceMgr =
236       new SourceManager(*Diags, *FileMgr);
237   /// Contains last result of calling recordTokens().
238   TokenBuffer Buffer = TokenBuffer(*SourceMgr);
239 };
240 
241 TEST_F(TokenCollectorTest, RawMode) {
242   EXPECT_THAT(tokenize("int main() {}"),
243               ElementsAre(Kind(tok::kw_int),
244                           AllOf(HasText("main"), Kind(tok::identifier)),
245                           Kind(tok::l_paren), Kind(tok::r_paren),
246                           Kind(tok::l_brace), Kind(tok::r_brace)));
247   // Comments are ignored for now.
248   EXPECT_THAT(tokenize("/* foo */int a; // more comments"),
249               ElementsAre(Kind(tok::kw_int),
250                           AllOf(HasText("a"), Kind(tok::identifier)),
251                           Kind(tok::semi)));
252 }
253 
254 TEST_F(TokenCollectorTest, Basic) {
255   std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = {
256       {"int main() {}",
257        R"(expanded tokens:
258   int main ( ) { }
259 file './input.cpp'
260   spelled tokens:
261     int main ( ) { }
262   no mappings.
263 )"},
264       // All kinds of whitespace are ignored.
265       {"\t\n  int\t\n  main\t\n  (\t\n  )\t\n{\t\n  }\t\n",
266        R"(expanded tokens:
267   int main ( ) { }
268 file './input.cpp'
269   spelled tokens:
270     int main ( ) { }
271   no mappings.
272 )"},
273       // Annotation tokens are ignored.
274       {R"cpp(
275         #pragma GCC visibility push (public)
276         #pragma GCC visibility pop
277       )cpp",
278        R"(expanded tokens:
279   <empty>
280 file './input.cpp'
281   spelled tokens:
282     # pragma GCC visibility push ( public ) # pragma GCC visibility pop
283   mappings:
284     ['#'_0, '<eof>'_13) => ['<eof>'_0, '<eof>'_0)
285 )"}};
286   for (auto &Test : TestCases)
287     EXPECT_EQ(collectAndDump(Test.first), Test.second)
288         << collectAndDump(Test.first);
289 }
290 
291 TEST_F(TokenCollectorTest, Locations) {
292   // Check locations of the tokens.
293   llvm::Annotations Code(R"cpp(
294     $r1[[int]] $r2[[a]] $r3[[=]] $r4[["foo bar baz"]] $r5[[;]]
295   )cpp");
296   recordTokens(Code.code());
297   // Check expanded tokens.
298   EXPECT_THAT(
299       Buffer.expandedTokens(),
300       ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))),
301                   AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))),
302                   AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))),
303                   AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))),
304                   AllOf(Kind(tok::semi), RangeIs(Code.range("r5"))),
305                   Kind(tok::eof)));
306   // Check spelled tokens.
307   EXPECT_THAT(
308       Buffer.spelledTokens(SourceMgr->getMainFileID()),
309       ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))),
310                   AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))),
311                   AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))),
312                   AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))),
313                   AllOf(Kind(tok::semi), RangeIs(Code.range("r5")))));
314 }
315 
316 TEST_F(TokenCollectorTest, MacroDirectives) {
317   // Macro directives are not stored anywhere at the moment.
318   std::string Code = R"cpp(
319     #define FOO a
320     #include "unresolved_file.h"
321     #undef FOO
322     #ifdef X
323     #else
324     #endif
325     #ifndef Y
326     #endif
327     #if 1
328     #elif 2
329     #else
330     #endif
331     #pragma once
332     #pragma something lalala
333 
334     int a;
335   )cpp";
336   std::string Expected =
337       "expanded tokens:\n"
338       "  int a ;\n"
339       "file './input.cpp'\n"
340       "  spelled tokens:\n"
341       "    # define FOO a # include \"unresolved_file.h\" # undef FOO "
342       "# ifdef X # else # endif # ifndef Y # endif # if 1 # elif 2 # else "
343       "# endif # pragma once # pragma something lalala int a ;\n"
344       "  mappings:\n"
345       "    ['#'_0, 'int'_39) => ['int'_0, 'int'_0)\n";
346   EXPECT_EQ(collectAndDump(Code), Expected);
347 }
348 
349 TEST_F(TokenCollectorTest, MacroReplacements) {
350   std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = {
351       // A simple object-like macro.
352       {R"cpp(
353     #define INT int const
354     INT a;
355   )cpp",
356        R"(expanded tokens:
357   int const a ;
358 file './input.cpp'
359   spelled tokens:
360     # define INT int const INT a ;
361   mappings:
362     ['#'_0, 'INT'_5) => ['int'_0, 'int'_0)
363     ['INT'_5, 'a'_6) => ['int'_0, 'a'_2)
364 )"},
365       // A simple function-like macro.
366       {R"cpp(
367     #define INT(a) const int
368     INT(10+10) a;
369   )cpp",
370        R"(expanded tokens:
371   const int a ;
372 file './input.cpp'
373   spelled tokens:
374     # define INT ( a ) const int INT ( 10 + 10 ) a ;
375   mappings:
376     ['#'_0, 'INT'_8) => ['const'_0, 'const'_0)
377     ['INT'_8, 'a'_14) => ['const'_0, 'a'_2)
378 )"},
379       // Recursive macro replacements.
380       {R"cpp(
381     #define ID(X) X
382     #define INT int const
383     ID(ID(INT)) a;
384   )cpp",
385        R"(expanded tokens:
386   int const a ;
387 file './input.cpp'
388   spelled tokens:
389     # define ID ( X ) X # define INT int const ID ( ID ( INT ) ) a ;
390   mappings:
391     ['#'_0, 'ID'_12) => ['int'_0, 'int'_0)
392     ['ID'_12, 'a'_19) => ['int'_0, 'a'_2)
393 )"},
394       // A little more complicated recursive macro replacements.
395       {R"cpp(
396     #define ADD(X, Y) X+Y
397     #define MULT(X, Y) X*Y
398 
399     int a = ADD(MULT(1,2), MULT(3,ADD(4,5)));
400   )cpp",
401        "expanded tokens:\n"
402        "  int a = 1 * 2 + 3 * 4 + 5 ;\n"
403        "file './input.cpp'\n"
404        "  spelled tokens:\n"
405        "    # define ADD ( X , Y ) X + Y # define MULT ( X , Y ) X * Y int "
406        "a = ADD ( MULT ( 1 , 2 ) , MULT ( 3 , ADD ( 4 , 5 ) ) ) ;\n"
407        "  mappings:\n"
408        "    ['#'_0, 'int'_22) => ['int'_0, 'int'_0)\n"
409        "    ['ADD'_25, ';'_46) => ['1'_3, ';'_12)\n"},
410       // Empty macro replacement.
411       {R"cpp(
412     #define EMPTY
413     #define EMPTY_FUNC(X)
414     EMPTY
415     EMPTY_FUNC(1+2+3)
416     )cpp",
417        R"(expanded tokens:
418   <empty>
419 file './input.cpp'
420   spelled tokens:
421     # define EMPTY # define EMPTY_FUNC ( X ) EMPTY EMPTY_FUNC ( 1 + 2 + 3 )
422   mappings:
423     ['#'_0, '<eof>'_18) => ['<eof>'_0, '<eof>'_0)
424 )"},
425       // File ends with a macro replacement.
426       {R"cpp(
427     #define FOO 10+10;
428     int a = FOO
429     )cpp",
430        R"(expanded tokens:
431   int a = 10 + 10 ;
432 file './input.cpp'
433   spelled tokens:
434     # define FOO 10 + 10 ; int a = FOO
435   mappings:
436     ['#'_0, 'int'_7) => ['int'_0, 'int'_0)
437     ['FOO'_10, '<eof>'_11) => ['10'_3, '<eof>'_7)
438 )"}};
439 
440   for (auto &Test : TestCases)
441     EXPECT_EQ(Test.second, collectAndDump(Test.first))
442         << collectAndDump(Test.first);
443 }
444 
445 TEST_F(TokenCollectorTest, SpecialTokens) {
446   // Tokens coming from concatenations.
447   recordTokens(R"cpp(
448     #define CONCAT(a, b) a ## b
449     int a = CONCAT(1, 2);
450   )cpp");
451   EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()),
452               Contains(HasText("12")));
453   // Multi-line tokens with slashes at the end.
454   recordTokens("i\\\nn\\\nt");
455   EXPECT_THAT(Buffer.expandedTokens(),
456               ElementsAre(AllOf(Kind(tok::kw_int), HasText("i\\\nn\\\nt")),
457                           Kind(tok::eof)));
458   // FIXME: test tokens with digraphs and UCN identifiers.
459 }
460 
461 TEST_F(TokenCollectorTest, LateBoundTokens) {
462   // The parser eventually breaks the first '>>' into two tokens ('>' and '>'),
463   // but we choose to record them as a single token (for now).
464   llvm::Annotations Code(R"cpp(
465     template <class T>
466     struct foo { int a; };
467     int bar = foo<foo<int$br[[>>]]().a;
468     int baz = 10 $op[[>>]] 2;
469   )cpp");
470   recordTokens(Code.code());
471   EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()),
472               AllOf(Contains(AllOf(Kind(tok::greatergreater),
473                                    RangeIs(Code.range("br")))),
474                     Contains(AllOf(Kind(tok::greatergreater),
475                                    RangeIs(Code.range("op"))))));
476 }
477 
478 TEST_F(TokenCollectorTest, DelayedParsing) {
479   llvm::StringLiteral Code = R"cpp(
480     struct Foo {
481       int method() {
482         // Parser will visit method bodies and initializers multiple times, but
483         // TokenBuffer should only record the first walk over the tokens;
484         return 100;
485       }
486       int a = 10;
487 
488       struct Subclass {
489         void foo() {
490           Foo().method();
491         }
492       };
493     };
494   )cpp";
495   std::string ExpectedTokens =
496       "expanded tokens:\n"
497       "  struct Foo { int method ( ) { return 100 ; } int a = 10 ; struct "
498       "Subclass { void foo ( ) { Foo ( ) . method ( ) ; } } ; } ;\n";
499   EXPECT_THAT(collectAndDump(Code), StartsWith(ExpectedTokens));
500 }
501 
502 TEST_F(TokenCollectorTest, MultiFile) {
503   addFile("./foo.h", R"cpp(
504     #define ADD(X, Y) X+Y
505     int a = 100;
506     #include "bar.h"
507   )cpp");
508   addFile("./bar.h", R"cpp(
509     int b = ADD(1, 2);
510     #define MULT(X, Y) X*Y
511   )cpp");
512   llvm::StringLiteral Code = R"cpp(
513     #include "foo.h"
514     int c = ADD(1, MULT(2,3));
515   )cpp";
516 
517   std::string Expected = R"(expanded tokens:
518   int a = 100 ; int b = 1 + 2 ; int c = 1 + 2 * 3 ;
519 file './input.cpp'
520   spelled tokens:
521     # include "foo.h" int c = ADD ( 1 , MULT ( 2 , 3 ) ) ;
522   mappings:
523     ['#'_0, 'int'_3) => ['int'_12, 'int'_12)
524     ['ADD'_6, ';'_17) => ['1'_15, ';'_20)
525 file './foo.h'
526   spelled tokens:
527     # define ADD ( X , Y ) X + Y int a = 100 ; # include "bar.h"
528   mappings:
529     ['#'_0, 'int'_11) => ['int'_0, 'int'_0)
530     ['#'_16, '<eof>'_19) => ['int'_5, 'int'_5)
531 file './bar.h'
532   spelled tokens:
533     int b = ADD ( 1 , 2 ) ; # define MULT ( X , Y ) X * Y
534   mappings:
535     ['ADD'_3, ';'_9) => ['1'_8, ';'_11)
536     ['#'_10, '<eof>'_21) => ['int'_12, 'int'_12)
537 )";
538 
539   EXPECT_EQ(Expected, collectAndDump(Code))
540       << "input: " << Code << "\nresults: " << collectAndDump(Code);
541 }
542 
543 class TokenBufferTest : public TokenCollectorTest {};
544 
545 TEST_F(TokenBufferTest, SpelledByExpanded) {
546   recordTokens(R"cpp(
547     a1 a2 a3 b1 b2
548   )cpp");
549 
550   // Sanity check: expanded and spelled tokens are stored separately.
551   EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2"))));
552   // Searching for subranges of expanded tokens should give the corresponding
553   // spelled ones.
554   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 b1 b2")),
555               ValueIs(SameRange(findSpelled("a1 a2 a3 b1 b2"))));
556   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
557               ValueIs(SameRange(findSpelled("a1 a2 a3"))));
558   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
559               ValueIs(SameRange(findSpelled("b1 b2"))));
560 
561   // Test search on simple macro expansions.
562   recordTokens(R"cpp(
563     #define A a1 a2 a3
564     #define B b1 b2
565 
566     A split B
567   )cpp");
568   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")),
569               ValueIs(SameRange(findSpelled("A split B"))));
570   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
571               ValueIs(SameRange(findSpelled("A split").drop_back())));
572   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
573               ValueIs(SameRange(findSpelled("split B").drop_front())));
574   // Ranges not fully covering macro invocations should fail.
575   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None);
576   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("b2")), llvm::None);
577   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1 b2")),
578             llvm::None);
579 
580   // Recursive macro invocations.
581   recordTokens(R"cpp(
582     #define ID(x) x
583     #define B b1 b2
584 
585     ID(ID(ID(a1) a2 a3)) split ID(B)
586   )cpp");
587 
588   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
589               ValueIs(SameRange(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) )"))));
590   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
591               ValueIs(SameRange(findSpelled("ID ( B )"))));
592   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")),
593               ValueIs(SameRange(findSpelled(
594                   "ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )"))));
595   // Ranges crossing macro call boundaries.
596   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1")),
597             llvm::None);
598   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1")),
599             llvm::None);
600   // FIXME: next two examples should map to macro arguments, but currently they
601   //        fail.
602   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2")), llvm::None);
603   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None);
604 
605   // Empty macro expansions.
606   recordTokens(R"cpp(
607     #define EMPTY
608     #define ID(X) X
609 
610     EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1
611     EMPTY EMPTY ID(4 5 6) split2
612     ID(7 8 9) EMPTY EMPTY
613   )cpp");
614   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("1 2 3")),
615               ValueIs(SameRange(findSpelled("ID ( 1 2 3 )"))));
616   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("4 5 6")),
617               ValueIs(SameRange(findSpelled("ID ( 4 5 6 )"))));
618   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("7 8 9")),
619               ValueIs(SameRange(findSpelled("ID ( 7 8 9 )"))));
620 
621   // Empty mappings coming from various directives.
622   recordTokens(R"cpp(
623     #define ID(X) X
624     ID(1)
625     #pragma lalala
626     not_mapped
627   )cpp");
628   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("not_mapped")),
629               ValueIs(SameRange(findSpelled("not_mapped"))));
630 }
631 
632 TEST_F(TokenBufferTest, TokensToFileRange) {
633   addFile("./foo.h", "token_from_header");
634   llvm::Annotations Code(R"cpp(
635     #define FOO token_from_expansion
636     #include "./foo.h"
637     $all[[$i[[int]] a = FOO;]]
638   )cpp");
639   recordTokens(Code.code());
640 
641   auto &SM = *SourceMgr;
642 
643   // Two simple examples.
644   auto Int = findExpanded("int").front();
645   auto Semi = findExpanded(";").front();
646   EXPECT_EQ(Int.range(SM), FileRange(SM.getMainFileID(), Code.range("i").Begin,
647                                      Code.range("i").End));
648   EXPECT_EQ(syntax::Token::range(SM, Int, Semi),
649             FileRange(SM.getMainFileID(), Code.range("all").Begin,
650                       Code.range("all").End));
651   // We don't test assertion failures because death tests are slow.
652 }
653 
654 } // namespace
655