//===- TokensTest.cpp -----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "clang/Tooling/Syntax/Tokens.h"
#include "clang/AST/ASTConsumer.h"
#include "clang/AST/Expr.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/DiagnosticIDs.h"
#include "clang/Basic/DiagnosticOptions.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/FileSystemOptions.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TokenKinds.def"
#include "clang/Basic/TokenKinds.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Frontend/FrontendAction.h"
#include "clang/Frontend/Utils.h"
#include "clang/Lex/Lexer.h"
#include "clang/Lex/PreprocessorOptions.h"
#include "clang/Lex/Token.h"
#include "clang/Tooling/Tooling.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_os_ostream.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Testing/Support/Annotations.h"
#include "llvm/Testing/Support/SupportHelpers.h"
#include <cassert>
#include <cstdlib>
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include <memory>
#include <ostream>
#include <string>

using namespace clang;
using namespace clang::syntax;

using llvm::ValueIs;
using ::testing::AllOf;
using ::testing::Contains;
using ::testing::ElementsAre;
using ::testing::Matcher;
using ::testing::Not;
using ::testing::StartsWith;

namespace {
// Checks the passed ArrayRef<T> has the same begin() and end() iterators as the
// argument. This compares iterators, not elements: two ranges with equal
// contents stored in different places do not match.
MATCHER_P(SameRange, A, "") {
  return A.begin() == arg.begin() && A.end() == arg.end();
}
// Matchers for syntax::Token.
// Matches a token whose kind() equals K.
MATCHER_P(Kind, K, "") { return arg.kind() == K; }
// Matches a token whose text(), looked up through *SourceMgr, equals Text.
// SourceMgr is dereferenced, so it has to be a valid pointer-like object.
MATCHER_P2(HasText, Text, SourceMgr, "") {
  return arg.text(*SourceMgr) == Text;
}
/// Checks the start and end location of a token are equal to SourceRng.
/// SourceRng is read through its .first (start) and .second (end) members.
MATCHER_P(RangeIs, SourceRng, "") {
  return arg.location() == SourceRng.first &&
         arg.endLocation() == SourceRng.second;
}

/// Test fixture for token collection. It provides helpers to run the frontend
/// on a piece of code, to tokenize text in raw mode and to look up token
/// ranges by their text.
class TokenCollectorTest : public ::testing::Test {
public:
/// Run the clang frontend, collect the preprocessed tokens from the frontend
/// invocation and store them in this->Buffer.
/// This also clears SourceManager before running the compiler.
84 void recordTokens(llvm::StringRef Code) { 85 class RecordTokens : public ASTFrontendAction { 86 public: 87 explicit RecordTokens(TokenBuffer &Result) : Result(Result) {} 88 89 bool BeginSourceFileAction(CompilerInstance &CI) override { 90 assert(!Collector && "expected only a single call to BeginSourceFile"); 91 Collector.emplace(CI.getPreprocessor()); 92 return true; 93 } 94 void EndSourceFileAction() override { 95 assert(Collector && "BeginSourceFileAction was never called"); 96 Result = std::move(*Collector).consume(); 97 } 98 99 std::unique_ptr<ASTConsumer> 100 CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override { 101 return llvm::make_unique<ASTConsumer>(); 102 } 103 104 private: 105 TokenBuffer &Result; 106 llvm::Optional<TokenCollector> Collector; 107 }; 108 109 constexpr const char *FileName = "./input.cpp"; 110 FS->addFile(FileName, time_t(), llvm::MemoryBuffer::getMemBufferCopy("")); 111 // Prepare to run a compiler. 112 if (!Diags->getClient()) 113 Diags->setClient(new IgnoringDiagConsumer); 114 std::vector<const char *> Args = {"tok-test", "-std=c++03", "-fsyntax-only", 115 FileName}; 116 auto CI = createInvocationFromCommandLine(Args, Diags, FS); 117 assert(CI); 118 CI->getFrontendOpts().DisableFree = false; 119 CI->getPreprocessorOpts().addRemappedFile( 120 FileName, llvm::MemoryBuffer::getMemBufferCopy(Code).release()); 121 CompilerInstance Compiler; 122 Compiler.setInvocation(std::move(CI)); 123 Compiler.setDiagnostics(Diags.get()); 124 Compiler.setFileManager(FileMgr.get()); 125 Compiler.setSourceManager(SourceMgr.get()); 126 127 this->Buffer = TokenBuffer(*SourceMgr); 128 RecordTokens Recorder(this->Buffer); 129 ASSERT_TRUE(Compiler.ExecuteAction(Recorder)) 130 << "failed to run the frontend"; 131 } 132 133 /// Record the tokens and return a test dump of the resulting buffer. 
134 std::string collectAndDump(llvm::StringRef Code) { 135 recordTokens(Code); 136 return Buffer.dumpForTests(); 137 } 138 139 // Adds a file to the test VFS. 140 void addFile(llvm::StringRef Path, llvm::StringRef Contents) { 141 if (!FS->addFile(Path, time_t(), 142 llvm::MemoryBuffer::getMemBufferCopy(Contents))) { 143 ADD_FAILURE() << "could not add a file to VFS: " << Path; 144 } 145 } 146 147 /// Add a new file, run syntax::tokenize() on it and return the results. 148 std::vector<syntax::Token> tokenize(llvm::StringRef Text) { 149 // FIXME: pass proper LangOptions. 150 return syntax::tokenize( 151 SourceMgr->createFileID(llvm::MemoryBuffer::getMemBufferCopy(Text)), 152 *SourceMgr, LangOptions()); 153 } 154 155 // Specialized versions of matchers that hide the SourceManager from clients. 156 Matcher<syntax::Token> HasText(std::string Text) const { 157 return ::HasText(Text, SourceMgr.get()); 158 } 159 Matcher<syntax::Token> RangeIs(llvm::Annotations::Range R) const { 160 std::pair<SourceLocation, SourceLocation> Ls; 161 Ls.first = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID()) 162 .getLocWithOffset(R.Begin); 163 Ls.second = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID()) 164 .getLocWithOffset(R.End); 165 return ::RangeIs(Ls); 166 } 167 168 /// Finds a subrange in O(n * m). 169 template <class T, class U, class Eq> 170 llvm::ArrayRef<T> findSubrange(llvm::ArrayRef<U> Subrange, 171 llvm::ArrayRef<T> Range, Eq F) { 172 for (auto Begin = Range.begin(); Begin < Range.end(); ++Begin) { 173 auto It = Begin; 174 for (auto ItSub = Subrange.begin(); 175 ItSub != Subrange.end() && It != Range.end(); ++ItSub, ++It) { 176 if (!F(*ItSub, *It)) 177 goto continue_outer; 178 } 179 return llvm::makeArrayRef(Begin, It); 180 continue_outer:; 181 } 182 return llvm::makeArrayRef(Range.end(), Range.end()); 183 } 184 185 /// Finds a subrange in \p Tokens that match the tokens specified in \p Query. 186 /// The match should be unique. 
\p Query is a whitespace-separated list of 187 /// tokens to search for. 188 llvm::ArrayRef<syntax::Token> 189 findTokenRange(llvm::StringRef Query, llvm::ArrayRef<syntax::Token> Tokens) { 190 llvm::SmallVector<llvm::StringRef, 8> QueryTokens; 191 Query.split(QueryTokens, ' ', /*MaxSplit=*/-1, /*KeepEmpty=*/false); 192 if (QueryTokens.empty()) { 193 ADD_FAILURE() << "will not look for an empty list of tokens"; 194 std::abort(); 195 } 196 // An equality test for search. 197 auto TextMatches = [this](llvm::StringRef Q, const syntax::Token &T) { 198 return Q == T.text(*SourceMgr); 199 }; 200 // Find a match. 201 auto Found = 202 findSubrange(llvm::makeArrayRef(QueryTokens), Tokens, TextMatches); 203 if (Found.begin() == Tokens.end()) { 204 ADD_FAILURE() << "could not find the subrange for " << Query; 205 std::abort(); 206 } 207 // Check that the match is unique. 208 if (findSubrange(llvm::makeArrayRef(QueryTokens), 209 llvm::makeArrayRef(Found.end(), Tokens.end()), TextMatches) 210 .begin() != Tokens.end()) { 211 ADD_FAILURE() << "match is not unique for " << Query; 212 std::abort(); 213 } 214 return Found; 215 }; 216 217 // Specialized versions of findTokenRange for expanded and spelled tokens. 218 llvm::ArrayRef<syntax::Token> findExpanded(llvm::StringRef Query) { 219 return findTokenRange(Query, Buffer.expandedTokens()); 220 } 221 llvm::ArrayRef<syntax::Token> findSpelled(llvm::StringRef Query, 222 FileID File = FileID()) { 223 if (!File.isValid()) 224 File = SourceMgr->getMainFileID(); 225 return findTokenRange(Query, Buffer.spelledTokens(File)); 226 } 227 228 // Data fields. 
// Diagnostics engine handed to every compiler run started by recordTokens().
llvm::IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
    new DiagnosticsEngine(new DiagnosticIDs, new DiagnosticOptions);
// In-memory file system that holds the main file and files added by addFile().
IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS =
    new llvm::vfs::InMemoryFileSystem;
// File manager reading from FS.
llvm::IntrusiveRefCntPtr<FileManager> FileMgr =
    new FileManager(FileSystemOptions(), FS);
// Source manager used by the compiler runs, tokenize() and the matchers.
llvm::IntrusiveRefCntPtr<SourceManager> SourceMgr =
    new SourceManager(*Diags, *FileMgr);
/// Contains last result of calling recordTokens().
TokenBuffer Buffer = TokenBuffer(*SourceMgr);
};

// syntax::tokenize() on plain text: checks token kinds and identifier text.
TEST_F(TokenCollectorTest, RawMode) {
  EXPECT_THAT(tokenize("int main() {}"),
              ElementsAre(Kind(tok::kw_int),
                          AllOf(HasText("main"), Kind(tok::identifier)),
                          Kind(tok::l_paren), Kind(tok::r_paren),
                          Kind(tok::l_brace), Kind(tok::r_brace)));
  // Comments are ignored for now.
  EXPECT_THAT(tokenize("/* foo */int a; // more comments"),
              ElementsAre(Kind(tok::kw_int),
                          AllOf(HasText("a"), Kind(tok::identifier)),
                          Kind(tok::semi)));
}

// Compares the textual dump of the collected tokens with golden output for a
// few small inputs. The expected strings must match the dump exactly,
// whitespace included.
TEST_F(TokenCollectorTest, Basic) {
  std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = {
      {"int main() {}",
       R"(expanded tokens:
  int main ( ) { }
file './input.cpp'
  spelled tokens:
    int main ( ) { }
  no mappings.
)"},
      // All kinds of whitespace are ignored.
      {"\t\n int\t\n main\t\n (\t\n )\t\n{\t\n }\t\n",
       R"(expanded tokens:
  int main ( ) { }
file './input.cpp'
  spelled tokens:
    int main ( ) { }
  no mappings.
)"},
      // Annotation tokens are ignored.
      {R"cpp(
        #pragma GCC visibility push (public)
        #pragma GCC visibility pop
      )cpp",
       R"(expanded tokens:
  <empty>
file './input.cpp'
  spelled tokens:
    # pragma GCC visibility push ( public ) # pragma GCC visibility pop
  mappings:
    ['#'_0, '<eof>'_13) => ['<eof>'_0, '<eof>'_0)
)"}};
  // The dump is streamed as well so a failure shows the actual output.
  for (auto &Test : TestCases)
    EXPECT_EQ(collectAndDump(Test.first), Test.second)
        << collectAndDump(Test.first);
}

// Token start/end locations must coincide with the annotated ranges, for both
// the expanded and the spelled token streams.
TEST_F(TokenCollectorTest, Locations) {
  // Check locations of the tokens.
  llvm::Annotations Code(R"cpp(
    $r1[[int]] $r2[[a]] $r3[[=]] $r4[["foo bar baz"]] $r5[[;]]
  )cpp");
  recordTokens(Code.code());
  // Check expanded tokens.
  EXPECT_THAT(
      Buffer.expandedTokens(),
      ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))),
                  AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))),
                  AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))),
                  AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))),
                  AllOf(Kind(tok::semi), RangeIs(Code.range("r5"))),
                  Kind(tok::eof)));
  // Check spelled tokens.
  EXPECT_THAT(
      Buffer.spelledTokens(SourceMgr->getMainFileID()),
      ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))),
                  AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))),
                  AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))),
                  AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))),
                  AllOf(Kind(tok::semi), RangeIs(Code.range("r5")))));
}

// Preprocessor directives show up among the spelled tokens only; the expected
// dump maps all of them to an empty range of expanded tokens.
TEST_F(TokenCollectorTest, MacroDirectives) {
  // Macro directives are not stored anywhere at the moment.
  std::string Code = R"cpp(
    #define FOO a
    #include "unresolved_file.h"
    #undef FOO
    #ifdef X
    #else
    #endif
    #ifndef Y
    #endif
    #if 1
    #elif 2
    #else
    #endif
    #pragma once
    #pragma something lalala

    int a;
  )cpp";
  std::string Expected =
      "expanded tokens:\n"
      "  int a ;\n"
      "file './input.cpp'\n"
      "  spelled tokens:\n"
      "    # define FOO a # include \"unresolved_file.h\" # undef FOO "
      "# ifdef X # else # endif # ifndef Y # endif # if 1 # elif 2 # else "
      "# endif # pragma once # pragma something lalala int a ;\n"
      "  mappings:\n"
      "    ['#'_0, 'int'_39) => ['int'_0, 'int'_0)\n";
  EXPECT_EQ(collectAndDump(Code), Expected);
}

// Golden-output checks of the spelled-to-expanded mappings produced for
// object-like, function-like, nested and empty macro expansions.
TEST_F(TokenCollectorTest, MacroReplacements) {
  std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = {
      // A simple object-like macro.
      {R"cpp(
    #define INT int const
    INT a;
  )cpp",
       R"(expanded tokens:
  int const a ;
file './input.cpp'
  spelled tokens:
    # define INT int const INT a ;
  mappings:
    ['#'_0, 'INT'_5) => ['int'_0, 'int'_0)
    ['INT'_5, 'a'_6) => ['int'_0, 'a'_2)
)"},
      // A simple function-like macro.
      {R"cpp(
    #define INT(a) const int
    INT(10+10) a;
  )cpp",
       R"(expanded tokens:
  const int a ;
file './input.cpp'
  spelled tokens:
    # define INT ( a ) const int INT ( 10 + 10 ) a ;
  mappings:
    ['#'_0, 'INT'_8) => ['const'_0, 'const'_0)
    ['INT'_8, 'a'_14) => ['const'_0, 'a'_2)
)"},
      // Recursive macro replacements.
      {R"cpp(
    #define ID(X) X
    #define INT int const
    ID(ID(INT)) a;
  )cpp",
       R"(expanded tokens:
  int const a ;
file './input.cpp'
  spelled tokens:
    # define ID ( X ) X # define INT int const ID ( ID ( INT ) ) a ;
  mappings:
    ['#'_0, 'ID'_12) => ['int'_0, 'int'_0)
    ['ID'_12, 'a'_19) => ['int'_0, 'a'_2)
)"},
      // A little more complicated recursive macro replacements.
      {R"cpp(
    #define ADD(X, Y) X+Y
    #define MULT(X, Y) X*Y

    int a = ADD(MULT(1,2), MULT(3,ADD(4,5)));
  )cpp",
       "expanded tokens:\n"
       "  int a = 1 * 2 + 3 * 4 + 5 ;\n"
       "file './input.cpp'\n"
       "  spelled tokens:\n"
       "    # define ADD ( X , Y ) X + Y # define MULT ( X , Y ) X * Y int "
       "a = ADD ( MULT ( 1 , 2 ) , MULT ( 3 , ADD ( 4 , 5 ) ) ) ;\n"
       "  mappings:\n"
       "    ['#'_0, 'int'_22) => ['int'_0, 'int'_0)\n"
       "    ['ADD'_25, ';'_46) => ['1'_3, ';'_12)\n"},
      // Empty macro replacement.
      {R"cpp(
    #define EMPTY
    #define EMPTY_FUNC(X)
    EMPTY
    EMPTY_FUNC(1+2+3)
    )cpp",
       R"(expanded tokens:
  <empty>
file './input.cpp'
  spelled tokens:
    # define EMPTY # define EMPTY_FUNC ( X ) EMPTY EMPTY_FUNC ( 1 + 2 + 3 )
  mappings:
    ['#'_0, '<eof>'_18) => ['<eof>'_0, '<eof>'_0)
)"},
      // File ends with a macro replacement.
      {R"cpp(
    #define FOO 10+10;
    int a = FOO
    )cpp",
       R"(expanded tokens:
  int a = 10 + 10 ;
file './input.cpp'
  spelled tokens:
    # define FOO 10 + 10 ; int a = FOO
  mappings:
    ['#'_0, 'int'_7) => ['int'_0, 'int'_0)
    ['FOO'_10, '<eof>'_11) => ['10'_3, '<eof>'_7)
)"}};

  // The dump is streamed as well so a failure shows the actual output.
  for (auto &Test : TestCases)
    EXPECT_EQ(Test.second, collectAndDump(Test.first))
        << collectAndDump(Test.first);
}

// Tokens whose text is not a plain slice of one source line: results of '##'
// and tokens containing backslash-newline sequences.
TEST_F(TokenCollectorTest, SpecialTokens) {
  // Tokens coming from concatenations.
  recordTokens(R"cpp(
    #define CONCAT(a, b) a ## b
    int a = CONCAT(1, 2);
  )cpp");
  EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()),
              Contains(HasText("12")));
  // Multi-line tokens with slashes at the end.
  recordTokens("i\\\nn\\\nt");
  EXPECT_THAT(Buffer.expandedTokens(),
              ElementsAre(AllOf(Kind(tok::kw_int), HasText("i\\\nn\\\nt")),
                          Kind(tok::eof)));
  // FIXME: test tokens with digraphs and UCN identifiers.
}

// Both '>>' occurrences are expected to be recorded as a single
// tok::greatergreater token covering the annotated range.
TEST_F(TokenCollectorTest, LateBoundTokens) {
  // The parser eventually breaks the first '>>' into two tokens ('>' and '>'),
  // but we choose to record them as a single token (for now).
  llvm::Annotations Code(R"cpp(
    template <class T>
    struct foo { int a; };
    int bar = foo<foo<int$br[[>>]]().a;
    int baz = 10 $op[[>>]] 2;
  )cpp");
  recordTokens(Code.code());
  EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()),
              AllOf(Contains(AllOf(Kind(tok::greatergreater),
                                   RangeIs(Code.range("br")))),
                    Contains(AllOf(Kind(tok::greatergreater),
                                   RangeIs(Code.range("op"))))));
}

// Only the "expanded tokens" prefix of the dump is compared (StartsWith); each
// token of the input is expected to appear exactly once.
TEST_F(TokenCollectorTest, DelayedParsing) {
  llvm::StringLiteral Code = R"cpp(
    struct Foo {
      int method() {
        // Parser will visit method bodies and initializers multiple times, but
        // TokenBuffer should only record the first walk over the tokens;
        return 100;
      }
      int a = 10;

      struct Subclass {
        void foo() {
          Foo().method();
        }
      };
    };
  )cpp";
  std::string ExpectedTokens =
      "expanded tokens:\n"
      "  struct Foo { int method ( ) { return 100 ; } int a = 10 ; struct "
      "Subclass { void foo ( ) { Foo ( ) . method ( ) ; } } ; } ;\n";
  EXPECT_THAT(collectAndDump(Code), StartsWith(ExpectedTokens));
}

// Golden-output check for a main file including foo.h, which includes bar.h:
// the dump lists spelled tokens and mappings for each of the three files.
TEST_F(TokenCollectorTest, MultiFile) {
  addFile("./foo.h", R"cpp(
    #define ADD(X, Y) X+Y
    int a = 100;
    #include "bar.h"
  )cpp");
  addFile("./bar.h", R"cpp(
    int b = ADD(1, 2);
    #define MULT(X, Y) X*Y
  )cpp");
  llvm::StringLiteral Code = R"cpp(
    #include "foo.h"
    int c = ADD(1, MULT(2,3));
  )cpp";

  std::string Expected = R"(expanded tokens:
  int a = 100 ; int b = 1 + 2 ; int c = 1 + 2 * 3 ;
file './input.cpp'
  spelled tokens:
    # include "foo.h" int c = ADD ( 1 , MULT ( 2 , 3 ) ) ;
  mappings:
    ['#'_0, 'int'_3) => ['int'_12, 'int'_12)
    ['ADD'_6, ';'_17) => ['1'_15, ';'_20)
file './foo.h'
  spelled tokens:
    # define ADD ( X , Y ) X + Y int a = 100 ; # include "bar.h"
  mappings:
    ['#'_0, 'int'_11) => ['int'_0, 'int'_0)
    ['#'_16, '<eof>'_19) => ['int'_5, 'int'_5)
file './bar.h'
  spelled tokens:
    int b = ADD ( 1 , 2 ) ; # define MULT ( X , Y ) X * Y
  mappings:
    ['ADD'_3, ';'_9) => ['1'_8, ';'_11)
    ['#'_10, '<eof>'_21) => ['int'_12, 'int'_12)
)";

  EXPECT_EQ(Expected, collectAndDump(Code))
      << "input: " << Code << "\nresults: " << collectAndDump(Code);
}

// Same fixture under a separate name; groups the tests of TokenBuffer queries.
class TokenBufferTest : public TokenCollectorTest {};

// Exercises TokenBuffer::spelledForExpanded() on plain tokens, simple and
// nested macro expansions, empty expansions and directives.
TEST_F(TokenBufferTest, SpelledByExpanded) {
  recordTokens(R"cpp(
    a1 a2 a3 b1 b2
  )cpp");

  // Sanity check: expanded and spelled tokens are stored separately.
  EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2"))));
  // Searching for subranges of expanded tokens should give the corresponding
  // spelled ones.
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 b1 b2")),
              ValueIs(SameRange(findSpelled("a1 a2 a3 b1 b2"))));
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
              ValueIs(SameRange(findSpelled("a1 a2 a3"))));
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
              ValueIs(SameRange(findSpelled("b1 b2"))));

  // Test search on simple macro expansions.
  recordTokens(R"cpp(
    #define A a1 a2 a3
    #define B b1 b2

    A split B
  )cpp");
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")),
              ValueIs(SameRange(findSpelled("A split B"))));
  // "A" and "B" alone are not unique in the spelled stream (they also occur in
  // the #define lines), so a longer unique query is searched and then trimmed.
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
              ValueIs(SameRange(findSpelled("A split").drop_back())));
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
              ValueIs(SameRange(findSpelled("split B").drop_front())));
  // Ranges not fully covering macro invocations should fail.
  EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None);
  EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("b2")), llvm::None);
  EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1 b2")),
            llvm::None);

  // Recursive macro invocations.
  recordTokens(R"cpp(
    #define ID(x) x
    #define B b1 b2

    ID(ID(ID(a1) a2 a3)) split ID(B)
  )cpp");

  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
              ValueIs(SameRange(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) )"))));
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
              ValueIs(SameRange(findSpelled("ID ( B )"))));
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")),
              ValueIs(SameRange(findSpelled(
                  "ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )"))));
  // Ranges crossing macro call boundaries.
  EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1")),
            llvm::None);
  EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1")),
            llvm::None);
  // FIXME: next two examples should map to macro arguments, but currently they
  //        fail.
  EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2")), llvm::None);
  EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None);

  // Empty macro expansions.
  recordTokens(R"cpp(
    #define EMPTY
    #define ID(X) X

    EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1
    EMPTY EMPTY ID(4 5 6) split2
    ID(7 8 9) EMPTY EMPTY
  )cpp");
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("1 2 3")),
              ValueIs(SameRange(findSpelled("ID ( 1 2 3 )"))));
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("4 5 6")),
              ValueIs(SameRange(findSpelled("ID ( 4 5 6 )"))));
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("7 8 9")),
              ValueIs(SameRange(findSpelled("ID ( 7 8 9 )"))));

  // Empty mappings coming from various directives.
  recordTokens(R"cpp(
    #define ID(X) X
    ID(1)
    #pragma lalala
    not_mapped
  )cpp");
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("not_mapped")),
              ValueIs(SameRange(findSpelled("not_mapped"))));
}

// Checks Token::range() for a single token and for a pair of tokens against
// the annotated offsets in the main file.
TEST_F(TokenBufferTest, TokensToFileRange) {
  addFile("./foo.h", "token_from_header");
  llvm::Annotations Code(R"cpp(
    #define FOO token_from_expansion
    #include "./foo.h"
    $all[[$i[[int]] a = FOO;]]
  )cpp");
  recordTokens(Code.code());

  auto &SM = *SourceMgr;

  // Two simple examples.
  auto Int = findExpanded("int").front();
  auto Semi = findExpanded(";").front();
  EXPECT_EQ(Int.range(SM), FileRange(SM.getMainFileID(), Code.range("i").Begin,
                                     Code.range("i").End));
  EXPECT_EQ(syntax::Token::range(SM, Int, Semi),
            FileRange(SM.getMainFileID(), Code.range("all").Begin,
                      Code.range("all").End));
  // We don't test assertion failures because death tests are slow.
}

} // namespace