1 //===- TokensTest.cpp -----------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/Tooling/Syntax/Tokens.h" 10 #include "clang/AST/ASTConsumer.h" 11 #include "clang/AST/Expr.h" 12 #include "clang/Basic/Diagnostic.h" 13 #include "clang/Basic/DiagnosticIDs.h" 14 #include "clang/Basic/DiagnosticOptions.h" 15 #include "clang/Basic/FileManager.h" 16 #include "clang/Basic/FileSystemOptions.h" 17 #include "clang/Basic/LLVM.h" 18 #include "clang/Basic/LangOptions.h" 19 #include "clang/Basic/SourceLocation.h" 20 #include "clang/Basic/SourceManager.h" 21 #include "clang/Basic/TokenKinds.def" 22 #include "clang/Basic/TokenKinds.h" 23 #include "clang/Frontend/CompilerInstance.h" 24 #include "clang/Frontend/FrontendAction.h" 25 #include "clang/Frontend/Utils.h" 26 #include "clang/Lex/Lexer.h" 27 #include "clang/Lex/PreprocessorOptions.h" 28 #include "clang/Lex/Token.h" 29 #include "clang/Tooling/Tooling.h" 30 #include "llvm/ADT/ArrayRef.h" 31 #include "llvm/ADT/IntrusiveRefCntPtr.h" 32 #include "llvm/ADT/None.h" 33 #include "llvm/ADT/Optional.h" 34 #include "llvm/ADT/STLExtras.h" 35 #include "llvm/ADT/StringRef.h" 36 #include "llvm/Support/FormatVariadic.h" 37 #include "llvm/Support/MemoryBuffer.h" 38 #include "llvm/Support/VirtualFileSystem.h" 39 #include "llvm/Support/raw_os_ostream.h" 40 #include "llvm/Support/raw_ostream.h" 41 #include "llvm/Testing/Support/Annotations.h" 42 #include "llvm/Testing/Support/SupportHelpers.h" 43 #include <cassert> 44 #include <cstdlib> 45 #include <gmock/gmock.h> 46 #include <gtest/gtest.h> 47 #include <memory> 48 #include <ostream> 49 #include <string> 50 51 using namespace clang; 52 using namespace clang::syntax; 53 54 using llvm::ValueIs; 55 using ::testing::AllOf; 56 using ::testing::Contains; 57 using ::testing::ElementsAre; 58 using ::testing::Field; 59 using ::testing::Matcher; 60 using ::testing::Not; 61 using ::testing::StartsWith; 62 63 namespace { 64 // Checks the passed ArrayRef<T> has the same begin() and end() iterators as the 65 // argument. 66 MATCHER_P(SameRange, A, "") { 67 return A.begin() == arg.begin() && A.end() == arg.end(); 68 } 69 70 Matcher<TokenBuffer::Expansion> 71 IsExpansion(Matcher<llvm::ArrayRef<syntax::Token>> Spelled, 72 Matcher<llvm::ArrayRef<syntax::Token>> Expanded) { 73 return AllOf(Field(&TokenBuffer::Expansion::Spelled, Spelled), 74 Field(&TokenBuffer::Expansion::Expanded, Expanded)); 75 } 76 // Matchers for syntax::Token. 77 MATCHER_P(Kind, K, "") { return arg.kind() == K; } 78 MATCHER_P2(HasText, Text, SourceMgr, "") { 79 return arg.text(*SourceMgr) == Text; 80 } 81 /// Checks the start and end location of a token are equal to SourceRng. 82 MATCHER_P(RangeIs, SourceRng, "") { 83 return arg.location() == SourceRng.first && 84 arg.endLocation() == SourceRng.second; 85 } 86 87 class TokenCollectorTest : public ::testing::Test { 88 public: 89 /// Run the clang frontend, collect the preprocessed tokens from the frontend 90 /// invocation and store them in this->Buffer. 91 /// This also clears SourceManager before running the compiler. 92 void recordTokens(llvm::StringRef Code) { 93 class RecordTokens : public ASTFrontendAction { 94 public: 95 explicit RecordTokens(TokenBuffer &Result) : Result(Result) {} 96 97 bool BeginSourceFileAction(CompilerInstance &CI) override { 98 assert(!Collector && "expected only a single call to BeginSourceFile"); 99 Collector.emplace(CI.getPreprocessor()); 100 return true; 101 } 102 void EndSourceFileAction() override { 103 assert(Collector && "BeginSourceFileAction was never called"); 104 Result = std::move(*Collector).consume(); 105 } 106 107 std::unique_ptr<ASTConsumer> 108 CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override { 109 return llvm::make_unique<ASTConsumer>(); 110 } 111 112 private: 113 TokenBuffer &Result; 114 llvm::Optional<TokenCollector> Collector; 115 }; 116 117 constexpr const char *FileName = "./input.cpp"; 118 FS->addFile(FileName, time_t(), llvm::MemoryBuffer::getMemBufferCopy("")); 119 // Prepare to run a compiler. 120 if (!Diags->getClient()) 121 Diags->setClient(new IgnoringDiagConsumer); 122 std::vector<const char *> Args = {"tok-test", "-std=c++03", "-fsyntax-only", 123 FileName}; 124 auto CI = createInvocationFromCommandLine(Args, Diags, FS); 125 assert(CI); 126 CI->getFrontendOpts().DisableFree = false; 127 CI->getPreprocessorOpts().addRemappedFile( 128 FileName, llvm::MemoryBuffer::getMemBufferCopy(Code).release()); 129 CompilerInstance Compiler; 130 Compiler.setInvocation(std::move(CI)); 131 Compiler.setDiagnostics(Diags.get()); 132 Compiler.setFileManager(FileMgr.get()); 133 Compiler.setSourceManager(SourceMgr.get()); 134 135 this->Buffer = TokenBuffer(*SourceMgr); 136 RecordTokens Recorder(this->Buffer); 137 ASSERT_TRUE(Compiler.ExecuteAction(Recorder)) 138 << "failed to run the frontend"; 139 } 140 141 /// Record the tokens and return a test dump of the resulting buffer. 142 std::string collectAndDump(llvm::StringRef Code) { 143 recordTokens(Code); 144 return Buffer.dumpForTests(); 145 } 146 147 // Adds a file to the test VFS. 148 void addFile(llvm::StringRef Path, llvm::StringRef Contents) { 149 if (!FS->addFile(Path, time_t(), 150 llvm::MemoryBuffer::getMemBufferCopy(Contents))) { 151 ADD_FAILURE() << "could not add a file to VFS: " << Path; 152 } 153 } 154 155 /// Add a new file, run syntax::tokenize() on it and return the results. 156 std::vector<syntax::Token> tokenize(llvm::StringRef Text) { 157 // FIXME: pass proper LangOptions. 158 return syntax::tokenize( 159 SourceMgr->createFileID(llvm::MemoryBuffer::getMemBufferCopy(Text)), 160 *SourceMgr, LangOptions()); 161 } 162 163 // Specialized versions of matchers that hide the SourceManager from clients. 164 Matcher<syntax::Token> HasText(std::string Text) const { 165 return ::HasText(Text, SourceMgr.get()); 166 } 167 Matcher<syntax::Token> RangeIs(llvm::Annotations::Range R) const { 168 std::pair<SourceLocation, SourceLocation> Ls; 169 Ls.first = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID()) 170 .getLocWithOffset(R.Begin); 171 Ls.second = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID()) 172 .getLocWithOffset(R.End); 173 return ::RangeIs(Ls); 174 } 175 176 /// Finds a subrange in O(n * m). 177 template <class T, class U, class Eq> 178 llvm::ArrayRef<T> findSubrange(llvm::ArrayRef<U> Subrange, 179 llvm::ArrayRef<T> Range, Eq F) { 180 for (auto Begin = Range.begin(); Begin < Range.end(); ++Begin) { 181 auto It = Begin; 182 for (auto ItSub = Subrange.begin(); 183 ItSub != Subrange.end() && It != Range.end(); ++ItSub, ++It) { 184 if (!F(*ItSub, *It)) 185 goto continue_outer; 186 } 187 return llvm::makeArrayRef(Begin, It); 188 continue_outer:; 189 } 190 return llvm::makeArrayRef(Range.end(), Range.end()); 191 } 192 193 /// Finds a subrange in \p Tokens that match the tokens specified in \p Query. 194 /// The match should be unique. \p Query is a whitespace-separated list of 195 /// tokens to search for. 196 llvm::ArrayRef<syntax::Token> 197 findTokenRange(llvm::StringRef Query, llvm::ArrayRef<syntax::Token> Tokens) { 198 llvm::SmallVector<llvm::StringRef, 8> QueryTokens; 199 Query.split(QueryTokens, ' ', /*MaxSplit=*/-1, /*KeepEmpty=*/false); 200 if (QueryTokens.empty()) { 201 ADD_FAILURE() << "will not look for an empty list of tokens"; 202 std::abort(); 203 } 204 // An equality test for search. 205 auto TextMatches = [this](llvm::StringRef Q, const syntax::Token &T) { 206 return Q == T.text(*SourceMgr); 207 }; 208 // Find a match. 209 auto Found = 210 findSubrange(llvm::makeArrayRef(QueryTokens), Tokens, TextMatches); 211 if (Found.begin() == Tokens.end()) { 212 ADD_FAILURE() << "could not find the subrange for " << Query; 213 std::abort(); 214 } 215 // Check that the match is unique. 216 if (findSubrange(llvm::makeArrayRef(QueryTokens), 217 llvm::makeArrayRef(Found.end(), Tokens.end()), TextMatches) 218 .begin() != Tokens.end()) { 219 ADD_FAILURE() << "match is not unique for " << Query; 220 std::abort(); 221 } 222 return Found; 223 }; 224 225 // Specialized versions of findTokenRange for expanded and spelled tokens. 226 llvm::ArrayRef<syntax::Token> findExpanded(llvm::StringRef Query) { 227 return findTokenRange(Query, Buffer.expandedTokens()); 228 } 229 llvm::ArrayRef<syntax::Token> findSpelled(llvm::StringRef Query, 230 FileID File = FileID()) { 231 if (!File.isValid()) 232 File = SourceMgr->getMainFileID(); 233 return findTokenRange(Query, Buffer.spelledTokens(File)); 234 } 235 236 // Data fields. 237 llvm::IntrusiveRefCntPtr<DiagnosticsEngine> Diags = 238 new DiagnosticsEngine(new DiagnosticIDs, new DiagnosticOptions); 239 IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS = 240 new llvm::vfs::InMemoryFileSystem; 241 llvm::IntrusiveRefCntPtr<FileManager> FileMgr = 242 new FileManager(FileSystemOptions(), FS); 243 llvm::IntrusiveRefCntPtr<SourceManager> SourceMgr = 244 new SourceManager(*Diags, *FileMgr); 245 /// Contains last result of calling recordTokens(). 246 TokenBuffer Buffer = TokenBuffer(*SourceMgr); 247 }; 248 249 TEST_F(TokenCollectorTest, RawMode) { 250 EXPECT_THAT(tokenize("int main() {}"), 251 ElementsAre(Kind(tok::kw_int), 252 AllOf(HasText("main"), Kind(tok::identifier)), 253 Kind(tok::l_paren), Kind(tok::r_paren), 254 Kind(tok::l_brace), Kind(tok::r_brace))); 255 // Comments are ignored for now. 256 EXPECT_THAT(tokenize("/* foo */int a; // more comments"), 257 ElementsAre(Kind(tok::kw_int), 258 AllOf(HasText("a"), Kind(tok::identifier)), 259 Kind(tok::semi))); 260 } 261 262 TEST_F(TokenCollectorTest, Basic) { 263 std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = { 264 {"int main() {}", 265 R"(expanded tokens: 266 int main ( ) { } 267 file './input.cpp' 268 spelled tokens: 269 int main ( ) { } 270 no mappings. 271 )"}, 272 // All kinds of whitespace are ignored. 273 {"\t\n int\t\n main\t\n (\t\n )\t\n{\t\n }\t\n", 274 R"(expanded tokens: 275 int main ( ) { } 276 file './input.cpp' 277 spelled tokens: 278 int main ( ) { } 279 no mappings. 280 )"}, 281 // Annotation tokens are ignored. 282 {R"cpp( 283 #pragma GCC visibility push (public) 284 #pragma GCC visibility pop 285 )cpp", 286 R"(expanded tokens: 287 <empty> 288 file './input.cpp' 289 spelled tokens: 290 # pragma GCC visibility push ( public ) # pragma GCC visibility pop 291 mappings: 292 ['#'_0, '<eof>'_13) => ['<eof>'_0, '<eof>'_0) 293 )"}, 294 // Empty files should not crash. 295 {R"cpp()cpp", R"(expanded tokens: 296 <empty> 297 file './input.cpp' 298 spelled tokens: 299 <empty> 300 no mappings. 301 )"}}; 302 for (auto &Test : TestCases) 303 EXPECT_EQ(collectAndDump(Test.first), Test.second) 304 << collectAndDump(Test.first); 305 } 306 307 TEST_F(TokenCollectorTest, Locations) { 308 // Check locations of the tokens. 309 llvm::Annotations Code(R"cpp( 310 $r1[[int]] $r2[[a]] $r3[[=]] $r4[["foo bar baz"]] $r5[[;]] 311 )cpp"); 312 recordTokens(Code.code()); 313 // Check expanded tokens. 314 EXPECT_THAT( 315 Buffer.expandedTokens(), 316 ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))), 317 AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))), 318 AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))), 319 AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))), 320 AllOf(Kind(tok::semi), RangeIs(Code.range("r5"))), 321 Kind(tok::eof))); 322 // Check spelled tokens. 323 EXPECT_THAT( 324 Buffer.spelledTokens(SourceMgr->getMainFileID()), 325 ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))), 326 AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))), 327 AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))), 328 AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))), 329 AllOf(Kind(tok::semi), RangeIs(Code.range("r5"))))); 330 } 331 332 TEST_F(TokenCollectorTest, MacroDirectives) { 333 // Macro directives are not stored anywhere at the moment. 334 std::string Code = R"cpp( 335 #define FOO a 336 #include "unresolved_file.h" 337 #undef FOO 338 #ifdef X 339 #else 340 #endif 341 #ifndef Y 342 #endif 343 #if 1 344 #elif 2 345 #else 346 #endif 347 #pragma once 348 #pragma something lalala 349 350 int a; 351 )cpp"; 352 std::string Expected = 353 "expanded tokens:\n" 354 " int a ;\n" 355 "file './input.cpp'\n" 356 " spelled tokens:\n" 357 " # define FOO a # include \"unresolved_file.h\" # undef FOO " 358 "# ifdef X # else # endif # ifndef Y # endif # if 1 # elif 2 # else " 359 "# endif # pragma once # pragma something lalala int a ;\n" 360 " mappings:\n" 361 " ['#'_0, 'int'_39) => ['int'_0, 'int'_0)\n"; 362 EXPECT_EQ(collectAndDump(Code), Expected); 363 } 364 365 TEST_F(TokenCollectorTest, MacroReplacements) { 366 std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = { 367 // A simple object-like macro. 368 {R"cpp( 369 #define INT int const 370 INT a; 371 )cpp", 372 R"(expanded tokens: 373 int const a ; 374 file './input.cpp' 375 spelled tokens: 376 # define INT int const INT a ; 377 mappings: 378 ['#'_0, 'INT'_5) => ['int'_0, 'int'_0) 379 ['INT'_5, 'a'_6) => ['int'_0, 'a'_2) 380 )"}, 381 // A simple function-like macro. 382 {R"cpp( 383 #define INT(a) const int 384 INT(10+10) a; 385 )cpp", 386 R"(expanded tokens: 387 const int a ; 388 file './input.cpp' 389 spelled tokens: 390 # define INT ( a ) const int INT ( 10 + 10 ) a ; 391 mappings: 392 ['#'_0, 'INT'_8) => ['const'_0, 'const'_0) 393 ['INT'_8, 'a'_14) => ['const'_0, 'a'_2) 394 )"}, 395 // Recursive macro replacements. 396 {R"cpp( 397 #define ID(X) X 398 #define INT int const 399 ID(ID(INT)) a; 400 )cpp", 401 R"(expanded tokens: 402 int const a ; 403 file './input.cpp' 404 spelled tokens: 405 # define ID ( X ) X # define INT int const ID ( ID ( INT ) ) a ; 406 mappings: 407 ['#'_0, 'ID'_12) => ['int'_0, 'int'_0) 408 ['ID'_12, 'a'_19) => ['int'_0, 'a'_2) 409 )"}, 410 // A little more complicated recursive macro replacements. 411 {R"cpp( 412 #define ADD(X, Y) X+Y 413 #define MULT(X, Y) X*Y 414 415 int a = ADD(MULT(1,2), MULT(3,ADD(4,5))); 416 )cpp", 417 "expanded tokens:\n" 418 " int a = 1 * 2 + 3 * 4 + 5 ;\n" 419 "file './input.cpp'\n" 420 " spelled tokens:\n" 421 " # define ADD ( X , Y ) X + Y # define MULT ( X , Y ) X * Y int " 422 "a = ADD ( MULT ( 1 , 2 ) , MULT ( 3 , ADD ( 4 , 5 ) ) ) ;\n" 423 " mappings:\n" 424 " ['#'_0, 'int'_22) => ['int'_0, 'int'_0)\n" 425 " ['ADD'_25, ';'_46) => ['1'_3, ';'_12)\n"}, 426 // Empty macro replacement. 427 // FIXME: the #define directives should not be glued together. 428 {R"cpp( 429 #define EMPTY 430 #define EMPTY_FUNC(X) 431 EMPTY 432 EMPTY_FUNC(1+2+3) 433 )cpp", 434 R"(expanded tokens: 435 <empty> 436 file './input.cpp' 437 spelled tokens: 438 # define EMPTY # define EMPTY_FUNC ( X ) EMPTY EMPTY_FUNC ( 1 + 2 + 3 ) 439 mappings: 440 ['#'_0, 'EMPTY'_9) => ['<eof>'_0, '<eof>'_0) 441 ['EMPTY'_9, 'EMPTY_FUNC'_10) => ['<eof>'_0, '<eof>'_0) 442 ['EMPTY_FUNC'_10, '<eof>'_18) => ['<eof>'_0, '<eof>'_0) 443 )"}, 444 // File ends with a macro replacement. 445 {R"cpp( 446 #define FOO 10+10; 447 int a = FOO 448 )cpp", 449 R"(expanded tokens: 450 int a = 10 + 10 ; 451 file './input.cpp' 452 spelled tokens: 453 # define FOO 10 + 10 ; int a = FOO 454 mappings: 455 ['#'_0, 'int'_7) => ['int'_0, 'int'_0) 456 ['FOO'_10, '<eof>'_11) => ['10'_3, '<eof>'_7) 457 )"}}; 458 459 for (auto &Test : TestCases) 460 EXPECT_EQ(Test.second, collectAndDump(Test.first)) 461 << collectAndDump(Test.first); 462 } 463 464 TEST_F(TokenCollectorTest, SpecialTokens) { 465 // Tokens coming from concatenations. 466 recordTokens(R"cpp( 467 #define CONCAT(a, b) a ## b 468 int a = CONCAT(1, 2); 469 )cpp"); 470 EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()), 471 Contains(HasText("12"))); 472 // Multi-line tokens with slashes at the end. 473 recordTokens("i\\\nn\\\nt"); 474 EXPECT_THAT(Buffer.expandedTokens(), 475 ElementsAre(AllOf(Kind(tok::kw_int), HasText("i\\\nn\\\nt")), 476 Kind(tok::eof))); 477 // FIXME: test tokens with digraphs and UCN identifiers. 478 } 479 480 TEST_F(TokenCollectorTest, LateBoundTokens) { 481 // The parser eventually breaks the first '>>' into two tokens ('>' and '>'), 482 // but we choose to record them as a single token (for now). 483 llvm::Annotations Code(R"cpp( 484 template <class T> 485 struct foo { int a; }; 486 int bar = foo<foo<int$br[[>>]]().a; 487 int baz = 10 $op[[>>]] 2; 488 )cpp"); 489 recordTokens(Code.code()); 490 EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()), 491 AllOf(Contains(AllOf(Kind(tok::greatergreater), 492 RangeIs(Code.range("br")))), 493 Contains(AllOf(Kind(tok::greatergreater), 494 RangeIs(Code.range("op")))))); 495 } 496 497 TEST_F(TokenCollectorTest, DelayedParsing) { 498 llvm::StringLiteral Code = R"cpp( 499 struct Foo { 500 int method() { 501 // Parser will visit method bodies and initializers multiple times, but 502 // TokenBuffer should only record the first walk over the tokens; 503 return 100; 504 } 505 int a = 10; 506 507 struct Subclass { 508 void foo() { 509 Foo().method(); 510 } 511 }; 512 }; 513 )cpp"; 514 std::string ExpectedTokens = 515 "expanded tokens:\n" 516 " struct Foo { int method ( ) { return 100 ; } int a = 10 ; struct " 517 "Subclass { void foo ( ) { Foo ( ) . method ( ) ; } } ; } ;\n"; 518 EXPECT_THAT(collectAndDump(Code), StartsWith(ExpectedTokens)); 519 } 520 521 TEST_F(TokenCollectorTest, MultiFile) { 522 addFile("./foo.h", R"cpp( 523 #define ADD(X, Y) X+Y 524 int a = 100; 525 #include "bar.h" 526 )cpp"); 527 addFile("./bar.h", R"cpp( 528 int b = ADD(1, 2); 529 #define MULT(X, Y) X*Y 530 )cpp"); 531 llvm::StringLiteral Code = R"cpp( 532 #include "foo.h" 533 int c = ADD(1, MULT(2,3)); 534 )cpp"; 535 536 std::string Expected = R"(expanded tokens: 537 int a = 100 ; int b = 1 + 2 ; int c = 1 + 2 * 3 ; 538 file './input.cpp' 539 spelled tokens: 540 # include "foo.h" int c = ADD ( 1 , MULT ( 2 , 3 ) ) ; 541 mappings: 542 ['#'_0, 'int'_3) => ['int'_12, 'int'_12) 543 ['ADD'_6, ';'_17) => ['1'_15, ';'_20) 544 file './foo.h' 545 spelled tokens: 546 # define ADD ( X , Y ) X + Y int a = 100 ; # include "bar.h" 547 mappings: 548 ['#'_0, 'int'_11) => ['int'_0, 'int'_0) 549 ['#'_16, '<eof>'_19) => ['int'_5, 'int'_5) 550 file './bar.h' 551 spelled tokens: 552 int b = ADD ( 1 , 2 ) ; # define MULT ( X , Y ) X * Y 553 mappings: 554 ['ADD'_3, ';'_9) => ['1'_8, ';'_11) 555 ['#'_10, '<eof>'_21) => ['int'_12, 'int'_12) 556 )"; 557 558 EXPECT_EQ(Expected, collectAndDump(Code)) 559 << "input: " << Code << "\nresults: " << collectAndDump(Code); 560 } 561 562 class TokenBufferTest : public TokenCollectorTest {}; 563 564 TEST_F(TokenBufferTest, SpelledByExpanded) { 565 recordTokens(R"cpp( 566 a1 a2 a3 b1 b2 567 )cpp"); 568 569 // Sanity check: expanded and spelled tokens are stored separately. 570 EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2")))); 571 // Searching for subranges of expanded tokens should give the corresponding 572 // spelled ones. 573 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 b1 b2")), 574 ValueIs(SameRange(findSpelled("a1 a2 a3 b1 b2")))); 575 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")), 576 ValueIs(SameRange(findSpelled("a1 a2 a3")))); 577 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")), 578 ValueIs(SameRange(findSpelled("b1 b2")))); 579 580 // Test search on simple macro expansions. 581 recordTokens(R"cpp( 582 #define A a1 a2 a3 583 #define B b1 b2 584 585 A split B 586 )cpp"); 587 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")), 588 ValueIs(SameRange(findSpelled("A split B")))); 589 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")), 590 ValueIs(SameRange(findSpelled("A split").drop_back()))); 591 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")), 592 ValueIs(SameRange(findSpelled("split B").drop_front()))); 593 // Ranges not fully covering macro invocations should fail. 594 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None); 595 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("b2")), llvm::None); 596 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1 b2")), 597 llvm::None); 598 599 // Recursive macro invocations. 600 recordTokens(R"cpp( 601 #define ID(x) x 602 #define B b1 b2 603 604 ID(ID(ID(a1) a2 a3)) split ID(B) 605 )cpp"); 606 607 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")), 608 ValueIs(SameRange(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) )")))); 609 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")), 610 ValueIs(SameRange(findSpelled("ID ( B )")))); 611 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")), 612 ValueIs(SameRange(findSpelled( 613 "ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )")))); 614 // Ranges crossing macro call boundaries. 615 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1")), 616 llvm::None); 617 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1")), 618 llvm::None); 619 // FIXME: next two examples should map to macro arguments, but currently they 620 // fail. 621 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2")), llvm::None); 622 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None); 623 624 // Empty macro expansions. 625 recordTokens(R"cpp( 626 #define EMPTY 627 #define ID(X) X 628 629 EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1 630 EMPTY EMPTY ID(4 5 6) split2 631 ID(7 8 9) EMPTY EMPTY 632 )cpp"); 633 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("1 2 3")), 634 ValueIs(SameRange(findSpelled("ID ( 1 2 3 )")))); 635 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("4 5 6")), 636 ValueIs(SameRange(findSpelled("ID ( 4 5 6 )")))); 637 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("7 8 9")), 638 ValueIs(SameRange(findSpelled("ID ( 7 8 9 )")))); 639 640 // Empty mappings coming from various directives. 641 recordTokens(R"cpp( 642 #define ID(X) X 643 ID(1) 644 #pragma lalala 645 not_mapped 646 )cpp"); 647 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("not_mapped")), 648 ValueIs(SameRange(findSpelled("not_mapped")))); 649 } 650 651 TEST_F(TokenBufferTest, ExpansionStartingAt) { 652 // Object-like macro expansions. 653 recordTokens(R"cpp( 654 #define FOO 3+4 655 int a = FOO 1; 656 int b = FOO 2; 657 )cpp"); 658 659 llvm::ArrayRef<syntax::Token> Foo1 = findSpelled("FOO 1").drop_back(); 660 EXPECT_THAT( 661 Buffer.expansionStartingAt(Foo1.data()), 662 ValueIs(IsExpansion(SameRange(Foo1), 663 SameRange(findExpanded("3 + 4 1").drop_back())))); 664 665 llvm::ArrayRef<syntax::Token> Foo2 = findSpelled("FOO 2").drop_back(); 666 EXPECT_THAT( 667 Buffer.expansionStartingAt(Foo2.data()), 668 ValueIs(IsExpansion(SameRange(Foo2), 669 SameRange(findExpanded("3 + 4 2").drop_back())))); 670 671 // Function-like macro expansions. 672 recordTokens(R"cpp( 673 #define ID(X) X 674 int a = ID(1+2+3); 675 int b = ID(ID(2+3+4)); 676 )cpp"); 677 678 llvm::ArrayRef<syntax::Token> ID1 = findSpelled("ID ( 1 + 2 + 3 )"); 679 EXPECT_THAT(Buffer.expansionStartingAt(&ID1.front()), 680 ValueIs(IsExpansion(SameRange(ID1), 681 SameRange(findExpanded("1 + 2 + 3"))))); 682 // Only the first spelled token should be found. 683 for (const auto &T : ID1.drop_front()) 684 EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None); 685 686 llvm::ArrayRef<syntax::Token> ID2 = findSpelled("ID ( ID ( 2 + 3 + 4 ) )"); 687 EXPECT_THAT(Buffer.expansionStartingAt(&ID2.front()), 688 ValueIs(IsExpansion(SameRange(ID2), 689 SameRange(findExpanded("2 + 3 + 4"))))); 690 // Only the first spelled token should be found. 691 for (const auto &T : ID2.drop_front()) 692 EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None); 693 694 // PP directives. 695 recordTokens(R"cpp( 696 #define FOO 1 697 int a = FOO; 698 #pragma once 699 int b = 1; 700 )cpp"); 701 702 llvm::ArrayRef<syntax::Token> DefineFoo = findSpelled("# define FOO 1"); 703 EXPECT_THAT( 704 Buffer.expansionStartingAt(&DefineFoo.front()), 705 ValueIs(IsExpansion(SameRange(DefineFoo), 706 SameRange(findExpanded("int a").take_front(0))))); 707 // Only the first spelled token should be found. 708 for (const auto &T : DefineFoo.drop_front()) 709 EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None); 710 711 llvm::ArrayRef<syntax::Token> PragmaOnce = findSpelled("# pragma once"); 712 EXPECT_THAT( 713 Buffer.expansionStartingAt(&PragmaOnce.front()), 714 ValueIs(IsExpansion(SameRange(PragmaOnce), 715 SameRange(findExpanded("int b").take_front(0))))); 716 // Only the first spelled token should be found. 717 for (const auto &T : PragmaOnce.drop_front()) 718 EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None); 719 } 720 721 TEST_F(TokenBufferTest, TokensToFileRange) { 722 addFile("./foo.h", "token_from_header"); 723 llvm::Annotations Code(R"cpp( 724 #define FOO token_from_expansion 725 #include "./foo.h" 726 $all[[$i[[int]] a = FOO;]] 727 )cpp"); 728 recordTokens(Code.code()); 729 730 auto &SM = *SourceMgr; 731 732 // Two simple examples. 733 auto Int = findExpanded("int").front(); 734 auto Semi = findExpanded(";").front(); 735 EXPECT_EQ(Int.range(SM), FileRange(SM.getMainFileID(), Code.range("i").Begin, 736 Code.range("i").End)); 737 EXPECT_EQ(syntax::Token::range(SM, Int, Semi), 738 FileRange(SM.getMainFileID(), Code.range("all").Begin, 739 Code.range("all").End)); 740 // We don't test assertion failures because death tests are slow. 741 } 742 743 } // namespace 744