1 //===- TokensTest.cpp -----------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/Tooling/Syntax/Tokens.h" 10 #include "clang/AST/ASTConsumer.h" 11 #include "clang/AST/Expr.h" 12 #include "clang/Basic/Diagnostic.h" 13 #include "clang/Basic/DiagnosticIDs.h" 14 #include "clang/Basic/DiagnosticOptions.h" 15 #include "clang/Basic/FileManager.h" 16 #include "clang/Basic/FileSystemOptions.h" 17 #include "clang/Basic/LLVM.h" 18 #include "clang/Basic/LangOptions.h" 19 #include "clang/Basic/SourceLocation.h" 20 #include "clang/Basic/SourceManager.h" 21 #include "clang/Basic/TokenKinds.def" 22 #include "clang/Basic/TokenKinds.h" 23 #include "clang/Frontend/CompilerInstance.h" 24 #include "clang/Frontend/FrontendAction.h" 25 #include "clang/Frontend/Utils.h" 26 #include "clang/Lex/Lexer.h" 27 #include "clang/Lex/PreprocessorOptions.h" 28 #include "clang/Lex/Token.h" 29 #include "clang/Tooling/Tooling.h" 30 #include "llvm/ADT/ArrayRef.h" 31 #include "llvm/ADT/IntrusiveRefCntPtr.h" 32 #include "llvm/ADT/None.h" 33 #include "llvm/ADT/Optional.h" 34 #include "llvm/ADT/STLExtras.h" 35 #include "llvm/ADT/StringRef.h" 36 #include "llvm/Support/FormatVariadic.h" 37 #include "llvm/Support/MemoryBuffer.h" 38 #include "llvm/Support/VirtualFileSystem.h" 39 #include "llvm/Support/raw_os_ostream.h" 40 #include "llvm/Support/raw_ostream.h" 41 #include "llvm/Testing/Support/Annotations.h" 42 #include "llvm/Testing/Support/SupportHelpers.h" 43 #include <cassert> 44 #include <cstdlib> 45 #include <gmock/gmock.h> 46 #include <gtest/gtest.h> 47 #include <memory> 48 #include <ostream> 49 #include <string> 50 51 using namespace clang; 52 using namespace clang::syntax; 53 54 using llvm::ValueIs; 55 using ::testing::AllOf; 56 using ::testing::Contains; 57 using ::testing::ElementsAre; 58 using ::testing::Field; 59 using ::testing::Matcher; 60 using ::testing::Not; 61 using ::testing::StartsWith; 62 63 namespace { 64 // Checks the passed ArrayRef<T> has the same begin() and end() iterators as the 65 // argument. 66 MATCHER_P(SameRange, A, "") { 67 return A.begin() == arg.begin() && A.end() == arg.end(); 68 } 69 70 Matcher<TokenBuffer::Expansion> 71 IsExpansion(Matcher<llvm::ArrayRef<syntax::Token>> Spelled, 72 Matcher<llvm::ArrayRef<syntax::Token>> Expanded) { 73 return AllOf(Field(&TokenBuffer::Expansion::Spelled, Spelled), 74 Field(&TokenBuffer::Expansion::Expanded, Expanded)); 75 } 76 // Matchers for syntax::Token. 77 MATCHER_P(Kind, K, "") { return arg.kind() == K; } 78 MATCHER_P2(HasText, Text, SourceMgr, "") { 79 return arg.text(*SourceMgr) == Text; 80 } 81 /// Checks the start and end location of a token are equal to SourceRng. 82 MATCHER_P(RangeIs, SourceRng, "") { 83 return arg.location() == SourceRng.first && 84 arg.endLocation() == SourceRng.second; 85 } 86 87 class TokenCollectorTest : public ::testing::Test { 88 public: 89 /// Run the clang frontend, collect the preprocessed tokens from the frontend 90 /// invocation and store them in this->Buffer. 91 /// This also clears SourceManager before running the compiler. 92 void recordTokens(llvm::StringRef Code) { 93 class RecordTokens : public ASTFrontendAction { 94 public: 95 explicit RecordTokens(TokenBuffer &Result) : Result(Result) {} 96 97 bool BeginSourceFileAction(CompilerInstance &CI) override { 98 assert(!Collector && "expected only a single call to BeginSourceFile"); 99 Collector.emplace(CI.getPreprocessor()); 100 return true; 101 } 102 void EndSourceFileAction() override { 103 assert(Collector && "BeginSourceFileAction was never called"); 104 Result = std::move(*Collector).consume(); 105 } 106 107 std::unique_ptr<ASTConsumer> 108 CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override { 109 return std::make_unique<ASTConsumer>(); 110 } 111 112 private: 113 TokenBuffer &Result; 114 llvm::Optional<TokenCollector> Collector; 115 }; 116 117 constexpr const char *FileName = "./input.cpp"; 118 FS->addFile(FileName, time_t(), llvm::MemoryBuffer::getMemBufferCopy("")); 119 // Prepare to run a compiler. 120 if (!Diags->getClient()) 121 Diags->setClient(new IgnoringDiagConsumer); 122 std::vector<const char *> Args = {"tok-test", "-std=c++03", "-fsyntax-only", 123 FileName}; 124 auto CI = createInvocationFromCommandLine(Args, Diags, FS); 125 assert(CI); 126 CI->getFrontendOpts().DisableFree = false; 127 CI->getPreprocessorOpts().addRemappedFile( 128 FileName, llvm::MemoryBuffer::getMemBufferCopy(Code).release()); 129 CompilerInstance Compiler; 130 Compiler.setInvocation(std::move(CI)); 131 Compiler.setDiagnostics(Diags.get()); 132 Compiler.setFileManager(FileMgr.get()); 133 Compiler.setSourceManager(SourceMgr.get()); 134 135 this->Buffer = TokenBuffer(*SourceMgr); 136 RecordTokens Recorder(this->Buffer); 137 ASSERT_TRUE(Compiler.ExecuteAction(Recorder)) 138 << "failed to run the frontend"; 139 } 140 141 /// Record the tokens and return a test dump of the resulting buffer. 142 std::string collectAndDump(llvm::StringRef Code) { 143 recordTokens(Code); 144 return Buffer.dumpForTests(); 145 } 146 147 // Adds a file to the test VFS. 148 void addFile(llvm::StringRef Path, llvm::StringRef Contents) { 149 if (!FS->addFile(Path, time_t(), 150 llvm::MemoryBuffer::getMemBufferCopy(Contents))) { 151 ADD_FAILURE() << "could not add a file to VFS: " << Path; 152 } 153 } 154 155 /// Add a new file, run syntax::tokenize() on it and return the results. 156 std::vector<syntax::Token> tokenize(llvm::StringRef Text) { 157 // FIXME: pass proper LangOptions. 158 return syntax::tokenize( 159 SourceMgr->createFileID(llvm::MemoryBuffer::getMemBufferCopy(Text)), 160 *SourceMgr, LangOptions()); 161 } 162 163 // Specialized versions of matchers that hide the SourceManager from clients. 164 Matcher<syntax::Token> HasText(std::string Text) const { 165 return ::HasText(Text, SourceMgr.get()); 166 } 167 Matcher<syntax::Token> RangeIs(llvm::Annotations::Range R) const { 168 std::pair<SourceLocation, SourceLocation> Ls; 169 Ls.first = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID()) 170 .getLocWithOffset(R.Begin); 171 Ls.second = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID()) 172 .getLocWithOffset(R.End); 173 return ::RangeIs(Ls); 174 } 175 176 /// Finds a subrange in O(n * m). 177 template <class T, class U, class Eq> 178 llvm::ArrayRef<T> findSubrange(llvm::ArrayRef<U> Subrange, 179 llvm::ArrayRef<T> Range, Eq F) { 180 for (auto Begin = Range.begin(); Begin < Range.end(); ++Begin) { 181 auto It = Begin; 182 for (auto ItSub = Subrange.begin(); 183 ItSub != Subrange.end() && It != Range.end(); ++ItSub, ++It) { 184 if (!F(*ItSub, *It)) 185 goto continue_outer; 186 } 187 return llvm::makeArrayRef(Begin, It); 188 continue_outer:; 189 } 190 return llvm::makeArrayRef(Range.end(), Range.end()); 191 } 192 193 /// Finds a subrange in \p Tokens that match the tokens specified in \p Query. 194 /// The match should be unique. \p Query is a whitespace-separated list of 195 /// tokens to search for. 196 llvm::ArrayRef<syntax::Token> 197 findTokenRange(llvm::StringRef Query, llvm::ArrayRef<syntax::Token> Tokens) { 198 llvm::SmallVector<llvm::StringRef, 8> QueryTokens; 199 Query.split(QueryTokens, ' ', /*MaxSplit=*/-1, /*KeepEmpty=*/false); 200 if (QueryTokens.empty()) { 201 ADD_FAILURE() << "will not look for an empty list of tokens"; 202 std::abort(); 203 } 204 // An equality test for search. 205 auto TextMatches = [this](llvm::StringRef Q, const syntax::Token &T) { 206 return Q == T.text(*SourceMgr); 207 }; 208 // Find a match. 209 auto Found = 210 findSubrange(llvm::makeArrayRef(QueryTokens), Tokens, TextMatches); 211 if (Found.begin() == Tokens.end()) { 212 ADD_FAILURE() << "could not find the subrange for " << Query; 213 std::abort(); 214 } 215 // Check that the match is unique. 216 if (findSubrange(llvm::makeArrayRef(QueryTokens), 217 llvm::makeArrayRef(Found.end(), Tokens.end()), TextMatches) 218 .begin() != Tokens.end()) { 219 ADD_FAILURE() << "match is not unique for " << Query; 220 std::abort(); 221 } 222 return Found; 223 }; 224 225 // Specialized versions of findTokenRange for expanded and spelled tokens. 226 llvm::ArrayRef<syntax::Token> findExpanded(llvm::StringRef Query) { 227 return findTokenRange(Query, Buffer.expandedTokens()); 228 } 229 llvm::ArrayRef<syntax::Token> findSpelled(llvm::StringRef Query, 230 FileID File = FileID()) { 231 if (!File.isValid()) 232 File = SourceMgr->getMainFileID(); 233 return findTokenRange(Query, Buffer.spelledTokens(File)); 234 } 235 236 // Data fields. 237 llvm::IntrusiveRefCntPtr<DiagnosticsEngine> Diags = 238 new DiagnosticsEngine(new DiagnosticIDs, new DiagnosticOptions); 239 IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS = 240 new llvm::vfs::InMemoryFileSystem; 241 llvm::IntrusiveRefCntPtr<FileManager> FileMgr = 242 new FileManager(FileSystemOptions(), FS); 243 llvm::IntrusiveRefCntPtr<SourceManager> SourceMgr = 244 new SourceManager(*Diags, *FileMgr); 245 /// Contains last result of calling recordTokens(). 246 TokenBuffer Buffer = TokenBuffer(*SourceMgr); 247 }; 248 249 TEST_F(TokenCollectorTest, RawMode) { 250 EXPECT_THAT(tokenize("int main() {}"), 251 ElementsAre(Kind(tok::kw_int), 252 AllOf(HasText("main"), Kind(tok::identifier)), 253 Kind(tok::l_paren), Kind(tok::r_paren), 254 Kind(tok::l_brace), Kind(tok::r_brace))); 255 // Comments are ignored for now. 256 EXPECT_THAT(tokenize("/* foo */int a; // more comments"), 257 ElementsAre(Kind(tok::kw_int), 258 AllOf(HasText("a"), Kind(tok::identifier)), 259 Kind(tok::semi))); 260 } 261 262 TEST_F(TokenCollectorTest, Basic) { 263 std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = { 264 {"int main() {}", 265 R"(expanded tokens: 266 int main ( ) { } 267 file './input.cpp' 268 spelled tokens: 269 int main ( ) { } 270 no mappings. 271 )"}, 272 // All kinds of whitespace are ignored. 273 {"\t\n int\t\n main\t\n (\t\n )\t\n{\t\n }\t\n", 274 R"(expanded tokens: 275 int main ( ) { } 276 file './input.cpp' 277 spelled tokens: 278 int main ( ) { } 279 no mappings. 280 )"}, 281 // Annotation tokens are ignored. 282 {R"cpp( 283 #pragma GCC visibility push (public) 284 #pragma GCC visibility pop 285 )cpp", 286 R"(expanded tokens: 287 <empty> 288 file './input.cpp' 289 spelled tokens: 290 # pragma GCC visibility push ( public ) # pragma GCC visibility pop 291 mappings: 292 ['#'_0, '<eof>'_13) => ['<eof>'_0, '<eof>'_0) 293 )"}, 294 // Empty files should not crash. 295 {R"cpp()cpp", R"(expanded tokens: 296 <empty> 297 file './input.cpp' 298 spelled tokens: 299 <empty> 300 no mappings. 301 )"}, 302 // Should not crash on errors inside '#define' directives. Error is that 303 // stringification (#B) does not refer to a macro parameter. 304 { 305 R"cpp( 306 a 307 #define MACRO() A #B 308 )cpp", 309 R"(expanded tokens: 310 a 311 file './input.cpp' 312 spelled tokens: 313 a # define MACRO ( ) A # B 314 mappings: 315 ['#'_1, '<eof>'_9) => ['<eof>'_1, '<eof>'_1) 316 )"}}; 317 for (auto &Test : TestCases) 318 EXPECT_EQ(collectAndDump(Test.first), Test.second) 319 << collectAndDump(Test.first); 320 } 321 322 TEST_F(TokenCollectorTest, Locations) { 323 // Check locations of the tokens. 324 llvm::Annotations Code(R"cpp( 325 $r1[[int]] $r2[[a]] $r3[[=]] $r4[["foo bar baz"]] $r5[[;]] 326 )cpp"); 327 recordTokens(Code.code()); 328 // Check expanded tokens. 329 EXPECT_THAT( 330 Buffer.expandedTokens(), 331 ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))), 332 AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))), 333 AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))), 334 AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))), 335 AllOf(Kind(tok::semi), RangeIs(Code.range("r5"))), 336 Kind(tok::eof))); 337 // Check spelled tokens. 338 EXPECT_THAT( 339 Buffer.spelledTokens(SourceMgr->getMainFileID()), 340 ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))), 341 AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))), 342 AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))), 343 AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))), 344 AllOf(Kind(tok::semi), RangeIs(Code.range("r5"))))); 345 } 346 347 TEST_F(TokenCollectorTest, MacroDirectives) { 348 // Macro directives are not stored anywhere at the moment. 349 std::string Code = R"cpp( 350 #define FOO a 351 #include "unresolved_file.h" 352 #undef FOO 353 #ifdef X 354 #else 355 #endif 356 #ifndef Y 357 #endif 358 #if 1 359 #elif 2 360 #else 361 #endif 362 #pragma once 363 #pragma something lalala 364 365 int a; 366 )cpp"; 367 std::string Expected = 368 "expanded tokens:\n" 369 " int a ;\n" 370 "file './input.cpp'\n" 371 " spelled tokens:\n" 372 " # define FOO a # include \"unresolved_file.h\" # undef FOO " 373 "# ifdef X # else # endif # ifndef Y # endif # if 1 # elif 2 # else " 374 "# endif # pragma once # pragma something lalala int a ;\n" 375 " mappings:\n" 376 " ['#'_0, 'int'_39) => ['int'_0, 'int'_0)\n"; 377 EXPECT_EQ(collectAndDump(Code), Expected); 378 } 379 380 TEST_F(TokenCollectorTest, MacroReplacements) { 381 std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = { 382 // A simple object-like macro. 383 {R"cpp( 384 #define INT int const 385 INT a; 386 )cpp", 387 R"(expanded tokens: 388 int const a ; 389 file './input.cpp' 390 spelled tokens: 391 # define INT int const INT a ; 392 mappings: 393 ['#'_0, 'INT'_5) => ['int'_0, 'int'_0) 394 ['INT'_5, 'a'_6) => ['int'_0, 'a'_2) 395 )"}, 396 // A simple function-like macro. 397 {R"cpp( 398 #define INT(a) const int 399 INT(10+10) a; 400 )cpp", 401 R"(expanded tokens: 402 const int a ; 403 file './input.cpp' 404 spelled tokens: 405 # define INT ( a ) const int INT ( 10 + 10 ) a ; 406 mappings: 407 ['#'_0, 'INT'_8) => ['const'_0, 'const'_0) 408 ['INT'_8, 'a'_14) => ['const'_0, 'a'_2) 409 )"}, 410 // Recursive macro replacements. 411 {R"cpp( 412 #define ID(X) X 413 #define INT int const 414 ID(ID(INT)) a; 415 )cpp", 416 R"(expanded tokens: 417 int const a ; 418 file './input.cpp' 419 spelled tokens: 420 # define ID ( X ) X # define INT int const ID ( ID ( INT ) ) a ; 421 mappings: 422 ['#'_0, 'ID'_12) => ['int'_0, 'int'_0) 423 ['ID'_12, 'a'_19) => ['int'_0, 'a'_2) 424 )"}, 425 // A little more complicated recursive macro replacements. 426 {R"cpp( 427 #define ADD(X, Y) X+Y 428 #define MULT(X, Y) X*Y 429 430 int a = ADD(MULT(1,2), MULT(3,ADD(4,5))); 431 )cpp", 432 "expanded tokens:\n" 433 " int a = 1 * 2 + 3 * 4 + 5 ;\n" 434 "file './input.cpp'\n" 435 " spelled tokens:\n" 436 " # define ADD ( X , Y ) X + Y # define MULT ( X , Y ) X * Y int " 437 "a = ADD ( MULT ( 1 , 2 ) , MULT ( 3 , ADD ( 4 , 5 ) ) ) ;\n" 438 " mappings:\n" 439 " ['#'_0, 'int'_22) => ['int'_0, 'int'_0)\n" 440 " ['ADD'_25, ';'_46) => ['1'_3, ';'_12)\n"}, 441 // Empty macro replacement. 442 // FIXME: the #define directives should not be glued together. 443 {R"cpp( 444 #define EMPTY 445 #define EMPTY_FUNC(X) 446 EMPTY 447 EMPTY_FUNC(1+2+3) 448 )cpp", 449 R"(expanded tokens: 450 <empty> 451 file './input.cpp' 452 spelled tokens: 453 # define EMPTY # define EMPTY_FUNC ( X ) EMPTY EMPTY_FUNC ( 1 + 2 + 3 ) 454 mappings: 455 ['#'_0, 'EMPTY'_9) => ['<eof>'_0, '<eof>'_0) 456 ['EMPTY'_9, 'EMPTY_FUNC'_10) => ['<eof>'_0, '<eof>'_0) 457 ['EMPTY_FUNC'_10, '<eof>'_18) => ['<eof>'_0, '<eof>'_0) 458 )"}, 459 // File ends with a macro replacement. 460 {R"cpp( 461 #define FOO 10+10; 462 int a = FOO 463 )cpp", 464 R"(expanded tokens: 465 int a = 10 + 10 ; 466 file './input.cpp' 467 spelled tokens: 468 # define FOO 10 + 10 ; int a = FOO 469 mappings: 470 ['#'_0, 'int'_7) => ['int'_0, 'int'_0) 471 ['FOO'_10, '<eof>'_11) => ['10'_3, '<eof>'_7) 472 )"}}; 473 474 for (auto &Test : TestCases) 475 EXPECT_EQ(Test.second, collectAndDump(Test.first)) 476 << collectAndDump(Test.first); 477 } 478 479 TEST_F(TokenCollectorTest, SpecialTokens) { 480 // Tokens coming from concatenations. 481 recordTokens(R"cpp( 482 #define CONCAT(a, b) a ## b 483 int a = CONCAT(1, 2); 484 )cpp"); 485 EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()), 486 Contains(HasText("12"))); 487 // Multi-line tokens with slashes at the end. 488 recordTokens("i\\\nn\\\nt"); 489 EXPECT_THAT(Buffer.expandedTokens(), 490 ElementsAre(AllOf(Kind(tok::kw_int), HasText("i\\\nn\\\nt")), 491 Kind(tok::eof))); 492 // FIXME: test tokens with digraphs and UCN identifiers. 493 } 494 495 TEST_F(TokenCollectorTest, LateBoundTokens) { 496 // The parser eventually breaks the first '>>' into two tokens ('>' and '>'), 497 // but we choose to record them as a single token (for now). 498 llvm::Annotations Code(R"cpp( 499 template <class T> 500 struct foo { int a; }; 501 int bar = foo<foo<int$br[[>>]]().a; 502 int baz = 10 $op[[>>]] 2; 503 )cpp"); 504 recordTokens(Code.code()); 505 EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()), 506 AllOf(Contains(AllOf(Kind(tok::greatergreater), 507 RangeIs(Code.range("br")))), 508 Contains(AllOf(Kind(tok::greatergreater), 509 RangeIs(Code.range("op")))))); 510 } 511 512 TEST_F(TokenCollectorTest, DelayedParsing) { 513 llvm::StringLiteral Code = R"cpp( 514 struct Foo { 515 int method() { 516 // Parser will visit method bodies and initializers multiple times, but 517 // TokenBuffer should only record the first walk over the tokens; 518 return 100; 519 } 520 int a = 10; 521 522 struct Subclass { 523 void foo() { 524 Foo().method(); 525 } 526 }; 527 }; 528 )cpp"; 529 std::string ExpectedTokens = 530 "expanded tokens:\n" 531 " struct Foo { int method ( ) { return 100 ; } int a = 10 ; struct " 532 "Subclass { void foo ( ) { Foo ( ) . method ( ) ; } } ; } ;\n"; 533 EXPECT_THAT(collectAndDump(Code), StartsWith(ExpectedTokens)); 534 } 535 536 TEST_F(TokenCollectorTest, MultiFile) { 537 addFile("./foo.h", R"cpp( 538 #define ADD(X, Y) X+Y 539 int a = 100; 540 #include "bar.h" 541 )cpp"); 542 addFile("./bar.h", R"cpp( 543 int b = ADD(1, 2); 544 #define MULT(X, Y) X*Y 545 )cpp"); 546 llvm::StringLiteral Code = R"cpp( 547 #include "foo.h" 548 int c = ADD(1, MULT(2,3)); 549 )cpp"; 550 551 std::string Expected = R"(expanded tokens: 552 int a = 100 ; int b = 1 + 2 ; int c = 1 + 2 * 3 ; 553 file './input.cpp' 554 spelled tokens: 555 # include "foo.h" int c = ADD ( 1 , MULT ( 2 , 3 ) ) ; 556 mappings: 557 ['#'_0, 'int'_3) => ['int'_12, 'int'_12) 558 ['ADD'_6, ';'_17) => ['1'_15, ';'_20) 559 file './foo.h' 560 spelled tokens: 561 # define ADD ( X , Y ) X + Y int a = 100 ; # include "bar.h" 562 mappings: 563 ['#'_0, 'int'_11) => ['int'_0, 'int'_0) 564 ['#'_16, '<eof>'_19) => ['int'_5, 'int'_5) 565 file './bar.h' 566 spelled tokens: 567 int b = ADD ( 1 , 2 ) ; # define MULT ( X , Y ) X * Y 568 mappings: 569 ['ADD'_3, ';'_9) => ['1'_8, ';'_11) 570 ['#'_10, '<eof>'_21) => ['int'_12, 'int'_12) 571 )"; 572 573 EXPECT_EQ(Expected, collectAndDump(Code)) 574 << "input: " << Code << "\nresults: " << collectAndDump(Code); 575 } 576 577 class TokenBufferTest : public TokenCollectorTest {}; 578 579 TEST_F(TokenBufferTest, SpelledByExpanded) { 580 recordTokens(R"cpp( 581 a1 a2 a3 b1 b2 582 )cpp"); 583 584 // Sanity check: expanded and spelled tokens are stored separately. 585 EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2")))); 586 // Searching for subranges of expanded tokens should give the corresponding 587 // spelled ones. 588 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 b1 b2")), 589 ValueIs(SameRange(findSpelled("a1 a2 a3 b1 b2")))); 590 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")), 591 ValueIs(SameRange(findSpelled("a1 a2 a3")))); 592 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")), 593 ValueIs(SameRange(findSpelled("b1 b2")))); 594 595 // Test search on simple macro expansions. 596 recordTokens(R"cpp( 597 #define A a1 a2 a3 598 #define B b1 b2 599 600 A split B 601 )cpp"); 602 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")), 603 ValueIs(SameRange(findSpelled("A split B")))); 604 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")), 605 ValueIs(SameRange(findSpelled("A split").drop_back()))); 606 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")), 607 ValueIs(SameRange(findSpelled("split B").drop_front()))); 608 // Ranges not fully covering macro invocations should fail. 609 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None); 610 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("b2")), llvm::None); 611 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1 b2")), 612 llvm::None); 613 614 // Recursive macro invocations. 615 recordTokens(R"cpp( 616 #define ID(x) x 617 #define B b1 b2 618 619 ID(ID(ID(a1) a2 a3)) split ID(B) 620 )cpp"); 621 622 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")), 623 ValueIs(SameRange(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) )")))); 624 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")), 625 ValueIs(SameRange(findSpelled("ID ( B )")))); 626 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")), 627 ValueIs(SameRange(findSpelled( 628 "ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )")))); 629 // Ranges crossing macro call boundaries. 630 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1")), 631 llvm::None); 632 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1")), 633 llvm::None); 634 // FIXME: next two examples should map to macro arguments, but currently they 635 // fail. 636 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2")), llvm::None); 637 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None); 638 639 // Empty macro expansions. 640 recordTokens(R"cpp( 641 #define EMPTY 642 #define ID(X) X 643 644 EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1 645 EMPTY EMPTY ID(4 5 6) split2 646 ID(7 8 9) EMPTY EMPTY 647 )cpp"); 648 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("1 2 3")), 649 ValueIs(SameRange(findSpelled("ID ( 1 2 3 )")))); 650 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("4 5 6")), 651 ValueIs(SameRange(findSpelled("ID ( 4 5 6 )")))); 652 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("7 8 9")), 653 ValueIs(SameRange(findSpelled("ID ( 7 8 9 )")))); 654 655 // Empty mappings coming from various directives. 656 recordTokens(R"cpp( 657 #define ID(X) X 658 ID(1) 659 #pragma lalala 660 not_mapped 661 )cpp"); 662 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("not_mapped")), 663 ValueIs(SameRange(findSpelled("not_mapped")))); 664 } 665 666 TEST_F(TokenBufferTest, ExpansionStartingAt) { 667 // Object-like macro expansions. 668 recordTokens(R"cpp( 669 #define FOO 3+4 670 int a = FOO 1; 671 int b = FOO 2; 672 )cpp"); 673 674 llvm::ArrayRef<syntax::Token> Foo1 = findSpelled("FOO 1").drop_back(); 675 EXPECT_THAT( 676 Buffer.expansionStartingAt(Foo1.data()), 677 ValueIs(IsExpansion(SameRange(Foo1), 678 SameRange(findExpanded("3 + 4 1").drop_back())))); 679 680 llvm::ArrayRef<syntax::Token> Foo2 = findSpelled("FOO 2").drop_back(); 681 EXPECT_THAT( 682 Buffer.expansionStartingAt(Foo2.data()), 683 ValueIs(IsExpansion(SameRange(Foo2), 684 SameRange(findExpanded("3 + 4 2").drop_back())))); 685 686 // Function-like macro expansions. 687 recordTokens(R"cpp( 688 #define ID(X) X 689 int a = ID(1+2+3); 690 int b = ID(ID(2+3+4)); 691 )cpp"); 692 693 llvm::ArrayRef<syntax::Token> ID1 = findSpelled("ID ( 1 + 2 + 3 )"); 694 EXPECT_THAT(Buffer.expansionStartingAt(&ID1.front()), 695 ValueIs(IsExpansion(SameRange(ID1), 696 SameRange(findExpanded("1 + 2 + 3"))))); 697 // Only the first spelled token should be found. 698 for (const auto &T : ID1.drop_front()) 699 EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None); 700 701 llvm::ArrayRef<syntax::Token> ID2 = findSpelled("ID ( ID ( 2 + 3 + 4 ) )"); 702 EXPECT_THAT(Buffer.expansionStartingAt(&ID2.front()), 703 ValueIs(IsExpansion(SameRange(ID2), 704 SameRange(findExpanded("2 + 3 + 4"))))); 705 // Only the first spelled token should be found. 706 for (const auto &T : ID2.drop_front()) 707 EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None); 708 709 // PP directives. 710 recordTokens(R"cpp( 711 #define FOO 1 712 int a = FOO; 713 #pragma once 714 int b = 1; 715 )cpp"); 716 717 llvm::ArrayRef<syntax::Token> DefineFoo = findSpelled("# define FOO 1"); 718 EXPECT_THAT( 719 Buffer.expansionStartingAt(&DefineFoo.front()), 720 ValueIs(IsExpansion(SameRange(DefineFoo), 721 SameRange(findExpanded("int a").take_front(0))))); 722 // Only the first spelled token should be found. 723 for (const auto &T : DefineFoo.drop_front()) 724 EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None); 725 726 llvm::ArrayRef<syntax::Token> PragmaOnce = findSpelled("# pragma once"); 727 EXPECT_THAT( 728 Buffer.expansionStartingAt(&PragmaOnce.front()), 729 ValueIs(IsExpansion(SameRange(PragmaOnce), 730 SameRange(findExpanded("int b").take_front(0))))); 731 // Only the first spelled token should be found. 732 for (const auto &T : PragmaOnce.drop_front()) 733 EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None); 734 } 735 736 TEST_F(TokenBufferTest, TokensToFileRange) { 737 addFile("./foo.h", "token_from_header"); 738 llvm::Annotations Code(R"cpp( 739 #define FOO token_from_expansion 740 #include "./foo.h" 741 $all[[$i[[int]] a = FOO;]] 742 )cpp"); 743 recordTokens(Code.code()); 744 745 auto &SM = *SourceMgr; 746 747 // Two simple examples. 748 auto Int = findExpanded("int").front(); 749 auto Semi = findExpanded(";").front(); 750 EXPECT_EQ(Int.range(SM), FileRange(SM.getMainFileID(), Code.range("i").Begin, 751 Code.range("i").End)); 752 EXPECT_EQ(syntax::Token::range(SM, Int, Semi), 753 FileRange(SM.getMainFileID(), Code.range("all").Begin, 754 Code.range("all").End)); 755 // We don't test assertion failures because death tests are slow. 756 } 757 758 TEST_F(TokenBufferTest, macroExpansions) { 759 llvm::Annotations Code(R"cpp( 760 #define FOO B 761 #define FOO2 BA 762 #define CALL(X) int X 763 #define G CALL(FOO2) 764 int B; 765 $macro[[FOO]]; 766 $macro[[CALL]](A); 767 $macro[[G]]; 768 )cpp"); 769 recordTokens(Code.code()); 770 auto &SM = *SourceMgr; 771 auto Expansions = Buffer.macroExpansions(SM.getMainFileID()); 772 std::vector<FileRange> ExpectedMacroRanges; 773 for (auto Range : Code.ranges("macro")) 774 ExpectedMacroRanges.push_back( 775 FileRange(SM.getMainFileID(), Range.Begin, Range.End)); 776 std::vector<FileRange> ActualMacroRanges; 777 for (auto Expansion : Expansions) 778 ActualMacroRanges.push_back(Expansion->range(SM)); 779 EXPECT_EQ(ExpectedMacroRanges, ActualMacroRanges); 780 } 781 } // namespace 782