1 //===- unittests/Lex/LexerTest.cpp ------ Lexer tests ---------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/Lex/Lexer.h" 10 #include "clang/Basic/Diagnostic.h" 11 #include "clang/Basic/DiagnosticOptions.h" 12 #include "clang/Basic/FileManager.h" 13 #include "clang/Basic/LangOptions.h" 14 #include "clang/Basic/SourceLocation.h" 15 #include "clang/Basic/SourceManager.h" 16 #include "clang/Basic/TargetInfo.h" 17 #include "clang/Basic/TargetOptions.h" 18 #include "clang/Basic/TokenKinds.h" 19 #include "clang/Lex/HeaderSearch.h" 20 #include "clang/Lex/HeaderSearchOptions.h" 21 #include "clang/Lex/MacroArgs.h" 22 #include "clang/Lex/MacroInfo.h" 23 #include "clang/Lex/ModuleLoader.h" 24 #include "clang/Lex/Preprocessor.h" 25 #include "clang/Lex/PreprocessorOptions.h" 26 #include "gmock/gmock.h" 27 #include "gtest/gtest.h" 28 #include <memory> 29 #include <vector> 30 31 namespace { 32 using namespace clang; 33 using testing::ElementsAre; 34 35 // The test fixture. 36 class LexerTest : public ::testing::Test { 37 protected: 38 LexerTest() 39 : FileMgr(FileMgrOpts), 40 DiagID(new DiagnosticIDs()), 41 Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()), 42 SourceMgr(Diags, FileMgr), 43 TargetOpts(new TargetOptions) 44 { 45 TargetOpts->Triple = "x86_64-apple-darwin11.1.0"; 46 Target = TargetInfo::CreateTargetInfo(Diags, TargetOpts); 47 } 48 49 std::unique_ptr<Preprocessor> CreatePP(StringRef Source, 50 TrivialModuleLoader &ModLoader) { 51 std::unique_ptr<llvm::MemoryBuffer> Buf = 52 llvm::MemoryBuffer::getMemBuffer(Source); 53 SourceMgr.setMainFileID(SourceMgr.createFileID(std::move(Buf))); 54 55 HeaderSearch HeaderInfo(std::make_shared<HeaderSearchOptions>(), SourceMgr, 56 Diags, LangOpts, Target.get()); 57 std::unique_ptr<Preprocessor> PP = std::make_unique<Preprocessor>( 58 std::make_shared<PreprocessorOptions>(), Diags, LangOpts, SourceMgr, 59 HeaderInfo, ModLoader, 60 /*IILookup =*/nullptr, 61 /*OwnsHeaderSearch =*/false); 62 PP->Initialize(*Target); 63 PP->EnterMainSourceFile(); 64 return PP; 65 } 66 67 std::vector<Token> Lex(StringRef Source) { 68 TrivialModuleLoader ModLoader; 69 PP = CreatePP(Source, ModLoader); 70 71 std::vector<Token> toks; 72 while (1) { 73 Token tok; 74 PP->Lex(tok); 75 if (tok.is(tok::eof)) 76 break; 77 toks.push_back(tok); 78 } 79 80 return toks; 81 } 82 83 std::vector<Token> CheckLex(StringRef Source, 84 ArrayRef<tok::TokenKind> ExpectedTokens) { 85 auto toks = Lex(Source); 86 EXPECT_EQ(ExpectedTokens.size(), toks.size()); 87 for (unsigned i = 0, e = ExpectedTokens.size(); i != e; ++i) { 88 EXPECT_EQ(ExpectedTokens[i], toks[i].getKind()); 89 } 90 91 return toks; 92 } 93 94 std::string getSourceText(Token Begin, Token End) { 95 bool Invalid; 96 StringRef Str = 97 Lexer::getSourceText(CharSourceRange::getTokenRange(SourceRange( 98 Begin.getLocation(), End.getLocation())), 99 SourceMgr, LangOpts, &Invalid); 100 if (Invalid) 101 return "<INVALID>"; 102 return std::string(Str); 103 } 104 105 FileSystemOptions FileMgrOpts; 106 FileManager FileMgr; 107 IntrusiveRefCntPtr<DiagnosticIDs> DiagID; 108 DiagnosticsEngine Diags; 109 SourceManager SourceMgr; 110 LangOptions LangOpts; 111 std::shared_ptr<TargetOptions> TargetOpts; 112 IntrusiveRefCntPtr<TargetInfo> Target; 113 std::unique_ptr<Preprocessor> PP; 114 }; 115 116 TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgument) { 117 std::vector<tok::TokenKind> ExpectedTokens; 118 ExpectedTokens.push_back(tok::identifier); 119 ExpectedTokens.push_back(tok::l_paren); 120 ExpectedTokens.push_back(tok::identifier); 121 ExpectedTokens.push_back(tok::r_paren); 122 123 std::vector<Token> toks = CheckLex("#define M(x) x\n" 124 "M(f(M(i)))", 125 ExpectedTokens); 126 127 EXPECT_EQ("M(i)", getSourceText(toks[2], toks[2])); 128 } 129 130 TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgumentForEndOfMacro) { 131 std::vector<tok::TokenKind> ExpectedTokens; 132 ExpectedTokens.push_back(tok::identifier); 133 ExpectedTokens.push_back(tok::identifier); 134 135 std::vector<Token> toks = CheckLex("#define M(x) x\n" 136 "M(M(i) c)", 137 ExpectedTokens); 138 139 EXPECT_EQ("M(i)", getSourceText(toks[0], toks[0])); 140 } 141 142 TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForBeginOfMacro) { 143 std::vector<tok::TokenKind> ExpectedTokens; 144 ExpectedTokens.push_back(tok::identifier); 145 ExpectedTokens.push_back(tok::identifier); 146 ExpectedTokens.push_back(tok::identifier); 147 148 std::vector<Token> toks = CheckLex("#define M(x) x\n" 149 "M(c c M(i))", 150 ExpectedTokens); 151 152 EXPECT_EQ("c M(i)", getSourceText(toks[1], toks[2])); 153 } 154 155 TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForEndOfMacro) { 156 std::vector<tok::TokenKind> ExpectedTokens; 157 ExpectedTokens.push_back(tok::identifier); 158 ExpectedTokens.push_back(tok::identifier); 159 ExpectedTokens.push_back(tok::identifier); 160 161 std::vector<Token> toks = CheckLex("#define M(x) x\n" 162 "M(M(i) c c)", 163 ExpectedTokens); 164 165 EXPECT_EQ("M(i) c", getSourceText(toks[0], toks[1])); 166 } 167 168 TEST_F(LexerTest, GetSourceTextInSeparateFnMacros) { 169 std::vector<tok::TokenKind> ExpectedTokens; 170 ExpectedTokens.push_back(tok::identifier); 171 ExpectedTokens.push_back(tok::identifier); 172 ExpectedTokens.push_back(tok::identifier); 173 ExpectedTokens.push_back(tok::identifier); 174 175 std::vector<Token> toks = CheckLex("#define M(x) x\n" 176 "M(c M(i)) M(M(i) c)", 177 ExpectedTokens); 178 179 EXPECT_EQ("<INVALID>", getSourceText(toks[1], toks[2])); 180 } 181 182 TEST_F(LexerTest, GetSourceTextWorksAcrossTokenPastes) { 183 std::vector<tok::TokenKind> ExpectedTokens; 184 ExpectedTokens.push_back(tok::identifier); 185 ExpectedTokens.push_back(tok::l_paren); 186 ExpectedTokens.push_back(tok::identifier); 187 ExpectedTokens.push_back(tok::r_paren); 188 189 std::vector<Token> toks = CheckLex("#define M(x) x\n" 190 "#define C(x) M(x##c)\n" 191 "M(f(C(i)))", 192 ExpectedTokens); 193 194 EXPECT_EQ("C(i)", getSourceText(toks[2], toks[2])); 195 } 196 197 TEST_F(LexerTest, GetSourceTextExpandsAcrossMultipleMacroCalls) { 198 std::vector<tok::TokenKind> ExpectedTokens; 199 ExpectedTokens.push_back(tok::identifier); 200 ExpectedTokens.push_back(tok::l_paren); 201 ExpectedTokens.push_back(tok::identifier); 202 ExpectedTokens.push_back(tok::r_paren); 203 204 std::vector<Token> toks = CheckLex("#define M(x) x\n" 205 "f(M(M(i)))", 206 ExpectedTokens); 207 EXPECT_EQ("M(M(i))", getSourceText(toks[2], toks[2])); 208 } 209 210 TEST_F(LexerTest, GetSourceTextInMiddleOfMacroArgument) { 211 std::vector<tok::TokenKind> ExpectedTokens; 212 ExpectedTokens.push_back(tok::identifier); 213 ExpectedTokens.push_back(tok::l_paren); 214 ExpectedTokens.push_back(tok::identifier); 215 ExpectedTokens.push_back(tok::r_paren); 216 217 std::vector<Token> toks = CheckLex("#define M(x) x\n" 218 "M(f(i))", 219 ExpectedTokens); 220 EXPECT_EQ("i", getSourceText(toks[2], toks[2])); 221 } 222 223 TEST_F(LexerTest, GetSourceTextExpandsAroundDifferentMacroCalls) { 224 std::vector<tok::TokenKind> ExpectedTokens; 225 ExpectedTokens.push_back(tok::identifier); 226 ExpectedTokens.push_back(tok::l_paren); 227 ExpectedTokens.push_back(tok::identifier); 228 ExpectedTokens.push_back(tok::r_paren); 229 230 std::vector<Token> toks = CheckLex("#define M(x) x\n" 231 "#define C(x) x\n" 232 "f(C(M(i)))", 233 ExpectedTokens); 234 EXPECT_EQ("C(M(i))", getSourceText(toks[2], toks[2])); 235 } 236 237 TEST_F(LexerTest, GetSourceTextOnlyExpandsIfFirstTokenInMacro) { 238 std::vector<tok::TokenKind> ExpectedTokens; 239 ExpectedTokens.push_back(tok::identifier); 240 ExpectedTokens.push_back(tok::l_paren); 241 ExpectedTokens.push_back(tok::identifier); 242 ExpectedTokens.push_back(tok::identifier); 243 ExpectedTokens.push_back(tok::r_paren); 244 245 std::vector<Token> toks = CheckLex("#define M(x) x\n" 246 "#define C(x) c x\n" 247 "f(C(M(i)))", 248 ExpectedTokens); 249 EXPECT_EQ("M(i)", getSourceText(toks[3], toks[3])); 250 } 251 252 TEST_F(LexerTest, GetSourceTextExpandsRecursively) { 253 std::vector<tok::TokenKind> ExpectedTokens; 254 ExpectedTokens.push_back(tok::identifier); 255 ExpectedTokens.push_back(tok::identifier); 256 ExpectedTokens.push_back(tok::l_paren); 257 ExpectedTokens.push_back(tok::identifier); 258 ExpectedTokens.push_back(tok::r_paren); 259 260 std::vector<Token> toks = CheckLex("#define M(x) x\n" 261 "#define C(x) c M(x)\n" 262 "C(f(M(i)))", 263 ExpectedTokens); 264 EXPECT_EQ("M(i)", getSourceText(toks[3], toks[3])); 265 } 266 267 TEST_F(LexerTest, LexAPI) { 268 std::vector<tok::TokenKind> ExpectedTokens; 269 // Line 1 (after the #defines) 270 ExpectedTokens.push_back(tok::l_square); 271 ExpectedTokens.push_back(tok::identifier); 272 ExpectedTokens.push_back(tok::r_square); 273 ExpectedTokens.push_back(tok::l_square); 274 ExpectedTokens.push_back(tok::identifier); 275 ExpectedTokens.push_back(tok::r_square); 276 // Line 2 277 ExpectedTokens.push_back(tok::identifier); 278 ExpectedTokens.push_back(tok::identifier); 279 ExpectedTokens.push_back(tok::identifier); 280 ExpectedTokens.push_back(tok::identifier); 281 282 std::vector<Token> toks = CheckLex("#define M(x) [x]\n" 283 "#define N(x) x\n" 284 "#define INN(x) x\n" 285 "#define NOF1 INN(val)\n" 286 "#define NOF2 val\n" 287 "M(foo) N([bar])\n" 288 "N(INN(val)) N(NOF1) N(NOF2) N(val)", 289 ExpectedTokens); 290 291 SourceLocation lsqrLoc = toks[0].getLocation(); 292 SourceLocation idLoc = toks[1].getLocation(); 293 SourceLocation rsqrLoc = toks[2].getLocation(); 294 CharSourceRange macroRange = SourceMgr.getExpansionRange(lsqrLoc); 295 296 SourceLocation Loc; 297 EXPECT_TRUE(Lexer::isAtStartOfMacroExpansion(lsqrLoc, SourceMgr, LangOpts, &Loc)); 298 EXPECT_EQ(Loc, macroRange.getBegin()); 299 EXPECT_FALSE(Lexer::isAtStartOfMacroExpansion(idLoc, SourceMgr, LangOpts)); 300 EXPECT_FALSE(Lexer::isAtEndOfMacroExpansion(idLoc, SourceMgr, LangOpts)); 301 EXPECT_TRUE(Lexer::isAtEndOfMacroExpansion(rsqrLoc, SourceMgr, LangOpts, &Loc)); 302 EXPECT_EQ(Loc, macroRange.getEnd()); 303 EXPECT_TRUE(macroRange.isTokenRange()); 304 305 CharSourceRange range = Lexer::makeFileCharRange( 306 CharSourceRange::getTokenRange(lsqrLoc, idLoc), SourceMgr, LangOpts); 307 EXPECT_TRUE(range.isInvalid()); 308 range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(idLoc, rsqrLoc), 309 SourceMgr, LangOpts); 310 EXPECT_TRUE(range.isInvalid()); 311 range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(lsqrLoc, rsqrLoc), 312 SourceMgr, LangOpts); 313 EXPECT_TRUE(!range.isTokenRange()); 314 EXPECT_EQ(range.getAsRange(), 315 SourceRange(macroRange.getBegin(), 316 macroRange.getEnd().getLocWithOffset(1))); 317 318 StringRef text = Lexer::getSourceText( 319 CharSourceRange::getTokenRange(lsqrLoc, rsqrLoc), 320 SourceMgr, LangOpts); 321 EXPECT_EQ(text, "M(foo)"); 322 323 SourceLocation macroLsqrLoc = toks[3].getLocation(); 324 SourceLocation macroIdLoc = toks[4].getLocation(); 325 SourceLocation macroRsqrLoc = toks[5].getLocation(); 326 SourceLocation fileLsqrLoc = SourceMgr.getSpellingLoc(macroLsqrLoc); 327 SourceLocation fileIdLoc = SourceMgr.getSpellingLoc(macroIdLoc); 328 SourceLocation fileRsqrLoc = SourceMgr.getSpellingLoc(macroRsqrLoc); 329 330 range = Lexer::makeFileCharRange( 331 CharSourceRange::getTokenRange(macroLsqrLoc, macroIdLoc), 332 SourceMgr, LangOpts); 333 EXPECT_EQ(SourceRange(fileLsqrLoc, fileIdLoc.getLocWithOffset(3)), 334 range.getAsRange()); 335 336 range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(macroIdLoc, macroRsqrLoc), 337 SourceMgr, LangOpts); 338 EXPECT_EQ(SourceRange(fileIdLoc, fileRsqrLoc.getLocWithOffset(1)), 339 range.getAsRange()); 340 341 macroRange = SourceMgr.getExpansionRange(macroLsqrLoc); 342 range = Lexer::makeFileCharRange( 343 CharSourceRange::getTokenRange(macroLsqrLoc, macroRsqrLoc), 344 SourceMgr, LangOpts); 345 EXPECT_EQ(SourceRange(macroRange.getBegin(), macroRange.getEnd().getLocWithOffset(1)), 346 range.getAsRange()); 347 348 text = Lexer::getSourceText( 349 CharSourceRange::getTokenRange(SourceRange(macroLsqrLoc, macroIdLoc)), 350 SourceMgr, LangOpts); 351 EXPECT_EQ(text, "[bar"); 352 353 354 SourceLocation idLoc1 = toks[6].getLocation(); 355 SourceLocation idLoc2 = toks[7].getLocation(); 356 SourceLocation idLoc3 = toks[8].getLocation(); 357 SourceLocation idLoc4 = toks[9].getLocation(); 358 EXPECT_EQ("INN", Lexer::getImmediateMacroName(idLoc1, SourceMgr, LangOpts)); 359 EXPECT_EQ("INN", Lexer::getImmediateMacroName(idLoc2, SourceMgr, LangOpts)); 360 EXPECT_EQ("NOF2", Lexer::getImmediateMacroName(idLoc3, SourceMgr, LangOpts)); 361 EXPECT_EQ("N", Lexer::getImmediateMacroName(idLoc4, SourceMgr, LangOpts)); 362 } 363 364 TEST_F(LexerTest, HandlesSplitTokens) { 365 std::vector<tok::TokenKind> ExpectedTokens; 366 // Line 1 (after the #defines) 367 ExpectedTokens.push_back(tok::identifier); 368 ExpectedTokens.push_back(tok::less); 369 ExpectedTokens.push_back(tok::identifier); 370 ExpectedTokens.push_back(tok::less); 371 ExpectedTokens.push_back(tok::greatergreater); 372 // Line 2 373 ExpectedTokens.push_back(tok::identifier); 374 ExpectedTokens.push_back(tok::less); 375 ExpectedTokens.push_back(tok::identifier); 376 ExpectedTokens.push_back(tok::less); 377 ExpectedTokens.push_back(tok::greatergreater); 378 379 std::vector<Token> toks = CheckLex("#define TY ty\n" 380 "#define RANGLE ty<ty<>>\n" 381 "TY<ty<>>\n" 382 "RANGLE", 383 ExpectedTokens); 384 385 SourceLocation outerTyLoc = toks[0].getLocation(); 386 SourceLocation innerTyLoc = toks[2].getLocation(); 387 SourceLocation gtgtLoc = toks[4].getLocation(); 388 // Split the token to simulate the action of the parser and force creation of 389 // an `ExpansionTokenRange`. 390 SourceLocation rangleLoc = PP->SplitToken(gtgtLoc, 1); 391 392 // Verify that it only captures the first greater-then and not the second one. 393 CharSourceRange range = Lexer::makeFileCharRange( 394 CharSourceRange::getTokenRange(innerTyLoc, rangleLoc), SourceMgr, 395 LangOpts); 396 EXPECT_TRUE(range.isCharRange()); 397 EXPECT_EQ(range.getAsRange(), 398 SourceRange(innerTyLoc, gtgtLoc.getLocWithOffset(1))); 399 400 // Verify case where range begins in a macro expansion. 401 range = Lexer::makeFileCharRange( 402 CharSourceRange::getTokenRange(outerTyLoc, rangleLoc), SourceMgr, 403 LangOpts); 404 EXPECT_TRUE(range.isCharRange()); 405 EXPECT_EQ(range.getAsRange(), 406 SourceRange(SourceMgr.getExpansionLoc(outerTyLoc), 407 gtgtLoc.getLocWithOffset(1))); 408 409 SourceLocation macroInnerTyLoc = toks[7].getLocation(); 410 SourceLocation macroGtgtLoc = toks[9].getLocation(); 411 // Split the token to simulate the action of the parser and force creation of 412 // an `ExpansionTokenRange`. 413 SourceLocation macroRAngleLoc = PP->SplitToken(macroGtgtLoc, 1); 414 415 // Verify that it fails (because it only captures the first greater-then and 416 // not the second one, so it doesn't span the entire macro expansion). 417 range = Lexer::makeFileCharRange( 418 CharSourceRange::getTokenRange(macroInnerTyLoc, macroRAngleLoc), 419 SourceMgr, LangOpts); 420 EXPECT_TRUE(range.isInvalid()); 421 } 422 423 TEST_F(LexerTest, DontMergeMacroArgsFromDifferentMacroFiles) { 424 std::vector<Token> toks = 425 Lex("#define helper1 0\n" 426 "void helper2(const char *, ...);\n" 427 "#define M1(a, ...) helper2(a, ##__VA_ARGS__)\n" 428 "#define M2(a, ...) M1(a, helper1, ##__VA_ARGS__)\n" 429 "void f1() { M2(\"a\", \"b\"); }"); 430 431 // Check the file corresponding to the "helper1" macro arg in M2. 432 // 433 // The lexer used to report its size as 31, meaning that the end of the 434 // expansion would be on the *next line* (just past `M2("a", "b")`). Make 435 // sure that we get the correct end location (the comma after "helper1"). 436 SourceLocation helper1ArgLoc = toks[20].getLocation(); 437 EXPECT_EQ(SourceMgr.getFileIDSize(SourceMgr.getFileID(helper1ArgLoc)), 8U); 438 } 439 440 TEST_F(LexerTest, DontOverallocateStringifyArgs) { 441 TrivialModuleLoader ModLoader; 442 auto PP = CreatePP("\"StrArg\", 5, 'C'", ModLoader); 443 444 llvm::BumpPtrAllocator Allocator; 445 std::array<IdentifierInfo *, 3> ParamList; 446 MacroInfo *MI = PP->AllocateMacroInfo({}); 447 MI->setIsFunctionLike(); 448 MI->setParameterList(ParamList, Allocator); 449 EXPECT_EQ(3u, MI->getNumParams()); 450 EXPECT_TRUE(MI->isFunctionLike()); 451 452 Token Eof; 453 Eof.setKind(tok::eof); 454 std::vector<Token> ArgTokens; 455 while (1) { 456 Token tok; 457 PP->Lex(tok); 458 if (tok.is(tok::eof)) { 459 ArgTokens.push_back(Eof); 460 break; 461 } 462 if (tok.is(tok::comma)) 463 ArgTokens.push_back(Eof); 464 else 465 ArgTokens.push_back(tok); 466 } 467 468 auto MacroArgsDeleter = [&PP](MacroArgs *M) { M->destroy(*PP); }; 469 std::unique_ptr<MacroArgs, decltype(MacroArgsDeleter)> MA( 470 MacroArgs::create(MI, ArgTokens, false, *PP), MacroArgsDeleter); 471 auto StringifyArg = [&](int ArgNo) { 472 return MA->StringifyArgument(MA->getUnexpArgument(ArgNo), *PP, 473 /*Charify=*/false, {}, {}); 474 }; 475 Token Result = StringifyArg(0); 476 EXPECT_EQ(tok::string_literal, Result.getKind()); 477 EXPECT_STREQ("\"\\\"StrArg\\\"\"", Result.getLiteralData()); 478 Result = StringifyArg(1); 479 EXPECT_EQ(tok::string_literal, Result.getKind()); 480 EXPECT_STREQ("\"5\"", Result.getLiteralData()); 481 Result = StringifyArg(2); 482 EXPECT_EQ(tok::string_literal, Result.getKind()); 483 EXPECT_STREQ("\"'C'\"", Result.getLiteralData()); 484 #if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST 485 EXPECT_DEATH(StringifyArg(3), "Invalid arg #"); 486 #endif 487 } 488 489 TEST_F(LexerTest, IsNewLineEscapedValid) { 490 auto hasNewLineEscaped = [](const char *S) { 491 return Lexer::isNewLineEscaped(S, S + strlen(S) - 1); 492 }; 493 494 EXPECT_TRUE(hasNewLineEscaped("\\\r")); 495 EXPECT_TRUE(hasNewLineEscaped("\\\n")); 496 EXPECT_TRUE(hasNewLineEscaped("\\\r\n")); 497 EXPECT_TRUE(hasNewLineEscaped("\\\n\r")); 498 EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r")); 499 EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r\n")); 500 501 EXPECT_FALSE(hasNewLineEscaped("\\\r\r")); 502 EXPECT_FALSE(hasNewLineEscaped("\\\r\r\n")); 503 EXPECT_FALSE(hasNewLineEscaped("\\\n\n")); 504 EXPECT_FALSE(hasNewLineEscaped("\r")); 505 EXPECT_FALSE(hasNewLineEscaped("\n")); 506 EXPECT_FALSE(hasNewLineEscaped("\r\n")); 507 EXPECT_FALSE(hasNewLineEscaped("\n\r")); 508 EXPECT_FALSE(hasNewLineEscaped("\r\r")); 509 EXPECT_FALSE(hasNewLineEscaped("\n\n")); 510 } 511 512 TEST_F(LexerTest, GetBeginningOfTokenWithEscapedNewLine) { 513 // Each line should have the same length for 514 // further offset calculation to be more straightforward. 515 const unsigned IdentifierLength = 8; 516 std::string TextToLex = "rabarbar\n" 517 "foo\\\nbar\n" 518 "foo\\\rbar\n" 519 "fo\\\r\nbar\n" 520 "foo\\\n\rba\n"; 521 std::vector<tok::TokenKind> ExpectedTokens{5, tok::identifier}; 522 std::vector<Token> LexedTokens = CheckLex(TextToLex, ExpectedTokens); 523 524 for (const Token &Tok : LexedTokens) { 525 std::pair<FileID, unsigned> OriginalLocation = 526 SourceMgr.getDecomposedLoc(Tok.getLocation()); 527 for (unsigned Offset = 0; Offset < IdentifierLength; ++Offset) { 528 SourceLocation LookupLocation = 529 Tok.getLocation().getLocWithOffset(Offset); 530 531 std::pair<FileID, unsigned> FoundLocation = 532 SourceMgr.getDecomposedExpansionLoc( 533 Lexer::GetBeginningOfToken(LookupLocation, SourceMgr, LangOpts)); 534 535 // Check that location returned by the GetBeginningOfToken 536 // is the same as original token location reported by Lexer. 537 EXPECT_EQ(FoundLocation.second, OriginalLocation.second); 538 } 539 } 540 } 541 542 TEST_F(LexerTest, AvoidPastEndOfStringDereference) { 543 EXPECT_TRUE(Lex(" // \\\n").empty()); 544 EXPECT_TRUE(Lex("#include <\\\\").empty()); 545 EXPECT_TRUE(Lex("#include <\\\\\n").empty()); 546 } 547 548 TEST_F(LexerTest, StringizingRasString) { 549 // For "std::string Lexer::Stringify(StringRef Str, bool Charify)". 550 std::string String1 = R"(foo 551 {"bar":[]} 552 baz)"; 553 // For "void Lexer::Stringify(SmallVectorImpl<char> &Str)". 554 SmallString<128> String2; 555 String2 += String1.c_str(); 556 557 // Corner cases. 558 std::string String3 = R"(\ 559 \n 560 \\n 561 \\)"; 562 SmallString<128> String4; 563 String4 += String3.c_str(); 564 std::string String5 = R"(a\ 565 566 567 \\b)"; 568 SmallString<128> String6; 569 String6 += String5.c_str(); 570 571 String1 = Lexer::Stringify(StringRef(String1)); 572 Lexer::Stringify(String2); 573 String3 = Lexer::Stringify(StringRef(String3)); 574 Lexer::Stringify(String4); 575 String5 = Lexer::Stringify(StringRef(String5)); 576 Lexer::Stringify(String6); 577 578 EXPECT_EQ(String1, R"(foo\n {\"bar\":[]}\n baz)"); 579 EXPECT_EQ(String2, R"(foo\n {\"bar\":[]}\n baz)"); 580 EXPECT_EQ(String3, R"(\\\n \\n\n \\\\n\n \\\\)"); 581 EXPECT_EQ(String4, R"(\\\n \\n\n \\\\n\n \\\\)"); 582 EXPECT_EQ(String5, R"(a\\\n\n\n \\\\b)"); 583 EXPECT_EQ(String6, R"(a\\\n\n\n \\\\b)"); 584 } 585 586 TEST_F(LexerTest, CharRangeOffByOne) { 587 std::vector<Token> toks = Lex(R"(#define MOO 1 588 void foo() { MOO; })"); 589 const Token &moo = toks[5]; 590 591 EXPECT_EQ(getSourceText(moo, moo), "MOO"); 592 593 SourceRange R{moo.getLocation(), moo.getLocation()}; 594 595 EXPECT_TRUE( 596 Lexer::isAtStartOfMacroExpansion(R.getBegin(), SourceMgr, LangOpts)); 597 EXPECT_TRUE( 598 Lexer::isAtEndOfMacroExpansion(R.getEnd(), SourceMgr, LangOpts)); 599 600 CharSourceRange CR = Lexer::getAsCharRange(R, SourceMgr, LangOpts); 601 602 EXPECT_EQ(Lexer::getSourceText(CR, SourceMgr, LangOpts), "MOO"); // Was "MO". 603 } 604 605 TEST_F(LexerTest, FindNextToken) { 606 Lex("int abcd = 0;\n" 607 "int xyz = abcd;\n"); 608 std::vector<std::string> GeneratedByNextToken; 609 SourceLocation Loc = 610 SourceMgr.getLocForStartOfFile(SourceMgr.getMainFileID()); 611 while (true) { 612 auto T = Lexer::findNextToken(Loc, SourceMgr, LangOpts); 613 ASSERT_TRUE(T.hasValue()); 614 if (T->is(tok::eof)) 615 break; 616 GeneratedByNextToken.push_back(getSourceText(*T, *T)); 617 Loc = T->getLocation(); 618 } 619 EXPECT_THAT(GeneratedByNextToken, ElementsAre("abcd", "=", "0", ";", "int", 620 "xyz", "=", "abcd", ";")); 621 } 622 623 TEST_F(LexerTest, CreatedFIDCountForPredefinedBuffer) { 624 TrivialModuleLoader ModLoader; 625 auto PP = CreatePP("", ModLoader); 626 while (1) { 627 Token tok; 628 PP->Lex(tok); 629 if (tok.is(tok::eof)) 630 break; 631 } 632 EXPECT_EQ(SourceMgr.getNumCreatedFIDsForFileID(PP->getPredefinesFileID()), 633 1U); 634 } 635 } // anonymous namespace 636