1 //===- unittests/Lex/LexerTest.cpp ------ Lexer tests ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "clang/Lex/Lexer.h"
10 #include "clang/Basic/Diagnostic.h"
11 #include "clang/Basic/DiagnosticOptions.h"
12 #include "clang/Basic/FileManager.h"
13 #include "clang/Basic/LangOptions.h"
14 #include "clang/Basic/SourceLocation.h"
15 #include "clang/Basic/SourceManager.h"
16 #include "clang/Basic/TargetInfo.h"
17 #include "clang/Basic/TargetOptions.h"
18 #include "clang/Basic/TokenKinds.h"
19 #include "clang/Lex/HeaderSearch.h"
20 #include "clang/Lex/HeaderSearchOptions.h"
21 #include "clang/Lex/MacroArgs.h"
22 #include "clang/Lex/MacroInfo.h"
23 #include "clang/Lex/ModuleLoader.h"
24 #include "clang/Lex/Preprocessor.h"
25 #include "clang/Lex/PreprocessorOptions.h"
26 #include "llvm/ADT/ArrayRef.h"
27 #include "llvm/ADT/StringRef.h"
28 #include "gmock/gmock.h"
29 #include "gtest/gtest.h"
30 #include <memory>
31 #include <vector>
32
33 namespace {
34 using namespace clang;
35 using testing::ElementsAre;
36
37 // The test fixture.
38 class LexerTest : public ::testing::Test {
39 protected:
LexerTest()40 LexerTest()
41 : FileMgr(FileMgrOpts),
42 DiagID(new DiagnosticIDs()),
43 Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()),
44 SourceMgr(Diags, FileMgr),
45 TargetOpts(new TargetOptions)
46 {
47 TargetOpts->Triple = "x86_64-apple-darwin11.1.0";
48 Target = TargetInfo::CreateTargetInfo(Diags, TargetOpts);
49 }
50
CreatePP(StringRef Source,TrivialModuleLoader & ModLoader)51 std::unique_ptr<Preprocessor> CreatePP(StringRef Source,
52 TrivialModuleLoader &ModLoader) {
53 std::unique_ptr<llvm::MemoryBuffer> Buf =
54 llvm::MemoryBuffer::getMemBuffer(Source);
55 SourceMgr.setMainFileID(SourceMgr.createFileID(std::move(Buf)));
56
57 HeaderSearch HeaderInfo(std::make_shared<HeaderSearchOptions>(), SourceMgr,
58 Diags, LangOpts, Target.get());
59 std::unique_ptr<Preprocessor> PP = std::make_unique<Preprocessor>(
60 std::make_shared<PreprocessorOptions>(), Diags, LangOpts, SourceMgr,
61 HeaderInfo, ModLoader,
62 /*IILookup =*/nullptr,
63 /*OwnsHeaderSearch =*/false);
64 PP->Initialize(*Target);
65 PP->EnterMainSourceFile();
66 return PP;
67 }
68
Lex(StringRef Source)69 std::vector<Token> Lex(StringRef Source) {
70 TrivialModuleLoader ModLoader;
71 PP = CreatePP(Source, ModLoader);
72
73 std::vector<Token> toks;
74 while (1) {
75 Token tok;
76 PP->Lex(tok);
77 if (tok.is(tok::eof))
78 break;
79 toks.push_back(tok);
80 }
81
82 return toks;
83 }
84
CheckLex(StringRef Source,ArrayRef<tok::TokenKind> ExpectedTokens)85 std::vector<Token> CheckLex(StringRef Source,
86 ArrayRef<tok::TokenKind> ExpectedTokens) {
87 auto toks = Lex(Source);
88 EXPECT_EQ(ExpectedTokens.size(), toks.size());
89 for (unsigned i = 0, e = ExpectedTokens.size(); i != e; ++i) {
90 EXPECT_EQ(ExpectedTokens[i], toks[i].getKind());
91 }
92
93 return toks;
94 }
95
getSourceText(Token Begin,Token End)96 std::string getSourceText(Token Begin, Token End) {
97 bool Invalid;
98 StringRef Str =
99 Lexer::getSourceText(CharSourceRange::getTokenRange(SourceRange(
100 Begin.getLocation(), End.getLocation())),
101 SourceMgr, LangOpts, &Invalid);
102 if (Invalid)
103 return "<INVALID>";
104 return std::string(Str);
105 }
106
107 FileSystemOptions FileMgrOpts;
108 FileManager FileMgr;
109 IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
110 DiagnosticsEngine Diags;
111 SourceManager SourceMgr;
112 LangOptions LangOpts;
113 std::shared_ptr<TargetOptions> TargetOpts;
114 IntrusiveRefCntPtr<TargetInfo> Target;
115 std::unique_ptr<Preprocessor> PP;
116 };
117
// The identifier `i` reaches the token stream through the nested call M(i)
// inside a macro argument; its source text is expected to be that whole call.
TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgument) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::l_paren, tok::identifier, tok::r_paren};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "M(f(M(i)))",
                                     ExpectedTokens);
  // CheckLex reports a size mismatch without aborting; stop here so the
  // indexing below can never run past the end of `toks`.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("M(i)", getSourceText(toks[2], toks[2]));
}
131
// The first token comes from the nested call M(i) that is followed by another
// token `c` in the same macro argument; its text is expected to be "M(i)".
TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgumentForEndOfMacro) {
  const std::vector<tok::TokenKind> ExpectedTokens = {tok::identifier,
                                                      tok::identifier};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "M(M(i) c)",
                                     ExpectedTokens);
  // CheckLex reports a size mismatch without aborting; stop here so the
  // indexing below can never run past the end of `toks`.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("M(i)", getSourceText(toks[0], toks[0]));
}
143
// A range that starts at a plain argument token and ends at a token produced
// by a nested macro call is expected to cover the text "c M(i)".
TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForBeginOfMacro) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::identifier, tok::identifier};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "M(c c M(i))",
                                     ExpectedTokens);
  // CheckLex reports a size mismatch without aborting; stop here so the
  // indexing below can never run past the end of `toks`.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("c M(i)", getSourceText(toks[1], toks[2]));
}
156
// A range that starts at a token produced by a nested macro call and ends at
// a plain argument token is expected to cover the text "M(i) c".
TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForEndOfMacro) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::identifier, tok::identifier};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "M(M(i) c c)",
                                     ExpectedTokens);
  // CheckLex reports a size mismatch without aborting; stop here so the
  // indexing below can never run past the end of `toks`.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("M(i) c", getSourceText(toks[0], toks[1]));
}
169
// A range whose endpoints come from two separate top-level macro invocations
// is expected to be reported as invalid.
TEST_F(LexerTest, GetSourceTextInSeparateFnMacros) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::identifier, tok::identifier, tok::identifier};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "M(c M(i)) M(M(i) c)",
                                     ExpectedTokens);
  // CheckLex reports a size mismatch without aborting; stop here so the
  // indexing below can never run past the end of `toks`.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("<INVALID>", getSourceText(toks[1], toks[2]));
}
183
// The third token is built by the `x##c` paste inside C; its source text is
// expected to be the invocation "C(i)" written in the file.
TEST_F(LexerTest, GetSourceTextWorksAcrossTokenPastes) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::l_paren, tok::identifier, tok::r_paren};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "#define C(x) M(x##c)\n"
                                     "M(f(C(i)))",
                                     ExpectedTokens);
  // CheckLex reports a size mismatch without aborting; stop here so the
  // indexing below can never run past the end of `toks`.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("C(i)", getSourceText(toks[2], toks[2]));
}
198
// `i` is wrapped in two nested calls of the same macro; its source text is
// expected to be the outermost call "M(M(i))".
TEST_F(LexerTest, GetSourceTextExpandsAcrossMultipleMacroCalls) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::l_paren, tok::identifier, tok::r_paren};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "f(M(M(i)))",
                                     ExpectedTokens);
  // CheckLex reports a size mismatch without aborting; stop here so the
  // indexing below can never run past the end of `toks`.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("M(M(i))", getSourceText(toks[2], toks[2]));
}
211
// `i` sits in the middle of the argument `f(i)`; its source text is expected
// to be just "i".
TEST_F(LexerTest, GetSourceTextInMiddleOfMacroArgument) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::l_paren, tok::identifier, tok::r_paren};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "M(f(i))",
                                     ExpectedTokens);
  // CheckLex reports a size mismatch without aborting; stop here so the
  // indexing below can never run past the end of `toks`.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("i", getSourceText(toks[2], toks[2]));
}
224
// `i` is wrapped in calls of two different macros; its source text is
// expected to be the outermost call "C(M(i))".
TEST_F(LexerTest, GetSourceTextExpandsAroundDifferentMacroCalls) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::l_paren, tok::identifier, tok::r_paren};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "#define C(x) x\n"
                                     "f(C(M(i)))",
                                     ExpectedTokens);
  // CheckLex reports a size mismatch without aborting; stop here so the
  // indexing below can never run past the end of `toks`.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("C(M(i))", getSourceText(toks[2], toks[2]));
}
238
// C emits `c` before its argument, so `i` is not the first token of C's
// expansion; its source text is expected to stop at the inner call "M(i)".
TEST_F(LexerTest, GetSourceTextOnlyExpandsIfFirstTokenInMacro) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::l_paren, tok::identifier, tok::identifier,
      tok::r_paren};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "#define C(x) c x\n"
                                     "f(C(M(i)))",
                                     ExpectedTokens);
  // CheckLex reports a size mismatch without aborting; stop here so the
  // indexing below can never run past the end of `toks`.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("M(i)", getSourceText(toks[3], toks[3]));
}
253
// C forwards its argument to M, and the argument itself contains M(i); the
// source text of `i` is expected to be "M(i)".
TEST_F(LexerTest, GetSourceTextExpandsRecursively) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::identifier, tok::l_paren, tok::identifier,
      tok::r_paren};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "#define C(x) c M(x)\n"
                                     "C(f(M(i)))",
                                     ExpectedTokens);
  // CheckLex reports a size mismatch without aborting; stop here so the
  // indexing below can never run past the end of `toks`.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("M(i)", getSourceText(toks[3], toks[3]));
}
268
// Exercises several static Lexer helpers (isAtStartOfMacroExpansion,
// isAtEndOfMacroExpansion, makeFileCharRange, getSourceText and
// getImmediateMacroName) on tokens that come out of macro expansions.
TEST_F(LexerTest, LexAPI) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      // Line 1 (after the #defines)
      tok::l_square, tok::identifier, tok::r_square,
      tok::l_square, tok::identifier, tok::r_square,
      // Line 2
      tok::identifier, tok::identifier, tok::identifier, tok::identifier};

  std::vector<Token> toks = CheckLex("#define M(x) [x]\n"
                                     "#define N(x) x\n"
                                     "#define INN(x) x\n"
                                     "#define NOF1 INN(val)\n"
                                     "#define NOF2 val\n"
                                     "M(foo) N([bar])\n"
                                     "N(INN(val)) N(NOF1) N(NOF2) N(val)",
                                     ExpectedTokens);
  // CheckLex reports a size mismatch without aborting; stop here so the
  // indexing of toks[0..9] below can never run past the end of `toks`.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  // Tokens 0-2 are the `[`, `foo` and `]` produced by `M(foo)`.
  SourceLocation lsqrLoc = toks[0].getLocation();
  SourceLocation idLoc = toks[1].getLocation();
  SourceLocation rsqrLoc = toks[2].getLocation();
  CharSourceRange macroRange = SourceMgr.getExpansionRange(lsqrLoc);

  SourceLocation Loc;
  EXPECT_TRUE(Lexer::isAtStartOfMacroExpansion(lsqrLoc, SourceMgr, LangOpts, &Loc));
  EXPECT_EQ(Loc, macroRange.getBegin());
  EXPECT_FALSE(Lexer::isAtStartOfMacroExpansion(idLoc, SourceMgr, LangOpts));
  EXPECT_FALSE(Lexer::isAtEndOfMacroExpansion(idLoc, SourceMgr, LangOpts));
  EXPECT_TRUE(Lexer::isAtEndOfMacroExpansion(rsqrLoc, SourceMgr, LangOpts, &Loc));
  EXPECT_EQ(Loc, macroRange.getEnd());
  EXPECT_TRUE(macroRange.isTokenRange());

  // Ranges that cover only part of the `M(foo)` expansion are expected to be
  // rejected; the range covering all of it maps to the expansion range.
  CharSourceRange range = Lexer::makeFileCharRange(
      CharSourceRange::getTokenRange(lsqrLoc, idLoc), SourceMgr, LangOpts);
  EXPECT_TRUE(range.isInvalid());
  range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(idLoc, rsqrLoc),
                                   SourceMgr, LangOpts);
  EXPECT_TRUE(range.isInvalid());
  range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(lsqrLoc, rsqrLoc),
                                   SourceMgr, LangOpts);
  EXPECT_FALSE(range.isTokenRange());
  EXPECT_EQ(range.getAsRange(),
            SourceRange(macroRange.getBegin(),
                        macroRange.getEnd().getLocWithOffset(1)));

  StringRef text = Lexer::getSourceText(
      CharSourceRange::getTokenRange(lsqrLoc, rsqrLoc),
      SourceMgr, LangOpts);
  EXPECT_EQ(text, "M(foo)");

  // Tokens 3-5 are the `[`, `bar` and `]` passed as the argument of `N([bar])`.
  SourceLocation macroLsqrLoc = toks[3].getLocation();
  SourceLocation macroIdLoc = toks[4].getLocation();
  SourceLocation macroRsqrLoc = toks[5].getLocation();
  SourceLocation fileLsqrLoc = SourceMgr.getSpellingLoc(macroLsqrLoc);
  SourceLocation fileIdLoc = SourceMgr.getSpellingLoc(macroIdLoc);
  SourceLocation fileRsqrLoc = SourceMgr.getSpellingLoc(macroRsqrLoc);

  // Partial ranges inside the argument are expected to map onto the spelling
  // locations; the end is one past the last character of the final token.
  range = Lexer::makeFileCharRange(
      CharSourceRange::getTokenRange(macroLsqrLoc, macroIdLoc),
      SourceMgr, LangOpts);
  EXPECT_EQ(SourceRange(fileLsqrLoc, fileIdLoc.getLocWithOffset(3)),
            range.getAsRange());

  range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(macroIdLoc, macroRsqrLoc),
                                   SourceMgr, LangOpts);
  EXPECT_EQ(SourceRange(fileIdLoc, fileRsqrLoc.getLocWithOffset(1)),
            range.getAsRange());

  macroRange = SourceMgr.getExpansionRange(macroLsqrLoc);
  range = Lexer::makeFileCharRange(
      CharSourceRange::getTokenRange(macroLsqrLoc, macroRsqrLoc),
      SourceMgr, LangOpts);
  EXPECT_EQ(SourceRange(macroRange.getBegin(), macroRange.getEnd().getLocWithOffset(1)),
            range.getAsRange());

  text = Lexer::getSourceText(
      CharSourceRange::getTokenRange(SourceRange(macroLsqrLoc, macroIdLoc)),
      SourceMgr, LangOpts);
  EXPECT_EQ(text, "[bar");

  // Tokens 6-9 are the four `val` identifiers from the second source line.
  SourceLocation idLoc1 = toks[6].getLocation();
  SourceLocation idLoc2 = toks[7].getLocation();
  SourceLocation idLoc3 = toks[8].getLocation();
  SourceLocation idLoc4 = toks[9].getLocation();
  EXPECT_EQ("INN", Lexer::getImmediateMacroName(idLoc1, SourceMgr, LangOpts));
  EXPECT_EQ("INN", Lexer::getImmediateMacroName(idLoc2, SourceMgr, LangOpts));
  EXPECT_EQ("NOF2", Lexer::getImmediateMacroName(idLoc3, SourceMgr, LangOpts));
  EXPECT_EQ("N", Lexer::getImmediateMacroName(idLoc4, SourceMgr, LangOpts));
}
365
// Checks Lexer::makeFileCharRange on ranges that end at the first half of a
// `>>` token that was split with Preprocessor::SplitToken.
TEST_F(LexerTest, HandlesSplitTokens) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      // Line 1 (after the #defines)
      tok::identifier, tok::less, tok::identifier, tok::less,
      tok::greatergreater,
      // Line 2
      tok::identifier, tok::less, tok::identifier, tok::less,
      tok::greatergreater};

  std::vector<Token> toks = CheckLex("#define TY ty\n"
                                     "#define RANGLE ty<ty<>>\n"
                                     "TY<ty<>>\n"
                                     "RANGLE",
                                     ExpectedTokens);
  // CheckLex reports a size mismatch without aborting; stop here so the
  // indexing below can never run past the end of `toks`.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  SourceLocation outerTyLoc = toks[0].getLocation();
  SourceLocation innerTyLoc = toks[2].getLocation();
  SourceLocation gtgtLoc = toks[4].getLocation();
  // Split the token to simulate the action of the parser and force creation of
  // an `ExpansionTokenRange`.
  SourceLocation rangleLoc = PP->SplitToken(gtgtLoc, 1);

  // Verify that it only captures the first greater-than and not the second one.
  CharSourceRange range = Lexer::makeFileCharRange(
      CharSourceRange::getTokenRange(innerTyLoc, rangleLoc), SourceMgr,
      LangOpts);
  EXPECT_TRUE(range.isCharRange());
  EXPECT_EQ(range.getAsRange(),
            SourceRange(innerTyLoc, gtgtLoc.getLocWithOffset(1)));

  // Verify case where range begins in a macro expansion.
  range = Lexer::makeFileCharRange(
      CharSourceRange::getTokenRange(outerTyLoc, rangleLoc), SourceMgr,
      LangOpts);
  EXPECT_TRUE(range.isCharRange());
  EXPECT_EQ(range.getAsRange(),
            SourceRange(SourceMgr.getExpansionLoc(outerTyLoc),
                        gtgtLoc.getLocWithOffset(1)));

  SourceLocation macroInnerTyLoc = toks[7].getLocation();
  SourceLocation macroGtgtLoc = toks[9].getLocation();
  // Split the token to simulate the action of the parser and force creation of
  // an `ExpansionTokenRange`.
  SourceLocation macroRAngleLoc = PP->SplitToken(macroGtgtLoc, 1);

  // Verify that it fails (because it only captures the first greater-than and
  // not the second one, so it doesn't span the entire macro expansion).
  range = Lexer::makeFileCharRange(
      CharSourceRange::getTokenRange(macroInnerTyLoc, macroRAngleLoc),
      SourceMgr, LangOpts);
  EXPECT_TRUE(range.isInvalid());
}
424
// Checks the size of the FileID that holds the "helper1" macro argument of M2.
TEST_F(LexerTest, DontMergeMacroArgsFromDifferentMacroFiles) {
  std::vector<Token> toks =
      Lex("#define helper1 0\n"
          "void helper2(const char *, ...);\n"
          "#define M1(a, ...) helper2(a, ##__VA_ARGS__)\n"
          "#define M2(a, ...) M1(a, helper1, ##__VA_ARGS__)\n"
          "void f1() { M2(\"a\", \"b\"); }");
  // Lex() performs no checks of its own; fail cleanly instead of reading past
  // the end of the vector if fewer tokens than expected were produced.
  ASSERT_GT(toks.size(), 20u);

  // Check the file corresponding to the "helper1" macro arg in M2.
  //
  // The lexer used to report its size as 31, meaning that the end of the
  // expansion would be on the *next line* (just past `M2("a", "b")`). Make
  // sure that we get the correct end location (the comma after "helper1").
  SourceLocation helper1ArgLoc = toks[20].getLocation();
  EXPECT_EQ(SourceMgr.getFileIDSize(SourceMgr.getFileID(helper1ArgLoc)), 8U);
}
441
// Builds a three-parameter function-like macro by hand, feeds it three
// arguments lexed from a string, and checks the result of stringifying each
// argument (and, in assert-enabled builds, that argument #3 is rejected).
TEST_F(LexerTest, DontOverallocateStringifyArgs) {
  TrivialModuleLoader ModLoader;
  auto PP = CreatePP("\"StrArg\", 5, 'C'", ModLoader);

  llvm::BumpPtrAllocator Allocator;
  // Value-initialize so that setParameterList copies null pointers rather than
  // indeterminate ones; only the parameter count matters to this test.
  std::array<IdentifierInfo *, 3> ParamList{};
  MacroInfo *MI = PP->AllocateMacroInfo({});
  MI->setIsFunctionLike();
  MI->setParameterList(ParamList, Allocator);
  EXPECT_EQ(3u, MI->getNumParams());
  EXPECT_TRUE(MI->isFunctionLike());

  // Separator token placed after every argument. Reset it first so that none
  // of its fields are left uninitialized before it is copied.
  Token Eof;
  Eof.startToken();
  Eof.setKind(tok::eof);
  std::vector<Token> ArgTokens;
  while (1) {
    Token tok;
    PP->Lex(tok);
    if (tok.is(tok::eof)) {
      ArgTokens.push_back(Eof);
      break;
    }
    // Each comma in the lexed text ends one argument.
    if (tok.is(tok::comma))
      ArgTokens.push_back(Eof);
    else
      ArgTokens.push_back(tok);
  }

  // MacroArgs must be released through destroy(), not delete.
  auto MacroArgsDeleter = [&PP](MacroArgs *M) { M->destroy(*PP); };
  std::unique_ptr<MacroArgs, decltype(MacroArgsDeleter)> MA(
      MacroArgs::create(MI, ArgTokens, false, *PP), MacroArgsDeleter);
  auto StringifyArg = [&](int ArgNo) {
    return MA->StringifyArgument(MA->getUnexpArgument(ArgNo), *PP,
                                 /*Charify=*/false, {}, {});
  };
  Token Result = StringifyArg(0);
  EXPECT_EQ(tok::string_literal, Result.getKind());
  EXPECT_STREQ("\"\\\"StrArg\\\"\"", Result.getLiteralData());
  Result = StringifyArg(1);
  EXPECT_EQ(tok::string_literal, Result.getKind());
  EXPECT_STREQ("\"5\"", Result.getLiteralData());
  Result = StringifyArg(2);
  EXPECT_EQ(tok::string_literal, Result.getKind());
  EXPECT_STREQ("\"'C'\"", Result.getLiteralData());
#if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST
  EXPECT_DEATH(StringifyArg(3), "Invalid arg #");
#endif
}
490
// Table-driven check of Lexer::isNewLineEscaped, always queried at the last
// character of each sample string.
TEST_F(LexerTest, IsNewLineEscapedValid) {
  const auto LastCharIsEscapedNewLine = [](const char *Text) {
    const char *Last = Text + strlen(Text) - 1;
    return Lexer::isNewLineEscaped(Text, Last);
  };

  // Samples for which the query is expected to succeed.
  const char *const Escaped[] = {
      "\\\r",
      "\\\n",
      "\\\r\n",
      "\\\n\r",
      "\\ \t\v\f\r",
      "\\ \t\v\f\r\n",
  };
  unsigned Index = 0;
  for (const char *Sample : Escaped) {
    EXPECT_TRUE(LastCharIsEscapedNewLine(Sample)) << "escaped case #" << Index;
    ++Index;
  }

  // Samples for which the query is expected to fail.
  const char *const NotEscaped[] = {
      "\\\r\r",
      "\\\r\r\n",
      "\\\n\n",
      "\r",
      "\n",
      "\r\n",
      "\n\r",
      "\r\r",
      "\n\n",
  };
  Index = 0;
  for (const char *Sample : NotEscaped) {
    EXPECT_FALSE(LastCharIsEscapedNewLine(Sample))
        << "unescaped case #" << Index;
    ++Index;
  }
}
513
// For every offset inside each lexed identifier (some of which contain
// escaped newlines), Lexer::GetBeginningOfToken must report the same start
// offset that the lexer recorded for the token.
TEST_F(LexerTest, GetBeginningOfTokenWithEscapedNewLine) {
  // All identifiers occupy the same number of source characters, which keeps
  // the offset loop below simple.
  const unsigned IdentifierLength = 8;
  std::string TextToLex = "rabarbar\n"
                          "foo\\\nbar\n"
                          "foo\\\rbar\n"
                          "fo\\\r\nbar\n"
                          "foo\\\n\rba\n";
  // Five identifier tokens are expected, one per line of the text above.
  std::vector<tok::TokenKind> ExpectedTokens(5, tok::identifier);
  std::vector<Token> LexedTokens = CheckLex(TextToLex, ExpectedTokens);

  for (const Token &Tok : LexedTokens) {
    const SourceLocation TokenStart = Tok.getLocation();
    const std::pair<FileID, unsigned> Recorded =
        SourceMgr.getDecomposedLoc(TokenStart);

    for (unsigned Delta = 0; Delta != IdentifierLength; ++Delta) {
      const SourceLocation Probe = TokenStart.getLocWithOffset(Delta);
      const SourceLocation Begin =
          Lexer::GetBeginningOfToken(Probe, SourceMgr, LangOpts);
      const std::pair<FileID, unsigned> Found =
          SourceMgr.getDecomposedExpansionLoc(Begin);

      // The offset found from inside the token must equal the offset the
      // lexer originally reported for it.
      EXPECT_EQ(Found.second, Recorded.second);
    }
  }
}
543
// Each of these inputs ends in an awkward place (escaped newline in a line
// comment, unterminated #include); all are expected to yield no tokens.
TEST_F(LexerTest, AvoidPastEndOfStringDereference) {
  const char *const Inputs[] = {
      " // \\\n",
      "#include <\\\\",
      "#include <\\\\\n",
  };
  for (const char *Input : Inputs) {
    const std::vector<Token> Lexed = Lex(Input);
    EXPECT_TRUE(Lexed.empty());
  }
}
549
// Runs both Lexer::Stringify overloads over multi-line raw strings and checks
// the escaped output.
//
// Note: whitespace inside the raw literals is part of the test data. The
// continuation lines of each input start with exactly one space, matching the
// single space that follows each "\n" in the expected strings.
TEST_F(LexerTest, StringizingRasString) {
  // Case A: plain text with embedded quotes.
  // Fed to "std::string Lexer::Stringify(StringRef Str, bool Charify)".
  std::string String1 = R"(foo
 {"bar":[]}
 baz)";
  // Fed to "void Lexer::Stringify(SmallVectorImpl<char> &Str)".
  SmallString<128> String2;
  String2 += String1.c_str();

  // Case B: corner cases built from backslashes.
  std::string String3 = R"(\
 \n
 \\n
 \\)";
  SmallString<128> String4;
  String4 += String3.c_str();

  // Case C: backslashes combined with empty lines.
  std::string String5 = R"(a\


 \\b)";
  SmallString<128> String6;
  String6 += String5.c_str();

  // Stringify every input, using the returning overload for the std::string
  // copies and the in-place overload for the SmallString copies.
  Lexer::Stringify(String2);
  Lexer::Stringify(String4);
  Lexer::Stringify(String6);
  String1 = Lexer::Stringify(StringRef(String1));
  String3 = Lexer::Stringify(StringRef(String3));
  String5 = Lexer::Stringify(StringRef(String5));

  // Both overloads must agree on each case.
  EXPECT_EQ(String1, R"(foo\n {\"bar\":[]}\n baz)");
  EXPECT_EQ(String2, R"(foo\n {\"bar\":[]}\n baz)");
  EXPECT_EQ(String3, R"(\\\n \\n\n \\\\n\n \\\\)");
  EXPECT_EQ(String4, R"(\\\n \\n\n \\\\n\n \\\\)");
  EXPECT_EQ(String5, R"(a\\\n\n\n \\\\b)");
  EXPECT_EQ(String6, R"(a\\\n\n\n \\\\b)");
}
587
// Converting the token range of a whole object-like macro use into a char
// range must still cover the full spelling "MOO".
TEST_F(LexerTest, CharRangeOffByOne) {
  std::vector<Token> toks = Lex(R"(#define MOO 1
 void foo() { MOO; })");
  // Lex() performs no checks of its own; fail cleanly instead of reading past
  // the end of the vector if fewer tokens than expected were produced.
  ASSERT_GT(toks.size(), 5u);
  const Token &moo = toks[5];

  EXPECT_EQ(getSourceText(moo, moo), "MOO");

  SourceRange R{moo.getLocation(), moo.getLocation()};

  EXPECT_TRUE(
      Lexer::isAtStartOfMacroExpansion(R.getBegin(), SourceMgr, LangOpts));
  EXPECT_TRUE(
      Lexer::isAtEndOfMacroExpansion(R.getEnd(), SourceMgr, LangOpts));

  CharSourceRange CR = Lexer::getAsCharRange(R, SourceMgr, LangOpts);

  EXPECT_EQ(Lexer::getSourceText(CR, SourceMgr, LangOpts), "MOO"); // Was "MO".
}
606
// Walks the main file with Lexer::findNextToken, starting from the beginning
// of the file, and collects the spelling of every token it returns until
// end-of-file. The expected list starts at "abcd": the leading `int` is not
// among the returned tokens.
TEST_F(LexerTest, FindNextToken) {
  Lex("int abcd = 0;\n"
      "int xyz = abcd;\n");

  std::vector<std::string> Spellings;
  SourceLocation Cursor =
      SourceMgr.getLocForStartOfFile(SourceMgr.getMainFileID());
  for (;;) {
    auto Next = Lexer::findNextToken(Cursor, SourceMgr, LangOpts);
    ASSERT_TRUE(Next);
    if (Next->is(tok::eof))
      break;
    Spellings.push_back(getSourceText(*Next, *Next));
    // Continue the walk from the token that was just found.
    Cursor = Next->getLocation();
  }

  EXPECT_THAT(Spellings, ElementsAre("abcd", "=", "0", ";", "int", "xyz", "=",
                                     "abcd", ";"));
}
624
// After lexing an empty main file to completion, the predefines buffer is
// expected to account for exactly one created FileID.
TEST_F(LexerTest, CreatedFIDCountForPredefinedBuffer) {
  TrivialModuleLoader ModLoader;
  auto PP = CreatePP("", ModLoader);

  // Drain the preprocessor until it reports end-of-file.
  Token Current;
  do {
    PP->Lex(Current);
  } while (Current.isNot(tok::eof));

  const FileID Predefines = PP->getPredefinesFileID();
  EXPECT_EQ(SourceMgr.getNumCreatedFIDsForFileID(Predefines), 1U);
}
637
// With LangOpts.LineComment disabled, lexing the same buffer through the
// preprocessor and through a raw Lexer must yield the same token kinds, in
// the same order and the same number.
TEST_F(LexerTest, RawAndNormalLexSameForLineComments) {
  const llvm::StringLiteral Source = R"cpp(
 // First line comment.
 //* Second line comment which is ambigious.
 ; // Have a non-comment token to make sure something is lexed.
 )cpp";
  LangOpts.LineComment = false;
  auto Toks = Lex(Source);

  // Set up a raw lexer over the very buffer the preprocessor just consumed.
  auto &SM = PP->getSourceManager();
  const FileID MainFile = SM.getMainFileID();
  auto SrcBuffer = SM.getBufferData(MainFile);
  const char *BufferBegin = SrcBuffer.data();
  const char *BufferEnd = SrcBuffer.data() + SrcBuffer.size();
  Lexer L(SM.getLocForStartOfFile(MainFile), PP->getLangOpts(), BufferBegin,
          BufferBegin, BufferEnd);

  // Tokens from the preprocessor that the raw lexer has not yet matched.
  auto Remaining = llvm::makeArrayRef(Toks);
  EXPECT_FALSE(Remaining.empty());

  // LexFromRawLexer returns true once it reaches the end of the buffer.
  clang::Token RawTok;
  while (!L.LexFromRawLexer(RawTok)) {
    ASSERT_FALSE(Remaining.empty());
    EXPECT_EQ(RawTok.getKind(), Remaining.front().getKind());
    Remaining = Remaining.drop_front();
  }
  // Every preprocessor token must have been matched by a raw token.
  EXPECT_TRUE(Remaining.empty());
}
662 } // anonymous namespace
663