1 //===- TokenLexer.cpp - Lex from a token stream ---------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the TokenLexer interface. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "clang/Lex/TokenLexer.h" 14 #include "clang/Basic/Diagnostic.h" 15 #include "clang/Basic/IdentifierTable.h" 16 #include "clang/Basic/LangOptions.h" 17 #include "clang/Basic/SourceLocation.h" 18 #include "clang/Basic/SourceManager.h" 19 #include "clang/Basic/TokenKinds.h" 20 #include "clang/Lex/LexDiagnostic.h" 21 #include "clang/Lex/Lexer.h" 22 #include "clang/Lex/MacroArgs.h" 23 #include "clang/Lex/MacroInfo.h" 24 #include "clang/Lex/Preprocessor.h" 25 #include "clang/Lex/Token.h" 26 #include "clang/Lex/VariadicMacroSupport.h" 27 #include "llvm/ADT/ArrayRef.h" 28 #include "llvm/ADT/SmallString.h" 29 #include "llvm/ADT/SmallVector.h" 30 #include "llvm/ADT/iterator_range.h" 31 #include <cassert> 32 #include <cstring> 33 34 using namespace clang; 35 36 /// Create a TokenLexer for the specified macro with the specified actual 37 /// arguments. Note that this ctor takes ownership of the ActualArgs pointer. 38 void TokenLexer::Init(Token &Tok, SourceLocation ELEnd, MacroInfo *MI, 39 MacroArgs *Actuals) { 40 // If the client is reusing a TokenLexer, make sure to free any memory 41 // associated with it. 
42 destroy(); 43 44 Macro = MI; 45 ActualArgs = Actuals; 46 CurTokenIdx = 0; 47 48 ExpandLocStart = Tok.getLocation(); 49 ExpandLocEnd = ELEnd; 50 AtStartOfLine = Tok.isAtStartOfLine(); 51 HasLeadingSpace = Tok.hasLeadingSpace(); 52 NextTokGetsSpace = false; 53 Tokens = &*Macro->tokens_begin(); 54 OwnsTokens = false; 55 DisableMacroExpansion = false; 56 NumTokens = Macro->tokens_end()-Macro->tokens_begin(); 57 MacroExpansionStart = SourceLocation(); 58 59 SourceManager &SM = PP.getSourceManager(); 60 MacroStartSLocOffset = SM.getNextLocalOffset(); 61 62 if (NumTokens > 0) { 63 assert(Tokens[0].getLocation().isValid()); 64 assert((Tokens[0].getLocation().isFileID() || Tokens[0].is(tok::comment)) && 65 "Macro defined in macro?"); 66 assert(ExpandLocStart.isValid()); 67 68 // Reserve a source location entry chunk for the length of the macro 69 // definition. Tokens that get lexed directly from the definition will 70 // have their locations pointing inside this chunk. This is to avoid 71 // creating separate source location entries for each token. 72 MacroDefStart = SM.getExpansionLoc(Tokens[0].getLocation()); 73 MacroDefLength = Macro->getDefinitionLength(SM); 74 MacroExpansionStart = SM.createExpansionLoc(MacroDefStart, 75 ExpandLocStart, 76 ExpandLocEnd, 77 MacroDefLength); 78 } 79 80 // If this is a function-like macro, expand the arguments and change 81 // Tokens to point to the expanded tokens. 82 if (Macro->isFunctionLike() && Macro->getNumParams()) 83 ExpandFunctionArguments(); 84 85 // Mark the macro as currently disabled, so that it is not recursively 86 // expanded. The macro must be disabled only after argument pre-expansion of 87 // function-like macro arguments occurs. 88 Macro->DisableMacro(); 89 } 90 91 /// Create a TokenLexer for the specified token stream. This does not 92 /// take ownership of the specified token vector. 
93 void TokenLexer::Init(const Token *TokArray, unsigned NumToks, 94 bool disableMacroExpansion, bool ownsTokens) { 95 // If the client is reusing a TokenLexer, make sure to free any memory 96 // associated with it. 97 destroy(); 98 99 Macro = nullptr; 100 ActualArgs = nullptr; 101 Tokens = TokArray; 102 OwnsTokens = ownsTokens; 103 DisableMacroExpansion = disableMacroExpansion; 104 NumTokens = NumToks; 105 CurTokenIdx = 0; 106 ExpandLocStart = ExpandLocEnd = SourceLocation(); 107 AtStartOfLine = false; 108 HasLeadingSpace = false; 109 NextTokGetsSpace = false; 110 MacroExpansionStart = SourceLocation(); 111 112 // Set HasLeadingSpace/AtStartOfLine so that the first token will be 113 // returned unmodified. 114 if (NumToks != 0) { 115 AtStartOfLine = TokArray[0].isAtStartOfLine(); 116 HasLeadingSpace = TokArray[0].hasLeadingSpace(); 117 } 118 } 119 120 void TokenLexer::destroy() { 121 // If this was a function-like macro that actually uses its arguments, delete 122 // the expanded tokens. 123 if (OwnsTokens) { 124 delete [] Tokens; 125 Tokens = nullptr; 126 OwnsTokens = false; 127 } 128 129 // TokenLexer owns its formal arguments. 130 if (ActualArgs) ActualArgs->destroy(PP); 131 } 132 133 bool TokenLexer::MaybeRemoveCommaBeforeVaArgs( 134 SmallVectorImpl<Token> &ResultToks, bool HasPasteOperator, MacroInfo *Macro, 135 unsigned MacroArgNo, Preprocessor &PP) { 136 // Is the macro argument __VA_ARGS__? 137 if (!Macro->isVariadic() || MacroArgNo != Macro->getNumParams()-1) 138 return false; 139 140 // In Microsoft-compatibility mode, a comma is removed in the expansion 141 // of " ... , __VA_ARGS__ " if __VA_ARGS__ is empty. This extension is 142 // not supported by gcc. 143 if (!HasPasteOperator && !PP.getLangOpts().MSVCCompat) 144 return false; 145 146 // GCC removes the comma in the expansion of " ... , ## __VA_ARGS__ " if 147 // __VA_ARGS__ is empty, but not in strict C99 mode where there are no 148 // named arguments, where it remains. 
In all other modes, including C99 149 // with GNU extensions, it is removed regardless of named arguments. 150 // Microsoft also appears to support this extension, unofficially. 151 if (PP.getLangOpts().C99 && !PP.getLangOpts().GNUMode 152 && Macro->getNumParams() < 2) 153 return false; 154 155 // Is a comma available to be removed? 156 if (ResultToks.empty() || !ResultToks.back().is(tok::comma)) 157 return false; 158 159 // Issue an extension diagnostic for the paste operator. 160 if (HasPasteOperator) 161 PP.Diag(ResultToks.back().getLocation(), diag::ext_paste_comma); 162 163 // Remove the comma. 164 ResultToks.pop_back(); 165 166 if (!ResultToks.empty()) { 167 // If the comma was right after another paste (e.g. "X##,##__VA_ARGS__"), 168 // then removal of the comma should produce a placemarker token (in C99 169 // terms) which we model by popping off the previous ##, giving us a plain 170 // "X" when __VA_ARGS__ is empty. 171 if (ResultToks.back().is(tok::hashhash)) 172 ResultToks.pop_back(); 173 174 // Remember that this comma was elided. 175 ResultToks.back().setFlag(Token::CommaAfterElided); 176 } 177 178 // Never add a space, even if the comma, ##, or arg had a space. 179 NextTokGetsSpace = false; 180 return true; 181 } 182 183 void TokenLexer::stringifyVAOPTContents( 184 SmallVectorImpl<Token> &ResultToks, const VAOptExpansionContext &VCtx, 185 const SourceLocation VAOPTClosingParenLoc) { 186 const int NumToksPriorToVAOpt = VCtx.getNumberOfTokensPriorToVAOpt(); 187 const unsigned int NumVAOptTokens = ResultToks.size() - NumToksPriorToVAOpt; 188 Token *const VAOPTTokens = 189 NumVAOptTokens ? &ResultToks[NumToksPriorToVAOpt] : nullptr; 190 191 SmallVector<Token, 64> ConcatenatedVAOPTResultToks; 192 // FIXME: Should we keep track within VCtx that we did or didnot 193 // encounter pasting - and only then perform this loop. 194 195 // Perform token pasting (concatenation) prior to stringization. 
196 for (unsigned int CurTokenIdx = 0; CurTokenIdx != NumVAOptTokens; 197 ++CurTokenIdx) { 198 if (VAOPTTokens[CurTokenIdx].is(tok::hashhash)) { 199 assert(CurTokenIdx != 0 && 200 "Can not have __VAOPT__ contents begin with a ##"); 201 Token &LHS = VAOPTTokens[CurTokenIdx - 1]; 202 pasteTokens(LHS, llvm::makeArrayRef(VAOPTTokens, NumVAOptTokens), 203 CurTokenIdx); 204 // Replace the token prior to the first ## in this iteration. 205 ConcatenatedVAOPTResultToks.back() = LHS; 206 if (CurTokenIdx == NumVAOptTokens) 207 break; 208 } 209 ConcatenatedVAOPTResultToks.push_back(VAOPTTokens[CurTokenIdx]); 210 } 211 212 ConcatenatedVAOPTResultToks.push_back(VCtx.getEOFTok()); 213 // Get the SourceLocation that represents the start location within 214 // the macro definition that marks where this string is substituted 215 // into: i.e. the __VA_OPT__ and the ')' within the spelling of the 216 // macro definition, and use it to indicate that the stringified token 217 // was generated from that location. 218 const SourceLocation ExpansionLocStartWithinMacro = 219 getExpansionLocForMacroDefLoc(VCtx.getVAOptLoc()); 220 const SourceLocation ExpansionLocEndWithinMacro = 221 getExpansionLocForMacroDefLoc(VAOPTClosingParenLoc); 222 223 Token StringifiedVAOPT = MacroArgs::StringifyArgument( 224 &ConcatenatedVAOPTResultToks[0], PP, VCtx.hasCharifyBefore() /*Charify*/, 225 ExpansionLocStartWithinMacro, ExpansionLocEndWithinMacro); 226 227 if (VCtx.getLeadingSpaceForStringifiedToken()) 228 StringifiedVAOPT.setFlag(Token::LeadingSpace); 229 230 StringifiedVAOPT.setFlag(Token::StringifiedInMacro); 231 // Resize (shrink) the token stream to just capture this stringified token. 232 ResultToks.resize(NumToksPriorToVAOpt + 1); 233 ResultToks.back() = StringifiedVAOPT; 234 } 235 236 /// Expand the arguments of a function-like macro so that we can quickly 237 /// return preexpanded tokens from Tokens. 
void TokenLexer::ExpandFunctionArguments() {
  // Overview: walk the macro's replacement list ('Tokens') once and build the
  // substituted sequence in ResultToks. Each token is handled by the first
  // case below that matches:
  //   - __VA_OPT__ and the tokens between its parentheses,
  //   - '#' / '#@' followed by a parameter (or by __VA_OPT__),
  //   - a token that is not a parameter (copied through),
  //   - a parameter that is not an operand of '##' (pre-expanded if needed),
  //   - a parameter that is an operand of '##' (substituted unexpanded, with
  //     '##' operators eaten when the argument is empty).
  SmallVector<Token, 128> ResultToks;

  // Loop through 'Tokens', expanding them into ResultToks.  Keep
  // track of whether we change anything.  If not, no need to keep them.  If so,
  // we install the newly expanded sequence as the new 'Tokens' list.
  bool MadeChange = false;

  const bool CalledWithVariadicArguments =
      ActualArgs->invokedWithVariadicArgument(Macro);

  VAOptExpansionContext VCtx(PP);

  for (unsigned I = 0, E = NumTokens; I != E; ++I) {
    const Token &CurTok = Tokens[I];
    // We don't want a space for the next token after a paste
    // operator.  In valid code, the token will get smooshed onto the
    // preceding one anyway. In assembler-with-cpp mode, invalid
    // pastes are allowed through: in this case, we do not want the
    // extra whitespace to be added.  For example, we want ". ## foo"
    // -> ".foo" not ". foo".
    if (I != 0 && !Tokens[I-1].is(tok::hashhash) && CurTok.hasLeadingSpace())
      NextTokGetsSpace = true;

    if (VCtx.isVAOptToken(CurTok)) {
      MadeChange = true;
      assert(Tokens[I + 1].is(tok::l_paren) &&
             "__VA_OPT__ must be followed by '('");

      ++I;             // Skip the l_paren
      // Record where the __VA_OPT__ is and how many result tokens precede it,
      // so its contents can later be identified inside ResultToks.
      VCtx.sawVAOptFollowedByOpeningParens(CurTok.getLocation(),
                                           ResultToks.size());

      continue;
    }

    // We have entered into the __VA_OPT__ context, so handle tokens
    // appropriately.
    if (VCtx.isInVAOpt()) {
      // If we are about to process a token that is either an argument to
      // __VA_OPT__ or its closing rparen, then:
      //  1) If the token is the closing rparen that exits us out of __VA_OPT__,
      //  perform any necessary stringification or placemarker processing,
      //  and/or skip to the next token.
      //  2) else if macro was invoked without variadic arguments skip this
      //  token.
      //  3) else (macro was invoked with variadic arguments) process the token
      //  normally.

      if (Tokens[I].is(tok::l_paren))
        VCtx.sawOpeningParen(Tokens[I].getLocation());
      // Continue skipping tokens within __VA_OPT__ if the macro was not
      // called with variadic arguments, else let the rest of the loop handle
      // this token. Note sawClosingParen() returns true only if the r_paren matches
      // the closing r_paren of the __VA_OPT__.
      if (!Tokens[I].is(tok::r_paren) || !VCtx.sawClosingParen()) {
        if (!CalledWithVariadicArguments) {
          // Skip this token.
          continue;
        }
        // ... else the macro was called with variadic arguments, and we do not
        // have a closing rparen - so process this token normally.
      } else {
        // Current token is the closing r_paren which marks the end of the
        // __VA_OPT__ invocation, so handle any place-marker pasting (if
        // empty) by removing hashhash either before (if exists) or after. And
        // also stringify the entire contents if VAOPT was preceded by a hash,
        // but do so only after any token concatenation that needs to occur
        // within the contents of VAOPT.

        if (VCtx.hasStringifyOrCharifyBefore()) {
          // Replace all the tokens just added from within VAOPT into a single
          // stringified token. This requires token-pasting to eagerly occur
          // within these tokens. If either the contents of VAOPT were empty
          // or the macro wasn't called with any variadic arguments, the result
          // is a token that represents an empty string.
          stringifyVAOPTContents(ResultToks, VCtx,
                                 /*ClosingParenLoc*/ Tokens[I].getLocation());

        } else if (/*No tokens within VAOPT*/ !(
                       ResultToks.size() - VCtx.getNumberOfTokensPriorToVAOpt())) {
          // Treat VAOPT as a placemarker token.  Eat either the '##' before the
          // RHS/VAOPT (if one exists, suggesting that the LHS (if any) to that
          // hashhash was not a placemarker) or the '##'
          // after VAOPT, but not both.

          if (ResultToks.size() && ResultToks.back().is(tok::hashhash)) {
            ResultToks.pop_back();
          } else if ((I + 1 != E) && Tokens[I + 1].is(tok::hashhash)) {
            ++I; // Skip the following hashhash.
          }
        }
        VCtx.reset();
        // We processed __VA_OPT__'s closing paren (and the exit out of
        // __VA_OPT__), so skip to the next token.
        continue;
      }
    }

    // If we found the stringify operator, get the argument stringified.  The
    // preprocessor already verified that the following token is a macro
    // parameter or __VA_OPT__ when the #define was lexed.

    if (CurTok.isOneOf(tok::hash, tok::hashat)) {
      int ArgNo = Macro->getParameterNum(Tokens[I+1].getIdentifierInfo());
      assert((ArgNo != -1 || VCtx.isVAOptToken(Tokens[I + 1])) &&
             "Token following # is not an argument or __VA_OPT__!");

      if (ArgNo == -1) {
        // Handle the __VA_OPT__ case. Only note that a '#' / '#@' was seen
        // (and the pending leading space); the __VA_OPT__ token itself is
        // handled on the next iteration.
        VCtx.sawHashOrHashAtBefore(NextTokGetsSpace,
                                   CurTok.is(tok::hashat));
        continue;
      }
      // Else handle the simple argument case.
      SourceLocation ExpansionLocStart =
          getExpansionLocForMacroDefLoc(CurTok.getLocation());
      SourceLocation ExpansionLocEnd =
          getExpansionLocForMacroDefLoc(Tokens[I+1].getLocation());

      Token Res;
      if (CurTok.is(tok::hash))  // Stringify
        Res = ActualArgs->getStringifiedArgument(ArgNo, PP,
                                                 ExpansionLocStart,
                                                 ExpansionLocEnd);
      else {
        // 'charify': don't bother caching these.
        Res = MacroArgs::StringifyArgument(ActualArgs->getUnexpArgument(ArgNo),
                                           PP, true,
                                           ExpansionLocStart,
                                           ExpansionLocEnd);
      }
      Res.setFlag(Token::StringifiedInMacro);

      // The stringified/charified string leading space flag gets set to match
      // the #/#@ operator.
      if (NextTokGetsSpace)
        Res.setFlag(Token::LeadingSpace);

      ResultToks.push_back(Res);
      MadeChange = true;
      ++I;  // Skip arg name.
      NextTokGetsSpace = false;
      continue;
    }

    // Find out if there is a paste (##) operator before or after the token.
    // NonEmptyPasteBefore looks at the output built so far (a '##' was really
    // copied), whereas PasteBefore/PasteAfter look at the macro definition.
    bool NonEmptyPasteBefore =
      !ResultToks.empty() && ResultToks.back().is(tok::hashhash);
    bool PasteBefore = I != 0 && Tokens[I-1].is(tok::hashhash);
    bool PasteAfter = I+1 != E && Tokens[I+1].is(tok::hashhash);

    assert((!NonEmptyPasteBefore || PasteBefore || VCtx.isInVAOpt()) &&
           "unexpected ## in ResultToks");

    // Otherwise, if this is not an argument token, just add the token to the
    // output buffer.
    IdentifierInfo *II = CurTok.getIdentifierInfo();
    int ArgNo = II ? Macro->getParameterNum(II) : -1;
    if (ArgNo == -1) {
      // This isn't an argument, just add it.
      ResultToks.push_back(CurTok);

      if (NextTokGetsSpace) {
        ResultToks.back().setFlag(Token::LeadingSpace);
        NextTokGetsSpace = false;
      } else if (PasteBefore && !NonEmptyPasteBefore)
        // The '##' in front of this token was eaten, so the token must not
        // carry a leading space either.
        ResultToks.back().clearFlag(Token::LeadingSpace);

      continue;
    }

    // An argument is expanded somehow, the result is different than the
    // input.
    MadeChange = true;

    // Otherwise, this is a use of the argument.

    // In Microsoft mode, remove the comma before __VA_ARGS__ to ensure there
    // are no trailing commas if __VA_ARGS__ is empty.
    if (!PasteBefore && ActualArgs->isVarargsElidedUse() &&
        MaybeRemoveCommaBeforeVaArgs(ResultToks,
                                     /*HasPasteOperator=*/false,
                                     Macro, ArgNo, PP))
      continue;

    // If it is not the LHS/RHS of a ## operator, we must pre-expand the
    // argument and substitute the expanded tokens into the result.  This is
    // C99 6.10.3.1p1.
    if (!PasteBefore && !PasteAfter) {
      const Token *ResultArgToks;

      // Only preexpand the argument if it could possibly need it.  This
      // avoids some work in common cases.
      const Token *ArgTok = ActualArgs->getUnexpArgument(ArgNo);
      if (ActualArgs->ArgNeedsPreexpansion(ArgTok, PP))
        ResultArgToks = &ActualArgs->getPreExpArgument(ArgNo, PP)[0];
      else
        ResultArgToks = ArgTok;  // Use non-preexpanded tokens.

      // If the arg token expanded into anything, append it.
      if (ResultArgToks->isNot(tok::eof)) {
        size_t FirstResult = ResultToks.size();
        unsigned NumToks = MacroArgs::getArgLength(ResultArgToks);
        ResultToks.append(ResultArgToks, ResultArgToks+NumToks);

        // In Microsoft-compatibility mode, we follow MSVC's preprocessing
        // behavior by not considering single commas from nested macro
        // expansions as argument separators. Set a flag on the token so we can
        // test for this later when the macro expansion is processed.
        if (PP.getLangOpts().MSVCCompat && NumToks == 1 &&
            ResultToks.back().is(tok::comma))
          ResultToks.back().setFlag(Token::IgnoredComma);

        // If the '##' came from expanding an argument, turn it into 'unknown'
        // to avoid pasting.
        for (Token &Tok : llvm::make_range(ResultToks.begin() + FirstResult,
                                           ResultToks.end())) {
          if (Tok.is(tok::hashhash))
            Tok.setKind(tok::unknown);
        }

        // Only done when there is a valid expansion start location.
        if(ExpandLocStart.isValid()) {
          updateLocForMacroArgTokens(CurTok.getLocation(),
                                     ResultToks.begin()+FirstResult,
                                     ResultToks.end());
        }

        // If any tokens were substituted from the argument, the whitespace
        // before the first token should match the whitespace of the arg
        // identifier.
        ResultToks[FirstResult].setFlagValue(Token::LeadingSpace,
                                             NextTokGetsSpace);
        ResultToks[FirstResult].setFlagValue(Token::StartOfLine, false);
        NextTokGetsSpace = false;
      }
      continue;
    }

    // Okay, we have a token that is either the LHS or RHS of a paste (##)
    // argument.  It gets substituted as its non-pre-expanded tokens.
    const Token *ArgToks = ActualArgs->getUnexpArgument(ArgNo);
    unsigned NumToks = MacroArgs::getArgLength(ArgToks);
    if (NumToks) {  // Not an empty argument?
      bool VaArgsPseudoPaste = false;
      // If this is the GNU ", ## __VA_ARGS__" extension, and we just learned
      // that __VA_ARGS__ expands to multiple tokens, avoid a pasting error when
      // the expander tries to paste ',' with the first token of the __VA_ARGS__
      // expansion.
      if (NonEmptyPasteBefore && ResultToks.size() >= 2 &&
          ResultToks[ResultToks.size()-2].is(tok::comma) &&
          (unsigned)ArgNo == Macro->getNumParams()-1 &&
          Macro->isVariadic()) {
        VaArgsPseudoPaste = true;
        // Remove the paste operator, report use of the extension.
        PP.Diag(ResultToks.pop_back_val().getLocation(), diag::ext_paste_comma);
      }

      ResultToks.append(ArgToks, ArgToks+NumToks);

      // If the '##' came from expanding an argument, turn it into 'unknown'
      // to avoid pasting.
      for (Token &Tok : llvm::make_range(ResultToks.end() - NumToks,
                                         ResultToks.end())) {
        if (Tok.is(tok::hashhash))
          Tok.setKind(tok::unknown);
      }

      if (ExpandLocStart.isValid()) {
        updateLocForMacroArgTokens(CurTok.getLocation(),
                                   ResultToks.end()-NumToks, ResultToks.end());
      }

      // Transfer the leading whitespace information from the token
      // (the macro argument) onto the first token of the
      // expansion. Note that we don't do this for the GNU
      // pseudo-paste extension ", ## __VA_ARGS__".
      if (!VaArgsPseudoPaste) {
        ResultToks[ResultToks.size() - NumToks].setFlagValue(Token::StartOfLine,
                                                             false);
        ResultToks[ResultToks.size() - NumToks].setFlagValue(
            Token::LeadingSpace, NextTokGetsSpace);
      }

      NextTokGetsSpace = false;
      continue;
    }

    // If an empty argument is on the LHS or RHS of a paste, the standard (C99
    // 6.10.3.3p2,3) calls for a bunch of placemarker stuff to occur.  We
    // implement this by eating ## operators when a LHS or RHS expands to
    // empty.
    if (PasteAfter) {
      // Discard the argument token and skip (don't copy to the expansion
      // buffer) the paste operator after it.
      ++I;
      continue;
    }

    // If this is on the RHS of a paste operator, we've already copied the
    // paste operator to the ResultToks list, unless the LHS was empty too.
    // Remove it.
    assert(PasteBefore);
    if (NonEmptyPasteBefore) {
      assert(ResultToks.back().is(tok::hashhash));
      // Do not remove the paste operator if it is the one before __VA_OPT__
      // (and we are still processing tokens within VA_OPT).  We handle the case
      // of removing the paste operator if __VA_OPT__ reduces to the notional
      // placemarker above when we encounter the closing paren of VA_OPT.
      if (!VCtx.isInVAOpt() ||
          ResultToks.size() > VCtx.getNumberOfTokensPriorToVAOpt())
        ResultToks.pop_back();
    }

    // If this is the __VA_ARGS__ token, and if the argument wasn't provided,
    // and if the macro had at least one real argument, and if the token before
    // the ## was a comma, remove the comma.  This is a GCC extension which is
    // disabled when using -std=c99.
    if (ActualArgs->isVarargsElidedUse())
      MaybeRemoveCommaBeforeVaArgs(ResultToks,
                                   /*HasPasteOperator=*/true,
                                   Macro, ArgNo, PP);
  }

  // If anything changed, install this as the new Tokens list.
  if (MadeChange) {
    assert(!OwnsTokens && "This would leak if we already own the token list");
    // Note: the new list is not owned by this lexer (OwnsTokens stays false
    // below), so this lexer does not free it itself.
    NumTokens = ResultToks.size();
    // The tokens will be added to Preprocessor's cache and will be removed
    // when this TokenLexer finishes lexing them.
    Tokens = PP.cacheMacroExpandedTokens(this, ResultToks);

    // The preprocessor cache of macro expanded tokens owns these tokens, not
    // us.
    OwnsTokens = false;
  }
}

/// Checks if two tokens form wide string literal.
// Returns true when FirstTok is the identifier "L" and SecondTok is a literal
// token flagged as having been stringified inside a macro.
static bool isWideStringLiteralFromMacro(const Token &FirstTok,
                                         const Token &SecondTok) {
  return FirstTok.is(tok::identifier) &&
         FirstTok.getIdentifierInfo()->isStr("L") && SecondTok.isLiteral() &&
         SecondTok.stringifiedInMacro();
}

/// Lex - Lex and return a token from this macro stream.
///
/// When the stream is exhausted the call is forwarded to
/// PP.HandleEndOfTokenLexer(); for identifiers that need handling it is
/// forwarded to PP.HandleIdentifier(). In every other case \p Tok holds the
/// next token and true is returned.
bool TokenLexer::Lex(Token &Tok) {
  // Lexing off the end of the macro, pop this macro off the expansion stack.
  if (isAtEnd()) {
    // If this is a macro (not a token stream), mark the macro enabled now
    // that it is no longer being expanded.
    if (Macro) Macro->EnableMacro();

    // Hand the pending line-start/leading-space state over on a fresh token.
    Tok.startToken();
    Tok.setFlagValue(Token::StartOfLine , AtStartOfLine);
    Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace || NextTokGetsSpace);
    // Nothing was ever returned from this stream: flag it as a leading empty
    // macro.
    if (CurTokenIdx == 0)
      Tok.setFlag(Token::LeadingEmptyMacro);
    return PP.HandleEndOfTokenLexer(Tok);
  }

  SourceManager &SM = PP.getSourceManager();

  // If this is the first token of the expanded result, we inherit spacing
  // properties later.
  bool isFirstToken = CurTokenIdx == 0;

  // Get the next token to return.
  Tok = Tokens[CurTokenIdx++];

  bool TokenIsFromPaste = false;

  // If this token is followed by a token paste (##) operator, paste the tokens!
  // Note that ## is a normal token when not expanding a macro.
  if (!isAtEnd() && Macro &&
      (Tokens[CurTokenIdx].is(tok::hashhash) ||
       // Special processing of L#x macros in -fms-compatibility mode.
       // Microsoft compiler is able to form a wide string literal from
       // 'L#macro_arg' construct in a function-like macro.
       (PP.getLangOpts().MSVCCompat &&
        isWideStringLiteralFromMacro(Tok, Tokens[CurTokenIdx])))) {
    // When handling the microsoft /##/ extension, the final token is
    // returned by pasteTokens, not the pasted token.
    if (pasteTokens(Tok))
      return true;

    TokenIsFromPaste = true;
  }

  // The token's current location indicates where the token was lexed from.  We
  // need this information to compute the spelling of the token, but any
  // diagnostics for the expanded token should appear as if they came from
  // ExpansionLoc.  Pull this information together into a new SourceLocation
  // that captures all of this.
  if (ExpandLocStart.isValid() &&   // Don't do this for token streams.
      // Check that the token's location was not already set properly.
      SM.isBeforeInSLocAddrSpace(Tok.getLocation(), MacroStartSLocOffset)) {
    SourceLocation instLoc;
    if (Tok.is(tok::comment)) {
      // Comment tokens get an expansion entry of their own.
      instLoc = SM.createExpansionLoc(Tok.getLocation(),
                                      ExpandLocStart,
                                      ExpandLocEnd,
                                      Tok.getLength());
    } else {
      instLoc = getExpansionLocForMacroDefLoc(Tok.getLocation());
    }

    Tok.setLocation(instLoc);
  }

  // If this is the first token, set the lexical properties of the token to
  // match the lexical properties of the macro identifier.
  if (isFirstToken) {
    Tok.setFlagValue(Token::StartOfLine , AtStartOfLine);
    Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace);
  } else {
    // If this is not the first token, we may still need to pass through
    // leading whitespace if we've expanded a macro.
    if (AtStartOfLine) Tok.setFlag(Token::StartOfLine);
    if (HasLeadingSpace) Tok.setFlag(Token::LeadingSpace);
  }
  // The pending flags have been consumed by this token.
  AtStartOfLine = false;
  HasLeadingSpace = false;

  // Handle recursive expansion!
  if (!Tok.isAnnotation() && Tok.getIdentifierInfo() != nullptr) {
    // Change the kind of this identifier to the appropriate token kind, e.g.
    // turning "for" into a keyword.
    IdentifierInfo *II = Tok.getIdentifierInfo();
    Tok.setKind(II->getTokenID());

    // If this identifier was poisoned and from a paste, emit an error.  This
    // won't be handled by Preprocessor::HandleIdentifier because this is coming
    // from a macro expansion.
    if (II->isPoisoned() && TokenIsFromPaste) {
      PP.HandlePoisonedIdentifier(Tok);
    }

    if (!DisableMacroExpansion && II->isHandleIdentifierCase())
      return PP.HandleIdentifier(Tok);
  }

  // Otherwise, return a normal token.
  return true;
}

// Convenience overload: paste within this lexer's own token list, starting at
// (and advancing) the member CurTokenIdx.
bool TokenLexer::pasteTokens(Token &Tok) {
  return pasteTokens(Tok, llvm::makeArrayRef(Tokens, NumTokens), CurTokenIdx);
}

/// LHSTok is the LHS of a ## operator, and CurTokenIdx is the ##
/// operator.  Read the ## and RHS, and paste the LHS/RHS together.  If there
/// are more ## after it, chomp them iteratively.  Return the result as LHSTok.
/// If this returns true, the caller should immediately return the token.
///
/// \param LHSTok      in: left operand; out: the pasted result (left
///                    unmodified when the paste is invalid).
/// \param TokenStream the tokens that contain the '##' and its right operands.
/// \param CurIdx      in: index of the '##' (or of the RHS of the MSVC 'L#arg'
///                    pair); advanced past every token that was consumed.
bool TokenLexer::pasteTokens(Token &LHSTok, ArrayRef<Token> TokenStream,
                             unsigned int &CurIdx) {
  assert(CurIdx > 0 && "## can not be the first token within tokens");
  assert((TokenStream[CurIdx].is(tok::hashhash) ||
         (PP.getLangOpts().MSVCCompat &&
          isWideStringLiteralFromMacro(LHSTok, TokenStream[CurIdx]))) &&
             "Token at this Index must be ## or part of the MSVC 'L "
             "#macro-arg' pasting pair");

  // MSVC: If previous token was pasted, this must be a recovery from an invalid
  // paste operation. Ignore spaces before this token to mimic MSVC output.
  // Required for generating valid UUID strings in some MS headers.
  if (PP.getLangOpts().MicrosoftExt && (CurIdx >= 2) &&
      TokenStream[CurIdx - 2].is(tok::hashhash))
    LHSTok.clearFlag(Token::LeadingSpace);

  SmallString<128> Buffer;
  const char *ResultTokStrPtr = nullptr;
  SourceLocation StartLoc = LHSTok.getLocation();
  SourceLocation PasteOpLoc;

  // Captures CurIdx by reference, so it always reflects the current position.
  auto IsAtEnd = [&TokenStream, &CurIdx] {
    return TokenStream.size() == CurIdx;
  };

  do {
    // Consume the ## operator if any.
    PasteOpLoc = TokenStream[CurIdx].getLocation();
    if (TokenStream[CurIdx].is(tok::hashhash))
      ++CurIdx;
    assert(!IsAtEnd() && "No token on the RHS of a paste operator!");

    // Get the RHS token.
    const Token &RHS = TokenStream[CurIdx];

    // Allocate space for the result token.  This is guaranteed to be enough for
    // the two tokens.
    Buffer.resize(LHSTok.getLength() + RHS.getLength());

    // Get the spelling of the LHS token in Buffer.
    const char *BufPtr = &Buffer[0];
    bool Invalid = false;
    unsigned LHSLen = PP.getSpelling(LHSTok, BufPtr, &Invalid);
    if (BufPtr != &Buffer[0])   // Really, we want the chars in Buffer!
      memcpy(&Buffer[0], BufPtr, LHSLen);
    if (Invalid)
      return true;

    // Same for the RHS token, placed right behind the LHS spelling.
    BufPtr = Buffer.data() + LHSLen;
    unsigned RHSLen = PP.getSpelling(RHS, BufPtr, &Invalid);
    if (Invalid)
      return true;
    if (RHSLen && BufPtr != &Buffer[LHSLen])
      // Really, we want the chars in Buffer!
      memcpy(&Buffer[LHSLen], BufPtr, RHSLen);

    // Trim excess space.
    Buffer.resize(LHSLen+RHSLen);

    // Plop the pasted result (including the trailing newline and null) into a
    // scratch buffer where we can lex it.
    Token ResultTokTmp;
    ResultTokTmp.startToken();

    // Claim that the tmp token is a string_literal so that we can get the
    // character pointer back from CreateString in getLiteralData().
    ResultTokTmp.setKind(tok::string_literal);
    PP.CreateString(Buffer, ResultTokTmp);
    SourceLocation ResultTokLoc = ResultTokTmp.getLocation();
    ResultTokStrPtr = ResultTokTmp.getLiteralData();

    // Lex the resultant pasted token into Result.
    Token Result;

    if (LHSTok.isAnyIdentifier() && RHS.isAnyIdentifier()) {
      // Common paste case: identifier+identifier = identifier.  Avoid creating
      // a lexer and other overhead.
      PP.IncrementPasteCounter(true);
      Result.startToken();
      Result.setKind(tok::raw_identifier);
      Result.setRawIdentifierData(ResultTokStrPtr);
      Result.setLocation(ResultTokLoc);
      Result.setLength(LHSLen+RHSLen);
    } else {
      PP.IncrementPasteCounter(false);

      assert(ResultTokLoc.isFileID() &&
             "Should be a raw location into scratch buffer");
      SourceManager &SourceMgr = PP.getSourceManager();
      FileID LocFileID = SourceMgr.getFileID(ResultTokLoc);

      // NOTE: this 'Invalid' shadows the one declared earlier in the loop
      // body.
      bool Invalid = false;
      const char *ScratchBufStart
        = SourceMgr.getBufferData(LocFileID, &Invalid).data();
      if (Invalid)
        return false;

      // Make a lexer to lex this string from.  Lex just this one token.
      // Make a lexer object so that we lex and expand the paste result.
      Lexer TL(SourceMgr.getLocForStartOfFile(LocFileID),
               PP.getLangOpts(), ScratchBufStart,
               ResultTokStrPtr, ResultTokStrPtr+LHSLen+RHSLen);

      // Lex a token in raw mode.  This way it won't look up identifiers
      // automatically, lexing off the end will return an eof token, and
      // warnings are disabled.  This returns true if the result token is the
      // entire buffer.
      bool isInvalid = !TL.LexFromRawLexer(Result);

      // If we got an EOF token, we didn't form even ONE token.  For example, we
      // did "/ ## /" to get "//".
      isInvalid |= Result.is(tok::eof);

      // If pasting the two tokens didn't form a full new token, this is an
      // error.  This occurs with "x ## +"  and other stuff.  Return with LHSTok
      // unmodified and with RHS as the next token to lex.
      if (isInvalid) {
        // Explicitly convert the token location to have proper expansion
        // information so that the user knows where it came from.
        SourceManager &SM = PP.getSourceManager();
        SourceLocation Loc =
          SM.createExpansionLoc(PasteOpLoc, ExpandLocStart, ExpandLocEnd, 2);

        // Test for the Microsoft extension of /##/ turning into // here on the
        // error path.
        if (PP.getLangOpts().MicrosoftExt && LHSTok.is(tok::slash) &&
            RHS.is(tok::slash)) {
          HandleMicrosoftCommentPaste(LHSTok, Loc);
          return true;
        }

        // Do not emit the error when preprocessing assembler code.
        if (!PP.getLangOpts().AsmPreprocessor) {
          // If we're in microsoft extensions mode, downgrade this from a hard
          // error to an extension that defaults to an error.  This allows
          // disabling it.
          PP.Diag(Loc, PP.getLangOpts().MicrosoftExt ? diag::ext_pp_bad_paste_ms
                                                     : diag::err_pp_bad_paste)
              << Buffer;
        }

        // An error has occurred so exit loop.
        break;
      }

      // Turn ## into 'unknown' to avoid # ## # from looking like a paste
      // operator.
      if (Result.is(tok::hashhash))
        Result.setKind(tok::unknown);
    }

    // Transfer properties of the LHS over the Result.
    Result.setFlagValue(Token::StartOfLine , LHSTok.isAtStartOfLine());
    Result.setFlagValue(Token::LeadingSpace, LHSTok.hasLeadingSpace());

    // Finally, replace LHS with the result, consume the RHS, and iterate.
    ++CurIdx;
    LHSTok = Result;
  } while (!IsAtEnd() && TokenStream[CurIdx].is(tok::hashhash));

  // Location of the last token examined before the current position.
  SourceLocation EndLoc = TokenStream[CurIdx - 1].getLocation();

  // The token's current location indicates where the token was lexed from.  We
  // need this information to compute the spelling of the token, but any
  // diagnostics for the expanded token should appear as if the token was
  // expanded from the full ## expression. Pull this information together into
  // a new SourceLocation that captures all of this.
  SourceManager &SM = PP.getSourceManager();
  if (StartLoc.isFileID())
    StartLoc = getExpansionLocForMacroDefLoc(StartLoc);
  if (EndLoc.isFileID())
    EndLoc = getExpansionLocForMacroDefLoc(EndLoc);
  // Walk both ends outwards through their immediate expansion ranges until
  // they lie in the same FileID as MacroExpansionStart.
  FileID MacroFID = SM.getFileID(MacroExpansionStart);
  while (SM.getFileID(StartLoc) != MacroFID)
    StartLoc = SM.getImmediateExpansionRange(StartLoc).getBegin();
  while (SM.getFileID(EndLoc) != MacroFID)
    EndLoc = SM.getImmediateExpansionRange(EndLoc).getEnd();

  LHSTok.setLocation(SM.createExpansionLoc(LHSTok.getLocation(), StartLoc, EndLoc,
                                        LHSTok.getLength()));

  // Now that we got the result token, it will be subject to expansion.  Since
  // token pasting re-lexes the result token in raw mode, identifier information
  // isn't looked up.  As such, if the result is an identifier, look up id info.
  if (LHSTok.is(tok::raw_identifier)) {
    // Look up the identifier info for the token.  We disabled identifier lookup
    // by saying we're skipping contents, so we need to do this manually.
    PP.LookUpIdentifierInfo(LHSTok);
  }
  return false;
}

/// isNextTokenLParen - If the next token lexed will pop this macro off the
/// expansion stack, return 2.  If the next unexpanded token is a '(', return
/// 1, otherwise return 0.
unsigned TokenLexer::isNextTokenLParen() const {
  // Out of tokens?
  if (isAtEnd())
    return 2;
  // The bool result converts to 1 for '(' and 0 for anything else.
  return Tokens[CurTokenIdx].is(tok::l_paren);
}

/// isParsingPreprocessorDirective - Return true if we are in the middle of a
/// preprocessor directive.
897 bool TokenLexer::isParsingPreprocessorDirective() const { 898 return Tokens[NumTokens-1].is(tok::eod) && !isAtEnd(); 899 } 900 901 /// HandleMicrosoftCommentPaste - In microsoft compatibility mode, /##/ pastes 902 /// together to form a comment that comments out everything in the current 903 /// macro, other active macros, and anything left on the current physical 904 /// source line of the expanded buffer. Handle this by returning the 905 /// first token on the next line. 906 void TokenLexer::HandleMicrosoftCommentPaste(Token &Tok, SourceLocation OpLoc) { 907 PP.Diag(OpLoc, diag::ext_comment_paste_microsoft); 908 909 // We 'comment out' the rest of this macro by just ignoring the rest of the 910 // tokens that have not been lexed yet, if any. 911 912 // Since this must be a macro, mark the macro enabled now that it is no longer 913 // being expanded. 914 assert(Macro && "Token streams can't paste comments"); 915 Macro->EnableMacro(); 916 917 PP.HandleMicrosoftCommentPaste(Tok); 918 } 919 920 /// If \arg loc is a file ID and points inside the current macro 921 /// definition, returns the appropriate source location pointing at the 922 /// macro expansion source location entry, otherwise it returns an invalid 923 /// SourceLocation. 
924 SourceLocation 925 TokenLexer::getExpansionLocForMacroDefLoc(SourceLocation loc) const { 926 assert(ExpandLocStart.isValid() && MacroExpansionStart.isValid() && 927 "Not appropriate for token streams"); 928 assert(loc.isValid() && loc.isFileID()); 929 930 SourceManager &SM = PP.getSourceManager(); 931 assert(SM.isInSLocAddrSpace(loc, MacroDefStart, MacroDefLength) && 932 "Expected loc to come from the macro definition"); 933 934 unsigned relativeOffset = 0; 935 SM.isInSLocAddrSpace(loc, MacroDefStart, MacroDefLength, &relativeOffset); 936 return MacroExpansionStart.getLocWithOffset(relativeOffset); 937 } 938 939 /// Finds the tokens that are consecutive (from the same FileID) 940 /// creates a single SLocEntry, and assigns SourceLocations to each token that 941 /// point to that SLocEntry. e.g for 942 /// assert(foo == bar); 943 /// There will be a single SLocEntry for the "foo == bar" chunk and locations 944 /// for the 'foo', '==', 'bar' tokens will point inside that chunk. 945 /// 946 /// \arg begin_tokens will be updated to a position past all the found 947 /// consecutive tokens. 948 static void updateConsecutiveMacroArgTokens(SourceManager &SM, 949 SourceLocation InstLoc, 950 Token *&begin_tokens, 951 Token * end_tokens) { 952 assert(begin_tokens < end_tokens); 953 954 SourceLocation FirstLoc = begin_tokens->getLocation(); 955 SourceLocation CurLoc = FirstLoc; 956 957 // Compare the source location offset of tokens and group together tokens that 958 // are close, even if their locations point to different FileIDs. e.g. 959 // 960 // |bar | foo | cake | (3 tokens from 3 consecutive FileIDs) 961 // ^ ^ 962 // |bar foo cake| (one SLocEntry chunk for all tokens) 963 // 964 // we can perform this "merge" since the token's spelling location depends 965 // on the relative offset. 
966 967 Token *NextTok = begin_tokens + 1; 968 for (; NextTok < end_tokens; ++NextTok) { 969 SourceLocation NextLoc = NextTok->getLocation(); 970 if (CurLoc.isFileID() != NextLoc.isFileID()) 971 break; // Token from different kind of FileID. 972 973 int RelOffs; 974 if (!SM.isInSameSLocAddrSpace(CurLoc, NextLoc, &RelOffs)) 975 break; // Token from different local/loaded location. 976 // Check that token is not before the previous token or more than 50 977 // "characters" away. 978 if (RelOffs < 0 || RelOffs > 50) 979 break; 980 981 if (CurLoc.isMacroID() && !SM.isWrittenInSameFile(CurLoc, NextLoc)) 982 break; // Token from a different macro. 983 984 CurLoc = NextLoc; 985 } 986 987 // For the consecutive tokens, find the length of the SLocEntry to contain 988 // all of them. 989 Token &LastConsecutiveTok = *(NextTok-1); 990 int LastRelOffs = 0; 991 SM.isInSameSLocAddrSpace(FirstLoc, LastConsecutiveTok.getLocation(), 992 &LastRelOffs); 993 unsigned FullLength = LastRelOffs + LastConsecutiveTok.getLength(); 994 995 // Create a macro expansion SLocEntry that will "contain" all of the tokens. 996 SourceLocation Expansion = 997 SM.createMacroArgExpansionLoc(FirstLoc, InstLoc,FullLength); 998 999 // Change the location of the tokens from the spelling location to the new 1000 // expanded location. 1001 for (; begin_tokens < NextTok; ++begin_tokens) { 1002 Token &Tok = *begin_tokens; 1003 int RelOffs = 0; 1004 SM.isInSameSLocAddrSpace(FirstLoc, Tok.getLocation(), &RelOffs); 1005 Tok.setLocation(Expansion.getLocWithOffset(RelOffs)); 1006 } 1007 } 1008 1009 /// Creates SLocEntries and updates the locations of macro argument 1010 /// tokens to their new expanded locations. 1011 /// 1012 /// \param ArgIdSpellLoc the location of the macro argument id inside the macro 1013 /// definition. 
1014 void TokenLexer::updateLocForMacroArgTokens(SourceLocation ArgIdSpellLoc, 1015 Token *begin_tokens, 1016 Token *end_tokens) { 1017 SourceManager &SM = PP.getSourceManager(); 1018 1019 SourceLocation InstLoc = 1020 getExpansionLocForMacroDefLoc(ArgIdSpellLoc); 1021 1022 while (begin_tokens < end_tokens) { 1023 // If there's only one token just create a SLocEntry for it. 1024 if (end_tokens - begin_tokens == 1) { 1025 Token &Tok = *begin_tokens; 1026 Tok.setLocation(SM.createMacroArgExpansionLoc(Tok.getLocation(), 1027 InstLoc, 1028 Tok.getLength())); 1029 return; 1030 } 1031 1032 updateConsecutiveMacroArgTokens(SM, InstLoc, begin_tokens, end_tokens); 1033 } 1034 } 1035 1036 void TokenLexer::PropagateLineStartLeadingSpaceInfo(Token &Result) { 1037 AtStartOfLine = Result.isAtStartOfLine(); 1038 HasLeadingSpace = Result.hasLeadingSpace(); 1039 } 1040