1 //===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the Preprocessor interface. 11 // 12 //===----------------------------------------------------------------------===// 13 // 14 // Options to support: 15 // -H - Print the name of each header file used. 16 // -d[DNI] - Dump various things. 17 // -fworking-directory - #line's with preprocessor's working dir. 18 // -fpreprocessed 19 // -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD 20 // -W* 21 // -w 22 // 23 // Messages to emit: 24 // "Multiple include guards may be useful for:\n" 25 // 26 //===----------------------------------------------------------------------===// 27 28 #include "clang/Lex/Preprocessor.h" 29 #include "MacroArgs.h" 30 #include "clang/Lex/ExternalPreprocessorSource.h" 31 #include "clang/Lex/HeaderSearch.h" 32 #include "clang/Lex/MacroInfo.h" 33 #include "clang/Lex/Pragma.h" 34 #include "clang/Lex/PreprocessingRecord.h" 35 #include "clang/Lex/ScratchBuffer.h" 36 #include "clang/Lex/LexDiagnostic.h" 37 #include "clang/Lex/CodeCompletionHandler.h" 38 #include "clang/Basic/SourceManager.h" 39 #include "clang/Basic/FileManager.h" 40 #include "clang/Basic/TargetInfo.h" 41 #include "llvm/ADT/APFloat.h" 42 #include "llvm/ADT/SmallVector.h" 43 #include "llvm/Support/MemoryBuffer.h" 44 #include "llvm/Support/raw_ostream.h" 45 #include "llvm/Support/Capacity.h" 46 using namespace clang; 47 48 //===----------------------------------------------------------------------===// 49 ExternalPreprocessorSource::~ExternalPreprocessorSource() { } 50 51 Preprocessor::Preprocessor(Diagnostic &diags, const LangOptions &opts, 52 const TargetInfo &target, SourceManager &SM, 53 HeaderSearch &Headers, 54 IdentifierInfoLookup* IILookup, 55 bool OwnsHeaders) 56 : Diags(&diags), Features(opts), Target(target),FileMgr(Headers.getFileMgr()), 57 SourceMgr(SM), 58 HeaderInfo(Headers), ExternalSource(0), 59 Identifiers(opts, IILookup), BuiltinInfo(Target), CodeComplete(0), 60 CodeCompletionFile(0), SkipMainFilePreamble(0, true), CurPPLexer(0), 61 CurDirLookup(0), Callbacks(0), MacroArgCache(0), Record(0), MIChainHead(0), 62 MICache(0) { 63 ScratchBuf = new ScratchBuffer(SourceMgr); 64 CounterValue = 0; // __COUNTER__ starts at 0. 65 OwnsHeaderSearch = OwnsHeaders; 66 67 // Clear stats. 68 NumDirectives = NumDefined = NumUndefined = NumPragma = 0; 69 NumIf = NumElse = NumEndif = 0; 70 NumEnteredSourceFiles = 0; 71 NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0; 72 NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0; 73 MaxIncludeStackDepth = 0; 74 NumSkipped = 0; 75 76 // Default to discarding comments. 77 KeepComments = false; 78 KeepMacroComments = false; 79 80 // Macro expansion is enabled. 81 DisableMacroExpansion = false; 82 InMacroArgs = false; 83 NumCachedTokenLexers = 0; 84 85 CachedLexPos = 0; 86 87 // We haven't read anything from the external source. 88 ReadMacrosFromExternalSource = false; 89 90 // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro. 91 // This gets unpoisoned where it is allowed. 92 (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned(); 93 SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use); 94 95 // Initialize the pragma handlers. 96 PragmaHandlers = new PragmaNamespace(StringRef()); 97 RegisterBuiltinPragmas(); 98 99 // Initialize builtin macros like __LINE__ and friends. 100 RegisterBuiltinMacros(); 101 102 if(Features.Borland) { 103 Ident__exception_info = getIdentifierInfo("_exception_info"); 104 Ident___exception_info = getIdentifierInfo("__exception_info"); 105 Ident_GetExceptionInfo = getIdentifierInfo("GetExceptionInformation"); 106 Ident__exception_code = getIdentifierInfo("_exception_code"); 107 Ident___exception_code = getIdentifierInfo("__exception_code"); 108 Ident_GetExceptionCode = getIdentifierInfo("GetExceptionCode"); 109 Ident__abnormal_termination = getIdentifierInfo("_abnormal_termination"); 110 Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination"); 111 Ident_AbnormalTermination = getIdentifierInfo("AbnormalTermination"); 112 } else { 113 Ident__exception_info = Ident__exception_code = Ident__abnormal_termination = 0; 114 Ident___exception_info = Ident___exception_code = Ident___abnormal_termination = 0; 115 Ident_GetExceptionInfo = Ident_GetExceptionCode = Ident_AbnormalTermination = 0; 116 } 117 118 } 119 120 Preprocessor::~Preprocessor() { 121 assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!"); 122 assert(MacroExpandingLexersStack.empty() && MacroExpandedTokens.empty() && 123 "Preprocessor::HandleEndOfTokenLexer should have cleared those"); 124 125 while (!IncludeMacroStack.empty()) { 126 delete IncludeMacroStack.back().TheLexer; 127 delete IncludeMacroStack.back().TheTokenLexer; 128 IncludeMacroStack.pop_back(); 129 } 130 131 // Free any macro definitions. 132 for (MacroInfoChain *I = MIChainHead ; I ; I = I->Next) 133 I->MI.Destroy(); 134 135 // Free any cached macro expanders. 136 for (unsigned i = 0, e = NumCachedTokenLexers; i != e; ++i) 137 delete TokenLexerCache[i]; 138 139 // Free any cached MacroArgs. 140 for (MacroArgs *ArgList = MacroArgCache; ArgList; ) 141 ArgList = ArgList->deallocate(); 142 143 // Release pragma information. 144 delete PragmaHandlers; 145 146 // Delete the scratch buffer info. 147 delete ScratchBuf; 148 149 // Delete the header search info, if we own it. 150 if (OwnsHeaderSearch) 151 delete &HeaderInfo; 152 153 delete Callbacks; 154 } 155 156 void Preprocessor::setPTHManager(PTHManager* pm) { 157 PTH.reset(pm); 158 FileMgr.addStatCache(PTH->createStatCache()); 159 } 160 161 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const { 162 llvm::errs() << tok::getTokenName(Tok.getKind()) << " '" 163 << getSpelling(Tok) << "'"; 164 165 if (!DumpFlags) return; 166 167 llvm::errs() << "\t"; 168 if (Tok.isAtStartOfLine()) 169 llvm::errs() << " [StartOfLine]"; 170 if (Tok.hasLeadingSpace()) 171 llvm::errs() << " [LeadingSpace]"; 172 if (Tok.isExpandDisabled()) 173 llvm::errs() << " [ExpandDisabled]"; 174 if (Tok.needsCleaning()) { 175 const char *Start = SourceMgr.getCharacterData(Tok.getLocation()); 176 llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength()) 177 << "']"; 178 } 179 180 llvm::errs() << "\tLoc=<"; 181 DumpLocation(Tok.getLocation()); 182 llvm::errs() << ">"; 183 } 184 185 void Preprocessor::DumpLocation(SourceLocation Loc) const { 186 Loc.dump(SourceMgr); 187 } 188 189 void Preprocessor::DumpMacro(const MacroInfo &MI) const { 190 llvm::errs() << "MACRO: "; 191 for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) { 192 DumpToken(MI.getReplacementToken(i)); 193 llvm::errs() << " "; 194 } 195 llvm::errs() << "\n"; 196 } 197 198 void Preprocessor::PrintStats() { 199 llvm::errs() << "\n*** Preprocessor Stats:\n"; 200 llvm::errs() << NumDirectives << " directives found:\n"; 201 llvm::errs() << " " << NumDefined << " #define.\n"; 202 llvm::errs() << " " << NumUndefined << " #undef.\n"; 203 llvm::errs() << " #include/#include_next/#import:\n"; 204 llvm::errs() << " " << NumEnteredSourceFiles << " source files entered.\n"; 205 llvm::errs() << " " << MaxIncludeStackDepth << " max include stack depth\n"; 206 llvm::errs() << " " << NumIf << " #if/#ifndef/#ifdef.\n"; 207 llvm::errs() << " " << NumElse << " #else/#elif.\n"; 208 llvm::errs() << " " << NumEndif << " #endif.\n"; 209 llvm::errs() << " " << NumPragma << " #pragma.\n"; 210 llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n"; 211 212 llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/" 213 << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, " 214 << NumFastMacroExpanded << " on the fast path.\n"; 215 llvm::errs() << (NumFastTokenPaste+NumTokenPaste) 216 << " token paste (##) operations performed, " 217 << NumFastTokenPaste << " on the fast path.\n"; 218 } 219 220 Preprocessor::macro_iterator 221 Preprocessor::macro_begin(bool IncludeExternalMacros) const { 222 if (IncludeExternalMacros && ExternalSource && 223 !ReadMacrosFromExternalSource) { 224 ReadMacrosFromExternalSource = true; 225 ExternalSource->ReadDefinedMacros(); 226 } 227 228 return Macros.begin(); 229 } 230 231 size_t Preprocessor::getTotalMemory() const { 232 return BP.getTotalMemory() 233 + llvm::capacity_in_bytes(MacroExpandedTokens) 234 + Predefines.capacity() /* Predefines buffer. */ 235 + llvm::capacity_in_bytes(Macros) 236 + llvm::capacity_in_bytes(PragmaPushMacroInfo) 237 + llvm::capacity_in_bytes(PoisonReasons) 238 + llvm::capacity_in_bytes(CommentHandlers); 239 } 240 241 Preprocessor::macro_iterator 242 Preprocessor::macro_end(bool IncludeExternalMacros) const { 243 if (IncludeExternalMacros && ExternalSource && 244 !ReadMacrosFromExternalSource) { 245 ReadMacrosFromExternalSource = true; 246 ExternalSource->ReadDefinedMacros(); 247 } 248 249 return Macros.end(); 250 } 251 252 bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File, 253 unsigned TruncateAtLine, 254 unsigned TruncateAtColumn) { 255 using llvm::MemoryBuffer; 256 257 CodeCompletionFile = File; 258 259 // Okay to clear out the code-completion point by passing NULL. 260 if (!CodeCompletionFile) 261 return false; 262 263 // Load the actual file's contents. 264 bool Invalid = false; 265 const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid); 266 if (Invalid) 267 return true; 268 269 // Find the byte position of the truncation point. 270 const char *Position = Buffer->getBufferStart(); 271 for (unsigned Line = 1; Line < TruncateAtLine; ++Line) { 272 for (; *Position; ++Position) { 273 if (*Position != '\r' && *Position != '\n') 274 continue; 275 276 // Eat \r\n or \n\r as a single line. 277 if ((Position[1] == '\r' || Position[1] == '\n') && 278 Position[0] != Position[1]) 279 ++Position; 280 ++Position; 281 break; 282 } 283 } 284 285 Position += TruncateAtColumn - 1; 286 287 // Truncate the buffer. 288 if (Position < Buffer->getBufferEnd()) { 289 StringRef Data(Buffer->getBufferStart(), 290 Position-Buffer->getBufferStart()); 291 MemoryBuffer *TruncatedBuffer 292 = MemoryBuffer::getMemBufferCopy(Data, Buffer->getBufferIdentifier()); 293 SourceMgr.overrideFileContents(File, TruncatedBuffer); 294 } 295 296 return false; 297 } 298 299 bool Preprocessor::isCodeCompletionFile(SourceLocation FileLoc) const { 300 return CodeCompletionFile && FileLoc.isFileID() && 301 SourceMgr.getFileEntryForID(SourceMgr.getFileID(FileLoc)) 302 == CodeCompletionFile; 303 } 304 305 void Preprocessor::CodeCompleteNaturalLanguage() { 306 SetCodeCompletionPoint(0, 0, 0); 307 getDiagnostics().setSuppressAllDiagnostics(true); 308 if (CodeComplete) 309 CodeComplete->CodeCompleteNaturalLanguage(); 310 } 311 312 /// getSpelling - This method is used to get the spelling of a token into a 313 /// SmallVector. Note that the returned StringRef may not point to the 314 /// supplied buffer if a copy can be avoided. 315 StringRef Preprocessor::getSpelling(const Token &Tok, 316 SmallVectorImpl<char> &Buffer, 317 bool *Invalid) const { 318 // NOTE: this has to be checked *before* testing for an IdentifierInfo. 319 if (Tok.isNot(tok::raw_identifier)) { 320 // Try the fast path. 321 if (const IdentifierInfo *II = Tok.getIdentifierInfo()) 322 return II->getName(); 323 } 324 325 // Resize the buffer if we need to copy into it. 326 if (Tok.needsCleaning()) 327 Buffer.resize(Tok.getLength()); 328 329 const char *Ptr = Buffer.data(); 330 unsigned Len = getSpelling(Tok, Ptr, Invalid); 331 return StringRef(Ptr, Len); 332 } 333 334 /// CreateString - Plop the specified string into a scratch buffer and return a 335 /// location for it. If specified, the source location provides a source 336 /// location for the token. 337 void Preprocessor::CreateString(const char *Buf, unsigned Len, Token &Tok, 338 SourceLocation ExpansionLoc) { 339 Tok.setLength(Len); 340 341 const char *DestPtr; 342 SourceLocation Loc = ScratchBuf->getToken(Buf, Len, DestPtr); 343 344 if (ExpansionLoc.isValid()) 345 Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLoc, ExpansionLoc, Len); 346 Tok.setLocation(Loc); 347 348 // If this is a raw identifier or a literal token, set the pointer data. 349 if (Tok.is(tok::raw_identifier)) 350 Tok.setRawIdentifierData(DestPtr); 351 else if (Tok.isLiteral()) 352 Tok.setLiteralData(DestPtr); 353 } 354 355 356 357 //===----------------------------------------------------------------------===// 358 // Preprocessor Initialization Methods 359 //===----------------------------------------------------------------------===// 360 361 362 /// EnterMainSourceFile - Enter the specified FileID as the main source file, 363 /// which implicitly adds the builtin defines etc. 364 void Preprocessor::EnterMainSourceFile() { 365 // We do not allow the preprocessor to reenter the main file. Doing so will 366 // cause FileID's to accumulate information from both runs (e.g. #line 367 // information) and predefined macros aren't guaranteed to be set properly. 368 assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!"); 369 FileID MainFileID = SourceMgr.getMainFileID(); 370 371 // Enter the main file source buffer. 372 EnterSourceFile(MainFileID, 0, SourceLocation()); 373 374 // If we've been asked to skip bytes in the main file (e.g., as part of a 375 // precompiled preamble), do so now. 376 if (SkipMainFilePreamble.first > 0) 377 CurLexer->SkipBytes(SkipMainFilePreamble.first, 378 SkipMainFilePreamble.second); 379 380 // Tell the header info that the main file was entered. If the file is later 381 // #imported, it won't be re-entered. 382 if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID)) 383 HeaderInfo.IncrementIncludeCount(FE); 384 385 // Preprocess Predefines to populate the initial preprocessor state. 386 llvm::MemoryBuffer *SB = 387 llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>"); 388 assert(SB && "Cannot create predefined source buffer"); 389 FileID FID = SourceMgr.createFileIDForMemBuffer(SB); 390 assert(!FID.isInvalid() && "Could not create FileID for predefines?"); 391 392 // Start parsing the predefines. 393 EnterSourceFile(FID, 0, SourceLocation()); 394 } 395 396 void Preprocessor::EndSourceFile() { 397 // Notify the client that we reached the end of the source file. 398 if (Callbacks) 399 Callbacks->EndOfMainFile(); 400 } 401 402 //===----------------------------------------------------------------------===// 403 // Lexer Event Handling. 404 //===----------------------------------------------------------------------===// 405 406 /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the 407 /// identifier information for the token and install it into the token, 408 /// updating the token kind accordingly. 409 IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const { 410 assert(Identifier.getRawIdentifierData() != 0 && "No raw identifier data!"); 411 412 // Look up this token, see if it is a macro, or if it is a language keyword. 413 IdentifierInfo *II; 414 if (!Identifier.needsCleaning()) { 415 // No cleaning needed, just use the characters from the lexed buffer. 416 II = getIdentifierInfo(StringRef(Identifier.getRawIdentifierData(), 417 Identifier.getLength())); 418 } else { 419 // Cleaning needed, alloca a buffer, clean into it, then use the buffer. 420 llvm::SmallString<64> IdentifierBuffer; 421 StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer); 422 II = getIdentifierInfo(CleanedStr); 423 } 424 425 // Update the token info (identifier info and appropriate token kind). 426 Identifier.setIdentifierInfo(II); 427 Identifier.setKind(II->getTokenID()); 428 429 return II; 430 } 431 432 void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) { 433 PoisonReasons[II] = DiagID; 434 } 435 436 void Preprocessor::PoisonSEHIdentifiers(bool Poison) { 437 assert(Ident__exception_code && Ident__exception_info); 438 assert(Ident___exception_code && Ident___exception_info); 439 Ident__exception_code->setIsPoisoned(Poison); 440 Ident___exception_code->setIsPoisoned(Poison); 441 Ident_GetExceptionCode->setIsPoisoned(Poison); 442 Ident__exception_info->setIsPoisoned(Poison); 443 Ident___exception_info->setIsPoisoned(Poison); 444 Ident_GetExceptionInfo->setIsPoisoned(Poison); 445 Ident__abnormal_termination->setIsPoisoned(Poison); 446 Ident___abnormal_termination->setIsPoisoned(Poison); 447 Ident_AbnormalTermination->setIsPoisoned(Poison); 448 } 449 450 void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) { 451 assert(Identifier.getIdentifierInfo() && 452 "Can't handle identifiers without identifier info!"); 453 llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it = 454 PoisonReasons.find(Identifier.getIdentifierInfo()); 455 if(it == PoisonReasons.end()) 456 Diag(Identifier, diag::err_pp_used_poisoned_id); 457 else 458 Diag(Identifier,it->second) << Identifier.getIdentifierInfo(); 459 } 460 461 /// HandleIdentifier - This callback is invoked when the lexer reads an 462 /// identifier. This callback looks up the identifier in the map and/or 463 /// potentially macro expands it or turns it into a named token (like 'for'). 464 /// 465 /// Note that callers of this method are guarded by checking the 466 /// IdentifierInfo's 'isHandleIdentifierCase' bit. If this method changes, the 467 /// IdentifierInfo methods that compute these properties will need to change to 468 /// match. 469 void Preprocessor::HandleIdentifier(Token &Identifier) { 470 assert(Identifier.getIdentifierInfo() && 471 "Can't handle identifiers without identifier info!"); 472 473 IdentifierInfo &II = *Identifier.getIdentifierInfo(); 474 475 // If this identifier was poisoned, and if it was not produced from a macro 476 // expansion, emit an error. 477 if (II.isPoisoned() && CurPPLexer) { 478 HandlePoisonedIdentifier(Identifier); 479 } 480 481 // If this is a macro to be expanded, do it. 482 if (MacroInfo *MI = getMacroInfo(&II)) { 483 if (!DisableMacroExpansion && !Identifier.isExpandDisabled()) { 484 if (MI->isEnabled()) { 485 if (!HandleMacroExpandedIdentifier(Identifier, MI)) 486 return; 487 } else { 488 // C99 6.10.3.4p2 says that a disabled macro may never again be 489 // expanded, even if it's in a context where it could be expanded in the 490 // future. 491 Identifier.setFlag(Token::DisableExpand); 492 } 493 } 494 } 495 496 // C++ 2.11p2: If this is an alternative representation of a C++ operator, 497 // then we act as if it is the actual operator and not the textual 498 // representation of it. 499 if (II.isCPlusPlusOperatorKeyword()) 500 Identifier.setIdentifierInfo(0); 501 502 // If this is an extension token, diagnose its use. 503 // We avoid diagnosing tokens that originate from macro definitions. 504 // FIXME: This warning is disabled in cases where it shouldn't be, 505 // like "#define TY typeof", "TY(1) x". 506 if (II.isExtensionToken() && !DisableMacroExpansion) 507 Diag(Identifier, diag::ext_token_used); 508 } 509 510 void Preprocessor::AddCommentHandler(CommentHandler *Handler) { 511 assert(Handler && "NULL comment handler"); 512 assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) == 513 CommentHandlers.end() && "Comment handler already registered"); 514 CommentHandlers.push_back(Handler); 515 } 516 517 void Preprocessor::RemoveCommentHandler(CommentHandler *Handler) { 518 std::vector<CommentHandler *>::iterator Pos 519 = std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler); 520 assert(Pos != CommentHandlers.end() && "Comment handler not registered"); 521 CommentHandlers.erase(Pos); 522 } 523 524 bool Preprocessor::HandleComment(Token &result, SourceRange Comment) { 525 bool AnyPendingTokens = false; 526 for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(), 527 HEnd = CommentHandlers.end(); 528 H != HEnd; ++H) { 529 if ((*H)->HandleComment(*this, Comment)) 530 AnyPendingTokens = true; 531 } 532 if (!AnyPendingTokens || getCommentRetentionState()) 533 return false; 534 Lex(result); 535 return true; 536 } 537 538 CommentHandler::~CommentHandler() { } 539 540 CodeCompletionHandler::~CodeCompletionHandler() { } 541 542 void Preprocessor::createPreprocessingRecord( 543 bool IncludeNestedMacroExpansions) { 544 if (Record) 545 return; 546 547 Record = new PreprocessingRecord(IncludeNestedMacroExpansions); 548 addPPCallbacks(Record); 549 } 550