1 //===--- InclusionRewriter.cpp - Rewrite includes into their expansions ---===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This code rewrites include invocations into their expansions. This gives you 11 // a file with all included files merged into it. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "clang/Rewrite/Frontend/Rewriters.h" 16 #include "clang/Basic/SourceManager.h" 17 #include "clang/Frontend/PreprocessorOutputOptions.h" 18 #include "clang/Lex/HeaderSearch.h" 19 #include "clang/Lex/Pragma.h" 20 #include "clang/Lex/Preprocessor.h" 21 #include "llvm/ADT/SmallString.h" 22 #include "llvm/Support/raw_ostream.h" 23 24 using namespace clang; 25 using namespace llvm; 26 27 namespace { 28 29 class InclusionRewriter : public PPCallbacks { 30 /// Information about which #includes were actually performed, 31 /// created by preprocessor callbacks. 32 struct FileChange { 33 const Module *Mod; 34 SourceLocation From; 35 FileID Id; 36 SrcMgr::CharacteristicKind FileType; 37 FileChange(SourceLocation From, const Module *Mod) : Mod(Mod), From(From) { 38 } 39 }; 40 Preprocessor &PP; ///< Used to find inclusion directives. 41 SourceManager &SM; ///< Used to read and manage source files. 42 raw_ostream &OS; ///< The destination stream for rewritten contents. 43 const llvm::MemoryBuffer *PredefinesBuffer; ///< The preprocessor predefines. 44 bool ShowLineMarkers; ///< Show #line markers. 45 bool UseLineDirective; ///< Use of line directives or line markers. 46 typedef std::map<unsigned, FileChange> FileChangeMap; 47 FileChangeMap FileChanges; ///< Tracks which files were included where. 48 /// Used transitively for building up the FileChanges mapping over the 49 /// various \c PPCallbacks callbacks. 50 FileChangeMap::iterator LastInsertedFileChange; 51 public: 52 InclusionRewriter(Preprocessor &PP, raw_ostream &OS, bool ShowLineMarkers); 53 bool Process(FileID FileId, SrcMgr::CharacteristicKind FileType); 54 void setPredefinesBuffer(const llvm::MemoryBuffer *Buf) { 55 PredefinesBuffer = Buf; 56 } 57 private: 58 void FileChanged(SourceLocation Loc, FileChangeReason Reason, 59 SrcMgr::CharacteristicKind FileType, 60 FileID PrevFID) override; 61 void FileSkipped(const FileEntry &ParentFile, const Token &FilenameTok, 62 SrcMgr::CharacteristicKind FileType) override; 63 void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok, 64 StringRef FileName, bool IsAngled, 65 CharSourceRange FilenameRange, const FileEntry *File, 66 StringRef SearchPath, StringRef RelativePath, 67 const Module *Imported) override; 68 void WriteLineInfo(const char *Filename, int Line, 69 SrcMgr::CharacteristicKind FileType, 70 StringRef EOL, StringRef Extra = StringRef()); 71 void WriteImplicitModuleImport(const Module *Mod, StringRef EOL); 72 void OutputContentUpTo(const MemoryBuffer &FromFile, 73 unsigned &WriteFrom, unsigned WriteTo, 74 StringRef EOL, int &lines, 75 bool EnsureNewline); 76 void CommentOutDirective(Lexer &DirectivesLex, const Token &StartToken, 77 const MemoryBuffer &FromFile, StringRef EOL, 78 unsigned &NextToWrite, int &Lines); 79 bool HandleHasInclude(FileID FileId, Lexer &RawLex, 80 const DirectoryLookup *Lookup, Token &Tok, 81 bool &FileExists); 82 const FileChange *FindFileChangeLocation(SourceLocation Loc) const; 83 StringRef NextIdentifierName(Lexer &RawLex, Token &RawToken); 84 }; 85 86 } // end anonymous namespace 87 88 /// Initializes an InclusionRewriter with a \p PP source and \p OS destination. 89 InclusionRewriter::InclusionRewriter(Preprocessor &PP, raw_ostream &OS, 90 bool ShowLineMarkers) 91 : PP(PP), SM(PP.getSourceManager()), OS(OS), PredefinesBuffer(nullptr), 92 ShowLineMarkers(ShowLineMarkers), 93 LastInsertedFileChange(FileChanges.end()) { 94 // If we're in microsoft mode, use normal #line instead of line markers. 95 UseLineDirective = PP.getLangOpts().MicrosoftExt; 96 } 97 98 /// Write appropriate line information as either #line directives or GNU line 99 /// markers depending on what mode we're in, including the \p Filename and 100 /// \p Line we are located at, using the specified \p EOL line separator, and 101 /// any \p Extra context specifiers in GNU line directives. 102 void InclusionRewriter::WriteLineInfo(const char *Filename, int Line, 103 SrcMgr::CharacteristicKind FileType, 104 StringRef EOL, StringRef Extra) { 105 if (!ShowLineMarkers) 106 return; 107 if (UseLineDirective) { 108 OS << "#line" << ' ' << Line << ' ' << '"'; 109 OS.write_escaped(Filename); 110 OS << '"'; 111 } else { 112 // Use GNU linemarkers as described here: 113 // http://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html 114 OS << '#' << ' ' << Line << ' ' << '"'; 115 OS.write_escaped(Filename); 116 OS << '"'; 117 if (!Extra.empty()) 118 OS << Extra; 119 if (FileType == SrcMgr::C_System) 120 // "`3' This indicates that the following text comes from a system header 121 // file, so certain warnings should be suppressed." 122 OS << " 3"; 123 else if (FileType == SrcMgr::C_ExternCSystem) 124 // as above for `3', plus "`4' This indicates that the following text 125 // should be treated as being wrapped in an implicit extern "C" block." 126 OS << " 3 4"; 127 } 128 OS << EOL; 129 } 130 131 void InclusionRewriter::WriteImplicitModuleImport(const Module *Mod, 132 StringRef EOL) { 133 OS << "@import " << Mod->getFullModuleName() << ";" 134 << " /* clang -frewrite-includes: implicit import */" << EOL; 135 } 136 137 /// FileChanged - Whenever the preprocessor enters or exits a #include file 138 /// it invokes this handler. 139 void InclusionRewriter::FileChanged(SourceLocation Loc, 140 FileChangeReason Reason, 141 SrcMgr::CharacteristicKind NewFileType, 142 FileID) { 143 if (Reason != EnterFile) 144 return; 145 if (LastInsertedFileChange == FileChanges.end()) 146 // we didn't reach this file (eg: the main file) via an inclusion directive 147 return; 148 LastInsertedFileChange->second.Id = FullSourceLoc(Loc, SM).getFileID(); 149 LastInsertedFileChange->second.FileType = NewFileType; 150 LastInsertedFileChange = FileChanges.end(); 151 } 152 153 /// Called whenever an inclusion is skipped due to canonical header protection 154 /// macros. 155 void InclusionRewriter::FileSkipped(const FileEntry &/*ParentFile*/, 156 const Token &/*FilenameTok*/, 157 SrcMgr::CharacteristicKind /*FileType*/) { 158 assert(LastInsertedFileChange != FileChanges.end() && "A file, that wasn't " 159 "found via an inclusion directive, was skipped"); 160 FileChanges.erase(LastInsertedFileChange); 161 LastInsertedFileChange = FileChanges.end(); 162 } 163 164 /// This should be called whenever the preprocessor encounters include 165 /// directives. It does not say whether the file has been included, but it 166 /// provides more information about the directive (hash location instead 167 /// of location inside the included file). It is assumed that the matching 168 /// FileChanged() or FileSkipped() is called after this. 169 void InclusionRewriter::InclusionDirective(SourceLocation HashLoc, 170 const Token &/*IncludeTok*/, 171 StringRef /*FileName*/, 172 bool /*IsAngled*/, 173 CharSourceRange /*FilenameRange*/, 174 const FileEntry * /*File*/, 175 StringRef /*SearchPath*/, 176 StringRef /*RelativePath*/, 177 const Module *Imported) { 178 assert(LastInsertedFileChange == FileChanges.end() && "Another inclusion " 179 "directive was found before the previous one was processed"); 180 std::pair<FileChangeMap::iterator, bool> p = FileChanges.insert( 181 std::make_pair(HashLoc.getRawEncoding(), FileChange(HashLoc, Imported))); 182 assert(p.second && "Unexpected revisitation of the same include directive"); 183 if (!Imported) 184 LastInsertedFileChange = p.first; 185 } 186 187 /// Simple lookup for a SourceLocation (specifically one denoting the hash in 188 /// an inclusion directive) in the map of inclusion information, FileChanges. 189 const InclusionRewriter::FileChange * 190 InclusionRewriter::FindFileChangeLocation(SourceLocation Loc) const { 191 FileChangeMap::const_iterator I = FileChanges.find(Loc.getRawEncoding()); 192 if (I != FileChanges.end()) 193 return &I->second; 194 return nullptr; 195 } 196 197 /// Detect the likely line ending style of \p FromFile by examining the first 198 /// newline found within it. 199 static StringRef DetectEOL(const MemoryBuffer &FromFile) { 200 // detect what line endings the file uses, so that added content does not mix 201 // the style 202 const char *Pos = strchr(FromFile.getBufferStart(), '\n'); 203 if (!Pos) 204 return "\n"; 205 if (Pos + 1 < FromFile.getBufferEnd() && Pos[1] == '\r') 206 return "\n\r"; 207 if (Pos - 1 >= FromFile.getBufferStart() && Pos[-1] == '\r') 208 return "\r\n"; 209 return "\n"; 210 } 211 212 /// Writes out bytes from \p FromFile, starting at \p NextToWrite and ending at 213 /// \p WriteTo - 1. 214 void InclusionRewriter::OutputContentUpTo(const MemoryBuffer &FromFile, 215 unsigned &WriteFrom, unsigned WriteTo, 216 StringRef EOL, int &Line, 217 bool EnsureNewline) { 218 if (WriteTo <= WriteFrom) 219 return; 220 if (&FromFile == PredefinesBuffer) { 221 // Ignore the #defines of the predefines buffer. 222 WriteFrom = WriteTo; 223 return; 224 } 225 OS.write(FromFile.getBufferStart() + WriteFrom, WriteTo - WriteFrom); 226 // count lines manually, it's faster than getPresumedLoc() 227 Line += std::count(FromFile.getBufferStart() + WriteFrom, 228 FromFile.getBufferStart() + WriteTo, '\n'); 229 if (EnsureNewline) { 230 char LastChar = FromFile.getBufferStart()[WriteTo - 1]; 231 if (LastChar != '\n' && LastChar != '\r') 232 OS << EOL; 233 } 234 WriteFrom = WriteTo; 235 } 236 237 /// Print characters from \p FromFile starting at \p NextToWrite up until the 238 /// inclusion directive at \p StartToken, then print out the inclusion 239 /// inclusion directive disabled by a #if directive, updating \p NextToWrite 240 /// and \p Line to track the number of source lines visited and the progress 241 /// through the \p FromFile buffer. 242 void InclusionRewriter::CommentOutDirective(Lexer &DirectiveLex, 243 const Token &StartToken, 244 const MemoryBuffer &FromFile, 245 StringRef EOL, 246 unsigned &NextToWrite, int &Line) { 247 OutputContentUpTo(FromFile, NextToWrite, 248 SM.getFileOffset(StartToken.getLocation()), EOL, Line, false); 249 Token DirectiveToken; 250 do { 251 DirectiveLex.LexFromRawLexer(DirectiveToken); 252 } while (!DirectiveToken.is(tok::eod) && DirectiveToken.isNot(tok::eof)); 253 if (&FromFile == PredefinesBuffer) { 254 // OutputContentUpTo() would not output anything anyway. 255 return; 256 } 257 OS << "#if 0 /* expanded by -frewrite-includes */" << EOL; 258 OutputContentUpTo(FromFile, NextToWrite, 259 SM.getFileOffset(DirectiveToken.getLocation()) + DirectiveToken.getLength(), 260 EOL, Line, true); 261 OS << "#endif /* expanded by -frewrite-includes */" << EOL; 262 } 263 264 /// Find the next identifier in the pragma directive specified by \p RawToken. 265 StringRef InclusionRewriter::NextIdentifierName(Lexer &RawLex, 266 Token &RawToken) { 267 RawLex.LexFromRawLexer(RawToken); 268 if (RawToken.is(tok::raw_identifier)) 269 PP.LookUpIdentifierInfo(RawToken); 270 if (RawToken.is(tok::identifier)) 271 return RawToken.getIdentifierInfo()->getName(); 272 return StringRef(); 273 } 274 275 // Expand __has_include and __has_include_next if possible. If there's no 276 // definitive answer return false. 277 bool InclusionRewriter::HandleHasInclude( 278 FileID FileId, Lexer &RawLex, const DirectoryLookup *Lookup, Token &Tok, 279 bool &FileExists) { 280 // Lex the opening paren. 281 RawLex.LexFromRawLexer(Tok); 282 if (Tok.isNot(tok::l_paren)) 283 return false; 284 285 RawLex.LexFromRawLexer(Tok); 286 287 SmallString<128> FilenameBuffer; 288 StringRef Filename; 289 // Since the raw lexer doesn't give us angle_literals we have to parse them 290 // ourselves. 291 // FIXME: What to do if the file name is a macro? 292 if (Tok.is(tok::less)) { 293 RawLex.LexFromRawLexer(Tok); 294 295 FilenameBuffer += '<'; 296 do { 297 if (Tok.is(tok::eod)) // Sanity check. 298 return false; 299 300 if (Tok.is(tok::raw_identifier)) 301 PP.LookUpIdentifierInfo(Tok); 302 303 // Get the string piece. 304 SmallVector<char, 128> TmpBuffer; 305 bool Invalid = false; 306 StringRef TmpName = PP.getSpelling(Tok, TmpBuffer, &Invalid); 307 if (Invalid) 308 return false; 309 310 FilenameBuffer += TmpName; 311 312 RawLex.LexFromRawLexer(Tok); 313 } while (Tok.isNot(tok::greater)); 314 315 FilenameBuffer += '>'; 316 Filename = FilenameBuffer; 317 } else { 318 if (Tok.isNot(tok::string_literal)) 319 return false; 320 321 bool Invalid = false; 322 Filename = PP.getSpelling(Tok, FilenameBuffer, &Invalid); 323 if (Invalid) 324 return false; 325 } 326 327 // Lex the closing paren. 328 RawLex.LexFromRawLexer(Tok); 329 if (Tok.isNot(tok::r_paren)) 330 return false; 331 332 // Now ask HeaderInfo if it knows about the header. 333 // FIXME: Subframeworks aren't handled here. Do we care? 334 bool isAngled = PP.GetIncludeFilenameSpelling(Tok.getLocation(), Filename); 335 const DirectoryLookup *CurDir; 336 const FileEntry *FileEnt = PP.getSourceManager().getFileEntryForID(FileId); 337 SmallVector<std::pair<const FileEntry *, const DirectoryEntry *>, 1> 338 Includers; 339 Includers.push_back(std::make_pair(FileEnt, FileEnt->getDir())); 340 const FileEntry *File = PP.getHeaderSearchInfo().LookupFile( 341 Filename, SourceLocation(), isAngled, nullptr, CurDir, Includers, nullptr, 342 nullptr, nullptr, false); 343 344 FileExists = File != nullptr; 345 return true; 346 } 347 348 /// Use a raw lexer to analyze \p FileId, incrementally copying parts of it 349 /// and including content of included files recursively. 350 bool InclusionRewriter::Process(FileID FileId, 351 SrcMgr::CharacteristicKind FileType) 352 { 353 bool Invalid; 354 const MemoryBuffer &FromFile = *SM.getBuffer(FileId, &Invalid); 355 if (Invalid) // invalid inclusion 356 return false; 357 const char *FileName = FromFile.getBufferIdentifier(); 358 Lexer RawLex(FileId, &FromFile, PP.getSourceManager(), PP.getLangOpts()); 359 RawLex.SetCommentRetentionState(false); 360 361 StringRef EOL = DetectEOL(FromFile); 362 363 // Per the GNU docs: "1" indicates entering a new file. 364 if (FileId == SM.getMainFileID() || FileId == PP.getPredefinesFileID()) 365 WriteLineInfo(FileName, 1, FileType, EOL, ""); 366 else 367 WriteLineInfo(FileName, 1, FileType, EOL, " 1"); 368 369 if (SM.getFileIDSize(FileId) == 0) 370 return false; 371 372 // The next byte to be copied from the source file, which may be non-zero if 373 // the lexer handled a BOM. 374 unsigned NextToWrite = SM.getFileOffset(RawLex.getSourceLocation()); 375 assert(SM.getLineNumber(FileId, NextToWrite) == 1); 376 int Line = 1; // The current input file line number. 377 378 Token RawToken; 379 RawLex.LexFromRawLexer(RawToken); 380 381 // TODO: Consider adding a switch that strips possibly unimportant content, 382 // such as comments, to reduce the size of repro files. 383 while (RawToken.isNot(tok::eof)) { 384 if (RawToken.is(tok::hash) && RawToken.isAtStartOfLine()) { 385 RawLex.setParsingPreprocessorDirective(true); 386 Token HashToken = RawToken; 387 RawLex.LexFromRawLexer(RawToken); 388 if (RawToken.is(tok::raw_identifier)) 389 PP.LookUpIdentifierInfo(RawToken); 390 if (RawToken.getIdentifierInfo() != nullptr) { 391 switch (RawToken.getIdentifierInfo()->getPPKeywordID()) { 392 case tok::pp_include: 393 case tok::pp_include_next: 394 case tok::pp_import: { 395 CommentOutDirective(RawLex, HashToken, FromFile, EOL, NextToWrite, 396 Line); 397 if (FileId != PP.getPredefinesFileID()) 398 WriteLineInfo(FileName, Line - 1, FileType, EOL, ""); 399 StringRef LineInfoExtra; 400 if (const FileChange *Change = FindFileChangeLocation( 401 HashToken.getLocation())) { 402 if (Change->Mod) { 403 WriteImplicitModuleImport(Change->Mod, EOL); 404 405 // else now include and recursively process the file 406 } else if (Process(Change->Id, Change->FileType)) { 407 // and set lineinfo back to this file, if the nested one was 408 // actually included 409 // `2' indicates returning to a file (after having included 410 // another file. 411 LineInfoExtra = " 2"; 412 } 413 } 414 // fix up lineinfo (since commented out directive changed line 415 // numbers) for inclusions that were skipped due to header guards 416 WriteLineInfo(FileName, Line, FileType, EOL, LineInfoExtra); 417 break; 418 } 419 case tok::pp_pragma: { 420 StringRef Identifier = NextIdentifierName(RawLex, RawToken); 421 if (Identifier == "clang" || Identifier == "GCC") { 422 if (NextIdentifierName(RawLex, RawToken) == "system_header") { 423 // keep the directive in, commented out 424 CommentOutDirective(RawLex, HashToken, FromFile, EOL, 425 NextToWrite, Line); 426 // update our own type 427 FileType = SM.getFileCharacteristic(RawToken.getLocation()); 428 WriteLineInfo(FileName, Line, FileType, EOL); 429 } 430 } else if (Identifier == "once") { 431 // keep the directive in, commented out 432 CommentOutDirective(RawLex, HashToken, FromFile, EOL, 433 NextToWrite, Line); 434 WriteLineInfo(FileName, Line, FileType, EOL); 435 } 436 break; 437 } 438 case tok::pp_if: 439 case tok::pp_elif: { 440 bool elif = (RawToken.getIdentifierInfo()->getPPKeywordID() == 441 tok::pp_elif); 442 // Rewrite special builtin macros to avoid pulling in host details. 443 do { 444 // Walk over the directive. 445 RawLex.LexFromRawLexer(RawToken); 446 if (RawToken.is(tok::raw_identifier)) 447 PP.LookUpIdentifierInfo(RawToken); 448 449 if (RawToken.is(tok::identifier)) { 450 bool HasFile; 451 SourceLocation Loc = RawToken.getLocation(); 452 453 // Rewrite __has_include(x) 454 if (RawToken.getIdentifierInfo()->isStr("__has_include")) { 455 if (!HandleHasInclude(FileId, RawLex, nullptr, RawToken, 456 HasFile)) 457 continue; 458 // Rewrite __has_include_next(x) 459 } else if (RawToken.getIdentifierInfo()->isStr( 460 "__has_include_next")) { 461 const DirectoryLookup *Lookup = PP.GetCurDirLookup(); 462 if (Lookup) 463 ++Lookup; 464 465 if (!HandleHasInclude(FileId, RawLex, Lookup, RawToken, 466 HasFile)) 467 continue; 468 } else { 469 continue; 470 } 471 // Replace the macro with (0) or (1), followed by the commented 472 // out macro for reference. 473 OutputContentUpTo(FromFile, NextToWrite, SM.getFileOffset(Loc), 474 EOL, Line, false); 475 OS << '(' << (int) HasFile << ")/*"; 476 OutputContentUpTo(FromFile, NextToWrite, 477 SM.getFileOffset(RawToken.getLocation()) + 478 RawToken.getLength(), 479 EOL, Line, false); 480 OS << "*/"; 481 } 482 } while (RawToken.isNot(tok::eod)); 483 if (elif) { 484 OutputContentUpTo(FromFile, NextToWrite, 485 SM.getFileOffset(RawToken.getLocation()) + 486 RawToken.getLength(), 487 EOL, Line, /*EnsureNewLine*/ true); 488 WriteLineInfo(FileName, Line, FileType, EOL); 489 } 490 break; 491 } 492 case tok::pp_endif: 493 case tok::pp_else: { 494 // We surround every #include by #if 0 to comment it out, but that 495 // changes line numbers. These are fixed up right after that, but 496 // the whole #include could be inside a preprocessor conditional 497 // that is not processed. So it is necessary to fix the line 498 // numbers one the next line after each #else/#endif as well. 499 RawLex.SetKeepWhitespaceMode(true); 500 do { 501 RawLex.LexFromRawLexer(RawToken); 502 } while (RawToken.isNot(tok::eod) && RawToken.isNot(tok::eof)); 503 OutputContentUpTo( 504 FromFile, NextToWrite, 505 SM.getFileOffset(RawToken.getLocation()) + RawToken.getLength(), 506 EOL, Line, /*EnsureNewLine*/ true); 507 WriteLineInfo(FileName, Line, FileType, EOL); 508 RawLex.SetKeepWhitespaceMode(false); 509 } 510 default: 511 break; 512 } 513 } 514 RawLex.setParsingPreprocessorDirective(false); 515 } 516 RawLex.LexFromRawLexer(RawToken); 517 } 518 OutputContentUpTo(FromFile, NextToWrite, 519 SM.getFileOffset(SM.getLocForEndOfFile(FileId)), EOL, Line, 520 /*EnsureNewline*/true); 521 return true; 522 } 523 524 /// InclusionRewriterInInput - Implement -frewrite-includes mode. 525 void clang::RewriteIncludesInInput(Preprocessor &PP, raw_ostream *OS, 526 const PreprocessorOutputOptions &Opts) { 527 SourceManager &SM = PP.getSourceManager(); 528 InclusionRewriter *Rewrite = new InclusionRewriter(PP, *OS, 529 Opts.ShowLineMarkers); 530 PP.addPPCallbacks(Rewrite); 531 PP.IgnorePragmas(); 532 533 // First let the preprocessor process the entire file and call callbacks. 534 // Callbacks will record which #include's were actually performed. 535 PP.EnterMainSourceFile(); 536 Token Tok; 537 // Only preprocessor directives matter here, so disable macro expansion 538 // everywhere else as an optimization. 539 // TODO: It would be even faster if the preprocessor could be switched 540 // to a mode where it would parse only preprocessor directives and comments, 541 // nothing else matters for parsing or processing. 542 PP.SetMacroExpansionOnlyInDirectives(); 543 do { 544 PP.Lex(Tok); 545 } while (Tok.isNot(tok::eof)); 546 Rewrite->setPredefinesBuffer(SM.getBuffer(PP.getPredefinesFileID())); 547 Rewrite->Process(PP.getPredefinesFileID(), SrcMgr::C_User); 548 Rewrite->Process(SM.getMainFileID(), SrcMgr::C_User); 549 OS->flush(); 550 } 551