1 //===- InputFile.cpp ------------------------------------------ *- C++ --*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "InputFile.h" 11 12 #include "FormatUtil.h" 13 #include "LinePrinter.h" 14 15 #include "llvm/BinaryFormat/Magic.h" 16 #include "llvm/DebugInfo/CodeView/CodeView.h" 17 #include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h" 18 #include "llvm/DebugInfo/CodeView/StringsAndChecksums.h" 19 #include "llvm/DebugInfo/PDB/Native/DbiStream.h" 20 #include "llvm/DebugInfo/PDB/Native/NativeSession.h" 21 #include "llvm/DebugInfo/PDB/Native/PDBFile.h" 22 #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h" 23 #include "llvm/DebugInfo/PDB/Native/RawError.h" 24 #include "llvm/DebugInfo/PDB/Native/TpiStream.h" 25 #include "llvm/DebugInfo/PDB/PDB.h" 26 #include "llvm/Object/COFF.h" 27 #include "llvm/Support/FileSystem.h" 28 #include "llvm/Support/FormatVariadic.h" 29 30 using namespace llvm; 31 using namespace llvm::codeview; 32 using namespace llvm::object; 33 using namespace llvm::pdb; 34 35 InputFile::InputFile() {} 36 InputFile::~InputFile() {} 37 38 static Expected<ModuleDebugStreamRef> 39 getModuleDebugStream(PDBFile &File, StringRef &ModuleName, uint32_t Index) { 40 ExitOnError Err("Unexpected error: "); 41 42 auto &Dbi = Err(File.getPDBDbiStream()); 43 const auto &Modules = Dbi.modules(); 44 if (Index >= Modules.getModuleCount()) 45 return make_error<RawError>(raw_error_code::index_out_of_bounds, 46 "Invalid module index"); 47 48 auto Modi = Modules.getModuleDescriptor(Index); 49 50 ModuleName = Modi.getModuleName(); 51 52 uint16_t ModiStream = Modi.getModuleStreamIndex(); 53 if (ModiStream == kInvalidStreamIndex) 54 return make_error<RawError>(raw_error_code::no_stream, 55 "Module stream not present"); 56 57 auto ModStreamData = File.createIndexedStream(ModiStream); 58 59 ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData)); 60 if (auto EC = ModS.reload()) 61 return make_error<RawError>(raw_error_code::corrupt_file, 62 "Invalid module stream"); 63 64 return std::move(ModS); 65 } 66 67 static inline bool isCodeViewDebugSubsection(object::SectionRef Section, 68 StringRef Name, 69 BinaryStreamReader &Reader) { 70 StringRef SectionName, Contents; 71 if (Section.getName(SectionName)) 72 return false; 73 74 if (SectionName != Name) 75 return false; 76 77 if (Section.getContents(Contents)) 78 return false; 79 80 Reader = BinaryStreamReader(Contents, support::little); 81 uint32_t Magic; 82 if (Reader.bytesRemaining() < sizeof(uint32_t)) 83 return false; 84 cantFail(Reader.readInteger(Magic)); 85 if (Magic != COFF::DEBUG_SECTION_MAGIC) 86 return false; 87 return true; 88 } 89 90 static inline bool isDebugSSection(object::SectionRef Section, 91 DebugSubsectionArray &Subsections) { 92 BinaryStreamReader Reader; 93 if (!isCodeViewDebugSubsection(Section, ".debug$S", Reader)) 94 return false; 95 96 cantFail(Reader.readArray(Subsections, Reader.bytesRemaining())); 97 return true; 98 } 99 100 static bool isDebugTSection(SectionRef Section, CVTypeArray &Types) { 101 BinaryStreamReader Reader; 102 if (!isCodeViewDebugSubsection(Section, ".debug$T", Reader) && 103 !isCodeViewDebugSubsection(Section, ".debug$P", Reader)) 104 return false; 105 cantFail(Reader.readArray(Types, Reader.bytesRemaining())); 106 return true; 107 } 108 109 static std::string formatChecksumKind(FileChecksumKind Kind) { 110 switch (Kind) { 111 RETURN_CASE(FileChecksumKind, None, "None"); 112 RETURN_CASE(FileChecksumKind, MD5, "MD5"); 113 RETURN_CASE(FileChecksumKind, SHA1, "SHA-1"); 114 RETURN_CASE(FileChecksumKind, SHA256, "SHA-256"); 115 } 116 return formatUnknownEnum(Kind); 117 } 118 119 template <typename... Args> 120 static void formatInternal(LinePrinter &Printer, bool Append, Args &&... args) { 121 if (Append) 122 Printer.format(std::forward<Args>(args)...); 123 else 124 Printer.formatLine(std::forward<Args>(args)...); 125 } 126 127 SymbolGroup::SymbolGroup(InputFile *File, uint32_t GroupIndex) : File(File) { 128 if (!File) 129 return; 130 131 if (File->isPdb()) 132 initializeForPdb(GroupIndex); 133 else { 134 Name = ".debug$S"; 135 uint32_t I = 0; 136 for (const auto &S : File->obj().sections()) { 137 DebugSubsectionArray SS; 138 if (!isDebugSSection(S, SS)) 139 continue; 140 141 if (!SC.hasChecksums() || !SC.hasStrings()) 142 SC.initialize(SS); 143 144 if (I == GroupIndex) 145 Subsections = SS; 146 147 if (SC.hasChecksums() && SC.hasStrings()) 148 break; 149 } 150 rebuildChecksumMap(); 151 } 152 } 153 154 StringRef SymbolGroup::name() const { return Name; } 155 156 void SymbolGroup::updateDebugS(const codeview::DebugSubsectionArray &SS) { 157 Subsections = SS; 158 } 159 160 void SymbolGroup::updatePdbModi(uint32_t Modi) { initializeForPdb(Modi); } 161 162 void SymbolGroup::initializeForPdb(uint32_t Modi) { 163 assert(File && File->isPdb()); 164 165 // PDB always uses the same string table, but each module has its own 166 // checksums. So we only set the strings if they're not already set. 167 if (!SC.hasStrings()) { 168 auto StringTable = File->pdb().getStringTable(); 169 if (StringTable) 170 SC.setStrings(StringTable->getStringTable()); 171 else 172 consumeError(StringTable.takeError()); 173 } 174 175 SC.resetChecksums(); 176 auto MDS = getModuleDebugStream(File->pdb(), Name, Modi); 177 if (!MDS) { 178 consumeError(MDS.takeError()); 179 return; 180 } 181 182 DebugStream = std::make_shared<ModuleDebugStreamRef>(std::move(*MDS)); 183 Subsections = DebugStream->getSubsectionsArray(); 184 SC.initialize(Subsections); 185 rebuildChecksumMap(); 186 } 187 188 void SymbolGroup::rebuildChecksumMap() { 189 if (!SC.hasChecksums()) 190 return; 191 192 for (const auto &Entry : SC.checksums()) { 193 auto S = SC.strings().getString(Entry.FileNameOffset); 194 if (!S) 195 continue; 196 ChecksumsByFile[*S] = Entry; 197 } 198 } 199 200 const ModuleDebugStreamRef &SymbolGroup::getPdbModuleStream() const { 201 assert(File && File->isPdb() && DebugStream); 202 return *DebugStream; 203 } 204 205 Expected<StringRef> SymbolGroup::getNameFromStringTable(uint32_t Offset) const { 206 return SC.strings().getString(Offset); 207 } 208 209 void SymbolGroup::formatFromFileName(LinePrinter &Printer, StringRef File, 210 bool Append) const { 211 auto FC = ChecksumsByFile.find(File); 212 if (FC == ChecksumsByFile.end()) { 213 formatInternal(Printer, Append, "- (no checksum) {0}", File); 214 return; 215 } 216 217 formatInternal(Printer, Append, "- ({0}: {1}) {2}", 218 formatChecksumKind(FC->getValue().Kind), 219 toHex(FC->getValue().Checksum), File); 220 } 221 222 void SymbolGroup::formatFromChecksumsOffset(LinePrinter &Printer, 223 uint32_t Offset, 224 bool Append) const { 225 if (!SC.hasChecksums()) { 226 formatInternal(Printer, Append, "(unknown file name offset {0})", Offset); 227 return; 228 } 229 230 auto Iter = SC.checksums().getArray().at(Offset); 231 if (Iter == SC.checksums().getArray().end()) { 232 formatInternal(Printer, Append, "(unknown file name offset {0})", Offset); 233 return; 234 } 235 236 uint32_t FO = Iter->FileNameOffset; 237 auto ExpectedFile = getNameFromStringTable(FO); 238 if (!ExpectedFile) { 239 formatInternal(Printer, Append, "(unknown file name offset {0})", Offset); 240 consumeError(ExpectedFile.takeError()); 241 return; 242 } 243 if (Iter->Kind == FileChecksumKind::None) { 244 formatInternal(Printer, Append, "{0} (no checksum)", *ExpectedFile); 245 } else { 246 formatInternal(Printer, Append, "{0} ({1}: {2})", *ExpectedFile, 247 formatChecksumKind(Iter->Kind), toHex(Iter->Checksum)); 248 } 249 } 250 251 Expected<InputFile> InputFile::open(StringRef Path, bool AllowUnknownFile) { 252 InputFile IF; 253 if (!llvm::sys::fs::exists(Path)) 254 return make_error<StringError>(formatv("File {0} not found", Path), 255 inconvertibleErrorCode()); 256 257 file_magic Magic; 258 if (auto EC = identify_magic(Path, Magic)) 259 return make_error<StringError>( 260 formatv("Unable to identify file type for file {0}", Path), EC); 261 262 if (Magic == file_magic::coff_object) { 263 Expected<OwningBinary<Binary>> BinaryOrErr = createBinary(Path); 264 if (!BinaryOrErr) 265 return BinaryOrErr.takeError(); 266 267 IF.CoffObject = std::move(*BinaryOrErr); 268 IF.PdbOrObj = llvm::cast<COFFObjectFile>(IF.CoffObject.getBinary()); 269 return std::move(IF); 270 } 271 272 if (Magic == file_magic::pdb) { 273 std::unique_ptr<IPDBSession> Session; 274 if (auto Err = loadDataForPDB(PDB_ReaderType::Native, Path, Session)) 275 return std::move(Err); 276 277 IF.PdbSession.reset(static_cast<NativeSession *>(Session.release())); 278 IF.PdbOrObj = &IF.PdbSession->getPDBFile(); 279 280 return std::move(IF); 281 } 282 283 if (!AllowUnknownFile) 284 return make_error<StringError>( 285 formatv("File {0} is not a supported file type", Path), 286 inconvertibleErrorCode()); 287 288 auto Result = MemoryBuffer::getFile(Path, -1LL, false); 289 if (!Result) 290 return make_error<StringError>( 291 formatv("File {0} could not be opened", Path), Result.getError()); 292 293 IF.UnknownFile = std::move(*Result); 294 IF.PdbOrObj = IF.UnknownFile.get(); 295 return std::move(IF); 296 } 297 298 PDBFile &InputFile::pdb() { 299 assert(isPdb()); 300 return *PdbOrObj.get<PDBFile *>(); 301 } 302 303 const PDBFile &InputFile::pdb() const { 304 assert(isPdb()); 305 return *PdbOrObj.get<PDBFile *>(); 306 } 307 308 object::COFFObjectFile &InputFile::obj() { 309 assert(isObj()); 310 return *PdbOrObj.get<object::COFFObjectFile *>(); 311 } 312 313 const object::COFFObjectFile &InputFile::obj() const { 314 assert(isObj()); 315 return *PdbOrObj.get<object::COFFObjectFile *>(); 316 } 317 318 MemoryBuffer &InputFile::unknown() { 319 assert(isUnknown()); 320 return *PdbOrObj.get<MemoryBuffer *>(); 321 } 322 323 const MemoryBuffer &InputFile::unknown() const { 324 assert(isUnknown()); 325 return *PdbOrObj.get<MemoryBuffer *>(); 326 } 327 328 StringRef InputFile::getFilePath() const { 329 if (isPdb()) 330 return pdb().getFilePath(); 331 if (isObj()) 332 return obj().getFileName(); 333 assert(isUnknown()); 334 return unknown().getBufferIdentifier(); 335 } 336 337 bool InputFile::hasTypes() const { 338 if (isPdb()) 339 return pdb().hasPDBTpiStream(); 340 341 for (const auto &Section : obj().sections()) { 342 CVTypeArray Types; 343 if (isDebugTSection(Section, Types)) 344 return true; 345 } 346 return false; 347 } 348 349 bool InputFile::hasIds() const { 350 if (isObj()) 351 return false; 352 return pdb().hasPDBIpiStream(); 353 } 354 355 bool InputFile::isPdb() const { return PdbOrObj.is<PDBFile *>(); } 356 357 bool InputFile::isObj() const { 358 return PdbOrObj.is<object::COFFObjectFile *>(); 359 } 360 361 bool InputFile::isUnknown() const { return PdbOrObj.is<MemoryBuffer *>(); } 362 363 codeview::LazyRandomTypeCollection & 364 InputFile::getOrCreateTypeCollection(TypeCollectionKind Kind) { 365 if (Types && Kind == kTypes) 366 return *Types; 367 if (Ids && Kind == kIds) 368 return *Ids; 369 370 if (Kind == kIds) { 371 assert(isPdb() && pdb().hasPDBIpiStream()); 372 } 373 374 // If the collection was already initialized, we should have just returned it 375 // in step 1. 376 if (isPdb()) { 377 TypeCollectionPtr &Collection = (Kind == kIds) ? Ids : Types; 378 auto &Stream = cantFail((Kind == kIds) ? pdb().getPDBIpiStream() 379 : pdb().getPDBTpiStream()); 380 381 auto &Array = Stream.typeArray(); 382 uint32_t Count = Stream.getNumTypeRecords(); 383 auto Offsets = Stream.getTypeIndexOffsets(); 384 Collection = 385 llvm::make_unique<LazyRandomTypeCollection>(Array, Count, Offsets); 386 return *Collection; 387 } 388 389 assert(isObj()); 390 assert(Kind == kTypes); 391 assert(!Types); 392 393 for (const auto &Section : obj().sections()) { 394 CVTypeArray Records; 395 if (!isDebugTSection(Section, Records)) 396 continue; 397 398 Types = llvm::make_unique<LazyRandomTypeCollection>(Records, 100); 399 return *Types; 400 } 401 402 Types = llvm::make_unique<LazyRandomTypeCollection>(100); 403 return *Types; 404 } 405 406 codeview::LazyRandomTypeCollection &InputFile::types() { 407 return getOrCreateTypeCollection(kTypes); 408 } 409 410 codeview::LazyRandomTypeCollection &InputFile::ids() { 411 // Object files have only one type stream that contains both types and ids. 412 // Similarly, some PDBs don't contain an IPI stream, and for those both types 413 // and IDs are in the same stream. 414 if (isObj() || !pdb().hasPDBIpiStream()) 415 return types(); 416 417 return getOrCreateTypeCollection(kIds); 418 } 419 420 iterator_range<SymbolGroupIterator> InputFile::symbol_groups() { 421 return make_range<SymbolGroupIterator>(symbol_groups_begin(), 422 symbol_groups_end()); 423 } 424 425 SymbolGroupIterator InputFile::symbol_groups_begin() { 426 return SymbolGroupIterator(*this); 427 } 428 429 SymbolGroupIterator InputFile::symbol_groups_end() { 430 return SymbolGroupIterator(); 431 } 432 433 SymbolGroupIterator::SymbolGroupIterator() : Value(nullptr) {} 434 435 SymbolGroupIterator::SymbolGroupIterator(InputFile &File) : Value(&File) { 436 if (File.isObj()) { 437 SectionIter = File.obj().section_begin(); 438 scanToNextDebugS(); 439 } 440 } 441 442 bool SymbolGroupIterator::operator==(const SymbolGroupIterator &R) const { 443 bool E = isEnd(); 444 bool RE = R.isEnd(); 445 if (E || RE) 446 return E == RE; 447 448 if (Value.File != R.Value.File) 449 return false; 450 return Index == R.Index; 451 } 452 453 const SymbolGroup &SymbolGroupIterator::operator*() const { 454 assert(!isEnd()); 455 return Value; 456 } 457 SymbolGroup &SymbolGroupIterator::operator*() { 458 assert(!isEnd()); 459 return Value; 460 } 461 462 SymbolGroupIterator &SymbolGroupIterator::operator++() { 463 assert(Value.File && !isEnd()); 464 ++Index; 465 if (isEnd()) 466 return *this; 467 468 if (Value.File->isPdb()) { 469 Value.updatePdbModi(Index); 470 return *this; 471 } 472 473 scanToNextDebugS(); 474 return *this; 475 } 476 477 void SymbolGroupIterator::scanToNextDebugS() { 478 assert(SectionIter.hasValue()); 479 auto End = Value.File->obj().section_end(); 480 auto &Iter = *SectionIter; 481 assert(!isEnd()); 482 483 while (++Iter != End) { 484 DebugSubsectionArray SS; 485 SectionRef SR = *Iter; 486 if (!isDebugSSection(SR, SS)) 487 continue; 488 489 Value.updateDebugS(SS); 490 return; 491 } 492 } 493 494 bool SymbolGroupIterator::isEnd() const { 495 if (!Value.File) 496 return true; 497 if (Value.File->isPdb()) { 498 auto &Dbi = cantFail(Value.File->pdb().getPDBDbiStream()); 499 uint32_t Count = Dbi.modules().getModuleCount(); 500 assert(Index <= Count); 501 return Index == Count; 502 } 503 504 assert(SectionIter.hasValue()); 505 return *SectionIter == Value.File->obj().section_end(); 506 } 507