1 //===- InputFile.cpp ------------------------------------------ *- C++ --*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "InputFile.h" 10 11 #include "FormatUtil.h" 12 #include "LinePrinter.h" 13 14 #include "llvm/BinaryFormat/Magic.h" 15 #include "llvm/DebugInfo/CodeView/CodeView.h" 16 #include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h" 17 #include "llvm/DebugInfo/CodeView/StringsAndChecksums.h" 18 #include "llvm/DebugInfo/PDB/Native/DbiStream.h" 19 #include "llvm/DebugInfo/PDB/Native/NativeSession.h" 20 #include "llvm/DebugInfo/PDB/Native/PDBFile.h" 21 #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h" 22 #include "llvm/DebugInfo/PDB/Native/RawError.h" 23 #include "llvm/DebugInfo/PDB/Native/TpiStream.h" 24 #include "llvm/DebugInfo/PDB/PDB.h" 25 #include "llvm/Object/COFF.h" 26 #include "llvm/Support/FileSystem.h" 27 #include "llvm/Support/FormatVariadic.h" 28 29 using namespace llvm; 30 using namespace llvm::codeview; 31 using namespace llvm::object; 32 using namespace llvm::pdb; 33 34 InputFile::InputFile() {} 35 InputFile::~InputFile() {} 36 37 static Expected<ModuleDebugStreamRef> 38 getModuleDebugStream(PDBFile &File, StringRef &ModuleName, uint32_t Index) { 39 ExitOnError Err("Unexpected error: "); 40 41 auto &Dbi = Err(File.getPDBDbiStream()); 42 const auto &Modules = Dbi.modules(); 43 if (Index >= Modules.getModuleCount()) 44 return make_error<RawError>(raw_error_code::index_out_of_bounds, 45 "Invalid module index"); 46 47 auto Modi = Modules.getModuleDescriptor(Index); 48 49 ModuleName = Modi.getModuleName(); 50 51 uint16_t ModiStream = Modi.getModuleStreamIndex(); 52 if (ModiStream == kInvalidStreamIndex) 53 return make_error<RawError>(raw_error_code::no_stream, 54 "Module stream not present"); 55 56 auto ModStreamData = File.createIndexedStream(ModiStream); 57 58 ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData)); 59 if (auto EC = ModS.reload()) 60 return make_error<RawError>(raw_error_code::corrupt_file, 61 "Invalid module stream"); 62 63 return std::move(ModS); 64 } 65 66 static inline bool isCodeViewDebugSubsection(object::SectionRef Section, 67 StringRef Name, 68 BinaryStreamReader &Reader) { 69 if (Expected<StringRef> NameOrErr = Section.getName()) { 70 if (*NameOrErr != Name) 71 return false; 72 } else { 73 consumeError(NameOrErr.takeError()); 74 return false; 75 } 76 77 Expected<StringRef> ContentsOrErr = Section.getContents(); 78 if (!ContentsOrErr) { 79 consumeError(ContentsOrErr.takeError()); 80 return false; 81 } 82 83 Reader = BinaryStreamReader(*ContentsOrErr, support::little); 84 uint32_t Magic; 85 if (Reader.bytesRemaining() < sizeof(uint32_t)) 86 return false; 87 cantFail(Reader.readInteger(Magic)); 88 if (Magic != COFF::DEBUG_SECTION_MAGIC) 89 return false; 90 return true; 91 } 92 93 static inline bool isDebugSSection(object::SectionRef Section, 94 DebugSubsectionArray &Subsections) { 95 BinaryStreamReader Reader; 96 if (!isCodeViewDebugSubsection(Section, ".debug$S", Reader)) 97 return false; 98 99 cantFail(Reader.readArray(Subsections, Reader.bytesRemaining())); 100 return true; 101 } 102 103 static bool isDebugTSection(SectionRef Section, CVTypeArray &Types) { 104 BinaryStreamReader Reader; 105 if (!isCodeViewDebugSubsection(Section, ".debug$T", Reader) && 106 !isCodeViewDebugSubsection(Section, ".debug$P", Reader)) 107 return false; 108 cantFail(Reader.readArray(Types, Reader.bytesRemaining())); 109 return true; 110 } 111 112 static std::string formatChecksumKind(FileChecksumKind Kind) { 113 switch (Kind) { 114 RETURN_CASE(FileChecksumKind, None, "None"); 115 RETURN_CASE(FileChecksumKind, MD5, "MD5"); 116 RETURN_CASE(FileChecksumKind, SHA1, "SHA-1"); 117 RETURN_CASE(FileChecksumKind, SHA256, "SHA-256"); 118 } 119 return formatUnknownEnum(Kind); 120 } 121 122 template <typename... Args> 123 static void formatInternal(LinePrinter &Printer, bool Append, Args &&... args) { 124 if (Append) 125 Printer.format(std::forward<Args>(args)...); 126 else 127 Printer.formatLine(std::forward<Args>(args)...); 128 } 129 130 SymbolGroup::SymbolGroup(InputFile *File, uint32_t GroupIndex) : File(File) { 131 if (!File) 132 return; 133 134 if (File->isPdb()) 135 initializeForPdb(GroupIndex); 136 else { 137 Name = ".debug$S"; 138 uint32_t I = 0; 139 for (const auto &S : File->obj().sections()) { 140 DebugSubsectionArray SS; 141 if (!isDebugSSection(S, SS)) 142 continue; 143 144 if (!SC.hasChecksums() || !SC.hasStrings()) 145 SC.initialize(SS); 146 147 if (I == GroupIndex) 148 Subsections = SS; 149 150 if (SC.hasChecksums() && SC.hasStrings()) 151 break; 152 } 153 rebuildChecksumMap(); 154 } 155 } 156 157 StringRef SymbolGroup::name() const { return Name; } 158 159 void SymbolGroup::updateDebugS(const codeview::DebugSubsectionArray &SS) { 160 Subsections = SS; 161 } 162 163 void SymbolGroup::updatePdbModi(uint32_t Modi) { initializeForPdb(Modi); } 164 165 void SymbolGroup::initializeForPdb(uint32_t Modi) { 166 assert(File && File->isPdb()); 167 168 // PDB always uses the same string table, but each module has its own 169 // checksums. So we only set the strings if they're not already set. 170 if (!SC.hasStrings()) { 171 auto StringTable = File->pdb().getStringTable(); 172 if (StringTable) 173 SC.setStrings(StringTable->getStringTable()); 174 else 175 consumeError(StringTable.takeError()); 176 } 177 178 SC.resetChecksums(); 179 auto MDS = getModuleDebugStream(File->pdb(), Name, Modi); 180 if (!MDS) { 181 consumeError(MDS.takeError()); 182 return; 183 } 184 185 DebugStream = std::make_shared<ModuleDebugStreamRef>(std::move(*MDS)); 186 Subsections = DebugStream->getSubsectionsArray(); 187 SC.initialize(Subsections); 188 rebuildChecksumMap(); 189 } 190 191 void SymbolGroup::rebuildChecksumMap() { 192 if (!SC.hasChecksums()) 193 return; 194 195 for (const auto &Entry : SC.checksums()) { 196 auto S = SC.strings().getString(Entry.FileNameOffset); 197 if (!S) 198 continue; 199 ChecksumsByFile[*S] = Entry; 200 } 201 } 202 203 const ModuleDebugStreamRef &SymbolGroup::getPdbModuleStream() const { 204 assert(File && File->isPdb() && DebugStream); 205 return *DebugStream; 206 } 207 208 Expected<StringRef> SymbolGroup::getNameFromStringTable(uint32_t Offset) const { 209 return SC.strings().getString(Offset); 210 } 211 212 void SymbolGroup::formatFromFileName(LinePrinter &Printer, StringRef File, 213 bool Append) const { 214 auto FC = ChecksumsByFile.find(File); 215 if (FC == ChecksumsByFile.end()) { 216 formatInternal(Printer, Append, "- (no checksum) {0}", File); 217 return; 218 } 219 220 formatInternal(Printer, Append, "- ({0}: {1}) {2}", 221 formatChecksumKind(FC->getValue().Kind), 222 toHex(FC->getValue().Checksum), File); 223 } 224 225 void SymbolGroup::formatFromChecksumsOffset(LinePrinter &Printer, 226 uint32_t Offset, 227 bool Append) const { 228 if (!SC.hasChecksums()) { 229 formatInternal(Printer, Append, "(unknown file name offset {0})", Offset); 230 return; 231 } 232 233 auto Iter = SC.checksums().getArray().at(Offset); 234 if (Iter == SC.checksums().getArray().end()) { 235 formatInternal(Printer, Append, "(unknown file name offset {0})", Offset); 236 return; 237 } 238 239 uint32_t FO = Iter->FileNameOffset; 240 auto ExpectedFile = getNameFromStringTable(FO); 241 if (!ExpectedFile) { 242 formatInternal(Printer, Append, "(unknown file name offset {0})", Offset); 243 consumeError(ExpectedFile.takeError()); 244 return; 245 } 246 if (Iter->Kind == FileChecksumKind::None) { 247 formatInternal(Printer, Append, "{0} (no checksum)", *ExpectedFile); 248 } else { 249 formatInternal(Printer, Append, "{0} ({1}: {2})", *ExpectedFile, 250 formatChecksumKind(Iter->Kind), toHex(Iter->Checksum)); 251 } 252 } 253 254 Expected<InputFile> InputFile::open(StringRef Path, bool AllowUnknownFile) { 255 InputFile IF; 256 if (!llvm::sys::fs::exists(Path)) 257 return make_error<StringError>(formatv("File {0} not found", Path), 258 inconvertibleErrorCode()); 259 260 file_magic Magic; 261 if (auto EC = identify_magic(Path, Magic)) 262 return make_error<StringError>( 263 formatv("Unable to identify file type for file {0}", Path), EC); 264 265 if (Magic == file_magic::coff_object) { 266 Expected<OwningBinary<Binary>> BinaryOrErr = createBinary(Path); 267 if (!BinaryOrErr) 268 return BinaryOrErr.takeError(); 269 270 IF.CoffObject = std::move(*BinaryOrErr); 271 IF.PdbOrObj = llvm::cast<COFFObjectFile>(IF.CoffObject.getBinary()); 272 return std::move(IF); 273 } 274 275 if (Magic == file_magic::pdb) { 276 std::unique_ptr<IPDBSession> Session; 277 if (auto Err = loadDataForPDB(PDB_ReaderType::Native, Path, Session)) 278 return std::move(Err); 279 280 IF.PdbSession.reset(static_cast<NativeSession *>(Session.release())); 281 IF.PdbOrObj = &IF.PdbSession->getPDBFile(); 282 283 return std::move(IF); 284 } 285 286 if (!AllowUnknownFile) 287 return make_error<StringError>( 288 formatv("File {0} is not a supported file type", Path), 289 inconvertibleErrorCode()); 290 291 auto Result = MemoryBuffer::getFile(Path, /*IsText=*/false, 292 /*RequiresNullTerminator=*/false); 293 if (!Result) 294 return make_error<StringError>( 295 formatv("File {0} could not be opened", Path), Result.getError()); 296 297 IF.UnknownFile = std::move(*Result); 298 IF.PdbOrObj = IF.UnknownFile.get(); 299 return std::move(IF); 300 } 301 302 PDBFile &InputFile::pdb() { 303 assert(isPdb()); 304 return *PdbOrObj.get<PDBFile *>(); 305 } 306 307 const PDBFile &InputFile::pdb() const { 308 assert(isPdb()); 309 return *PdbOrObj.get<PDBFile *>(); 310 } 311 312 object::COFFObjectFile &InputFile::obj() { 313 assert(isObj()); 314 return *PdbOrObj.get<object::COFFObjectFile *>(); 315 } 316 317 const object::COFFObjectFile &InputFile::obj() const { 318 assert(isObj()); 319 return *PdbOrObj.get<object::COFFObjectFile *>(); 320 } 321 322 MemoryBuffer &InputFile::unknown() { 323 assert(isUnknown()); 324 return *PdbOrObj.get<MemoryBuffer *>(); 325 } 326 327 const MemoryBuffer &InputFile::unknown() const { 328 assert(isUnknown()); 329 return *PdbOrObj.get<MemoryBuffer *>(); 330 } 331 332 StringRef InputFile::getFilePath() const { 333 if (isPdb()) 334 return pdb().getFilePath(); 335 if (isObj()) 336 return obj().getFileName(); 337 assert(isUnknown()); 338 return unknown().getBufferIdentifier(); 339 } 340 341 bool InputFile::hasTypes() const { 342 if (isPdb()) 343 return pdb().hasPDBTpiStream(); 344 345 for (const auto &Section : obj().sections()) { 346 CVTypeArray Types; 347 if (isDebugTSection(Section, Types)) 348 return true; 349 } 350 return false; 351 } 352 353 bool InputFile::hasIds() const { 354 if (isObj()) 355 return false; 356 return pdb().hasPDBIpiStream(); 357 } 358 359 bool InputFile::isPdb() const { return PdbOrObj.is<PDBFile *>(); } 360 361 bool InputFile::isObj() const { 362 return PdbOrObj.is<object::COFFObjectFile *>(); 363 } 364 365 bool InputFile::isUnknown() const { return PdbOrObj.is<MemoryBuffer *>(); } 366 367 codeview::LazyRandomTypeCollection & 368 InputFile::getOrCreateTypeCollection(TypeCollectionKind Kind) { 369 if (Types && Kind == kTypes) 370 return *Types; 371 if (Ids && Kind == kIds) 372 return *Ids; 373 374 if (Kind == kIds) { 375 assert(isPdb() && pdb().hasPDBIpiStream()); 376 } 377 378 // If the collection was already initialized, we should have just returned it 379 // in step 1. 380 if (isPdb()) { 381 TypeCollectionPtr &Collection = (Kind == kIds) ? Ids : Types; 382 auto &Stream = cantFail((Kind == kIds) ? pdb().getPDBIpiStream() 383 : pdb().getPDBTpiStream()); 384 385 auto &Array = Stream.typeArray(); 386 uint32_t Count = Stream.getNumTypeRecords(); 387 auto Offsets = Stream.getTypeIndexOffsets(); 388 Collection = 389 std::make_unique<LazyRandomTypeCollection>(Array, Count, Offsets); 390 return *Collection; 391 } 392 393 assert(isObj()); 394 assert(Kind == kTypes); 395 assert(!Types); 396 397 for (const auto &Section : obj().sections()) { 398 CVTypeArray Records; 399 if (!isDebugTSection(Section, Records)) 400 continue; 401 402 Types = std::make_unique<LazyRandomTypeCollection>(Records, 100); 403 return *Types; 404 } 405 406 Types = std::make_unique<LazyRandomTypeCollection>(100); 407 return *Types; 408 } 409 410 codeview::LazyRandomTypeCollection &InputFile::types() { 411 return getOrCreateTypeCollection(kTypes); 412 } 413 414 codeview::LazyRandomTypeCollection &InputFile::ids() { 415 // Object files have only one type stream that contains both types and ids. 416 // Similarly, some PDBs don't contain an IPI stream, and for those both types 417 // and IDs are in the same stream. 418 if (isObj() || !pdb().hasPDBIpiStream()) 419 return types(); 420 421 return getOrCreateTypeCollection(kIds); 422 } 423 424 iterator_range<SymbolGroupIterator> InputFile::symbol_groups() { 425 return make_range<SymbolGroupIterator>(symbol_groups_begin(), 426 symbol_groups_end()); 427 } 428 429 SymbolGroupIterator InputFile::symbol_groups_begin() { 430 return SymbolGroupIterator(*this); 431 } 432 433 SymbolGroupIterator InputFile::symbol_groups_end() { 434 return SymbolGroupIterator(); 435 } 436 437 SymbolGroupIterator::SymbolGroupIterator() : Value(nullptr) {} 438 439 SymbolGroupIterator::SymbolGroupIterator(InputFile &File) : Value(&File) { 440 if (File.isObj()) { 441 SectionIter = File.obj().section_begin(); 442 scanToNextDebugS(); 443 } 444 } 445 446 bool SymbolGroupIterator::operator==(const SymbolGroupIterator &R) const { 447 bool E = isEnd(); 448 bool RE = R.isEnd(); 449 if (E || RE) 450 return E == RE; 451 452 if (Value.File != R.Value.File) 453 return false; 454 return Index == R.Index; 455 } 456 457 const SymbolGroup &SymbolGroupIterator::operator*() const { 458 assert(!isEnd()); 459 return Value; 460 } 461 SymbolGroup &SymbolGroupIterator::operator*() { 462 assert(!isEnd()); 463 return Value; 464 } 465 466 SymbolGroupIterator &SymbolGroupIterator::operator++() { 467 assert(Value.File && !isEnd()); 468 ++Index; 469 if (isEnd()) 470 return *this; 471 472 if (Value.File->isPdb()) { 473 Value.updatePdbModi(Index); 474 return *this; 475 } 476 477 scanToNextDebugS(); 478 return *this; 479 } 480 481 void SymbolGroupIterator::scanToNextDebugS() { 482 assert(SectionIter.hasValue()); 483 auto End = Value.File->obj().section_end(); 484 auto &Iter = *SectionIter; 485 assert(!isEnd()); 486 487 while (++Iter != End) { 488 DebugSubsectionArray SS; 489 SectionRef SR = *Iter; 490 if (!isDebugSSection(SR, SS)) 491 continue; 492 493 Value.updateDebugS(SS); 494 return; 495 } 496 } 497 498 bool SymbolGroupIterator::isEnd() const { 499 if (!Value.File) 500 return true; 501 if (Value.File->isPdb()) { 502 auto &Dbi = cantFail(Value.File->pdb().getPDBDbiStream()); 503 uint32_t Count = Dbi.modules().getModuleCount(); 504 assert(Index <= Count); 505 return Index == Count; 506 } 507 508 assert(SectionIter.hasValue()); 509 return *SectionIter == Value.File->obj().section_end(); 510 } 511