1 //===- InputFile.cpp ------------------------------------------ *- C++ --*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "InputFile.h" 11 12 #include "FormatUtil.h" 13 #include "LinePrinter.h" 14 15 #include "llvm/BinaryFormat/Magic.h" 16 #include "llvm/DebugInfo/CodeView/CodeView.h" 17 #include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h" 18 #include "llvm/DebugInfo/CodeView/StringsAndChecksums.h" 19 #include "llvm/DebugInfo/PDB/Native/DbiStream.h" 20 #include "llvm/DebugInfo/PDB/Native/NativeSession.h" 21 #include "llvm/DebugInfo/PDB/Native/PDBFile.h" 22 #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h" 23 #include "llvm/DebugInfo/PDB/Native/RawError.h" 24 #include "llvm/DebugInfo/PDB/Native/TpiStream.h" 25 #include "llvm/DebugInfo/PDB/PDB.h" 26 #include "llvm/Object/COFF.h" 27 #include "llvm/Support/FileSystem.h" 28 #include "llvm/Support/FormatVariadic.h" 29 30 using namespace llvm; 31 using namespace llvm::codeview; 32 using namespace llvm::object; 33 using namespace llvm::pdb; 34 35 InputFile::InputFile() {} 36 InputFile::~InputFile() {} 37 38 static Expected<ModuleDebugStreamRef> 39 getModuleDebugStream(PDBFile &File, StringRef &ModuleName, uint32_t Index) { 40 ExitOnError Err("Unexpected error: "); 41 42 auto &Dbi = Err(File.getPDBDbiStream()); 43 const auto &Modules = Dbi.modules(); 44 auto Modi = Modules.getModuleDescriptor(Index); 45 46 ModuleName = Modi.getModuleName(); 47 48 uint16_t ModiStream = Modi.getModuleStreamIndex(); 49 if (ModiStream == kInvalidStreamIndex) 50 return make_error<RawError>(raw_error_code::no_stream, 51 "Module stream not present"); 52 53 auto ModStreamData = File.createIndexedStream(ModiStream); 54 55 ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData)); 56 if (auto EC = ModS.reload()) 57 return make_error<RawError>(raw_error_code::corrupt_file, 58 "Invalid module stream"); 59 60 return std::move(ModS); 61 } 62 63 static inline bool isCodeViewDebugSubsection(object::SectionRef Section, 64 StringRef Name, 65 BinaryStreamReader &Reader) { 66 StringRef SectionName, Contents; 67 if (Section.getName(SectionName)) 68 return false; 69 70 if (SectionName != Name) 71 return false; 72 73 if (Section.getContents(Contents)) 74 return false; 75 76 Reader = BinaryStreamReader(Contents, support::little); 77 uint32_t Magic; 78 if (Reader.bytesRemaining() < sizeof(uint32_t)) 79 return false; 80 cantFail(Reader.readInteger(Magic)); 81 if (Magic != COFF::DEBUG_SECTION_MAGIC) 82 return false; 83 return true; 84 } 85 86 static inline bool isDebugSSection(object::SectionRef Section, 87 DebugSubsectionArray &Subsections) { 88 BinaryStreamReader Reader; 89 if (!isCodeViewDebugSubsection(Section, ".debug$S", Reader)) 90 return false; 91 92 cantFail(Reader.readArray(Subsections, Reader.bytesRemaining())); 93 return true; 94 } 95 96 static bool isDebugTSection(SectionRef Section, CVTypeArray &Types) { 97 BinaryStreamReader Reader; 98 if (!isCodeViewDebugSubsection(Section, ".debug$T", Reader)) 99 return false; 100 cantFail(Reader.readArray(Types, Reader.bytesRemaining())); 101 return true; 102 } 103 104 static std::string formatChecksumKind(FileChecksumKind Kind) { 105 switch (Kind) { 106 RETURN_CASE(FileChecksumKind, None, "None"); 107 RETURN_CASE(FileChecksumKind, MD5, "MD5"); 108 RETURN_CASE(FileChecksumKind, SHA1, "SHA-1"); 109 RETURN_CASE(FileChecksumKind, SHA256, "SHA-256"); 110 } 111 return formatUnknownEnum(Kind); 112 } 113 114 static const DebugStringTableSubsectionRef &extractStringTable(PDBFile &File) { 115 return cantFail(File.getStringTable()).getStringTable(); 116 } 117 118 template <typename... Args> 119 static void formatInternal(LinePrinter &Printer, bool Append, Args &&... args) { 120 if (Append) 121 Printer.format(std::forward<Args>(args)...); 122 else 123 Printer.formatLine(std::forward<Args>(args)...); 124 } 125 126 SymbolGroup::SymbolGroup(InputFile *File, uint32_t GroupIndex) : File(File) { 127 if (!File) 128 return; 129 130 if (File->isPdb()) 131 initializeForPdb(GroupIndex); 132 else { 133 Name = ".debug$S"; 134 uint32_t I = 0; 135 for (const auto &S : File->obj().sections()) { 136 DebugSubsectionArray SS; 137 if (!isDebugSSection(S, SS)) 138 continue; 139 140 if (!SC.hasChecksums() || !SC.hasStrings()) 141 SC.initialize(SS); 142 143 if (I == GroupIndex) 144 Subsections = SS; 145 146 if (SC.hasChecksums() && SC.hasStrings()) 147 break; 148 } 149 rebuildChecksumMap(); 150 } 151 } 152 153 StringRef SymbolGroup::name() const { return Name; } 154 155 void SymbolGroup::updateDebugS(const codeview::DebugSubsectionArray &SS) { 156 Subsections = SS; 157 } 158 159 void SymbolGroup::updatePdbModi(uint32_t Modi) { initializeForPdb(Modi); } 160 161 void SymbolGroup::initializeForPdb(uint32_t Modi) { 162 assert(File && File->isPdb()); 163 164 // PDB always uses the same string table, but each module has its own 165 // checksums. So we only set the strings if they're not already set. 166 if (!SC.hasStrings()) 167 SC.setStrings(extractStringTable(File->pdb())); 168 169 SC.resetChecksums(); 170 auto MDS = getModuleDebugStream(File->pdb(), Name, Modi); 171 if (!MDS) { 172 consumeError(MDS.takeError()); 173 return; 174 } 175 176 DebugStream = std::make_shared<ModuleDebugStreamRef>(std::move(*MDS)); 177 Subsections = DebugStream->getSubsectionsArray(); 178 SC.initialize(Subsections); 179 rebuildChecksumMap(); 180 } 181 182 void SymbolGroup::rebuildChecksumMap() { 183 if (!SC.hasChecksums()) 184 return; 185 186 for (const auto &Entry : SC.checksums()) { 187 auto S = SC.strings().getString(Entry.FileNameOffset); 188 if (!S) 189 continue; 190 ChecksumsByFile[*S] = Entry; 191 } 192 } 193 194 const ModuleDebugStreamRef &SymbolGroup::getPdbModuleStream() const { 195 assert(File && File->isPdb() && DebugStream); 196 return *DebugStream; 197 } 198 199 Expected<StringRef> SymbolGroup::getNameFromStringTable(uint32_t Offset) const { 200 return SC.strings().getString(Offset); 201 } 202 203 void SymbolGroup::formatFromFileName(LinePrinter &Printer, StringRef File, 204 bool Append) const { 205 auto FC = ChecksumsByFile.find(File); 206 if (FC == ChecksumsByFile.end()) { 207 formatInternal(Printer, Append, "- (no checksum) {0}", File); 208 return; 209 } 210 211 formatInternal(Printer, Append, "- ({0}: {1}) {2}", 212 formatChecksumKind(FC->getValue().Kind), 213 toHex(FC->getValue().Checksum), File); 214 } 215 216 void SymbolGroup::formatFromChecksumsOffset(LinePrinter &Printer, 217 uint32_t Offset, 218 bool Append) const { 219 if (!SC.hasChecksums()) { 220 formatInternal(Printer, Append, "(unknown file name offset {0})", Offset); 221 return; 222 } 223 224 auto Iter = SC.checksums().getArray().at(Offset); 225 if (Iter == SC.checksums().getArray().end()) { 226 formatInternal(Printer, Append, "(unknown file name offset {0})", Offset); 227 return; 228 } 229 230 uint32_t FO = Iter->FileNameOffset; 231 auto ExpectedFile = getNameFromStringTable(FO); 232 if (!ExpectedFile) { 233 formatInternal(Printer, Append, "(unknown file name offset {0})", Offset); 234 consumeError(ExpectedFile.takeError()); 235 return; 236 } 237 if (Iter->Kind == FileChecksumKind::None) { 238 formatInternal(Printer, Append, "{0} (no checksum)", *ExpectedFile); 239 } else { 240 formatInternal(Printer, Append, "{0} ({1}: {2})", *ExpectedFile, 241 formatChecksumKind(Iter->Kind), toHex(Iter->Checksum)); 242 } 243 } 244 245 Expected<InputFile> InputFile::open(StringRef Path) { 246 InputFile IF; 247 if (!llvm::sys::fs::exists(Path)) 248 return make_error<StringError>(formatv("File {0} not found", Path), 249 inconvertibleErrorCode()); 250 251 file_magic Magic; 252 if (auto EC = identify_magic(Path, Magic)) 253 return make_error<StringError>( 254 formatv("Unable to identify file type for file {0}", Path), EC); 255 256 if (Magic == file_magic::coff_object) { 257 Expected<OwningBinary<Binary>> BinaryOrErr = createBinary(Path); 258 if (!BinaryOrErr) 259 return BinaryOrErr.takeError(); 260 261 IF.CoffObject = std::move(*BinaryOrErr); 262 IF.PdbOrObj = llvm::cast<COFFObjectFile>(IF.CoffObject.getBinary()); 263 return std::move(IF); 264 } 265 266 if (Magic == file_magic::unknown) { 267 std::unique_ptr<IPDBSession> Session; 268 if (auto Err = loadDataForPDB(PDB_ReaderType::Native, Path, Session)) 269 return std::move(Err); 270 271 IF.PdbSession.reset(static_cast<NativeSession *>(Session.release())); 272 IF.PdbOrObj = &IF.PdbSession->getPDBFile(); 273 274 return std::move(IF); 275 } 276 277 return make_error<StringError>( 278 formatv("File {0} is not a supported file type", Path), 279 inconvertibleErrorCode()); 280 } 281 282 PDBFile &InputFile::pdb() { 283 assert(isPdb()); 284 return *PdbOrObj.get<PDBFile *>(); 285 } 286 287 const PDBFile &InputFile::pdb() const { 288 assert(isPdb()); 289 return *PdbOrObj.get<PDBFile *>(); 290 } 291 292 object::COFFObjectFile &InputFile::obj() { 293 assert(isObj()); 294 return *PdbOrObj.get<object::COFFObjectFile *>(); 295 } 296 297 const object::COFFObjectFile &InputFile::obj() const { 298 assert(isObj()); 299 return *PdbOrObj.get<object::COFFObjectFile *>(); 300 } 301 302 bool InputFile::hasTypes() const { 303 if (isPdb()) 304 return pdb().hasPDBTpiStream(); 305 306 for (const auto &Section : obj().sections()) { 307 CVTypeArray Types; 308 if (isDebugTSection(Section, Types)) 309 return true; 310 } 311 return false; 312 } 313 314 bool InputFile::hasIds() const { 315 if (isObj()) 316 return false; 317 return pdb().hasPDBIpiStream(); 318 } 319 320 bool InputFile::isPdb() const { return PdbOrObj.is<PDBFile *>(); } 321 322 bool InputFile::isObj() const { 323 return PdbOrObj.is<object::COFFObjectFile *>(); 324 } 325 326 codeview::LazyRandomTypeCollection & 327 InputFile::getOrCreateTypeCollection(TypeCollectionKind Kind) { 328 if (Types && Kind == kTypes) 329 return *Types; 330 if (Ids && Kind == kIds) 331 return *Ids; 332 333 if (Kind == kIds) { 334 assert(isPdb() && pdb().hasPDBIpiStream()); 335 } 336 337 // If the collection was already initialized, we should have just returned it 338 // in step 1. 339 if (isPdb()) { 340 TypeCollectionPtr &Collection = (Kind == kIds) ? Ids : Types; 341 auto &Stream = cantFail((Kind == kIds) ? pdb().getPDBIpiStream() 342 : pdb().getPDBTpiStream()); 343 344 auto &Array = Stream.typeArray(); 345 uint32_t Count = Stream.getNumTypeRecords(); 346 auto Offsets = Stream.getTypeIndexOffsets(); 347 Collection = 348 llvm::make_unique<LazyRandomTypeCollection>(Array, Count, Offsets); 349 return *Collection; 350 } 351 352 assert(isObj()); 353 assert(Kind == kTypes); 354 assert(!Types); 355 356 for (const auto &Section : obj().sections()) { 357 CVTypeArray Records; 358 if (!isDebugTSection(Section, Records)) 359 continue; 360 361 Types = llvm::make_unique<LazyRandomTypeCollection>(Records, 100); 362 return *Types; 363 } 364 365 Types = llvm::make_unique<LazyRandomTypeCollection>(100); 366 return *Types; 367 } 368 369 codeview::LazyRandomTypeCollection &InputFile::types() { 370 return getOrCreateTypeCollection(kTypes); 371 } 372 373 codeview::LazyRandomTypeCollection &InputFile::ids() { 374 // Object files have only one type stream that contains both types and ids. 375 // Similarly, some PDBs don't contain an IPI stream, and for those both types 376 // and IDs are in the same stream. 377 if (isObj() || !pdb().hasPDBIpiStream()) 378 return types(); 379 380 return getOrCreateTypeCollection(kIds); 381 } 382 383 iterator_range<SymbolGroupIterator> InputFile::symbol_groups() { 384 return make_range<SymbolGroupIterator>(symbol_groups_begin(), 385 symbol_groups_end()); 386 } 387 388 SymbolGroupIterator InputFile::symbol_groups_begin() { 389 return SymbolGroupIterator(*this); 390 } 391 392 SymbolGroupIterator InputFile::symbol_groups_end() { 393 return SymbolGroupIterator(); 394 } 395 396 SymbolGroupIterator::SymbolGroupIterator() : Value(nullptr) {} 397 398 SymbolGroupIterator::SymbolGroupIterator(InputFile &File) : Value(&File) { 399 if (File.isObj()) { 400 SectionIter = File.obj().section_begin(); 401 scanToNextDebugS(); 402 } 403 } 404 405 bool SymbolGroupIterator::operator==(const SymbolGroupIterator &R) const { 406 bool E = isEnd(); 407 bool RE = R.isEnd(); 408 if (E || RE) 409 return E == RE; 410 411 if (Value.File != R.Value.File) 412 return false; 413 return Index == R.Index; 414 } 415 416 const SymbolGroup &SymbolGroupIterator::operator*() const { 417 assert(!isEnd()); 418 return Value; 419 } 420 SymbolGroup &SymbolGroupIterator::operator*() { 421 assert(!isEnd()); 422 return Value; 423 } 424 425 SymbolGroupIterator &SymbolGroupIterator::operator++() { 426 assert(Value.File && !isEnd()); 427 ++Index; 428 if (isEnd()) 429 return *this; 430 431 if (Value.File->isPdb()) { 432 Value.updatePdbModi(Index); 433 return *this; 434 } 435 436 scanToNextDebugS(); 437 return *this; 438 } 439 440 void SymbolGroupIterator::scanToNextDebugS() { 441 assert(SectionIter.hasValue()); 442 auto End = Value.File->obj().section_end(); 443 auto &Iter = *SectionIter; 444 assert(!isEnd()); 445 446 while (++Iter != End) { 447 DebugSubsectionArray SS; 448 SectionRef SR = *Iter; 449 if (!isDebugSSection(SR, SS)) 450 continue; 451 452 Value.updateDebugS(SS); 453 return; 454 } 455 } 456 457 bool SymbolGroupIterator::isEnd() const { 458 if (!Value.File) 459 return true; 460 if (Value.File->isPdb()) { 461 auto &Dbi = cantFail(Value.File->pdb().getPDBDbiStream()); 462 uint32_t Count = Dbi.modules().getModuleCount(); 463 assert(Index <= Count); 464 return Index == Count; 465 } 466 467 assert(SectionIter.hasValue()); 468 return *SectionIter == Value.File->obj().section_end(); 469 } 470