1 //===-- LLVMSymbolize.cpp -------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Implementation for LLVM symbolization library. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/DebugInfo/Symbolize/Symbolize.h" 14 15 #include "llvm/ADT/STLExtras.h" 16 #include "llvm/DebugInfo/DWARF/DWARFContext.h" 17 #include "llvm/DebugInfo/PDB/PDB.h" 18 #include "llvm/DebugInfo/PDB/PDBContext.h" 19 #include "llvm/DebugInfo/Symbolize/DIFetcher.h" 20 #include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h" 21 #include "llvm/Demangle/Demangle.h" 22 #include "llvm/Object/COFF.h" 23 #include "llvm/Object/ELF.h" 24 #include "llvm/Object/ELFObjectFile.h" 25 #include "llvm/Object/MachO.h" 26 #include "llvm/Object/MachOUniversal.h" 27 #include "llvm/Support/CRC.h" 28 #include "llvm/Support/Casting.h" 29 #include "llvm/Support/Compression.h" 30 #include "llvm/Support/DataExtractor.h" 31 #include "llvm/Support/Errc.h" 32 #include "llvm/Support/FileSystem.h" 33 #include "llvm/Support/MemoryBuffer.h" 34 #include "llvm/Support/Path.h" 35 #include <algorithm> 36 #include <cassert> 37 #include <cstring> 38 39 namespace llvm { 40 namespace symbolize { 41 42 LLVMSymbolizer::LLVMSymbolizer(const Options &Opts) : Opts(Opts) {} 43 44 LLVMSymbolizer::~LLVMSymbolizer() = default; 45 46 template <typename T> 47 Expected<DILineInfo> 48 LLVMSymbolizer::symbolizeCodeCommon(const T &ModuleSpecifier, 49 object::SectionedAddress ModuleOffset) { 50 51 auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier); 52 if (!InfoOrErr) 53 return InfoOrErr.takeError(); 54 55 SymbolizableModule *Info = *InfoOrErr; 56 57 // A null module means an error has already been reported. Return an empty 58 // result. 59 if (!Info) 60 return DILineInfo(); 61 62 // If the user is giving us relative addresses, add the preferred base of the 63 // object to the offset before we do the query. It's what DIContext expects. 64 if (Opts.RelativeAddresses) 65 ModuleOffset.Address += Info->getModulePreferredBase(); 66 67 DILineInfo LineInfo = Info->symbolizeCode( 68 ModuleOffset, DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions), 69 Opts.UseSymbolTable); 70 if (Opts.Demangle) 71 LineInfo.FunctionName = DemangleName(LineInfo.FunctionName, Info); 72 return LineInfo; 73 } 74 75 Expected<DILineInfo> 76 LLVMSymbolizer::symbolizeCode(const ObjectFile &Obj, 77 object::SectionedAddress ModuleOffset) { 78 return symbolizeCodeCommon(Obj, ModuleOffset); 79 } 80 81 Expected<DILineInfo> 82 LLVMSymbolizer::symbolizeCode(const std::string &ModuleName, 83 object::SectionedAddress ModuleOffset) { 84 return symbolizeCodeCommon(ModuleName, ModuleOffset); 85 } 86 87 Expected<DILineInfo> 88 LLVMSymbolizer::symbolizeCode(ArrayRef<uint8_t> BuildID, 89 object::SectionedAddress ModuleOffset) { 90 return symbolizeCodeCommon(BuildID, ModuleOffset); 91 } 92 93 template <typename T> 94 Expected<DIInliningInfo> LLVMSymbolizer::symbolizeInlinedCodeCommon( 95 const T &ModuleSpecifier, object::SectionedAddress ModuleOffset) { 96 auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier); 97 if (!InfoOrErr) 98 return InfoOrErr.takeError(); 99 100 SymbolizableModule *Info = *InfoOrErr; 101 102 // A null module means an error has already been reported. Return an empty 103 // result. 104 if (!Info) 105 return DIInliningInfo(); 106 107 // If the user is giving us relative addresses, add the preferred base of the 108 // object to the offset before we do the query. It's what DIContext expects. 109 if (Opts.RelativeAddresses) 110 ModuleOffset.Address += Info->getModulePreferredBase(); 111 112 DIInliningInfo InlinedContext = Info->symbolizeInlinedCode( 113 ModuleOffset, DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions), 114 Opts.UseSymbolTable); 115 if (Opts.Demangle) { 116 for (int i = 0, n = InlinedContext.getNumberOfFrames(); i < n; i++) { 117 auto *Frame = InlinedContext.getMutableFrame(i); 118 Frame->FunctionName = DemangleName(Frame->FunctionName, Info); 119 } 120 } 121 return InlinedContext; 122 } 123 124 Expected<DIInliningInfo> 125 LLVMSymbolizer::symbolizeInlinedCode(const ObjectFile &Obj, 126 object::SectionedAddress ModuleOffset) { 127 return symbolizeInlinedCodeCommon(Obj, ModuleOffset); 128 } 129 130 Expected<DIInliningInfo> 131 LLVMSymbolizer::symbolizeInlinedCode(const std::string &ModuleName, 132 object::SectionedAddress ModuleOffset) { 133 return symbolizeInlinedCodeCommon(ModuleName, ModuleOffset); 134 } 135 136 Expected<DIInliningInfo> 137 LLVMSymbolizer::symbolizeInlinedCode(ArrayRef<uint8_t> BuildID, 138 object::SectionedAddress ModuleOffset) { 139 return symbolizeInlinedCodeCommon(BuildID, ModuleOffset); 140 } 141 142 template <typename T> 143 Expected<DIGlobal> 144 LLVMSymbolizer::symbolizeDataCommon(const T &ModuleSpecifier, 145 object::SectionedAddress ModuleOffset) { 146 147 auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier); 148 if (!InfoOrErr) 149 return InfoOrErr.takeError(); 150 151 SymbolizableModule *Info = *InfoOrErr; 152 // A null module means an error has already been reported. Return an empty 153 // result. 154 if (!Info) 155 return DIGlobal(); 156 157 // If the user is giving us relative addresses, add the preferred base of 158 // the object to the offset before we do the query. It's what DIContext 159 // expects. 160 if (Opts.RelativeAddresses) 161 ModuleOffset.Address += Info->getModulePreferredBase(); 162 163 DIGlobal Global = Info->symbolizeData(ModuleOffset); 164 if (Opts.Demangle) 165 Global.Name = DemangleName(Global.Name, Info); 166 return Global; 167 } 168 169 Expected<DIGlobal> 170 LLVMSymbolizer::symbolizeData(const ObjectFile &Obj, 171 object::SectionedAddress ModuleOffset) { 172 return symbolizeDataCommon(Obj, ModuleOffset); 173 } 174 175 Expected<DIGlobal> 176 LLVMSymbolizer::symbolizeData(const std::string &ModuleName, 177 object::SectionedAddress ModuleOffset) { 178 return symbolizeDataCommon(ModuleName, ModuleOffset); 179 } 180 181 Expected<DIGlobal> 182 LLVMSymbolizer::symbolizeData(ArrayRef<uint8_t> BuildID, 183 object::SectionedAddress ModuleOffset) { 184 return symbolizeDataCommon(BuildID, ModuleOffset); 185 } 186 187 template <typename T> 188 Expected<std::vector<DILocal>> 189 LLVMSymbolizer::symbolizeFrameCommon(const T &ModuleSpecifier, 190 object::SectionedAddress ModuleOffset) { 191 auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier); 192 if (!InfoOrErr) 193 return InfoOrErr.takeError(); 194 195 SymbolizableModule *Info = *InfoOrErr; 196 // A null module means an error has already been reported. Return an empty 197 // result. 198 if (!Info) 199 return std::vector<DILocal>(); 200 201 // If the user is giving us relative addresses, add the preferred base of 202 // the object to the offset before we do the query. It's what DIContext 203 // expects. 204 if (Opts.RelativeAddresses) 205 ModuleOffset.Address += Info->getModulePreferredBase(); 206 207 return Info->symbolizeFrame(ModuleOffset); 208 } 209 210 Expected<std::vector<DILocal>> 211 LLVMSymbolizer::symbolizeFrame(const ObjectFile &Obj, 212 object::SectionedAddress ModuleOffset) { 213 return symbolizeFrameCommon(Obj, ModuleOffset); 214 } 215 216 Expected<std::vector<DILocal>> 217 LLVMSymbolizer::symbolizeFrame(const std::string &ModuleName, 218 object::SectionedAddress ModuleOffset) { 219 return symbolizeFrameCommon(ModuleName, ModuleOffset); 220 } 221 222 Expected<std::vector<DILocal>> 223 LLVMSymbolizer::symbolizeFrame(ArrayRef<uint8_t> BuildID, 224 object::SectionedAddress ModuleOffset) { 225 return symbolizeFrameCommon(BuildID, ModuleOffset); 226 } 227 228 void LLVMSymbolizer::flush() { 229 ObjectForUBPathAndArch.clear(); 230 LRUBinaries.clear(); 231 CacheSize = 0; 232 BinaryForPath.clear(); 233 ObjectPairForPathArch.clear(); 234 Modules.clear(); 235 BuildIDPaths.clear(); 236 } 237 238 namespace { 239 240 // For Path="/path/to/foo" and Basename="foo" assume that debug info is in 241 // /path/to/foo.dSYM/Contents/Resources/DWARF/foo. 242 // For Path="/path/to/bar.dSYM" and Basename="foo" assume that debug info is in 243 // /path/to/bar.dSYM/Contents/Resources/DWARF/foo. 244 std::string getDarwinDWARFResourceForPath(const std::string &Path, 245 const std::string &Basename) { 246 SmallString<16> ResourceName = StringRef(Path); 247 if (sys::path::extension(Path) != ".dSYM") { 248 ResourceName += ".dSYM"; 249 } 250 sys::path::append(ResourceName, "Contents", "Resources", "DWARF"); 251 sys::path::append(ResourceName, Basename); 252 return std::string(ResourceName.str()); 253 } 254 255 bool checkFileCRC(StringRef Path, uint32_t CRCHash) { 256 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 257 MemoryBuffer::getFileOrSTDIN(Path); 258 if (!MB) 259 return false; 260 return CRCHash == llvm::crc32(arrayRefFromStringRef(MB.get()->getBuffer())); 261 } 262 263 bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName, 264 uint32_t &CRCHash) { 265 if (!Obj) 266 return false; 267 for (const SectionRef &Section : Obj->sections()) { 268 StringRef Name; 269 consumeError(Section.getName().moveInto(Name)); 270 271 Name = Name.substr(Name.find_first_not_of("._")); 272 if (Name == "gnu_debuglink") { 273 Expected<StringRef> ContentsOrErr = Section.getContents(); 274 if (!ContentsOrErr) { 275 consumeError(ContentsOrErr.takeError()); 276 return false; 277 } 278 DataExtractor DE(*ContentsOrErr, Obj->isLittleEndian(), 0); 279 uint64_t Offset = 0; 280 if (const char *DebugNameStr = DE.getCStr(&Offset)) { 281 // 4-byte align the offset. 282 Offset = (Offset + 3) & ~0x3; 283 if (DE.isValidOffsetForDataOfSize(Offset, 4)) { 284 DebugName = DebugNameStr; 285 CRCHash = DE.getU32(&Offset); 286 return true; 287 } 288 } 289 break; 290 } 291 } 292 return false; 293 } 294 295 bool darwinDsymMatchesBinary(const MachOObjectFile *DbgObj, 296 const MachOObjectFile *Obj) { 297 ArrayRef<uint8_t> dbg_uuid = DbgObj->getUuid(); 298 ArrayRef<uint8_t> bin_uuid = Obj->getUuid(); 299 if (dbg_uuid.empty() || bin_uuid.empty()) 300 return false; 301 return !memcmp(dbg_uuid.data(), bin_uuid.data(), dbg_uuid.size()); 302 } 303 304 template <typename ELFT> 305 Optional<ArrayRef<uint8_t>> getBuildID(const ELFFile<ELFT> &Obj) { 306 auto PhdrsOrErr = Obj.program_headers(); 307 if (!PhdrsOrErr) { 308 consumeError(PhdrsOrErr.takeError()); 309 return {}; 310 } 311 for (const auto &P : *PhdrsOrErr) { 312 if (P.p_type != ELF::PT_NOTE) 313 continue; 314 Error Err = Error::success(); 315 for (auto N : Obj.notes(P, Err)) 316 if (N.getType() == ELF::NT_GNU_BUILD_ID && 317 N.getName() == ELF::ELF_NOTE_GNU) 318 return N.getDesc(); 319 consumeError(std::move(Err)); 320 } 321 return {}; 322 } 323 324 Optional<ArrayRef<uint8_t>> getBuildID(const ELFObjectFileBase *Obj) { 325 Optional<ArrayRef<uint8_t>> BuildID; 326 if (auto *O = dyn_cast<ELFObjectFile<ELF32LE>>(Obj)) 327 BuildID = getBuildID(O->getELFFile()); 328 else if (auto *O = dyn_cast<ELFObjectFile<ELF32BE>>(Obj)) 329 BuildID = getBuildID(O->getELFFile()); 330 else if (auto *O = dyn_cast<ELFObjectFile<ELF64LE>>(Obj)) 331 BuildID = getBuildID(O->getELFFile()); 332 else if (auto *O = dyn_cast<ELFObjectFile<ELF64BE>>(Obj)) 333 BuildID = getBuildID(O->getELFFile()); 334 else 335 llvm_unreachable("unsupported file format"); 336 return BuildID; 337 } 338 339 } // end anonymous namespace 340 341 ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath, 342 const MachOObjectFile *MachExeObj, 343 const std::string &ArchName) { 344 // On Darwin we may find DWARF in separate object file in 345 // resource directory. 346 std::vector<std::string> DsymPaths; 347 StringRef Filename = sys::path::filename(ExePath); 348 DsymPaths.push_back( 349 getDarwinDWARFResourceForPath(ExePath, std::string(Filename))); 350 for (const auto &Path : Opts.DsymHints) { 351 DsymPaths.push_back( 352 getDarwinDWARFResourceForPath(Path, std::string(Filename))); 353 } 354 for (const auto &Path : DsymPaths) { 355 auto DbgObjOrErr = getOrCreateObject(Path, ArchName); 356 if (!DbgObjOrErr) { 357 // Ignore errors, the file might not exist. 358 consumeError(DbgObjOrErr.takeError()); 359 continue; 360 } 361 ObjectFile *DbgObj = DbgObjOrErr.get(); 362 if (!DbgObj) 363 continue; 364 const MachOObjectFile *MachDbgObj = dyn_cast<const MachOObjectFile>(DbgObj); 365 if (!MachDbgObj) 366 continue; 367 if (darwinDsymMatchesBinary(MachDbgObj, MachExeObj)) 368 return DbgObj; 369 } 370 return nullptr; 371 } 372 373 ObjectFile *LLVMSymbolizer::lookUpDebuglinkObject(const std::string &Path, 374 const ObjectFile *Obj, 375 const std::string &ArchName) { 376 std::string DebuglinkName; 377 uint32_t CRCHash; 378 std::string DebugBinaryPath; 379 if (!getGNUDebuglinkContents(Obj, DebuglinkName, CRCHash)) 380 return nullptr; 381 if (!findDebugBinary(Path, DebuglinkName, CRCHash, DebugBinaryPath)) 382 return nullptr; 383 auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName); 384 if (!DbgObjOrErr) { 385 // Ignore errors, the file might not exist. 386 consumeError(DbgObjOrErr.takeError()); 387 return nullptr; 388 } 389 return DbgObjOrErr.get(); 390 } 391 392 ObjectFile *LLVMSymbolizer::lookUpBuildIDObject(const std::string &Path, 393 const ELFObjectFileBase *Obj, 394 const std::string &ArchName) { 395 auto BuildID = getBuildID(Obj); 396 if (!BuildID) 397 return nullptr; 398 if (BuildID->size() < 2) 399 return nullptr; 400 std::string DebugBinaryPath; 401 if (!getOrFindDebugBinary(*BuildID, DebugBinaryPath)) 402 return nullptr; 403 auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName); 404 if (!DbgObjOrErr) { 405 consumeError(DbgObjOrErr.takeError()); 406 return nullptr; 407 } 408 return DbgObjOrErr.get(); 409 } 410 411 bool LLVMSymbolizer::findDebugBinary(const std::string &OrigPath, 412 const std::string &DebuglinkName, 413 uint32_t CRCHash, std::string &Result) { 414 SmallString<16> OrigDir(OrigPath); 415 llvm::sys::path::remove_filename(OrigDir); 416 SmallString<16> DebugPath = OrigDir; 417 // Try relative/path/to/original_binary/debuglink_name 418 llvm::sys::path::append(DebugPath, DebuglinkName); 419 if (checkFileCRC(DebugPath, CRCHash)) { 420 Result = std::string(DebugPath.str()); 421 return true; 422 } 423 // Try relative/path/to/original_binary/.debug/debuglink_name 424 DebugPath = OrigDir; 425 llvm::sys::path::append(DebugPath, ".debug", DebuglinkName); 426 if (checkFileCRC(DebugPath, CRCHash)) { 427 Result = std::string(DebugPath.str()); 428 return true; 429 } 430 // Make the path absolute so that lookups will go to 431 // "/usr/lib/debug/full/path/to/debug", not 432 // "/usr/lib/debug/to/debug" 433 llvm::sys::fs::make_absolute(OrigDir); 434 if (!Opts.FallbackDebugPath.empty()) { 435 // Try <FallbackDebugPath>/absolute/path/to/original_binary/debuglink_name 436 DebugPath = Opts.FallbackDebugPath; 437 } else { 438 #if defined(__NetBSD__) 439 // Try /usr/libdata/debug/absolute/path/to/original_binary/debuglink_name 440 DebugPath = "/usr/libdata/debug"; 441 #else 442 // Try /usr/lib/debug/absolute/path/to/original_binary/debuglink_name 443 DebugPath = "/usr/lib/debug"; 444 #endif 445 } 446 llvm::sys::path::append(DebugPath, llvm::sys::path::relative_path(OrigDir), 447 DebuglinkName); 448 if (checkFileCRC(DebugPath, CRCHash)) { 449 Result = std::string(DebugPath.str()); 450 return true; 451 } 452 return false; 453 } 454 455 static StringRef getBuildIDStr(ArrayRef<uint8_t> BuildID) { 456 return StringRef(reinterpret_cast<const char *>(BuildID.data()), 457 BuildID.size()); 458 } 459 460 bool LLVMSymbolizer::getOrFindDebugBinary(const ArrayRef<uint8_t> BuildID, 461 std::string &Result) { 462 StringRef BuildIDStr = getBuildIDStr(BuildID); 463 auto I = BuildIDPaths.find(BuildIDStr); 464 if (I != BuildIDPaths.end()) { 465 Result = I->second; 466 return true; 467 } 468 auto recordPath = [&](StringRef Path) { 469 Result = Path.str(); 470 auto InsertResult = BuildIDPaths.insert({BuildIDStr, Result}); 471 assert(InsertResult.second); 472 (void)InsertResult; 473 }; 474 475 Optional<std::string> Path; 476 Path = LocalDIFetcher(Opts.DebugFileDirectory).fetchBuildID(BuildID); 477 if (Path) { 478 recordPath(*Path); 479 return true; 480 } 481 482 // Try caller-provided debug info fetchers. 483 for (const std::unique_ptr<DIFetcher> &Fetcher : DIFetchers) { 484 Path = Fetcher->fetchBuildID(BuildID); 485 if (Path) { 486 recordPath(*Path); 487 return true; 488 } 489 } 490 491 return false; 492 } 493 494 Expected<LLVMSymbolizer::ObjectPair> 495 LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path, 496 const std::string &ArchName) { 497 auto I = ObjectPairForPathArch.find(std::make_pair(Path, ArchName)); 498 if (I != ObjectPairForPathArch.end()) { 499 recordAccess(BinaryForPath.find(Path)->second); 500 return I->second; 501 } 502 503 auto ObjOrErr = getOrCreateObject(Path, ArchName); 504 if (!ObjOrErr) { 505 ObjectPairForPathArch.emplace(std::make_pair(Path, ArchName), 506 ObjectPair(nullptr, nullptr)); 507 return ObjOrErr.takeError(); 508 } 509 510 ObjectFile *Obj = ObjOrErr.get(); 511 assert(Obj != nullptr); 512 ObjectFile *DbgObj = nullptr; 513 514 if (auto MachObj = dyn_cast<const MachOObjectFile>(Obj)) 515 DbgObj = lookUpDsymFile(Path, MachObj, ArchName); 516 else if (auto ELFObj = dyn_cast<const ELFObjectFileBase>(Obj)) 517 DbgObj = lookUpBuildIDObject(Path, ELFObj, ArchName); 518 if (!DbgObj) 519 DbgObj = lookUpDebuglinkObject(Path, Obj, ArchName); 520 if (!DbgObj) 521 DbgObj = Obj; 522 ObjectPair Res = std::make_pair(Obj, DbgObj); 523 std::string DbgObjPath = DbgObj->getFileName().str(); 524 auto Pair = 525 ObjectPairForPathArch.emplace(std::make_pair(Path, ArchName), Res); 526 BinaryForPath.find(DbgObjPath)->second.pushEvictor([this, I = Pair.first]() { 527 ObjectPairForPathArch.erase(I); 528 }); 529 return Res; 530 } 531 532 Expected<ObjectFile *> 533 LLVMSymbolizer::getOrCreateObject(const std::string &Path, 534 const std::string &ArchName) { 535 Binary *Bin; 536 auto Pair = BinaryForPath.emplace(Path, OwningBinary<Binary>()); 537 if (!Pair.second) { 538 Bin = Pair.first->second->getBinary(); 539 recordAccess(Pair.first->second); 540 } else { 541 Expected<OwningBinary<Binary>> BinOrErr = createBinary(Path); 542 if (!BinOrErr) 543 return BinOrErr.takeError(); 544 545 CachedBinary &CachedBin = Pair.first->second; 546 CachedBin = std::move(BinOrErr.get()); 547 CachedBin.pushEvictor([this, I = Pair.first]() { BinaryForPath.erase(I); }); 548 LRUBinaries.push_back(CachedBin); 549 CacheSize += CachedBin.size(); 550 Bin = CachedBin->getBinary(); 551 } 552 553 if (!Bin) 554 return static_cast<ObjectFile *>(nullptr); 555 556 if (MachOUniversalBinary *UB = dyn_cast_or_null<MachOUniversalBinary>(Bin)) { 557 auto I = ObjectForUBPathAndArch.find(std::make_pair(Path, ArchName)); 558 if (I != ObjectForUBPathAndArch.end()) 559 return I->second.get(); 560 561 Expected<std::unique_ptr<ObjectFile>> ObjOrErr = 562 UB->getMachOObjectForArch(ArchName); 563 if (!ObjOrErr) { 564 ObjectForUBPathAndArch.emplace(std::make_pair(Path, ArchName), 565 std::unique_ptr<ObjectFile>()); 566 return ObjOrErr.takeError(); 567 } 568 ObjectFile *Res = ObjOrErr->get(); 569 auto Pair = ObjectForUBPathAndArch.emplace(std::make_pair(Path, ArchName), 570 std::move(ObjOrErr.get())); 571 BinaryForPath.find(Path)->second.pushEvictor( 572 [this, Iter = Pair.first]() { ObjectForUBPathAndArch.erase(Iter); }); 573 return Res; 574 } 575 if (Bin->isObject()) { 576 return cast<ObjectFile>(Bin); 577 } 578 return errorCodeToError(object_error::arch_not_found); 579 } 580 581 Expected<SymbolizableModule *> 582 LLVMSymbolizer::createModuleInfo(const ObjectFile *Obj, 583 std::unique_ptr<DIContext> Context, 584 StringRef ModuleName) { 585 auto InfoOrErr = SymbolizableObjectFile::create(Obj, std::move(Context), 586 Opts.UntagAddresses); 587 std::unique_ptr<SymbolizableModule> SymMod; 588 if (InfoOrErr) 589 SymMod = std::move(*InfoOrErr); 590 auto InsertResult = Modules.insert( 591 std::make_pair(std::string(ModuleName), std::move(SymMod))); 592 assert(InsertResult.second); 593 if (!InfoOrErr) 594 return InfoOrErr.takeError(); 595 return InsertResult.first->second.get(); 596 } 597 598 Expected<SymbolizableModule *> 599 LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) { 600 std::string BinaryName = ModuleName; 601 std::string ArchName = Opts.DefaultArch; 602 size_t ColonPos = ModuleName.find_last_of(':'); 603 // Verify that substring after colon form a valid arch name. 604 if (ColonPos != std::string::npos) { 605 std::string ArchStr = ModuleName.substr(ColonPos + 1); 606 if (Triple(ArchStr).getArch() != Triple::UnknownArch) { 607 BinaryName = ModuleName.substr(0, ColonPos); 608 ArchName = ArchStr; 609 } 610 } 611 612 auto I = Modules.find(ModuleName); 613 if (I != Modules.end()) { 614 recordAccess(BinaryForPath.find(BinaryName)->second); 615 return I->second.get(); 616 } 617 618 auto ObjectsOrErr = getOrCreateObjectPair(BinaryName, ArchName); 619 if (!ObjectsOrErr) { 620 // Failed to find valid object file. 621 Modules.emplace(ModuleName, std::unique_ptr<SymbolizableModule>()); 622 return ObjectsOrErr.takeError(); 623 } 624 ObjectPair Objects = ObjectsOrErr.get(); 625 626 std::unique_ptr<DIContext> Context; 627 // If this is a COFF object containing PDB info, use a PDBContext to 628 // symbolize. Otherwise, use DWARF. 629 if (auto CoffObject = dyn_cast<COFFObjectFile>(Objects.first)) { 630 const codeview::DebugInfo *DebugInfo; 631 StringRef PDBFileName; 632 auto EC = CoffObject->getDebugPDBInfo(DebugInfo, PDBFileName); 633 if (!EC && DebugInfo != nullptr && !PDBFileName.empty()) { 634 using namespace pdb; 635 std::unique_ptr<IPDBSession> Session; 636 637 PDB_ReaderType ReaderType = 638 Opts.UseDIA ? PDB_ReaderType::DIA : PDB_ReaderType::Native; 639 if (auto Err = loadDataForEXE(ReaderType, Objects.first->getFileName(), 640 Session)) { 641 Modules.emplace(ModuleName, std::unique_ptr<SymbolizableModule>()); 642 // Return along the PDB filename to provide more context 643 return createFileError(PDBFileName, std::move(Err)); 644 } 645 Context.reset(new PDBContext(*CoffObject, std::move(Session))); 646 } 647 } 648 if (!Context) 649 Context = DWARFContext::create( 650 *Objects.second, DWARFContext::ProcessDebugRelocations::Process, 651 nullptr, Opts.DWPName); 652 auto ModuleOrErr = 653 createModuleInfo(Objects.first, std::move(Context), ModuleName); 654 if (ModuleOrErr) { 655 auto I = Modules.find(ModuleName); 656 BinaryForPath.find(BinaryName)->second.pushEvictor([this, I]() { 657 Modules.erase(I); 658 }); 659 } 660 return ModuleOrErr; 661 } 662 663 Expected<SymbolizableModule *> 664 LLVMSymbolizer::getOrCreateModuleInfo(const ObjectFile &Obj) { 665 StringRef ObjName = Obj.getFileName(); 666 auto I = Modules.find(ObjName); 667 if (I != Modules.end()) 668 return I->second.get(); 669 670 std::unique_ptr<DIContext> Context = DWARFContext::create(Obj); 671 // FIXME: handle COFF object with PDB info to use PDBContext 672 return createModuleInfo(&Obj, std::move(Context), ObjName); 673 } 674 675 Expected<SymbolizableModule *> 676 LLVMSymbolizer::getOrCreateModuleInfo(ArrayRef<uint8_t> BuildID) { 677 std::string Path; 678 if (!getOrFindDebugBinary(BuildID, Path)) { 679 return createStringError(errc::no_such_file_or_directory, 680 Twine("could not find build ID '") + 681 toHex(BuildID) + "'"); 682 } 683 return getOrCreateModuleInfo(Path); 684 } 685 686 namespace { 687 688 // Undo these various manglings for Win32 extern "C" functions: 689 // cdecl - _foo 690 // stdcall - _foo@12 691 // fastcall - @foo@12 692 // vectorcall - foo@@12 693 // These are all different linkage names for 'foo'. 694 StringRef demanglePE32ExternCFunc(StringRef SymbolName) { 695 // Remove any '_' or '@' prefix. 696 char Front = SymbolName.empty() ? '\0' : SymbolName[0]; 697 if (Front == '_' || Front == '@') 698 SymbolName = SymbolName.drop_front(); 699 700 // Remove any '@[0-9]+' suffix. 701 if (Front != '?') { 702 size_t AtPos = SymbolName.rfind('@'); 703 if (AtPos != StringRef::npos && 704 all_of(drop_begin(SymbolName, AtPos + 1), isDigit)) 705 SymbolName = SymbolName.substr(0, AtPos); 706 } 707 708 // Remove any ending '@' for vectorcall. 709 if (SymbolName.endswith("@")) 710 SymbolName = SymbolName.drop_back(); 711 712 return SymbolName; 713 } 714 715 } // end anonymous namespace 716 717 std::string 718 LLVMSymbolizer::DemangleName(const std::string &Name, 719 const SymbolizableModule *DbiModuleDescriptor) { 720 std::string Result; 721 if (nonMicrosoftDemangle(Name.c_str(), Result)) 722 return Result; 723 724 if (!Name.empty() && Name.front() == '?') { 725 // Only do MSVC C++ demangling on symbols starting with '?'. 726 int status = 0; 727 char *DemangledName = microsoftDemangle( 728 Name.c_str(), nullptr, nullptr, nullptr, &status, 729 MSDemangleFlags(MSDF_NoAccessSpecifier | MSDF_NoCallingConvention | 730 MSDF_NoMemberType | MSDF_NoReturnType)); 731 if (status != 0) 732 return Name; 733 Result = DemangledName; 734 free(DemangledName); 735 return Result; 736 } 737 738 if (DbiModuleDescriptor && DbiModuleDescriptor->isWin32Module()) 739 return std::string(demanglePE32ExternCFunc(Name)); 740 return Name; 741 } 742 743 void LLVMSymbolizer::recordAccess(CachedBinary &Bin) { 744 if (Bin->getBinary()) 745 LRUBinaries.splice(LRUBinaries.end(), LRUBinaries, Bin.getIterator()); 746 } 747 748 void LLVMSymbolizer::pruneCache() { 749 // Evict the LRU binary until the max cache size is reached or there's <= 1 750 // item in the cache. The MRU binary is always kept to avoid thrashing if it's 751 // larger than the cache size. 752 while (CacheSize > Opts.MaxCacheSize && !LRUBinaries.empty() && 753 std::next(LRUBinaries.begin()) != LRUBinaries.end()) { 754 CachedBinary &Bin = LRUBinaries.front(); 755 CacheSize -= Bin.size(); 756 LRUBinaries.pop_front(); 757 Bin.evict(); 758 } 759 } 760 761 void CachedBinary::pushEvictor(std::function<void()> NewEvictor) { 762 if (Evictor) { 763 this->Evictor = [OldEvictor = std::move(this->Evictor), 764 NewEvictor = std::move(NewEvictor)]() { 765 NewEvictor(); 766 OldEvictor(); 767 }; 768 } else { 769 this->Evictor = std::move(NewEvictor); 770 } 771 } 772 773 } // namespace symbolize 774 } // namespace llvm 775