1 //===-- LLVMSymbolize.cpp -------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Implementation for LLVM symbolization library. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/DebugInfo/Symbolize/Symbolize.h" 14 15 #include "SymbolizableObjectFile.h" 16 17 #include "llvm/ADT/STLExtras.h" 18 #include "llvm/BinaryFormat/COFF.h" 19 #include "llvm/DebugInfo/DWARF/DWARFContext.h" 20 #include "llvm/DebugInfo/PDB/PDB.h" 21 #include "llvm/DebugInfo/PDB/PDBContext.h" 22 #include "llvm/Demangle/Demangle.h" 23 #include "llvm/Object/COFF.h" 24 #include "llvm/Object/MachO.h" 25 #include "llvm/Object/MachOUniversal.h" 26 #include "llvm/Support/CRC.h" 27 #include "llvm/Support/Casting.h" 28 #include "llvm/Support/Compression.h" 29 #include "llvm/Support/DataExtractor.h" 30 #include "llvm/Support/Errc.h" 31 #include "llvm/Support/FileSystem.h" 32 #include "llvm/Support/MemoryBuffer.h" 33 #include "llvm/Support/Path.h" 34 #include <algorithm> 35 #include <cassert> 36 #include <cstring> 37 38 #if defined(_MSC_VER) 39 #include <Windows.h> 40 41 // This must be included after windows.h. 42 #include <DbgHelp.h> 43 #pragma comment(lib, "dbghelp.lib") 44 45 // Windows.h conflicts with our COFF header definitions. 46 #ifdef IMAGE_FILE_MACHINE_I386 47 #undef IMAGE_FILE_MACHINE_I386 48 #endif 49 #endif 50 51 namespace llvm { 52 namespace symbolize { 53 54 Expected<DILineInfo> 55 LLVMSymbolizer::symbolizeCode(const std::string &ModuleName, 56 object::SectionedAddress ModuleOffset) { 57 SymbolizableModule *Info; 58 if (auto InfoOrErr = getOrCreateModuleInfo(ModuleName)) 59 Info = InfoOrErr.get(); 60 else 61 return InfoOrErr.takeError(); 62 63 // A null module means an error has already been reported. Return an empty 64 // result. 65 if (!Info) 66 return DILineInfo(); 67 68 // If the user is giving us relative addresses, add the preferred base of the 69 // object to the offset before we do the query. It's what DIContext expects. 70 if (Opts.RelativeAddresses) 71 ModuleOffset.Address += Info->getModulePreferredBase(); 72 73 DILineInfo LineInfo = Info->symbolizeCode(ModuleOffset, Opts.PrintFunctions, 74 Opts.UseSymbolTable); 75 if (Opts.Demangle) 76 LineInfo.FunctionName = DemangleName(LineInfo.FunctionName, Info); 77 return LineInfo; 78 } 79 80 Expected<DIInliningInfo> 81 LLVMSymbolizer::symbolizeInlinedCode(const std::string &ModuleName, 82 object::SectionedAddress ModuleOffset) { 83 SymbolizableModule *Info; 84 if (auto InfoOrErr = getOrCreateModuleInfo(ModuleName)) 85 Info = InfoOrErr.get(); 86 else 87 return InfoOrErr.takeError(); 88 89 // A null module means an error has already been reported. Return an empty 90 // result. 91 if (!Info) 92 return DIInliningInfo(); 93 94 // If the user is giving us relative addresses, add the preferred base of the 95 // object to the offset before we do the query. It's what DIContext expects. 96 if (Opts.RelativeAddresses) 97 ModuleOffset.Address += Info->getModulePreferredBase(); 98 99 DIInliningInfo InlinedContext = Info->symbolizeInlinedCode( 100 ModuleOffset, Opts.PrintFunctions, Opts.UseSymbolTable); 101 if (Opts.Demangle) { 102 for (int i = 0, n = InlinedContext.getNumberOfFrames(); i < n; i++) { 103 auto *Frame = InlinedContext.getMutableFrame(i); 104 Frame->FunctionName = DemangleName(Frame->FunctionName, Info); 105 } 106 } 107 return InlinedContext; 108 } 109 110 Expected<DIGlobal> 111 LLVMSymbolizer::symbolizeData(const std::string &ModuleName, 112 object::SectionedAddress ModuleOffset) { 113 SymbolizableModule *Info; 114 if (auto InfoOrErr = getOrCreateModuleInfo(ModuleName)) 115 Info = InfoOrErr.get(); 116 else 117 return InfoOrErr.takeError(); 118 119 // A null module means an error has already been reported. Return an empty 120 // result. 121 if (!Info) 122 return DIGlobal(); 123 124 // If the user is giving us relative addresses, add the preferred base of 125 // the object to the offset before we do the query. It's what DIContext 126 // expects. 127 if (Opts.RelativeAddresses) 128 ModuleOffset.Address += Info->getModulePreferredBase(); 129 130 DIGlobal Global = Info->symbolizeData(ModuleOffset); 131 if (Opts.Demangle) 132 Global.Name = DemangleName(Global.Name, Info); 133 return Global; 134 } 135 136 Expected<std::vector<DILocal>> 137 LLVMSymbolizer::symbolizeFrame(const std::string &ModuleName, 138 object::SectionedAddress ModuleOffset) { 139 SymbolizableModule *Info; 140 if (auto InfoOrErr = getOrCreateModuleInfo(ModuleName)) 141 Info = InfoOrErr.get(); 142 else 143 return InfoOrErr.takeError(); 144 145 // A null module means an error has already been reported. Return an empty 146 // result. 147 if (!Info) 148 return std::vector<DILocal>(); 149 150 // If the user is giving us relative addresses, add the preferred base of 151 // the object to the offset before we do the query. It's what DIContext 152 // expects. 153 if (Opts.RelativeAddresses) 154 ModuleOffset.Address += Info->getModulePreferredBase(); 155 156 return Info->symbolizeFrame(ModuleOffset); 157 } 158 159 void LLVMSymbolizer::flush() { 160 ObjectForUBPathAndArch.clear(); 161 BinaryForPath.clear(); 162 ObjectPairForPathArch.clear(); 163 Modules.clear(); 164 } 165 166 namespace { 167 168 // For Path="/path/to/foo" and Basename="foo" assume that debug info is in 169 // /path/to/foo.dSYM/Contents/Resources/DWARF/foo. 170 // For Path="/path/to/bar.dSYM" and Basename="foo" assume that debug info is in 171 // /path/to/bar.dSYM/Contents/Resources/DWARF/foo. 172 std::string getDarwinDWARFResourceForPath( 173 const std::string &Path, const std::string &Basename) { 174 SmallString<16> ResourceName = StringRef(Path); 175 if (sys::path::extension(Path) != ".dSYM") { 176 ResourceName += ".dSYM"; 177 } 178 sys::path::append(ResourceName, "Contents", "Resources", "DWARF"); 179 sys::path::append(ResourceName, Basename); 180 return ResourceName.str(); 181 } 182 183 bool checkFileCRC(StringRef Path, uint32_t CRCHash) { 184 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 185 MemoryBuffer::getFileOrSTDIN(Path); 186 if (!MB) 187 return false; 188 return CRCHash == llvm::crc32(0, MB.get()->getBuffer()); 189 } 190 191 bool findDebugBinary(const std::string &OrigPath, 192 const std::string &DebuglinkName, uint32_t CRCHash, 193 const std::string &FallbackDebugPath, 194 std::string &Result) { 195 SmallString<16> OrigDir(OrigPath); 196 llvm::sys::path::remove_filename(OrigDir); 197 SmallString<16> DebugPath = OrigDir; 198 // Try relative/path/to/original_binary/debuglink_name 199 llvm::sys::path::append(DebugPath, DebuglinkName); 200 if (checkFileCRC(DebugPath, CRCHash)) { 201 Result = DebugPath.str(); 202 return true; 203 } 204 // Try relative/path/to/original_binary/.debug/debuglink_name 205 DebugPath = OrigDir; 206 llvm::sys::path::append(DebugPath, ".debug", DebuglinkName); 207 if (checkFileCRC(DebugPath, CRCHash)) { 208 Result = DebugPath.str(); 209 return true; 210 } 211 // Make the path absolute so that lookups will go to 212 // "/usr/lib/debug/full/path/to/debug", not 213 // "/usr/lib/debug/to/debug" 214 llvm::sys::fs::make_absolute(OrigDir); 215 if (!FallbackDebugPath.empty()) { 216 // Try <FallbackDebugPath>/absolute/path/to/original_binary/debuglink_name 217 DebugPath = FallbackDebugPath; 218 } else { 219 #if defined(__NetBSD__) 220 // Try /usr/libdata/debug/absolute/path/to/original_binary/debuglink_name 221 DebugPath = "/usr/libdata/debug"; 222 #else 223 // Try /usr/lib/debug/absolute/path/to/original_binary/debuglink_name 224 DebugPath = "/usr/lib/debug"; 225 #endif 226 } 227 llvm::sys::path::append(DebugPath, llvm::sys::path::relative_path(OrigDir), 228 DebuglinkName); 229 if (checkFileCRC(DebugPath, CRCHash)) { 230 Result = DebugPath.str(); 231 return true; 232 } 233 return false; 234 } 235 236 bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName, 237 uint32_t &CRCHash) { 238 if (!Obj) 239 return false; 240 for (const SectionRef &Section : Obj->sections()) { 241 StringRef Name; 242 Section.getName(Name); 243 Name = Name.substr(Name.find_first_not_of("._")); 244 if (Name == "gnu_debuglink") { 245 Expected<StringRef> ContentsOrErr = Section.getContents(); 246 if (!ContentsOrErr) { 247 consumeError(ContentsOrErr.takeError()); 248 return false; 249 } 250 DataExtractor DE(*ContentsOrErr, Obj->isLittleEndian(), 0); 251 uint32_t Offset = 0; 252 if (const char *DebugNameStr = DE.getCStr(&Offset)) { 253 // 4-byte align the offset. 254 Offset = (Offset + 3) & ~0x3; 255 if (DE.isValidOffsetForDataOfSize(Offset, 4)) { 256 DebugName = DebugNameStr; 257 CRCHash = DE.getU32(&Offset); 258 return true; 259 } 260 } 261 break; 262 } 263 } 264 return false; 265 } 266 267 bool darwinDsymMatchesBinary(const MachOObjectFile *DbgObj, 268 const MachOObjectFile *Obj) { 269 ArrayRef<uint8_t> dbg_uuid = DbgObj->getUuid(); 270 ArrayRef<uint8_t> bin_uuid = Obj->getUuid(); 271 if (dbg_uuid.empty() || bin_uuid.empty()) 272 return false; 273 return !memcmp(dbg_uuid.data(), bin_uuid.data(), dbg_uuid.size()); 274 } 275 276 } // end anonymous namespace 277 278 ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath, 279 const MachOObjectFile *MachExeObj, const std::string &ArchName) { 280 // On Darwin we may find DWARF in separate object file in 281 // resource directory. 282 std::vector<std::string> DsymPaths; 283 StringRef Filename = sys::path::filename(ExePath); 284 DsymPaths.push_back(getDarwinDWARFResourceForPath(ExePath, Filename)); 285 for (const auto &Path : Opts.DsymHints) { 286 DsymPaths.push_back(getDarwinDWARFResourceForPath(Path, Filename)); 287 } 288 for (const auto &Path : DsymPaths) { 289 auto DbgObjOrErr = getOrCreateObject(Path, ArchName); 290 if (!DbgObjOrErr) { 291 // Ignore errors, the file might not exist. 292 consumeError(DbgObjOrErr.takeError()); 293 continue; 294 } 295 ObjectFile *DbgObj = DbgObjOrErr.get(); 296 if (!DbgObj) 297 continue; 298 const MachOObjectFile *MachDbgObj = dyn_cast<const MachOObjectFile>(DbgObj); 299 if (!MachDbgObj) 300 continue; 301 if (darwinDsymMatchesBinary(MachDbgObj, MachExeObj)) 302 return DbgObj; 303 } 304 return nullptr; 305 } 306 307 ObjectFile *LLVMSymbolizer::lookUpDebuglinkObject(const std::string &Path, 308 const ObjectFile *Obj, 309 const std::string &ArchName) { 310 std::string DebuglinkName; 311 uint32_t CRCHash; 312 std::string DebugBinaryPath; 313 if (!getGNUDebuglinkContents(Obj, DebuglinkName, CRCHash)) 314 return nullptr; 315 if (!findDebugBinary(Path, DebuglinkName, CRCHash, Opts.FallbackDebugPath, 316 DebugBinaryPath)) 317 return nullptr; 318 auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName); 319 if (!DbgObjOrErr) { 320 // Ignore errors, the file might not exist. 321 consumeError(DbgObjOrErr.takeError()); 322 return nullptr; 323 } 324 return DbgObjOrErr.get(); 325 } 326 327 Expected<LLVMSymbolizer::ObjectPair> 328 LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path, 329 const std::string &ArchName) { 330 auto I = ObjectPairForPathArch.find(std::make_pair(Path, ArchName)); 331 if (I != ObjectPairForPathArch.end()) 332 return I->second; 333 334 auto ObjOrErr = getOrCreateObject(Path, ArchName); 335 if (!ObjOrErr) { 336 ObjectPairForPathArch.emplace(std::make_pair(Path, ArchName), 337 ObjectPair(nullptr, nullptr)); 338 return ObjOrErr.takeError(); 339 } 340 341 ObjectFile *Obj = ObjOrErr.get(); 342 assert(Obj != nullptr); 343 ObjectFile *DbgObj = nullptr; 344 345 if (auto MachObj = dyn_cast<const MachOObjectFile>(Obj)) 346 DbgObj = lookUpDsymFile(Path, MachObj, ArchName); 347 if (!DbgObj) 348 DbgObj = lookUpDebuglinkObject(Path, Obj, ArchName); 349 if (!DbgObj) 350 DbgObj = Obj; 351 ObjectPair Res = std::make_pair(Obj, DbgObj); 352 ObjectPairForPathArch.emplace(std::make_pair(Path, ArchName), Res); 353 return Res; 354 } 355 356 Expected<ObjectFile *> 357 LLVMSymbolizer::getOrCreateObject(const std::string &Path, 358 const std::string &ArchName) { 359 Binary *Bin; 360 auto Pair = BinaryForPath.emplace(Path, OwningBinary<Binary>()); 361 if (!Pair.second) { 362 Bin = Pair.first->second.getBinary(); 363 } else { 364 Expected<OwningBinary<Binary>> BinOrErr = createBinary(Path); 365 if (!BinOrErr) 366 return BinOrErr.takeError(); 367 Pair.first->second = std::move(BinOrErr.get()); 368 Bin = Pair.first->second.getBinary(); 369 } 370 371 if (!Bin) 372 return static_cast<ObjectFile *>(nullptr); 373 374 if (MachOUniversalBinary *UB = dyn_cast_or_null<MachOUniversalBinary>(Bin)) { 375 auto I = ObjectForUBPathAndArch.find(std::make_pair(Path, ArchName)); 376 if (I != ObjectForUBPathAndArch.end()) 377 return I->second.get(); 378 379 Expected<std::unique_ptr<ObjectFile>> ObjOrErr = 380 UB->getObjectForArch(ArchName); 381 if (!ObjOrErr) { 382 ObjectForUBPathAndArch.emplace(std::make_pair(Path, ArchName), 383 std::unique_ptr<ObjectFile>()); 384 return ObjOrErr.takeError(); 385 } 386 ObjectFile *Res = ObjOrErr->get(); 387 ObjectForUBPathAndArch.emplace(std::make_pair(Path, ArchName), 388 std::move(ObjOrErr.get())); 389 return Res; 390 } 391 if (Bin->isObject()) { 392 return cast<ObjectFile>(Bin); 393 } 394 return errorCodeToError(object_error::arch_not_found); 395 } 396 397 Expected<SymbolizableModule *> 398 LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) { 399 auto I = Modules.find(ModuleName); 400 if (I != Modules.end()) 401 return I->second.get(); 402 403 std::string BinaryName = ModuleName; 404 std::string ArchName = Opts.DefaultArch; 405 size_t ColonPos = ModuleName.find_last_of(':'); 406 // Verify that substring after colon form a valid arch name. 407 if (ColonPos != std::string::npos) { 408 std::string ArchStr = ModuleName.substr(ColonPos + 1); 409 if (Triple(ArchStr).getArch() != Triple::UnknownArch) { 410 BinaryName = ModuleName.substr(0, ColonPos); 411 ArchName = ArchStr; 412 } 413 } 414 auto ObjectsOrErr = getOrCreateObjectPair(BinaryName, ArchName); 415 if (!ObjectsOrErr) { 416 // Failed to find valid object file. 417 Modules.emplace(ModuleName, std::unique_ptr<SymbolizableModule>()); 418 return ObjectsOrErr.takeError(); 419 } 420 ObjectPair Objects = ObjectsOrErr.get(); 421 422 std::unique_ptr<DIContext> Context; 423 // If this is a COFF object containing PDB info, use a PDBContext to 424 // symbolize. Otherwise, use DWARF. 425 if (auto CoffObject = dyn_cast<COFFObjectFile>(Objects.first)) { 426 const codeview::DebugInfo *DebugInfo; 427 StringRef PDBFileName; 428 auto EC = CoffObject->getDebugPDBInfo(DebugInfo, PDBFileName); 429 if (!EC && DebugInfo != nullptr && !PDBFileName.empty()) { 430 using namespace pdb; 431 std::unique_ptr<IPDBSession> Session; 432 if (auto Err = loadDataForEXE(PDB_ReaderType::DIA, 433 Objects.first->getFileName(), Session)) { 434 Modules.emplace(ModuleName, std::unique_ptr<SymbolizableModule>()); 435 // Return along the PDB filename to provide more context 436 return createFileError(PDBFileName, std::move(Err)); 437 } 438 Context.reset(new PDBContext(*CoffObject, std::move(Session))); 439 } 440 } 441 if (!Context) 442 Context = 443 DWARFContext::create(*Objects.second, nullptr, 444 DWARFContext::defaultErrorHandler, Opts.DWPName); 445 auto InfoOrErr = 446 SymbolizableObjectFile::create(Objects.first, std::move(Context)); 447 std::unique_ptr<SymbolizableModule> SymMod; 448 if (InfoOrErr) 449 SymMod = std::move(InfoOrErr.get()); 450 auto InsertResult = Modules.emplace(ModuleName, std::move(SymMod)); 451 assert(InsertResult.second); 452 if (auto EC = InfoOrErr.getError()) 453 return errorCodeToError(EC); 454 return InsertResult.first->second.get(); 455 } 456 457 namespace { 458 459 // Undo these various manglings for Win32 extern "C" functions: 460 // cdecl - _foo 461 // stdcall - _foo@12 462 // fastcall - @foo@12 463 // vectorcall - foo@@12 464 // These are all different linkage names for 'foo'. 465 StringRef demanglePE32ExternCFunc(StringRef SymbolName) { 466 // Remove any '_' or '@' prefix. 467 char Front = SymbolName.empty() ? '\0' : SymbolName[0]; 468 if (Front == '_' || Front == '@') 469 SymbolName = SymbolName.drop_front(); 470 471 // Remove any '@[0-9]+' suffix. 472 if (Front != '?') { 473 size_t AtPos = SymbolName.rfind('@'); 474 if (AtPos != StringRef::npos && 475 std::all_of(SymbolName.begin() + AtPos + 1, SymbolName.end(), 476 [](char C) { return C >= '0' && C <= '9'; })) { 477 SymbolName = SymbolName.substr(0, AtPos); 478 } 479 } 480 481 // Remove any ending '@' for vectorcall. 482 if (SymbolName.endswith("@")) 483 SymbolName = SymbolName.drop_back(); 484 485 return SymbolName; 486 } 487 488 } // end anonymous namespace 489 490 std::string 491 LLVMSymbolizer::DemangleName(const std::string &Name, 492 const SymbolizableModule *DbiModuleDescriptor) { 493 // We can spoil names of symbols with C linkage, so use an heuristic 494 // approach to check if the name should be demangled. 495 if (Name.substr(0, 2) == "_Z") { 496 int status = 0; 497 char *DemangledName = itaniumDemangle(Name.c_str(), nullptr, nullptr, &status); 498 if (status != 0) 499 return Name; 500 std::string Result = DemangledName; 501 free(DemangledName); 502 return Result; 503 } 504 505 #if defined(_MSC_VER) 506 if (!Name.empty() && Name.front() == '?') { 507 // Only do MSVC C++ demangling on symbols starting with '?'. 508 char DemangledName[1024] = {0}; 509 DWORD result = ::UnDecorateSymbolName( 510 Name.c_str(), DemangledName, 1023, 511 UNDNAME_NO_ACCESS_SPECIFIERS | // Strip public, private, protected 512 UNDNAME_NO_ALLOCATION_LANGUAGE | // Strip __thiscall, __stdcall, etc 513 UNDNAME_NO_THROW_SIGNATURES | // Strip throw() specifications 514 UNDNAME_NO_MEMBER_TYPE | // Strip virtual, static, etc specifiers 515 UNDNAME_NO_MS_KEYWORDS | // Strip all MS extension keywords 516 UNDNAME_NO_FUNCTION_RETURNS); // Strip function return types 517 return (result == 0) ? Name : std::string(DemangledName); 518 } 519 #endif 520 if (DbiModuleDescriptor && DbiModuleDescriptor->isWin32Module()) 521 return std::string(demanglePE32ExternCFunc(Name)); 522 return Name; 523 } 524 525 } // namespace symbolize 526 } // namespace llvm 527