//===-- LLVMSymbolize.cpp -------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Implementation for LLVM symbolization library.
//
//===----------------------------------------------------------------------===//

#include "llvm/DebugInfo/Symbolize/Symbolize.h"

#include "SymbolizableObjectFile.h"

#include "llvm/ADT/STLExtras.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/PDB/PDB.h"
#include "llvm/DebugInfo/PDB/PDBContext.h"
#include "llvm/Demangle/Demangle.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/MachO.h"
#include "llvm/Object/MachOUniversal.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <cstring>

#if defined(_MSC_VER)
#include <Windows.h>

// This must be included after windows.h.
#include <DbgHelp.h>
#pragma comment(lib, "dbghelp.lib")

// Windows.h conflicts with our COFF header definitions.
45 #ifdef IMAGE_FILE_MACHINE_I386 46 #undef IMAGE_FILE_MACHINE_I386 47 #endif 48 #endif 49 50 namespace llvm { 51 namespace symbolize { 52 53 Expected<DILineInfo> 54 LLVMSymbolizer::symbolizeCode(const std::string &ModuleName, 55 object::SectionedAddress ModuleOffset, 56 StringRef DWPName) { 57 SymbolizableModule *Info; 58 if (auto InfoOrErr = getOrCreateModuleInfo(ModuleName, DWPName)) 59 Info = InfoOrErr.get(); 60 else 61 return InfoOrErr.takeError(); 62 63 // A null module means an error has already been reported. Return an empty 64 // result. 65 if (!Info) 66 return DILineInfo(); 67 68 // If the user is giving us relative addresses, add the preferred base of the 69 // object to the offset before we do the query. It's what DIContext expects. 70 if (Opts.RelativeAddresses) 71 ModuleOffset.Address += Info->getModulePreferredBase(); 72 73 DILineInfo LineInfo = Info->symbolizeCode(ModuleOffset, Opts.PrintFunctions, 74 Opts.UseSymbolTable); 75 if (Opts.Demangle) 76 LineInfo.FunctionName = DemangleName(LineInfo.FunctionName, Info); 77 return LineInfo; 78 } 79 80 Expected<DIInliningInfo> 81 LLVMSymbolizer::symbolizeInlinedCode(const std::string &ModuleName, 82 object::SectionedAddress ModuleOffset, 83 StringRef DWPName) { 84 SymbolizableModule *Info; 85 if (auto InfoOrErr = getOrCreateModuleInfo(ModuleName, DWPName)) 86 Info = InfoOrErr.get(); 87 else 88 return InfoOrErr.takeError(); 89 90 // A null module means an error has already been reported. Return an empty 91 // result. 92 if (!Info) 93 return DIInliningInfo(); 94 95 // If the user is giving us relative addresses, add the preferred base of the 96 // object to the offset before we do the query. It's what DIContext expects. 
97 if (Opts.RelativeAddresses) 98 ModuleOffset.Address += Info->getModulePreferredBase(); 99 100 DIInliningInfo InlinedContext = Info->symbolizeInlinedCode( 101 ModuleOffset, Opts.PrintFunctions, Opts.UseSymbolTable); 102 if (Opts.Demangle) { 103 for (int i = 0, n = InlinedContext.getNumberOfFrames(); i < n; i++) { 104 auto *Frame = InlinedContext.getMutableFrame(i); 105 Frame->FunctionName = DemangleName(Frame->FunctionName, Info); 106 } 107 } 108 return InlinedContext; 109 } 110 111 Expected<DIGlobal> 112 LLVMSymbolizer::symbolizeData(const std::string &ModuleName, 113 object::SectionedAddress ModuleOffset) { 114 SymbolizableModule *Info; 115 if (auto InfoOrErr = getOrCreateModuleInfo(ModuleName)) 116 Info = InfoOrErr.get(); 117 else 118 return InfoOrErr.takeError(); 119 120 // A null module means an error has already been reported. Return an empty 121 // result. 122 if (!Info) 123 return DIGlobal(); 124 125 // If the user is giving us relative addresses, add the preferred base of 126 // the object to the offset before we do the query. It's what DIContext 127 // expects. 128 if (Opts.RelativeAddresses) 129 ModuleOffset.Address += Info->getModulePreferredBase(); 130 131 DIGlobal Global = Info->symbolizeData(ModuleOffset); 132 if (Opts.Demangle) 133 Global.Name = DemangleName(Global.Name, Info); 134 return Global; 135 } 136 137 void LLVMSymbolizer::flush() { 138 ObjectForUBPathAndArch.clear(); 139 BinaryForPath.clear(); 140 ObjectPairForPathArch.clear(); 141 Modules.clear(); 142 } 143 144 namespace { 145 146 // For Path="/path/to/foo" and Basename="foo" assume that debug info is in 147 // /path/to/foo.dSYM/Contents/Resources/DWARF/foo. 148 // For Path="/path/to/bar.dSYM" and Basename="foo" assume that debug info is in 149 // /path/to/bar.dSYM/Contents/Resources/DWARF/foo. 
150 std::string getDarwinDWARFResourceForPath( 151 const std::string &Path, const std::string &Basename) { 152 SmallString<16> ResourceName = StringRef(Path); 153 if (sys::path::extension(Path) != ".dSYM") { 154 ResourceName += ".dSYM"; 155 } 156 sys::path::append(ResourceName, "Contents", "Resources", "DWARF"); 157 sys::path::append(ResourceName, Basename); 158 return ResourceName.str(); 159 } 160 161 bool checkFileCRC(StringRef Path, uint32_t CRCHash) { 162 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 163 MemoryBuffer::getFileOrSTDIN(Path); 164 if (!MB) 165 return false; 166 return !zlib::isAvailable() || CRCHash == zlib::crc32(MB.get()->getBuffer()); 167 } 168 169 bool findDebugBinary(const std::string &OrigPath, 170 const std::string &DebuglinkName, uint32_t CRCHash, 171 const std::string &FallbackDebugPath, 172 std::string &Result) { 173 SmallString<16> OrigDir(OrigPath); 174 llvm::sys::path::remove_filename(OrigDir); 175 SmallString<16> DebugPath = OrigDir; 176 // Try relative/path/to/original_binary/debuglink_name 177 llvm::sys::path::append(DebugPath, DebuglinkName); 178 if (checkFileCRC(DebugPath, CRCHash)) { 179 Result = DebugPath.str(); 180 return true; 181 } 182 // Try relative/path/to/original_binary/.debug/debuglink_name 183 DebugPath = OrigDir; 184 llvm::sys::path::append(DebugPath, ".debug", DebuglinkName); 185 if (checkFileCRC(DebugPath, CRCHash)) { 186 Result = DebugPath.str(); 187 return true; 188 } 189 // Make the path absolute so that lookups will go to 190 // "/usr/lib/debug/full/path/to/debug", not 191 // "/usr/lib/debug/to/debug" 192 llvm::sys::fs::make_absolute(OrigDir); 193 if (!FallbackDebugPath.empty()) { 194 // Try <FallbackDebugPath>/absolute/path/to/original_binary/debuglink_name 195 DebugPath = FallbackDebugPath; 196 } else { 197 #if defined(__NetBSD__) 198 // Try /usr/libdata/debug/absolute/path/to/original_binary/debuglink_name 199 DebugPath = "/usr/libdata/debug"; 200 #else 201 // Try 
/usr/lib/debug/absolute/path/to/original_binary/debuglink_name 202 DebugPath = "/usr/lib/debug"; 203 #endif 204 } 205 llvm::sys::path::append(DebugPath, llvm::sys::path::relative_path(OrigDir), 206 DebuglinkName); 207 if (checkFileCRC(DebugPath, CRCHash)) { 208 Result = DebugPath.str(); 209 return true; 210 } 211 return false; 212 } 213 214 bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName, 215 uint32_t &CRCHash) { 216 if (!Obj) 217 return false; 218 for (const SectionRef &Section : Obj->sections()) { 219 StringRef Name; 220 Section.getName(Name); 221 Name = Name.substr(Name.find_first_not_of("._")); 222 if (Name == "gnu_debuglink") { 223 StringRef Data; 224 Section.getContents(Data); 225 DataExtractor DE(Data, Obj->isLittleEndian(), 0); 226 uint32_t Offset = 0; 227 if (const char *DebugNameStr = DE.getCStr(&Offset)) { 228 // 4-byte align the offset. 229 Offset = (Offset + 3) & ~0x3; 230 if (DE.isValidOffsetForDataOfSize(Offset, 4)) { 231 DebugName = DebugNameStr; 232 CRCHash = DE.getU32(&Offset); 233 return true; 234 } 235 } 236 break; 237 } 238 } 239 return false; 240 } 241 242 bool darwinDsymMatchesBinary(const MachOObjectFile *DbgObj, 243 const MachOObjectFile *Obj) { 244 ArrayRef<uint8_t> dbg_uuid = DbgObj->getUuid(); 245 ArrayRef<uint8_t> bin_uuid = Obj->getUuid(); 246 if (dbg_uuid.empty() || bin_uuid.empty()) 247 return false; 248 return !memcmp(dbg_uuid.data(), bin_uuid.data(), dbg_uuid.size()); 249 } 250 251 } // end anonymous namespace 252 253 ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath, 254 const MachOObjectFile *MachExeObj, const std::string &ArchName) { 255 // On Darwin we may find DWARF in separate object file in 256 // resource directory. 
257 std::vector<std::string> DsymPaths; 258 StringRef Filename = sys::path::filename(ExePath); 259 DsymPaths.push_back(getDarwinDWARFResourceForPath(ExePath, Filename)); 260 for (const auto &Path : Opts.DsymHints) { 261 DsymPaths.push_back(getDarwinDWARFResourceForPath(Path, Filename)); 262 } 263 for (const auto &Path : DsymPaths) { 264 auto DbgObjOrErr = getOrCreateObject(Path, ArchName); 265 if (!DbgObjOrErr) { 266 // Ignore errors, the file might not exist. 267 consumeError(DbgObjOrErr.takeError()); 268 continue; 269 } 270 ObjectFile *DbgObj = DbgObjOrErr.get(); 271 if (!DbgObj) 272 continue; 273 const MachOObjectFile *MachDbgObj = dyn_cast<const MachOObjectFile>(DbgObj); 274 if (!MachDbgObj) 275 continue; 276 if (darwinDsymMatchesBinary(MachDbgObj, MachExeObj)) 277 return DbgObj; 278 } 279 return nullptr; 280 } 281 282 ObjectFile *LLVMSymbolizer::lookUpDebuglinkObject(const std::string &Path, 283 const ObjectFile *Obj, 284 const std::string &ArchName) { 285 std::string DebuglinkName; 286 uint32_t CRCHash; 287 std::string DebugBinaryPath; 288 if (!getGNUDebuglinkContents(Obj, DebuglinkName, CRCHash)) 289 return nullptr; 290 if (!findDebugBinary(Path, DebuglinkName, CRCHash, Opts.FallbackDebugPath, 291 DebugBinaryPath)) 292 return nullptr; 293 auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName); 294 if (!DbgObjOrErr) { 295 // Ignore errors, the file might not exist. 
296 consumeError(DbgObjOrErr.takeError()); 297 return nullptr; 298 } 299 return DbgObjOrErr.get(); 300 } 301 302 Expected<LLVMSymbolizer::ObjectPair> 303 LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path, 304 const std::string &ArchName) { 305 const auto &I = ObjectPairForPathArch.find(std::make_pair(Path, ArchName)); 306 if (I != ObjectPairForPathArch.end()) { 307 return I->second; 308 } 309 310 auto ObjOrErr = getOrCreateObject(Path, ArchName); 311 if (!ObjOrErr) { 312 ObjectPairForPathArch.insert(std::make_pair(std::make_pair(Path, ArchName), 313 ObjectPair(nullptr, nullptr))); 314 return ObjOrErr.takeError(); 315 } 316 317 ObjectFile *Obj = ObjOrErr.get(); 318 assert(Obj != nullptr); 319 ObjectFile *DbgObj = nullptr; 320 321 if (auto MachObj = dyn_cast<const MachOObjectFile>(Obj)) 322 DbgObj = lookUpDsymFile(Path, MachObj, ArchName); 323 if (!DbgObj) 324 DbgObj = lookUpDebuglinkObject(Path, Obj, ArchName); 325 if (!DbgObj) 326 DbgObj = Obj; 327 ObjectPair Res = std::make_pair(Obj, DbgObj); 328 ObjectPairForPathArch.insert( 329 std::make_pair(std::make_pair(Path, ArchName), Res)); 330 return Res; 331 } 332 333 Expected<ObjectFile *> 334 LLVMSymbolizer::getOrCreateObject(const std::string &Path, 335 const std::string &ArchName) { 336 const auto &I = BinaryForPath.find(Path); 337 Binary *Bin = nullptr; 338 if (I == BinaryForPath.end()) { 339 Expected<OwningBinary<Binary>> BinOrErr = createBinary(Path); 340 if (!BinOrErr) { 341 BinaryForPath.insert(std::make_pair(Path, OwningBinary<Binary>())); 342 return BinOrErr.takeError(); 343 } 344 Bin = BinOrErr->getBinary(); 345 BinaryForPath.insert(std::make_pair(Path, std::move(BinOrErr.get()))); 346 } else { 347 Bin = I->second.getBinary(); 348 } 349 350 if (!Bin) 351 return static_cast<ObjectFile *>(nullptr); 352 353 if (MachOUniversalBinary *UB = dyn_cast_or_null<MachOUniversalBinary>(Bin)) { 354 const auto &I = ObjectForUBPathAndArch.find(std::make_pair(Path, ArchName)); 355 if (I != 
ObjectForUBPathAndArch.end()) { 356 return I->second.get(); 357 } 358 Expected<std::unique_ptr<ObjectFile>> ObjOrErr = 359 UB->getObjectForArch(ArchName); 360 if (!ObjOrErr) { 361 ObjectForUBPathAndArch.insert(std::make_pair( 362 std::make_pair(Path, ArchName), std::unique_ptr<ObjectFile>())); 363 return ObjOrErr.takeError(); 364 } 365 ObjectFile *Res = ObjOrErr->get(); 366 ObjectForUBPathAndArch.insert(std::make_pair(std::make_pair(Path, ArchName), 367 std::move(ObjOrErr.get()))); 368 return Res; 369 } 370 if (Bin->isObject()) { 371 return cast<ObjectFile>(Bin); 372 } 373 return errorCodeToError(object_error::arch_not_found); 374 } 375 376 Expected<SymbolizableModule *> 377 LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName, 378 StringRef DWPName) { 379 const auto &I = Modules.find(ModuleName); 380 if (I != Modules.end()) { 381 return I->second.get(); 382 } 383 std::string BinaryName = ModuleName; 384 std::string ArchName = Opts.DefaultArch; 385 size_t ColonPos = ModuleName.find_last_of(':'); 386 // Verify that substring after colon form a valid arch name. 387 if (ColonPos != std::string::npos) { 388 std::string ArchStr = ModuleName.substr(ColonPos + 1); 389 if (Triple(ArchStr).getArch() != Triple::UnknownArch) { 390 BinaryName = ModuleName.substr(0, ColonPos); 391 ArchName = ArchStr; 392 } 393 } 394 auto ObjectsOrErr = getOrCreateObjectPair(BinaryName, ArchName); 395 if (!ObjectsOrErr) { 396 // Failed to find valid object file. 397 Modules.insert( 398 std::make_pair(ModuleName, std::unique_ptr<SymbolizableModule>())); 399 return ObjectsOrErr.takeError(); 400 } 401 ObjectPair Objects = ObjectsOrErr.get(); 402 403 std::unique_ptr<DIContext> Context; 404 // If this is a COFF object containing PDB info, use a PDBContext to 405 // symbolize. Otherwise, use DWARF. 
406 if (auto CoffObject = dyn_cast<COFFObjectFile>(Objects.first)) { 407 const codeview::DebugInfo *DebugInfo; 408 StringRef PDBFileName; 409 auto EC = CoffObject->getDebugPDBInfo(DebugInfo, PDBFileName); 410 if (!EC && DebugInfo != nullptr && !PDBFileName.empty()) { 411 using namespace pdb; 412 std::unique_ptr<IPDBSession> Session; 413 if (auto Err = loadDataForEXE(PDB_ReaderType::DIA, 414 Objects.first->getFileName(), Session)) { 415 Modules.insert( 416 std::make_pair(ModuleName, std::unique_ptr<SymbolizableModule>())); 417 // Return along the PDB filename to provide more context 418 return createFileError(PDBFileName, std::move(Err)); 419 } 420 Context.reset(new PDBContext(*CoffObject, std::move(Session))); 421 } 422 } 423 if (!Context) 424 Context = DWARFContext::create(*Objects.second, nullptr, 425 DWARFContext::defaultErrorHandler, DWPName); 426 assert(Context); 427 auto InfoOrErr = 428 SymbolizableObjectFile::create(Objects.first, std::move(Context)); 429 std::unique_ptr<SymbolizableModule> SymMod; 430 if (InfoOrErr) 431 SymMod = std::move(InfoOrErr.get()); 432 auto InsertResult = 433 Modules.insert(std::make_pair(ModuleName, std::move(SymMod))); 434 assert(InsertResult.second); 435 if (auto EC = InfoOrErr.getError()) 436 return errorCodeToError(EC); 437 return InsertResult.first->second.get(); 438 } 439 440 namespace { 441 442 // Undo these various manglings for Win32 extern "C" functions: 443 // cdecl - _foo 444 // stdcall - _foo@12 445 // fastcall - @foo@12 446 // vectorcall - foo@@12 447 // These are all different linkage names for 'foo'. 448 StringRef demanglePE32ExternCFunc(StringRef SymbolName) { 449 // Remove any '_' or '@' prefix. 450 char Front = SymbolName.empty() ? '\0' : SymbolName[0]; 451 if (Front == '_' || Front == '@') 452 SymbolName = SymbolName.drop_front(); 453 454 // Remove any '@[0-9]+' suffix. 
455 if (Front != '?') { 456 size_t AtPos = SymbolName.rfind('@'); 457 if (AtPos != StringRef::npos && 458 std::all_of(SymbolName.begin() + AtPos + 1, SymbolName.end(), 459 [](char C) { return C >= '0' && C <= '9'; })) { 460 SymbolName = SymbolName.substr(0, AtPos); 461 } 462 } 463 464 // Remove any ending '@' for vectorcall. 465 if (SymbolName.endswith("@")) 466 SymbolName = SymbolName.drop_back(); 467 468 return SymbolName; 469 } 470 471 } // end anonymous namespace 472 473 std::string 474 LLVMSymbolizer::DemangleName(const std::string &Name, 475 const SymbolizableModule *DbiModuleDescriptor) { 476 // We can spoil names of symbols with C linkage, so use an heuristic 477 // approach to check if the name should be demangled. 478 if (Name.substr(0, 2) == "_Z") { 479 int status = 0; 480 char *DemangledName = itaniumDemangle(Name.c_str(), nullptr, nullptr, &status); 481 if (status != 0) 482 return Name; 483 std::string Result = DemangledName; 484 free(DemangledName); 485 return Result; 486 } 487 488 #if defined(_MSC_VER) 489 if (!Name.empty() && Name.front() == '?') { 490 // Only do MSVC C++ demangling on symbols starting with '?'. 491 char DemangledName[1024] = {0}; 492 DWORD result = ::UnDecorateSymbolName( 493 Name.c_str(), DemangledName, 1023, 494 UNDNAME_NO_ACCESS_SPECIFIERS | // Strip public, private, protected 495 UNDNAME_NO_ALLOCATION_LANGUAGE | // Strip __thiscall, __stdcall, etc 496 UNDNAME_NO_THROW_SIGNATURES | // Strip throw() specifications 497 UNDNAME_NO_MEMBER_TYPE | // Strip virtual, static, etc specifiers 498 UNDNAME_NO_MS_KEYWORDS | // Strip all MS extension keywords 499 UNDNAME_NO_FUNCTION_RETURNS); // Strip function return types 500 return (result == 0) ? Name : std::string(DemangledName); 501 } 502 #endif 503 if (DbiModuleDescriptor && DbiModuleDescriptor->isWin32Module()) 504 return std::string(demanglePE32ExternCFunc(Name)); 505 return Name; 506 } 507 508 } // namespace symbolize 509 } // namespace llvm 510