//===- SymbolizableObjectFile.cpp -----------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Implementation of SymbolizableObjectFile class.
//
//===----------------------------------------------------------------------===//
137d523365SDimitry Andric
147d523365SDimitry Andric #include "SymbolizableObjectFile.h"
157a7e6055SDimitry Andric #include "llvm/ADT/STLExtras.h"
167a7e6055SDimitry Andric #include "llvm/ADT/StringRef.h"
177a7e6055SDimitry Andric #include "llvm/ADT/Triple.h"
18db17bf38SDimitry Andric #include "llvm/BinaryFormat/COFF.h"
197d523365SDimitry Andric #include "llvm/DebugInfo/DWARF/DWARFContext.h"
207a7e6055SDimitry Andric #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
217a7e6055SDimitry Andric #include "llvm/Object/COFF.h"
227a7e6055SDimitry Andric #include "llvm/Object/ObjectFile.h"
237a7e6055SDimitry Andric #include "llvm/Object/SymbolSize.h"
247a7e6055SDimitry Andric #include "llvm/Support/Casting.h"
257a7e6055SDimitry Andric #include "llvm/Support/DataExtractor.h"
267a7e6055SDimitry Andric #include "llvm/Support/Error.h"
277a7e6055SDimitry Andric #include <algorithm>
287a7e6055SDimitry Andric #include <cstdint>
297a7e6055SDimitry Andric #include <memory>
307a7e6055SDimitry Andric #include <string>
317a7e6055SDimitry Andric #include <system_error>
327a7e6055SDimitry Andric #include <utility>
337a7e6055SDimitry Andric #include <vector>
347d523365SDimitry Andric
357a7e6055SDimitry Andric using namespace llvm;
367d523365SDimitry Andric using namespace object;
377a7e6055SDimitry Andric using namespace symbolize;
387d523365SDimitry Andric
397d523365SDimitry Andric static DILineInfoSpecifier
getDILineInfoSpecifier(FunctionNameKind FNKind)407d523365SDimitry Andric getDILineInfoSpecifier(FunctionNameKind FNKind) {
417d523365SDimitry Andric return DILineInfoSpecifier(
427d523365SDimitry Andric DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, FNKind);
437d523365SDimitry Andric }
447d523365SDimitry Andric
457d523365SDimitry Andric ErrorOr<std::unique_ptr<SymbolizableObjectFile>>
create(object::ObjectFile * Obj,std::unique_ptr<DIContext> DICtx)467d523365SDimitry Andric SymbolizableObjectFile::create(object::ObjectFile *Obj,
477d523365SDimitry Andric std::unique_ptr<DIContext> DICtx) {
487d523365SDimitry Andric std::unique_ptr<SymbolizableObjectFile> res(
497d523365SDimitry Andric new SymbolizableObjectFile(Obj, std::move(DICtx)));
507d523365SDimitry Andric std::unique_ptr<DataExtractor> OpdExtractor;
517d523365SDimitry Andric uint64_t OpdAddress = 0;
527d523365SDimitry Andric // Find the .opd (function descriptor) section if any, for big-endian
537d523365SDimitry Andric // PowerPC64 ELF.
547d523365SDimitry Andric if (Obj->getArch() == Triple::ppc64) {
557d523365SDimitry Andric for (section_iterator Section : Obj->sections()) {
567d523365SDimitry Andric StringRef Name;
577d523365SDimitry Andric StringRef Data;
587d523365SDimitry Andric if (auto EC = Section->getName(Name))
597d523365SDimitry Andric return EC;
607d523365SDimitry Andric if (Name == ".opd") {
617d523365SDimitry Andric if (auto EC = Section->getContents(Data))
627d523365SDimitry Andric return EC;
637d523365SDimitry Andric OpdExtractor.reset(new DataExtractor(Data, Obj->isLittleEndian(),
647d523365SDimitry Andric Obj->getBytesInAddress()));
657d523365SDimitry Andric OpdAddress = Section->getAddress();
667d523365SDimitry Andric break;
677d523365SDimitry Andric }
687d523365SDimitry Andric }
697d523365SDimitry Andric }
707d523365SDimitry Andric std::vector<std::pair<SymbolRef, uint64_t>> Symbols =
717d523365SDimitry Andric computeSymbolSizes(*Obj);
727d523365SDimitry Andric for (auto &P : Symbols)
737d523365SDimitry Andric res->addSymbol(P.first, P.second, OpdExtractor.get(), OpdAddress);
747d523365SDimitry Andric
757d523365SDimitry Andric // If this is a COFF object and we didn't find any symbols, try the export
767d523365SDimitry Andric // table.
777d523365SDimitry Andric if (Symbols.empty()) {
787d523365SDimitry Andric if (auto *CoffObj = dyn_cast<COFFObjectFile>(Obj))
797d523365SDimitry Andric if (auto EC = res->addCoffExportSymbols(CoffObj))
807d523365SDimitry Andric return EC;
817d523365SDimitry Andric }
827d523365SDimitry Andric return std::move(res);
837d523365SDimitry Andric }
847d523365SDimitry Andric
SymbolizableObjectFile(ObjectFile * Obj,std::unique_ptr<DIContext> DICtx)857d523365SDimitry Andric SymbolizableObjectFile::SymbolizableObjectFile(ObjectFile *Obj,
867d523365SDimitry Andric std::unique_ptr<DIContext> DICtx)
877d523365SDimitry Andric : Module(Obj), DebugInfoContext(std::move(DICtx)) {}
887d523365SDimitry Andric
897d523365SDimitry Andric namespace {
907a7e6055SDimitry Andric
917d523365SDimitry Andric struct OffsetNamePair {
927d523365SDimitry Andric uint32_t Offset;
937d523365SDimitry Andric StringRef Name;
947a7e6055SDimitry Andric
operator <__anon5f73dae80111::OffsetNamePair957d523365SDimitry Andric bool operator<(const OffsetNamePair &R) const {
967d523365SDimitry Andric return Offset < R.Offset;
977d523365SDimitry Andric }
987d523365SDimitry Andric };
997a7e6055SDimitry Andric
1007a7e6055SDimitry Andric } // end anonymous namespace
1017d523365SDimitry Andric
addCoffExportSymbols(const COFFObjectFile * CoffObj)1027d523365SDimitry Andric std::error_code SymbolizableObjectFile::addCoffExportSymbols(
1037d523365SDimitry Andric const COFFObjectFile *CoffObj) {
1047d523365SDimitry Andric // Get all export names and offsets.
1057d523365SDimitry Andric std::vector<OffsetNamePair> ExportSyms;
1067d523365SDimitry Andric for (const ExportDirectoryEntryRef &Ref : CoffObj->export_directories()) {
1077d523365SDimitry Andric StringRef Name;
1087d523365SDimitry Andric uint32_t Offset;
1097d523365SDimitry Andric if (auto EC = Ref.getSymbolName(Name))
1107d523365SDimitry Andric return EC;
1117d523365SDimitry Andric if (auto EC = Ref.getExportRVA(Offset))
1127d523365SDimitry Andric return EC;
1137d523365SDimitry Andric ExportSyms.push_back(OffsetNamePair{Offset, Name});
1147d523365SDimitry Andric }
1157d523365SDimitry Andric if (ExportSyms.empty())
1167d523365SDimitry Andric return std::error_code();
1177d523365SDimitry Andric
1187d523365SDimitry Andric // Sort by ascending offset.
1197d523365SDimitry Andric array_pod_sort(ExportSyms.begin(), ExportSyms.end());
1207d523365SDimitry Andric
1217d523365SDimitry Andric // Approximate the symbol sizes by assuming they run to the next symbol.
1227d523365SDimitry Andric // FIXME: This assumes all exports are functions.
1237d523365SDimitry Andric uint64_t ImageBase = CoffObj->getImageBase();
1247d523365SDimitry Andric for (auto I = ExportSyms.begin(), E = ExportSyms.end(); I != E; ++I) {
1257d523365SDimitry Andric OffsetNamePair &Export = *I;
1267d523365SDimitry Andric // FIXME: The last export has a one byte size now.
1277d523365SDimitry Andric uint32_t NextOffset = I != E ? I->Offset : Export.Offset + 1;
1287d523365SDimitry Andric uint64_t SymbolStart = ImageBase + Export.Offset;
1297d523365SDimitry Andric uint64_t SymbolSize = NextOffset - Export.Offset;
1307d523365SDimitry Andric SymbolDesc SD = {SymbolStart, SymbolSize};
1317d523365SDimitry Andric Functions.insert(std::make_pair(SD, Export.Name));
1327d523365SDimitry Andric }
1337d523365SDimitry Andric return std::error_code();
1347d523365SDimitry Andric }
1357d523365SDimitry Andric
addSymbol(const SymbolRef & Symbol,uint64_t SymbolSize,DataExtractor * OpdExtractor,uint64_t OpdAddress)1367d523365SDimitry Andric std::error_code SymbolizableObjectFile::addSymbol(const SymbolRef &Symbol,
1377d523365SDimitry Andric uint64_t SymbolSize,
1387d523365SDimitry Andric DataExtractor *OpdExtractor,
1397d523365SDimitry Andric uint64_t OpdAddress) {
1403ca95b02SDimitry Andric Expected<SymbolRef::Type> SymbolTypeOrErr = Symbol.getType();
1413ca95b02SDimitry Andric if (!SymbolTypeOrErr)
1423ca95b02SDimitry Andric return errorToErrorCode(SymbolTypeOrErr.takeError());
1433ca95b02SDimitry Andric SymbolRef::Type SymbolType = *SymbolTypeOrErr;
1447d523365SDimitry Andric if (SymbolType != SymbolRef::ST_Function && SymbolType != SymbolRef::ST_Data)
1457d523365SDimitry Andric return std::error_code();
1463ca95b02SDimitry Andric Expected<uint64_t> SymbolAddressOrErr = Symbol.getAddress();
1473ca95b02SDimitry Andric if (!SymbolAddressOrErr)
1483ca95b02SDimitry Andric return errorToErrorCode(SymbolAddressOrErr.takeError());
1497d523365SDimitry Andric uint64_t SymbolAddress = *SymbolAddressOrErr;
1507d523365SDimitry Andric if (OpdExtractor) {
1517d523365SDimitry Andric // For big-endian PowerPC64 ELF, symbols in the .opd section refer to
1527d523365SDimitry Andric // function descriptors. The first word of the descriptor is a pointer to
1537d523365SDimitry Andric // the function's code.
1547d523365SDimitry Andric // For the purposes of symbolization, pretend the symbol's address is that
1557d523365SDimitry Andric // of the function's code, not the descriptor.
1567d523365SDimitry Andric uint64_t OpdOffset = SymbolAddress - OpdAddress;
1577d523365SDimitry Andric uint32_t OpdOffset32 = OpdOffset;
1587d523365SDimitry Andric if (OpdOffset == OpdOffset32 &&
1597d523365SDimitry Andric OpdExtractor->isValidOffsetForAddress(OpdOffset32))
1607d523365SDimitry Andric SymbolAddress = OpdExtractor->getAddress(&OpdOffset32);
1617d523365SDimitry Andric }
1623ca95b02SDimitry Andric Expected<StringRef> SymbolNameOrErr = Symbol.getName();
1633ca95b02SDimitry Andric if (!SymbolNameOrErr)
1643ca95b02SDimitry Andric return errorToErrorCode(SymbolNameOrErr.takeError());
1657d523365SDimitry Andric StringRef SymbolName = *SymbolNameOrErr;
1667d523365SDimitry Andric // Mach-O symbol table names have leading underscore, skip it.
1677a7e6055SDimitry Andric if (Module->isMachO() && !SymbolName.empty() && SymbolName[0] == '_')
1687d523365SDimitry Andric SymbolName = SymbolName.drop_front();
1697d523365SDimitry Andric // FIXME: If a function has alias, there are two entries in symbol table
1707d523365SDimitry Andric // with same address size. Make sure we choose the correct one.
1717d523365SDimitry Andric auto &M = SymbolType == SymbolRef::ST_Function ? Functions : Objects;
1727d523365SDimitry Andric SymbolDesc SD = { SymbolAddress, SymbolSize };
1737d523365SDimitry Andric M.insert(std::make_pair(SD, SymbolName));
1747d523365SDimitry Andric return std::error_code();
1757d523365SDimitry Andric }
1767d523365SDimitry Andric
1777d523365SDimitry Andric // Return true if this is a 32-bit x86 PE COFF module.
isWin32Module() const1787d523365SDimitry Andric bool SymbolizableObjectFile::isWin32Module() const {
1797d523365SDimitry Andric auto *CoffObject = dyn_cast<COFFObjectFile>(Module);
1807d523365SDimitry Andric return CoffObject && CoffObject->getMachine() == COFF::IMAGE_FILE_MACHINE_I386;
1817d523365SDimitry Andric }
1827d523365SDimitry Andric
getModulePreferredBase() const1837d523365SDimitry Andric uint64_t SymbolizableObjectFile::getModulePreferredBase() const {
1847d523365SDimitry Andric if (auto *CoffObject = dyn_cast<COFFObjectFile>(Module))
1857d523365SDimitry Andric return CoffObject->getImageBase();
1867d523365SDimitry Andric return 0;
1877d523365SDimitry Andric }
1887d523365SDimitry Andric
getNameFromSymbolTable(SymbolRef::Type Type,uint64_t Address,std::string & Name,uint64_t & Addr,uint64_t & Size) const1897d523365SDimitry Andric bool SymbolizableObjectFile::getNameFromSymbolTable(SymbolRef::Type Type,
1907d523365SDimitry Andric uint64_t Address,
1917d523365SDimitry Andric std::string &Name,
1927d523365SDimitry Andric uint64_t &Addr,
1937d523365SDimitry Andric uint64_t &Size) const {
1947d523365SDimitry Andric const auto &SymbolMap = Type == SymbolRef::ST_Function ? Functions : Objects;
1957d523365SDimitry Andric if (SymbolMap.empty())
1967d523365SDimitry Andric return false;
1977d523365SDimitry Andric SymbolDesc SD = { Address, Address };
1987d523365SDimitry Andric auto SymbolIterator = SymbolMap.upper_bound(SD);
1997d523365SDimitry Andric if (SymbolIterator == SymbolMap.begin())
2007d523365SDimitry Andric return false;
2017d523365SDimitry Andric --SymbolIterator;
2027d523365SDimitry Andric if (SymbolIterator->first.Size != 0 &&
2037d523365SDimitry Andric SymbolIterator->first.Addr + SymbolIterator->first.Size <= Address)
2047d523365SDimitry Andric return false;
2057d523365SDimitry Andric Name = SymbolIterator->second.str();
2067d523365SDimitry Andric Addr = SymbolIterator->first.Addr;
2077d523365SDimitry Andric Size = SymbolIterator->first.Size;
2087d523365SDimitry Andric return true;
2097d523365SDimitry Andric }
2107d523365SDimitry Andric
shouldOverrideWithSymbolTable(FunctionNameKind FNKind,bool UseSymbolTable) const2117d523365SDimitry Andric bool SymbolizableObjectFile::shouldOverrideWithSymbolTable(
2127d523365SDimitry Andric FunctionNameKind FNKind, bool UseSymbolTable) const {
2137d523365SDimitry Andric // When DWARF is used with -gline-tables-only / -gmlt, the symbol table gives
2147d523365SDimitry Andric // better answers for linkage names than the DIContext. Otherwise, we are
2157d523365SDimitry Andric // probably using PEs and PDBs, and we shouldn't do the override. PE files
2167d523365SDimitry Andric // generally only contain the names of exported symbols.
2177d523365SDimitry Andric return FNKind == FunctionNameKind::LinkageName && UseSymbolTable &&
2187d523365SDimitry Andric isa<DWARFContext>(DebugInfoContext.get());
2197d523365SDimitry Andric }
2207d523365SDimitry Andric
symbolizeCode(uint64_t ModuleOffset,FunctionNameKind FNKind,bool UseSymbolTable) const2217d523365SDimitry Andric DILineInfo SymbolizableObjectFile::symbolizeCode(uint64_t ModuleOffset,
2227d523365SDimitry Andric FunctionNameKind FNKind,
2237d523365SDimitry Andric bool UseSymbolTable) const {
2247d523365SDimitry Andric DILineInfo LineInfo;
2257d523365SDimitry Andric if (DebugInfoContext) {
2267d523365SDimitry Andric LineInfo = DebugInfoContext->getLineInfoForAddress(
2277d523365SDimitry Andric ModuleOffset, getDILineInfoSpecifier(FNKind));
2287d523365SDimitry Andric }
2297d523365SDimitry Andric // Override function name from symbol table if necessary.
2307d523365SDimitry Andric if (shouldOverrideWithSymbolTable(FNKind, UseSymbolTable)) {
2317d523365SDimitry Andric std::string FunctionName;
2327d523365SDimitry Andric uint64_t Start, Size;
2337d523365SDimitry Andric if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset,
2347d523365SDimitry Andric FunctionName, Start, Size)) {
2357d523365SDimitry Andric LineInfo.FunctionName = FunctionName;
2367d523365SDimitry Andric }
2377d523365SDimitry Andric }
2387d523365SDimitry Andric return LineInfo;
2397d523365SDimitry Andric }
2407d523365SDimitry Andric
symbolizeInlinedCode(uint64_t ModuleOffset,FunctionNameKind FNKind,bool UseSymbolTable) const2417d523365SDimitry Andric DIInliningInfo SymbolizableObjectFile::symbolizeInlinedCode(
2427d523365SDimitry Andric uint64_t ModuleOffset, FunctionNameKind FNKind, bool UseSymbolTable) const {
2437d523365SDimitry Andric DIInliningInfo InlinedContext;
2447d523365SDimitry Andric
2457d523365SDimitry Andric if (DebugInfoContext)
2467d523365SDimitry Andric InlinedContext = DebugInfoContext->getInliningInfoForAddress(
2477d523365SDimitry Andric ModuleOffset, getDILineInfoSpecifier(FNKind));
2487d523365SDimitry Andric // Make sure there is at least one frame in context.
2497d523365SDimitry Andric if (InlinedContext.getNumberOfFrames() == 0)
2507d523365SDimitry Andric InlinedContext.addFrame(DILineInfo());
2517d523365SDimitry Andric
2527d523365SDimitry Andric // Override the function name in lower frame with name from symbol table.
2537d523365SDimitry Andric if (shouldOverrideWithSymbolTable(FNKind, UseSymbolTable)) {
2547d523365SDimitry Andric std::string FunctionName;
2557d523365SDimitry Andric uint64_t Start, Size;
2567d523365SDimitry Andric if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset,
2577d523365SDimitry Andric FunctionName, Start, Size)) {
2587d523365SDimitry Andric InlinedContext.getMutableFrame(InlinedContext.getNumberOfFrames() - 1)
2597d523365SDimitry Andric ->FunctionName = FunctionName;
2607d523365SDimitry Andric }
2617d523365SDimitry Andric }
2627d523365SDimitry Andric
2637d523365SDimitry Andric return InlinedContext;
2647d523365SDimitry Andric }
2657d523365SDimitry Andric
symbolizeData(uint64_t ModuleOffset) const2667d523365SDimitry Andric DIGlobal SymbolizableObjectFile::symbolizeData(uint64_t ModuleOffset) const {
2677d523365SDimitry Andric DIGlobal Res;
2687d523365SDimitry Andric getNameFromSymbolTable(SymbolRef::ST_Data, ModuleOffset, Res.Name, Res.Start,
2697d523365SDimitry Andric Res.Size);
2707d523365SDimitry Andric return Res;
2717d523365SDimitry Andric }
272