1 //===- SymbolizableObjectFile.cpp -----------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Implementation of SymbolizableObjectFile class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "SymbolizableObjectFile.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/ADT/Triple.h"
16 #include "llvm/BinaryFormat/COFF.h"
17 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
18 #include "llvm/Object/COFF.h"
19 #include "llvm/Object/ELFObjectFile.h"
20 #include "llvm/Object/ObjectFile.h"
21 #include "llvm/Object/SymbolSize.h"
22 #include "llvm/Support/Casting.h"
23 #include "llvm/Support/DataExtractor.h"
24 #include <algorithm>
25 
26 using namespace llvm;
27 using namespace object;
28 using namespace symbolize;
29 
30 Expected<std::unique_ptr<SymbolizableObjectFile>>
31 SymbolizableObjectFile::create(const object::ObjectFile *Obj,
32                                std::unique_ptr<DIContext> DICtx,
33                                bool UntagAddresses) {
34   assert(DICtx);
35   std::unique_ptr<SymbolizableObjectFile> res(
36       new SymbolizableObjectFile(Obj, std::move(DICtx), UntagAddresses));
37   std::unique_ptr<DataExtractor> OpdExtractor;
38   uint64_t OpdAddress = 0;
39   // Find the .opd (function descriptor) section if any, for big-endian
40   // PowerPC64 ELF.
41   if (Obj->getArch() == Triple::ppc64) {
42     for (section_iterator Section : Obj->sections()) {
43       Expected<StringRef> NameOrErr = Section->getName();
44       if (!NameOrErr)
45         return NameOrErr.takeError();
46 
47       if (*NameOrErr == ".opd") {
48         Expected<StringRef> E = Section->getContents();
49         if (!E)
50           return E.takeError();
51         OpdExtractor.reset(new DataExtractor(*E, Obj->isLittleEndian(),
52                                              Obj->getBytesInAddress()));
53         OpdAddress = Section->getAddress();
54         break;
55       }
56     }
57   }
58   std::vector<std::pair<SymbolRef, uint64_t>> Symbols =
59       computeSymbolSizes(*Obj);
60   for (auto &P : Symbols)
61     if (Error E =
62             res->addSymbol(P.first, P.second, OpdExtractor.get(), OpdAddress))
63       return std::move(E);
64 
65   // If this is a COFF object and we didn't find any symbols, try the export
66   // table.
67   if (Symbols.empty()) {
68     if (auto *CoffObj = dyn_cast<COFFObjectFile>(Obj))
69       if (Error E = res->addCoffExportSymbols(CoffObj))
70         return std::move(E);
71   }
72 
73   std::vector<SymbolDesc> &Fs = res->Functions, &Os = res->Objects;
74   auto Uniquify = [](std::vector<SymbolDesc> &S) {
75     // Sort by (Addr,Size,Name). If several SymbolDescs share the same Addr,
76     // pick the one with the largest Size. This helps us avoid symbols with no
77     // size information (Size=0).
78     llvm::sort(S);
79     auto I = S.begin(), E = S.end(), J = S.begin();
80     while (I != E) {
81       auto OI = I;
82       while (++I != E && OI->Addr == I->Addr) {
83       }
84       *J++ = I[-1];
85     }
86     S.erase(J, S.end());
87   };
88   Uniquify(Fs);
89   Uniquify(Os);
90 
91   return std::move(res);
92 }
93 
94 SymbolizableObjectFile::SymbolizableObjectFile(const ObjectFile *Obj,
95                                                std::unique_ptr<DIContext> DICtx,
96                                                bool UntagAddresses)
97     : Module(Obj), DebugInfoContext(std::move(DICtx)),
98       UntagAddresses(UntagAddresses) {}
99 
100 namespace {
101 
102 struct OffsetNamePair {
103   uint32_t Offset;
104   StringRef Name;
105 
106   bool operator<(const OffsetNamePair &R) const {
107     return Offset < R.Offset;
108   }
109 };
110 
111 } // end anonymous namespace
112 
113 Error SymbolizableObjectFile::addCoffExportSymbols(
114     const COFFObjectFile *CoffObj) {
115   // Get all export names and offsets.
116   std::vector<OffsetNamePair> ExportSyms;
117   for (const ExportDirectoryEntryRef &Ref : CoffObj->export_directories()) {
118     StringRef Name;
119     uint32_t Offset;
120     if (auto EC = Ref.getSymbolName(Name))
121       return EC;
122     if (auto EC = Ref.getExportRVA(Offset))
123       return EC;
124     ExportSyms.push_back(OffsetNamePair{Offset, Name});
125   }
126   if (ExportSyms.empty())
127     return Error::success();
128 
129   // Sort by ascending offset.
130   array_pod_sort(ExportSyms.begin(), ExportSyms.end());
131 
132   // Approximate the symbol sizes by assuming they run to the next symbol.
133   // FIXME: This assumes all exports are functions.
134   uint64_t ImageBase = CoffObj->getImageBase();
135   for (auto I = ExportSyms.begin(), E = ExportSyms.end(); I != E; ++I) {
136     OffsetNamePair &Export = *I;
137     // FIXME: The last export has a one byte size now.
138     uint32_t NextOffset = I != E ? I->Offset : Export.Offset + 1;
139     uint64_t SymbolStart = ImageBase + Export.Offset;
140     uint64_t SymbolSize = NextOffset - Export.Offset;
141     Functions.push_back({SymbolStart, SymbolSize, Export.Name, 0});
142   }
143   return Error::success();
144 }
145 
146 Error SymbolizableObjectFile::addSymbol(const SymbolRef &Symbol,
147                                         uint64_t SymbolSize,
148                                         DataExtractor *OpdExtractor,
149                                         uint64_t OpdAddress) {
150   // Avoid adding symbols from an unknown/undefined section.
151   const ObjectFile &Obj = *Symbol.getObject();
152   Expected<StringRef> SymbolNameOrErr = Symbol.getName();
153   if (!SymbolNameOrErr)
154     return SymbolNameOrErr.takeError();
155   StringRef SymbolName = *SymbolNameOrErr;
156 
157   uint32_t ELFSymIdx =
158       Obj.isELF() ? ELFSymbolRef(Symbol).getRawDataRefImpl().d.b : 0;
159   Expected<section_iterator> Sec = Symbol.getSection();
160   if (!Sec || Obj.section_end() == *Sec) {
161     if (Obj.isELF()) {
162       // Store the (index, filename) pair for a file symbol.
163       ELFSymbolRef ESym(Symbol);
164       if (ESym.getELFType() == ELF::STT_FILE)
165         FileSymbols.emplace_back(ELFSymIdx, SymbolName);
166     }
167     return Error::success();
168   }
169 
170   Expected<SymbolRef::Type> SymbolTypeOrErr = Symbol.getType();
171   if (!SymbolTypeOrErr)
172     return SymbolTypeOrErr.takeError();
173   SymbolRef::Type SymbolType = *SymbolTypeOrErr;
174   if (Obj.isELF()) {
175     // Allow function and data symbols. Additionally allow STT_NONE, which are
176     // common for functions defined in assembly.
177     uint8_t Type = ELFSymbolRef(Symbol).getELFType();
178     if (Type != ELF::STT_NOTYPE && Type != ELF::STT_FUNC &&
179         Type != ELF::STT_OBJECT && Type != ELF::STT_GNU_IFUNC)
180       return Error::success();
181   } else if (SymbolType != SymbolRef::ST_Function &&
182              SymbolType != SymbolRef::ST_Data) {
183     return Error::success();
184   }
185 
186   Expected<uint64_t> SymbolAddressOrErr = Symbol.getAddress();
187   if (!SymbolAddressOrErr)
188     return SymbolAddressOrErr.takeError();
189   uint64_t SymbolAddress = *SymbolAddressOrErr;
190   if (UntagAddresses) {
191     // For kernel addresses, bits 56-63 need to be set, so we sign extend bit 55
192     // into bits 56-63 instead of masking them out.
193     SymbolAddress &= (1ull << 56) - 1;
194     SymbolAddress = (int64_t(SymbolAddress) << 8) >> 8;
195   }
196   if (OpdExtractor) {
197     // For big-endian PowerPC64 ELF, symbols in the .opd section refer to
198     // function descriptors. The first word of the descriptor is a pointer to
199     // the function's code.
200     // For the purposes of symbolization, pretend the symbol's address is that
201     // of the function's code, not the descriptor.
202     uint64_t OpdOffset = SymbolAddress - OpdAddress;
203     if (OpdExtractor->isValidOffsetForAddress(OpdOffset))
204       SymbolAddress = OpdExtractor->getAddress(&OpdOffset);
205   }
206   // Mach-O symbol table names have leading underscore, skip it.
207   if (Module->isMachO() && !SymbolName.empty() && SymbolName[0] == '_')
208     SymbolName = SymbolName.drop_front();
209 
210   if (Obj.isELF() && ELFSymbolRef(Symbol).getBinding() != ELF::STB_LOCAL)
211     ELFSymIdx = 0;
212   SymbolDesc SD = {SymbolAddress, SymbolSize, SymbolName, ELFSymIdx};
213   // DATA command symbolizes just ST_Data (ELF STT_OBJECT) symbols as an
214   // optimization. Treat everything else (e.g. ELF STT_NOTYPE, STT_FUNC and
215   // STT_GNU_IFUNC) as function symbols which can be used to symbolize
216   // addresses.
217   if (SymbolType == SymbolRef::ST_Data)
218     Objects.push_back(SD);
219   else
220     Functions.push_back(SD);
221   return Error::success();
222 }
223 
224 // Return true if this is a 32-bit x86 PE COFF module.
225 bool SymbolizableObjectFile::isWin32Module() const {
226   auto *CoffObject = dyn_cast<COFFObjectFile>(Module);
227   return CoffObject && CoffObject->getMachine() == COFF::IMAGE_FILE_MACHINE_I386;
228 }
229 
230 uint64_t SymbolizableObjectFile::getModulePreferredBase() const {
231   if (auto *CoffObject = dyn_cast<COFFObjectFile>(Module))
232     return CoffObject->getImageBase();
233   return 0;
234 }
235 
236 bool SymbolizableObjectFile::getNameFromSymbolTable(
237     SymbolRef::Type Type, uint64_t Address, std::string &Name, uint64_t &Addr,
238     uint64_t &Size, std::string &FileName) const {
239   const auto &Symbols = Type == SymbolRef::ST_Function ? Functions : Objects;
240   SymbolDesc SD{Address, UINT64_C(-1), StringRef(), 0};
241   auto SymbolIterator = llvm::upper_bound(Symbols, SD);
242   if (SymbolIterator == Symbols.begin())
243     return false;
244   --SymbolIterator;
245   if (SymbolIterator->Size != 0 &&
246       SymbolIterator->Addr + SymbolIterator->Size <= Address)
247     return false;
248   Name = SymbolIterator->Name.str();
249   Addr = SymbolIterator->Addr;
250   Size = SymbolIterator->Size;
251 
252   if (SymbolIterator->ELFLocalSymIdx != 0) {
253     // If this is an ELF local symbol, find the STT_FILE symbol preceding
254     // SymbolIterator to get the filename. The ELF spec requires the STT_FILE
255     // symbol (if present) precedes the other STB_LOCAL symbols for the file.
256     assert(Module->isELF());
257     auto It = llvm::upper_bound(
258         FileSymbols,
259         std::make_pair(SymbolIterator->ELFLocalSymIdx, StringRef()));
260     if (It != FileSymbols.begin())
261       FileName = It[-1].second.str();
262   }
263   return true;
264 }
265 
266 bool SymbolizableObjectFile::shouldOverrideWithSymbolTable(
267     FunctionNameKind FNKind, bool UseSymbolTable) const {
268   // When DWARF is used with -gline-tables-only / -gmlt, the symbol table gives
269   // better answers for linkage names than the DIContext. Otherwise, we are
270   // probably using PEs and PDBs, and we shouldn't do the override. PE files
271   // generally only contain the names of exported symbols.
272   return FNKind == FunctionNameKind::LinkageName && UseSymbolTable &&
273          isa<DWARFContext>(DebugInfoContext.get());
274 }
275 
276 DILineInfo
277 SymbolizableObjectFile::symbolizeCode(object::SectionedAddress ModuleOffset,
278                                       DILineInfoSpecifier LineInfoSpecifier,
279                                       bool UseSymbolTable) const {
280   if (ModuleOffset.SectionIndex == object::SectionedAddress::UndefSection)
281     ModuleOffset.SectionIndex =
282         getModuleSectionIndexForAddress(ModuleOffset.Address);
283   DILineInfo LineInfo =
284       DebugInfoContext->getLineInfoForAddress(ModuleOffset, LineInfoSpecifier);
285 
286   // Override function name from symbol table if necessary.
287   if (shouldOverrideWithSymbolTable(LineInfoSpecifier.FNKind, UseSymbolTable)) {
288     std::string FunctionName, FileName;
289     uint64_t Start, Size;
290     if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset.Address,
291                                FunctionName, Start, Size, FileName)) {
292       LineInfo.FunctionName = FunctionName;
293       if (LineInfo.FileName == DILineInfo::BadString && !FileName.empty())
294         LineInfo.FileName = FileName;
295     }
296   }
297   return LineInfo;
298 }
299 
300 DIInliningInfo SymbolizableObjectFile::symbolizeInlinedCode(
301     object::SectionedAddress ModuleOffset,
302     DILineInfoSpecifier LineInfoSpecifier, bool UseSymbolTable) const {
303   if (ModuleOffset.SectionIndex == object::SectionedAddress::UndefSection)
304     ModuleOffset.SectionIndex =
305         getModuleSectionIndexForAddress(ModuleOffset.Address);
306   DIInliningInfo InlinedContext = DebugInfoContext->getInliningInfoForAddress(
307       ModuleOffset, LineInfoSpecifier);
308 
309   // Make sure there is at least one frame in context.
310   if (InlinedContext.getNumberOfFrames() == 0)
311     InlinedContext.addFrame(DILineInfo());
312 
313   // Override the function name in lower frame with name from symbol table.
314   if (shouldOverrideWithSymbolTable(LineInfoSpecifier.FNKind, UseSymbolTable)) {
315     std::string FunctionName, FileName;
316     uint64_t Start, Size;
317     if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset.Address,
318                                FunctionName, Start, Size, FileName)) {
319       DILineInfo *LI = InlinedContext.getMutableFrame(
320           InlinedContext.getNumberOfFrames() - 1);
321       LI->FunctionName = FunctionName;
322       if (LI->FileName == DILineInfo::BadString && !FileName.empty())
323         LI->FileName = FileName;
324     }
325   }
326 
327   return InlinedContext;
328 }
329 
330 DIGlobal SymbolizableObjectFile::symbolizeData(
331     object::SectionedAddress ModuleOffset) const {
332   DIGlobal Res;
333   std::string FileName;
334   getNameFromSymbolTable(SymbolRef::ST_Data, ModuleOffset.Address, Res.Name,
335                          Res.Start, Res.Size, FileName);
336   return Res;
337 }
338 
339 std::vector<DILocal> SymbolizableObjectFile::symbolizeFrame(
340     object::SectionedAddress ModuleOffset) const {
341   if (ModuleOffset.SectionIndex == object::SectionedAddress::UndefSection)
342     ModuleOffset.SectionIndex =
343         getModuleSectionIndexForAddress(ModuleOffset.Address);
344   return DebugInfoContext->getLocalsForAddress(ModuleOffset);
345 }
346 
347 /// Search for the first occurence of specified Address in ObjectFile.
348 uint64_t SymbolizableObjectFile::getModuleSectionIndexForAddress(
349     uint64_t Address) const {
350 
351   for (SectionRef Sec : Module->sections()) {
352     if (!Sec.isText() || Sec.isVirtual())
353       continue;
354 
355     if (Address >= Sec.getAddress() &&
356         Address < Sec.getAddress() + Sec.getSize())
357       return Sec.getIndex();
358   }
359 
360   return object::SectionedAddress::UndefSection;
361 }
362