1 #include "llvm/DebugInfo/PDB/Native/SymbolCache.h"
2 
3 #include "llvm/DebugInfo/CodeView/DebugLinesSubsection.h"
4 #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
5 #include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
6 #include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
7 #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
8 #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
9 #include "llvm/DebugInfo/PDB/Native/ISectionContribVisitor.h"
10 #include "llvm/DebugInfo/PDB/Native/ModuleDebugStream.h"
11 #include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h"
12 #include "llvm/DebugInfo/PDB/Native/NativeEnumGlobals.h"
13 #include "llvm/DebugInfo/PDB/Native/NativeEnumTypes.h"
14 #include "llvm/DebugInfo/PDB/Native/NativeFunctionSymbol.h"
15 #include "llvm/DebugInfo/PDB/Native/NativePublicSymbol.h"
16 #include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
17 #include "llvm/DebugInfo/PDB/Native/NativeSession.h"
18 #include "llvm/DebugInfo/PDB/Native/NativeTypeArray.h"
19 #include "llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h"
20 #include "llvm/DebugInfo/PDB/Native/NativeTypeEnum.h"
21 #include "llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h"
22 #include "llvm/DebugInfo/PDB/Native/NativeTypePointer.h"
23 #include "llvm/DebugInfo/PDB/Native/NativeTypeTypedef.h"
24 #include "llvm/DebugInfo/PDB/Native/NativeTypeUDT.h"
25 #include "llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h"
26 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
27 #include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
28 #include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
29 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
30 #include "llvm/DebugInfo/PDB/PDBSymbol.h"
31 #include "llvm/DebugInfo/PDB/PDBSymbolCompiland.h"
32 #include "llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h"
33 
34 using namespace llvm;
35 using namespace llvm::codeview;
36 using namespace llvm::pdb;
37 
38 // Maps codeview::SimpleTypeKind of a built-in type to the parameters necessary
39 // to instantiate a NativeBuiltinSymbol for that type.
40 static const struct BuiltinTypeEntry {
41   codeview::SimpleTypeKind Kind;
42   PDB_BuiltinType Type;
43   uint32_t Size;
44 } BuiltinTypes[] = {
45     {codeview::SimpleTypeKind::None, PDB_BuiltinType::None, 0},
46     {codeview::SimpleTypeKind::Void, PDB_BuiltinType::Void, 0},
47     {codeview::SimpleTypeKind::HResult, PDB_BuiltinType::HResult, 4},
48     {codeview::SimpleTypeKind::Int16Short, PDB_BuiltinType::Int, 2},
49     {codeview::SimpleTypeKind::UInt16Short, PDB_BuiltinType::UInt, 2},
50     {codeview::SimpleTypeKind::Int32, PDB_BuiltinType::Int, 4},
51     {codeview::SimpleTypeKind::UInt32, PDB_BuiltinType::UInt, 4},
52     {codeview::SimpleTypeKind::Int32Long, PDB_BuiltinType::Int, 4},
53     {codeview::SimpleTypeKind::UInt32Long, PDB_BuiltinType::UInt, 4},
54     {codeview::SimpleTypeKind::Int64Quad, PDB_BuiltinType::Int, 8},
55     {codeview::SimpleTypeKind::UInt64Quad, PDB_BuiltinType::UInt, 8},
56     {codeview::SimpleTypeKind::NarrowCharacter, PDB_BuiltinType::Char, 1},
57     {codeview::SimpleTypeKind::WideCharacter, PDB_BuiltinType::WCharT, 2},
58     {codeview::SimpleTypeKind::Character16, PDB_BuiltinType::Char16, 2},
59     {codeview::SimpleTypeKind::Character32, PDB_BuiltinType::Char32, 4},
60     {codeview::SimpleTypeKind::SignedCharacter, PDB_BuiltinType::Char, 1},
61     {codeview::SimpleTypeKind::UnsignedCharacter, PDB_BuiltinType::UInt, 1},
62     {codeview::SimpleTypeKind::Float32, PDB_BuiltinType::Float, 4},
63     {codeview::SimpleTypeKind::Float64, PDB_BuiltinType::Float, 8},
64     {codeview::SimpleTypeKind::Float80, PDB_BuiltinType::Float, 10},
65     {codeview::SimpleTypeKind::Boolean8, PDB_BuiltinType::Bool, 1},
66     // This table can be grown as necessary, but these are the only types we've
67     // needed so far.
68 };
69 
70 SymbolCache::SymbolCache(NativeSession &Session, DbiStream *Dbi)
71     : Session(Session), Dbi(Dbi), AddrToModuleIndex(IMapAllocator) {
72   // Id 0 is reserved for the invalid symbol.
73   Cache.push_back(nullptr);
74 
75   if (Dbi)
76     Compilands.resize(Dbi->modules().getModuleCount());
77 }
78 
79 std::unique_ptr<IPDBEnumSymbols>
80 SymbolCache::createTypeEnumerator(TypeLeafKind Kind) {
81   return createTypeEnumerator(std::vector<TypeLeafKind>{Kind});
82 }
83 
84 std::unique_ptr<IPDBEnumSymbols>
85 SymbolCache::createTypeEnumerator(std::vector<TypeLeafKind> Kinds) {
86   auto Tpi = Session.getPDBFile().getPDBTpiStream();
87   if (!Tpi) {
88     consumeError(Tpi.takeError());
89     return nullptr;
90   }
91   auto &Types = Tpi->typeCollection();
92   return std::unique_ptr<IPDBEnumSymbols>(
93       new NativeEnumTypes(Session, Types, std::move(Kinds)));
94 }
95 
96 std::unique_ptr<IPDBEnumSymbols>
97 SymbolCache::createGlobalsEnumerator(codeview::SymbolKind Kind) {
98   return std::unique_ptr<IPDBEnumSymbols>(
99       new NativeEnumGlobals(Session, {Kind}));
100 }
101 
102 SymIndexId SymbolCache::createSimpleType(TypeIndex Index,
103                                          ModifierOptions Mods) {
104   if (Index.getSimpleMode() != codeview::SimpleTypeMode::Direct)
105     return createSymbol<NativeTypePointer>(Index);
106 
107   const auto Kind = Index.getSimpleKind();
108   const auto It = std::find_if(
109       std::begin(BuiltinTypes), std::end(BuiltinTypes),
110       [Kind](const BuiltinTypeEntry &Builtin) { return Builtin.Kind == Kind; });
111   if (It == std::end(BuiltinTypes))
112     return 0;
113   return createSymbol<NativeTypeBuiltin>(Mods, It->Type, It->Size);
114 }
115 
116 SymIndexId
117 SymbolCache::createSymbolForModifiedType(codeview::TypeIndex ModifierTI,
118                                          codeview::CVType CVT) {
119   ModifierRecord Record;
120   if (auto EC = TypeDeserializer::deserializeAs<ModifierRecord>(CVT, Record)) {
121     consumeError(std::move(EC));
122     return 0;
123   }
124 
125   if (Record.ModifiedType.isSimple())
126     return createSimpleType(Record.ModifiedType, Record.Modifiers);
127 
128   // Make sure we create and cache a record for the unmodified type.
129   SymIndexId UnmodifiedId = findSymbolByTypeIndex(Record.ModifiedType);
130   NativeRawSymbol &UnmodifiedNRS = *Cache[UnmodifiedId];
131 
132   switch (UnmodifiedNRS.getSymTag()) {
133   case PDB_SymType::Enum:
134     return createSymbol<NativeTypeEnum>(
135         static_cast<NativeTypeEnum &>(UnmodifiedNRS), std::move(Record));
136   case PDB_SymType::UDT:
137     return createSymbol<NativeTypeUDT>(
138         static_cast<NativeTypeUDT &>(UnmodifiedNRS), std::move(Record));
139   default:
140     // No other types can be modified.  (LF_POINTER, for example, records
141     // its modifiers a different way.
142     assert(false && "Invalid LF_MODIFIER record");
143     break;
144   }
145   return 0;
146 }
147 
148 SymIndexId SymbolCache::findSymbolByTypeIndex(codeview::TypeIndex Index) {
149   // First see if it's already in our cache.
150   const auto Entry = TypeIndexToSymbolId.find(Index);
151   if (Entry != TypeIndexToSymbolId.end())
152     return Entry->second;
153 
154   // Symbols for built-in types are created on the fly.
155   if (Index.isSimple()) {
156     SymIndexId Result = createSimpleType(Index, ModifierOptions::None);
157     assert(TypeIndexToSymbolId.count(Index) == 0);
158     TypeIndexToSymbolId[Index] = Result;
159     return Result;
160   }
161 
162   // We need to instantiate and cache the desired type symbol.
163   auto Tpi = Session.getPDBFile().getPDBTpiStream();
164   if (!Tpi) {
165     consumeError(Tpi.takeError());
166     return 0;
167   }
168   codeview::LazyRandomTypeCollection &Types = Tpi->typeCollection();
169   codeview::CVType CVT = Types.getType(Index);
170 
171   if (isUdtForwardRef(CVT)) {
172     Expected<TypeIndex> EFD = Tpi->findFullDeclForForwardRef(Index);
173 
174     if (!EFD)
175       consumeError(EFD.takeError());
176     else if (*EFD != Index) {
177       assert(!isUdtForwardRef(Types.getType(*EFD)));
178       SymIndexId Result = findSymbolByTypeIndex(*EFD);
179       // Record a mapping from ForwardRef -> SymIndex of complete type so that
180       // we'll take the fast path next time.
181       assert(TypeIndexToSymbolId.count(Index) == 0);
182       TypeIndexToSymbolId[Index] = Result;
183       return Result;
184     }
185   }
186 
187   // At this point if we still have a forward ref udt it means the full decl was
188   // not in the PDB.  We just have to deal with it and use the forward ref.
189   SymIndexId Id = 0;
190   switch (CVT.kind()) {
191   case codeview::LF_ENUM:
192     Id = createSymbolForType<NativeTypeEnum, EnumRecord>(Index, std::move(CVT));
193     break;
194   case codeview::LF_ARRAY:
195     Id = createSymbolForType<NativeTypeArray, ArrayRecord>(Index,
196                                                            std::move(CVT));
197     break;
198   case codeview::LF_CLASS:
199   case codeview::LF_STRUCTURE:
200   case codeview::LF_INTERFACE:
201     Id = createSymbolForType<NativeTypeUDT, ClassRecord>(Index, std::move(CVT));
202     break;
203   case codeview::LF_UNION:
204     Id = createSymbolForType<NativeTypeUDT, UnionRecord>(Index, std::move(CVT));
205     break;
206   case codeview::LF_POINTER:
207     Id = createSymbolForType<NativeTypePointer, PointerRecord>(Index,
208                                                                std::move(CVT));
209     break;
210   case codeview::LF_MODIFIER:
211     Id = createSymbolForModifiedType(Index, std::move(CVT));
212     break;
213   case codeview::LF_PROCEDURE:
214     Id = createSymbolForType<NativeTypeFunctionSig, ProcedureRecord>(
215         Index, std::move(CVT));
216     break;
217   case codeview::LF_MFUNCTION:
218     Id = createSymbolForType<NativeTypeFunctionSig, MemberFunctionRecord>(
219         Index, std::move(CVT));
220     break;
221   case codeview::LF_VTSHAPE:
222     Id = createSymbolForType<NativeTypeVTShape, VFTableShapeRecord>(
223         Index, std::move(CVT));
224     break;
225   default:
226     Id = createSymbolPlaceholder();
227     break;
228   }
229   if (Id != 0) {
230     assert(TypeIndexToSymbolId.count(Index) == 0);
231     TypeIndexToSymbolId[Index] = Id;
232   }
233   return Id;
234 }
235 
236 std::unique_ptr<PDBSymbol>
237 SymbolCache::getSymbolById(SymIndexId SymbolId) const {
238   assert(SymbolId < Cache.size());
239 
240   // Id 0 is reserved.
241   if (SymbolId == 0 || SymbolId >= Cache.size())
242     return nullptr;
243 
244   // Make sure to handle the case where we've inserted a placeholder symbol
245   // for types we don't yet suppport.
246   NativeRawSymbol *NRS = Cache[SymbolId].get();
247   if (!NRS)
248     return nullptr;
249 
250   return PDBSymbol::create(Session, *NRS);
251 }
252 
253 NativeRawSymbol &SymbolCache::getNativeSymbolById(SymIndexId SymbolId) const {
254   return *Cache[SymbolId];
255 }
256 
257 uint32_t SymbolCache::getNumCompilands() const {
258   if (!Dbi)
259     return 0;
260 
261   return Dbi->modules().getModuleCount();
262 }
263 
264 SymIndexId SymbolCache::getOrCreateGlobalSymbolByOffset(uint32_t Offset) {
265   auto Iter = GlobalOffsetToSymbolId.find(Offset);
266   if (Iter != GlobalOffsetToSymbolId.end())
267     return Iter->second;
268 
269   SymbolStream &SS = cantFail(Session.getPDBFile().getPDBSymbolStream());
270   CVSymbol CVS = SS.readRecord(Offset);
271   SymIndexId Id = 0;
272   switch (CVS.kind()) {
273   case SymbolKind::S_UDT: {
274     UDTSym US = cantFail(SymbolDeserializer::deserializeAs<UDTSym>(CVS));
275     Id = createSymbol<NativeTypeTypedef>(std::move(US));
276     break;
277   }
278   default:
279     Id = createSymbolPlaceholder();
280     break;
281   }
282   if (Id != 0) {
283     assert(GlobalOffsetToSymbolId.count(Offset) == 0);
284     GlobalOffsetToSymbolId[Offset] = Id;
285   }
286 
287   return Id;
288 }
289 
290 std::unique_ptr<PDBSymbol>
291 SymbolCache::findSymbolBySectOffset(uint32_t Sect, uint32_t Offset,
292                                     PDB_SymType Type) {
293   if (AddrToModuleIndex.empty())
294     parseSectionContribs();
295 
296   switch (Type) {
297   case PDB_SymType::Function:
298     return findFunctionSymbolBySectOffset(Sect, Offset);
299   case PDB_SymType::PublicSymbol:
300     return findPublicSymbolBySectOffset(Sect, Offset);
301   case PDB_SymType::None: {
302     // FIXME: Implement for PDB_SymType::Data.
303     if (auto Sym = findFunctionSymbolBySectOffset(Sect, Offset))
304       return Sym;
305     return nullptr;
306   }
307   default:
308     return nullptr;
309   }
310 }
311 
312 std::unique_ptr<PDBSymbol>
313 SymbolCache::findFunctionSymbolBySectOffset(uint32_t Sect, uint32_t Offset) {
314   auto Iter = AddressToFunctionSymId.find({Sect, Offset});
315   if (Iter != AddressToFunctionSymId.end())
316     return getSymbolById(Iter->second);
317 
318   if (!Dbi)
319     return nullptr;
320 
321   auto Modi = getModuleIndexForAddr(Sect, Offset);
322   if (!Modi)
323     return nullptr;
324 
325   DbiModuleDescriptor ModDesc = Dbi->modules().getModuleDescriptor(*Modi);
326   uint16_t StreamIndex = ModDesc.getModuleStreamIndex();
327   if (StreamIndex == kInvalidStreamIndex)
328     return nullptr;
329   auto ModStreamData = Session.getPDBFile().createIndexedStream(StreamIndex);
330   ModuleDebugStreamRef ModS(ModDesc, std::move(ModStreamData));
331   if (auto EC = ModS.reload()) {
332     consumeError(std::move(EC));
333     return nullptr;
334   }
335 
336   // Search for the symbol in this module.
337   CVSymbolArray Syms = ModS.getSymbolArray();
338   for (auto I = Syms.begin(), E = Syms.end(); I != E; ++I) {
339     if (I->kind() != S_LPROC32 && I->kind() != S_GPROC32)
340       continue;
341     auto PS = cantFail(SymbolDeserializer::deserializeAs<ProcSym>(*I));
342     if (Sect == PS.Segment && Offset >= PS.CodeOffset &&
343         Offset < PS.CodeOffset + PS.CodeSize) {
344       SymIndexId Id = createSymbol<NativeFunctionSymbol>(PS);
345       return getSymbolById(Id);
346     }
347 
348     // Jump to the end of this ProcSym.
349     I = Syms.at(PS.End);
350   }
351   return nullptr;
352 }
353 
354 std::unique_ptr<PDBSymbol>
355 SymbolCache::findPublicSymbolBySectOffset(uint32_t Sect, uint32_t Offset) {
356   auto Iter = AddressToPublicSymId.find({Sect, Offset});
357   if (Iter != AddressToPublicSymId.end())
358     return getSymbolById(Iter->second);
359 
360   auto Publics = Session.getPDBFile().getPDBPublicsStream();
361   if (!Publics)
362     return nullptr;
363 
364   auto ExpectedSyms = Session.getPDBFile().getPDBSymbolStream();
365   if (!ExpectedSyms)
366     return nullptr;
367   BinaryStreamRef SymStream =
368       ExpectedSyms->getSymbolArray().getUnderlyingStream();
369 
370   // Use binary search to find the first public symbol with an address greater
371   // than or equal to Sect, Offset.
372   auto AddrMap = Publics->getAddressMap();
373   auto First = AddrMap.begin();
374   auto It = AddrMap.begin();
375   size_t Count = AddrMap.size();
376   size_t Half;
377   while (Count > 0) {
378     It = First;
379     Half = Count / 2;
380     It += Half;
381     Expected<CVSymbol> Sym = readSymbolFromStream(SymStream, *It);
382     if (!Sym) {
383       consumeError(Sym.takeError());
384       return nullptr;
385     }
386 
387     auto PS =
388         cantFail(SymbolDeserializer::deserializeAs<PublicSym32>(Sym.get()));
389     if (PS.Segment < Sect || (PS.Segment == Sect && PS.Offset <= Offset)) {
390       First = ++It;
391       Count -= Half + 1;
392     } else
393       Count = Half;
394   }
395   --It;
396 
397   Expected<CVSymbol> Sym = readSymbolFromStream(SymStream, *It);
398   if (!Sym) {
399     consumeError(Sym.takeError());
400     return nullptr;
401   }
402   auto PS = cantFail(SymbolDeserializer::deserializeAs<PublicSym32>(Sym.get()));
403   SymIndexId Id = createSymbol<NativePublicSymbol>(PS);
404   return getSymbolById(Id);
405 }
406 
407 std::unique_ptr<PDBSymbolCompiland>
408 SymbolCache::getOrCreateCompiland(uint32_t Index) {
409   if (!Dbi)
410     return nullptr;
411 
412   if (Index >= Compilands.size())
413     return nullptr;
414 
415   if (Compilands[Index] == 0) {
416     const DbiModuleList &Modules = Dbi->modules();
417     Compilands[Index] =
418         createSymbol<NativeCompilandSymbol>(Modules.getModuleDescriptor(Index));
419   }
420 
421   return Session.getConcreteSymbolById<PDBSymbolCompiland>(Compilands[Index]);
422 }
423 
424 void SymbolCache::parseSectionContribs() {
425   if (!Dbi)
426     return;
427 
428   class Visitor : public ISectionContribVisitor {
429     NativeSession &Session;
430     IMap &AddrMap;
431 
432   public:
433     Visitor(NativeSession &Session, IMap &AddrMap)
434         : Session(Session), AddrMap(AddrMap) {}
435     void visit(const SectionContrib &C) override {
436       if (C.Size == 0)
437         return;
438 
439       uint64_t VA = Session.getVAFromSectOffset(C.ISect, C.Off);
440       uint64_t End = VA + C.Size;
441 
442       // Ignore overlapping sections based on the assumption that a valid
443       // PDB file should not have overlaps.
444       if (!AddrMap.overlaps(VA, End))
445         AddrMap.insert(VA, End, C.Imod);
446     }
447     void visit(const SectionContrib2 &C) override { visit(C.Base); }
448   };
449 
450   Visitor V(Session, AddrToModuleIndex);
451   Dbi->visitSectionContributions(V);
452 }
453 
454 Optional<uint16_t> SymbolCache::getModuleIndexForAddr(uint32_t Sect,
455                                                       uint32_t Offset) const {
456   auto Iter = AddrToModuleIndex.find(Session.getVAFromSectOffset(Sect, Offset));
457   if (Iter == AddrToModuleIndex.end())
458     return None;
459   return Iter.value();
460 }
461