1 //==- include/llvm/CodeGen/AccelTable.h - Accelerator Tables -----*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file contains support for writing accelerator tables.
10 ///
11 //===----------------------------------------------------------------------===//
12
13 #ifndef LLVM_CODEGEN_ACCELTABLE_H
14 #define LLVM_CODEGEN_ACCELTABLE_H
15
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/MapVector.h"
18 #include "llvm/ADT/STLFunctionalExtras.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/BinaryFormat/Dwarf.h"
21 #include "llvm/CodeGen/DIE.h"
22 #include "llvm/CodeGen/DwarfStringPoolEntry.h"
23 #include "llvm/Support/Allocator.h"
24 #include "llvm/Support/DJB.h"
25 #include "llvm/Support/Debug.h"
26 #include <cstdint>
27 #include <variant>
28 #include <vector>
29
30 /// \file
31 /// The DWARF and Apple accelerator tables are an indirect hash table optimized
32 /// for null lookup rather than access to known data. The Apple accelerator
33 /// tables are a precursor of the newer DWARF v5 accelerator tables. Both
34 /// formats share common design ideas.
35 ///
36 /// The Apple accelerator table are output into an on-disk format that looks
37 /// like this:
38 ///
39 /// .------------------.
40 /// | HEADER |
41 /// |------------------|
42 /// | BUCKETS |
43 /// |------------------|
44 /// | HASHES |
45 /// |------------------|
46 /// | OFFSETS |
47 /// |------------------|
48 /// | DATA |
49 /// `------------------'
50 ///
51 /// The header contains a magic number, version, type of hash function,
52 /// the number of buckets, total number of hashes, and room for a special struct
53 /// of data and the length of that struct.
54 ///
55 /// The buckets contain an index (e.g. 6) into the hashes array. The hashes
56 /// section contains all of the 32-bit hash values in contiguous memory, and the
57 /// offsets contain the offset into the data area for the particular hash.
58 ///
59 /// For a lookup example, we could hash a function name and take it modulo the
60 /// number of buckets giving us our bucket. From there we take the bucket value
61 /// as an index into the hashes table and look at each successive hash as long
62 /// as the hash value is still the same modulo result (bucket value) as earlier.
63 /// If we have a match we look at that same entry in the offsets table and grab
64 /// the offset in the data for our final match.
65 ///
66 /// The DWARF v5 accelerator table consists of zero or more name indices that
67 /// are output into an on-disk format that looks like this:
68 ///
69 /// .------------------.
70 /// | HEADER |
71 /// |------------------|
72 /// | CU LIST |
73 /// |------------------|
74 /// | LOCAL TU LIST |
75 /// |------------------|
76 /// | FOREIGN TU LIST |
77 /// |------------------|
78 /// | HASH TABLE |
79 /// |------------------|
80 /// | NAME TABLE |
81 /// |------------------|
82 /// | ABBREV TABLE |
83 /// |------------------|
84 /// | ENTRY POOL |
85 /// `------------------'
86 ///
87 /// For the full documentation please refer to the DWARF 5 standard.
88 ///
89 ///
90 /// This file defines the class template AccelTable, which is represents an
91 /// abstract view of an Accelerator table, without any notion of an on-disk
92 /// layout. This class is parameterized by an entry type, which should derive
93 /// from AccelTableData. This is the type of individual entries in the table,
94 /// and it should store the data necessary to emit them. AppleAccelTableData is
95 /// the base class for Apple Accelerator Table entries, which have a uniform
96 /// structure based on a sequence of Atoms. There are different sub-classes
97 /// derived from AppleAccelTable, which differ in the set of Atoms and how they
98 /// obtain their values.
99 ///
100 /// An Apple Accelerator Table can be serialized by calling emitAppleAccelTable
101 /// function.
102
103 namespace llvm {
104
105 class AsmPrinter;
106 class DwarfDebug;
107 class DwarfTypeUnit;
108 class MCSymbol;
109 class raw_ostream;
110
111 /// Interface which the different types of accelerator table data have to
112 /// conform. It serves as a base class for different values of the template
113 /// argument of the AccelTable class template.
114 class AccelTableData {
115 public:
116 virtual ~AccelTableData() = default;
117
118 bool operator<(const AccelTableData &Other) const {
119 return order() < Other.order();
120 }
121
122 // Subclasses should implement:
123 // static uint32_t hash(StringRef Name);
124
125 #ifndef NDEBUG
126 virtual void print(raw_ostream &OS) const = 0;
127 #endif
128 protected:
129 virtual uint64_t order() const = 0;
130 };
131
132 /// A base class holding non-template-dependant functionality of the AccelTable
133 /// class. Clients should not use this class directly but rather instantiate
134 /// AccelTable with a type derived from AccelTableData.
135 class AccelTableBase {
136 public:
137 using HashFn = uint32_t(StringRef);
138
139 /// Represents a group of entries with identical name (and hence, hash value).
140 struct HashData {
141 DwarfStringPoolEntryRef Name;
142 uint32_t HashValue;
143 std::vector<AccelTableData *> Values;
144 MCSymbol *Sym;
145
146 /// Get all AccelTableData cast as a `T`.
getValuesHashData147 template <typename T = AccelTableData *> auto getValues() const {
148 static_assert(std::is_pointer<T>());
149 static_assert(
150 std::is_base_of<AccelTableData, std::remove_pointer_t<T>>());
151 return map_range(
152 Values, [](AccelTableData *Data) { return static_cast<T>(Data); });
153 }
154
155 #ifndef NDEBUG
156 void print(raw_ostream &OS) const;
dumpHashData157 void dump() const { print(dbgs()); }
158 #endif
159 };
160 using HashList = std::vector<HashData *>;
161 using BucketList = std::vector<HashList>;
162
163 protected:
164 /// Allocator for HashData and Values.
165 BumpPtrAllocator Allocator;
166
167 using StringEntries = MapVector<StringRef, HashData>;
168 StringEntries Entries;
169
170 HashFn *Hash;
171 uint32_t BucketCount = 0;
172 uint32_t UniqueHashCount = 0;
173
174 HashList Hashes;
175 BucketList Buckets;
176
177 void computeBucketCount();
178
AccelTableBase(HashFn * Hash)179 AccelTableBase(HashFn *Hash) : Hash(Hash) {}
180
181 public:
182 void finalize(AsmPrinter *Asm, StringRef Prefix);
getBuckets()183 ArrayRef<HashList> getBuckets() const { return Buckets; }
getBucketCount()184 uint32_t getBucketCount() const { return BucketCount; }
getUniqueHashCount()185 uint32_t getUniqueHashCount() const { return UniqueHashCount; }
getUniqueNameCount()186 uint32_t getUniqueNameCount() const { return Entries.size(); }
187
188 #ifndef NDEBUG
189 void print(raw_ostream &OS) const;
dump()190 void dump() const { print(dbgs()); }
191 #endif
192
193 AccelTableBase(const AccelTableBase &) = delete;
194 void operator=(const AccelTableBase &) = delete;
195 };
196
197 /// This class holds an abstract representation of an Accelerator Table,
198 /// consisting of a sequence of buckets, each bucket containint a sequence of
199 /// HashData entries. The class is parameterized by the type of entries it
200 /// holds. The type template parameter also defines the hash function to use for
201 /// hashing names.
202 template <typename DataT> class AccelTable : public AccelTableBase {
203 public:
AccelTable()204 AccelTable() : AccelTableBase(DataT::hash) {}
205
206 template <typename... Types>
207 void addName(DwarfStringPoolEntryRef Name, Types &&... Args);
clear()208 void clear() { Entries.clear(); }
209 void addEntries(AccelTable<DataT> &Table);
getEntries()210 const StringEntries getEntries() const { return Entries; }
211 };
212
213 template <typename AccelTableDataT>
214 template <typename... Types>
addName(DwarfStringPoolEntryRef Name,Types &&...Args)215 void AccelTable<AccelTableDataT>::addName(DwarfStringPoolEntryRef Name,
216 Types &&... Args) {
217 assert(Buckets.empty() && "Already finalized!");
218 // If the string is in the list already then add this die to the list
219 // otherwise add a new one.
220 auto &It = Entries[Name.getString()];
221 if (It.Values.empty()) {
222 It.Name = Name;
223 It.HashValue = Hash(Name.getString());
224 }
225 It.Values.push_back(new (Allocator)
226 AccelTableDataT(std::forward<Types>(Args)...));
227 }
228
229 /// A base class for different implementations of Data classes for Apple
230 /// Accelerator Tables. The columns in the table are defined by the static Atoms
231 /// variable defined on the subclasses.
232 class AppleAccelTableData : public AccelTableData {
233 public:
234 /// An Atom defines the form of the data in an Apple accelerator table.
235 /// Conceptually it is a column in the accelerator consisting of a type and a
236 /// specification of the form of its data.
237 struct Atom {
238 /// Atom Type.
239 const uint16_t Type;
240 /// DWARF Form.
241 const uint16_t Form;
242
AtomAtom243 constexpr Atom(uint16_t Type, uint16_t Form) : Type(Type), Form(Form) {}
244
245 #ifndef NDEBUG
246 void print(raw_ostream &OS) const;
dumpAtom247 void dump() const { print(dbgs()); }
248 #endif
249 };
250 // Subclasses should define:
251 // static constexpr Atom Atoms[];
252
253 virtual void emit(AsmPrinter *Asm) const = 0;
254
hash(StringRef Buffer)255 static uint32_t hash(StringRef Buffer) { return djbHash(Buffer); }
256 };
257
258 /// Helper class to identify an entry in DWARF5AccelTable based on their DIE
259 /// offset and UnitID.
260 struct OffsetAndUnitID : std::pair<uint64_t, uint32_t> {
261 using Base = std::pair<uint64_t, uint32_t>;
OffsetAndUnitIDOffsetAndUnitID262 OffsetAndUnitID(Base B) : Base(B) {}
263
OffsetAndUnitIDOffsetAndUnitID264 OffsetAndUnitID(uint64_t Offset, uint32_t UnitID) : Base(Offset, UnitID) {}
offsetOffsetAndUnitID265 uint64_t offset() const { return first; };
unitIDOffsetAndUnitID266 uint32_t unitID() const { return second; };
267 };
268
269 template <>
270 struct DenseMapInfo<OffsetAndUnitID> : DenseMapInfo<OffsetAndUnitID::Base> {};
271
272 /// The Data class implementation for DWARF v5 accelerator table. Unlike the
273 /// Apple Data classes, this class is just a DIE wrapper, and does not know to
274 /// serialize itself. The complete serialization logic is in the
275 /// emitDWARF5AccelTable function.
276 class DWARF5AccelTableData : public AccelTableData {
277 public:
278 struct AttributeEncoding {
279 dwarf::Index Index;
280 dwarf::Form Form;
281 };
282
283 static uint32_t hash(StringRef Name) { return caseFoldingDjbHash(Name); }
284
285 DWARF5AccelTableData(const DIE &Die, const uint32_t UnitID,
286 const bool IsTU = false);
287 DWARF5AccelTableData(const uint64_t DieOffset,
288 const std::optional<uint64_t> DefiningParentOffset,
289 const unsigned DieTag, const unsigned UnitID,
290 const bool IsTU = false)
291 : OffsetVal(DieOffset), ParentOffset(DefiningParentOffset),
292 DieTag(DieTag), UnitID(UnitID), IsTU(IsTU) {}
293
294 #ifndef NDEBUG
295 void print(raw_ostream &OS) const override;
296 #endif
297
298 uint64_t getDieOffset() const {
299 assert(isNormalized() && "Accessing DIE Offset before normalizing.");
300 return std::get<uint64_t>(OffsetVal);
301 }
302
303 OffsetAndUnitID getDieOffsetAndUnitID() const {
304 return {getDieOffset(), UnitID};
305 }
306
307 unsigned getDieTag() const { return DieTag; }
308 unsigned getUnitID() const { return UnitID; }
309 bool isTU() const { return IsTU; }
310 void normalizeDIEToOffset() {
311 assert(!isNormalized() && "Accessing offset after normalizing.");
312 const DIE *Entry = std::get<const DIE *>(OffsetVal);
313 ParentOffset = getDefiningParentDieOffset(*Entry);
314 OffsetVal = Entry->getOffset();
315 }
316 bool isNormalized() const {
317 return std::holds_alternative<uint64_t>(OffsetVal);
318 }
319
320 std::optional<uint64_t> getParentDieOffset() const {
321 if (auto OffsetAndId = getParentDieOffsetAndUnitID())
322 return OffsetAndId->offset();
323 return {};
324 }
325
326 std::optional<OffsetAndUnitID> getParentDieOffsetAndUnitID() const {
327 assert(isNormalized() && "Accessing DIE Offset before normalizing.");
328 if (!ParentOffset)
329 return std::nullopt;
330 return OffsetAndUnitID(*ParentOffset, getUnitID());
331 }
332
333 /// If `Die` has a non-null parent and the parent is not a declaration,
334 /// return its offset.
335 static std::optional<uint64_t> getDefiningParentDieOffset(const DIE &Die);
336
337 protected:
338 std::variant<const DIE *, uint64_t> OffsetVal;
339 std::optional<uint64_t> ParentOffset;
340 uint32_t DieTag : 16;
341 uint32_t UnitID : 15;
342 uint32_t IsTU : 1;
343
344 uint64_t order() const override { return getDieOffset(); }
345 };
346
347 struct TypeUnitMetaInfo {
348 // Symbol for start of the TU section or signature if this is SplitDwarf.
349 std::variant<MCSymbol *, uint64_t> LabelOrSignature;
350 // Unique ID of Type Unit.
351 unsigned UniqueID;
352 };
353 using TUVectorTy = SmallVector<TypeUnitMetaInfo, 1>;
354 class DWARF5AccelTable : public AccelTable<DWARF5AccelTableData> {
355 // Symbols to start of all the TU sections that were generated.
356 TUVectorTy TUSymbolsOrHashes;
357
358 public:
359 struct UnitIndexAndEncoding {
360 unsigned Index;
361 DWARF5AccelTableData::AttributeEncoding Encoding;
362 };
363 /// Returns type units that were constructed.
364 const TUVectorTy &getTypeUnitsSymbols() { return TUSymbolsOrHashes; }
365 /// Add a type unit start symbol.
366 void addTypeUnitSymbol(DwarfTypeUnit &U);
367 /// Add a type unit Signature.
368 void addTypeUnitSignature(DwarfTypeUnit &U);
369 /// Convert DIE entries to explicit offset.
370 /// Needs to be called after DIE offsets are computed.
371 void convertDieToOffset() {
372 for (auto &Entry : Entries) {
373 for (auto *Data : Entry.second.getValues<DWARF5AccelTableData *>()) {
374 // For TU we normalize as each Unit is emitted.
375 // So when this is invoked after CU construction we will be in mixed
376 // state.
377 if (!Data->isNormalized())
378 Data->normalizeDIEToOffset();
379 }
380 }
381 }
382
383 void addTypeEntries(DWARF5AccelTable &Table) {
384 for (auto &Entry : Table.getEntries()) {
385 for (auto *Data : Entry.second.getValues<DWARF5AccelTableData *>()) {
386 addName(Entry.second.Name, Data->getDieOffset(),
387 Data->getParentDieOffset(), Data->getDieTag(),
388 Data->getUnitID(), true);
389 }
390 }
391 }
392 };
393
394 void emitAppleAccelTableImpl(AsmPrinter *Asm, AccelTableBase &Contents,
395 StringRef Prefix, const MCSymbol *SecBegin,
396 ArrayRef<AppleAccelTableData::Atom> Atoms);
397
398 /// Emit an Apple Accelerator Table consisting of entries in the specified
399 /// AccelTable. The DataT template parameter should be derived from
400 /// AppleAccelTableData.
401 template <typename DataT>
402 void emitAppleAccelTable(AsmPrinter *Asm, AccelTable<DataT> &Contents,
403 StringRef Prefix, const MCSymbol *SecBegin) {
404 static_assert(std::is_convertible<DataT *, AppleAccelTableData *>::value);
405 emitAppleAccelTableImpl(Asm, Contents, Prefix, SecBegin, DataT::Atoms);
406 }
407
408 void emitDWARF5AccelTable(AsmPrinter *Asm, DWARF5AccelTable &Contents,
409 const DwarfDebug &DD,
410 ArrayRef<std::unique_ptr<DwarfCompileUnit>> CUs);
411
412 /// Emit a DWARFv5 Accelerator Table consisting of entries in the specified
413 /// AccelTable. The \p CUs contains either symbols keeping offsets to the
414 /// start of compilation unit, either offsets to the start of compilation
415 /// unit themselves.
416 void emitDWARF5AccelTable(
417 AsmPrinter *Asm, DWARF5AccelTable &Contents,
418 ArrayRef<std::variant<MCSymbol *, uint64_t>> CUs,
419 llvm::function_ref<std::optional<DWARF5AccelTable::UnitIndexAndEncoding>(
420 const DWARF5AccelTableData &)>
421 getIndexForEntry);
422
423 /// Accelerator table data implementation for simple Apple accelerator tables
424 /// with just a DIE reference.
425 class AppleAccelTableOffsetData : public AppleAccelTableData {
426 public:
427 AppleAccelTableOffsetData(const DIE &D) : Die(D) {}
428
429 void emit(AsmPrinter *Asm) const override;
430
431 static constexpr Atom Atoms[] = {
432 Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)};
433
434 #ifndef NDEBUG
435 void print(raw_ostream &OS) const override;
436 #endif
437 protected:
438 uint64_t order() const override { return Die.getOffset(); }
439
440 const DIE &Die;
441 };
442
443 /// Accelerator table data implementation for Apple type accelerator tables.
444 class AppleAccelTableTypeData : public AppleAccelTableOffsetData {
445 public:
446 AppleAccelTableTypeData(const DIE &D) : AppleAccelTableOffsetData(D) {}
447
448 void emit(AsmPrinter *Asm) const override;
449
450 static constexpr Atom Atoms[] = {
451 Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4),
452 Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2),
453 Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)};
454
455 #ifndef NDEBUG
456 void print(raw_ostream &OS) const override;
457 #endif
458 };
459
460 /// Accelerator table data implementation for simple Apple accelerator tables
461 /// with a DIE offset but no actual DIE pointer.
462 class AppleAccelTableStaticOffsetData : public AppleAccelTableData {
463 public:
464 AppleAccelTableStaticOffsetData(uint32_t Offset) : Offset(Offset) {}
465
466 void emit(AsmPrinter *Asm) const override;
467
468 static constexpr Atom Atoms[] = {
469 Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)};
470
471 #ifndef NDEBUG
472 void print(raw_ostream &OS) const override;
473 #endif
474 protected:
475 uint64_t order() const override { return Offset; }
476
477 uint32_t Offset;
478 };
479
480 /// Accelerator table data implementation for type accelerator tables with
481 /// a DIE offset but no actual DIE pointer.
482 class AppleAccelTableStaticTypeData : public AppleAccelTableStaticOffsetData {
483 public:
484 AppleAccelTableStaticTypeData(uint32_t Offset, uint16_t Tag,
485 bool ObjCClassIsImplementation,
486 uint32_t QualifiedNameHash)
487 : AppleAccelTableStaticOffsetData(Offset),
488 QualifiedNameHash(QualifiedNameHash), Tag(Tag),
489 ObjCClassIsImplementation(ObjCClassIsImplementation) {}
490
491 void emit(AsmPrinter *Asm) const override;
492
493 static constexpr Atom Atoms[] = {
494 Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4),
495 Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2),
496 Atom(5, dwarf::DW_FORM_data1), Atom(6, dwarf::DW_FORM_data4)};
497
498 #ifndef NDEBUG
499 void print(raw_ostream &OS) const override;
500 #endif
501 protected:
502 uint64_t order() const override { return Offset; }
503
504 uint32_t QualifiedNameHash;
505 uint16_t Tag;
506 bool ObjCClassIsImplementation;
507 };
508
509 } // end namespace llvm
510
511 #endif // LLVM_CODEGEN_ACCELTABLE_H
512