1 //===- DebugTypes.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "DebugTypes.h"
10 #include "COFFLinkerContext.h"
11 #include "Chunks.h"
12 #include "Driver.h"
13 #include "InputFiles.h"
14 #include "PDB.h"
15 #include "TypeMerger.h"
16 #include "lld/Common/ErrorHandler.h"
17 #include "lld/Common/Memory.h"
18 #include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
19 #include "llvm/DebugInfo/CodeView/TypeRecord.h"
20 #include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
21 #include "llvm/DebugInfo/CodeView/TypeStreamMerger.h"
22 #include "llvm/DebugInfo/PDB/GenericError.h"
23 #include "llvm/DebugInfo/PDB/Native/InfoStream.h"
24 #include "llvm/DebugInfo/PDB/Native/NativeSession.h"
25 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
26 #include "llvm/DebugInfo/PDB/Native/TpiHashing.h"
27 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
28 #include "llvm/Support/FormatVariadic.h"
29 #include "llvm/Support/Parallel.h"
30 #include "llvm/Support/Path.h"
31
32 using namespace llvm;
33 using namespace llvm::codeview;
34 using namespace lld;
35 using namespace lld::coff;
36
37 namespace {
38 class TypeServerIpiSource;
39
40 // The TypeServerSource class represents a PDB type server, a file referenced by
41 // OBJ files compiled with MSVC /Zi. A single PDB can be shared by several OBJ
42 // files, so there must be only one instance per group of OBJ files. The file
43 // path is discovered from the dependent OBJ's debug type stream. The
44 // TypeServerSource object is then queued and loaded by the COFF Driver. The
45 // debug type stream for such PDB files will be merged first in the final PDB,
46 // before any dependent OBJ.
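// For example, a /Zi OBJ carries an LF_TYPESERVER2 record whose embedded path
// and GUID identify the PDB holding its types (see UseTypeServerSource and its
// typeServerDependency field below).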
47 class TypeServerSource : public TpiSource {
48 public:
49 explicit TypeServerSource(COFFLinkerContext &ctx, PDBInputFile *f)
50 : TpiSource(ctx, PDB, nullptr), pdbInputFile(f) {
51 if (f->loadErr && *f->loadErr)
52 return;
53 pdb::PDBFile &file = f->session->getPDBFile();
54 auto expectedInfo = file.getPDBInfoStream();
55 if (!expectedInfo)
56 return;
57 Guid = expectedInfo->getGuid();
58 auto it = ctx.typeServerSourceMappings.emplace(Guid, this);
59 if (!it.second) {
60 // If we hit here we have a GUID collision between two PDB files.
61 // This can happen if the PDB GUID is invalid or if we are really
62 // unlucky. In that case we fall back on a straight file-system lookup.
63 it.first->second = nullptr;
64 }
65 }
66
67 Error mergeDebugT(TypeMerger *m) override;
68
69 void loadGHashes() override;
70 void remapTpiWithGHashes(GHashState *g) override;
71
72 bool isDependency() const override { return true; }
73
74 PDBInputFile *pdbInputFile = nullptr;
75
76 // TpiSource for IPI stream.
77 TypeServerIpiSource *ipiSrc = nullptr;
78
79 // The PDB signature GUID.
80 codeview::GUID Guid;
81 };
82
83 // Companion to TypeServerSource. Stores the index map for the IPI stream in the
84 // PDB. Modeling PDBs with two sources for TPI and IPI helps establish the
85 // invariant of one type index space per source.
86 class TypeServerIpiSource : public TpiSource {
87 public:
88 explicit TypeServerIpiSource(COFFLinkerContext &ctx)
89 : TpiSource(ctx, PDBIpi, nullptr) {}
90
91 friend class TypeServerSource;
92
93 // All of the TpiSource methods are no-ops. The parent TypeServerSource
94 // handles both TPI and IPI.
95 Error mergeDebugT(TypeMerger *m) override { return Error::success(); }
96 void loadGHashes() override {}
97 void remapTpiWithGHashes(GHashState *g) override {}
98 bool isDependency() const override { return true; }
99 };
100
101 // This class represents the debug type stream of an OBJ file that depends on a
102 // PDB type server (see TypeServerSource).
103 class UseTypeServerSource : public TpiSource {
104 Expected<TypeServerSource *> getTypeServerSource();
105
106 public:
107 UseTypeServerSource(COFFLinkerContext &ctx, ObjFile *f, TypeServer2Record ts)
108 : TpiSource(ctx, UsingPDB, f), typeServerDependency(ts) {}
109
110 Error mergeDebugT(TypeMerger *m) override;
111
112 // No need to load ghashes from /Zi objects.
113 void loadGHashes() override {}
114 void remapTpiWithGHashes(GHashState *g) override;
115
116 // Information about the PDB type server dependency that needs to be loaded
117 // in before merging this OBJ.
118 TypeServer2Record typeServerDependency;
119 };
120
121 // This class represents the debug type stream of a Microsoft precompiled
122 // headers OBJ (PCH OBJ). This OBJ kind needs to be merged first in the output
123 // PDB, before any other OBJs that depend on this. Note that only MSVC
124 // generates such files; clang does not.
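// In other words, the OBJ produced by /Yc holds the shared type records, and
// each dependent /Yu OBJ references it through an LF_PRECOMP record carrying
// the PCH signature and file name (see UsePrecompSource and PrecompRecord
// below).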
125 class PrecompSource : public TpiSource {
126 public:
127 PrecompSource(COFFLinkerContext &ctx, ObjFile *f) : TpiSource(ctx, PCH, f) {
128 if (!f->pchSignature || !*f->pchSignature)
129 fatal(toString(f) +
130 " claims to be a PCH object, but does not have a valid signature");
131 auto it = ctx.precompSourceMappings.emplace(*f->pchSignature, this);
132 if (!it.second)
133 fatal("a PCH object with the same signature has already been provided (" +
134 toString(it.first->second->file) + " and " + toString(file) + ")");
135 }
136
137 void loadGHashes() override;
138
139 bool isDependency() const override { return true; }
140 };
141
142 // This class represents the debug type stream of an OBJ file that depends on a
143 // Microsoft precompiled headers OBJ (see PrecompSource).
144 class UsePrecompSource : public TpiSource {
145 public:
146 UsePrecompSource(COFFLinkerContext &ctx, ObjFile *f, PrecompRecord precomp)
147 : TpiSource(ctx, UsingPCH, f), precompDependency(precomp) {}
148
149 Error mergeDebugT(TypeMerger *m) override;
150
151 void loadGHashes() override;
152 void remapTpiWithGHashes(GHashState *g) override;
153
154 private:
155 Error mergeInPrecompHeaderObj();
156
157 PrecompSource *findObjByName(StringRef fileNameOnly);
158 PrecompSource *findPrecompSource(ObjFile *file, PrecompRecord &pr);
159 Expected<PrecompSource *> findPrecompMap(ObjFile *file, PrecompRecord &pr);
160
161 public:
162 // Information about the Precomp OBJ dependency that needs to be loaded in
163 // before merging this OBJ.
164 PrecompRecord precompDependency;
165 };
166 } // namespace
167
168 TpiSource::TpiSource(COFFLinkerContext &ctx, TpiKind k, ObjFile *f)
169 : ctx(ctx), kind(k), tpiSrcIdx(ctx.tpiSourceList.size()), file(f) {
170 ctx.addTpiSource(this);
171 }
172
173 // Vtable key method.
174 TpiSource::~TpiSource() {
175 // Silence any assertions about unchecked errors.
176 consumeError(std::move(typeMergingError));
177 }
178
179 TpiSource *lld::coff::makeTpiSource(COFFLinkerContext &ctx, ObjFile *file) {
180 return make<TpiSource>(ctx, TpiSource::Regular, file);
181 }
182
183 TpiSource *lld::coff::makeTypeServerSource(COFFLinkerContext &ctx,
184 PDBInputFile *pdbInputFile) {
185 // Type server sources come in pairs: the TPI stream, and the IPI stream.
186 auto *tpiSource = make<TypeServerSource>(ctx, pdbInputFile);
187 if (pdbInputFile->session->getPDBFile().hasPDBIpiStream())
188 tpiSource->ipiSrc = make<TypeServerIpiSource>(ctx);
189 return tpiSource;
190 }
191
192 TpiSource *lld::coff::makeUseTypeServerSource(COFFLinkerContext &ctx,
193 ObjFile *file,
194 TypeServer2Record ts) {
195 return make<UseTypeServerSource>(ctx, file, ts);
196 }
197
198 TpiSource *lld::coff::makePrecompSource(COFFLinkerContext &ctx, ObjFile *file) {
199 return make<PrecompSource>(ctx, file);
200 }
201
202 TpiSource *lld::coff::makeUsePrecompSource(COFFLinkerContext &ctx,
203 ObjFile *file,
204 PrecompRecord precomp) {
205 return make<UsePrecompSource>(ctx, file, precomp);
206 }
207
208 bool TpiSource::remapTypeIndex(TypeIndex &ti, TiRefKind refKind) const {
209 if (ti.isSimple())
210 return true;
211
212 // This can be an item index or a type index. Choose the appropriate map.
213 ArrayRef<TypeIndex> tpiOrIpiMap =
214 (refKind == TiRefKind::IndexRef) ? ipiMap : tpiMap;
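// Illustrative example: a non-simple source index such as 0x1003 has array
// index 3 (indices below TypeIndex::FirstNonSimpleIndex, 0x1000, are simple),
// so it is rewritten to tpiOrIpiMap[3] below.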
215 if (ti.toArrayIndex() >= tpiOrIpiMap.size())
216 return false;
217 ti = tpiOrIpiMap[ti.toArrayIndex()];
218 return true;
219 }
220
221 void TpiSource::remapRecord(MutableArrayRef<uint8_t> rec,
222 ArrayRef<TiReference> typeRefs) {
223 MutableArrayRef<uint8_t> contents = rec.drop_front(sizeof(RecordPrefix));
224 for (const TiReference &ref : typeRefs) {
225 unsigned byteSize = ref.Count * sizeof(TypeIndex);
226 if (contents.size() < ref.Offset + byteSize)
227 fatal("symbol record too short");
228
229 MutableArrayRef<TypeIndex> indices(
230 reinterpret_cast<TypeIndex *>(contents.data() + ref.Offset), ref.Count);
231 for (TypeIndex &ti : indices) {
232 if (!remapTypeIndex(ti, ref.Kind)) {
233 if (config->verbose) {
234 uint16_t kind =
235 reinterpret_cast<const RecordPrefix *>(rec.data())->RecordKind;
236 StringRef fname = file ? file->getName() : "<unknown PDB>";
237 log("failed to remap type index in record of kind 0x" +
238 utohexstr(kind) + " in " + fname + " with bad " +
239 (ref.Kind == TiRefKind::IndexRef ? "item" : "type") +
240 " index 0x" + utohexstr(ti.getIndex()));
241 }
242 ti = TypeIndex(SimpleTypeKind::NotTranslated);
243 continue;
244 }
245 }
246 }
247 }
248
249 void TpiSource::remapTypesInTypeRecord(MutableArrayRef<uint8_t> rec) {
250 // TODO: Handle errors similar to symbols.
251 SmallVector<TiReference, 32> typeRefs;
252 discoverTypeIndices(CVType(rec), typeRefs);
253 remapRecord(rec, typeRefs);
254 }
255
256 bool TpiSource::remapTypesInSymbolRecord(MutableArrayRef<uint8_t> rec) {
257 // Discover type index references in the record. Skip it if we don't
258 // know where they are.
259 SmallVector<TiReference, 32> typeRefs;
260 if (!discoverTypeIndicesInSymbol(rec, typeRefs))
261 return false;
262 remapRecord(rec, typeRefs);
263 return true;
264 }
265
266 // A COFF .debug$H section is currently a clang extension. This function checks
267 // if a .debug$H section is in a format that we expect / understand, so that we
268 // can ignore any sections which are coincidentally also named .debug$H but do
269 // not contain a format we recognize.
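// The layout validated below is a debug_h_header (magic, version 0, hash
// algorithm SHA1_8) followed by a flat array of 8-byte truncated SHA1 hashes,
// expected to contain one hash per record of the matching .debug$T section.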
270 static bool canUseDebugH(ArrayRef<uint8_t> debugH) {
271 if (debugH.size() < sizeof(object::debug_h_header))
272 return false;
273 auto *header =
274 reinterpret_cast<const object::debug_h_header *>(debugH.data());
275 debugH = debugH.drop_front(sizeof(object::debug_h_header));
276 return header->Magic == COFF::DEBUG_HASHES_SECTION_MAGIC &&
277 header->Version == 0 &&
278 header->HashAlgorithm == uint16_t(GlobalTypeHashAlg::SHA1_8) &&
279 (debugH.size() % 8 == 0);
280 }
281
282 static Optional<ArrayRef<uint8_t>> getDebugH(ObjFile *file) {
283 SectionChunk *sec =
284 SectionChunk::findByName(file->getDebugChunks(), ".debug$H");
285 if (!sec)
286 return llvm::None;
287 ArrayRef<uint8_t> contents = sec->getContents();
288 if (!canUseDebugH(contents))
289 return None;
290 return contents;
291 }
292
293 static ArrayRef<GloballyHashedType>
294 getHashesFromDebugH(ArrayRef<uint8_t> debugH) {
295 assert(canUseDebugH(debugH));
296 debugH = debugH.drop_front(sizeof(object::debug_h_header));
297 uint32_t count = debugH.size() / sizeof(GloballyHashedType);
298 return {reinterpret_cast<const GloballyHashedType *>(debugH.data()), count};
299 }
300
301 // Merge .debug$T for a generic object file.
302 Error TpiSource::mergeDebugT(TypeMerger *m) {
303 assert(!config->debugGHashes &&
304 "use remapTpiWithGHashes when ghash is enabled");
305
306 CVTypeArray types;
307 BinaryStreamReader reader(file->debugTypes, support::little);
308 cantFail(reader.readArray(types, reader.getLength()));
309
310 // When dealing with PCH.OBJ, some indices were already merged.
311 unsigned nbHeadIndices = indexMapStorage.size();
312
313 if (auto err = mergeTypeAndIdRecords(
314 m->idTable, m->typeTable, indexMapStorage, types, file->pchSignature))
315 fatal("codeview::mergeTypeAndIdRecords failed: " +
316 toString(std::move(err)));
317
318 // In an object, there is only one mapping for both types and items.
319 tpiMap = indexMapStorage;
320 ipiMap = indexMapStorage;
321
322 if (config->showSummary) {
323 nbTypeRecords = indexMapStorage.size() - nbHeadIndices;
324 nbTypeRecordsBytes = reader.getLength();
325 // Count how many times we saw each type record in our input. This
326 // calculation requires a second pass over the type records to classify each
327 // record as a type or index. This is slow, but this code only executes when
328 // collecting statistics.
329 m->tpiCounts.resize(m->getTypeTable().size());
330 m->ipiCounts.resize(m->getIDTable().size());
331 uint32_t srcIdx = nbHeadIndices;
332 for (const CVType &ty : types) {
333 TypeIndex dstIdx = tpiMap[srcIdx++];
334 // Type merging may fail, so a complex source type may become the simple
335 // NotTranslated type, which cannot be used as an array index.
336 if (dstIdx.isSimple())
337 continue;
338 SmallVectorImpl<uint32_t> &counts =
339 isIdRecord(ty.kind()) ? m->ipiCounts : m->tpiCounts;
340 ++counts[dstIdx.toArrayIndex()];
341 }
342 }
343
344 return Error::success();
345 }
346
347 // Merge types from a type server PDB.
348 Error TypeServerSource::mergeDebugT(TypeMerger *m) {
349 assert(!config->debugGHashes &&
350 "use remapTpiWithGHashes when ghash is enabled");
351
352 pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
353 Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream();
354 if (auto e = expectedTpi.takeError())
355 fatal("Type server does not have TPI stream: " + toString(std::move(e)));
356 pdb::TpiStream *maybeIpi = nullptr;
357 if (pdbFile.hasPDBIpiStream()) {
358 Expected<pdb::TpiStream &> expectedIpi = pdbFile.getPDBIpiStream();
359 if (auto e = expectedIpi.takeError())
360 fatal("Error getting type server IPI stream: " + toString(std::move(e)));
361 maybeIpi = &*expectedIpi;
362 }
363
364 // Merge TPI first, because the IPI stream will reference type indices.
365 if (auto err = mergeTypeRecords(m->typeTable, indexMapStorage,
366 expectedTpi->typeArray()))
367 fatal("codeview::mergeTypeRecords failed: " + toString(std::move(err)));
368 tpiMap = indexMapStorage;
369
370 // Merge IPI.
371 if (maybeIpi) {
372 if (auto err = mergeIdRecords(m->idTable, tpiMap, ipiSrc->indexMapStorage,
373 maybeIpi->typeArray()))
374 fatal("codeview::mergeIdRecords failed: " + toString(std::move(err)));
375 ipiMap = ipiSrc->indexMapStorage;
376 }
377
378 if (config->showSummary) {
379 nbTypeRecords = tpiMap.size() + ipiMap.size();
380 nbTypeRecordsBytes =
381 expectedTpi->typeArray().getUnderlyingStream().getLength() +
382 (maybeIpi ? maybeIpi->typeArray().getUnderlyingStream().getLength()
383 : 0);
384
385 // Count how many times we saw each type record in our input. If a
386 // destination type index is present in the source to destination type index
387 // map, that means we saw it once in the input. Add it to our histogram.
388 m->tpiCounts.resize(m->getTypeTable().size());
389 m->ipiCounts.resize(m->getIDTable().size());
390 for (TypeIndex ti : tpiMap)
391 if (!ti.isSimple())
392 ++m->tpiCounts[ti.toArrayIndex()];
393 for (TypeIndex ti : ipiMap)
394 if (!ti.isSimple())
395 ++m->ipiCounts[ti.toArrayIndex()];
396 }
397
398 return Error::success();
399 }
400
401 Expected<TypeServerSource *> UseTypeServerSource::getTypeServerSource() {
402 const codeview::GUID &tsId = typeServerDependency.getGuid();
403 StringRef tsPath = typeServerDependency.getName();
404
405 TypeServerSource *tsSrc = nullptr;
406 auto it = ctx.typeServerSourceMappings.find(tsId);
407 if (it != ctx.typeServerSourceMappings.end()) {
408 tsSrc = (TypeServerSource *)it->second;
409 }
410 if (tsSrc == nullptr) {
411 // The file failed to load; look it up by name instead.
412 PDBInputFile *pdb = PDBInputFile::findFromRecordPath(ctx, tsPath, file);
413 if (!pdb)
414 return createFileError(tsPath, errorCodeToError(std::error_code(
415 ENOENT, std::generic_category())));
416 // If an error occurred during loading, throw it now
417 if (pdb->loadErr && *pdb->loadErr)
418 return createFileError(tsPath, std::move(*pdb->loadErr));
419
420 tsSrc = (TypeServerSource *)pdb->debugTypesObj;
421
422 // Just because a file with a matching name was found and it was an actual
423 // PDB file doesn't mean it matches. For it to match, the InfoStream's GUID
424 // must match the GUID specified in the TypeServer2 record.
425 if (tsSrc->Guid != tsId) {
426 return createFileError(tsPath,
427 make_error<pdb::PDBError>(
428 pdb::pdb_error_code::signature_out_of_date));
429 }
430 }
431 return tsSrc;
432 }
433
434 Error UseTypeServerSource::mergeDebugT(TypeMerger *m) {
435 Expected<TypeServerSource *> tsSrc = getTypeServerSource();
436 if (!tsSrc)
437 return tsSrc.takeError();
438
439 pdb::PDBFile &pdbSession = (*tsSrc)->pdbInputFile->session->getPDBFile();
440 auto expectedInfo = pdbSession.getPDBInfoStream();
441 if (!expectedInfo)
442 return expectedInfo.takeError();
443
444 // Reuse the type index map of the type server.
445 tpiMap = (*tsSrc)->tpiMap;
446 ipiMap = (*tsSrc)->ipiMap;
447 return Error::success();
448 }
449
450 static bool equalsPath(StringRef path1, StringRef path2) {
451 #if defined(_WIN32)
452 return path1.equals_insensitive(path2);
453 #else
454 return path1.equals(path2);
455 #endif
456 }
457
458 // Find an OBJ provided on the command line by its file name.
459 PrecompSource *UsePrecompSource::findObjByName(StringRef fileNameOnly) {
460 SmallString<128> currentPath;
461 for (auto kv : ctx.precompSourceMappings) {
462 StringRef currentFileName = sys::path::filename(kv.second->file->getName(),
463 sys::path::Style::windows);
464
465 // Compare based solely on the file name (link.exe behavior)
466 if (equalsPath(currentFileName, fileNameOnly))
467 return (PrecompSource *)kv.second;
468 }
469 return nullptr;
470 }
471
472 PrecompSource *UsePrecompSource::findPrecompSource(ObjFile *file,
473 PrecompRecord &pr) {
474 // Cross-compile warning: given that Clang doesn't generate LF_PRECOMP
475 // records, we assume the OBJ comes from a Windows build of cl.exe. Thus,
476 // the paths embedded in the OBJs are in the Windows format.
477 SmallString<128> prFileName =
478 sys::path::filename(pr.getPrecompFilePath(), sys::path::Style::windows);
479
480 auto it = ctx.precompSourceMappings.find(pr.getSignature());
481 if (it != ctx.precompSourceMappings.end()) {
482 return (PrecompSource *)it->second;
483 }
484 // Lookup by name
485 return findObjByName(prFileName);
486 }
487
488 Expected<PrecompSource *> UsePrecompSource::findPrecompMap(ObjFile *file,
489 PrecompRecord &pr) {
490 PrecompSource *precomp = findPrecompSource(file, pr);
491
492 if (!precomp)
493 return createFileError(
494 pr.getPrecompFilePath(),
495 make_error<pdb::PDBError>(pdb::pdb_error_code::no_matching_pch));
496
497 if (pr.getSignature() != file->pchSignature)
498 return createFileError(
499 toString(file),
500 make_error<pdb::PDBError>(pdb::pdb_error_code::no_matching_pch));
501
502 if (pr.getSignature() != *precomp->file->pchSignature)
503 return createFileError(
504 toString(precomp->file),
505 make_error<pdb::PDBError>(pdb::pdb_error_code::no_matching_pch));
506
507 return precomp;
508 }
509
510 /// Merges a precompiled headers TPI map into the current TPI map. The
511 /// precompiled headers object will also be loaded and remapped in the
512 /// process.
513 Error UsePrecompSource::mergeInPrecompHeaderObj() {
514 auto e = findPrecompMap(file, precompDependency);
515 if (!e)
516 return e.takeError();
517
518 PrecompSource *precompSrc = *e;
519 if (precompSrc->tpiMap.empty())
520 return Error::success();
521
522 assert(precompDependency.getStartTypeIndex() ==
523 TypeIndex::FirstNonSimpleIndex);
524 assert(precompDependency.getTypesCount() <= precompSrc->tpiMap.size());
525 // Use the previously remapped index map from the precompiled headers.
526 indexMapStorage.insert(indexMapStorage.begin(), precompSrc->tpiMap.begin(),
527 precompSrc->tpiMap.begin() +
528 precompDependency.getTypesCount());
529
530 return Error::success();
531 }
532
533 Error UsePrecompSource::mergeDebugT(TypeMerger *m) {
534 // This object was compiled with /Yu, so process the corresponding
535 // precompiled headers object (/Yc) first. Some type indices in the current
536 // object are referencing data in the precompiled headers object, so we need
537 // both to be loaded.
538 if (Error e = mergeInPrecompHeaderObj())
539 return e;
540
541 return TpiSource::mergeDebugT(m);
542 }
543
544 //===----------------------------------------------------------------------===//
545 // Parallel GHash type merging implementation.
546 //===----------------------------------------------------------------------===//
547
548 void TpiSource::loadGHashes() {
549 if (Optional<ArrayRef<uint8_t>> debugH = getDebugH(file)) {
550 ghashes = getHashesFromDebugH(*debugH);
551 ownedGHashes = false;
552 } else {
553 CVTypeArray types;
554 BinaryStreamReader reader(file->debugTypes, support::little);
555 cantFail(reader.readArray(types, reader.getLength()));
556 assignGHashesFromVector(GloballyHashedType::hashTypes(types));
557 }
558
559 fillIsItemIndexFromDebugT();
560 }
561
562 // Copies ghashes from a vector into an array. These are long lived, so it's
563 // worth the time to copy them into an appropriately sized array to reduce
564 // memory usage.
565 void TpiSource::assignGHashesFromVector(
566 std::vector<GloballyHashedType> &&hashVec) {
567 if (hashVec.empty())
568 return;
569 GloballyHashedType *hashes = new GloballyHashedType[hashVec.size()];
570 memcpy(hashes, hashVec.data(), hashVec.size() * sizeof(GloballyHashedType));
571 ghashes = makeArrayRef(hashes, hashVec.size());
572 ownedGHashes = true;
573 }
574
575 // Faster way to iterate type records. forEachTypeChecked is faster than
576 // iterating CVTypeArray. It avoids virtual readBytes calls in inner loops.
577 static void forEachTypeChecked(ArrayRef<uint8_t> types,
578 function_ref<void(const CVType &)> fn) {
579 checkError(
580 forEachCodeViewRecord<CVType>(types, [fn](const CVType &ty) -> Error {
581 fn(ty);
582 return Error::success();
583 }));
584 }
585
586 // Walk over file->debugTypes and fill in the isItemIndex bit vector.
587 // TODO: Store this information in .debug$H so that we don't have to recompute
588 // it. This is the main bottleneck that makes parallel ghashing with one
589 // thread slower than single-threaded ghashing.
590 void TpiSource::fillIsItemIndexFromDebugT() {
591 uint32_t index = 0;
592 isItemIndex.resize(ghashes.size());
593 forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
594 if (isIdRecord(ty.kind()))
595 isItemIndex.set(index);
596 ++index;
597 });
598 }
599
600 void TpiSource::mergeTypeRecord(TypeIndex curIndex, CVType ty) {
601 // Decide if the merged type goes into TPI or IPI.
602 bool isItem = isIdRecord(ty.kind());
603 MergedInfo &merged = isItem ? mergedIpi : mergedTpi;
604
605 // Copy the type into our mutable buffer.
606 assert(ty.length() <= codeview::MaxRecordLength);
607 size_t offset = merged.recs.size();
608 size_t newSize = alignTo(ty.length(), 4);
609 merged.recs.resize(offset + newSize);
610 auto newRec = makeMutableArrayRef(&merged.recs[offset], newSize);
611 memcpy(newRec.data(), ty.data().data(), newSize);
612
613 // Fix up the record prefix and padding bytes if it required resizing.
614 if (newSize != ty.length()) {
615 reinterpret_cast<RecordPrefix *>(newRec.data())->RecordLen = newSize - 2;
616 for (size_t i = ty.length(); i < newSize; ++i)
617 newRec[i] = LF_PAD0 + (newSize - i);
618 }
619
620 // Remap the type indices in the new record.
621 remapTypesInTypeRecord(newRec);
622 uint32_t pdbHash = check(pdb::hashTypeRecord(CVType(newRec)));
623 merged.recSizes.push_back(static_cast<uint16_t>(newSize));
624 merged.recHashes.push_back(pdbHash);
625
626 // Retain a mapping from PDB function id to PDB function type. This mapping is
627 // used during symbol processing to rewrite S_GPROC32_ID symbols to S_GPROC32
628 // symbols.
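// In both LF_FUNC_ID and LF_MFUNC_ID records the function type index follows
// the 4-byte record prefix and a 4-byte parent/class index, which is why the
// code below reads the already-remapped type index at byte offset 8.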
629 if (ty.kind() == LF_FUNC_ID || ty.kind() == LF_MFUNC_ID) {
630 bool success = ty.length() >= 12;
631 TypeIndex funcId = curIndex;
632 if (success)
633 success &= remapTypeIndex(funcId, TiRefKind::IndexRef);
634 TypeIndex funcType =
635 *reinterpret_cast<const TypeIndex *>(&newRec.data()[8]);
636 if (success) {
637 funcIdToType.push_back({funcId, funcType});
638 } else {
639 StringRef fname = file ? file->getName() : "<unknown PDB>";
640 warn("corrupt LF_[M]FUNC_ID record 0x" + utohexstr(curIndex.getIndex()) +
641 " in " + fname);
642 }
643 }
644 }
645
646 void TpiSource::mergeUniqueTypeRecords(ArrayRef<uint8_t> typeRecords,
647 TypeIndex beginIndex) {
648 // Re-sort the list of unique types by index.
649 if (kind == PDB)
650 assert(std::is_sorted(uniqueTypes.begin(), uniqueTypes.end()));
651 else
652 llvm::sort(uniqueTypes);
653
654 // Accumulate all the unique types into one buffer in mergedTypes.
655 uint32_t ghashIndex = 0;
656 auto nextUniqueIndex = uniqueTypes.begin();
657 assert(mergedTpi.recs.empty());
658 assert(mergedIpi.recs.empty());
659
660 // Pre-compute the number of elements in advance to avoid std::vector resizes.
661 unsigned nbTpiRecs = 0;
662 unsigned nbIpiRecs = 0;
663 forEachTypeChecked(typeRecords, [&](const CVType &ty) {
664 if (nextUniqueIndex != uniqueTypes.end() &&
665 *nextUniqueIndex == ghashIndex) {
666 assert(ty.length() <= codeview::MaxRecordLength);
667 size_t newSize = alignTo(ty.length(), 4);
668 (isIdRecord(ty.kind()) ? nbIpiRecs : nbTpiRecs) += newSize;
669 ++nextUniqueIndex;
670 }
671 ++ghashIndex;
672 });
673 mergedTpi.recs.reserve(nbTpiRecs);
674 mergedIpi.recs.reserve(nbIpiRecs);
675
676 // Do the actual type merge.
677 ghashIndex = 0;
678 nextUniqueIndex = uniqueTypes.begin();
679 forEachTypeChecked(typeRecords, [&](const CVType &ty) {
680 if (nextUniqueIndex != uniqueTypes.end() &&
681 *nextUniqueIndex == ghashIndex) {
682 mergeTypeRecord(beginIndex + ghashIndex, ty);
683 ++nextUniqueIndex;
684 }
685 ++ghashIndex;
686 });
687 assert(nextUniqueIndex == uniqueTypes.end() &&
688 "failed to merge all desired records");
689 assert(uniqueTypes.size() ==
690 mergedTpi.recSizes.size() + mergedIpi.recSizes.size() &&
691 "missing desired record");
692 }
693
694 void TpiSource::remapTpiWithGHashes(GHashState *g) {
695 assert(config->debugGHashes && "ghashes must be enabled");
696 fillMapFromGHashes(g);
697 tpiMap = indexMapStorage;
698 ipiMap = indexMapStorage;
699 mergeUniqueTypeRecords(file->debugTypes);
700 // TODO: Free all unneeded ghash resources now that we have a full index map.
701
702 if (config->showSummary) {
703 nbTypeRecords = ghashes.size();
704 nbTypeRecordsBytes = file->debugTypes.size();
705 }
706 }
707
708 // PDBs do not actually store global hashes, so when merging a type server
709 // PDB we have to synthesize global hashes. To do this, we first synthesize
710 // global hashes for the TPI stream, since it is independent, then we
711 // synthesize hashes for the IPI stream, using the hashes for the TPI stream
712 // as inputs.
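// This is why hashIds() below is handed the TPI ghashes computed first: ID
// records reference type records, so their hashes depend on them.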
713 void TypeServerSource::loadGHashes() {
714 // Don't hash twice.
715 if (!ghashes.empty())
716 return;
717 pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
718
719 // Hash TPI stream.
720 Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream();
721 if (auto e = expectedTpi.takeError())
722 fatal("Type server does not have TPI stream: " + toString(std::move(e)));
723 assignGHashesFromVector(
724 GloballyHashedType::hashTypes(expectedTpi->typeArray()));
725 isItemIndex.resize(ghashes.size());
726
727 // Hash IPI stream, which depends on TPI ghashes.
728 if (!pdbFile.hasPDBIpiStream())
729 return;
730 Expected<pdb::TpiStream &> expectedIpi = pdbFile.getPDBIpiStream();
731 if (auto e = expectedIpi.takeError())
732 fatal("error retrieving IPI stream: " + toString(std::move(e)));
733 ipiSrc->assignGHashesFromVector(
734 GloballyHashedType::hashIds(expectedIpi->typeArray(), ghashes));
735
736 // The IPI stream isItemIndex bitvector should be all ones.
737 ipiSrc->isItemIndex.resize(ipiSrc->ghashes.size());
738 ipiSrc->isItemIndex.set(0, ipiSrc->ghashes.size());
739 }
740
741 // Flatten discontiguous PDB type arrays to bytes so that we can use
742 // forEachTypeChecked instead of CVTypeArray iteration. Copying all types from
743 // type servers is faster than iterating all object files compiled with /Z7 with
744 // CVTypeArray, which has high overheads due to the virtual interface of
745 // BinaryStream::readBytes.
746 static ArrayRef<uint8_t> typeArrayToBytes(const CVTypeArray &types) {
747 BinaryStreamRef stream = types.getUnderlyingStream();
748 ArrayRef<uint8_t> debugTypes;
749 checkError(stream.readBytes(0, stream.getLength(), debugTypes));
750 return debugTypes;
751 }
752
753 // Merge types from a type server PDB.
754 void TypeServerSource::remapTpiWithGHashes(GHashState *g) {
755 assert(config->debugGHashes && "ghashes must be enabled");
756
757 // IPI merging depends on TPI, so do TPI first, then do IPI. No need to
758 // propagate errors, those should've been handled during ghash loading.
759 pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
760 pdb::TpiStream &tpi = check(pdbFile.getPDBTpiStream());
761 fillMapFromGHashes(g);
762 tpiMap = indexMapStorage;
763 mergeUniqueTypeRecords(typeArrayToBytes(tpi.typeArray()));
764 if (pdbFile.hasPDBIpiStream()) {
765 pdb::TpiStream &ipi = check(pdbFile.getPDBIpiStream());
766 ipiSrc->indexMapStorage.resize(ipiSrc->ghashes.size());
767 ipiSrc->fillMapFromGHashes(g);
768 ipiMap = ipiSrc->indexMapStorage;
769 ipiSrc->tpiMap = tpiMap;
770 ipiSrc->ipiMap = ipiMap;
771 ipiSrc->mergeUniqueTypeRecords(typeArrayToBytes(ipi.typeArray()));
772
773 if (config->showSummary) {
774 nbTypeRecords = ipiSrc->ghashes.size();
775 nbTypeRecordsBytes = ipi.typeArray().getUnderlyingStream().getLength();
776 }
777 }
778
779 if (config->showSummary) {
780 nbTypeRecords += ghashes.size();
781 nbTypeRecordsBytes += tpi.typeArray().getUnderlyingStream().getLength();
782 }
783 }
784
785 void UseTypeServerSource::remapTpiWithGHashes(GHashState *g) {
786 // No remapping to do with /Zi objects. Simply use the index map from the type
787 // server. Errors should have been reported earlier. Symbols from this object
788 // will be ignored.
789 Expected<TypeServerSource *> maybeTsSrc = getTypeServerSource();
790 if (!maybeTsSrc) {
791 typeMergingError =
792 joinErrors(std::move(typeMergingError), maybeTsSrc.takeError());
793 return;
794 }
795 TypeServerSource *tsSrc = *maybeTsSrc;
796 tpiMap = tsSrc->tpiMap;
797 ipiMap = tsSrc->ipiMap;
798 }
799
800 void PrecompSource::loadGHashes() {
801 if (getDebugH(file)) {
802 warn("ignoring .debug$H section; pch with ghash is not implemented");
803 }
804
805 uint32_t ghashIdx = 0;
806 std::vector<GloballyHashedType> hashVec;
807 forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
808 // Remember the index of the LF_ENDPRECOMP record so it can be excluded from
809 // the PDB. There must be an entry in the list of ghashes so that the type
810 // indexes of the following records in the /Yc PCH object line up.
811 if (ty.kind() == LF_ENDPRECOMP)
812 endPrecompGHashIdx = ghashIdx;
813
814 hashVec.push_back(GloballyHashedType::hashType(ty, hashVec, hashVec));
815 isItemIndex.push_back(isIdRecord(ty.kind()));
816 ++ghashIdx;
817 });
818 assignGHashesFromVector(std::move(hashVec));
819 }
820
821 void UsePrecompSource::loadGHashes() {
822 PrecompSource *pchSrc = findPrecompSource(file, precompDependency);
823 if (!pchSrc)
824 return;
825
826 // To compute ghashes of a /Yu object file, we need to build on the
827 // ghashes of the /Yc PCH object. After we are done hashing, discard the
828 // ghashes from the PCH source so we don't unnecessarily try to deduplicate
829 // them.
830 std::vector<GloballyHashedType> hashVec =
831 pchSrc->ghashes.take_front(precompDependency.getTypesCount());
832 forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
833 hashVec.push_back(GloballyHashedType::hashType(ty, hashVec, hashVec));
834 isItemIndex.push_back(isIdRecord(ty.kind()));
835 });
836 hashVec.erase(hashVec.begin(),
837 hashVec.begin() + precompDependency.getTypesCount());
838 assignGHashesFromVector(std::move(hashVec));
839 }
840
841 void UsePrecompSource::remapTpiWithGHashes(GHashState *g) {
842 fillMapFromGHashes(g);
843 // This object was compiled with /Yu, so process the corresponding
844 // precompiled headers object (/Yc) first. Some type indices in the current
845 // object are referencing data in the precompiled headers object, so we need
846 // both to be loaded.
847 if (Error e = mergeInPrecompHeaderObj()) {
848 typeMergingError = joinErrors(std::move(typeMergingError), std::move(e));
849 return;
850 }
851
852 tpiMap = indexMapStorage;
853 ipiMap = indexMapStorage;
854 mergeUniqueTypeRecords(file->debugTypes,
855 TypeIndex(precompDependency.getStartTypeIndex() +
856 precompDependency.getTypesCount()));
857 if (config->showSummary) {
858 nbTypeRecords = ghashes.size();
859 nbTypeRecordsBytes = file->debugTypes.size();
860 }
861 }
862
863 namespace {
864 /// A concurrent hash table for global type hashing. It is based on this paper:
865 /// Concurrent Hash Tables: Fast and General(?)!
866 /// https://dl.acm.org/doi/10.1145/3309206
867 ///
868 /// This hash table is meant to be used in two phases:
869 /// 1. concurrent insertions
870 /// 2. concurrent reads
871 /// It does not support lookup, deletion, or rehashing. It uses linear probing.
872 ///
873 /// The paper describes storing a key-value pair in two machine words.
874 /// Generally, the values stored in this map are type indices, and we can use
875 /// those values to recover the ghash key from a side table. This allows us to
876 /// shrink the table entries further at the cost of some loads, and sidesteps
877 /// the need for a 128 bit atomic compare-and-swap operation.
878 ///
879 /// During insertion, a priority function is used to decide which insertion
880 /// should be preferred. This ensures that the output is deterministic. For
881 /// ghashing, lower tpiSrcIdx values (earlier inputs) are preferred.
882 ///
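/// Typical usage in this file (see TypeMerger::mergeTypesWithGHash): size the
/// table once with init(), run insert() from many threads while recording each
/// returned cell index, then read the winning cells back to assign final PDB
/// type indices.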
883 class GHashCell;
884 struct GHashTable {
885 GHashCell *table = nullptr;
886 uint32_t tableSize = 0;
887
888 GHashTable() = default;
889 ~GHashTable();
890
891 /// Initialize the table with the given size. Because the table cannot be
892 /// resized, the initial size of the table must be large enough to contain all
893 /// inputs, or insertion may not be able to find an empty cell.
894 void init(uint32_t newTableSize);
895
896 /// Insert the cell with the given ghash into the table. Return the insertion
897 /// position in the table. It is safe for the caller to store the insertion
898 /// position because the table cannot be resized.
899 uint32_t insert(COFFLinkerContext &ctx, GloballyHashedType ghash,
900 GHashCell newCell);
901 };
902
903 /// A ghash table cell for deduplicating types from TpiSources.
904 class GHashCell {
905 // Force "data" to be 64-bit aligned; otherwise, some versions of clang
906 // will generate calls to libatomic when using some versions of libstdc++
907 // on 32-bit targets. (Also, in theory, there could be a target where
908 // new[] doesn't always return an 8-byte-aligned allocation.)
909 alignas(sizeof(uint64_t)) uint64_t data = 0;
910
911 public:
912 GHashCell() = default;
913
914 // Construct data most to least significant so that sorting works well:
915 // - isItem
916 // - tpiSrcIdx
917 // - ghashIdx
918 // Add one to the tpiSrcIdx so that the 0th record from the 0th source has a
919 // non-zero representation.
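// For example (illustrative): isItem = true, tpiSrcIdx = 2, ghashIdx = 5 packs
// as (1ULL << 63) | (uint64_t(2 + 1) << 32) | 5 = 0x8000000300000005.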
920 GHashCell(bool isItem, uint32_t tpiSrcIdx, uint32_t ghashIdx)
921 : data((uint64_t(isItem) << 63U) | (uint64_t(tpiSrcIdx + 1) << 32ULL) |
922 ghashIdx) {
923 assert(tpiSrcIdx == getTpiSrcIdx() && "round trip failure");
924 assert(ghashIdx == getGHashIdx() && "round trip failure");
925 }
926
927 explicit GHashCell(uint64_t data) : data(data) {}
928
929 // The empty cell is all zeros.
930 bool isEmpty() const { return data == 0ULL; }
931
932 /// Extract the tpiSrcIdx.
933 uint32_t getTpiSrcIdx() const {
934 return ((uint32_t)(data >> 32U) & 0x7FFFFFFF) - 1;
935 }
936
937 /// Extract the index into the ghash array of the TpiSource.
938 uint32_t getGHashIdx() const { return (uint32_t)data; }
939
940 bool isItem() const { return data & (1ULL << 63U); }
941
942 /// Get the ghash key for this cell.
943 GloballyHashedType getGHash(const COFFLinkerContext &ctx) const {
944 return ctx.tpiSourceList[getTpiSrcIdx()]->ghashes[getGHashIdx()];
945 }
946
947 /// The priority function for the cell. The data is stored such that lower
948 /// tpiSrcIdx and ghashIdx values are preferred, which means that type records
949 /// from earlier sources are more likely to prevail.
950 friend inline bool operator<(const GHashCell &l, const GHashCell &r) {
951 return l.data < r.data;
952 }
953 };
954 } // namespace
955
956 namespace lld {
957 namespace coff {
958 /// This type is just a wrapper around GHashTable with external linkage so it
959 /// can be used from a header.
960 struct GHashState {
961 GHashTable table;
962 };
963 } // namespace coff
964 } // namespace lld
965
966 GHashTable::~GHashTable() { delete[] table; }
967
968 void GHashTable::init(uint32_t newTableSize) {
969 table = new GHashCell[newTableSize];
970 memset(table, 0, newTableSize * sizeof(GHashCell));
971 tableSize = newTableSize;
972 }
973
974 uint32_t GHashTable::insert(COFFLinkerContext &ctx, GloballyHashedType ghash,
975 GHashCell newCell) {
976 assert(!newCell.isEmpty() && "cannot insert empty cell value");
977
978 // FIXME: The low bytes of SHA1 have low entropy for short records, which
979 // type records are. Swap the byte order for better entropy. A better ghash
980 // won't need this.
981 uint32_t startIdx =
982 ByteSwap_64(*reinterpret_cast<uint64_t *>(&ghash)) % tableSize;
983
984 // Do a linear probe starting at startIdx.
985 uint32_t idx = startIdx;
986 while (true) {
987 // Run a compare and swap loop. There are four cases:
988 // - cell is empty: CAS into place and return
989 // - cell has matching key, earlier priority: do nothing, return
990 // - cell has matching key, later priority: CAS into place and return
991 // - cell has non-matching key: hash collision, probe next cell
992 auto *cellPtr = reinterpret_cast<std::atomic<GHashCell> *>(&table[idx]);
993 GHashCell oldCell(cellPtr->load());
994 while (oldCell.isEmpty() || oldCell.getGHash(ctx) == ghash) {
995 // Check if there is an existing ghash entry with a higher priority
996 // (earlier ordering). If so, this is a duplicate, we are done.
997 if (!oldCell.isEmpty() && oldCell < newCell)
998 return idx;
999 // Either the cell is empty, or our value is higher priority. Try to
1000 // compare and swap. If it succeeds, we are done.
1001 if (cellPtr->compare_exchange_weak(oldCell, newCell))
1002 return idx;
1003 // If the CAS failed, check this cell again.
1004 }
1005
1006 // Advance the probe. Wrap around to the beginning if we run off the end.
1007 ++idx;
1008 idx = idx == tableSize ? 0 : idx;
1009 if (idx == startIdx) {
1010 // If this becomes an issue, we could mark failure and rehash from the
1011 // beginning with a bigger table. There is no difference between rehashing
1012 // internally and starting over.
1013 report_fatal_error("ghash table is full");
1014 }
1015 }
1016 llvm_unreachable("left infloop");
1017 }
1018
1019 TypeMerger::TypeMerger(COFFLinkerContext &c, llvm::BumpPtrAllocator &alloc)
1020 : typeTable(alloc), idTable(alloc), ctx(c) {}
1021
1022 TypeMerger::~TypeMerger() = default;
1023
1024 void TypeMerger::mergeTypesWithGHash() {
1025 // Load ghashes. Do type servers and PCH objects first.
1026 {
1027 ScopedTimer t1(ctx.loadGHashTimer);
1028 parallelForEach(dependencySources,
1029 [&](TpiSource *source) { source->loadGHashes(); });
1030 parallelForEach(objectSources,
1031 [&](TpiSource *source) { source->loadGHashes(); });
1032 }
1033
1034 ScopedTimer t2(ctx.mergeGHashTimer);
1035 GHashState ghashState;
1036
1037 // Estimate the size of hash table needed to deduplicate ghashes. This *must*
1038 // be larger than the number of unique types, or hash table insertion may not
1039 // be able to find a vacant slot. Summing the input types guarantees this, but
1040 // it is a gross overestimate. The table size could be reduced to save memory,
1041 // but it would require implementing rehashing, and this table is generally
1042 // small compared to total memory usage, at eight bytes per input type record,
1043 // and most input type records are larger than eight bytes.
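// Illustrative sizing: 10 million input type records across all sources yield
// a table of 10 million 8-byte cells, roughly 80 MB, regardless of how many of
// those records turn out to be duplicates.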
1044 size_t tableSize = 0;
1045 for (TpiSource *source : ctx.tpiSourceList)
1046 tableSize += source->ghashes.size();
1047
1048 // Cap the table size so that we can use 32-bit cell indices. Type indices are
1049 // also 32-bit, so this is an inherent PDB file format limit anyway.
1050 tableSize =
1051 std::min(size_t(INT32_MAX) - TypeIndex::FirstNonSimpleIndex, tableSize);
1052 ghashState.table.init(static_cast<uint32_t>(tableSize));
1053
1054 // Insert ghashes in parallel. During concurrent insertion, we cannot observe
1055 // the contents of the hash table cell, but we can remember the insertion
1056 // position. Because the table does not rehash, the position will not change
1057 // under insertion. After insertion is done, the value of the cell can be read
1058 // to retrieve the final PDB type index.
1059 parallelFor(0, ctx.tpiSourceList.size(), [&](size_t tpiSrcIdx) {
1060 TpiSource *source = ctx.tpiSourceList[tpiSrcIdx];
1061 source->indexMapStorage.resize(source->ghashes.size());
1062 for (uint32_t i = 0, e = source->ghashes.size(); i < e; i++) {
1063 if (source->shouldOmitFromPdb(i)) {
1064 source->indexMapStorage[i] = TypeIndex(SimpleTypeKind::NotTranslated);
1065 continue;
1066 }
1067 GloballyHashedType ghash = source->ghashes[i];
1068 bool isItem = source->isItemIndex.test(i);
1069 uint32_t cellIdx =
1070 ghashState.table.insert(ctx, ghash, GHashCell(isItem, tpiSrcIdx, i));
1071
1072 // Store the ghash cell index as a type index in indexMapStorage. Later
1073 // we will replace it with the PDB type index.
1074 source->indexMapStorage[i] = TypeIndex::fromArrayIndex(cellIdx);
1075 }
1076 });
1077
1078 // Collect all non-empty cells and sort them. This will implicitly assign
1079 // destination type indices, and partition the entries into type records and
1080 // item records. It arranges types in this order:
1081 // - type records
1082 // - source 0, type 0...
1083 // - source 1, type 1...
1084 // - item records
1085 // - source 0, type 1...
1086 // - source 1, type 0...
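// Because isItem is the most significant bit of a cell, the sort below places
// every type-record cell before every item-record cell, and within each
// partition keeps the deterministic source/ghash ordering.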
1087 std::vector<GHashCell> entries;
1088 for (const GHashCell &cell :
1089 makeArrayRef(ghashState.table.table, tableSize)) {
1090 if (!cell.isEmpty())
1091 entries.push_back(cell);
1092 }
1093 parallelSort(entries, std::less<GHashCell>());
1094 log(formatv("ghash table load factor: {0:p} (size {1} / capacity {2})\n",
1095 tableSize ? double(entries.size()) / tableSize : 0,
1096 entries.size(), tableSize));
1097
1098 // Find out how many type and item indices there are.
1099 auto mid =
1100 std::lower_bound(entries.begin(), entries.end(), GHashCell(true, 0, 0));
1101 assert((mid == entries.end() || mid->isItem()) &&
1102 (mid == entries.begin() || !std::prev(mid)->isItem()) &&
1103 "midpoint is not midpoint");
1104 uint32_t numTypes = std::distance(entries.begin(), mid);
1105 uint32_t numItems = std::distance(mid, entries.end());
1106 log("Tpi record count: " + Twine(numTypes));
1107 log("Ipi record count: " + Twine(numItems));
1108
1109 // Make a list of the "unique" type records to merge for each tpi source. Type
1110 // merging will skip indices not on this list. Store the destination PDB type
1111 // index for these unique types in the tpiMap for each source. The entries for
1112 // non-unique types will be filled in prior to type merging.
1113 for (uint32_t i = 0, e = entries.size(); i < e; ++i) {
1114 auto &cell = entries[i];
1115 uint32_t tpiSrcIdx = cell.getTpiSrcIdx();
1116 TpiSource *source = ctx.tpiSourceList[tpiSrcIdx];
1117 source->uniqueTypes.push_back(cell.getGHashIdx());
1118
1119 // Update the ghash table to store the destination PDB type index in the
1120 // table.
1121 uint32_t pdbTypeIndex = i < numTypes ? i : i - numTypes;
1122 uint32_t ghashCellIndex =
1123 source->indexMapStorage[cell.getGHashIdx()].toArrayIndex();
1124 ghashState.table.table[ghashCellIndex] =
1125 GHashCell(cell.isItem(), cell.getTpiSrcIdx(), pdbTypeIndex);
1126 }
1127
1128 // In parallel, remap all types.
1129 for (TpiSource *source : dependencySources)
1130 source->remapTpiWithGHashes(&ghashState);
1131 parallelForEach(objectSources, [&](TpiSource *source) {
1132 source->remapTpiWithGHashes(&ghashState);
1133 });
1134
1135 // Build a global map from function ID to function type.
1136 for (TpiSource *source : ctx.tpiSourceList) {
1137 for (auto idToType : source->funcIdToType)
1138 funcIdToType.insert(idToType);
1139 source->funcIdToType.clear();
1140 }
1141
1142 clearGHashes();
1143 }
1144
1145 void TypeMerger::sortDependencies() {
1146 // Order dependencies first, but preserve the existing order.
1147 std::vector<TpiSource *> deps;
1148 std::vector<TpiSource *> objs;
1149 for (TpiSource *s : ctx.tpiSourceList)
1150 (s->isDependency() ? deps : objs).push_back(s);
1151 uint32_t numDeps = deps.size();
1152 uint32_t numObjs = objs.size();
1153 ctx.tpiSourceList = std::move(deps);
1154 ctx.tpiSourceList.insert(ctx.tpiSourceList.end(), objs.begin(), objs.end());
1155 for (uint32_t i = 0, e = ctx.tpiSourceList.size(); i < e; ++i)
1156 ctx.tpiSourceList[i]->tpiSrcIdx = i;
1157 dependencySources = makeArrayRef(ctx.tpiSourceList.data(), numDeps);
1158 objectSources = makeArrayRef(ctx.tpiSourceList.data() + numDeps, numObjs);
1159 }
1160
1161 /// Given the index into the ghash table for a particular type, return the type
1162 /// index for that type in the output PDB.
1163 static TypeIndex loadPdbTypeIndexFromCell(GHashState *g,
1164 uint32_t ghashCellIdx) {
1165 GHashCell cell = g->table.table[ghashCellIdx];
1166 return TypeIndex::fromArrayIndex(cell.getGHashIdx());
1167 }
1168
1169 /// Free heap allocated ghashes.
1170 void TypeMerger::clearGHashes() {
1171 for (TpiSource *src : ctx.tpiSourceList) {
1172 if (src->ownedGHashes)
1173 delete[] src->ghashes.data();
1174 src->ghashes = {};
1175 src->isItemIndex.clear();
1176 src->uniqueTypes.clear();
1177 }
1178 }
1179
1180 // Fill in a TPI or IPI index map using ghashes. For each source type, use its
1181 // ghash to look up its final type index in the PDB, and store that in the map.
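// At this point indexMapStorage[i] still holds the ghash table cell index
// recorded during insertion (see mergeTypesWithGHash); each cell now holds the
// final PDB type index, so the loads below complete the mapping.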
1182 void TpiSource::fillMapFromGHashes(GHashState *g) {
1183 for (size_t i = 0, e = ghashes.size(); i < e; ++i) {
1184 TypeIndex fakeCellIndex = indexMapStorage[i];
1185 if (fakeCellIndex.isSimple())
1186 indexMapStorage[i] = fakeCellIndex;
1187 else
1188 indexMapStorage[i] =
1189 loadPdbTypeIndexFromCell(g, fakeCellIndex.toArrayIndex());
1190 }
1191 }
1192