1 //===- DebugTypes.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "DebugTypes.h"
10 #include "COFFLinkerContext.h"
11 #include "Chunks.h"
12 #include "Driver.h"
13 #include "InputFiles.h"
14 #include "PDB.h"
15 #include "TypeMerger.h"
16 #include "lld/Common/ErrorHandler.h"
17 #include "lld/Common/Memory.h"
18 #include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
19 #include "llvm/DebugInfo/CodeView/TypeRecord.h"
20 #include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
21 #include "llvm/DebugInfo/CodeView/TypeStreamMerger.h"
22 #include "llvm/DebugInfo/PDB/GenericError.h"
23 #include "llvm/DebugInfo/PDB/Native/InfoStream.h"
24 #include "llvm/DebugInfo/PDB/Native/NativeSession.h"
25 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
26 #include "llvm/DebugInfo/PDB/Native/TpiHashing.h"
27 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
28 #include "llvm/Support/FormatVariadic.h"
29 #include "llvm/Support/Parallel.h"
30 #include "llvm/Support/Path.h"
31 
32 using namespace llvm;
33 using namespace llvm::codeview;
34 using namespace lld;
35 using namespace lld::coff;
36 
37 namespace {
38 class TypeServerIpiSource;
39 
40 // The TypeServerSource class represents a PDB type server, a file referenced by
41 // OBJ files compiled with MSVC /Zi. A single PDB can be shared by several OBJ
42 // files, therefore there must be only once instance per OBJ lot. The file path
43 // is discovered from the dependent OBJ's debug type stream. The
44 // TypeServerSource object is then queued and loaded by the COFF Driver. The
45 // debug type stream for such PDB files will be merged first in the final PDB,
46 // before any dependent OBJ.
47 class TypeServerSource : public TpiSource {
48 public:
49   explicit TypeServerSource(COFFLinkerContext &ctx, PDBInputFile *f)
50       : TpiSource(ctx, PDB, nullptr), pdbInputFile(f) {
51     if (f->loadErr && *f->loadErr)
52       return;
53     pdb::PDBFile &file = f->session->getPDBFile();
54     auto expectedInfo = file.getPDBInfoStream();
55     if (!expectedInfo)
56       return;
57     Guid = expectedInfo->getGuid();
58     auto it = ctx.typeServerSourceMappings.emplace(Guid, this);
59     if (!it.second) {
60       // If we hit here we have collision on Guid's in two PDB files.
61       // This can happen if the PDB Guid is invalid or if we are really
62       // unlucky. This should fall back on stright file-system lookup.
63       TypeServerSource *tSrc = (TypeServerSource *)it.first->second;
64       log("GUID collision between " + file.getFilePath() + " and " +
65           tSrc->pdbInputFile->session->getPDBFile().getFilePath());
66       ctx.typeServerSourceMappings.erase(Guid);
67     }
68   }
69 
70   Error mergeDebugT(TypeMerger *m) override;
71 
72   void loadGHashes() override;
73   void remapTpiWithGHashes(GHashState *g) override;
74 
75   bool isDependency() const override { return true; }
76 
77   PDBInputFile *pdbInputFile = nullptr;
78 
79   // TpiSource for IPI stream.
80   TypeServerIpiSource *ipiSrc = nullptr;
81 
82   // The PDB signature GUID.
83   codeview::GUID Guid;
84 };
85 
86 // Companion to TypeServerSource. Stores the index map for the IPI stream in the
87 // PDB. Modeling PDBs with two sources for TPI and IPI helps establish the
88 // invariant of one type index space per source.
89 class TypeServerIpiSource : public TpiSource {
90 public:
91   explicit TypeServerIpiSource(COFFLinkerContext &ctx)
92       : TpiSource(ctx, PDBIpi, nullptr) {}
93 
94   friend class TypeServerSource;
95 
96   // All of the TpiSource methods are no-ops. The parent TypeServerSource
97   // handles both TPI and IPI.
98   Error mergeDebugT(TypeMerger *m) override { return Error::success(); }
99   void loadGHashes() override {}
100   void remapTpiWithGHashes(GHashState *g) override {}
101   bool isDependency() const override { return true; }
102 };
103 
104 // This class represents the debug type stream of an OBJ file that depends on a
105 // PDB type server (see TypeServerSource).
106 class UseTypeServerSource : public TpiSource {
107   Expected<TypeServerSource *> getTypeServerSource();
108 
109 public:
110   UseTypeServerSource(COFFLinkerContext &ctx, ObjFile *f, TypeServer2Record ts)
111       : TpiSource(ctx, UsingPDB, f), typeServerDependency(ts) {}
112 
113   Error mergeDebugT(TypeMerger *m) override;
114 
115   // No need to load ghashes from /Zi objects.
116   void loadGHashes() override {}
117   void remapTpiWithGHashes(GHashState *g) override;
118 
119   // Information about the PDB type server dependency, that needs to be loaded
120   // in before merging this OBJ.
121   TypeServer2Record typeServerDependency;
122 };
123 
124 // This class represents the debug type stream of a Microsoft precompiled
125 // headers OBJ (PCH OBJ). This OBJ kind needs to be merged first in the output
126 // PDB, before any other OBJs that depend on this. Note that only MSVC generate
127 // such files, clang does not.
128 class PrecompSource : public TpiSource {
129 public:
130   PrecompSource(COFFLinkerContext &ctx, ObjFile *f) : TpiSource(ctx, PCH, f) {
131     if (!f->pchSignature || !*f->pchSignature)
132       fatal(toString(f) +
133             " claims to be a PCH object, but does not have a valid signature");
134     auto it = ctx.precompSourceMappings.emplace(*f->pchSignature, this);
135     if (!it.second)
136       fatal("a PCH object with the same signature has already been provided (" +
137             toString(it.first->second->file) + " and " + toString(file) + ")");
138   }
139 
140   void loadGHashes() override;
141 
142   bool isDependency() const override { return true; }
143 };
144 
145 // This class represents the debug type stream of an OBJ file that depends on a
146 // Microsoft precompiled headers OBJ (see PrecompSource).
147 class UsePrecompSource : public TpiSource {
148 public:
149   UsePrecompSource(COFFLinkerContext &ctx, ObjFile *f, PrecompRecord precomp)
150       : TpiSource(ctx, UsingPCH, f), precompDependency(precomp) {}
151 
152   Error mergeDebugT(TypeMerger *m) override;
153 
154   void loadGHashes() override;
155   void remapTpiWithGHashes(GHashState *g) override;
156 
157 private:
158   Error mergeInPrecompHeaderObj();
159 
160   PrecompSource *findObjByName(StringRef fileNameOnly);
161   PrecompSource *findPrecompSource(ObjFile *file, PrecompRecord &pr);
162   Expected<PrecompSource *> findPrecompMap(ObjFile *file, PrecompRecord &pr);
163 
164 public:
165   // Information about the Precomp OBJ dependency, that needs to be loaded in
166   // before merging this OBJ.
167   PrecompRecord precompDependency;
168 };
169 } // namespace
170 
171 TpiSource::TpiSource(COFFLinkerContext &ctx, TpiKind k, ObjFile *f)
172     : ctx(ctx), kind(k), tpiSrcIdx(ctx.tpiSourceList.size()), file(f) {
173   ctx.addTpiSource(this);
174 }
175 
176 // Vtable key method.
177 TpiSource::~TpiSource() {
178   // Silence any assertions about unchecked errors.
179   consumeError(std::move(typeMergingError));
180 }
181 
182 TpiSource *lld::coff::makeTpiSource(COFFLinkerContext &ctx, ObjFile *file) {
183   return make<TpiSource>(ctx, TpiSource::Regular, file);
184 }
185 
186 TpiSource *lld::coff::makeTypeServerSource(COFFLinkerContext &ctx,
187                                            PDBInputFile *pdbInputFile) {
188   // Type server sources come in pairs: the TPI stream, and the IPI stream.
189   auto *tpiSource = make<TypeServerSource>(ctx, pdbInputFile);
190   if (pdbInputFile->session->getPDBFile().hasPDBIpiStream())
191     tpiSource->ipiSrc = make<TypeServerIpiSource>(ctx);
192   return tpiSource;
193 }
194 
195 TpiSource *lld::coff::makeUseTypeServerSource(COFFLinkerContext &ctx,
196                                               ObjFile *file,
197                                               TypeServer2Record ts) {
198   return make<UseTypeServerSource>(ctx, file, ts);
199 }
200 
201 TpiSource *lld::coff::makePrecompSource(COFFLinkerContext &ctx, ObjFile *file) {
202   return make<PrecompSource>(ctx, file);
203 }
204 
205 TpiSource *lld::coff::makeUsePrecompSource(COFFLinkerContext &ctx,
206                                            ObjFile *file,
207                                            PrecompRecord precomp) {
208   return make<UsePrecompSource>(ctx, file, precomp);
209 }
210 
211 bool TpiSource::remapTypeIndex(TypeIndex &ti, TiRefKind refKind) const {
212   if (ti.isSimple())
213     return true;
214 
215   // This can be an item index or a type index. Choose the appropriate map.
216   ArrayRef<TypeIndex> tpiOrIpiMap =
217       (refKind == TiRefKind::IndexRef) ? ipiMap : tpiMap;
218   if (ti.toArrayIndex() >= tpiOrIpiMap.size())
219     return false;
220   ti = tpiOrIpiMap[ti.toArrayIndex()];
221   return true;
222 }
223 
224 void TpiSource::remapRecord(MutableArrayRef<uint8_t> rec,
225                             ArrayRef<TiReference> typeRefs) {
226   MutableArrayRef<uint8_t> contents = rec.drop_front(sizeof(RecordPrefix));
227   for (const TiReference &ref : typeRefs) {
228     unsigned byteSize = ref.Count * sizeof(TypeIndex);
229     if (contents.size() < ref.Offset + byteSize)
230       fatal("symbol record too short");
231 
232     MutableArrayRef<TypeIndex> indices(
233         reinterpret_cast<TypeIndex *>(contents.data() + ref.Offset), ref.Count);
234     for (TypeIndex &ti : indices) {
235       if (!remapTypeIndex(ti, ref.Kind)) {
236         if (config->verbose) {
237           uint16_t kind =
238               reinterpret_cast<const RecordPrefix *>(rec.data())->RecordKind;
239           StringRef fname = file ? file->getName() : "<unknown PDB>";
240           log("failed to remap type index in record of kind 0x" +
241               utohexstr(kind) + " in " + fname + " with bad " +
242               (ref.Kind == TiRefKind::IndexRef ? "item" : "type") +
243               " index 0x" + utohexstr(ti.getIndex()));
244         }
245         ti = TypeIndex(SimpleTypeKind::NotTranslated);
246         continue;
247       }
248     }
249   }
250 }
251 
252 void TpiSource::remapTypesInTypeRecord(MutableArrayRef<uint8_t> rec) {
253   // TODO: Handle errors similar to symbols.
254   SmallVector<TiReference, 32> typeRefs;
255   discoverTypeIndices(CVType(rec), typeRefs);
256   remapRecord(rec, typeRefs);
257 }
258 
259 bool TpiSource::remapTypesInSymbolRecord(MutableArrayRef<uint8_t> rec) {
260   // Discover type index references in the record. Skip it if we don't
261   // know where they are.
262   SmallVector<TiReference, 32> typeRefs;
263   if (!discoverTypeIndicesInSymbol(rec, typeRefs))
264     return false;
265   remapRecord(rec, typeRefs);
266   return true;
267 }
268 
269 // A COFF .debug$H section is currently a clang extension.  This function checks
270 // if a .debug$H section is in a format that we expect / understand, so that we
271 // can ignore any sections which are coincidentally also named .debug$H but do
272 // not contain a format we recognize.
273 static bool canUseDebugH(ArrayRef<uint8_t> debugH) {
274   if (debugH.size() < sizeof(object::debug_h_header))
275     return false;
276   auto *header =
277       reinterpret_cast<const object::debug_h_header *>(debugH.data());
278   debugH = debugH.drop_front(sizeof(object::debug_h_header));
279   return header->Magic == COFF::DEBUG_HASHES_SECTION_MAGIC &&
280          header->Version == 0 &&
281          header->HashAlgorithm == uint16_t(GlobalTypeHashAlg::SHA1_8) &&
282          (debugH.size() % 8 == 0);
283 }
284 
285 static Optional<ArrayRef<uint8_t>> getDebugH(ObjFile *file) {
286   SectionChunk *sec =
287       SectionChunk::findByName(file->getDebugChunks(), ".debug$H");
288   if (!sec)
289     return llvm::None;
290   ArrayRef<uint8_t> contents = sec->getContents();
291   if (!canUseDebugH(contents))
292     return None;
293   return contents;
294 }
295 
296 static ArrayRef<GloballyHashedType>
297 getHashesFromDebugH(ArrayRef<uint8_t> debugH) {
298   assert(canUseDebugH(debugH));
299   debugH = debugH.drop_front(sizeof(object::debug_h_header));
300   uint32_t count = debugH.size() / sizeof(GloballyHashedType);
301   return {reinterpret_cast<const GloballyHashedType *>(debugH.data()), count};
302 }
303 
304 // Merge .debug$T for a generic object file.
305 Error TpiSource::mergeDebugT(TypeMerger *m) {
306   assert(!config->debugGHashes &&
307          "use remapTpiWithGHashes when ghash is enabled");
308 
309   CVTypeArray types;
310   BinaryStreamReader reader(file->debugTypes, support::little);
311   cantFail(reader.readArray(types, reader.getLength()));
312 
313   // When dealing with PCH.OBJ, some indices were already merged.
314   unsigned nbHeadIndices = indexMapStorage.size();
315 
316   if (auto err = mergeTypeAndIdRecords(
317           m->idTable, m->typeTable, indexMapStorage, types, file->pchSignature))
318     fatal("codeview::mergeTypeAndIdRecords failed: " +
319           toString(std::move(err)));
320 
321   // In an object, there is only one mapping for both types and items.
322   tpiMap = indexMapStorage;
323   ipiMap = indexMapStorage;
324 
325   if (config->showSummary) {
326     nbTypeRecords = indexMapStorage.size() - nbHeadIndices;
327     nbTypeRecordsBytes = reader.getLength();
328     // Count how many times we saw each type record in our input. This
329     // calculation requires a second pass over the type records to classify each
330     // record as a type or index. This is slow, but this code executes when
331     // collecting statistics.
332     m->tpiCounts.resize(m->getTypeTable().size());
333     m->ipiCounts.resize(m->getIDTable().size());
334     uint32_t srcIdx = nbHeadIndices;
335     for (const CVType &ty : types) {
336       TypeIndex dstIdx = tpiMap[srcIdx++];
337       // Type merging may fail, so a complex source type may become the simple
338       // NotTranslated type, which cannot be used as an array index.
339       if (dstIdx.isSimple())
340         continue;
341       SmallVectorImpl<uint32_t> &counts =
342           isIdRecord(ty.kind()) ? m->ipiCounts : m->tpiCounts;
343       ++counts[dstIdx.toArrayIndex()];
344     }
345   }
346 
347   return Error::success();
348 }
349 
350 // Merge types from a type server PDB.
351 Error TypeServerSource::mergeDebugT(TypeMerger *m) {
352   assert(!config->debugGHashes &&
353          "use remapTpiWithGHashes when ghash is enabled");
354 
355   pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
356   Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream();
357   if (auto e = expectedTpi.takeError())
358     fatal("Type server does not have TPI stream: " + toString(std::move(e)));
359   pdb::TpiStream *maybeIpi = nullptr;
360   if (pdbFile.hasPDBIpiStream()) {
361     Expected<pdb::TpiStream &> expectedIpi = pdbFile.getPDBIpiStream();
362     if (auto e = expectedIpi.takeError())
363       fatal("Error getting type server IPI stream: " + toString(std::move(e)));
364     maybeIpi = &*expectedIpi;
365   }
366 
367   // Merge TPI first, because the IPI stream will reference type indices.
368   if (auto err = mergeTypeRecords(m->typeTable, indexMapStorage,
369                                   expectedTpi->typeArray()))
370     fatal("codeview::mergeTypeRecords failed: " + toString(std::move(err)));
371   tpiMap = indexMapStorage;
372 
373   // Merge IPI.
374   if (maybeIpi) {
375     if (auto err = mergeIdRecords(m->idTable, tpiMap, ipiSrc->indexMapStorage,
376                                   maybeIpi->typeArray()))
377       fatal("codeview::mergeIdRecords failed: " + toString(std::move(err)));
378     ipiMap = ipiSrc->indexMapStorage;
379   }
380 
381   if (config->showSummary) {
382     nbTypeRecords = tpiMap.size() + ipiMap.size();
383     nbTypeRecordsBytes =
384         expectedTpi->typeArray().getUnderlyingStream().getLength() +
385         (maybeIpi ? maybeIpi->typeArray().getUnderlyingStream().getLength()
386                   : 0);
387 
388     // Count how many times we saw each type record in our input. If a
389     // destination type index is present in the source to destination type index
390     // map, that means we saw it once in the input. Add it to our histogram.
391     m->tpiCounts.resize(m->getTypeTable().size());
392     m->ipiCounts.resize(m->getIDTable().size());
393     for (TypeIndex ti : tpiMap)
394       if (!ti.isSimple())
395         ++m->tpiCounts[ti.toArrayIndex()];
396     for (TypeIndex ti : ipiMap)
397       if (!ti.isSimple())
398         ++m->ipiCounts[ti.toArrayIndex()];
399   }
400 
401   return Error::success();
402 }
403 
404 Expected<TypeServerSource *> UseTypeServerSource::getTypeServerSource() {
405   const codeview::GUID &tsId = typeServerDependency.getGuid();
406   StringRef tsPath = typeServerDependency.getName();
407 
408   TypeServerSource *tsSrc;
409   auto it = ctx.typeServerSourceMappings.find(tsId);
410   if (it != ctx.typeServerSourceMappings.end()) {
411     tsSrc = (TypeServerSource *)it->second;
412   } else {
413     // The file failed to load, lookup by name
414     PDBInputFile *pdb = PDBInputFile::findFromRecordPath(ctx, tsPath, file);
415     if (!pdb)
416       return createFileError(tsPath, errorCodeToError(std::error_code(
417                                          ENOENT, std::generic_category())));
418     // If an error occurred during loading, throw it now
419     if (pdb->loadErr && *pdb->loadErr)
420       return createFileError(tsPath, std::move(*pdb->loadErr));
421 
422     tsSrc = (TypeServerSource *)pdb->debugTypesObj;
423 
424     // Just because a file with a matching name was found and it was an actual
425     // PDB file doesn't mean it matches.  For it to match the InfoStream's GUID
426     // must match the GUID specified in the TypeServer2 record.
427     if (tsSrc->Guid != tsId) {
428       return createFileError(tsPath,
429                              make_error<pdb::PDBError>(
430                                  pdb::pdb_error_code::signature_out_of_date));
431     }
432   }
433   return tsSrc;
434 }
435 
436 Error UseTypeServerSource::mergeDebugT(TypeMerger *m) {
437   Expected<TypeServerSource *> tsSrc = getTypeServerSource();
438   if (!tsSrc)
439     return tsSrc.takeError();
440 
441   pdb::PDBFile &pdbSession = (*tsSrc)->pdbInputFile->session->getPDBFile();
442   auto expectedInfo = pdbSession.getPDBInfoStream();
443   if (!expectedInfo)
444     return expectedInfo.takeError();
445 
446   // Reuse the type index map of the type server.
447   tpiMap = (*tsSrc)->tpiMap;
448   ipiMap = (*tsSrc)->ipiMap;
449   return Error::success();
450 }
451 
452 static bool equalsPath(StringRef path1, StringRef path2) {
453 #if defined(_WIN32)
454   return path1.equals_insensitive(path2);
455 #else
456   return path1.equals(path2);
457 #endif
458 }
459 
460 // Find by name an OBJ provided on the command line
461 PrecompSource *UsePrecompSource::findObjByName(StringRef fileNameOnly) {
462   SmallString<128> currentPath;
463   for (auto kv : ctx.precompSourceMappings) {
464     StringRef currentFileName = sys::path::filename(kv.second->file->getName(),
465                                                     sys::path::Style::windows);
466 
467     // Compare based solely on the file name (link.exe behavior)
468     if (equalsPath(currentFileName, fileNameOnly))
469       return (PrecompSource *)kv.second;
470   }
471   return nullptr;
472 }
473 
474 PrecompSource *UsePrecompSource::findPrecompSource(ObjFile *file,
475                                                    PrecompRecord &pr) {
476   // Cross-compile warning: given that Clang doesn't generate LF_PRECOMP
477   // records, we assume the OBJ comes from a Windows build of cl.exe. Thusly,
478   // the paths embedded in the OBJs are in the Windows format.
479   SmallString<128> prFileName =
480       sys::path::filename(pr.getPrecompFilePath(), sys::path::Style::windows);
481 
482   auto it = ctx.precompSourceMappings.find(pr.getSignature());
483   if (it != ctx.precompSourceMappings.end()) {
484     return (PrecompSource *)it->second;
485   }
486   // Lookup by name
487   return findObjByName(prFileName);
488 }
489 
490 Expected<PrecompSource *> UsePrecompSource::findPrecompMap(ObjFile *file,
491                                                            PrecompRecord &pr) {
492   PrecompSource *precomp = findPrecompSource(file, pr);
493 
494   if (!precomp)
495     return createFileError(
496         pr.getPrecompFilePath(),
497         make_error<pdb::PDBError>(pdb::pdb_error_code::no_matching_pch));
498 
499   if (pr.getSignature() != file->pchSignature)
500     return createFileError(
501         toString(file),
502         make_error<pdb::PDBError>(pdb::pdb_error_code::no_matching_pch));
503 
504   if (pr.getSignature() != *precomp->file->pchSignature)
505     return createFileError(
506         toString(precomp->file),
507         make_error<pdb::PDBError>(pdb::pdb_error_code::no_matching_pch));
508 
509   return precomp;
510 }
511 
512 /// Merges a precompiled headers TPI map into the current TPI map. The
513 /// precompiled headers object will also be loaded and remapped in the
514 /// process.
515 Error UsePrecompSource::mergeInPrecompHeaderObj() {
516   auto e = findPrecompMap(file, precompDependency);
517   if (!e)
518     return e.takeError();
519 
520   PrecompSource *precompSrc = *e;
521   if (precompSrc->tpiMap.empty())
522     return Error::success();
523 
524   assert(precompDependency.getStartTypeIndex() ==
525          TypeIndex::FirstNonSimpleIndex);
526   assert(precompDependency.getTypesCount() <= precompSrc->tpiMap.size());
527   // Use the previously remapped index map from the precompiled headers.
528   indexMapStorage.insert(indexMapStorage.begin(), precompSrc->tpiMap.begin(),
529                          precompSrc->tpiMap.begin() +
530                              precompDependency.getTypesCount());
531 
532   return Error::success();
533 }
534 
535 Error UsePrecompSource::mergeDebugT(TypeMerger *m) {
536   // This object was compiled with /Yu, so process the corresponding
537   // precompiled headers object (/Yc) first. Some type indices in the current
538   // object are referencing data in the precompiled headers object, so we need
539   // both to be loaded.
540   if (Error e = mergeInPrecompHeaderObj())
541     return e;
542 
543   return TpiSource::mergeDebugT(m);
544 }
545 
546 //===----------------------------------------------------------------------===//
547 // Parellel GHash type merging implementation.
548 //===----------------------------------------------------------------------===//
549 
550 void TpiSource::loadGHashes() {
551   if (Optional<ArrayRef<uint8_t>> debugH = getDebugH(file)) {
552     ghashes = getHashesFromDebugH(*debugH);
553     ownedGHashes = false;
554   } else {
555     CVTypeArray types;
556     BinaryStreamReader reader(file->debugTypes, support::little);
557     cantFail(reader.readArray(types, reader.getLength()));
558     assignGHashesFromVector(GloballyHashedType::hashTypes(types));
559   }
560 
561   fillIsItemIndexFromDebugT();
562 }
563 
564 // Copies ghashes from a vector into an array. These are long lived, so it's
565 // worth the time to copy these into an appropriately sized vector to reduce
566 // memory usage.
567 void TpiSource::assignGHashesFromVector(
568     std::vector<GloballyHashedType> &&hashVec) {
569   if (hashVec.empty())
570     return;
571   GloballyHashedType *hashes = new GloballyHashedType[hashVec.size()];
572   memcpy(hashes, hashVec.data(), hashVec.size() * sizeof(GloballyHashedType));
573   ghashes = makeArrayRef(hashes, hashVec.size());
574   ownedGHashes = true;
575 }
576 
577 // Faster way to iterate type records. forEachTypeChecked is faster than
578 // iterating CVTypeArray. It avoids virtual readBytes calls in inner loops.
579 static void forEachTypeChecked(ArrayRef<uint8_t> types,
580                                function_ref<void(const CVType &)> fn) {
581   checkError(
582       forEachCodeViewRecord<CVType>(types, [fn](const CVType &ty) -> Error {
583         fn(ty);
584         return Error::success();
585       }));
586 }
587 
588 // Walk over file->debugTypes and fill in the isItemIndex bit vector.
589 // TODO: Store this information in .debug$H so that we don't have to recompute
590 // it. This is the main bottleneck slowing down parallel ghashing with one
591 // thread over single-threaded ghashing.
592 void TpiSource::fillIsItemIndexFromDebugT() {
593   uint32_t index = 0;
594   isItemIndex.resize(ghashes.size());
595   forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
596     if (isIdRecord(ty.kind()))
597       isItemIndex.set(index);
598     ++index;
599   });
600 }
601 
602 void TpiSource::mergeTypeRecord(TypeIndex curIndex, CVType ty) {
603   // Decide if the merged type goes into TPI or IPI.
604   bool isItem = isIdRecord(ty.kind());
605   MergedInfo &merged = isItem ? mergedIpi : mergedTpi;
606 
607   // Copy the type into our mutable buffer.
608   assert(ty.length() <= codeview::MaxRecordLength);
609   size_t offset = merged.recs.size();
610   size_t newSize = alignTo(ty.length(), 4);
611   merged.recs.resize(offset + newSize);
612   auto newRec = makeMutableArrayRef(&merged.recs[offset], newSize);
613   memcpy(newRec.data(), ty.data().data(), newSize);
614 
615   // Fix up the record prefix and padding bytes if it required resizing.
616   if (newSize != ty.length()) {
617     reinterpret_cast<RecordPrefix *>(newRec.data())->RecordLen = newSize - 2;
618     for (size_t i = ty.length(); i < newSize; ++i)
619       newRec[i] = LF_PAD0 + (newSize - i);
620   }
621 
622   // Remap the type indices in the new record.
623   remapTypesInTypeRecord(newRec);
624   uint32_t pdbHash = check(pdb::hashTypeRecord(CVType(newRec)));
625   merged.recSizes.push_back(static_cast<uint16_t>(newSize));
626   merged.recHashes.push_back(pdbHash);
627 
628   // Retain a mapping from PDB function id to PDB function type. This mapping is
629   // used during symbol processing to rewrite S_GPROC32_ID symbols to S_GPROC32
630   // symbols.
631   if (ty.kind() == LF_FUNC_ID || ty.kind() == LF_MFUNC_ID) {
632     bool success = ty.length() >= 12;
633     TypeIndex funcId = curIndex;
634     if (success)
635       success &= remapTypeIndex(funcId, TiRefKind::IndexRef);
636     TypeIndex funcType =
637         *reinterpret_cast<const TypeIndex *>(&newRec.data()[8]);
638     if (success) {
639       funcIdToType.push_back({funcId, funcType});
640     } else {
641       StringRef fname = file ? file->getName() : "<unknown PDB>";
642       warn("corrupt LF_[M]FUNC_ID record 0x" + utohexstr(curIndex.getIndex()) +
643            " in " + fname);
644     }
645   }
646 }
647 
648 void TpiSource::mergeUniqueTypeRecords(ArrayRef<uint8_t> typeRecords,
649                                        TypeIndex beginIndex) {
650   // Re-sort the list of unique types by index.
651   if (kind == PDB)
652     assert(std::is_sorted(uniqueTypes.begin(), uniqueTypes.end()));
653   else
654     llvm::sort(uniqueTypes);
655 
656   // Accumulate all the unique types into one buffer in mergedTypes.
657   uint32_t ghashIndex = 0;
658   auto nextUniqueIndex = uniqueTypes.begin();
659   assert(mergedTpi.recs.empty());
660   assert(mergedIpi.recs.empty());
661 
662   // Pre-compute the number of elements in advance to avoid std::vector resizes.
663   unsigned nbTpiRecs = 0;
664   unsigned nbIpiRecs = 0;
665   forEachTypeChecked(typeRecords, [&](const CVType &ty) {
666     if (nextUniqueIndex != uniqueTypes.end() &&
667         *nextUniqueIndex == ghashIndex) {
668       assert(ty.length() <= codeview::MaxRecordLength);
669       size_t newSize = alignTo(ty.length(), 4);
670       (isIdRecord(ty.kind()) ? nbIpiRecs : nbTpiRecs) += newSize;
671       ++nextUniqueIndex;
672     }
673     ++ghashIndex;
674   });
675   mergedTpi.recs.reserve(nbTpiRecs);
676   mergedIpi.recs.reserve(nbIpiRecs);
677 
678   // Do the actual type merge.
679   ghashIndex = 0;
680   nextUniqueIndex = uniqueTypes.begin();
681   forEachTypeChecked(typeRecords, [&](const CVType &ty) {
682     if (nextUniqueIndex != uniqueTypes.end() &&
683         *nextUniqueIndex == ghashIndex) {
684       mergeTypeRecord(beginIndex + ghashIndex, ty);
685       ++nextUniqueIndex;
686     }
687     ++ghashIndex;
688   });
689   assert(nextUniqueIndex == uniqueTypes.end() &&
690          "failed to merge all desired records");
691   assert(uniqueTypes.size() ==
692              mergedTpi.recSizes.size() + mergedIpi.recSizes.size() &&
693          "missing desired record");
694 }
695 
696 void TpiSource::remapTpiWithGHashes(GHashState *g) {
697   assert(config->debugGHashes && "ghashes must be enabled");
698   fillMapFromGHashes(g);
699   tpiMap = indexMapStorage;
700   ipiMap = indexMapStorage;
701   mergeUniqueTypeRecords(file->debugTypes);
702   // TODO: Free all unneeded ghash resources now that we have a full index map.
703 
704   if (config->showSummary) {
705     nbTypeRecords = ghashes.size();
706     nbTypeRecordsBytes = file->debugTypes.size();
707   }
708 }
709 
710 // PDBs do not actually store global hashes, so when merging a type server
711 // PDB we have to synthesize global hashes.  To do this, we first synthesize
712 // global hashes for the TPI stream, since it is independent, then we
713 // synthesize hashes for the IPI stream, using the hashes for the TPI stream
714 // as inputs.
715 void TypeServerSource::loadGHashes() {
716   // Don't hash twice.
717   if (!ghashes.empty())
718     return;
719   pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
720 
721   // Hash TPI stream.
722   Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream();
723   if (auto e = expectedTpi.takeError())
724     fatal("Type server does not have TPI stream: " + toString(std::move(e)));
725   assignGHashesFromVector(
726       GloballyHashedType::hashTypes(expectedTpi->typeArray()));
727   isItemIndex.resize(ghashes.size());
728 
729   // Hash IPI stream, which depends on TPI ghashes.
730   if (!pdbFile.hasPDBIpiStream())
731     return;
732   Expected<pdb::TpiStream &> expectedIpi = pdbFile.getPDBIpiStream();
733   if (auto e = expectedIpi.takeError())
734     fatal("error retrieving IPI stream: " + toString(std::move(e)));
735   ipiSrc->assignGHashesFromVector(
736       GloballyHashedType::hashIds(expectedIpi->typeArray(), ghashes));
737 
738   // The IPI stream isItemIndex bitvector should be all ones.
739   ipiSrc->isItemIndex.resize(ipiSrc->ghashes.size());
740   ipiSrc->isItemIndex.set(0, ipiSrc->ghashes.size());
741 }
742 
743 // Flatten discontiguous PDB type arrays to bytes so that we can use
744 // forEachTypeChecked instead of CVTypeArray iteration. Copying all types from
745 // type servers is faster than iterating all object files compiled with /Z7 with
746 // CVTypeArray, which has high overheads due to the virtual interface of
747 // BinaryStream::readBytes.
748 static ArrayRef<uint8_t> typeArrayToBytes(const CVTypeArray &types) {
749   BinaryStreamRef stream = types.getUnderlyingStream();
750   ArrayRef<uint8_t> debugTypes;
751   checkError(stream.readBytes(0, stream.getLength(), debugTypes));
752   return debugTypes;
753 }
754 
755 // Merge types from a type server PDB.
756 void TypeServerSource::remapTpiWithGHashes(GHashState *g) {
757   assert(config->debugGHashes && "ghashes must be enabled");
758 
759   // IPI merging depends on TPI, so do TPI first, then do IPI.  No need to
760   // propagate errors, those should've been handled during ghash loading.
761   pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
762   pdb::TpiStream &tpi = check(pdbFile.getPDBTpiStream());
763   fillMapFromGHashes(g);
764   tpiMap = indexMapStorage;
765   mergeUniqueTypeRecords(typeArrayToBytes(tpi.typeArray()));
766   if (pdbFile.hasPDBIpiStream()) {
767     pdb::TpiStream &ipi = check(pdbFile.getPDBIpiStream());
768     ipiSrc->indexMapStorage.resize(ipiSrc->ghashes.size());
769     ipiSrc->fillMapFromGHashes(g);
770     ipiMap = ipiSrc->indexMapStorage;
771     ipiSrc->tpiMap = tpiMap;
772     ipiSrc->ipiMap = ipiMap;
773     ipiSrc->mergeUniqueTypeRecords(typeArrayToBytes(ipi.typeArray()));
774 
775     if (config->showSummary) {
776       nbTypeRecords = ipiSrc->ghashes.size();
777       nbTypeRecordsBytes = ipi.typeArray().getUnderlyingStream().getLength();
778     }
779   }
780 
781   if (config->showSummary) {
782     nbTypeRecords += ghashes.size();
783     nbTypeRecordsBytes += tpi.typeArray().getUnderlyingStream().getLength();
784   }
785 }
786 
787 void UseTypeServerSource::remapTpiWithGHashes(GHashState *g) {
788   // No remapping to do with /Zi objects. Simply use the index map from the type
789   // server. Errors should have been reported earlier. Symbols from this object
790   // will be ignored.
791   Expected<TypeServerSource *> maybeTsSrc = getTypeServerSource();
792   if (!maybeTsSrc) {
793     typeMergingError =
794         joinErrors(std::move(typeMergingError), maybeTsSrc.takeError());
795     return;
796   }
797   TypeServerSource *tsSrc = *maybeTsSrc;
798   tpiMap = tsSrc->tpiMap;
799   ipiMap = tsSrc->ipiMap;
800 }
801 
802 void PrecompSource::loadGHashes() {
803   if (getDebugH(file)) {
804     warn("ignoring .debug$H section; pch with ghash is not implemented");
805   }
806 
807   uint32_t ghashIdx = 0;
808   std::vector<GloballyHashedType> hashVec;
809   forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
810     // Remember the index of the LF_ENDPRECOMP record so it can be excluded from
811     // the PDB. There must be an entry in the list of ghashes so that the type
812     // indexes of the following records in the /Yc PCH object line up.
813     if (ty.kind() == LF_ENDPRECOMP)
814       endPrecompGHashIdx = ghashIdx;
815 
816     hashVec.push_back(GloballyHashedType::hashType(ty, hashVec, hashVec));
817     isItemIndex.push_back(isIdRecord(ty.kind()));
818     ++ghashIdx;
819   });
820   assignGHashesFromVector(std::move(hashVec));
821 }
822 
823 void UsePrecompSource::loadGHashes() {
824   PrecompSource *pchSrc = findPrecompSource(file, precompDependency);
825   if (!pchSrc)
826     return;
827 
828   // To compute ghashes of a /Yu object file, we need to build on the the
829   // ghashes of the /Yc PCH object. After we are done hashing, discard the
830   // ghashes from the PCH source so we don't unnecessarily try to deduplicate
831   // them.
832   std::vector<GloballyHashedType> hashVec =
833       pchSrc->ghashes.take_front(precompDependency.getTypesCount());
834   forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
835     hashVec.push_back(GloballyHashedType::hashType(ty, hashVec, hashVec));
836     isItemIndex.push_back(isIdRecord(ty.kind()));
837   });
838   hashVec.erase(hashVec.begin(),
839                 hashVec.begin() + precompDependency.getTypesCount());
840   assignGHashesFromVector(std::move(hashVec));
841 }
842 
843 void UsePrecompSource::remapTpiWithGHashes(GHashState *g) {
844   fillMapFromGHashes(g);
845   // This object was compiled with /Yu, so process the corresponding
846   // precompiled headers object (/Yc) first. Some type indices in the current
847   // object are referencing data in the precompiled headers object, so we need
848   // both to be loaded.
849   if (Error e = mergeInPrecompHeaderObj()) {
850     typeMergingError = joinErrors(std::move(typeMergingError), std::move(e));
851     return;
852   }
853 
854   tpiMap = indexMapStorage;
855   ipiMap = indexMapStorage;
856   mergeUniqueTypeRecords(file->debugTypes,
857                          TypeIndex(precompDependency.getStartTypeIndex() +
858                                    precompDependency.getTypesCount()));
859   if (config->showSummary) {
860     nbTypeRecords = ghashes.size();
861     nbTypeRecordsBytes = file->debugTypes.size();
862   }
863 }
864 
865 namespace {
866 /// A concurrent hash table for global type hashing. It is based on this paper:
867 /// Concurrent Hash Tables: Fast and General(?)!
868 /// https://dl.acm.org/doi/10.1145/3309206
869 ///
870 /// This hash table is meant to be used in two phases:
871 /// 1. concurrent insertions
872 /// 2. concurrent reads
873 /// It does not support lookup, deletion, or rehashing. It uses linear probing.
874 ///
875 /// The paper describes storing a key-value pair in two machine words.
876 /// Generally, the values stored in this map are type indices, and we can use
877 /// those values to recover the ghash key from a side table. This allows us to
878 /// shrink the table entries further at the cost of some loads, and sidesteps
879 /// the need for a 128 bit atomic compare-and-swap operation.
880 ///
881 /// During insertion, a priority function is used to decide which insertion
882 /// should be preferred. This ensures that the output is deterministic. For
883 /// ghashing, lower tpiSrcIdx values (earlier inputs) are preferred.
884 ///
885 class GHashCell;
886 struct GHashTable {
887   GHashCell *table = nullptr;
888   uint32_t tableSize = 0;
889 
890   GHashTable() = default;
891   ~GHashTable();
892 
893   /// Initialize the table with the given size. Because the table cannot be
894   /// resized, the initial size of the table must be large enough to contain all
895   /// inputs, or insertion may not be able to find an empty cell.
896   void init(uint32_t newTableSize);
897 
898   /// Insert the cell with the given ghash into the table. Return the insertion
899   /// position in the table. It is safe for the caller to store the insertion
900   /// position because the table cannot be resized.
901   uint32_t insert(COFFLinkerContext &ctx, GloballyHashedType ghash,
902                   GHashCell newCell);
903 };
904 
905 /// A ghash table cell for deduplicating types from TpiSources.
906 class GHashCell {
907   uint64_t data = 0;
908 
909 public:
910   GHashCell() = default;
911 
912   // Construct data most to least significant so that sorting works well:
913   // - isItem
914   // - tpiSrcIdx
915   // - ghashIdx
916   // Add one to the tpiSrcIdx so that the 0th record from the 0th source has a
917   // non-zero representation.
918   GHashCell(bool isItem, uint32_t tpiSrcIdx, uint32_t ghashIdx)
919       : data((uint64_t(isItem) << 63U) | (uint64_t(tpiSrcIdx + 1) << 32ULL) |
920              ghashIdx) {
921     assert(tpiSrcIdx == getTpiSrcIdx() && "round trip failure");
922     assert(ghashIdx == getGHashIdx() && "round trip failure");
923   }
924 
925   explicit GHashCell(uint64_t data) : data(data) {}
926 
927   // The empty cell is all zeros.
928   bool isEmpty() const { return data == 0ULL; }
929 
930   /// Extract the tpiSrcIdx.
931   uint32_t getTpiSrcIdx() const {
932     return ((uint32_t)(data >> 32U) & 0x7FFFFFFF) - 1;
933   }
934 
935   /// Extract the index into the ghash array of the TpiSource.
936   uint32_t getGHashIdx() const { return (uint32_t)data; }
937 
938   bool isItem() const { return data & (1ULL << 63U); }
939 
940   /// Get the ghash key for this cell.
941   GloballyHashedType getGHash(const COFFLinkerContext &ctx) const {
942     return ctx.tpiSourceList[getTpiSrcIdx()]->ghashes[getGHashIdx()];
943   }
944 
945   /// The priority function for the cell. The data is stored such that lower
946   /// tpiSrcIdx and ghashIdx values are preferred, which means that type record
947   /// from earlier sources are more likely to prevail.
948   friend inline bool operator<(const GHashCell &l, const GHashCell &r) {
949     return l.data < r.data;
950   }
951 };
952 } // namespace
953 
954 namespace lld {
955 namespace coff {
956 /// This type is just a wrapper around GHashTable with external linkage so it
957 /// can be used from a header.
958 struct GHashState {
959   GHashTable table;
960 };
961 } // namespace coff
962 } // namespace lld
963 
964 GHashTable::~GHashTable() { delete[] table; }
965 
966 void GHashTable::init(uint32_t newTableSize) {
967   table = new GHashCell[newTableSize];
968   memset(table, 0, newTableSize * sizeof(GHashCell));
969   tableSize = newTableSize;
970 }
971 
972 uint32_t GHashTable::insert(COFFLinkerContext &ctx, GloballyHashedType ghash,
973                             GHashCell newCell) {
974   assert(!newCell.isEmpty() && "cannot insert empty cell value");
975 
976   // FIXME: The low bytes of SHA1 have low entropy for short records, which
977   // type records are. Swap the byte order for better entropy. A better ghash
978   // won't need this.
979   uint32_t startIdx =
980       ByteSwap_64(*reinterpret_cast<uint64_t *>(&ghash)) % tableSize;
981 
982   // Do a linear probe starting at startIdx.
983   uint32_t idx = startIdx;
984   while (true) {
985     // Run a compare and swap loop. There are four cases:
986     // - cell is empty: CAS into place and return
987     // - cell has matching key, earlier priority: do nothing, return
988     // - cell has matching key, later priority: CAS into place and return
989     // - cell has non-matching key: hash collision, probe next cell
990     auto *cellPtr = reinterpret_cast<std::atomic<GHashCell> *>(&table[idx]);
991     GHashCell oldCell(cellPtr->load());
992     while (oldCell.isEmpty() || oldCell.getGHash(ctx) == ghash) {
993       // Check if there is an existing ghash entry with a higher priority
994       // (earlier ordering). If so, this is a duplicate, we are done.
995       if (!oldCell.isEmpty() && oldCell < newCell)
996         return idx;
997       // Either the cell is empty, or our value is higher priority. Try to
998       // compare and swap. If it succeeds, we are done.
999       if (cellPtr->compare_exchange_weak(oldCell, newCell))
1000         return idx;
1001       // If the CAS failed, check this cell again.
1002     }
1003 
1004     // Advance the probe. Wrap around to the beginning if we run off the end.
1005     ++idx;
1006     idx = idx == tableSize ? 0 : idx;
1007     if (idx == startIdx) {
1008       // If this becomes an issue, we could mark failure and rehash from the
1009       // beginning with a bigger table. There is no difference between rehashing
1010       // internally and starting over.
1011       report_fatal_error("ghash table is full");
1012     }
1013   }
1014   llvm_unreachable("left infloop");
1015 }
1016 
1017 TypeMerger::TypeMerger(COFFLinkerContext &c, llvm::BumpPtrAllocator &alloc)
1018     : typeTable(alloc), idTable(alloc), ctx(c) {}
1019 
1020 TypeMerger::~TypeMerger() = default;
1021 
1022 void TypeMerger::mergeTypesWithGHash() {
1023   // Load ghashes. Do type servers and PCH objects first.
1024   {
1025     ScopedTimer t1(ctx.loadGHashTimer);
1026     parallelForEach(dependencySources,
1027                     [&](TpiSource *source) { source->loadGHashes(); });
1028     parallelForEach(objectSources,
1029                     [&](TpiSource *source) { source->loadGHashes(); });
1030   }
1031 
1032   ScopedTimer t2(ctx.mergeGHashTimer);
1033   GHashState ghashState;
1034 
1035   // Estimate the size of hash table needed to deduplicate ghashes. This *must*
1036   // be larger than the number of unique types, or hash table insertion may not
1037   // be able to find a vacant slot. Summing the input types guarantees this, but
1038   // it is a gross overestimate. The table size could be reduced to save memory,
1039   // but it would require implementing rehashing, and this table is generally
1040   // small compared to total memory usage, at eight bytes per input type record,
1041   // and most input type records are larger than eight bytes.
1042   size_t tableSize = 0;
1043   for (TpiSource *source : ctx.tpiSourceList)
1044     tableSize += source->ghashes.size();
1045 
1046   // Cap the table size so that we can use 32-bit cell indices. Type indices are
1047   // also 32-bit, so this is an inherent PDB file format limit anyway.
1048   tableSize =
1049       std::min(size_t(INT32_MAX) - TypeIndex::FirstNonSimpleIndex, tableSize);
1050   ghashState.table.init(static_cast<uint32_t>(tableSize));
1051 
1052   // Insert ghashes in parallel. During concurrent insertion, we cannot observe
1053   // the contents of the hash table cell, but we can remember the insertion
1054   // position. Because the table does not rehash, the position will not change
1055   // under insertion. After insertion is done, the value of the cell can be read
1056   // to retrieve the final PDB type index.
1057   parallelForEachN(0, ctx.tpiSourceList.size(), [&](size_t tpiSrcIdx) {
1058     TpiSource *source = ctx.tpiSourceList[tpiSrcIdx];
1059     source->indexMapStorage.resize(source->ghashes.size());
1060     for (uint32_t i = 0, e = source->ghashes.size(); i < e; i++) {
1061       if (source->shouldOmitFromPdb(i)) {
1062         source->indexMapStorage[i] = TypeIndex(SimpleTypeKind::NotTranslated);
1063         continue;
1064       }
1065       GloballyHashedType ghash = source->ghashes[i];
1066       bool isItem = source->isItemIndex.test(i);
1067       uint32_t cellIdx =
1068           ghashState.table.insert(ctx, ghash, GHashCell(isItem, tpiSrcIdx, i));
1069 
1070       // Store the ghash cell index as a type index in indexMapStorage. Later
1071       // we will replace it with the PDB type index.
1072       source->indexMapStorage[i] = TypeIndex::fromArrayIndex(cellIdx);
1073     }
1074   });
1075 
1076   // Collect all non-empty cells and sort them. This will implicitly assign
1077   // destination type indices, and partition the entries into type records and
1078   // item records. It arranges types in this order:
1079   // - type records
1080   //   - source 0, type 0...
1081   //   - source 1, type 1...
1082   // - item records
1083   //   - source 0, type 1...
1084   //   - source 1, type 0...
1085   std::vector<GHashCell> entries;
1086   for (const GHashCell &cell :
1087        makeArrayRef(ghashState.table.table, tableSize)) {
1088     if (!cell.isEmpty())
1089       entries.push_back(cell);
1090   }
1091   parallelSort(entries, std::less<GHashCell>());
1092   log(formatv("ghash table load factor: {0:p} (size {1} / capacity {2})\n",
1093               tableSize ? double(entries.size()) / tableSize : 0,
1094               entries.size(), tableSize));
1095 
1096   // Find out how many type and item indices there are.
1097   auto mid =
1098       std::lower_bound(entries.begin(), entries.end(), GHashCell(true, 0, 0));
1099   assert((mid == entries.end() || mid->isItem()) &&
1100          (mid == entries.begin() || !std::prev(mid)->isItem()) &&
1101          "midpoint is not midpoint");
1102   uint32_t numTypes = std::distance(entries.begin(), mid);
1103   uint32_t numItems = std::distance(mid, entries.end());
1104   log("Tpi record count: " + Twine(numTypes));
1105   log("Ipi record count: " + Twine(numItems));
1106 
1107   // Make a list of the "unique" type records to merge for each tpi source. Type
1108   // merging will skip indices not on this list. Store the destination PDB type
1109   // index for these unique types in the tpiMap for each source. The entries for
1110   // non-unique types will be filled in prior to type merging.
1111   for (uint32_t i = 0, e = entries.size(); i < e; ++i) {
1112     auto &cell = entries[i];
1113     uint32_t tpiSrcIdx = cell.getTpiSrcIdx();
1114     TpiSource *source = ctx.tpiSourceList[tpiSrcIdx];
1115     source->uniqueTypes.push_back(cell.getGHashIdx());
1116 
1117     // Update the ghash table to store the destination PDB type index in the
1118     // table.
1119     uint32_t pdbTypeIndex = i < numTypes ? i : i - numTypes;
1120     uint32_t ghashCellIndex =
1121         source->indexMapStorage[cell.getGHashIdx()].toArrayIndex();
1122     ghashState.table.table[ghashCellIndex] =
1123         GHashCell(cell.isItem(), cell.getTpiSrcIdx(), pdbTypeIndex);
1124   }
1125 
1126   // In parallel, remap all types.
1127   for_each(dependencySources, [&](TpiSource *source) {
1128     source->remapTpiWithGHashes(&ghashState);
1129   });
1130   parallelForEach(objectSources, [&](TpiSource *source) {
1131     source->remapTpiWithGHashes(&ghashState);
1132   });
1133 
1134   // Build a global map of from function ID to function type.
1135   for (TpiSource *source : ctx.tpiSourceList) {
1136     for (auto idToType : source->funcIdToType)
1137       funcIdToType.insert(idToType);
1138     source->funcIdToType.clear();
1139   }
1140 
1141   clearGHashes();
1142 }
1143 
1144 void TypeMerger::sortDependencies() {
1145   // Order dependencies first, but preserve the existing order.
1146   std::vector<TpiSource *> deps;
1147   std::vector<TpiSource *> objs;
1148   for (TpiSource *s : ctx.tpiSourceList)
1149     (s->isDependency() ? deps : objs).push_back(s);
1150   uint32_t numDeps = deps.size();
1151   uint32_t numObjs = objs.size();
1152   ctx.tpiSourceList = std::move(deps);
1153   ctx.tpiSourceList.insert(ctx.tpiSourceList.end(), objs.begin(), objs.end());
1154   for (uint32_t i = 0, e = ctx.tpiSourceList.size(); i < e; ++i)
1155     ctx.tpiSourceList[i]->tpiSrcIdx = i;
1156   dependencySources = makeArrayRef(ctx.tpiSourceList.data(), numDeps);
1157   objectSources = makeArrayRef(ctx.tpiSourceList.data() + numDeps, numObjs);
1158 }
1159 
1160 /// Given the index into the ghash table for a particular type, return the type
1161 /// index for that type in the output PDB.
1162 static TypeIndex loadPdbTypeIndexFromCell(GHashState *g,
1163                                           uint32_t ghashCellIdx) {
1164   GHashCell cell = g->table.table[ghashCellIdx];
1165   return TypeIndex::fromArrayIndex(cell.getGHashIdx());
1166 }
1167 
1168 /// Free heap allocated ghashes.
1169 void TypeMerger::clearGHashes() {
1170   for (TpiSource *src : ctx.tpiSourceList) {
1171     if (src->ownedGHashes)
1172       delete[] src->ghashes.data();
1173     src->ghashes = {};
1174     src->isItemIndex.clear();
1175     src->uniqueTypes.clear();
1176   }
1177 }
1178 
1179 // Fill in a TPI or IPI index map using ghashes. For each source type, use its
1180 // ghash to lookup its final type index in the PDB, and store that in the map.
1181 void TpiSource::fillMapFromGHashes(GHashState *g) {
1182   for (size_t i = 0, e = ghashes.size(); i < e; ++i) {
1183     TypeIndex fakeCellIndex = indexMapStorage[i];
1184     if (fakeCellIndex.isSimple())
1185       indexMapStorage[i] = fakeCellIndex;
1186     else
1187       indexMapStorage[i] =
1188           loadPdbTypeIndexFromCell(g, fakeCellIndex.toArrayIndex());
1189   }
1190 }
1191