1 //===- DbiStreamBuilder.cpp - PDB Dbi Stream Creation -----------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h" 11 12 #include "llvm/ADT/DenseSet.h" 13 #include "llvm/DebugInfo/CodeView/RecordName.h" 14 #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h" 15 #include "llvm/DebugInfo/CodeView/SymbolRecord.h" 16 #include "llvm/DebugInfo/CodeView/SymbolSerializer.h" 17 #include "llvm/DebugInfo/MSF/MSFBuilder.h" 18 #include "llvm/DebugInfo/MSF/MSFCommon.h" 19 #include "llvm/DebugInfo/MSF/MappedBlockStream.h" 20 #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h" 21 #include "llvm/DebugInfo/PDB/Native/Hash.h" 22 #include "llvm/Support/BinaryItemStream.h" 23 #include "llvm/Support/BinaryStreamWriter.h" 24 #include "llvm/Support/xxhash.h" 25 #include <algorithm> 26 #include <vector> 27 28 using namespace llvm; 29 using namespace llvm::msf; 30 using namespace llvm::pdb; 31 using namespace llvm::codeview; 32 33 struct llvm::pdb::GSIHashStreamBuilder { 34 struct UdtDenseMapInfo { 35 static inline CVSymbol getEmptyKey() { 36 static CVSymbol Empty; 37 return Empty; 38 } 39 static inline CVSymbol getTombstoneKey() { 40 static CVSymbol Tombstone(static_cast<SymbolKind>(-1), 41 ArrayRef<uint8_t>()); 42 return Tombstone; 43 } 44 static unsigned getHashValue(const CVSymbol &Val) { 45 return xxHash64(Val.RecordData); 46 } 47 static bool isEqual(const CVSymbol &LHS, const CVSymbol &RHS) { 48 return LHS.RecordData == RHS.RecordData; 49 } 50 }; 51 52 std::vector<CVSymbol> Records; 53 uint32_t StreamIndex; 54 llvm::DenseSet<CVSymbol, UdtDenseMapInfo> UdtHashes; 55 std::vector<PSHashRecord> HashRecords; 56 std::array<support::ulittle32_t, (IPHR_HASH + 32) / 32> HashBitmap; 57 std::vector<support::ulittle32_t> HashBuckets; 58 59 uint32_t calculateSerializedLength() const; 60 uint32_t calculateRecordByteSize() const; 61 Error commit(BinaryStreamWriter &Writer); 62 void finalizeBuckets(uint32_t RecordZeroOffset); 63 64 template <typename T> void addSymbol(const T &Symbol, MSFBuilder &Msf) { 65 T Copy(Symbol); 66 addSymbol(SymbolSerializer::writeOneSymbol(Copy, Msf.getAllocator(), 67 CodeViewContainer::Pdb)); 68 } 69 void addSymbol(const CVSymbol &Symbol) { 70 if (Symbol.kind() == S_UDT) { 71 auto Iter = UdtHashes.insert(Symbol); 72 if (!Iter.second) 73 return; 74 } 75 76 Records.push_back(Symbol); 77 } 78 }; 79 80 uint32_t GSIHashStreamBuilder::calculateSerializedLength() const { 81 uint32_t Size = sizeof(GSIHashHeader); 82 Size += HashRecords.size() * sizeof(PSHashRecord); 83 Size += HashBitmap.size() * sizeof(uint32_t); 84 Size += HashBuckets.size() * sizeof(uint32_t); 85 return Size; 86 } 87 88 uint32_t GSIHashStreamBuilder::calculateRecordByteSize() const { 89 uint32_t Size = 0; 90 for (const auto &Sym : Records) 91 Size += Sym.length(); 92 return Size; 93 } 94 95 Error GSIHashStreamBuilder::commit(BinaryStreamWriter &Writer) { 96 GSIHashHeader Header; 97 Header.VerSignature = GSIHashHeader::HdrSignature; 98 Header.VerHdr = GSIHashHeader::HdrVersion; 99 Header.HrSize = HashRecords.size() * sizeof(PSHashRecord); 100 Header.NumBuckets = HashBitmap.size() * 4 + HashBuckets.size() * 4; 101 102 if (auto EC = Writer.writeObject(Header)) 103 return EC; 104 105 if (auto EC = Writer.writeArray(makeArrayRef(HashRecords))) 106 return EC; 107 if (auto EC = Writer.writeArray(makeArrayRef(HashBitmap))) 108 return EC; 109 if (auto EC = Writer.writeArray(makeArrayRef(HashBuckets))) 110 return EC; 111 return Error::success(); 112 } 113 114 static bool isAsciiString(StringRef S) { 115 return llvm::all_of(S, [](char C) { return unsigned(C) < 0x80; }); 116 } 117 118 // See `caseInsensitiveComparePchPchCchCch` in gsi.cpp 119 static bool gsiRecordLess(StringRef S1, StringRef S2) { 120 size_t LS = S1.size(); 121 size_t RS = S2.size(); 122 // Shorter strings always compare less than longer strings. 123 if (LS != RS) 124 return LS < RS; 125 126 // If either string contains non ascii characters, memcmp them. 127 if (LLVM_UNLIKELY(!isAsciiString(S1) || !isAsciiString(S2))) 128 return memcmp(S1.data(), S2.data(), LS) < 0; 129 130 // Both strings are ascii, perform a case-insenstive comparison. 131 return S1.compare_lower(S2.data()) < 0; 132 } 133 134 void GSIHashStreamBuilder::finalizeBuckets(uint32_t RecordZeroOffset) { 135 std::array<std::vector<std::pair<StringRef, PSHashRecord>>, IPHR_HASH + 1> 136 TmpBuckets; 137 uint32_t SymOffset = RecordZeroOffset; 138 for (const CVSymbol &Sym : Records) { 139 PSHashRecord HR; 140 // Add one when writing symbol offsets to disk. See GSI1::fixSymRecs. 141 HR.Off = SymOffset + 1; 142 HR.CRef = 1; // Always use a refcount of 1. 143 144 // Hash the name to figure out which bucket this goes into. 145 StringRef Name = getSymbolName(Sym); 146 size_t BucketIdx = hashStringV1(Name) % IPHR_HASH; 147 TmpBuckets[BucketIdx].push_back(std::make_pair(Name, HR)); 148 SymOffset += Sym.length(); 149 } 150 151 // Compute the three tables: the hash records in bucket and chain order, the 152 // bucket presence bitmap, and the bucket chain start offsets. 153 HashRecords.reserve(Records.size()); 154 for (ulittle32_t &Word : HashBitmap) 155 Word = 0; 156 for (size_t BucketIdx = 0; BucketIdx < IPHR_HASH + 1; ++BucketIdx) { 157 auto &Bucket = TmpBuckets[BucketIdx]; 158 if (Bucket.empty()) 159 continue; 160 HashBitmap[BucketIdx / 32] |= 1U << (BucketIdx % 32); 161 162 // Calculate what the offset of the first hash record in the chain would 163 // be if it were inflated to contain 32-bit pointers. On a 32-bit system, 164 // each record would be 12 bytes. See HROffsetCalc in gsi.h. 165 const int SizeOfHROffsetCalc = 12; 166 ulittle32_t ChainStartOff = 167 ulittle32_t(HashRecords.size() * SizeOfHROffsetCalc); 168 HashBuckets.push_back(ChainStartOff); 169 170 // Sort each bucket by memcmp of the symbol's name. It's important that 171 // we use the same sorting algorithm as is used by the reference 172 // implementation to ensure that the search for a record within a bucket 173 // can properly early-out when it detects the record won't be found. The 174 // algorithm used here corredsponds to the function 175 // caseInsensitiveComparePchPchCchCch in the reference implementation. 176 llvm::sort(Bucket, [](const std::pair<StringRef, PSHashRecord> &Left, 177 const std::pair<StringRef, PSHashRecord> &Right) { 178 return gsiRecordLess(Left.first, Right.first); 179 }); 180 181 for (const auto &Entry : Bucket) 182 HashRecords.push_back(Entry.second); 183 } 184 } 185 186 GSIStreamBuilder::GSIStreamBuilder(msf::MSFBuilder &Msf) 187 : Msf(Msf), PSH(llvm::make_unique<GSIHashStreamBuilder>()), 188 GSH(llvm::make_unique<GSIHashStreamBuilder>()) {} 189 190 GSIStreamBuilder::~GSIStreamBuilder() {} 191 192 uint32_t GSIStreamBuilder::calculatePublicsHashStreamSize() const { 193 uint32_t Size = 0; 194 Size += sizeof(PublicsStreamHeader); 195 Size += PSH->calculateSerializedLength(); 196 Size += PSH->Records.size() * sizeof(uint32_t); // AddrMap 197 // FIXME: Add thunk map and section offsets for incremental linking. 198 199 return Size; 200 } 201 202 uint32_t GSIStreamBuilder::calculateGlobalsHashStreamSize() const { 203 return GSH->calculateSerializedLength(); 204 } 205 206 Error GSIStreamBuilder::finalizeMsfLayout() { 207 // First we write public symbol records, then we write global symbol records. 208 uint32_t PSHZero = 0; 209 uint32_t GSHZero = PSH->calculateRecordByteSize(); 210 211 PSH->finalizeBuckets(PSHZero); 212 GSH->finalizeBuckets(GSHZero); 213 214 Expected<uint32_t> Idx = Msf.addStream(calculateGlobalsHashStreamSize()); 215 if (!Idx) 216 return Idx.takeError(); 217 GSH->StreamIndex = *Idx; 218 Idx = Msf.addStream(calculatePublicsHashStreamSize()); 219 if (!Idx) 220 return Idx.takeError(); 221 PSH->StreamIndex = *Idx; 222 223 uint32_t RecordBytes = 224 GSH->calculateRecordByteSize() + PSH->calculateRecordByteSize(); 225 226 Idx = Msf.addStream(RecordBytes); 227 if (!Idx) 228 return Idx.takeError(); 229 RecordStreamIdx = *Idx; 230 return Error::success(); 231 } 232 233 static bool comparePubSymByAddrAndName( 234 const std::pair<const CVSymbol *, const PublicSym32 *> &LS, 235 const std::pair<const CVSymbol *, const PublicSym32 *> &RS) { 236 if (LS.second->Segment != RS.second->Segment) 237 return LS.second->Segment < RS.second->Segment; 238 if (LS.second->Offset != RS.second->Offset) 239 return LS.second->Offset < RS.second->Offset; 240 241 return LS.second->Name < RS.second->Name; 242 } 243 244 /// Compute the address map. The address map is an array of symbol offsets 245 /// sorted so that it can be binary searched by address. 246 static std::vector<ulittle32_t> computeAddrMap(ArrayRef<CVSymbol> Records) { 247 // Make a vector of pointers to the symbols so we can sort it by address. 248 // Also gather the symbol offsets while we're at it. 249 250 std::vector<PublicSym32> DeserializedPublics; 251 std::vector<std::pair<const CVSymbol *, const PublicSym32 *>> PublicsByAddr; 252 std::vector<uint32_t> SymOffsets; 253 DeserializedPublics.reserve(Records.size()); 254 PublicsByAddr.reserve(Records.size()); 255 SymOffsets.reserve(Records.size()); 256 257 uint32_t SymOffset = 0; 258 for (const CVSymbol &Sym : Records) { 259 assert(Sym.kind() == SymbolKind::S_PUB32); 260 DeserializedPublics.push_back( 261 cantFail(SymbolDeserializer::deserializeAs<PublicSym32>(Sym))); 262 PublicsByAddr.emplace_back(&Sym, &DeserializedPublics.back()); 263 SymOffsets.push_back(SymOffset); 264 SymOffset += Sym.length(); 265 } 266 std::stable_sort(PublicsByAddr.begin(), PublicsByAddr.end(), 267 comparePubSymByAddrAndName); 268 269 // Fill in the symbol offsets in the appropriate order. 270 std::vector<ulittle32_t> AddrMap; 271 AddrMap.reserve(Records.size()); 272 for (auto &Sym : PublicsByAddr) { 273 ptrdiff_t Idx = std::distance(Records.data(), Sym.first); 274 assert(Idx >= 0 && size_t(Idx) < Records.size()); 275 AddrMap.push_back(ulittle32_t(SymOffsets[Idx])); 276 } 277 return AddrMap; 278 } 279 280 uint32_t GSIStreamBuilder::getPublicsStreamIndex() const { 281 return PSH->StreamIndex; 282 } 283 284 uint32_t GSIStreamBuilder::getGlobalsStreamIndex() const { 285 return GSH->StreamIndex; 286 } 287 288 void GSIStreamBuilder::addPublicSymbol(const PublicSym32 &Pub) { 289 PSH->addSymbol(Pub, Msf); 290 } 291 292 void GSIStreamBuilder::addGlobalSymbol(const ProcRefSym &Sym) { 293 GSH->addSymbol(Sym, Msf); 294 } 295 296 void GSIStreamBuilder::addGlobalSymbol(const DataSym &Sym) { 297 GSH->addSymbol(Sym, Msf); 298 } 299 300 void GSIStreamBuilder::addGlobalSymbol(const ConstantSym &Sym) { 301 GSH->addSymbol(Sym, Msf); 302 } 303 304 void GSIStreamBuilder::addGlobalSymbol(const codeview::CVSymbol &Sym) { 305 GSH->addSymbol(Sym); 306 } 307 308 static Error writeRecords(BinaryStreamWriter &Writer, 309 ArrayRef<CVSymbol> Records) { 310 BinaryItemStream<CVSymbol> ItemStream(support::endianness::little); 311 ItemStream.setItems(Records); 312 BinaryStreamRef RecordsRef(ItemStream); 313 return Writer.writeStreamRef(RecordsRef); 314 } 315 316 Error GSIStreamBuilder::commitSymbolRecordStream( 317 WritableBinaryStreamRef Stream) { 318 BinaryStreamWriter Writer(Stream); 319 320 // Write public symbol records first, followed by global symbol records. This 321 // must match the order that we assume in finalizeMsfLayout when computing 322 // PSHZero and GSHZero. 323 if (auto EC = writeRecords(Writer, PSH->Records)) 324 return EC; 325 if (auto EC = writeRecords(Writer, GSH->Records)) 326 return EC; 327 328 return Error::success(); 329 } 330 331 Error GSIStreamBuilder::commitPublicsHashStream( 332 WritableBinaryStreamRef Stream) { 333 BinaryStreamWriter Writer(Stream); 334 PublicsStreamHeader Header; 335 336 // FIXME: Fill these in. They are for incremental linking. 337 Header.SymHash = PSH->calculateSerializedLength(); 338 Header.AddrMap = PSH->Records.size() * 4; 339 Header.NumThunks = 0; 340 Header.SizeOfThunk = 0; 341 Header.ISectThunkTable = 0; 342 memset(Header.Padding, 0, sizeof(Header.Padding)); 343 Header.OffThunkTable = 0; 344 Header.NumSections = 0; 345 if (auto EC = Writer.writeObject(Header)) 346 return EC; 347 348 if (auto EC = PSH->commit(Writer)) 349 return EC; 350 351 std::vector<ulittle32_t> AddrMap = computeAddrMap(PSH->Records); 352 if (auto EC = Writer.writeArray(makeArrayRef(AddrMap))) 353 return EC; 354 355 return Error::success(); 356 } 357 358 Error GSIStreamBuilder::commitGlobalsHashStream( 359 WritableBinaryStreamRef Stream) { 360 BinaryStreamWriter Writer(Stream); 361 return GSH->commit(Writer); 362 } 363 364 Error GSIStreamBuilder::commit(const msf::MSFLayout &Layout, 365 WritableBinaryStreamRef Buffer) { 366 auto GS = WritableMappedBlockStream::createIndexedStream( 367 Layout, Buffer, getGlobalsStreamIndex(), Msf.getAllocator()); 368 auto PS = WritableMappedBlockStream::createIndexedStream( 369 Layout, Buffer, getPublicsStreamIndex(), Msf.getAllocator()); 370 auto PRS = WritableMappedBlockStream::createIndexedStream( 371 Layout, Buffer, getRecordStreamIdx(), Msf.getAllocator()); 372 373 if (auto EC = commitSymbolRecordStream(*PRS)) 374 return EC; 375 if (auto EC = commitGlobalsHashStream(*GS)) 376 return EC; 377 if (auto EC = commitPublicsHashStream(*PS)) 378 return EC; 379 return Error::success(); 380 } 381