1 //===- DbiStreamBuilder.cpp - PDB Dbi Stream Creation -----------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h" 11 12 #include "llvm/DebugInfo/CodeView/RecordName.h" 13 #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h" 14 #include "llvm/DebugInfo/CodeView/SymbolRecord.h" 15 #include "llvm/DebugInfo/CodeView/SymbolSerializer.h" 16 #include "llvm/DebugInfo/MSF/MSFBuilder.h" 17 #include "llvm/DebugInfo/MSF/MSFCommon.h" 18 #include "llvm/DebugInfo/MSF/MappedBlockStream.h" 19 #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h" 20 #include "llvm/DebugInfo/PDB/Native/Hash.h" 21 #include "llvm/Support/BinaryItemStream.h" 22 #include "llvm/Support/BinaryStreamWriter.h" 23 #include <algorithm> 24 #include <vector> 25 26 using namespace llvm; 27 using namespace llvm::msf; 28 using namespace llvm::pdb; 29 using namespace llvm::codeview; 30 31 struct llvm::pdb::GSIHashStreamBuilder { 32 std::vector<CVSymbol> Records; 33 uint32_t StreamIndex; 34 std::vector<PSHashRecord> HashRecords; 35 std::array<support::ulittle32_t, (IPHR_HASH + 32) / 32> HashBitmap; 36 std::vector<support::ulittle32_t> HashBuckets; 37 38 uint32_t calculateSerializedLength() const; 39 uint32_t calculateRecordByteSize() const; 40 Error commit(BinaryStreamWriter &Writer); 41 void finalizeBuckets(uint32_t RecordZeroOffset); 42 43 template <typename T> void addSymbol(const T &Symbol, MSFBuilder &Msf) { 44 T Copy(Symbol); 45 Records.push_back(SymbolSerializer::writeOneSymbol(Copy, Msf.getAllocator(), 46 CodeViewContainer::Pdb)); 47 } 48 void addSymbol(const CVSymbol &Symbol) { Records.push_back(Symbol); } 49 }; 50 51 uint32_t GSIHashStreamBuilder::calculateSerializedLength() const { 52 uint32_t Size = sizeof(GSIHashHeader); 53 Size += HashRecords.size() * sizeof(PSHashRecord); 54 Size += HashBitmap.size() * sizeof(uint32_t); 55 Size += HashBuckets.size() * sizeof(uint32_t); 56 return Size; 57 } 58 59 uint32_t GSIHashStreamBuilder::calculateRecordByteSize() const { 60 uint32_t Size = 0; 61 for (const auto &Sym : Records) 62 Size += Sym.length(); 63 return Size; 64 } 65 66 Error GSIHashStreamBuilder::commit(BinaryStreamWriter &Writer) { 67 GSIHashHeader Header; 68 Header.VerSignature = GSIHashHeader::HdrSignature; 69 Header.VerHdr = GSIHashHeader::HdrVersion; 70 Header.HrSize = HashRecords.size() * sizeof(PSHashRecord); 71 Header.NumBuckets = HashBitmap.size() * 4 + HashBuckets.size() * 4; 72 73 if (auto EC = Writer.writeObject(Header)) 74 return EC; 75 76 if (auto EC = Writer.writeArray(makeArrayRef(HashRecords))) 77 return EC; 78 if (auto EC = Writer.writeArray(makeArrayRef(HashBitmap))) 79 return EC; 80 if (auto EC = Writer.writeArray(makeArrayRef(HashBuckets))) 81 return EC; 82 return Error::success(); 83 } 84 85 static bool isAsciiString(StringRef S) { 86 return llvm::all_of(S, [](char C) { return unsigned(C) < 0x80; }); 87 } 88 89 // See `caseInsensitiveComparePchPchCchCch` in gsi.cpp 90 static bool gsiRecordLess(StringRef S1, StringRef S2) { 91 size_t LS = S1.size(); 92 size_t RS = S2.size(); 93 // Shorter strings always compare less than longer strings. 94 if (LS != RS) 95 return LS < RS; 96 97 // If either string contains non ascii characters, memcmp them. 98 if (LLVM_UNLIKELY(!isAsciiString(S1) || !isAsciiString(S2))) 99 return memcmp(S1.data(), S2.data(), LS) < 0; 100 101 // Both strings are ascii, perform a case-insenstive comparison. 102 return S1.compare_lower(S2.data()) < 0; 103 } 104 105 void GSIHashStreamBuilder::finalizeBuckets(uint32_t RecordZeroOffset) { 106 std::array<std::vector<std::pair<StringRef, PSHashRecord>>, IPHR_HASH + 1> 107 TmpBuckets; 108 uint32_t SymOffset = RecordZeroOffset; 109 for (const CVSymbol &Sym : Records) { 110 PSHashRecord HR; 111 // Add one when writing symbol offsets to disk. See GSI1::fixSymRecs. 112 HR.Off = SymOffset + 1; 113 HR.CRef = 1; // Always use a refcount of 1. 114 115 // Hash the name to figure out which bucket this goes into. 116 StringRef Name = getSymbolName(Sym); 117 size_t BucketIdx = hashStringV1(Name) % IPHR_HASH; 118 TmpBuckets[BucketIdx].push_back(std::make_pair(Name, HR)); 119 SymOffset += Sym.length(); 120 } 121 122 // Compute the three tables: the hash records in bucket and chain order, the 123 // bucket presence bitmap, and the bucket chain start offsets. 124 HashRecords.reserve(Records.size()); 125 for (ulittle32_t &Word : HashBitmap) 126 Word = 0; 127 for (size_t BucketIdx = 0; BucketIdx < IPHR_HASH + 1; ++BucketIdx) { 128 auto &Bucket = TmpBuckets[BucketIdx]; 129 if (Bucket.empty()) 130 continue; 131 HashBitmap[BucketIdx / 32] |= 1U << (BucketIdx % 32); 132 133 // Calculate what the offset of the first hash record in the chain would 134 // be if it were inflated to contain 32-bit pointers. On a 32-bit system, 135 // each record would be 12 bytes. See HROffsetCalc in gsi.h. 136 const int SizeOfHROffsetCalc = 12; 137 ulittle32_t ChainStartOff = 138 ulittle32_t(HashRecords.size() * SizeOfHROffsetCalc); 139 HashBuckets.push_back(ChainStartOff); 140 141 // Sort each bucket by memcmp of the symbol's name. It's important that 142 // we use the same sorting algorithm as is used by the reference 143 // implementation to ensure that the search for a record within a bucket 144 // can properly early-out when it detects the record won't be found. The 145 // algorithm used here corredsponds to the function 146 // caseInsensitiveComparePchPchCchCch in the reference implementation. 147 llvm::sort(Bucket.begin(), Bucket.end(), 148 [](const std::pair<StringRef, PSHashRecord> &Left, 149 const std::pair<StringRef, PSHashRecord> &Right) { 150 return gsiRecordLess(Left.first, Right.first); 151 }); 152 153 for (const auto &Entry : Bucket) 154 HashRecords.push_back(Entry.second); 155 } 156 } 157 158 GSIStreamBuilder::GSIStreamBuilder(msf::MSFBuilder &Msf) 159 : Msf(Msf), PSH(llvm::make_unique<GSIHashStreamBuilder>()), 160 GSH(llvm::make_unique<GSIHashStreamBuilder>()) {} 161 162 GSIStreamBuilder::~GSIStreamBuilder() {} 163 164 uint32_t GSIStreamBuilder::calculatePublicsHashStreamSize() const { 165 uint32_t Size = 0; 166 Size += sizeof(PublicsStreamHeader); 167 Size += PSH->calculateSerializedLength(); 168 Size += PSH->Records.size() * sizeof(uint32_t); // AddrMap 169 // FIXME: Add thunk map and section offsets for incremental linking. 170 171 return Size; 172 } 173 174 uint32_t GSIStreamBuilder::calculateGlobalsHashStreamSize() const { 175 return GSH->calculateSerializedLength(); 176 } 177 178 Error GSIStreamBuilder::finalizeMsfLayout() { 179 // First we write public symbol records, then we write global symbol records. 180 uint32_t PSHZero = 0; 181 uint32_t GSHZero = PSH->calculateRecordByteSize(); 182 183 PSH->finalizeBuckets(PSHZero); 184 GSH->finalizeBuckets(GSHZero); 185 186 Expected<uint32_t> Idx = Msf.addStream(calculateGlobalsHashStreamSize()); 187 if (!Idx) 188 return Idx.takeError(); 189 GSH->StreamIndex = *Idx; 190 Idx = Msf.addStream(calculatePublicsHashStreamSize()); 191 if (!Idx) 192 return Idx.takeError(); 193 PSH->StreamIndex = *Idx; 194 195 uint32_t RecordBytes = 196 GSH->calculateRecordByteSize() + PSH->calculateRecordByteSize(); 197 198 Idx = Msf.addStream(RecordBytes); 199 if (!Idx) 200 return Idx.takeError(); 201 RecordStreamIdx = *Idx; 202 return Error::success(); 203 } 204 205 static bool comparePubSymByAddrAndName( 206 const std::pair<const CVSymbol *, const PublicSym32 *> &LS, 207 const std::pair<const CVSymbol *, const PublicSym32 *> &RS) { 208 if (LS.second->Segment != RS.second->Segment) 209 return LS.second->Segment < RS.second->Segment; 210 if (LS.second->Offset != RS.second->Offset) 211 return LS.second->Offset < RS.second->Offset; 212 213 return LS.second->Name < RS.second->Name; 214 } 215 216 /// Compute the address map. The address map is an array of symbol offsets 217 /// sorted so that it can be binary searched by address. 218 static std::vector<ulittle32_t> computeAddrMap(ArrayRef<CVSymbol> Records) { 219 // Make a vector of pointers to the symbols so we can sort it by address. 220 // Also gather the symbol offsets while we're at it. 221 222 std::vector<PublicSym32> DeserializedPublics; 223 std::vector<std::pair<const CVSymbol *, const PublicSym32 *>> PublicsByAddr; 224 std::vector<uint32_t> SymOffsets; 225 DeserializedPublics.reserve(Records.size()); 226 PublicsByAddr.reserve(Records.size()); 227 SymOffsets.reserve(Records.size()); 228 229 uint32_t SymOffset = 0; 230 for (const CVSymbol &Sym : Records) { 231 assert(Sym.kind() == SymbolKind::S_PUB32); 232 DeserializedPublics.push_back( 233 cantFail(SymbolDeserializer::deserializeAs<PublicSym32>(Sym))); 234 PublicsByAddr.emplace_back(&Sym, &DeserializedPublics.back()); 235 SymOffsets.push_back(SymOffset); 236 SymOffset += Sym.length(); 237 } 238 std::stable_sort(PublicsByAddr.begin(), PublicsByAddr.end(), 239 comparePubSymByAddrAndName); 240 241 // Fill in the symbol offsets in the appropriate order. 242 std::vector<ulittle32_t> AddrMap; 243 AddrMap.reserve(Records.size()); 244 for (auto &Sym : PublicsByAddr) { 245 ptrdiff_t Idx = std::distance(Records.data(), Sym.first); 246 assert(Idx >= 0 && size_t(Idx) < Records.size()); 247 AddrMap.push_back(ulittle32_t(SymOffsets[Idx])); 248 } 249 return AddrMap; 250 } 251 252 uint32_t GSIStreamBuilder::getPublicsStreamIndex() const { 253 return PSH->StreamIndex; 254 } 255 256 uint32_t GSIStreamBuilder::getGlobalsStreamIndex() const { 257 return GSH->StreamIndex; 258 } 259 260 void GSIStreamBuilder::addPublicSymbol(const PublicSym32 &Pub) { 261 PSH->addSymbol(Pub, Msf); 262 } 263 264 void GSIStreamBuilder::addGlobalSymbol(const ProcRefSym &Sym) { 265 GSH->addSymbol(Sym, Msf); 266 } 267 268 void GSIStreamBuilder::addGlobalSymbol(const DataSym &Sym) { 269 GSH->addSymbol(Sym, Msf); 270 } 271 272 void GSIStreamBuilder::addGlobalSymbol(const ConstantSym &Sym) { 273 GSH->addSymbol(Sym, Msf); 274 } 275 276 void GSIStreamBuilder::addGlobalSymbol(const UDTSym &Sym) { 277 GSH->addSymbol(Sym, Msf); 278 } 279 280 void GSIStreamBuilder::addGlobalSymbol(const codeview::CVSymbol &Sym) { 281 GSH->addSymbol(Sym); 282 } 283 284 static Error writeRecords(BinaryStreamWriter &Writer, 285 ArrayRef<CVSymbol> Records) { 286 BinaryItemStream<CVSymbol> ItemStream(support::endianness::little); 287 ItemStream.setItems(Records); 288 BinaryStreamRef RecordsRef(ItemStream); 289 return Writer.writeStreamRef(RecordsRef); 290 } 291 292 Error GSIStreamBuilder::commitSymbolRecordStream( 293 WritableBinaryStreamRef Stream) { 294 BinaryStreamWriter Writer(Stream); 295 296 // Write public symbol records first, followed by global symbol records. This 297 // must match the order that we assume in finalizeMsfLayout when computing 298 // PSHZero and GSHZero. 299 if (auto EC = writeRecords(Writer, PSH->Records)) 300 return EC; 301 if (auto EC = writeRecords(Writer, GSH->Records)) 302 return EC; 303 304 return Error::success(); 305 } 306 307 Error GSIStreamBuilder::commitPublicsHashStream( 308 WritableBinaryStreamRef Stream) { 309 BinaryStreamWriter Writer(Stream); 310 PublicsStreamHeader Header; 311 312 // FIXME: Fill these in. They are for incremental linking. 313 Header.NumThunks = 0; 314 Header.SizeOfThunk = 0; 315 Header.ISectThunkTable = 0; 316 Header.OffThunkTable = 0; 317 Header.NumSections = 0; 318 Header.SymHash = PSH->calculateSerializedLength(); 319 Header.AddrMap = PSH->Records.size() * 4; 320 if (auto EC = Writer.writeObject(Header)) 321 return EC; 322 323 if (auto EC = PSH->commit(Writer)) 324 return EC; 325 326 std::vector<ulittle32_t> AddrMap = computeAddrMap(PSH->Records); 327 if (auto EC = Writer.writeArray(makeArrayRef(AddrMap))) 328 return EC; 329 330 return Error::success(); 331 } 332 333 Error GSIStreamBuilder::commitGlobalsHashStream( 334 WritableBinaryStreamRef Stream) { 335 BinaryStreamWriter Writer(Stream); 336 return GSH->commit(Writer); 337 } 338 339 Error GSIStreamBuilder::commit(const msf::MSFLayout &Layout, 340 WritableBinaryStreamRef Buffer) { 341 auto GS = WritableMappedBlockStream::createIndexedStream( 342 Layout, Buffer, getGlobalsStreamIndex(), Msf.getAllocator()); 343 auto PS = WritableMappedBlockStream::createIndexedStream( 344 Layout, Buffer, getPublicsStreamIndex(), Msf.getAllocator()); 345 auto PRS = WritableMappedBlockStream::createIndexedStream( 346 Layout, Buffer, getRecordStreamIdx(), Msf.getAllocator()); 347 348 if (auto EC = commitSymbolRecordStream(*PRS)) 349 return EC; 350 if (auto EC = commitGlobalsHashStream(*GS)) 351 return EC; 352 if (auto EC = commitPublicsHashStream(*PS)) 353 return EC; 354 return Error::success(); 355 } 356