1 //===- DbiStreamBuilder.cpp - PDB Dbi Stream Creation -----------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h" 11 12 #include "llvm/DebugInfo/CodeView/RecordName.h" 13 #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h" 14 #include "llvm/DebugInfo/CodeView/SymbolRecord.h" 15 #include "llvm/DebugInfo/CodeView/SymbolSerializer.h" 16 #include "llvm/DebugInfo/MSF/MSFBuilder.h" 17 #include "llvm/DebugInfo/MSF/MSFCommon.h" 18 #include "llvm/DebugInfo/MSF/MappedBlockStream.h" 19 #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h" 20 #include "llvm/DebugInfo/PDB/Native/Hash.h" 21 #include "llvm/Support/BinaryItemStream.h" 22 #include "llvm/Support/BinaryStreamWriter.h" 23 #include <algorithm> 24 #include <vector> 25 26 using namespace llvm; 27 using namespace llvm::msf; 28 using namespace llvm::pdb; 29 using namespace llvm::codeview; 30 31 struct llvm::pdb::GSIHashStreamBuilder { 32 std::vector<CVSymbol> Records; 33 uint32_t StreamIndex; 34 std::vector<PSHashRecord> HashRecords; 35 std::array<support::ulittle32_t, (IPHR_HASH + 32) / 32> HashBitmap; 36 std::vector<support::ulittle32_t> HashBuckets; 37 38 uint32_t calculateSerializedLength() const; 39 uint32_t calculateRecordByteSize() const; 40 Error commit(BinaryStreamWriter &Writer); 41 void finalizeBuckets(uint32_t RecordZeroOffset); 42 43 template <typename T> void addSymbol(const T &Symbol, MSFBuilder &Msf) { 44 T Copy(Symbol); 45 Records.push_back(SymbolSerializer::writeOneSymbol(Copy, Msf.getAllocator(), 46 CodeViewContainer::Pdb)); 47 } 48 void addSymbol(const CVSymbol &Symbol) { Records.push_back(Symbol); } 49 }; 50 51 uint32_t GSIHashStreamBuilder::calculateSerializedLength() const { 52 uint32_t Size = sizeof(GSIHashHeader); 53 Size += HashRecords.size() * sizeof(PSHashRecord); 54 Size += HashBitmap.size() * sizeof(uint32_t); 55 Size += HashBuckets.size() * sizeof(uint32_t); 56 return Size; 57 } 58 59 uint32_t GSIHashStreamBuilder::calculateRecordByteSize() const { 60 uint32_t Size = 0; 61 for (const auto &Sym : Records) 62 Size += Sym.length(); 63 return Size; 64 } 65 66 Error GSIHashStreamBuilder::commit(BinaryStreamWriter &Writer) { 67 GSIHashHeader Header; 68 Header.VerSignature = GSIHashHeader::HdrSignature; 69 Header.VerHdr = GSIHashHeader::HdrVersion; 70 Header.HrSize = HashRecords.size() * sizeof(PSHashRecord); 71 Header.NumBuckets = HashBitmap.size() * 4 + HashBuckets.size() * 4; 72 73 if (auto EC = Writer.writeObject(Header)) 74 return EC; 75 76 if (auto EC = Writer.writeArray(makeArrayRef(HashRecords))) 77 return EC; 78 if (auto EC = Writer.writeArray(makeArrayRef(HashBitmap))) 79 return EC; 80 if (auto EC = Writer.writeArray(makeArrayRef(HashBuckets))) 81 return EC; 82 return Error::success(); 83 } 84 85 static bool isAsciiString(StringRef S) { 86 return llvm::all_of(S, [](char C) { return unsigned(C) < 0x80; }); 87 } 88 89 // See `caseInsensitiveComparePchPchCchCch` in gsi.cpp 90 static bool gsiRecordLess(StringRef S1, StringRef S2) { 91 size_t LS = S1.size(); 92 size_t RS = S2.size(); 93 // Shorter strings always compare less than longer strings. 94 if (LS != RS) 95 return LS < RS; 96 97 // If either string contains non ascii characters, memcmp them. 98 if (LLVM_UNLIKELY(!isAsciiString(S1) || !isAsciiString(S2))) 99 return memcmp(S1.data(), S2.data(), LS) < 0; 100 101 // Both strings are ascii, perform a case-insenstive comparison. 102 return S1.compare_lower(S2.data()) < 0; 103 } 104 105 void GSIHashStreamBuilder::finalizeBuckets(uint32_t RecordZeroOffset) { 106 std::array<std::vector<std::pair<StringRef, PSHashRecord>>, IPHR_HASH + 1> 107 TmpBuckets; 108 uint32_t SymOffset = RecordZeroOffset; 109 for (const CVSymbol &Sym : Records) { 110 PSHashRecord HR; 111 // Add one when writing symbol offsets to disk. See GSI1::fixSymRecs. 112 HR.Off = SymOffset + 1; 113 HR.CRef = 1; // Always use a refcount of 1. 114 115 // Hash the name to figure out which bucket this goes into. 116 StringRef Name = getSymbolName(Sym); 117 size_t BucketIdx = hashStringV1(Name) % IPHR_HASH; 118 TmpBuckets[BucketIdx].push_back(std::make_pair(Name, HR)); 119 SymOffset += Sym.length(); 120 } 121 122 // Compute the three tables: the hash records in bucket and chain order, the 123 // bucket presence bitmap, and the bucket chain start offsets. 124 HashRecords.reserve(Records.size()); 125 for (ulittle32_t &Word : HashBitmap) 126 Word = 0; 127 for (size_t BucketIdx = 0; BucketIdx < IPHR_HASH + 1; ++BucketIdx) { 128 auto &Bucket = TmpBuckets[BucketIdx]; 129 if (Bucket.empty()) 130 continue; 131 HashBitmap[BucketIdx / 32] |= 1U << (BucketIdx % 32); 132 133 // Calculate what the offset of the first hash record in the chain would 134 // be if it were inflated to contain 32-bit pointers. On a 32-bit system, 135 // each record would be 12 bytes. See HROffsetCalc in gsi.h. 136 const int SizeOfHROffsetCalc = 12; 137 ulittle32_t ChainStartOff = 138 ulittle32_t(HashRecords.size() * SizeOfHROffsetCalc); 139 HashBuckets.push_back(ChainStartOff); 140 141 // Sort each bucket by memcmp of the symbol's name. It's important that 142 // we use the same sorting algorithm as is used by the reference 143 // implementation to ensure that the search for a record within a bucket 144 // can properly early-out when it detects the record won't be found. The 145 // algorithm used here corredsponds to the function 146 // caseInsensitiveComparePchPchCchCch in the reference implementation. 147 llvm::sort(Bucket, [](const std::pair<StringRef, PSHashRecord> &Left, 148 const std::pair<StringRef, PSHashRecord> &Right) { 149 return gsiRecordLess(Left.first, Right.first); 150 }); 151 152 for (const auto &Entry : Bucket) 153 HashRecords.push_back(Entry.second); 154 } 155 } 156 157 GSIStreamBuilder::GSIStreamBuilder(msf::MSFBuilder &Msf) 158 : Msf(Msf), PSH(llvm::make_unique<GSIHashStreamBuilder>()), 159 GSH(llvm::make_unique<GSIHashStreamBuilder>()) {} 160 161 GSIStreamBuilder::~GSIStreamBuilder() {} 162 163 uint32_t GSIStreamBuilder::calculatePublicsHashStreamSize() const { 164 uint32_t Size = 0; 165 Size += sizeof(PublicsStreamHeader); 166 Size += PSH->calculateSerializedLength(); 167 Size += PSH->Records.size() * sizeof(uint32_t); // AddrMap 168 // FIXME: Add thunk map and section offsets for incremental linking. 169 170 return Size; 171 } 172 173 uint32_t GSIStreamBuilder::calculateGlobalsHashStreamSize() const { 174 return GSH->calculateSerializedLength(); 175 } 176 177 Error GSIStreamBuilder::finalizeMsfLayout() { 178 // First we write public symbol records, then we write global symbol records. 179 uint32_t PSHZero = 0; 180 uint32_t GSHZero = PSH->calculateRecordByteSize(); 181 182 PSH->finalizeBuckets(PSHZero); 183 GSH->finalizeBuckets(GSHZero); 184 185 Expected<uint32_t> Idx = Msf.addStream(calculateGlobalsHashStreamSize()); 186 if (!Idx) 187 return Idx.takeError(); 188 GSH->StreamIndex = *Idx; 189 Idx = Msf.addStream(calculatePublicsHashStreamSize()); 190 if (!Idx) 191 return Idx.takeError(); 192 PSH->StreamIndex = *Idx; 193 194 uint32_t RecordBytes = 195 GSH->calculateRecordByteSize() + PSH->calculateRecordByteSize(); 196 197 Idx = Msf.addStream(RecordBytes); 198 if (!Idx) 199 return Idx.takeError(); 200 RecordStreamIdx = *Idx; 201 return Error::success(); 202 } 203 204 static bool comparePubSymByAddrAndName( 205 const std::pair<const CVSymbol *, const PublicSym32 *> &LS, 206 const std::pair<const CVSymbol *, const PublicSym32 *> &RS) { 207 if (LS.second->Segment != RS.second->Segment) 208 return LS.second->Segment < RS.second->Segment; 209 if (LS.second->Offset != RS.second->Offset) 210 return LS.second->Offset < RS.second->Offset; 211 212 return LS.second->Name < RS.second->Name; 213 } 214 215 /// Compute the address map. The address map is an array of symbol offsets 216 /// sorted so that it can be binary searched by address. 217 static std::vector<ulittle32_t> computeAddrMap(ArrayRef<CVSymbol> Records) { 218 // Make a vector of pointers to the symbols so we can sort it by address. 219 // Also gather the symbol offsets while we're at it. 220 221 std::vector<PublicSym32> DeserializedPublics; 222 std::vector<std::pair<const CVSymbol *, const PublicSym32 *>> PublicsByAddr; 223 std::vector<uint32_t> SymOffsets; 224 DeserializedPublics.reserve(Records.size()); 225 PublicsByAddr.reserve(Records.size()); 226 SymOffsets.reserve(Records.size()); 227 228 uint32_t SymOffset = 0; 229 for (const CVSymbol &Sym : Records) { 230 assert(Sym.kind() == SymbolKind::S_PUB32); 231 DeserializedPublics.push_back( 232 cantFail(SymbolDeserializer::deserializeAs<PublicSym32>(Sym))); 233 PublicsByAddr.emplace_back(&Sym, &DeserializedPublics.back()); 234 SymOffsets.push_back(SymOffset); 235 SymOffset += Sym.length(); 236 } 237 std::stable_sort(PublicsByAddr.begin(), PublicsByAddr.end(), 238 comparePubSymByAddrAndName); 239 240 // Fill in the symbol offsets in the appropriate order. 241 std::vector<ulittle32_t> AddrMap; 242 AddrMap.reserve(Records.size()); 243 for (auto &Sym : PublicsByAddr) { 244 ptrdiff_t Idx = std::distance(Records.data(), Sym.first); 245 assert(Idx >= 0 && size_t(Idx) < Records.size()); 246 AddrMap.push_back(ulittle32_t(SymOffsets[Idx])); 247 } 248 return AddrMap; 249 } 250 251 uint32_t GSIStreamBuilder::getPublicsStreamIndex() const { 252 return PSH->StreamIndex; 253 } 254 255 uint32_t GSIStreamBuilder::getGlobalsStreamIndex() const { 256 return GSH->StreamIndex; 257 } 258 259 void GSIStreamBuilder::addPublicSymbol(const PublicSym32 &Pub) { 260 PSH->addSymbol(Pub, Msf); 261 } 262 263 void GSIStreamBuilder::addGlobalSymbol(const ProcRefSym &Sym) { 264 GSH->addSymbol(Sym, Msf); 265 } 266 267 void GSIStreamBuilder::addGlobalSymbol(const DataSym &Sym) { 268 GSH->addSymbol(Sym, Msf); 269 } 270 271 void GSIStreamBuilder::addGlobalSymbol(const ConstantSym &Sym) { 272 GSH->addSymbol(Sym, Msf); 273 } 274 275 void GSIStreamBuilder::addGlobalSymbol(const UDTSym &Sym) { 276 GSH->addSymbol(Sym, Msf); 277 } 278 279 void GSIStreamBuilder::addGlobalSymbol(const codeview::CVSymbol &Sym) { 280 GSH->addSymbol(Sym); 281 } 282 283 static Error writeRecords(BinaryStreamWriter &Writer, 284 ArrayRef<CVSymbol> Records) { 285 BinaryItemStream<CVSymbol> ItemStream(support::endianness::little); 286 ItemStream.setItems(Records); 287 BinaryStreamRef RecordsRef(ItemStream); 288 return Writer.writeStreamRef(RecordsRef); 289 } 290 291 Error GSIStreamBuilder::commitSymbolRecordStream( 292 WritableBinaryStreamRef Stream) { 293 BinaryStreamWriter Writer(Stream); 294 295 // Write public symbol records first, followed by global symbol records. This 296 // must match the order that we assume in finalizeMsfLayout when computing 297 // PSHZero and GSHZero. 298 if (auto EC = writeRecords(Writer, PSH->Records)) 299 return EC; 300 if (auto EC = writeRecords(Writer, GSH->Records)) 301 return EC; 302 303 return Error::success(); 304 } 305 306 Error GSIStreamBuilder::commitPublicsHashStream( 307 WritableBinaryStreamRef Stream) { 308 BinaryStreamWriter Writer(Stream); 309 PublicsStreamHeader Header; 310 311 // FIXME: Fill these in. They are for incremental linking. 312 Header.SymHash = PSH->calculateSerializedLength(); 313 Header.AddrMap = PSH->Records.size() * 4; 314 Header.NumThunks = 0; 315 Header.SizeOfThunk = 0; 316 Header.ISectThunkTable = 0; 317 memset(Header.Padding, 0, sizeof(Header.Padding)); 318 Header.OffThunkTable = 0; 319 Header.NumSections = 0; 320 if (auto EC = Writer.writeObject(Header)) 321 return EC; 322 323 if (auto EC = PSH->commit(Writer)) 324 return EC; 325 326 std::vector<ulittle32_t> AddrMap = computeAddrMap(PSH->Records); 327 if (auto EC = Writer.writeArray(makeArrayRef(AddrMap))) 328 return EC; 329 330 return Error::success(); 331 } 332 333 Error GSIStreamBuilder::commitGlobalsHashStream( 334 WritableBinaryStreamRef Stream) { 335 BinaryStreamWriter Writer(Stream); 336 return GSH->commit(Writer); 337 } 338 339 Error GSIStreamBuilder::commit(const msf::MSFLayout &Layout, 340 WritableBinaryStreamRef Buffer) { 341 auto GS = WritableMappedBlockStream::createIndexedStream( 342 Layout, Buffer, getGlobalsStreamIndex(), Msf.getAllocator()); 343 auto PS = WritableMappedBlockStream::createIndexedStream( 344 Layout, Buffer, getPublicsStreamIndex(), Msf.getAllocator()); 345 auto PRS = WritableMappedBlockStream::createIndexedStream( 346 Layout, Buffer, getRecordStreamIdx(), Msf.getAllocator()); 347 348 if (auto EC = commitSymbolRecordStream(*PRS)) 349 return EC; 350 if (auto EC = commitGlobalsHashStream(*GS)) 351 return EC; 352 if (auto EC = commitPublicsHashStream(*PS)) 353 return EC; 354 return Error::success(); 355 } 356