1 //===- BitcodeAnalyzer.cpp - Internal BitcodeAnalyzer implementation ------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/Bitcode/BitcodeAnalyzer.h" 10 #include "llvm/Bitcode/BitcodeReader.h" 11 #include "llvm/Bitcode/LLVMBitCodes.h" 12 #include "llvm/Support/Format.h" 13 #include "llvm/Support/SHA1.h" 14 15 using namespace llvm; 16 17 static Error reportError(StringRef Message) { 18 return createStringError(std::errc::illegal_byte_sequence, Message.data()); 19 } 20 21 /// Return a symbolic block name if known, otherwise return null. 22 static Optional<const char *> GetBlockName(unsigned BlockID, 23 const BitstreamBlockInfo &BlockInfo, 24 CurStreamTypeType CurStreamType) { 25 // Standard blocks for all bitcode files. 26 if (BlockID < bitc::FIRST_APPLICATION_BLOCKID) { 27 if (BlockID == bitc::BLOCKINFO_BLOCK_ID) 28 return "BLOCKINFO_BLOCK"; 29 return None; 30 } 31 32 // Check to see if we have a blockinfo record for this block, with a name. 33 if (const BitstreamBlockInfo::BlockInfo *Info = 34 BlockInfo.getBlockInfo(BlockID)) { 35 if (!Info->Name.empty()) 36 return Info->Name.c_str(); 37 } 38 39 if (CurStreamType != LLVMIRBitstream) 40 return None; 41 42 switch (BlockID) { 43 default: 44 return None; 45 case bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID: 46 return "OPERAND_BUNDLE_TAGS_BLOCK"; 47 case bitc::MODULE_BLOCK_ID: 48 return "MODULE_BLOCK"; 49 case bitc::PARAMATTR_BLOCK_ID: 50 return "PARAMATTR_BLOCK"; 51 case bitc::PARAMATTR_GROUP_BLOCK_ID: 52 return "PARAMATTR_GROUP_BLOCK_ID"; 53 case bitc::TYPE_BLOCK_ID_NEW: 54 return "TYPE_BLOCK_ID"; 55 case bitc::CONSTANTS_BLOCK_ID: 56 return "CONSTANTS_BLOCK"; 57 case bitc::FUNCTION_BLOCK_ID: 58 return "FUNCTION_BLOCK"; 59 case bitc::IDENTIFICATION_BLOCK_ID: 60 return "IDENTIFICATION_BLOCK_ID"; 61 case bitc::VALUE_SYMTAB_BLOCK_ID: 62 return "VALUE_SYMTAB"; 63 case bitc::METADATA_BLOCK_ID: 64 return "METADATA_BLOCK"; 65 case bitc::METADATA_KIND_BLOCK_ID: 66 return "METADATA_KIND_BLOCK"; 67 case bitc::METADATA_ATTACHMENT_ID: 68 return "METADATA_ATTACHMENT_BLOCK"; 69 case bitc::USELIST_BLOCK_ID: 70 return "USELIST_BLOCK_ID"; 71 case bitc::GLOBALVAL_SUMMARY_BLOCK_ID: 72 return "GLOBALVAL_SUMMARY_BLOCK"; 73 case bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID: 74 return "FULL_LTO_GLOBALVAL_SUMMARY_BLOCK"; 75 case bitc::MODULE_STRTAB_BLOCK_ID: 76 return "MODULE_STRTAB_BLOCK"; 77 case bitc::STRTAB_BLOCK_ID: 78 return "STRTAB_BLOCK"; 79 case bitc::SYMTAB_BLOCK_ID: 80 return "SYMTAB_BLOCK"; 81 } 82 } 83 84 /// Return a symbolic code name if known, otherwise return null. 85 static Optional<const char *> GetCodeName(unsigned CodeID, unsigned BlockID, 86 const BitstreamBlockInfo &BlockInfo, 87 CurStreamTypeType CurStreamType) { 88 // Standard blocks for all bitcode files. 89 if (BlockID < bitc::FIRST_APPLICATION_BLOCKID) { 90 if (BlockID == bitc::BLOCKINFO_BLOCK_ID) { 91 switch (CodeID) { 92 default: 93 return None; 94 case bitc::BLOCKINFO_CODE_SETBID: 95 return "SETBID"; 96 case bitc::BLOCKINFO_CODE_BLOCKNAME: 97 return "BLOCKNAME"; 98 case bitc::BLOCKINFO_CODE_SETRECORDNAME: 99 return "SETRECORDNAME"; 100 } 101 } 102 return None; 103 } 104 105 // Check to see if we have a blockinfo record for this record, with a name. 106 if (const BitstreamBlockInfo::BlockInfo *Info = 107 BlockInfo.getBlockInfo(BlockID)) { 108 for (unsigned i = 0, e = Info->RecordNames.size(); i != e; ++i) 109 if (Info->RecordNames[i].first == CodeID) 110 return Info->RecordNames[i].second.c_str(); 111 } 112 113 if (CurStreamType != LLVMIRBitstream) 114 return None; 115 116 #define STRINGIFY_CODE(PREFIX, CODE) \ 117 case bitc::PREFIX##_##CODE: \ 118 return #CODE; 119 switch (BlockID) { 120 default: 121 return None; 122 case bitc::MODULE_BLOCK_ID: 123 switch (CodeID) { 124 default: 125 return None; 126 STRINGIFY_CODE(MODULE_CODE, VERSION) 127 STRINGIFY_CODE(MODULE_CODE, TRIPLE) 128 STRINGIFY_CODE(MODULE_CODE, DATALAYOUT) 129 STRINGIFY_CODE(MODULE_CODE, ASM) 130 STRINGIFY_CODE(MODULE_CODE, SECTIONNAME) 131 STRINGIFY_CODE(MODULE_CODE, DEPLIB) // FIXME: Remove in 4.0 132 STRINGIFY_CODE(MODULE_CODE, GLOBALVAR) 133 STRINGIFY_CODE(MODULE_CODE, FUNCTION) 134 STRINGIFY_CODE(MODULE_CODE, ALIAS) 135 STRINGIFY_CODE(MODULE_CODE, GCNAME) 136 STRINGIFY_CODE(MODULE_CODE, VSTOFFSET) 137 STRINGIFY_CODE(MODULE_CODE, METADATA_VALUES_UNUSED) 138 STRINGIFY_CODE(MODULE_CODE, SOURCE_FILENAME) 139 STRINGIFY_CODE(MODULE_CODE, HASH) 140 } 141 case bitc::IDENTIFICATION_BLOCK_ID: 142 switch (CodeID) { 143 default: 144 return None; 145 STRINGIFY_CODE(IDENTIFICATION_CODE, STRING) 146 STRINGIFY_CODE(IDENTIFICATION_CODE, EPOCH) 147 } 148 case bitc::PARAMATTR_BLOCK_ID: 149 switch (CodeID) { 150 default: 151 return None; 152 // FIXME: Should these be different? 153 case bitc::PARAMATTR_CODE_ENTRY_OLD: 154 return "ENTRY"; 155 case bitc::PARAMATTR_CODE_ENTRY: 156 return "ENTRY"; 157 } 158 case bitc::PARAMATTR_GROUP_BLOCK_ID: 159 switch (CodeID) { 160 default: 161 return None; 162 case bitc::PARAMATTR_GRP_CODE_ENTRY: 163 return "ENTRY"; 164 } 165 case bitc::TYPE_BLOCK_ID_NEW: 166 switch (CodeID) { 167 default: 168 return None; 169 STRINGIFY_CODE(TYPE_CODE, NUMENTRY) 170 STRINGIFY_CODE(TYPE_CODE, VOID) 171 STRINGIFY_CODE(TYPE_CODE, FLOAT) 172 STRINGIFY_CODE(TYPE_CODE, DOUBLE) 173 STRINGIFY_CODE(TYPE_CODE, LABEL) 174 STRINGIFY_CODE(TYPE_CODE, OPAQUE) 175 STRINGIFY_CODE(TYPE_CODE, INTEGER) 176 STRINGIFY_CODE(TYPE_CODE, POINTER) 177 STRINGIFY_CODE(TYPE_CODE, ARRAY) 178 STRINGIFY_CODE(TYPE_CODE, VECTOR) 179 STRINGIFY_CODE(TYPE_CODE, X86_FP80) 180 STRINGIFY_CODE(TYPE_CODE, FP128) 181 STRINGIFY_CODE(TYPE_CODE, PPC_FP128) 182 STRINGIFY_CODE(TYPE_CODE, METADATA) 183 STRINGIFY_CODE(TYPE_CODE, STRUCT_ANON) 184 STRINGIFY_CODE(TYPE_CODE, STRUCT_NAME) 185 STRINGIFY_CODE(TYPE_CODE, STRUCT_NAMED) 186 STRINGIFY_CODE(TYPE_CODE, FUNCTION) 187 } 188 189 case bitc::CONSTANTS_BLOCK_ID: 190 switch (CodeID) { 191 default: 192 return None; 193 STRINGIFY_CODE(CST_CODE, SETTYPE) 194 STRINGIFY_CODE(CST_CODE, NULL) 195 STRINGIFY_CODE(CST_CODE, UNDEF) 196 STRINGIFY_CODE(CST_CODE, INTEGER) 197 STRINGIFY_CODE(CST_CODE, WIDE_INTEGER) 198 STRINGIFY_CODE(CST_CODE, FLOAT) 199 STRINGIFY_CODE(CST_CODE, AGGREGATE) 200 STRINGIFY_CODE(CST_CODE, STRING) 201 STRINGIFY_CODE(CST_CODE, CSTRING) 202 STRINGIFY_CODE(CST_CODE, CE_BINOP) 203 STRINGIFY_CODE(CST_CODE, CE_CAST) 204 STRINGIFY_CODE(CST_CODE, CE_GEP) 205 STRINGIFY_CODE(CST_CODE, CE_INBOUNDS_GEP) 206 STRINGIFY_CODE(CST_CODE, CE_SELECT) 207 STRINGIFY_CODE(CST_CODE, CE_EXTRACTELT) 208 STRINGIFY_CODE(CST_CODE, CE_INSERTELT) 209 STRINGIFY_CODE(CST_CODE, CE_SHUFFLEVEC) 210 STRINGIFY_CODE(CST_CODE, CE_CMP) 211 STRINGIFY_CODE(CST_CODE, INLINEASM) 212 STRINGIFY_CODE(CST_CODE, CE_SHUFVEC_EX) 213 STRINGIFY_CODE(CST_CODE, CE_UNOP) 214 case bitc::CST_CODE_BLOCKADDRESS: 215 return "CST_CODE_BLOCKADDRESS"; 216 STRINGIFY_CODE(CST_CODE, DATA) 217 } 218 case bitc::FUNCTION_BLOCK_ID: 219 switch (CodeID) { 220 default: 221 return None; 222 STRINGIFY_CODE(FUNC_CODE, DECLAREBLOCKS) 223 STRINGIFY_CODE(FUNC_CODE, INST_BINOP) 224 STRINGIFY_CODE(FUNC_CODE, INST_CAST) 225 STRINGIFY_CODE(FUNC_CODE, INST_GEP_OLD) 226 STRINGIFY_CODE(FUNC_CODE, INST_INBOUNDS_GEP_OLD) 227 STRINGIFY_CODE(FUNC_CODE, INST_SELECT) 228 STRINGIFY_CODE(FUNC_CODE, INST_EXTRACTELT) 229 STRINGIFY_CODE(FUNC_CODE, INST_INSERTELT) 230 STRINGIFY_CODE(FUNC_CODE, INST_SHUFFLEVEC) 231 STRINGIFY_CODE(FUNC_CODE, INST_CMP) 232 STRINGIFY_CODE(FUNC_CODE, INST_RET) 233 STRINGIFY_CODE(FUNC_CODE, INST_BR) 234 STRINGIFY_CODE(FUNC_CODE, INST_SWITCH) 235 STRINGIFY_CODE(FUNC_CODE, INST_INVOKE) 236 STRINGIFY_CODE(FUNC_CODE, INST_UNOP) 237 STRINGIFY_CODE(FUNC_CODE, INST_UNREACHABLE) 238 STRINGIFY_CODE(FUNC_CODE, INST_CLEANUPRET) 239 STRINGIFY_CODE(FUNC_CODE, INST_CATCHRET) 240 STRINGIFY_CODE(FUNC_CODE, INST_CATCHPAD) 241 STRINGIFY_CODE(FUNC_CODE, INST_PHI) 242 STRINGIFY_CODE(FUNC_CODE, INST_ALLOCA) 243 STRINGIFY_CODE(FUNC_CODE, INST_LOAD) 244 STRINGIFY_CODE(FUNC_CODE, INST_VAARG) 245 STRINGIFY_CODE(FUNC_CODE, INST_STORE) 246 STRINGIFY_CODE(FUNC_CODE, INST_EXTRACTVAL) 247 STRINGIFY_CODE(FUNC_CODE, INST_INSERTVAL) 248 STRINGIFY_CODE(FUNC_CODE, INST_CMP2) 249 STRINGIFY_CODE(FUNC_CODE, INST_VSELECT) 250 STRINGIFY_CODE(FUNC_CODE, DEBUG_LOC_AGAIN) 251 STRINGIFY_CODE(FUNC_CODE, INST_CALL) 252 STRINGIFY_CODE(FUNC_CODE, DEBUG_LOC) 253 STRINGIFY_CODE(FUNC_CODE, INST_GEP) 254 STRINGIFY_CODE(FUNC_CODE, OPERAND_BUNDLE) 255 STRINGIFY_CODE(FUNC_CODE, INST_FENCE) 256 STRINGIFY_CODE(FUNC_CODE, INST_ATOMICRMW) 257 STRINGIFY_CODE(FUNC_CODE, INST_LOADATOMIC) 258 STRINGIFY_CODE(FUNC_CODE, INST_STOREATOMIC) 259 STRINGIFY_CODE(FUNC_CODE, INST_CMPXCHG) 260 STRINGIFY_CODE(FUNC_CODE, INST_CALLBR) 261 } 262 case bitc::VALUE_SYMTAB_BLOCK_ID: 263 switch (CodeID) { 264 default: 265 return None; 266 STRINGIFY_CODE(VST_CODE, ENTRY) 267 STRINGIFY_CODE(VST_CODE, BBENTRY) 268 STRINGIFY_CODE(VST_CODE, FNENTRY) 269 STRINGIFY_CODE(VST_CODE, COMBINED_ENTRY) 270 } 271 case bitc::MODULE_STRTAB_BLOCK_ID: 272 switch (CodeID) { 273 default: 274 return None; 275 STRINGIFY_CODE(MST_CODE, ENTRY) 276 STRINGIFY_CODE(MST_CODE, HASH) 277 } 278 case bitc::GLOBALVAL_SUMMARY_BLOCK_ID: 279 case bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID: 280 switch (CodeID) { 281 default: 282 return None; 283 STRINGIFY_CODE(FS, PERMODULE) 284 STRINGIFY_CODE(FS, PERMODULE_PROFILE) 285 STRINGIFY_CODE(FS, PERMODULE_RELBF) 286 STRINGIFY_CODE(FS, PERMODULE_GLOBALVAR_INIT_REFS) 287 STRINGIFY_CODE(FS, PERMODULE_VTABLE_GLOBALVAR_INIT_REFS) 288 STRINGIFY_CODE(FS, COMBINED) 289 STRINGIFY_CODE(FS, COMBINED_PROFILE) 290 STRINGIFY_CODE(FS, COMBINED_GLOBALVAR_INIT_REFS) 291 STRINGIFY_CODE(FS, ALIAS) 292 STRINGIFY_CODE(FS, COMBINED_ALIAS) 293 STRINGIFY_CODE(FS, COMBINED_ORIGINAL_NAME) 294 STRINGIFY_CODE(FS, VERSION) 295 STRINGIFY_CODE(FS, FLAGS) 296 STRINGIFY_CODE(FS, TYPE_TESTS) 297 STRINGIFY_CODE(FS, TYPE_TEST_ASSUME_VCALLS) 298 STRINGIFY_CODE(FS, TYPE_CHECKED_LOAD_VCALLS) 299 STRINGIFY_CODE(FS, TYPE_TEST_ASSUME_CONST_VCALL) 300 STRINGIFY_CODE(FS, TYPE_CHECKED_LOAD_CONST_VCALL) 301 STRINGIFY_CODE(FS, VALUE_GUID) 302 STRINGIFY_CODE(FS, CFI_FUNCTION_DEFS) 303 STRINGIFY_CODE(FS, CFI_FUNCTION_DECLS) 304 STRINGIFY_CODE(FS, TYPE_ID) 305 STRINGIFY_CODE(FS, TYPE_ID_METADATA) 306 } 307 case bitc::METADATA_ATTACHMENT_ID: 308 switch (CodeID) { 309 default: 310 return None; 311 STRINGIFY_CODE(METADATA, ATTACHMENT) 312 } 313 case bitc::METADATA_BLOCK_ID: 314 switch (CodeID) { 315 default: 316 return None; 317 STRINGIFY_CODE(METADATA, STRING_OLD) 318 STRINGIFY_CODE(METADATA, VALUE) 319 STRINGIFY_CODE(METADATA, NODE) 320 STRINGIFY_CODE(METADATA, NAME) 321 STRINGIFY_CODE(METADATA, DISTINCT_NODE) 322 STRINGIFY_CODE(METADATA, KIND) // Older bitcode has it in a MODULE_BLOCK 323 STRINGIFY_CODE(METADATA, LOCATION) 324 STRINGIFY_CODE(METADATA, OLD_NODE) 325 STRINGIFY_CODE(METADATA, OLD_FN_NODE) 326 STRINGIFY_CODE(METADATA, NAMED_NODE) 327 STRINGIFY_CODE(METADATA, GENERIC_DEBUG) 328 STRINGIFY_CODE(METADATA, SUBRANGE) 329 STRINGIFY_CODE(METADATA, ENUMERATOR) 330 STRINGIFY_CODE(METADATA, BASIC_TYPE) 331 STRINGIFY_CODE(METADATA, FILE) 332 STRINGIFY_CODE(METADATA, DERIVED_TYPE) 333 STRINGIFY_CODE(METADATA, COMPOSITE_TYPE) 334 STRINGIFY_CODE(METADATA, SUBROUTINE_TYPE) 335 STRINGIFY_CODE(METADATA, COMPILE_UNIT) 336 STRINGIFY_CODE(METADATA, SUBPROGRAM) 337 STRINGIFY_CODE(METADATA, LEXICAL_BLOCK) 338 STRINGIFY_CODE(METADATA, LEXICAL_BLOCK_FILE) 339 STRINGIFY_CODE(METADATA, NAMESPACE) 340 STRINGIFY_CODE(METADATA, TEMPLATE_TYPE) 341 STRINGIFY_CODE(METADATA, TEMPLATE_VALUE) 342 STRINGIFY_CODE(METADATA, GLOBAL_VAR) 343 STRINGIFY_CODE(METADATA, LOCAL_VAR) 344 STRINGIFY_CODE(METADATA, EXPRESSION) 345 STRINGIFY_CODE(METADATA, OBJC_PROPERTY) 346 STRINGIFY_CODE(METADATA, IMPORTED_ENTITY) 347 STRINGIFY_CODE(METADATA, MODULE) 348 STRINGIFY_CODE(METADATA, MACRO) 349 STRINGIFY_CODE(METADATA, MACRO_FILE) 350 STRINGIFY_CODE(METADATA, STRINGS) 351 STRINGIFY_CODE(METADATA, GLOBAL_DECL_ATTACHMENT) 352 STRINGIFY_CODE(METADATA, GLOBAL_VAR_EXPR) 353 STRINGIFY_CODE(METADATA, INDEX_OFFSET) 354 STRINGIFY_CODE(METADATA, INDEX) 355 } 356 case bitc::METADATA_KIND_BLOCK_ID: 357 switch (CodeID) { 358 default: 359 return None; 360 STRINGIFY_CODE(METADATA, KIND) 361 } 362 case bitc::USELIST_BLOCK_ID: 363 switch (CodeID) { 364 default: 365 return None; 366 case bitc::USELIST_CODE_DEFAULT: 367 return "USELIST_CODE_DEFAULT"; 368 case bitc::USELIST_CODE_BB: 369 return "USELIST_CODE_BB"; 370 } 371 372 case bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID: 373 switch (CodeID) { 374 default: 375 return None; 376 case bitc::OPERAND_BUNDLE_TAG: 377 return "OPERAND_BUNDLE_TAG"; 378 } 379 case bitc::STRTAB_BLOCK_ID: 380 switch (CodeID) { 381 default: 382 return None; 383 case bitc::STRTAB_BLOB: 384 return "BLOB"; 385 } 386 case bitc::SYMTAB_BLOCK_ID: 387 switch (CodeID) { 388 default: 389 return None; 390 case bitc::SYMTAB_BLOB: 391 return "BLOB"; 392 } 393 } 394 #undef STRINGIFY_CODE 395 } 396 397 static void printSize(raw_ostream &OS, double Bits) { 398 OS << format("%.2f/%.2fB/%luW", Bits, Bits / 8, (unsigned long)(Bits / 32)); 399 } 400 static void printSize(raw_ostream &OS, uint64_t Bits) { 401 OS << format("%lub/%.2fB/%luW", (unsigned long)Bits, (double)Bits / 8, 402 (unsigned long)(Bits / 32)); 403 } 404 405 static Expected<CurStreamTypeType> ReadSignature(BitstreamCursor &Stream) { 406 auto tryRead = [&Stream](char &Dest, size_t size) -> Error { 407 if (Expected<SimpleBitstreamCursor::word_t> MaybeWord = Stream.Read(size)) 408 Dest = MaybeWord.get(); 409 else 410 return MaybeWord.takeError(); 411 return Error::success(); 412 }; 413 414 char Signature[6]; 415 if (Error Err = tryRead(Signature[0], 8)) 416 return std::move(Err); 417 if (Error Err = tryRead(Signature[1], 8)) 418 return std::move(Err); 419 420 // Autodetect the file contents, if it is one we know. 421 if (Signature[0] == 'C' && Signature[1] == 'P') { 422 if (Error Err = tryRead(Signature[2], 8)) 423 return std::move(Err); 424 if (Error Err = tryRead(Signature[3], 8)) 425 return std::move(Err); 426 if (Signature[2] == 'C' && Signature[3] == 'H') 427 return ClangSerializedASTBitstream; 428 } else if (Signature[0] == 'D' && Signature[1] == 'I') { 429 if (Error Err = tryRead(Signature[2], 8)) 430 return std::move(Err); 431 if (Error Err = tryRead(Signature[3], 8)) 432 return std::move(Err); 433 if (Signature[2] == 'A' && Signature[3] == 'G') 434 return ClangSerializedDiagnosticsBitstream; 435 } else { 436 if (Error Err = tryRead(Signature[2], 4)) 437 return std::move(Err); 438 if (Error Err = tryRead(Signature[3], 4)) 439 return std::move(Err); 440 if (Error Err = tryRead(Signature[4], 4)) 441 return std::move(Err); 442 if (Error Err = tryRead(Signature[5], 4)) 443 return std::move(Err); 444 if (Signature[0] == 'B' && Signature[1] == 'C' && Signature[2] == 0x0 && 445 Signature[3] == 0xC && Signature[4] == 0xE && Signature[5] == 0xD) 446 return LLVMIRBitstream; 447 } 448 return UnknownBitstream; 449 } 450 451 static Expected<CurStreamTypeType> analyzeHeader(Optional<BCDumpOptions> O, 452 BitstreamCursor &Stream) { 453 ArrayRef<uint8_t> Bytes = Stream.getBitcodeBytes(); 454 const unsigned char *BufPtr = (const unsigned char *)Bytes.data(); 455 const unsigned char *EndBufPtr = BufPtr + Bytes.size(); 456 457 // If we have a wrapper header, parse it and ignore the non-bc file 458 // contents. The magic number is 0x0B17C0DE stored in little endian. 459 if (isBitcodeWrapper(BufPtr, EndBufPtr)) { 460 if (Bytes.size() < BWH_HeaderSize) 461 return reportError("Invalid bitcode wrapper header"); 462 463 if (O) { 464 unsigned Magic = support::endian::read32le(&BufPtr[BWH_MagicField]); 465 unsigned Version = support::endian::read32le(&BufPtr[BWH_VersionField]); 466 unsigned Offset = support::endian::read32le(&BufPtr[BWH_OffsetField]); 467 unsigned Size = support::endian::read32le(&BufPtr[BWH_SizeField]); 468 unsigned CPUType = support::endian::read32le(&BufPtr[BWH_CPUTypeField]); 469 470 O->OS << "<BITCODE_WRAPPER_HEADER" 471 << " Magic=" << format_hex(Magic, 10) 472 << " Version=" << format_hex(Version, 10) 473 << " Offset=" << format_hex(Offset, 10) 474 << " Size=" << format_hex(Size, 10) 475 << " CPUType=" << format_hex(CPUType, 10) << "/>\n"; 476 } 477 478 if (SkipBitcodeWrapperHeader(BufPtr, EndBufPtr, true)) 479 return reportError("Invalid bitcode wrapper header"); 480 } 481 482 // Use the cursor modified by skipping the wrapper header. 483 Stream = BitstreamCursor(ArrayRef<uint8_t>(BufPtr, EndBufPtr)); 484 485 return ReadSignature(Stream); 486 } 487 488 static bool canDecodeBlob(unsigned Code, unsigned BlockID) { 489 return BlockID == bitc::METADATA_BLOCK_ID && Code == bitc::METADATA_STRINGS; 490 } 491 492 Error BitcodeAnalyzer::decodeMetadataStringsBlob(StringRef Indent, 493 ArrayRef<uint64_t> Record, 494 StringRef Blob, 495 raw_ostream &OS) { 496 if (Blob.empty()) 497 return reportError("Cannot decode empty blob."); 498 499 if (Record.size() != 2) 500 return reportError( 501 "Decoding metadata strings blob needs two record entries."); 502 503 unsigned NumStrings = Record[0]; 504 unsigned StringsOffset = Record[1]; 505 OS << " num-strings = " << NumStrings << " {\n"; 506 507 StringRef Lengths = Blob.slice(0, StringsOffset); 508 SimpleBitstreamCursor R(Lengths); 509 StringRef Strings = Blob.drop_front(StringsOffset); 510 do { 511 if (R.AtEndOfStream()) 512 return reportError("bad length"); 513 514 Expected<uint32_t> MaybeSize = R.ReadVBR(6); 515 if (!MaybeSize) 516 return MaybeSize.takeError(); 517 uint32_t Size = MaybeSize.get(); 518 if (Strings.size() < Size) 519 return reportError("truncated chars"); 520 521 OS << Indent << " '"; 522 OS.write_escaped(Strings.slice(0, Size), /*hex=*/true); 523 OS << "'\n"; 524 Strings = Strings.drop_front(Size); 525 } while (--NumStrings); 526 527 OS << Indent << " }"; 528 return Error::success(); 529 } 530 531 BitcodeAnalyzer::BitcodeAnalyzer(StringRef Buffer, 532 Optional<StringRef> BlockInfoBuffer) 533 : Stream(Buffer) { 534 if (BlockInfoBuffer) 535 BlockInfoStream.emplace(*BlockInfoBuffer); 536 } 537 538 Error BitcodeAnalyzer::analyze(Optional<BCDumpOptions> O, 539 Optional<StringRef> CheckHash) { 540 if (Expected<CurStreamTypeType> H = analyzeHeader(O, Stream)) 541 CurStreamType = *H; 542 543 Stream.setBlockInfo(&BlockInfo); 544 545 // Read block info from BlockInfoStream, if specified. 546 // The block info must be a top-level block. 547 if (BlockInfoStream) { 548 BitstreamCursor BlockInfoCursor(*BlockInfoStream); 549 Expected<CurStreamTypeType> H = analyzeHeader(O, BlockInfoCursor); 550 if (!H) 551 return H.takeError(); 552 553 while (!BlockInfoCursor.AtEndOfStream()) { 554 Expected<unsigned> MaybeCode = BlockInfoCursor.ReadCode(); 555 if (!MaybeCode) 556 return MaybeCode.takeError(); 557 if (MaybeCode.get() != bitc::ENTER_SUBBLOCK) 558 return reportError("Invalid record at top-level in block info file"); 559 560 Expected<unsigned> MaybeBlockID = BlockInfoCursor.ReadSubBlockID(); 561 if (!MaybeBlockID) 562 return MaybeBlockID.takeError(); 563 if (MaybeBlockID.get() == bitc::BLOCKINFO_BLOCK_ID) { 564 Expected<Optional<BitstreamBlockInfo>> MaybeNewBlockInfo = 565 BlockInfoCursor.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/true); 566 if (!MaybeNewBlockInfo) 567 return MaybeNewBlockInfo.takeError(); 568 Optional<BitstreamBlockInfo> NewBlockInfo = 569 std::move(MaybeNewBlockInfo.get()); 570 if (!NewBlockInfo) 571 return reportError("Malformed BlockInfoBlock in block info file"); 572 BlockInfo = std::move(*NewBlockInfo); 573 break; 574 } 575 576 if (Error Err = BlockInfoCursor.SkipBlock()) 577 return Err; 578 } 579 } 580 581 // Parse the top-level structure. We only allow blocks at the top-level. 582 while (!Stream.AtEndOfStream()) { 583 Expected<unsigned> MaybeCode = Stream.ReadCode(); 584 if (!MaybeCode) 585 return MaybeCode.takeError(); 586 if (MaybeCode.get() != bitc::ENTER_SUBBLOCK) 587 return reportError("Invalid record at top-level"); 588 589 Expected<unsigned> MaybeBlockID = Stream.ReadSubBlockID(); 590 if (!MaybeBlockID) 591 return MaybeBlockID.takeError(); 592 593 if (Error E = parseBlock(MaybeBlockID.get(), 0, O, CheckHash)) 594 return E; 595 ++NumTopBlocks; 596 } 597 598 return Error::success(); 599 } 600 601 void BitcodeAnalyzer::printStats(BCDumpOptions O, 602 Optional<StringRef> Filename) { 603 uint64_t BufferSizeBits = Stream.getBitcodeBytes().size() * CHAR_BIT; 604 // Print a summary of the read file. 605 O.OS << "Summary "; 606 if (Filename) 607 O.OS << "of " << Filename->data() << ":\n"; 608 O.OS << " Total size: "; 609 printSize(O.OS, BufferSizeBits); 610 O.OS << "\n"; 611 O.OS << " Stream type: "; 612 switch (CurStreamType) { 613 case UnknownBitstream: 614 O.OS << "unknown\n"; 615 break; 616 case LLVMIRBitstream: 617 O.OS << "LLVM IR\n"; 618 break; 619 case ClangSerializedASTBitstream: 620 O.OS << "Clang Serialized AST\n"; 621 break; 622 case ClangSerializedDiagnosticsBitstream: 623 O.OS << "Clang Serialized Diagnostics\n"; 624 break; 625 } 626 O.OS << " # Toplevel Blocks: " << NumTopBlocks << "\n"; 627 O.OS << "\n"; 628 629 // Emit per-block stats. 630 O.OS << "Per-block Summary:\n"; 631 for (std::map<unsigned, PerBlockIDStats>::iterator I = BlockIDStats.begin(), 632 E = BlockIDStats.end(); 633 I != E; ++I) { 634 O.OS << " Block ID #" << I->first; 635 if (Optional<const char *> BlockName = 636 GetBlockName(I->first, BlockInfo, CurStreamType)) 637 O.OS << " (" << *BlockName << ")"; 638 O.OS << ":\n"; 639 640 const PerBlockIDStats &Stats = I->second; 641 O.OS << " Num Instances: " << Stats.NumInstances << "\n"; 642 O.OS << " Total Size: "; 643 printSize(O.OS, Stats.NumBits); 644 O.OS << "\n"; 645 double pct = (Stats.NumBits * 100.0) / BufferSizeBits; 646 O.OS << " Percent of file: " << format("%2.4f%%", pct) << "\n"; 647 if (Stats.NumInstances > 1) { 648 O.OS << " Average Size: "; 649 printSize(O.OS, Stats.NumBits / (double)Stats.NumInstances); 650 O.OS << "\n"; 651 O.OS << " Tot/Avg SubBlocks: " << Stats.NumSubBlocks << "/" 652 << Stats.NumSubBlocks / (double)Stats.NumInstances << "\n"; 653 O.OS << " Tot/Avg Abbrevs: " << Stats.NumAbbrevs << "/" 654 << Stats.NumAbbrevs / (double)Stats.NumInstances << "\n"; 655 O.OS << " Tot/Avg Records: " << Stats.NumRecords << "/" 656 << Stats.NumRecords / (double)Stats.NumInstances << "\n"; 657 } else { 658 O.OS << " Num SubBlocks: " << Stats.NumSubBlocks << "\n"; 659 O.OS << " Num Abbrevs: " << Stats.NumAbbrevs << "\n"; 660 O.OS << " Num Records: " << Stats.NumRecords << "\n"; 661 } 662 if (Stats.NumRecords) { 663 double pct = (Stats.NumAbbreviatedRecords * 100.0) / Stats.NumRecords; 664 O.OS << " Percent Abbrevs: " << format("%2.4f%%", pct) << "\n"; 665 } 666 O.OS << "\n"; 667 668 // Print a histogram of the codes we see. 669 if (O.Histogram && !Stats.CodeFreq.empty()) { 670 std::vector<std::pair<unsigned, unsigned>> FreqPairs; // <freq,code> 671 for (unsigned i = 0, e = Stats.CodeFreq.size(); i != e; ++i) 672 if (unsigned Freq = Stats.CodeFreq[i].NumInstances) 673 FreqPairs.push_back(std::make_pair(Freq, i)); 674 llvm::stable_sort(FreqPairs); 675 std::reverse(FreqPairs.begin(), FreqPairs.end()); 676 677 O.OS << "\tRecord Histogram:\n"; 678 O.OS << "\t\t Count # Bits b/Rec % Abv Record Kind\n"; 679 for (unsigned i = 0, e = FreqPairs.size(); i != e; ++i) { 680 const PerRecordStats &RecStats = Stats.CodeFreq[FreqPairs[i].second]; 681 682 O.OS << format("\t\t%7d %9lu", RecStats.NumInstances, 683 (unsigned long)RecStats.TotalBits); 684 685 if (RecStats.NumInstances > 1) 686 O.OS << format(" %9.1f", 687 (double)RecStats.TotalBits / RecStats.NumInstances); 688 else 689 O.OS << " "; 690 691 if (RecStats.NumAbbrev) 692 O.OS << format(" %7.2f", (double)RecStats.NumAbbrev / 693 RecStats.NumInstances * 100); 694 else 695 O.OS << " "; 696 697 O.OS << " "; 698 if (Optional<const char *> CodeName = GetCodeName( 699 FreqPairs[i].second, I->first, BlockInfo, CurStreamType)) 700 O.OS << *CodeName << "\n"; 701 else 702 O.OS << "UnknownCode" << FreqPairs[i].second << "\n"; 703 } 704 O.OS << "\n"; 705 } 706 } 707 } 708 709 Error BitcodeAnalyzer::parseBlock(unsigned BlockID, unsigned IndentLevel, 710 Optional<BCDumpOptions> O, 711 Optional<StringRef> CheckHash) { 712 std::string Indent(IndentLevel * 2, ' '); 713 uint64_t BlockBitStart = Stream.GetCurrentBitNo(); 714 715 // Get the statistics for this BlockID. 716 PerBlockIDStats &BlockStats = BlockIDStats[BlockID]; 717 718 BlockStats.NumInstances++; 719 720 // BLOCKINFO is a special part of the stream. 721 bool DumpRecords = O.hasValue(); 722 if (BlockID == bitc::BLOCKINFO_BLOCK_ID) { 723 if (O) 724 O->OS << Indent << "<BLOCKINFO_BLOCK/>\n"; 725 Expected<Optional<BitstreamBlockInfo>> MaybeNewBlockInfo = 726 Stream.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/true); 727 if (!MaybeNewBlockInfo) 728 return MaybeNewBlockInfo.takeError(); 729 Optional<BitstreamBlockInfo> NewBlockInfo = 730 std::move(MaybeNewBlockInfo.get()); 731 if (!NewBlockInfo) 732 return reportError("Malformed BlockInfoBlock"); 733 BlockInfo = std::move(*NewBlockInfo); 734 if (Error Err = Stream.JumpToBit(BlockBitStart)) 735 return Err; 736 // It's not really interesting to dump the contents of the blockinfo 737 // block. 738 DumpRecords = false; 739 } 740 741 unsigned NumWords = 0; 742 if (Error Err = Stream.EnterSubBlock(BlockID, &NumWords)) 743 return Err; 744 745 // Keep it for later, when we see a MODULE_HASH record 746 uint64_t BlockEntryPos = Stream.getCurrentByteNo(); 747 748 Optional<const char *> BlockName = None; 749 if (DumpRecords) { 750 O->OS << Indent << "<"; 751 if ((BlockName = GetBlockName(BlockID, BlockInfo, CurStreamType))) 752 O->OS << *BlockName; 753 else 754 O->OS << "UnknownBlock" << BlockID; 755 756 if (!O->Symbolic && BlockName) 757 O->OS << " BlockID=" << BlockID; 758 759 O->OS << " NumWords=" << NumWords 760 << " BlockCodeSize=" << Stream.getAbbrevIDWidth() << ">\n"; 761 } 762 763 SmallVector<uint64_t, 64> Record; 764 765 // Keep the offset to the metadata index if seen. 766 uint64_t MetadataIndexOffset = 0; 767 768 // Read all the records for this block. 769 while (1) { 770 if (Stream.AtEndOfStream()) 771 return reportError("Premature end of bitstream"); 772 773 uint64_t RecordStartBit = Stream.GetCurrentBitNo(); 774 775 Expected<BitstreamEntry> MaybeEntry = 776 Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs); 777 if (!MaybeEntry) 778 return MaybeEntry.takeError(); 779 BitstreamEntry Entry = MaybeEntry.get(); 780 781 switch (Entry.Kind) { 782 case BitstreamEntry::Error: 783 return reportError("malformed bitcode file"); 784 case BitstreamEntry::EndBlock: { 785 uint64_t BlockBitEnd = Stream.GetCurrentBitNo(); 786 BlockStats.NumBits += BlockBitEnd - BlockBitStart; 787 if (DumpRecords) { 788 O->OS << Indent << "</"; 789 if (BlockName) 790 O->OS << *BlockName << ">\n"; 791 else 792 O->OS << "UnknownBlock" << BlockID << ">\n"; 793 } 794 return Error::success(); 795 } 796 797 case BitstreamEntry::SubBlock: { 798 uint64_t SubBlockBitStart = Stream.GetCurrentBitNo(); 799 if (Error E = parseBlock(Entry.ID, IndentLevel + 1, O, CheckHash)) 800 return E; 801 ++BlockStats.NumSubBlocks; 802 uint64_t SubBlockBitEnd = Stream.GetCurrentBitNo(); 803 804 // Don't include subblock sizes in the size of this block. 805 BlockBitStart += SubBlockBitEnd - SubBlockBitStart; 806 continue; 807 } 808 case BitstreamEntry::Record: 809 // The interesting case. 810 break; 811 } 812 813 if (Entry.ID == bitc::DEFINE_ABBREV) { 814 if (Error Err = Stream.ReadAbbrevRecord()) 815 return Err; 816 ++BlockStats.NumAbbrevs; 817 continue; 818 } 819 820 Record.clear(); 821 822 ++BlockStats.NumRecords; 823 824 StringRef Blob; 825 uint64_t CurrentRecordPos = Stream.GetCurrentBitNo(); 826 Expected<unsigned> MaybeCode = Stream.readRecord(Entry.ID, Record, &Blob); 827 if (!MaybeCode) 828 return MaybeCode.takeError(); 829 unsigned Code = MaybeCode.get(); 830 831 // Increment the # occurrences of this code. 832 if (BlockStats.CodeFreq.size() <= Code) 833 BlockStats.CodeFreq.resize(Code + 1); 834 BlockStats.CodeFreq[Code].NumInstances++; 835 BlockStats.CodeFreq[Code].TotalBits += 836 Stream.GetCurrentBitNo() - RecordStartBit; 837 if (Entry.ID != bitc::UNABBREV_RECORD) { 838 BlockStats.CodeFreq[Code].NumAbbrev++; 839 ++BlockStats.NumAbbreviatedRecords; 840 } 841 842 if (DumpRecords) { 843 O->OS << Indent << " <"; 844 Optional<const char *> CodeName = 845 GetCodeName(Code, BlockID, BlockInfo, CurStreamType); 846 if (CodeName) 847 O->OS << *CodeName; 848 else 849 O->OS << "UnknownCode" << Code; 850 if (!O->Symbolic && CodeName) 851 O->OS << " codeid=" << Code; 852 const BitCodeAbbrev *Abbv = nullptr; 853 if (Entry.ID != bitc::UNABBREV_RECORD) { 854 Abbv = Stream.getAbbrev(Entry.ID); 855 O->OS << " abbrevid=" << Entry.ID; 856 } 857 858 for (unsigned i = 0, e = Record.size(); i != e; ++i) 859 O->OS << " op" << i << "=" << (int64_t)Record[i]; 860 861 // If we found a metadata index, let's verify that we had an offset 862 // before and validate its forward reference offset was correct! 863 if (BlockID == bitc::METADATA_BLOCK_ID) { 864 if (Code == bitc::METADATA_INDEX_OFFSET) { 865 if (Record.size() != 2) 866 O->OS << "(Invalid record)"; 867 else { 868 auto Offset = Record[0] + (Record[1] << 32); 869 MetadataIndexOffset = Stream.GetCurrentBitNo() + Offset; 870 } 871 } 872 if (Code == bitc::METADATA_INDEX) { 873 O->OS << " (offset "; 874 if (MetadataIndexOffset == RecordStartBit) 875 O->OS << "match)"; 876 else 877 O->OS << "mismatch: " << MetadataIndexOffset << " vs " 878 << RecordStartBit << ")"; 879 } 880 } 881 882 // If we found a module hash, let's verify that it matches! 883 if (BlockID == bitc::MODULE_BLOCK_ID && Code == bitc::MODULE_CODE_HASH && 884 CheckHash.hasValue()) { 885 if (Record.size() != 5) 886 O->OS << " (invalid)"; 887 else { 888 // Recompute the hash and compare it to the one in the bitcode 889 SHA1 Hasher; 890 StringRef Hash; 891 Hasher.update(*CheckHash); 892 { 893 int BlockSize = (CurrentRecordPos / 8) - BlockEntryPos; 894 auto Ptr = Stream.getPointerToByte(BlockEntryPos, BlockSize); 895 Hasher.update(ArrayRef<uint8_t>(Ptr, BlockSize)); 896 Hash = Hasher.result(); 897 } 898 SmallString<20> RecordedHash; 899 RecordedHash.resize(20); 900 int Pos = 0; 901 for (auto &Val : Record) { 902 assert(!(Val >> 32) && "Unexpected high bits set"); 903 RecordedHash[Pos++] = (Val >> 24) & 0xFF; 904 RecordedHash[Pos++] = (Val >> 16) & 0xFF; 905 RecordedHash[Pos++] = (Val >> 8) & 0xFF; 906 RecordedHash[Pos++] = (Val >> 0) & 0xFF; 907 } 908 if (Hash == RecordedHash) 909 O->OS << " (match)"; 910 else 911 O->OS << " (!mismatch!)"; 912 } 913 } 914 915 O->OS << "/>"; 916 917 if (Abbv) { 918 for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) { 919 const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); 920 if (!Op.isEncoding() || Op.getEncoding() != BitCodeAbbrevOp::Array) 921 continue; 922 assert(i + 2 == e && "Array op not second to last"); 923 std::string Str; 924 bool ArrayIsPrintable = true; 925 for (unsigned j = i - 1, je = Record.size(); j != je; ++j) { 926 if (!isPrint(static_cast<unsigned char>(Record[j]))) { 927 ArrayIsPrintable = false; 928 break; 929 } 930 Str += (char)Record[j]; 931 } 932 if (ArrayIsPrintable) 933 O->OS << " record string = '" << Str << "'"; 934 break; 935 } 936 } 937 938 if (Blob.data()) { 939 if (canDecodeBlob(Code, BlockID)) { 940 if (Error E = decodeMetadataStringsBlob(Indent, Record, Blob, O->OS)) 941 return E; 942 } else { 943 O->OS << " blob data = "; 944 if (O->ShowBinaryBlobs) { 945 O->OS << "'"; 946 O->OS.write_escaped(Blob, /*hex=*/true) << "'"; 947 } else { 948 bool BlobIsPrintable = true; 949 for (unsigned i = 0, e = Blob.size(); i != e; ++i) 950 if (!isPrint(static_cast<unsigned char>(Blob[i]))) { 951 BlobIsPrintable = false; 952 break; 953 } 954 955 if (BlobIsPrintable) 956 O->OS << "'" << Blob << "'"; 957 else 958 O->OS << "unprintable, " << Blob.size() << " bytes."; 959 } 960 } 961 } 962 963 O->OS << "\n"; 964 } 965 966 // Make sure that we can skip the current record. 967 if (Error Err = Stream.JumpToBit(CurrentRecordPos)) 968 return Err; 969 if (Expected<unsigned> Skipped = Stream.skipRecord(Entry.ID)) 970 ; // Do nothing. 971 else 972 return Skipped.takeError(); 973 } 974 } 975 976