1 //===- llvm/Bitcode/BitcodeReader.h - Bitcode reader ------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This header defines interfaces to read LLVM bitcode files/streams. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_BITCODE_BITCODEREADER_H 15 #define LLVM_BITCODE_BITCODEREADER_H 16 17 #include "llvm/ADT/ArrayRef.h" 18 #include "llvm/ADT/StringRef.h" 19 #include "llvm/Bitcode/BitCodes.h" 20 #include "llvm/IR/ModuleSummaryIndex.h" 21 #include "llvm/Support/Endian.h" 22 #include "llvm/Support/Error.h" 23 #include "llvm/Support/ErrorOr.h" 24 #include "llvm/Support/MemoryBuffer.h" 25 #include <cstdint> 26 #include <memory> 27 #include <string> 28 #include <system_error> 29 #include <vector> 30 namespace llvm { 31 32 class LLVMContext; 33 class Module; 34 35 // These functions are for converting Expected/Error values to 36 // ErrorOr/std::error_code for compatibility with legacy clients. FIXME: 37 // Remove these functions once no longer needed by the C and libLTO APIs. 38 39 std::error_code errorToErrorCodeAndEmitErrors(LLVMContext &Ctx, Error Err); 40 41 template <typename T> expectedToErrorOrAndEmitErrors(LLVMContext & Ctx,Expected<T> Val)42 ErrorOr<T> expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected<T> Val) { 43 if (!Val) 44 return errorToErrorCodeAndEmitErrors(Ctx, Val.takeError()); 45 return std::move(*Val); 46 } 47 48 struct BitcodeFileContents; 49 50 /// Basic information extracted from a bitcode module to be used for LTO. 51 struct BitcodeLTOInfo { 52 bool IsThinLTO; 53 bool HasSummary; 54 bool EnableSplitLTOUnit; 55 }; 56 57 /// Represents a module in a bitcode file. 58 class BitcodeModule { 59 // This covers the identification (if present) and module blocks. 60 ArrayRef<uint8_t> Buffer; 61 StringRef ModuleIdentifier; 62 63 // The string table used to interpret this module. 64 StringRef Strtab; 65 66 // The bitstream location of the IDENTIFICATION_BLOCK. 67 uint64_t IdentificationBit; 68 69 // The bitstream location of this module's MODULE_BLOCK. 70 uint64_t ModuleBit; 71 BitcodeModule(ArrayRef<uint8_t> Buffer,StringRef ModuleIdentifier,uint64_t IdentificationBit,uint64_t ModuleBit)72 BitcodeModule(ArrayRef<uint8_t> Buffer, StringRef ModuleIdentifier, 73 uint64_t IdentificationBit, uint64_t ModuleBit) 74 : Buffer(Buffer), ModuleIdentifier(ModuleIdentifier), 75 IdentificationBit(IdentificationBit), ModuleBit(ModuleBit) {} 76 77 // Calls the ctor. 78 friend Expected<BitcodeFileContents> 79 getBitcodeFileContents(MemoryBufferRef Buffer); 80 81 Expected<std::unique_ptr<Module>> getModuleImpl(LLVMContext &Context, 82 bool MaterializeAll, 83 bool ShouldLazyLoadMetadata, 84 bool IsImporting); 85 86 public: getBuffer()87 StringRef getBuffer() const { 88 return StringRef((const char *)Buffer.begin(), Buffer.size()); 89 } 90 getStrtab()91 StringRef getStrtab() const { return Strtab; } 92 getModuleIdentifier()93 StringRef getModuleIdentifier() const { return ModuleIdentifier; } 94 95 /// Read the bitcode module and prepare for lazy deserialization of function 96 /// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well. 97 /// If IsImporting is true, this module is being parsed for ThinLTO 98 /// importing into another module. 99 Expected<std::unique_ptr<Module>> getLazyModule(LLVMContext &Context, 100 bool ShouldLazyLoadMetadata, 101 bool IsImporting); 102 103 /// Read the entire bitcode module and return it. 104 Expected<std::unique_ptr<Module>> parseModule(LLVMContext &Context); 105 106 /// Returns information about the module to be used for LTO: whether to 107 /// compile with ThinLTO, and whether it has a summary. 108 Expected<BitcodeLTOInfo> getLTOInfo(); 109 110 /// Parse the specified bitcode buffer, returning the module summary index. 111 Expected<std::unique_ptr<ModuleSummaryIndex>> getSummary(); 112 113 /// Parse the specified bitcode buffer and merge its module summary index 114 /// into CombinedIndex. 115 Error readSummary(ModuleSummaryIndex &CombinedIndex, StringRef ModulePath, 116 uint64_t ModuleId); 117 }; 118 119 struct BitcodeFileContents { 120 std::vector<BitcodeModule> Mods; 121 StringRef Symtab, StrtabForSymtab; 122 }; 123 124 /// Returns the contents of a bitcode file. This includes the raw contents of 125 /// the symbol table embedded in the bitcode file. Clients which require a 126 /// symbol table should prefer to use irsymtab::read instead of this function 127 /// because it creates a reader for the irsymtab and handles upgrading bitcode 128 /// files without a symbol table or with an old symbol table. 129 Expected<BitcodeFileContents> getBitcodeFileContents(MemoryBufferRef Buffer); 130 131 /// Returns a list of modules in the specified bitcode buffer. 132 Expected<std::vector<BitcodeModule>> 133 getBitcodeModuleList(MemoryBufferRef Buffer); 134 135 /// Read the header of the specified bitcode buffer and prepare for lazy 136 /// deserialization of function bodies. If ShouldLazyLoadMetadata is true, 137 /// lazily load metadata as well. If IsImporting is true, this module is 138 /// being parsed for ThinLTO importing into another module. 139 Expected<std::unique_ptr<Module>> 140 getLazyBitcodeModule(MemoryBufferRef Buffer, LLVMContext &Context, 141 bool ShouldLazyLoadMetadata = false, 142 bool IsImporting = false); 143 144 /// Like getLazyBitcodeModule, except that the module takes ownership of 145 /// the memory buffer if successful. If successful, this moves Buffer. On 146 /// error, this *does not* move Buffer. If IsImporting is true, this module is 147 /// being parsed for ThinLTO importing into another module. 148 Expected<std::unique_ptr<Module>> getOwningLazyBitcodeModule( 149 std::unique_ptr<MemoryBuffer> &&Buffer, LLVMContext &Context, 150 bool ShouldLazyLoadMetadata = false, bool IsImporting = false); 151 152 /// Read the header of the specified bitcode buffer and extract just the 153 /// triple information. If successful, this returns a string. On error, this 154 /// returns "". 155 Expected<std::string> getBitcodeTargetTriple(MemoryBufferRef Buffer); 156 157 /// Return true if \p Buffer contains a bitcode file with ObjC code (category 158 /// or class) in it. 159 Expected<bool> isBitcodeContainingObjCCategory(MemoryBufferRef Buffer); 160 161 /// Read the header of the specified bitcode buffer and extract just the 162 /// producer string information. If successful, this returns a string. On 163 /// error, this returns "". 164 Expected<std::string> getBitcodeProducerString(MemoryBufferRef Buffer); 165 166 /// Read the specified bitcode file, returning the module. 167 Expected<std::unique_ptr<Module>> parseBitcodeFile(MemoryBufferRef Buffer, 168 LLVMContext &Context); 169 170 /// Returns LTO information for the specified bitcode file. 171 Expected<BitcodeLTOInfo> getBitcodeLTOInfo(MemoryBufferRef Buffer); 172 173 /// Parse the specified bitcode buffer, returning the module summary index. 174 Expected<std::unique_ptr<ModuleSummaryIndex>> 175 getModuleSummaryIndex(MemoryBufferRef Buffer); 176 177 /// Parse the specified bitcode buffer and merge the index into CombinedIndex. 178 Error readModuleSummaryIndex(MemoryBufferRef Buffer, 179 ModuleSummaryIndex &CombinedIndex, 180 uint64_t ModuleId); 181 182 /// Parse the module summary index out of an IR file and return the module 183 /// summary index object if found, or an empty summary if not. If Path refers 184 /// to an empty file and IgnoreEmptyThinLTOIndexFile is true, then 185 /// this function will return nullptr. 186 Expected<std::unique_ptr<ModuleSummaryIndex>> 187 getModuleSummaryIndexForFile(StringRef Path, 188 bool IgnoreEmptyThinLTOIndexFile = false); 189 190 /// isBitcodeWrapper - Return true if the given bytes are the magic bytes 191 /// for an LLVM IR bitcode wrapper. isBitcodeWrapper(const unsigned char * BufPtr,const unsigned char * BufEnd)192 inline bool isBitcodeWrapper(const unsigned char *BufPtr, 193 const unsigned char *BufEnd) { 194 // See if you can find the hidden message in the magic bytes :-). 195 // (Hint: it's a little-endian encoding.) 196 return BufPtr != BufEnd && 197 BufPtr[0] == 0xDE && 198 BufPtr[1] == 0xC0 && 199 BufPtr[2] == 0x17 && 200 BufPtr[3] == 0x0B; 201 } 202 203 /// isRawBitcode - Return true if the given bytes are the magic bytes for 204 /// raw LLVM IR bitcode (without a wrapper). isRawBitcode(const unsigned char * BufPtr,const unsigned char * BufEnd)205 inline bool isRawBitcode(const unsigned char *BufPtr, 206 const unsigned char *BufEnd) { 207 // These bytes sort of have a hidden message, but it's not in 208 // little-endian this time, and it's a little redundant. 209 return BufPtr != BufEnd && 210 BufPtr[0] == 'B' && 211 BufPtr[1] == 'C' && 212 BufPtr[2] == 0xc0 && 213 BufPtr[3] == 0xde; 214 } 215 216 /// isBitcode - Return true if the given bytes are the magic bytes for 217 /// LLVM IR bitcode, either with or without a wrapper. isBitcode(const unsigned char * BufPtr,const unsigned char * BufEnd)218 inline bool isBitcode(const unsigned char *BufPtr, 219 const unsigned char *BufEnd) { 220 return isBitcodeWrapper(BufPtr, BufEnd) || 221 isRawBitcode(BufPtr, BufEnd); 222 } 223 224 /// SkipBitcodeWrapperHeader - Some systems wrap bc files with a special 225 /// header for padding or other reasons. The format of this header is: 226 /// 227 /// struct bc_header { 228 /// uint32_t Magic; // 0x0B17C0DE 229 /// uint32_t Version; // Version, currently always 0. 230 /// uint32_t BitcodeOffset; // Offset to traditional bitcode file. 231 /// uint32_t BitcodeSize; // Size of traditional bitcode file. 232 /// ... potentially other gunk ... 233 /// }; 234 /// 235 /// This function is called when we find a file with a matching magic number. 236 /// In this case, skip down to the subsection of the file that is actually a 237 /// BC file. 238 /// If 'VerifyBufferSize' is true, check that the buffer is large enough to 239 /// contain the whole bitcode file. SkipBitcodeWrapperHeader(const unsigned char * & BufPtr,const unsigned char * & BufEnd,bool VerifyBufferSize)240 inline bool SkipBitcodeWrapperHeader(const unsigned char *&BufPtr, 241 const unsigned char *&BufEnd, 242 bool VerifyBufferSize) { 243 // Must contain the offset and size field! 244 if (unsigned(BufEnd - BufPtr) < BWH_SizeField + 4) 245 return true; 246 247 unsigned Offset = support::endian::read32le(&BufPtr[BWH_OffsetField]); 248 unsigned Size = support::endian::read32le(&BufPtr[BWH_SizeField]); 249 uint64_t BitcodeOffsetEnd = (uint64_t)Offset + (uint64_t)Size; 250 251 // Verify that Offset+Size fits in the file. 252 if (VerifyBufferSize && BitcodeOffsetEnd > uint64_t(BufEnd-BufPtr)) 253 return true; 254 BufPtr += Offset; 255 BufEnd = BufPtr+Size; 256 return false; 257 } 258 259 const std::error_category &BitcodeErrorCategory(); 260 enum class BitcodeError { CorruptedBitcode = 1 }; make_error_code(BitcodeError E)261 inline std::error_code make_error_code(BitcodeError E) { 262 return std::error_code(static_cast<int>(E), BitcodeErrorCategory()); 263 } 264 265 } // end namespace llvm 266 267 namespace std { 268 269 template <> struct is_error_code_enum<llvm::BitcodeError> : std::true_type {}; 270 271 } // end namespace std 272 273 #endif // LLVM_BITCODE_BITCODEREADER_H 274