1 //===- BitstreamReader.cpp - BitstreamReader implementation ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "llvm/Bitstream/BitstreamReader.h"
10 #include "llvm/ADT/StringRef.h"
11 #include <cassert>
12 #include <string>
13
14 using namespace llvm;
15
16 //===----------------------------------------------------------------------===//
17 // BitstreamCursor implementation
18 //===----------------------------------------------------------------------===//
19 //
error(const char * Message)20 static Error error(const char *Message) {
21 return createStringError(std::errc::illegal_byte_sequence, Message);
22 }
23
24 /// Having read the ENTER_SUBBLOCK abbrevid, enter the block.
EnterSubBlock(unsigned BlockID,unsigned * NumWordsP)25 Error BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) {
26 // Save the current block's state on BlockScope.
27 BlockScope.push_back(Block(CurCodeSize));
28 BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
29
30 // Add the abbrevs specific to this block to the CurAbbrevs list.
31 if (BlockInfo) {
32 if (const BitstreamBlockInfo::BlockInfo *Info =
33 BlockInfo->getBlockInfo(BlockID)) {
34 llvm::append_range(CurAbbrevs, Info->Abbrevs);
35 }
36 }
37
38 // Get the codesize of this block.
39 Expected<uint32_t> MaybeVBR = ReadVBR(bitc::CodeLenWidth);
40 if (!MaybeVBR)
41 return MaybeVBR.takeError();
42 CurCodeSize = MaybeVBR.get();
43
44 if (CurCodeSize > MaxChunkSize)
45 return llvm::createStringError(
46 std::errc::illegal_byte_sequence,
47 "can't read more than %zu at a time, trying to read %u", +MaxChunkSize,
48 CurCodeSize);
49
50 SkipToFourByteBoundary();
51 Expected<word_t> MaybeNum = Read(bitc::BlockSizeWidth);
52 if (!MaybeNum)
53 return MaybeNum.takeError();
54 word_t NumWords = MaybeNum.get();
55 if (NumWordsP)
56 *NumWordsP = NumWords;
57
58 if (CurCodeSize == 0)
59 return llvm::createStringError(
60 std::errc::illegal_byte_sequence,
61 "can't enter sub-block: current code size is 0");
62 if (AtEndOfStream())
63 return llvm::createStringError(
64 std::errc::illegal_byte_sequence,
65 "can't enter sub block: already at end of stream");
66
67 return Error::success();
68 }
69
readAbbreviatedField(BitstreamCursor & Cursor,const BitCodeAbbrevOp & Op)70 static Expected<uint64_t> readAbbreviatedField(BitstreamCursor &Cursor,
71 const BitCodeAbbrevOp &Op) {
72 assert(!Op.isLiteral() && "Not to be used with literals!");
73
74 // Decode the value as we are commanded.
75 switch (Op.getEncoding()) {
76 case BitCodeAbbrevOp::Array:
77 case BitCodeAbbrevOp::Blob:
78 llvm_unreachable("Should not reach here");
79 case BitCodeAbbrevOp::Fixed:
80 assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
81 return Cursor.Read((unsigned)Op.getEncodingData());
82 case BitCodeAbbrevOp::VBR:
83 assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
84 return Cursor.ReadVBR64((unsigned)Op.getEncodingData());
85 case BitCodeAbbrevOp::Char6:
86 if (Expected<unsigned> Res = Cursor.Read(6))
87 return BitCodeAbbrevOp::DecodeChar6(Res.get());
88 else
89 return Res.takeError();
90 }
91 llvm_unreachable("invalid abbreviation encoding");
92 }
93
94 /// skipRecord - Read the current record and discard it.
skipRecord(unsigned AbbrevID)95 Expected<unsigned> BitstreamCursor::skipRecord(unsigned AbbrevID) {
96 // Skip unabbreviated records by reading past their entries.
97 if (AbbrevID == bitc::UNABBREV_RECORD) {
98 Expected<uint32_t> MaybeCode = ReadVBR(6);
99 if (!MaybeCode)
100 return MaybeCode.takeError();
101 unsigned Code = MaybeCode.get();
102 Expected<uint32_t> MaybeVBR = ReadVBR(6);
103 if (!MaybeVBR)
104 return MaybeVBR.takeError();
105 unsigned NumElts = MaybeVBR.get();
106 for (unsigned i = 0; i != NumElts; ++i)
107 if (Expected<uint64_t> Res = ReadVBR64(6))
108 ; // Skip!
109 else
110 return Res.takeError();
111 return Code;
112 }
113
114 Expected<const BitCodeAbbrev *> MaybeAbbv = getAbbrev(AbbrevID);
115 if (!MaybeAbbv)
116 return MaybeAbbv.takeError();
117
118 const BitCodeAbbrev *Abbv = MaybeAbbv.get();
119 const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0);
120 unsigned Code;
121 if (CodeOp.isLiteral())
122 Code = CodeOp.getLiteralValue();
123 else {
124 if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array ||
125 CodeOp.getEncoding() == BitCodeAbbrevOp::Blob)
126 return llvm::createStringError(
127 std::errc::illegal_byte_sequence,
128 "Abbreviation starts with an Array or a Blob");
129 Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp);
130 if (!MaybeCode)
131 return MaybeCode.takeError();
132 Code = MaybeCode.get();
133 }
134
135 for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i < e; ++i) {
136 const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
137 if (Op.isLiteral())
138 continue;
139
140 if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
141 Op.getEncoding() != BitCodeAbbrevOp::Blob) {
142 if (Expected<uint64_t> MaybeField = readAbbreviatedField(*this, Op))
143 continue;
144 else
145 return MaybeField.takeError();
146 }
147
148 if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
149 // Array case. Read the number of elements as a vbr6.
150 Expected<uint32_t> MaybeNum = ReadVBR(6);
151 if (!MaybeNum)
152 return MaybeNum.takeError();
153 unsigned NumElts = MaybeNum.get();
154
155 // Get the element encoding.
156 assert(i+2 == e && "array op not second to last?");
157 const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
158
159 // Read all the elements.
160 // Decode the value as we are commanded.
161 switch (EltEnc.getEncoding()) {
162 default:
163 return error("Array element type can't be an Array or a Blob");
164 case BitCodeAbbrevOp::Fixed:
165 assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize);
166 if (Error Err =
167 JumpToBit(GetCurrentBitNo() + static_cast<uint64_t>(NumElts) *
168 EltEnc.getEncodingData()))
169 return std::move(Err);
170 break;
171 case BitCodeAbbrevOp::VBR:
172 assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize);
173 for (; NumElts; --NumElts)
174 if (Expected<uint64_t> Res =
175 ReadVBR64((unsigned)EltEnc.getEncodingData()))
176 ; // Skip!
177 else
178 return Res.takeError();
179 break;
180 case BitCodeAbbrevOp::Char6:
181 if (Error Err = JumpToBit(GetCurrentBitNo() + NumElts * 6))
182 return std::move(Err);
183 break;
184 }
185 continue;
186 }
187
188 assert(Op.getEncoding() == BitCodeAbbrevOp::Blob);
189 // Blob case. Read the number of bytes as a vbr6.
190 Expected<uint32_t> MaybeNum = ReadVBR(6);
191 if (!MaybeNum)
192 return MaybeNum.takeError();
193 unsigned NumElts = MaybeNum.get();
194 SkipToFourByteBoundary(); // 32-bit alignment
195
196 // Figure out where the end of this blob will be including tail padding.
197 const size_t NewEnd = GetCurrentBitNo() + alignTo(NumElts, 4) * 8;
198
199 // If this would read off the end of the bitcode file, just set the
200 // record to empty and return.
201 if (!canSkipToPos(NewEnd/8)) {
202 skipToEnd();
203 break;
204 }
205
206 // Skip over the blob.
207 if (Error Err = JumpToBit(NewEnd))
208 return std::move(Err);
209 }
210 return Code;
211 }
212
readRecord(unsigned AbbrevID,SmallVectorImpl<uint64_t> & Vals,StringRef * Blob)213 Expected<unsigned> BitstreamCursor::readRecord(unsigned AbbrevID,
214 SmallVectorImpl<uint64_t> &Vals,
215 StringRef *Blob) {
216 if (AbbrevID == bitc::UNABBREV_RECORD) {
217 Expected<uint32_t> MaybeCode = ReadVBR(6);
218 if (!MaybeCode)
219 return MaybeCode.takeError();
220 uint32_t Code = MaybeCode.get();
221 Expected<uint32_t> MaybeNumElts = ReadVBR(6);
222 if (!MaybeNumElts)
223 return error(
224 ("Failed to read size: " + toString(MaybeNumElts.takeError()))
225 .c_str());
226 uint32_t NumElts = MaybeNumElts.get();
227 if (!isSizePlausible(NumElts))
228 return error("Size is not plausible");
229 Vals.reserve(Vals.size() + NumElts);
230
231 for (unsigned i = 0; i != NumElts; ++i)
232 if (Expected<uint64_t> MaybeVal = ReadVBR64(6))
233 Vals.push_back(MaybeVal.get());
234 else
235 return MaybeVal.takeError();
236 return Code;
237 }
238
239 Expected<const BitCodeAbbrev *> MaybeAbbv = getAbbrev(AbbrevID);
240 if (!MaybeAbbv)
241 return MaybeAbbv.takeError();
242 const BitCodeAbbrev *Abbv = MaybeAbbv.get();
243
244 // Read the record code first.
245 assert(Abbv->getNumOperandInfos() != 0 && "no record code in abbreviation?");
246 const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0);
247 unsigned Code;
248 if (CodeOp.isLiteral())
249 Code = CodeOp.getLiteralValue();
250 else {
251 if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array ||
252 CodeOp.getEncoding() == BitCodeAbbrevOp::Blob)
253 return error("Abbreviation starts with an Array or a Blob");
254 if (Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp))
255 Code = MaybeCode.get();
256 else
257 return MaybeCode.takeError();
258 }
259
260 for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) {
261 const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
262 if (Op.isLiteral()) {
263 Vals.push_back(Op.getLiteralValue());
264 continue;
265 }
266
267 if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
268 Op.getEncoding() != BitCodeAbbrevOp::Blob) {
269 if (Expected<uint64_t> MaybeVal = readAbbreviatedField(*this, Op))
270 Vals.push_back(MaybeVal.get());
271 else
272 return MaybeVal.takeError();
273 continue;
274 }
275
276 if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
277 // Array case. Read the number of elements as a vbr6.
278 Expected<uint32_t> MaybeNumElts = ReadVBR(6);
279 if (!MaybeNumElts)
280 return error(
281 ("Failed to read size: " + toString(MaybeNumElts.takeError()))
282 .c_str());
283 uint32_t NumElts = MaybeNumElts.get();
284 if (!isSizePlausible(NumElts))
285 return error("Size is not plausible");
286 Vals.reserve(Vals.size() + NumElts);
287
288 // Get the element encoding.
289 if (i + 2 != e)
290 return error("Array op not second to last");
291 const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
292 if (!EltEnc.isEncoding())
293 return error(
294 "Array element type has to be an encoding of a type");
295
296 // Read all the elements.
297 switch (EltEnc.getEncoding()) {
298 default:
299 return error("Array element type can't be an Array or a Blob");
300 case BitCodeAbbrevOp::Fixed:
301 for (; NumElts; --NumElts)
302 if (Expected<SimpleBitstreamCursor::word_t> MaybeVal =
303 Read((unsigned)EltEnc.getEncodingData()))
304 Vals.push_back(MaybeVal.get());
305 else
306 return MaybeVal.takeError();
307 break;
308 case BitCodeAbbrevOp::VBR:
309 for (; NumElts; --NumElts)
310 if (Expected<uint64_t> MaybeVal =
311 ReadVBR64((unsigned)EltEnc.getEncodingData()))
312 Vals.push_back(MaybeVal.get());
313 else
314 return MaybeVal.takeError();
315 break;
316 case BitCodeAbbrevOp::Char6:
317 for (; NumElts; --NumElts)
318 if (Expected<SimpleBitstreamCursor::word_t> MaybeVal = Read(6))
319 Vals.push_back(BitCodeAbbrevOp::DecodeChar6(MaybeVal.get()));
320 else
321 return MaybeVal.takeError();
322 }
323 continue;
324 }
325
326 assert(Op.getEncoding() == BitCodeAbbrevOp::Blob);
327 // Blob case. Read the number of bytes as a vbr6.
328 Expected<uint32_t> MaybeNumElts = ReadVBR(6);
329 if (!MaybeNumElts)
330 return MaybeNumElts.takeError();
331 uint32_t NumElts = MaybeNumElts.get();
332 SkipToFourByteBoundary(); // 32-bit alignment
333
334 // Figure out where the end of this blob will be including tail padding.
335 size_t CurBitPos = GetCurrentBitNo();
336 const size_t NewEnd = CurBitPos + alignTo(NumElts, 4) * 8;
337
338 // Make sure the bitstream is large enough to contain the blob.
339 if (!canSkipToPos(NewEnd/8))
340 return error("Blob ends too soon");
341
342 // Otherwise, inform the streamer that we need these bytes in memory. Skip
343 // over tail padding first, in case jumping to NewEnd invalidates the Blob
344 // pointer.
345 if (Error Err = JumpToBit(NewEnd))
346 return std::move(Err);
347 const char *Ptr = (const char *)getPointerToBit(CurBitPos, NumElts);
348
349 // If we can return a reference to the data, do so to avoid copying it.
350 if (Blob) {
351 *Blob = StringRef(Ptr, NumElts);
352 } else {
353 // Otherwise, unpack into Vals with zero extension.
354 auto *UPtr = reinterpret_cast<const unsigned char *>(Ptr);
355 Vals.append(UPtr, UPtr + NumElts);
356 }
357 }
358
359 return Code;
360 }
361
ReadAbbrevRecord()362 Error BitstreamCursor::ReadAbbrevRecord() {
363 auto Abbv = std::make_shared<BitCodeAbbrev>();
364 Expected<uint32_t> MaybeNumOpInfo = ReadVBR(5);
365 if (!MaybeNumOpInfo)
366 return MaybeNumOpInfo.takeError();
367 unsigned NumOpInfo = MaybeNumOpInfo.get();
368 for (unsigned i = 0; i != NumOpInfo; ++i) {
369 Expected<word_t> MaybeIsLiteral = Read(1);
370 if (!MaybeIsLiteral)
371 return MaybeIsLiteral.takeError();
372 bool IsLiteral = MaybeIsLiteral.get();
373 if (IsLiteral) {
374 Expected<uint64_t> MaybeOp = ReadVBR64(8);
375 if (!MaybeOp)
376 return MaybeOp.takeError();
377 Abbv->Add(BitCodeAbbrevOp(MaybeOp.get()));
378 continue;
379 }
380
381 Expected<word_t> MaybeEncoding = Read(3);
382 if (!MaybeEncoding)
383 return MaybeEncoding.takeError();
384 if (!BitCodeAbbrevOp::isValidEncoding(MaybeEncoding.get()))
385 return error("Invalid encoding");
386
387 BitCodeAbbrevOp::Encoding E =
388 (BitCodeAbbrevOp::Encoding)MaybeEncoding.get();
389 if (BitCodeAbbrevOp::hasEncodingData(E)) {
390 Expected<uint64_t> MaybeData = ReadVBR64(5);
391 if (!MaybeData)
392 return MaybeData.takeError();
393 uint64_t Data = MaybeData.get();
394
395 // As a special case, handle fixed(0) (i.e., a fixed field with zero bits)
396 // and vbr(0) as a literal zero. This is decoded the same way, and avoids
397 // a slow path in Read() to have to handle reading zero bits.
398 if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) &&
399 Data == 0) {
400 Abbv->Add(BitCodeAbbrevOp(0));
401 continue;
402 }
403
404 if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) &&
405 Data > MaxChunkSize)
406 return error("Fixed or VBR abbrev record with size > MaxChunkData");
407
408 Abbv->Add(BitCodeAbbrevOp(E, Data));
409 } else
410 Abbv->Add(BitCodeAbbrevOp(E));
411 }
412
413 if (Abbv->getNumOperandInfos() == 0)
414 return error("Abbrev record with no operands");
415 CurAbbrevs.push_back(std::move(Abbv));
416
417 return Error::success();
418 }
419
420 Expected<Optional<BitstreamBlockInfo>>
ReadBlockInfoBlock(bool ReadBlockInfoNames)421 BitstreamCursor::ReadBlockInfoBlock(bool ReadBlockInfoNames) {
422 if (llvm::Error Err = EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID))
423 return std::move(Err);
424
425 BitstreamBlockInfo NewBlockInfo;
426
427 SmallVector<uint64_t, 64> Record;
428 BitstreamBlockInfo::BlockInfo *CurBlockInfo = nullptr;
429
430 // Read all the records for this module.
431 while (true) {
432 Expected<BitstreamEntry> MaybeEntry =
433 advanceSkippingSubblocks(AF_DontAutoprocessAbbrevs);
434 if (!MaybeEntry)
435 return MaybeEntry.takeError();
436 BitstreamEntry Entry = MaybeEntry.get();
437
438 switch (Entry.Kind) {
439 case llvm::BitstreamEntry::SubBlock: // Handled for us already.
440 case llvm::BitstreamEntry::Error:
441 return None;
442 case llvm::BitstreamEntry::EndBlock:
443 return std::move(NewBlockInfo);
444 case llvm::BitstreamEntry::Record:
445 // The interesting case.
446 break;
447 }
448
449 // Read abbrev records, associate them with CurBID.
450 if (Entry.ID == bitc::DEFINE_ABBREV) {
451 if (!CurBlockInfo) return None;
452 if (Error Err = ReadAbbrevRecord())
453 return std::move(Err);
454
455 // ReadAbbrevRecord installs the abbrev in CurAbbrevs. Move it to the
456 // appropriate BlockInfo.
457 CurBlockInfo->Abbrevs.push_back(std::move(CurAbbrevs.back()));
458 CurAbbrevs.pop_back();
459 continue;
460 }
461
462 // Read a record.
463 Record.clear();
464 Expected<unsigned> MaybeBlockInfo = readRecord(Entry.ID, Record);
465 if (!MaybeBlockInfo)
466 return MaybeBlockInfo.takeError();
467 switch (MaybeBlockInfo.get()) {
468 default:
469 break; // Default behavior, ignore unknown content.
470 case bitc::BLOCKINFO_CODE_SETBID:
471 if (Record.size() < 1)
472 return None;
473 CurBlockInfo = &NewBlockInfo.getOrCreateBlockInfo((unsigned)Record[0]);
474 break;
475 case bitc::BLOCKINFO_CODE_BLOCKNAME: {
476 if (!CurBlockInfo)
477 return None;
478 if (!ReadBlockInfoNames)
479 break; // Ignore name.
480 CurBlockInfo->Name = std::string(Record.begin(), Record.end());
481 break;
482 }
483 case bitc::BLOCKINFO_CODE_SETRECORDNAME: {
484 if (!CurBlockInfo) return None;
485 if (!ReadBlockInfoNames)
486 break; // Ignore name.
487 CurBlockInfo->RecordNames.emplace_back(
488 (unsigned)Record[0], std::string(Record.begin() + 1, Record.end()));
489 break;
490 }
491 }
492 }
493 }
494