1 //===- BitstreamReader.cpp - BitstreamReader implementation ---------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "llvm/Bitcode/BitstreamReader.h"
11 #include "llvm/ADT/StringRef.h"
12 #include <cassert>
13 #include <string>
14 
15 using namespace llvm;
16 
17 //===----------------------------------------------------------------------===//
18 //  BitstreamCursor implementation
19 //===----------------------------------------------------------------------===//
20 
21 /// EnterSubBlock - Having read the ENTER_SUBBLOCK abbrevid, enter
22 /// the block, and return true if the block has an error.
23 bool BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) {
24   // Save the current block's state on BlockScope.
25   BlockScope.push_back(Block(CurCodeSize));
26   BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
27 
28   // Add the abbrevs specific to this block to the CurAbbrevs list.
29   if (BlockInfo) {
30     if (const BitstreamBlockInfo::BlockInfo *Info =
31             BlockInfo->getBlockInfo(BlockID)) {
32       CurAbbrevs.insert(CurAbbrevs.end(), Info->Abbrevs.begin(),
33                         Info->Abbrevs.end());
34     }
35   }
36 
37   // Get the codesize of this block.
38   CurCodeSize = ReadVBR(bitc::CodeLenWidth);
39   // We can't read more than MaxChunkSize at a time
40   if (CurCodeSize > MaxChunkSize)
41     return true;
42 
43   SkipToFourByteBoundary();
44   unsigned NumWords = Read(bitc::BlockSizeWidth);
45   if (NumWordsP) *NumWordsP = NumWords;
46 
47   // Validate that this block is sane.
48   return CurCodeSize == 0 || AtEndOfStream();
49 }
50 
51 static uint64_t readAbbreviatedField(BitstreamCursor &Cursor,
52                                      const BitCodeAbbrevOp &Op) {
53   assert(!Op.isLiteral() && "Not to be used with literals!");
54 
55   // Decode the value as we are commanded.
56   switch (Op.getEncoding()) {
57   case BitCodeAbbrevOp::Array:
58   case BitCodeAbbrevOp::Blob:
59     llvm_unreachable("Should not reach here");
60   case BitCodeAbbrevOp::Fixed:
61     assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
62     return Cursor.Read((unsigned)Op.getEncodingData());
63   case BitCodeAbbrevOp::VBR:
64     assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
65     return Cursor.ReadVBR64((unsigned)Op.getEncodingData());
66   case BitCodeAbbrevOp::Char6:
67     return BitCodeAbbrevOp::DecodeChar6(Cursor.Read(6));
68   }
69   llvm_unreachable("invalid abbreviation encoding");
70 }
71 
72 static void skipAbbreviatedField(BitstreamCursor &Cursor,
73                                  const BitCodeAbbrevOp &Op) {
74   assert(!Op.isLiteral() && "Not to be used with literals!");
75 
76   // Decode the value as we are commanded.
77   switch (Op.getEncoding()) {
78   case BitCodeAbbrevOp::Array:
79   case BitCodeAbbrevOp::Blob:
80     llvm_unreachable("Should not reach here");
81   case BitCodeAbbrevOp::Fixed:
82     assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
83     Cursor.Read((unsigned)Op.getEncodingData());
84     break;
85   case BitCodeAbbrevOp::VBR:
86     assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
87     Cursor.ReadVBR64((unsigned)Op.getEncodingData());
88     break;
89   case BitCodeAbbrevOp::Char6:
90     Cursor.Read(6);
91     break;
92   }
93 }
94 
95 /// skipRecord - Read the current record and discard it.
96 void BitstreamCursor::skipRecord(unsigned AbbrevID) {
97   // Skip unabbreviated records by reading past their entries.
98   if (AbbrevID == bitc::UNABBREV_RECORD) {
99     unsigned Code = ReadVBR(6);
100     (void)Code;
101     unsigned NumElts = ReadVBR(6);
102     for (unsigned i = 0; i != NumElts; ++i)
103       (void)ReadVBR64(6);
104     return;
105   }
106 
107   const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID);
108 
109   for (unsigned i = 0, e = Abbv->getNumOperandInfos(); i != e; ++i) {
110     const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
111     if (Op.isLiteral())
112       continue;
113 
114     if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
115         Op.getEncoding() != BitCodeAbbrevOp::Blob) {
116       skipAbbreviatedField(*this, Op);
117       continue;
118     }
119 
120     if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
121       // Array case.  Read the number of elements as a vbr6.
122       unsigned NumElts = ReadVBR(6);
123 
124       // Get the element encoding.
125       assert(i+2 == e && "array op not second to last?");
126       const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
127 
128       // Read all the elements.
129       // Decode the value as we are commanded.
130       switch (EltEnc.getEncoding()) {
131       default:
132         report_fatal_error("Array element type can't be an Array or a Blob");
133       case BitCodeAbbrevOp::Fixed:
134         assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize);
135         JumpToBit(GetCurrentBitNo() + NumElts * EltEnc.getEncodingData());
136         break;
137       case BitCodeAbbrevOp::VBR:
138         assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize);
139         for (; NumElts; --NumElts)
140           ReadVBR64((unsigned)EltEnc.getEncodingData());
141         break;
142       case BitCodeAbbrevOp::Char6:
143         JumpToBit(GetCurrentBitNo() + NumElts * 6);
144         break;
145       }
146       continue;
147     }
148 
149     assert(Op.getEncoding() == BitCodeAbbrevOp::Blob);
150     // Blob case.  Read the number of bytes as a vbr6.
151     unsigned NumElts = ReadVBR(6);
152     SkipToFourByteBoundary();  // 32-bit alignment
153 
154     // Figure out where the end of this blob will be including tail padding.
155     size_t NewEnd = GetCurrentBitNo()+((NumElts+3)&~3)*8;
156 
157     // If this would read off the end of the bitcode file, just set the
158     // record to empty and return.
159     if (!canSkipToPos(NewEnd/8)) {
160       skipToEnd();
161       break;
162     }
163 
164     // Skip over the blob.
165     JumpToBit(NewEnd);
166   }
167 }
168 
169 unsigned BitstreamCursor::readRecord(unsigned AbbrevID,
170                                      SmallVectorImpl<uint64_t> &Vals,
171                                      StringRef *Blob) {
172   if (AbbrevID == bitc::UNABBREV_RECORD) {
173     unsigned Code = ReadVBR(6);
174     unsigned NumElts = ReadVBR(6);
175     for (unsigned i = 0; i != NumElts; ++i)
176       Vals.push_back(ReadVBR64(6));
177     return Code;
178   }
179 
180   const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID);
181 
182   // Read the record code first.
183   assert(Abbv->getNumOperandInfos() != 0 && "no record code in abbreviation?");
184   const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0);
185   unsigned Code;
186   if (CodeOp.isLiteral())
187     Code = CodeOp.getLiteralValue();
188   else {
189     if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array ||
190         CodeOp.getEncoding() == BitCodeAbbrevOp::Blob)
191       report_fatal_error("Abbreviation starts with an Array or a Blob");
192     Code = readAbbreviatedField(*this, CodeOp);
193   }
194 
195   for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) {
196     const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
197     if (Op.isLiteral()) {
198       Vals.push_back(Op.getLiteralValue());
199       continue;
200     }
201 
202     if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
203         Op.getEncoding() != BitCodeAbbrevOp::Blob) {
204       Vals.push_back(readAbbreviatedField(*this, Op));
205       continue;
206     }
207 
208     if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
209       // Array case.  Read the number of elements as a vbr6.
210       unsigned NumElts = ReadVBR(6);
211 
212       // Get the element encoding.
213       if (i + 2 != e)
214         report_fatal_error("Array op not second to last");
215       const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
216       if (!EltEnc.isEncoding())
217         report_fatal_error(
218             "Array element type has to be an encoding of a type");
219 
220       // Read all the elements.
221       switch (EltEnc.getEncoding()) {
222       default:
223         report_fatal_error("Array element type can't be an Array or a Blob");
224       case BitCodeAbbrevOp::Fixed:
225         for (; NumElts; --NumElts)
226           Vals.push_back(Read((unsigned)EltEnc.getEncodingData()));
227         break;
228       case BitCodeAbbrevOp::VBR:
229         for (; NumElts; --NumElts)
230           Vals.push_back(ReadVBR64((unsigned)EltEnc.getEncodingData()));
231         break;
232       case BitCodeAbbrevOp::Char6:
233         for (; NumElts; --NumElts)
234           Vals.push_back(BitCodeAbbrevOp::DecodeChar6(Read(6)));
235       }
236       continue;
237     }
238 
239     assert(Op.getEncoding() == BitCodeAbbrevOp::Blob);
240     // Blob case.  Read the number of bytes as a vbr6.
241     unsigned NumElts = ReadVBR(6);
242     SkipToFourByteBoundary();  // 32-bit alignment
243 
244     // Figure out where the end of this blob will be including tail padding.
245     size_t CurBitPos = GetCurrentBitNo();
246     size_t NewEnd = CurBitPos+((NumElts+3)&~3)*8;
247 
248     // If this would read off the end of the bitcode file, just set the
249     // record to empty and return.
250     if (!canSkipToPos(NewEnd/8)) {
251       Vals.append(NumElts, 0);
252       skipToEnd();
253       break;
254     }
255 
256     // Otherwise, inform the streamer that we need these bytes in memory.  Skip
257     // over tail padding first, in case jumping to NewEnd invalidates the Blob
258     // pointer.
259     JumpToBit(NewEnd);
260     const char *Ptr = (const char *)getPointerToBit(CurBitPos, NumElts);
261 
262     // If we can return a reference to the data, do so to avoid copying it.
263     if (Blob) {
264       *Blob = StringRef(Ptr, NumElts);
265     } else {
266       // Otherwise, unpack into Vals with zero extension.
267       for (; NumElts; --NumElts)
268         Vals.push_back((unsigned char)*Ptr++);
269     }
270   }
271 
272   return Code;
273 }
274 
275 void BitstreamCursor::ReadAbbrevRecord() {
276   BitCodeAbbrev *Abbv = new BitCodeAbbrev();
277   unsigned NumOpInfo = ReadVBR(5);
278   for (unsigned i = 0; i != NumOpInfo; ++i) {
279     bool IsLiteral = Read(1);
280     if (IsLiteral) {
281       Abbv->Add(BitCodeAbbrevOp(ReadVBR64(8)));
282       continue;
283     }
284 
285     BitCodeAbbrevOp::Encoding E = (BitCodeAbbrevOp::Encoding)Read(3);
286     if (BitCodeAbbrevOp::hasEncodingData(E)) {
287       uint64_t Data = ReadVBR64(5);
288 
289       // As a special case, handle fixed(0) (i.e., a fixed field with zero bits)
290       // and vbr(0) as a literal zero.  This is decoded the same way, and avoids
291       // a slow path in Read() to have to handle reading zero bits.
292       if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) &&
293           Data == 0) {
294         Abbv->Add(BitCodeAbbrevOp(0));
295         continue;
296       }
297 
298       if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) &&
299           Data > MaxChunkSize)
300         report_fatal_error(
301             "Fixed or VBR abbrev record with size > MaxChunkData");
302 
303       Abbv->Add(BitCodeAbbrevOp(E, Data));
304     } else
305       Abbv->Add(BitCodeAbbrevOp(E));
306   }
307 
308   if (Abbv->getNumOperandInfos() == 0)
309     report_fatal_error("Abbrev record with no operands");
310   CurAbbrevs.push_back(Abbv);
311 }
312 
313 Optional<BitstreamBlockInfo>
314 BitstreamCursor::ReadBlockInfoBlock(bool ReadBlockInfoNames) {
315   if (EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID)) return None;
316 
317   BitstreamBlockInfo NewBlockInfo;
318 
319   SmallVector<uint64_t, 64> Record;
320   BitstreamBlockInfo::BlockInfo *CurBlockInfo = nullptr;
321 
322   // Read all the records for this module.
323   while (true) {
324     BitstreamEntry Entry = advanceSkippingSubblocks(AF_DontAutoprocessAbbrevs);
325 
326     switch (Entry.Kind) {
327     case llvm::BitstreamEntry::SubBlock: // Handled for us already.
328     case llvm::BitstreamEntry::Error:
329       return None;
330     case llvm::BitstreamEntry::EndBlock:
331       return std::move(NewBlockInfo);
332     case llvm::BitstreamEntry::Record:
333       // The interesting case.
334       break;
335     }
336 
337     // Read abbrev records, associate them with CurBID.
338     if (Entry.ID == bitc::DEFINE_ABBREV) {
339       if (!CurBlockInfo) return None;
340       ReadAbbrevRecord();
341 
342       // ReadAbbrevRecord installs the abbrev in CurAbbrevs.  Move it to the
343       // appropriate BlockInfo.
344       CurBlockInfo->Abbrevs.push_back(std::move(CurAbbrevs.back()));
345       CurAbbrevs.pop_back();
346       continue;
347     }
348 
349     // Read a record.
350     Record.clear();
351     switch (readRecord(Entry.ID, Record)) {
352       default: break;  // Default behavior, ignore unknown content.
353       case bitc::BLOCKINFO_CODE_SETBID:
354         if (Record.size() < 1) return None;
355         CurBlockInfo = &NewBlockInfo.getOrCreateBlockInfo((unsigned)Record[0]);
356         break;
357       case bitc::BLOCKINFO_CODE_BLOCKNAME: {
358         if (!CurBlockInfo) return None;
359         if (!ReadBlockInfoNames)
360           break; // Ignore name.
361         std::string Name;
362         for (unsigned i = 0, e = Record.size(); i != e; ++i)
363           Name += (char)Record[i];
364         CurBlockInfo->Name = Name;
365         break;
366       }
367       case bitc::BLOCKINFO_CODE_SETRECORDNAME: {
368         if (!CurBlockInfo) return None;
369         if (!ReadBlockInfoNames)
370           break; // Ignore name.
371         std::string Name;
372         for (unsigned i = 1, e = Record.size(); i != e; ++i)
373           Name += (char)Record[i];
374         CurBlockInfo->RecordNames.push_back(std::make_pair((unsigned)Record[0],
375                                                            Name));
376         break;
377       }
378     }
379   }
380 }
381