1 //===- BitCodes.h - Enum values for the bitstream format --------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This header defines bitstream enum values.
10 //
11 // The enum values defined in this file should be considered permanent.  If
12 // new features are added, they should have values added at the end of the
13 // respective lists.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #ifndef LLVM_BITSTREAM_BITCODES_H
18 #define LLVM_BITSTREAM_BITCODES_H
19 
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/StringExtras.h"
22 #include "llvm/Support/DataTypes.h"
23 #include "llvm/Support/ErrorHandling.h"
24 #include <cassert>
25 
26 namespace llvm {
/// Byte offsets of the 32-bit fields of the bitstream wrapper header.
///
/// Every field is 4 bytes wide, so the field with index I starts at byte
/// offset I * 4.  These values are part of the on-disk format and must not
/// change.
enum BitstreamWrapperHeader : unsigned {
  BWH_MagicField = 0,    ///< Field 0 (0 * 4).
  BWH_VersionField = 4,  ///< Field 1 (1 * 4).
  BWH_OffsetField = 8,   ///< Field 2 (2 * 4).
  BWH_SizeField = 12,    ///< Field 3 (3 * 4).
  BWH_CPUTypeField = 16, ///< Field 4 (4 * 4).
  BWH_HeaderSize = 20    ///< 5 * 4: the offset just past the fifth field.
};
36 
namespace bitc {

/// Bit widths of the standard fields used when entering a block.
enum StandardWidths {
  /// Block IDs are emitted as VBR-8.
  BlockIDWidth = 8,
  /// Code lengths are emitted as VBR-4.
  CodeLenWidth = 4,
  /// Block sizes are 32 bits wide: up to 2^32 32-bit words = 16GB per block.
  BlockSizeWidth = 32
};

/// Abbreviation IDs that exist in every abbrev namespace.  They always
/// provide a way to exit a block, enter a nested block, define abbrevs, and
/// define an unabbreviated record.
enum FixedAbbrevIDs {
  /// Exit the current block.  Must be zero to guarantee termination for
  /// broken bitcode.
  END_BLOCK = 0,

  /// Enter a nested block.
  ENTER_SUBBLOCK = 1,

  /// DEFINE_ABBREV - Defines an abbrev for the current block.  It consists
  /// of a vbr5 for # operand infos.  Each operand info is emitted with a
  /// single bit to indicate if it is a literal encoding.  If so, the value is
  /// emitted with a vbr8.  If not, the encoding is emitted as 3 bits followed
  /// by the info value as a vbr5 if needed.
  DEFINE_ABBREV = 2,

  /// UNABBREV_RECORDs are emitted with a vbr6 for the record code, followed
  /// by a vbr6 for the # operands, followed by vbr6's for each operand.
  UNABBREV_RECORD = 3,

  /// Not a code: this is a marker for the first abbrev assignment.
  FIRST_APPLICATION_ABBREV = 4
};

/// StandardBlockIDs - All bitcode files can optionally include a BLOCKINFO
/// block, which contains metadata about other blocks in the file.
enum StandardBlockIDs {
  /// BLOCKINFO_BLOCK is used to define metadata about blocks, for example,
  /// standard abbrevs that should be available to all blocks of a specified
  /// ID.
  BLOCKINFO_BLOCK_ID = 0,

  /// First ID available to applications.  Block IDs 1-7 are reserved for
  /// future expansion.
  FIRST_APPLICATION_BLOCKID = 8
};

/// BlockInfoCodes - The blockinfo block contains metadata about user-defined
/// blocks.
///
/// DEFINE_ABBREV has magic semantics inside the blockinfo block: it applies
/// to the current SETBID'd block, instead of the BlockInfo block itself.
enum BlockInfoCodes {
  /// SETBID: [blockid#]
  BLOCKINFO_CODE_SETBID = 1,
  /// BLOCKNAME: [name]
  BLOCKINFO_CODE_BLOCKNAME = 2,
  /// SETRECORDNAME: [id, name]
  BLOCKINFO_CODE_SETRECORDNAME = 3
};

} // end namespace bitc
90 
91 /// BitCodeAbbrevOp - This describes one or more operands in an abbreviation.
92 /// This is actually a union of two different things:
93 ///   1. It could be a literal integer value ("the operand is always 17").
94 ///   2. It could be an encoding specification ("this operand encoded like so").
95 ///
96 class BitCodeAbbrevOp {
97   uint64_t Val;           // A literal value or data for an encoding.
98   bool IsLiteral : 1;     // Indicate whether this is a literal value or not.
99   unsigned Enc   : 3;     // The encoding to use.
100 public:
101   enum Encoding {
102     Fixed = 1,  // A fixed width field, Val specifies number of bits.
103     VBR   = 2,  // A VBR field where Val specifies the width of each chunk.
104     Array = 3,  // A sequence of fields, next field species elt encoding.
105     Char6 = 4,  // A 6-bit fixed field which maps to [a-zA-Z0-9._].
106     Blob  = 5   // 32-bit aligned array of 8-bit characters.
107   };
108 
109   static bool isValidEncoding(uint64_t E) {
110     return E >= 1 && E <= 5;
111   }
112 
113   explicit BitCodeAbbrevOp(uint64_t V) :  Val(V), IsLiteral(true) {}
114   explicit BitCodeAbbrevOp(Encoding E, uint64_t Data = 0)
115     : Val(Data), IsLiteral(false), Enc(E) {}
116 
117   bool isLiteral() const  { return IsLiteral; }
118   bool isEncoding() const { return !IsLiteral; }
119 
120   // Accessors for literals.
121   uint64_t getLiteralValue() const { assert(isLiteral()); return Val; }
122 
123   // Accessors for encoding info.
124   Encoding getEncoding() const { assert(isEncoding()); return (Encoding)Enc; }
125   uint64_t getEncodingData() const {
126     assert(isEncoding() && hasEncodingData());
127     return Val;
128   }
129 
130   bool hasEncodingData() const { return hasEncodingData(getEncoding()); }
131   static bool hasEncodingData(Encoding E) {
132     switch (E) {
133     case Fixed:
134     case VBR:
135       return true;
136     case Array:
137     case Char6:
138     case Blob:
139       return false;
140     }
141     report_fatal_error("Invalid encoding");
142   }
143 
144   /// isChar6 - Return true if this character is legal in the Char6 encoding.
145   static bool isChar6(char C) { return isAlnum(C) || C == '.' || C == '_'; }
146   static unsigned EncodeChar6(char C) {
147     if (C >= 'a' && C <= 'z') return C-'a';
148     if (C >= 'A' && C <= 'Z') return C-'A'+26;
149     if (C >= '0' && C <= '9') return C-'0'+26+26;
150     if (C == '.')             return 62;
151     if (C == '_')             return 63;
152     llvm_unreachable("Not a value Char6 character!");
153   }
154 
155   static char DecodeChar6(unsigned V) {
156     assert((V & ~63) == 0 && "Not a Char6 encoded character!");
157     return "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789._"
158         [V];
159   }
160 
161 };
162 
163 /// BitCodeAbbrev - This class represents an abbreviation record.  An
164 /// abbreviation allows a complex record that has redundancy to be stored in a
165 /// specialized format instead of the fully-general, fully-vbr, format.
166 class BitCodeAbbrev {
167   SmallVector<BitCodeAbbrevOp, 32> OperandList;
168 
169 public:
170   BitCodeAbbrev() = default;
171 
172   explicit BitCodeAbbrev(std::initializer_list<BitCodeAbbrevOp> OperandList)
173       : OperandList(OperandList) {}
174 
175   unsigned getNumOperandInfos() const {
176     return static_cast<unsigned>(OperandList.size());
177   }
178   const BitCodeAbbrevOp &getOperandInfo(unsigned N) const {
179     return OperandList[N];
180   }
181 
182   void Add(const BitCodeAbbrevOp &OpInfo) {
183     OperandList.push_back(OpInfo);
184   }
185 };
186 } // End llvm namespace
187 
188 #endif
189