1 //===- CodeEmitterGen.cpp - Code Emitter Generator ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// CodeEmitterGen uses the descriptions of instructions and their fields to
// construct an automated code emitter: a function that, given an MCInst,
// returns the (currently, 64-bit unsigned) value of the instruction.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "CodeGenInstruction.h"
16 #include "CodeGenTarget.h"
17 #include "SubtargetFeatureInfo.h"
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/Support/Casting.h"
21 #include "llvm/Support/raw_ostream.h"
22 #include "llvm/TableGen/Record.h"
23 #include "llvm/TableGen/TableGenBackend.h"
24 #include <cassert>
25 #include <cstdint>
26 #include <map>
27 #include <set>
28 #include <string>
29 #include <utility>
30 #include <vector>
31 
32 using namespace llvm;
33 
34 namespace {
35 
36 class CodeEmitterGen {
37   RecordKeeper &Records;
38 
39 public:
40   CodeEmitterGen(RecordKeeper &R) : Records(R) {}
41 
42   void run(raw_ostream &o);
43 
44 private:
45   int getVariableBit(const std::string &VarName, BitsInit *BI, int bit);
46   std::string getInstructionCase(Record *R, CodeGenTarget &Target);
47   void AddCodeToMergeInOperand(Record *R, BitsInit *BI,
48                                const std::string &VarName,
49                                unsigned &NumberedOp,
50                                std::set<unsigned> &NamedOpIndices,
51                                std::string &Case, CodeGenTarget &Target);
52 
53 };
54 
55 // If the VarBitInit at position 'bit' matches the specified variable then
56 // return the variable bit position.  Otherwise return -1.
57 int CodeEmitterGen::getVariableBit(const std::string &VarName,
58                                    BitsInit *BI, int bit) {
59   if (VarBitInit *VBI = dyn_cast<VarBitInit>(BI->getBit(bit))) {
60     if (VarInit *VI = dyn_cast<VarInit>(VBI->getBitVar()))
61       if (VI->getName() == VarName)
62         return VBI->getBitNum();
63   } else if (VarInit *VI = dyn_cast<VarInit>(BI->getBit(bit))) {
64     if (VI->getName() == VarName)
65       return 0;
66   }
67 
68   return -1;
69 }
70 
71 void CodeEmitterGen::
72 AddCodeToMergeInOperand(Record *R, BitsInit *BI, const std::string &VarName,
73                         unsigned &NumberedOp,
74                         std::set<unsigned> &NamedOpIndices,
75                         std::string &Case, CodeGenTarget &Target) {
76   CodeGenInstruction &CGI = Target.getInstruction(R);
77 
78   // Determine if VarName actually contributes to the Inst encoding.
79   int bit = BI->getNumBits()-1;
80 
81   // Scan for a bit that this contributed to.
82   for (; bit >= 0; ) {
83     if (getVariableBit(VarName, BI, bit) != -1)
84       break;
85 
86     --bit;
87   }
88 
89   // If we found no bits, ignore this value, otherwise emit the call to get the
90   // operand encoding.
91   if (bit < 0) return;
92 
93   // If the operand matches by name, reference according to that
94   // operand number. Non-matching operands are assumed to be in
95   // order.
96   unsigned OpIdx;
97   if (CGI.Operands.hasOperandNamed(VarName, OpIdx)) {
98     // Get the machine operand number for the indicated operand.
99     OpIdx = CGI.Operands[OpIdx].MIOperandNo;
100     assert(!CGI.Operands.isFlatOperandNotEmitted(OpIdx) &&
101            "Explicitly used operand also marked as not emitted!");
102   } else {
103     unsigned NumberOps = CGI.Operands.size();
104     /// If this operand is not supposed to be emitted by the
105     /// generated emitter, skip it.
106     while (NumberedOp < NumberOps &&
107            (CGI.Operands.isFlatOperandNotEmitted(NumberedOp) ||
108               (!NamedOpIndices.empty() && NamedOpIndices.count(
109                 CGI.Operands.getSubOperandNumber(NumberedOp).first)))) {
110       ++NumberedOp;
111 
112       if (NumberedOp >= CGI.Operands.back().MIOperandNo +
113                         CGI.Operands.back().MINumOperands) {
114         errs() << "Too few operands in record " << R->getName() <<
115                   " (no match for variable " << VarName << "):\n";
116         errs() << *R;
117         errs() << '\n';
118 
119         return;
120       }
121     }
122 
123     OpIdx = NumberedOp++;
124   }
125 
126   std::pair<unsigned, unsigned> SO = CGI.Operands.getSubOperandNumber(OpIdx);
127   std::string &EncoderMethodName = CGI.Operands[SO.first].EncoderMethodName;
128 
129   // If the source operand has a custom encoder, use it. This will
130   // get the encoding for all of the suboperands.
131   if (!EncoderMethodName.empty()) {
132     // A custom encoder has all of the information for the
133     // sub-operands, if there are more than one, so only
134     // query the encoder once per source operand.
135     if (SO.second == 0) {
136       Case += "      // op: " + VarName + "\n" +
137               "      op = " + EncoderMethodName + "(MI, " + utostr(OpIdx);
138       Case += ", Fixups, STI";
139       Case += ");\n";
140     }
141   } else {
142     Case += "      // op: " + VarName + "\n" +
143       "      op = getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx) + ")";
144     Case += ", Fixups, STI";
145     Case += ");\n";
146   }
147 
148   for (; bit >= 0; ) {
149     int varBit = getVariableBit(VarName, BI, bit);
150 
151     // If this bit isn't from a variable, skip it.
152     if (varBit == -1) {
153       --bit;
154       continue;
155     }
156 
157     // Figure out the consecutive range of bits covered by this operand, in
158     // order to generate better encoding code.
159     int beginInstBit = bit;
160     int beginVarBit = varBit;
161     int N = 1;
162     for (--bit; bit >= 0;) {
163       varBit = getVariableBit(VarName, BI, bit);
164       if (varBit == -1 || varBit != (beginVarBit - N)) break;
165       ++N;
166       --bit;
167     }
168 
169     uint64_t opMask = ~(uint64_t)0 >> (64-N);
170     int opShift = beginVarBit - N + 1;
171     opMask <<= opShift;
172     opShift = beginInstBit - beginVarBit;
173 
174     if (opShift > 0) {
175       Case += "      Value |= (op & UINT64_C(" + utostr(opMask) + ")) << " +
176               itostr(opShift) + ";\n";
177     } else if (opShift < 0) {
178       Case += "      Value |= (op & UINT64_C(" + utostr(opMask) + ")) >> " +
179               itostr(-opShift) + ";\n";
180     } else {
181       Case += "      Value |= op & UINT64_C(" + utostr(opMask) + ");\n";
182     }
183   }
184 }
185 
186 std::string CodeEmitterGen::getInstructionCase(Record *R,
187                                                CodeGenTarget &Target) {
188   std::string Case;
189   BitsInit *BI = R->getValueAsBitsInit("Inst");
190   unsigned NumberedOp = 0;
191   std::set<unsigned> NamedOpIndices;
192 
193   // Collect the set of operand indices that might correspond to named
194   // operand, and skip these when assigning operands based on position.
195   if (Target.getInstructionSet()->
196        getValueAsBit("noNamedPositionallyEncodedOperands")) {
197     CodeGenInstruction &CGI = Target.getInstruction(R);
198     for (const RecordVal &RV : R->getValues()) {
199       unsigned OpIdx;
200       if (!CGI.Operands.hasOperandNamed(RV.getName(), OpIdx))
201         continue;
202 
203       NamedOpIndices.insert(OpIdx);
204     }
205   }
206 
207   // Loop over all of the fields in the instruction, determining which are the
208   // operands to the instruction.
209   for (const RecordVal &RV : R->getValues()) {
210     // Ignore fixed fields in the record, we're looking for values like:
211     //    bits<5> RST = { ?, ?, ?, ?, ? };
212     if (RV.getPrefix() || RV.getValue()->isComplete())
213       continue;
214 
215     AddCodeToMergeInOperand(R, BI, RV.getName(), NumberedOp,
216                             NamedOpIndices, Case, Target);
217   }
218 
219   StringRef PostEmitter = R->getValueAsString("PostEncoderMethod");
220   if (!PostEmitter.empty()) {
221     Case += "      Value = ";
222     Case += PostEmitter;
223     Case += "(MI, Value";
224     Case += ", STI";
225     Case += ");\n";
226   }
227 
228   return Case;
229 }
230 
231 void CodeEmitterGen::run(raw_ostream &o) {
232   CodeGenTarget Target(Records);
233   std::vector<Record*> Insts = Records.getAllDerivedDefinitions("Instruction");
234 
235   // For little-endian instruction bit encodings, reverse the bit order
236   Target.reverseBitsForLittleEndianEncoding();
237 
238   ArrayRef<const CodeGenInstruction*> NumberedInstructions =
239     Target.getInstructionsByEnumValue();
240 
241   // Emit function declaration
242   o << "uint64_t " << Target.getName();
243   o << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
244     << "    SmallVectorImpl<MCFixup> &Fixups,\n"
245     << "    const MCSubtargetInfo &STI) const {\n";
246 
247   // Emit instruction base values
248   o << "  static const uint64_t InstBits[] = {\n";
249   for (const CodeGenInstruction *CGI : NumberedInstructions) {
250     Record *R = CGI->TheDef;
251 
252     if (R->getValueAsString("Namespace") == "TargetOpcode" ||
253         R->getValueAsBit("isPseudo")) {
254       o << "    UINT64_C(0),\n";
255       continue;
256     }
257 
258     BitsInit *BI = R->getValueAsBitsInit("Inst");
259 
260     // Start by filling in fixed values.
261     uint64_t Value = 0;
262     for (unsigned i = 0, e = BI->getNumBits(); i != e; ++i) {
263       if (BitInit *B = dyn_cast<BitInit>(BI->getBit(e-i-1)))
264         Value |= (uint64_t)B->getValue() << (e-i-1);
265     }
266     o << "    UINT64_C(" << Value << ")," << '\t' << "// " << R->getName() << "\n";
267   }
268   o << "    UINT64_C(0)\n  };\n";
269 
270   // Map to accumulate all the cases.
271   std::map<std::string, std::vector<std::string>> CaseMap;
272 
273   // Construct all cases statement for each opcode
274   for (std::vector<Record*>::iterator IC = Insts.begin(), EC = Insts.end();
275         IC != EC; ++IC) {
276     Record *R = *IC;
277     if (R->getValueAsString("Namespace") == "TargetOpcode" ||
278         R->getValueAsBit("isPseudo"))
279       continue;
280     std::string InstName =
281         (R->getValueAsString("Namespace") + "::" + R->getName()).str();
282     std::string Case = getInstructionCase(R, Target);
283 
284     CaseMap[Case].push_back(std::move(InstName));
285   }
286 
287   // Emit initial function code
288   o << "  const unsigned opcode = MI.getOpcode();\n"
289     << "  uint64_t Value = InstBits[opcode];\n"
290     << "  uint64_t op = 0;\n"
291     << "  (void)op;  // suppress warning\n"
292     << "  switch (opcode) {\n";
293 
294   // Emit each case statement
295   std::map<std::string, std::vector<std::string>>::iterator IE, EE;
296   for (IE = CaseMap.begin(), EE = CaseMap.end(); IE != EE; ++IE) {
297     const std::string &Case = IE->first;
298     std::vector<std::string> &InstList = IE->second;
299 
300     for (int i = 0, N = InstList.size(); i < N; i++) {
301       if (i) o << "\n";
302       o << "    case " << InstList[i]  << ":";
303     }
304     o << " {\n";
305     o << Case;
306     o << "      break;\n"
307       << "    }\n";
308   }
309 
310   // Default case: unhandled opcode
311   o << "  default:\n"
312     << "    std::string msg;\n"
313     << "    raw_string_ostream Msg(msg);\n"
314     << "    Msg << \"Not supported instr: \" << MI;\n"
315     << "    report_fatal_error(Msg.str());\n"
316     << "  }\n"
317     << "  return Value;\n"
318     << "}\n\n";
319 
320   const auto &All = SubtargetFeatureInfo::getAll(Records);
321   std::map<Record *, SubtargetFeatureInfo, LessRecordByID> SubtargetFeatures;
322   SubtargetFeatures.insert(All.begin(), All.end());
323 
324   o << "#ifdef ENABLE_INSTR_PREDICATE_VERIFIER\n"
325     << "#undef ENABLE_INSTR_PREDICATE_VERIFIER\n"
326     << "#include <sstream>\n\n";
327 
328   // Emit the subtarget feature enumeration.
329   SubtargetFeatureInfo::emitSubtargetFeatureFlagEnumeration(SubtargetFeatures,
330                                                             o);
331 
332   // Emit the name table for error messages.
333   o << "#ifndef NDEBUG\n";
334   SubtargetFeatureInfo::emitNameTable(SubtargetFeatures, o);
335   o << "#endif // NDEBUG\n";
336 
337   // Emit the available features compute function.
338   SubtargetFeatureInfo::emitComputeAssemblerAvailableFeatures(
339       Target.getName(), "MCCodeEmitter", "computeAvailableFeatures",
340       SubtargetFeatures, o);
341 
342   // Emit the predicate verifier.
343   o << "void " << Target.getName()
344     << "MCCodeEmitter::verifyInstructionPredicates(\n"
345     << "    const MCInst &Inst, uint64_t AvailableFeatures) const {\n"
346     << "#ifndef NDEBUG\n"
347     << "  static uint64_t RequiredFeatures[] = {\n";
348   unsigned InstIdx = 0;
349   for (const CodeGenInstruction *Inst : Target.getInstructionsByEnumValue()) {
350     o << "    ";
351     for (Record *Predicate : Inst->TheDef->getValueAsListOfDefs("Predicates")) {
352       const auto &I = SubtargetFeatures.find(Predicate);
353       if (I != SubtargetFeatures.end())
354         o << I->second.getEnumName() << " | ";
355     }
356     o << "0, // " << Inst->TheDef->getName() << " = " << InstIdx << "\n";
357     InstIdx++;
358   }
359   o << "  };\n\n";
360   o << "  assert(Inst.getOpcode() < " << InstIdx << ");\n";
361   o << "  uint64_t MissingFeatures =\n"
362     << "      (AvailableFeatures & RequiredFeatures[Inst.getOpcode()]) ^\n"
363     << "      RequiredFeatures[Inst.getOpcode()];\n"
364     << "  if (MissingFeatures) {\n"
365     << "    std::ostringstream Msg;\n"
366     << "    Msg << \"Attempting to emit \" << "
367        "MCII.getName(Inst.getOpcode()).str()\n"
368     << "        << \" instruction but the \";\n"
369     << "    for (unsigned i = 0; i < 8 * sizeof(MissingFeatures); ++i)\n"
370     << "      if (MissingFeatures & (1ULL << i))\n"
371     << "        Msg << SubtargetFeatureNames[i] << \" \";\n"
372     << "    Msg << \"predicate(s) are not met\";\n"
373     << "    report_fatal_error(Msg.str());\n"
374     << "  }\n"
375     << "#else\n"
376     << "// Silence unused variable warning on targets that don't use MCII for "
377        "other purposes (e.g. BPF).\n"
378     << "(void)MCII;\n"
379     << "#endif // NDEBUG\n";
380   o << "}\n";
381   o << "#endif\n";
382 }
383 
384 } // end anonymous namespace
385 
386 namespace llvm {
387 
388 void EmitCodeEmitter(RecordKeeper &RK, raw_ostream &OS) {
389   emitSourceFileHeader("Machine Code Emitter", OS);
390   CodeEmitterGen(RK).run(OS);
391 }
392 
393 } // end namespace llvm
394