1 //===- VarLenCodeEmitterGen.cpp - CEG for variable-length insts -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The CodeEmitterGen component for variable-length instructions.
10 //
11 // The basic CodeEmitterGen is almost exclusively designed for fixed-
12 // length instructions. A good analogy for its encoding scheme is how printf
// works: The (immutable) formatting string represents the fixed values in the
14 // encoded instruction. Placeholders (i.e. %something), on the other hand,
15 // represent encoding for instruction operands.
16 // ```
17 // printf("1101 %src 1001 %dst", <encoded value for operand `src`>,
18 //                               <encoded value for operand `dst`>);
19 // ```
20 // VarLenCodeEmitterGen in this file provides an alternative encoding scheme
21 // that works more like a C++ stream operator:
22 // ```
23 // OS << 0b1101;
24 // if (Cond)
25 //   OS << OperandEncoding0;
26 // OS << 0b1001 << OperandEncoding1;
27 // ```
// You are free to concatenate arbitrary types (and sizes) of encoding
// fragments on any bit position, bringing more flexibility in defining
// encodings for variable-length instructions.
31 //
32 // In a more specific way, instruction encoding is represented by a DAG type
33 // `Inst` field. Here is an example:
34 // ```
35 // dag Inst = (descend 0b1101, (operand "$src", 4), 0b1001,
36 //                     (operand "$dst", 4));
37 // ```
38 // It represents the following instruction encoding:
39 // ```
40 // MSB                                                     LSB
41 // 1101<encoding for operand src>1001<encoding for operand dst>
42 // ```
43 // For more details about DAG operators in the above snippet, please
44 // refer to \file include/llvm/Target/Target.td.
45 //
// VarLenCodeEmitter will convert the above DAG into the same helper function
// generated by CodeEmitter, `MCCodeEmitter::getBinaryCodeForInstr` (except
// for a few details).
49 //
50 //===----------------------------------------------------------------------===//
51 
52 #include "VarLenCodeEmitterGen.h"
53 #include "CodeGenInstruction.h"
54 #include "CodeGenTarget.h"
55 #include "SubtargetFeatureInfo.h"
56 #include "llvm/ADT/ArrayRef.h"
57 #include "llvm/ADT/DenseMap.h"
58 #include "llvm/Support/raw_ostream.h"
59 #include "llvm/TableGen/Error.h"
60 #include "llvm/TableGen/Record.h"
61 
62 using namespace llvm;
63 
64 namespace {
65 
66 class VarLenCodeEmitterGen {
67   RecordKeeper &Records;
68 
69   class VarLenInst {
70     size_t NumBits;
71 
72     // Set if any of the segment is not fixed value.
73     bool HasDynamicSegment;
74 
75     // {Number of bits, Value}
76     SmallVector<std::pair<unsigned, const Init *>, 4> Segments;
77 
78     void buildRec(const DagInit *DI);
79 
80   public:
81     VarLenInst() : NumBits(0U), HasDynamicSegment(false) {}
82 
83     explicit VarLenInst(const DagInit *DI);
84 
85     /// Number of bits
86     size_t size() const { return NumBits; }
87 
88     using const_iterator = decltype(Segments)::const_iterator;
89 
90     const_iterator begin() const { return Segments.begin(); }
91     const_iterator end() const { return Segments.end(); }
92     size_t getNumSegments() const { return Segments.size(); }
93 
94     bool isFixedValueOnly() const { return !HasDynamicSegment; }
95   };
96 
97   DenseMap<Record *, VarLenInst> VarLenInsts;
98 
99   // Emit based values (i.e. fixed bits in the encoded instructions)
100   void emitInstructionBaseValues(
101       raw_ostream &OS,
102       ArrayRef<const CodeGenInstruction *> NumberedInstructions,
103       CodeGenTarget &Target, int HwMode = -1);
104 
105   std::string getInstructionCase(Record *R, CodeGenTarget &Target);
106   std::string getInstructionCaseForEncoding(Record *R, Record *EncodingDef,
107                                             CodeGenTarget &Target);
108 
109 public:
110   explicit VarLenCodeEmitterGen(RecordKeeper &R) : Records(R) {}
111 
112   void run(raw_ostream &OS);
113 };
114 
115 } // end anonymous namespace
116 
117 VarLenCodeEmitterGen::VarLenInst::VarLenInst(const DagInit *DI) : NumBits(0U) {
118   buildRec(DI);
119   for (const auto &S : Segments)
120     NumBits += S.first;
121 }
122 
123 void VarLenCodeEmitterGen::VarLenInst::buildRec(const DagInit *DI) {
124   std::string Op = DI->getOperator()->getAsString();
125 
126   if (Op == "ascend" || Op == "descend") {
127     bool Reverse = Op == "descend";
128     int i = Reverse ? DI->getNumArgs() - 1 : 0;
129     int e = Reverse ? -1 : DI->getNumArgs();
130     int s = Reverse ? -1 : 1;
131     for (; i != e; i += s) {
132       const Init *Arg = DI->getArg(i);
133       if (const auto *BI = dyn_cast<BitsInit>(Arg)) {
134         if (!BI->isComplete())
135           PrintFatalError("Expecting complete bits init in `" + Op + "`");
136         Segments.push_back({BI->getNumBits(), BI});
137       } else if (const auto *BI = dyn_cast<BitInit>(Arg)) {
138         if (!BI->isConcrete())
139           PrintFatalError("Expecting concrete bit init in `" + Op + "`");
140         Segments.push_back({1, BI});
141       } else if (const auto *SubDI = dyn_cast<DagInit>(Arg)) {
142         buildRec(SubDI);
143       } else {
144         PrintFatalError("Unrecognized type of argument in `" + Op +
145                         "`: " + Arg->getAsString());
146       }
147     }
148   } else if (Op == "operand") {
149     // (operand <operand name>, <# of bits>)
150     if (DI->getNumArgs() != 2)
151       PrintFatalError("Expecting 2 arguments for `operand`");
152     HasDynamicSegment = true;
153     const Init *OperandName = DI->getArg(0), *NumBits = DI->getArg(1);
154     if (!isa<StringInit>(OperandName) || !isa<IntInit>(NumBits))
155       PrintFatalError("Invalid argument types for `operand`");
156 
157     auto NumBitsVal = cast<IntInit>(NumBits)->getValue();
158     if (NumBitsVal <= 0)
159       PrintFatalError("Invalid number of bits for `operand`");
160 
161     Segments.push_back({NumBitsVal, OperandName});
162   } else if (Op == "slice") {
163     // (slice <operand name>, <high / low bit>, <low / high bit>)
164     if (DI->getNumArgs() != 3)
165       PrintFatalError("Expecting 3 arguments for `slice`");
166     HasDynamicSegment = true;
167     Init *OperandName = DI->getArg(0), *HiBit = DI->getArg(1),
168          *LoBit = DI->getArg(2);
169     if (!isa<StringInit>(OperandName) || !isa<IntInit>(HiBit) ||
170         !isa<IntInit>(LoBit))
171       PrintFatalError("Invalid argument types for `slice`");
172 
173     auto HiBitVal = cast<IntInit>(HiBit)->getValue(),
174          LoBitVal = cast<IntInit>(LoBit)->getValue();
175     if (HiBitVal < 0 || LoBitVal < 0)
176       PrintFatalError("Invalid bit range for `slice`");
177     bool NeedSwap = false;
178     unsigned NumBits = 0U;
179     if (HiBitVal < LoBitVal) {
180       NeedSwap = true;
181       NumBits = static_cast<unsigned>(LoBitVal - HiBitVal + 1);
182     } else {
183       NumBits = static_cast<unsigned>(HiBitVal - LoBitVal + 1);
184     }
185 
186     if (NeedSwap) {
187       // Normalization: Hi bit should always be the second argument.
188       Init *const NewArgs[] = {OperandName, LoBit, HiBit};
189       Segments.push_back(
190           {NumBits, DagInit::get(DI->getOperator(), nullptr, NewArgs, {})});
191     } else {
192       Segments.push_back({NumBits, DI});
193     }
194   }
195 }
196 
197 void VarLenCodeEmitterGen::run(raw_ostream &OS) {
198   CodeGenTarget Target(Records);
199   auto Insts = Records.getAllDerivedDefinitions("Instruction");
200 
201   auto NumberedInstructions = Target.getInstructionsByEnumValue();
202   const CodeGenHwModes &HWM = Target.getHwModes();
203 
204   // The set of HwModes used by instruction encodings.
205   std::set<unsigned> HwModes;
206   for (const CodeGenInstruction *CGI : NumberedInstructions) {
207     Record *R = CGI->TheDef;
208 
209     // Create the corresponding VarLenInst instance.
210     if (R->getValueAsString("Namespace") == "TargetOpcode" ||
211         R->getValueAsBit("isPseudo"))
212       continue;
213 
214     if (const RecordVal *RV = R->getValue("EncodingInfos")) {
215       if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
216         EncodingInfoByHwMode EBM(DI->getDef(), HWM);
217         for (auto &KV : EBM) {
218           HwModes.insert(KV.first);
219           Record *EncodingDef = KV.second;
220           auto *DI = EncodingDef->getValueAsDag("Inst");
221           VarLenInsts.insert({EncodingDef, VarLenInst(DI)});
222         }
223         continue;
224       }
225     }
226     auto *DI = R->getValueAsDag("Inst");
227     VarLenInsts.insert({R, VarLenInst(DI)});
228   }
229 
230   // Emit function declaration
231   OS << "void " << Target.getName()
232      << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
233      << "    SmallVectorImpl<MCFixup> &Fixups,\n"
234      << "    APInt &Inst,\n"
235      << "    APInt &Scratch,\n"
236      << "    const MCSubtargetInfo &STI) const {\n";
237 
238   // Emit instruction base values
239   if (HwModes.empty()) {
240     emitInstructionBaseValues(OS, NumberedInstructions, Target);
241   } else {
242     for (unsigned HwMode : HwModes)
243       emitInstructionBaseValues(OS, NumberedInstructions, Target, (int)HwMode);
244   }
245 
246   if (!HwModes.empty()) {
247     OS << "  const unsigned **Index;\n";
248     OS << "  const uint64_t *InstBits;\n";
249     OS << "  unsigned HwMode = STI.getHwMode();\n";
250     OS << "  switch (HwMode) {\n";
251     OS << "  default: llvm_unreachable(\"Unknown hardware mode!\"); break;\n";
252     for (unsigned I : HwModes) {
253       OS << "  case " << I << ": InstBits = InstBits_" << HWM.getMode(I).Name
254          << "; Index = Index_" << HWM.getMode(I).Name << "; break;\n";
255     }
256     OS << "  };\n";
257   }
258 
259   // Emit helper function to retrieve base values.
260   OS << "  auto getInstBits = [&](unsigned Opcode) -> APInt {\n"
261      << "    unsigned NumBits = Index[Opcode][0];\n"
262      << "    if (!NumBits)\n"
263      << "      return APInt::getZeroWidth();\n"
264      << "    unsigned Idx = Index[Opcode][1];\n"
265      << "    ArrayRef<uint64_t> Data(&InstBits[Idx], "
266      << "APInt::getNumWords(NumBits));\n"
267      << "    return APInt(NumBits, Data);\n"
268      << "  };\n";
269 
270   // Map to accumulate all the cases.
271   std::map<std::string, std::vector<std::string>> CaseMap;
272 
273   // Construct all cases statement for each opcode
274   for (Record *R : Insts) {
275     if (R->getValueAsString("Namespace") == "TargetOpcode" ||
276         R->getValueAsBit("isPseudo"))
277       continue;
278     std::string InstName =
279         (R->getValueAsString("Namespace") + "::" + R->getName()).str();
280     std::string Case = getInstructionCase(R, Target);
281 
282     CaseMap[Case].push_back(std::move(InstName));
283   }
284 
285   // Emit initial function code
286   OS << "  const unsigned opcode = MI.getOpcode();\n"
287      << "  switch (opcode) {\n";
288 
289   // Emit each case statement
290   for (const auto &C : CaseMap) {
291     const std::string &Case = C.first;
292     const auto &InstList = C.second;
293 
294     ListSeparator LS("\n");
295     for (const auto &InstName : InstList)
296       OS << LS << "    case " << InstName << ":";
297 
298     OS << " {\n";
299     OS << Case;
300     OS << "      break;\n"
301        << "    }\n";
302   }
303   // Default case: unhandled opcode
304   OS << "  default:\n"
305      << "    std::string msg;\n"
306      << "    raw_string_ostream Msg(msg);\n"
307      << "    Msg << \"Not supported instr: \" << MI;\n"
308      << "    report_fatal_error(Msg.str().c_str());\n"
309      << "  }\n";
310   OS << "}\n\n";
311 }
312 
313 static void emitInstBits(raw_ostream &IS, raw_ostream &SS, const APInt &Bits,
314                          unsigned &Index) {
315   if (!Bits.getNumWords()) {
316     IS.indent(4) << "{/*NumBits*/0, /*Index*/0},";
317     return;
318   }
319 
320   IS.indent(4) << "{/*NumBits*/" << Bits.getBitWidth() << ", "
321                << "/*Index*/" << Index << "},";
322 
323   SS.indent(4);
324   for (unsigned I = 0; I < Bits.getNumWords(); ++I, ++Index)
325     SS << "UINT64_C(" << utostr(Bits.getRawData()[I]) << "),";
326 }
327 
328 void VarLenCodeEmitterGen::emitInstructionBaseValues(
329     raw_ostream &OS, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
330     CodeGenTarget &Target, int HwMode) {
331   std::string IndexArray, StorageArray;
332   raw_string_ostream IS(IndexArray), SS(StorageArray);
333 
334   const CodeGenHwModes &HWM = Target.getHwModes();
335   if (HwMode == -1) {
336     IS << "  static const unsigned Index[][2] = {\n";
337     SS << "  static const uint64_t InstBits[] = {\n";
338   } else {
339     StringRef Name = HWM.getMode(HwMode).Name;
340     IS << "  static const unsigned Index_" << Name << "[][2] = {\n";
341     SS << "  static const uint64_t InstBits_" << Name << "[] = {\n";
342   }
343 
344   unsigned NumFixedValueWords = 0U;
345   for (const CodeGenInstruction *CGI : NumberedInstructions) {
346     Record *R = CGI->TheDef;
347 
348     if (R->getValueAsString("Namespace") == "TargetOpcode" ||
349         R->getValueAsBit("isPseudo")) {
350       IS.indent(4) << "{/*NumBits*/0, /*Index*/0},\n";
351       continue;
352     }
353 
354     Record *EncodingDef = R;
355     if (const RecordVal *RV = R->getValue("EncodingInfos")) {
356       if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
357         EncodingInfoByHwMode EBM(DI->getDef(), HWM);
358         if (EBM.hasMode(HwMode))
359           EncodingDef = EBM.get(HwMode);
360       }
361     }
362 
363     auto It = VarLenInsts.find(EncodingDef);
364     if (It == VarLenInsts.end())
365       PrintFatalError(EncodingDef, "VarLenInst not found for this record");
366     const VarLenInst &VLI = It->second;
367 
368     unsigned i = 0U, BitWidth = VLI.size();
369 
370     // Start by filling in fixed values.
371     APInt Value(BitWidth, 0);
372     auto SI = VLI.begin(), SE = VLI.end();
373     // Scan through all the segments that have fixed-bits values.
374     while (i < BitWidth && SI != SE) {
375       unsigned SegmentNumBits = SI->first;
376       if (const auto *BI = dyn_cast<BitsInit>(SI->second)) {
377         for (unsigned Idx = 0U; Idx != SegmentNumBits; ++Idx) {
378           auto *B = cast<BitInit>(BI->getBit(Idx));
379           Value.setBitVal(i + Idx, B->getValue());
380         }
381       }
382       if (const auto *BI = dyn_cast<BitInit>(SI->second))
383         Value.setBitVal(i, BI->getValue());
384 
385       i += SegmentNumBits;
386       ++SI;
387     }
388 
389     emitInstBits(IS, SS, Value, NumFixedValueWords);
390     IS << '\t' << "// " << R->getName() << "\n";
391     if (Value.getNumWords())
392       SS << '\t' << "// " << R->getName() << "\n";
393   }
394   IS.indent(4) << "{/*NumBits*/0, /*Index*/0}\n  };\n";
395   SS.indent(4) << "UINT64_C(0)\n  };\n";
396 
397   OS << IS.str() << SS.str();
398 }
399 
400 std::string VarLenCodeEmitterGen::getInstructionCase(Record *R,
401                                                      CodeGenTarget &Target) {
402   std::string Case;
403   if (const RecordVal *RV = R->getValue("EncodingInfos")) {
404     if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
405       const CodeGenHwModes &HWM = Target.getHwModes();
406       EncodingInfoByHwMode EBM(DI->getDef(), HWM);
407       Case += "      switch (HwMode) {\n";
408       Case += "      default: llvm_unreachable(\"Unhandled HwMode\");\n";
409       for (auto &KV : EBM) {
410         Case += "      case " + itostr(KV.first) + ": {\n";
411         Case += getInstructionCaseForEncoding(R, KV.second, Target);
412         Case += "      break;\n";
413         Case += "      }\n";
414       }
415       Case += "      }\n";
416       return Case;
417     }
418   }
419   return getInstructionCaseForEncoding(R, R, Target);
420 }
421 
422 std::string VarLenCodeEmitterGen::getInstructionCaseForEncoding(
423     Record *R, Record *EncodingDef, CodeGenTarget &Target) {
424   auto It = VarLenInsts.find(EncodingDef);
425   if (It == VarLenInsts.end())
426     PrintFatalError(EncodingDef, "Parsed encoding record not found");
427   const VarLenInst &VLI = It->second;
428   size_t BitWidth = VLI.size();
429 
430   CodeGenInstruction &CGI = Target.getInstruction(R);
431 
432   std::string Case;
433   raw_string_ostream SS(Case);
434   // Resize the scratch buffer.
435   if (BitWidth && !VLI.isFixedValueOnly())
436     SS.indent(6) << "Scratch = Scratch.zextOrSelf(" << BitWidth << ");\n";
437   // Populate based value.
438   SS.indent(6) << "Inst = getInstBits(opcode);\n";
439 
440   // Process each segment in VLI.
441   size_t Offset = 0U;
442   for (const auto &Pair : VLI) {
443     unsigned NumBits = Pair.first;
444     const Init *Val = Pair.second;
445     // If it's a StringInit or DagInit, it's a reference to an operand
446     // or part of an operand.
447     if (isa<StringInit>(Val) || isa<DagInit>(Val)) {
448       StringRef OperandName;
449       unsigned LoBit = 0U;
450       if (const auto *SV = dyn_cast<StringInit>(Val)) {
451         OperandName = SV->getValue();
452       } else {
453         // Normalized: (slice <operand name>, <high bit>, <low bit>)
454         const auto *DV = cast<DagInit>(Val);
455         OperandName = cast<StringInit>(DV->getArg(0))->getValue();
456         LoBit = static_cast<unsigned>(cast<IntInit>(DV->getArg(2))->getValue());
457       }
458 
459       auto OpIdx = CGI.Operands.ParseOperandName(OperandName);
460       unsigned FlatOpIdx = CGI.Operands.getFlattenedOperandNumber(OpIdx);
461       StringRef EncoderMethodName = "getMachineOpValue";
462       auto &CustomEncoder = CGI.Operands[OpIdx.first].EncoderMethodName;
463       if (!CustomEncoder.empty())
464         EncoderMethodName = CustomEncoder;
465 
466       SS.indent(6) << "Scratch.clearAllBits();\n";
467       SS.indent(6) << "// op: " << OperandName.drop_front(1) << "\n";
468       SS.indent(6) << EncoderMethodName << "(MI, MI.getOperand("
469                    << utostr(FlatOpIdx) << "), Scratch, Fixups, STI);\n";
470       SS.indent(6) << "Inst.insertBits("
471                    << "Scratch.extractBits(" << utostr(NumBits) << ", "
472                    << utostr(LoBit) << ")"
473                    << ", " << Offset << ");\n";
474     }
475     Offset += NumBits;
476   }
477 
478   StringRef PostEmitter = R->getValueAsString("PostEncoderMethod");
479   if (!PostEmitter.empty())
480     SS.indent(6) << "Inst = " << PostEmitter << "(MI, Inst, STI);\n";
481 
482   return Case;
483 }
484 
485 namespace llvm {
486 
487 void emitVarLenCodeEmitter(RecordKeeper &R, raw_ostream &OS) {
488   VarLenCodeEmitterGen(R).run(OS);
489 }
490 
491 } // end namespace llvm
492