1 //===- VarLenCodeEmitterGen.cpp - CEG for variable-length insts -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The CodeEmitterGen component for variable-length instructions.
10 //
11 // The basic CodeEmitterGen is almost exclusively designed for fixed-
12 // length instructions. A good analogy for its encoding scheme is how printf
// works: The (immutable) formatting string represents the fixed values in the
14 // encoded instruction. Placeholders (i.e. %something), on the other hand,
15 // represent encoding for instruction operands.
16 // ```
17 // printf("1101 %src 1001 %dst", <encoded value for operand `src`>,
18 //                               <encoded value for operand `dst`>);
19 // ```
20 // VarLenCodeEmitterGen in this file provides an alternative encoding scheme
21 // that works more like a C++ stream operator:
22 // ```
23 // OS << 0b1101;
24 // if (Cond)
25 //   OS << OperandEncoding0;
26 // OS << 0b1001 << OperandEncoding1;
27 // ```
// You are free to concatenate arbitrary types (and sizes) of encoding
// fragments at any bit position, bringing more flexibility in defining
// encodings for variable-length instructions.
31 //
32 // In a more specific way, instruction encoding is represented by a DAG type
33 // `Inst` field. Here is an example:
34 // ```
35 // dag Inst = (descend 0b1101, (operand "$src", 4), 0b1001,
36 //                     (operand "$dst", 4));
37 // ```
38 // It represents the following instruction encoding:
39 // ```
40 // MSB                                                     LSB
41 // 1101<encoding for operand src>1001<encoding for operand dst>
42 // ```
43 // For more details about DAG operators in the above snippet, please
44 // refer to \file include/llvm/Target/Target.td.
45 //
46 // VarLenCodeEmitter will convert the above DAG into the same helper function
47 // generated by CodeEmitter, `MCCodeEmitter::getBinaryCodeForInstr` (except
// for a few details).
49 //
50 //===----------------------------------------------------------------------===//
51 
52 #include "VarLenCodeEmitterGen.h"
53 #include "CodeGenInstruction.h"
54 #include "CodeGenTarget.h"
55 #include "SubtargetFeatureInfo.h"
56 #include "llvm/ADT/ArrayRef.h"
57 #include "llvm/ADT/DenseMap.h"
58 #include "llvm/Support/raw_ostream.h"
59 #include "llvm/TableGen/Error.h"
60 #include "llvm/TableGen/Record.h"
61 
62 using namespace llvm;
63 
64 namespace {
65 
66 class VarLenCodeEmitterGen {
67   RecordKeeper &Records;
68 
69   struct EncodingSegment {
70     unsigned BitWidth;
71     const Init *Value;
72     StringRef CustomEncoder = "";
73   };
74 
75   class VarLenInst {
76     size_t NumBits;
77 
78     // Set if any of the segment is not fixed value.
79     bool HasDynamicSegment;
80 
81     SmallVector<EncodingSegment, 4> Segments;
82 
83     void buildRec(const DagInit *DI);
84 
85     StringRef getCustomEncoderName(const Init *EI) const {
86       if (const auto *DI = dyn_cast<DagInit>(EI)) {
87         if (DI->getNumArgs() && isa<StringInit>(DI->getArg(0)))
88           return cast<StringInit>(DI->getArg(0))->getValue();
89       }
90       return "";
91     }
92 
93   public:
94     VarLenInst() : NumBits(0U), HasDynamicSegment(false) {}
95 
96     explicit VarLenInst(const DagInit *DI);
97 
98     /// Number of bits
99     size_t size() const { return NumBits; }
100 
101     using const_iterator = decltype(Segments)::const_iterator;
102 
103     const_iterator begin() const { return Segments.begin(); }
104     const_iterator end() const { return Segments.end(); }
105     size_t getNumSegments() const { return Segments.size(); }
106 
107     bool isFixedValueOnly() const { return !HasDynamicSegment; }
108   };
109 
110   DenseMap<Record *, VarLenInst> VarLenInsts;
111 
112   // Emit based values (i.e. fixed bits in the encoded instructions)
113   void emitInstructionBaseValues(
114       raw_ostream &OS,
115       ArrayRef<const CodeGenInstruction *> NumberedInstructions,
116       CodeGenTarget &Target, int HwMode = -1);
117 
118   std::string getInstructionCase(Record *R, CodeGenTarget &Target);
119   std::string getInstructionCaseForEncoding(Record *R, Record *EncodingDef,
120                                             CodeGenTarget &Target);
121 
122 public:
123   explicit VarLenCodeEmitterGen(RecordKeeper &R) : Records(R) {}
124 
125   void run(raw_ostream &OS);
126 };
127 
128 } // end anonymous namespace
129 
130 VarLenCodeEmitterGen::VarLenInst::VarLenInst(const DagInit *DI) : NumBits(0U) {
131   buildRec(DI);
132   for (const auto &S : Segments)
133     NumBits += S.BitWidth;
134 }
135 
136 void VarLenCodeEmitterGen::VarLenInst::buildRec(const DagInit *DI) {
137   std::string Op = DI->getOperator()->getAsString();
138 
139   if (Op == "ascend" || Op == "descend") {
140     bool Reverse = Op == "descend";
141     int i = Reverse ? DI->getNumArgs() - 1 : 0;
142     int e = Reverse ? -1 : DI->getNumArgs();
143     int s = Reverse ? -1 : 1;
144     for (; i != e; i += s) {
145       const Init *Arg = DI->getArg(i);
146       if (const auto *BI = dyn_cast<BitsInit>(Arg)) {
147         if (!BI->isComplete())
148           PrintFatalError("Expecting complete bits init in `" + Op + "`");
149         Segments.push_back({BI->getNumBits(), BI});
150       } else if (const auto *BI = dyn_cast<BitInit>(Arg)) {
151         if (!BI->isConcrete())
152           PrintFatalError("Expecting concrete bit init in `" + Op + "`");
153         Segments.push_back({1, BI});
154       } else if (const auto *SubDI = dyn_cast<DagInit>(Arg)) {
155         buildRec(SubDI);
156       } else {
157         PrintFatalError("Unrecognized type of argument in `" + Op +
158                         "`: " + Arg->getAsString());
159       }
160     }
161   } else if (Op == "operand") {
162     // (operand <operand name>, <# of bits>, [(encoder <custom encoder>)])
163     if (DI->getNumArgs() < 2)
164       PrintFatalError("Expecting at least 2 arguments for `operand`");
165     HasDynamicSegment = true;
166     const Init *OperandName = DI->getArg(0), *NumBits = DI->getArg(1);
167     if (!isa<StringInit>(OperandName) || !isa<IntInit>(NumBits))
168       PrintFatalError("Invalid argument types for `operand`");
169 
170     auto NumBitsVal = cast<IntInit>(NumBits)->getValue();
171     if (NumBitsVal <= 0)
172       PrintFatalError("Invalid number of bits for `operand`");
173 
174     StringRef CustomEncoder;
175     if (DI->getNumArgs() >= 3)
176       CustomEncoder = getCustomEncoderName(DI->getArg(2));
177     Segments.push_back(
178         {static_cast<unsigned>(NumBitsVal), OperandName, CustomEncoder});
179   } else if (Op == "slice") {
180     // (slice <operand name>, <high / low bit>, <low / high bit>,
181     //        [(encoder <custom encoder>)])
182     if (DI->getNumArgs() < 3)
183       PrintFatalError("Expecting at least 3 arguments for `slice`");
184     HasDynamicSegment = true;
185     Init *OperandName = DI->getArg(0), *HiBit = DI->getArg(1),
186          *LoBit = DI->getArg(2);
187     if (!isa<StringInit>(OperandName) || !isa<IntInit>(HiBit) ||
188         !isa<IntInit>(LoBit))
189       PrintFatalError("Invalid argument types for `slice`");
190 
191     auto HiBitVal = cast<IntInit>(HiBit)->getValue(),
192          LoBitVal = cast<IntInit>(LoBit)->getValue();
193     if (HiBitVal < 0 || LoBitVal < 0)
194       PrintFatalError("Invalid bit range for `slice`");
195     bool NeedSwap = false;
196     unsigned NumBits = 0U;
197     if (HiBitVal < LoBitVal) {
198       NeedSwap = true;
199       NumBits = static_cast<unsigned>(LoBitVal - HiBitVal + 1);
200     } else {
201       NumBits = static_cast<unsigned>(HiBitVal - LoBitVal + 1);
202     }
203 
204     StringRef CustomEncoder;
205     if (DI->getNumArgs() >= 4)
206       CustomEncoder = getCustomEncoderName(DI->getArg(3));
207 
208     if (NeedSwap) {
209       // Normalization: Hi bit should always be the second argument.
210       Init *const NewArgs[] = {OperandName, LoBit, HiBit};
211       Segments.push_back({NumBits,
212                           DagInit::get(DI->getOperator(), nullptr, NewArgs, {}),
213                           CustomEncoder});
214     } else {
215       Segments.push_back({NumBits, DI, CustomEncoder});
216     }
217   }
218 }
219 
220 void VarLenCodeEmitterGen::run(raw_ostream &OS) {
221   CodeGenTarget Target(Records);
222   auto Insts = Records.getAllDerivedDefinitions("Instruction");
223 
224   auto NumberedInstructions = Target.getInstructionsByEnumValue();
225   const CodeGenHwModes &HWM = Target.getHwModes();
226 
227   // The set of HwModes used by instruction encodings.
228   std::set<unsigned> HwModes;
229   for (const CodeGenInstruction *CGI : NumberedInstructions) {
230     Record *R = CGI->TheDef;
231 
232     // Create the corresponding VarLenInst instance.
233     if (R->getValueAsString("Namespace") == "TargetOpcode" ||
234         R->getValueAsBit("isPseudo"))
235       continue;
236 
237     if (const RecordVal *RV = R->getValue("EncodingInfos")) {
238       if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
239         EncodingInfoByHwMode EBM(DI->getDef(), HWM);
240         for (auto &KV : EBM) {
241           HwModes.insert(KV.first);
242           Record *EncodingDef = KV.second;
243           auto *DI = EncodingDef->getValueAsDag("Inst");
244           VarLenInsts.insert({EncodingDef, VarLenInst(DI)});
245         }
246         continue;
247       }
248     }
249     auto *DI = R->getValueAsDag("Inst");
250     VarLenInsts.insert({R, VarLenInst(DI)});
251   }
252 
253   // Emit function declaration
254   OS << "void " << Target.getName()
255      << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
256      << "    SmallVectorImpl<MCFixup> &Fixups,\n"
257      << "    APInt &Inst,\n"
258      << "    APInt &Scratch,\n"
259      << "    const MCSubtargetInfo &STI) const {\n";
260 
261   // Emit instruction base values
262   if (HwModes.empty()) {
263     emitInstructionBaseValues(OS, NumberedInstructions, Target);
264   } else {
265     for (unsigned HwMode : HwModes)
266       emitInstructionBaseValues(OS, NumberedInstructions, Target, (int)HwMode);
267   }
268 
269   if (!HwModes.empty()) {
270     OS << "  const unsigned **Index;\n";
271     OS << "  const uint64_t *InstBits;\n";
272     OS << "  unsigned HwMode = STI.getHwMode();\n";
273     OS << "  switch (HwMode) {\n";
274     OS << "  default: llvm_unreachable(\"Unknown hardware mode!\"); break;\n";
275     for (unsigned I : HwModes) {
276       OS << "  case " << I << ": InstBits = InstBits_" << HWM.getMode(I).Name
277          << "; Index = Index_" << HWM.getMode(I).Name << "; break;\n";
278     }
279     OS << "  };\n";
280   }
281 
282   // Emit helper function to retrieve base values.
283   OS << "  auto getInstBits = [&](unsigned Opcode) -> APInt {\n"
284      << "    unsigned NumBits = Index[Opcode][0];\n"
285      << "    if (!NumBits)\n"
286      << "      return APInt::getZeroWidth();\n"
287      << "    unsigned Idx = Index[Opcode][1];\n"
288      << "    ArrayRef<uint64_t> Data(&InstBits[Idx], "
289      << "APInt::getNumWords(NumBits));\n"
290      << "    return APInt(NumBits, Data);\n"
291      << "  };\n";
292 
293   // Map to accumulate all the cases.
294   std::map<std::string, std::vector<std::string>> CaseMap;
295 
296   // Construct all cases statement for each opcode
297   for (Record *R : Insts) {
298     if (R->getValueAsString("Namespace") == "TargetOpcode" ||
299         R->getValueAsBit("isPseudo"))
300       continue;
301     std::string InstName =
302         (R->getValueAsString("Namespace") + "::" + R->getName()).str();
303     std::string Case = getInstructionCase(R, Target);
304 
305     CaseMap[Case].push_back(std::move(InstName));
306   }
307 
308   // Emit initial function code
309   OS << "  const unsigned opcode = MI.getOpcode();\n"
310      << "  switch (opcode) {\n";
311 
312   // Emit each case statement
313   for (const auto &C : CaseMap) {
314     const std::string &Case = C.first;
315     const auto &InstList = C.second;
316 
317     ListSeparator LS("\n");
318     for (const auto &InstName : InstList)
319       OS << LS << "    case " << InstName << ":";
320 
321     OS << " {\n";
322     OS << Case;
323     OS << "      break;\n"
324        << "    }\n";
325   }
326   // Default case: unhandled opcode
327   OS << "  default:\n"
328      << "    std::string msg;\n"
329      << "    raw_string_ostream Msg(msg);\n"
330      << "    Msg << \"Not supported instr: \" << MI;\n"
331      << "    report_fatal_error(Msg.str().c_str());\n"
332      << "  }\n";
333   OS << "}\n\n";
334 }
335 
336 static void emitInstBits(raw_ostream &IS, raw_ostream &SS, const APInt &Bits,
337                          unsigned &Index) {
338   if (!Bits.getNumWords()) {
339     IS.indent(4) << "{/*NumBits*/0, /*Index*/0},";
340     return;
341   }
342 
343   IS.indent(4) << "{/*NumBits*/" << Bits.getBitWidth() << ", "
344                << "/*Index*/" << Index << "},";
345 
346   SS.indent(4);
347   for (unsigned I = 0; I < Bits.getNumWords(); ++I, ++Index)
348     SS << "UINT64_C(" << utostr(Bits.getRawData()[I]) << "),";
349 }
350 
351 void VarLenCodeEmitterGen::emitInstructionBaseValues(
352     raw_ostream &OS, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
353     CodeGenTarget &Target, int HwMode) {
354   std::string IndexArray, StorageArray;
355   raw_string_ostream IS(IndexArray), SS(StorageArray);
356 
357   const CodeGenHwModes &HWM = Target.getHwModes();
358   if (HwMode == -1) {
359     IS << "  static const unsigned Index[][2] = {\n";
360     SS << "  static const uint64_t InstBits[] = {\n";
361   } else {
362     StringRef Name = HWM.getMode(HwMode).Name;
363     IS << "  static const unsigned Index_" << Name << "[][2] = {\n";
364     SS << "  static const uint64_t InstBits_" << Name << "[] = {\n";
365   }
366 
367   unsigned NumFixedValueWords = 0U;
368   for (const CodeGenInstruction *CGI : NumberedInstructions) {
369     Record *R = CGI->TheDef;
370 
371     if (R->getValueAsString("Namespace") == "TargetOpcode" ||
372         R->getValueAsBit("isPseudo")) {
373       IS.indent(4) << "{/*NumBits*/0, /*Index*/0},\n";
374       continue;
375     }
376 
377     Record *EncodingDef = R;
378     if (const RecordVal *RV = R->getValue("EncodingInfos")) {
379       if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
380         EncodingInfoByHwMode EBM(DI->getDef(), HWM);
381         if (EBM.hasMode(HwMode))
382           EncodingDef = EBM.get(HwMode);
383       }
384     }
385 
386     auto It = VarLenInsts.find(EncodingDef);
387     if (It == VarLenInsts.end())
388       PrintFatalError(EncodingDef, "VarLenInst not found for this record");
389     const VarLenInst &VLI = It->second;
390 
391     unsigned i = 0U, BitWidth = VLI.size();
392 
393     // Start by filling in fixed values.
394     APInt Value(BitWidth, 0);
395     auto SI = VLI.begin(), SE = VLI.end();
396     // Scan through all the segments that have fixed-bits values.
397     while (i < BitWidth && SI != SE) {
398       unsigned SegmentNumBits = SI->BitWidth;
399       if (const auto *BI = dyn_cast<BitsInit>(SI->Value)) {
400         for (unsigned Idx = 0U; Idx != SegmentNumBits; ++Idx) {
401           auto *B = cast<BitInit>(BI->getBit(Idx));
402           Value.setBitVal(i + Idx, B->getValue());
403         }
404       }
405       if (const auto *BI = dyn_cast<BitInit>(SI->Value))
406         Value.setBitVal(i, BI->getValue());
407 
408       i += SegmentNumBits;
409       ++SI;
410     }
411 
412     emitInstBits(IS, SS, Value, NumFixedValueWords);
413     IS << '\t' << "// " << R->getName() << "\n";
414     if (Value.getNumWords())
415       SS << '\t' << "// " << R->getName() << "\n";
416   }
417   IS.indent(4) << "{/*NumBits*/0, /*Index*/0}\n  };\n";
418   SS.indent(4) << "UINT64_C(0)\n  };\n";
419 
420   OS << IS.str() << SS.str();
421 }
422 
423 std::string VarLenCodeEmitterGen::getInstructionCase(Record *R,
424                                                      CodeGenTarget &Target) {
425   std::string Case;
426   if (const RecordVal *RV = R->getValue("EncodingInfos")) {
427     if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
428       const CodeGenHwModes &HWM = Target.getHwModes();
429       EncodingInfoByHwMode EBM(DI->getDef(), HWM);
430       Case += "      switch (HwMode) {\n";
431       Case += "      default: llvm_unreachable(\"Unhandled HwMode\");\n";
432       for (auto &KV : EBM) {
433         Case += "      case " + itostr(KV.first) + ": {\n";
434         Case += getInstructionCaseForEncoding(R, KV.second, Target);
435         Case += "      break;\n";
436         Case += "      }\n";
437       }
438       Case += "      }\n";
439       return Case;
440     }
441   }
442   return getInstructionCaseForEncoding(R, R, Target);
443 }
444 
445 std::string VarLenCodeEmitterGen::getInstructionCaseForEncoding(
446     Record *R, Record *EncodingDef, CodeGenTarget &Target) {
447   auto It = VarLenInsts.find(EncodingDef);
448   if (It == VarLenInsts.end())
449     PrintFatalError(EncodingDef, "Parsed encoding record not found");
450   const VarLenInst &VLI = It->second;
451   size_t BitWidth = VLI.size();
452 
453   CodeGenInstruction &CGI = Target.getInstruction(R);
454 
455   std::string Case;
456   raw_string_ostream SS(Case);
457   // Resize the scratch buffer.
458   if (BitWidth && !VLI.isFixedValueOnly())
459     SS.indent(6) << "Scratch = Scratch.zextOrSelf(" << BitWidth << ");\n";
460   // Populate based value.
461   SS.indent(6) << "Inst = getInstBits(opcode);\n";
462 
463   // Process each segment in VLI.
464   size_t Offset = 0U;
465   for (const auto &ES : VLI) {
466     unsigned NumBits = ES.BitWidth;
467     const Init *Val = ES.Value;
468     // If it's a StringInit or DagInit, it's a reference to an operand
469     // or part of an operand.
470     if (isa<StringInit>(Val) || isa<DagInit>(Val)) {
471       StringRef OperandName;
472       unsigned LoBit = 0U;
473       if (const auto *SV = dyn_cast<StringInit>(Val)) {
474         OperandName = SV->getValue();
475       } else {
476         // Normalized: (slice <operand name>, <high bit>, <low bit>)
477         const auto *DV = cast<DagInit>(Val);
478         OperandName = cast<StringInit>(DV->getArg(0))->getValue();
479         LoBit = static_cast<unsigned>(cast<IntInit>(DV->getArg(2))->getValue());
480       }
481 
482       auto OpIdx = CGI.Operands.ParseOperandName(OperandName);
483       unsigned FlatOpIdx = CGI.Operands.getFlattenedOperandNumber(OpIdx);
484       StringRef CustomEncoder = CGI.Operands[OpIdx.first].EncoderMethodName;
485       if (ES.CustomEncoder.size())
486         CustomEncoder = ES.CustomEncoder;
487 
488       SS.indent(6) << "Scratch.clearAllBits();\n";
489       SS.indent(6) << "// op: " << OperandName.drop_front(1) << "\n";
490       if (CustomEncoder.empty())
491         SS.indent(6) << "getMachineOpValue(MI, MI.getOperand("
492                      << utostr(FlatOpIdx) << ")";
493       else
494         SS.indent(6) << CustomEncoder << "(MI, /*OpIdx=*/" << utostr(FlatOpIdx);
495 
496       SS << ", /*Pos=*/" << utostr(Offset) << ", Scratch, Fixups, STI);\n";
497 
498       SS.indent(6) << "Inst.insertBits("
499                    << "Scratch.extractBits(" << utostr(NumBits) << ", "
500                    << utostr(LoBit) << ")"
501                    << ", " << Offset << ");\n";
502     }
503     Offset += NumBits;
504   }
505 
506   StringRef PostEmitter = R->getValueAsString("PostEncoderMethod");
507   if (!PostEmitter.empty())
508     SS.indent(6) << "Inst = " << PostEmitter << "(MI, Inst, STI);\n";
509 
510   return Case;
511 }
512 
513 namespace llvm {
514 
515 void emitVarLenCodeEmitter(RecordKeeper &R, raw_ostream &OS) {
516   VarLenCodeEmitterGen(R).run(OS);
517 }
518 
519 } // end namespace llvm
520