1 //===- CodeEmitterGen.cpp - Code Emitter Generator ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// CodeEmitterGen uses the descriptions of instructions and their fields to
// construct an automated code emitter: a function that, given an MCInst,
// produces the encoded value of the instruction (a uint64_t, or an APInt when
// some instruction encoding is wider than 64 bits).
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "CodeGenInstruction.h"
16 #include "CodeGenTarget.h"
17 #include "SubtargetFeatureInfo.h"
18 #include "Types.h"
19 #include "VarLenCodeEmitterGen.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/StringExtras.h"
23 #include "llvm/Support/Casting.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include "llvm/TableGen/Error.h"
26 #include "llvm/TableGen/Record.h"
27 #include "llvm/TableGen/TableGenBackend.h"
28 #include <cassert>
29 #include <cstdint>
30 #include <map>
31 #include <set>
32 #include <string>
33 #include <utility>
34 #include <vector>
35
36 using namespace llvm;
37
38 namespace {
39
40 class CodeEmitterGen {
41 RecordKeeper &Records;
42
43 public:
CodeEmitterGen(RecordKeeper & R)44 CodeEmitterGen(RecordKeeper &R) : Records(R) {}
45
46 void run(raw_ostream &o);
47
48 private:
49 int getVariableBit(const std::string &VarName, BitsInit *BI, int bit);
50 std::string getInstructionCase(Record *R, CodeGenTarget &Target);
51 std::string getInstructionCaseForEncoding(Record *R, Record *EncodingDef,
52 CodeGenTarget &Target);
53 void AddCodeToMergeInOperand(Record *R, BitsInit *BI,
54 const std::string &VarName,
55 unsigned &NumberedOp,
56 std::set<unsigned> &NamedOpIndices,
57 std::string &Case, CodeGenTarget &Target);
58
59 void emitInstructionBaseValues(
60 raw_ostream &o, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
61 CodeGenTarget &Target, int HwMode = -1);
62 unsigned BitWidth;
63 bool UseAPInt;
64 };
65
66 // If the VarBitInit at position 'bit' matches the specified variable then
67 // return the variable bit position. Otherwise return -1.
getVariableBit(const std::string & VarName,BitsInit * BI,int bit)68 int CodeEmitterGen::getVariableBit(const std::string &VarName,
69 BitsInit *BI, int bit) {
70 if (VarBitInit *VBI = dyn_cast<VarBitInit>(BI->getBit(bit))) {
71 if (VarInit *VI = dyn_cast<VarInit>(VBI->getBitVar()))
72 if (VI->getName() == VarName)
73 return VBI->getBitNum();
74 } else if (VarInit *VI = dyn_cast<VarInit>(BI->getBit(bit))) {
75 if (VI->getName() == VarName)
76 return 0;
77 }
78
79 return -1;
80 }
81
82 void CodeEmitterGen::
AddCodeToMergeInOperand(Record * R,BitsInit * BI,const std::string & VarName,unsigned & NumberedOp,std::set<unsigned> & NamedOpIndices,std::string & Case,CodeGenTarget & Target)83 AddCodeToMergeInOperand(Record *R, BitsInit *BI, const std::string &VarName,
84 unsigned &NumberedOp,
85 std::set<unsigned> &NamedOpIndices,
86 std::string &Case, CodeGenTarget &Target) {
87 CodeGenInstruction &CGI = Target.getInstruction(R);
88
89 // Determine if VarName actually contributes to the Inst encoding.
90 int bit = BI->getNumBits()-1;
91
92 // Scan for a bit that this contributed to.
93 for (; bit >= 0; ) {
94 if (getVariableBit(VarName, BI, bit) != -1)
95 break;
96
97 --bit;
98 }
99
100 // If we found no bits, ignore this value, otherwise emit the call to get the
101 // operand encoding.
102 if (bit < 0) return;
103
104 // If the operand matches by name, reference according to that
105 // operand number. Non-matching operands are assumed to be in
106 // order.
107 unsigned OpIdx;
108 if (CGI.Operands.hasOperandNamed(VarName, OpIdx)) {
109 // Get the machine operand number for the indicated operand.
110 OpIdx = CGI.Operands[OpIdx].MIOperandNo;
111 assert(!CGI.Operands.isFlatOperandNotEmitted(OpIdx) &&
112 "Explicitly used operand also marked as not emitted!");
113 } else {
114 unsigned NumberOps = CGI.Operands.size();
115 /// If this operand is not supposed to be emitted by the
116 /// generated emitter, skip it.
117 while (NumberedOp < NumberOps &&
118 (CGI.Operands.isFlatOperandNotEmitted(NumberedOp) ||
119 (!NamedOpIndices.empty() && NamedOpIndices.count(
120 CGI.Operands.getSubOperandNumber(NumberedOp).first)))) {
121 ++NumberedOp;
122 }
123
124 if (NumberedOp >=
125 CGI.Operands.back().MIOperandNo + CGI.Operands.back().MINumOperands) {
126 std::string E;
127 raw_string_ostream S(E);
128 S << "Too few operands in record " << R->getName()
129 << " (no match for variable " << VarName << "):\n";
130 S << *R;
131 PrintFatalError(R, E);
132 }
133
134 OpIdx = NumberedOp++;
135 }
136
137 std::pair<unsigned, unsigned> SO = CGI.Operands.getSubOperandNumber(OpIdx);
138 std::string &EncoderMethodName = CGI.Operands[SO.first].EncoderMethodName;
139
140 if (UseAPInt)
141 Case += " op.clearAllBits();\n";
142
143 // If the source operand has a custom encoder, use it. This will
144 // get the encoding for all of the suboperands.
145 if (!EncoderMethodName.empty()) {
146 // A custom encoder has all of the information for the
147 // sub-operands, if there are more than one, so only
148 // query the encoder once per source operand.
149 if (SO.second == 0) {
150 Case += " // op: " + VarName + "\n";
151 if (UseAPInt) {
152 Case += " " + EncoderMethodName + "(MI, " + utostr(OpIdx);
153 Case += ", op";
154 } else {
155 Case += " op = " + EncoderMethodName + "(MI, " + utostr(OpIdx);
156 }
157 Case += ", Fixups, STI);\n";
158 }
159 } else {
160 Case += " // op: " + VarName + "\n";
161 if (UseAPInt) {
162 Case += " getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx) + ")";
163 Case += ", op, Fixups, STI";
164 } else {
165 Case += " op = getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx) + ")";
166 Case += ", Fixups, STI";
167 }
168 Case += ");\n";
169 }
170
171 // Precalculate the number of lits this variable contributes to in the
172 // operand. If there is a single lit (consecutive range of bits) we can use a
173 // destructive sequence on APInt that reduces memory allocations.
174 int numOperandLits = 0;
175 for (int tmpBit = bit; tmpBit >= 0;) {
176 int varBit = getVariableBit(VarName, BI, tmpBit);
177
178 // If this bit isn't from a variable, skip it.
179 if (varBit == -1) {
180 --tmpBit;
181 continue;
182 }
183
184 // Figure out the consecutive range of bits covered by this operand, in
185 // order to generate better encoding code.
186 int beginVarBit = varBit;
187 int N = 1;
188 for (--tmpBit; tmpBit >= 0;) {
189 varBit = getVariableBit(VarName, BI, tmpBit);
190 if (varBit == -1 || varBit != (beginVarBit - N))
191 break;
192 ++N;
193 --tmpBit;
194 }
195 ++numOperandLits;
196 }
197
198 for (; bit >= 0; ) {
199 int varBit = getVariableBit(VarName, BI, bit);
200
201 // If this bit isn't from a variable, skip it.
202 if (varBit == -1) {
203 --bit;
204 continue;
205 }
206
207 // Figure out the consecutive range of bits covered by this operand, in
208 // order to generate better encoding code.
209 int beginInstBit = bit;
210 int beginVarBit = varBit;
211 int N = 1;
212 for (--bit; bit >= 0;) {
213 varBit = getVariableBit(VarName, BI, bit);
214 if (varBit == -1 || varBit != (beginVarBit - N)) break;
215 ++N;
216 --bit;
217 }
218
219 std::string maskStr;
220 int opShift;
221
222 unsigned loBit = beginVarBit - N + 1;
223 unsigned hiBit = loBit + N;
224 unsigned loInstBit = beginInstBit - N + 1;
225 if (UseAPInt) {
226 std::string extractStr;
227 if (N >= 64) {
228 extractStr = "op.extractBits(" + itostr(hiBit - loBit) + ", " +
229 itostr(loBit) + ")";
230 Case += " Value.insertBits(" + extractStr + ", " +
231 itostr(loInstBit) + ");\n";
232 } else {
233 extractStr = "op.extractBitsAsZExtValue(" + itostr(hiBit - loBit) +
234 ", " + itostr(loBit) + ")";
235 Case += " Value.insertBits(" + extractStr + ", " +
236 itostr(loInstBit) + ", " + itostr(hiBit - loBit) + ");\n";
237 }
238 } else {
239 uint64_t opMask = ~(uint64_t)0 >> (64 - N);
240 opShift = beginVarBit - N + 1;
241 opMask <<= opShift;
242 maskStr = "UINT64_C(" + utostr(opMask) + ")";
243 opShift = beginInstBit - beginVarBit;
244
245 if (numOperandLits == 1) {
246 Case += " op &= " + maskStr + ";\n";
247 if (opShift > 0) {
248 Case += " op <<= " + itostr(opShift) + ";\n";
249 } else if (opShift < 0) {
250 Case += " op >>= " + itostr(-opShift) + ";\n";
251 }
252 Case += " Value |= op;\n";
253 } else {
254 if (opShift > 0) {
255 Case += " Value |= (op & " + maskStr + ") << " +
256 itostr(opShift) + ";\n";
257 } else if (opShift < 0) {
258 Case += " Value |= (op & " + maskStr + ") >> " +
259 itostr(-opShift) + ";\n";
260 } else {
261 Case += " Value |= (op & " + maskStr + ");\n";
262 }
263 }
264 }
265 }
266 }
267
getInstructionCase(Record * R,CodeGenTarget & Target)268 std::string CodeEmitterGen::getInstructionCase(Record *R,
269 CodeGenTarget &Target) {
270 std::string Case;
271 if (const RecordVal *RV = R->getValue("EncodingInfos")) {
272 if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
273 const CodeGenHwModes &HWM = Target.getHwModes();
274 EncodingInfoByHwMode EBM(DI->getDef(), HWM);
275 Case += " switch (HwMode) {\n";
276 Case += " default: llvm_unreachable(\"Unhandled HwMode\");\n";
277 for (auto &KV : EBM) {
278 Case += " case " + itostr(KV.first) + ": {\n";
279 Case += getInstructionCaseForEncoding(R, KV.second, Target);
280 Case += " break;\n";
281 Case += " }\n";
282 }
283 Case += " }\n";
284 return Case;
285 }
286 }
287 return getInstructionCaseForEncoding(R, R, Target);
288 }
289
getInstructionCaseForEncoding(Record * R,Record * EncodingDef,CodeGenTarget & Target)290 std::string CodeEmitterGen::getInstructionCaseForEncoding(Record *R, Record *EncodingDef,
291 CodeGenTarget &Target) {
292 std::string Case;
293 BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst");
294 unsigned NumberedOp = 0;
295 std::set<unsigned> NamedOpIndices;
296
297 // Collect the set of operand indices that might correspond to named
298 // operand, and skip these when assigning operands based on position.
299 if (Target.getInstructionSet()->
300 getValueAsBit("noNamedPositionallyEncodedOperands")) {
301 CodeGenInstruction &CGI = Target.getInstruction(R);
302 for (const RecordVal &RV : R->getValues()) {
303 unsigned OpIdx;
304 if (!CGI.Operands.hasOperandNamed(RV.getName(), OpIdx))
305 continue;
306
307 NamedOpIndices.insert(OpIdx);
308 }
309 }
310
311 // Loop over all of the fields in the instruction, determining which are the
312 // operands to the instruction.
313 for (const RecordVal &RV : EncodingDef->getValues()) {
314 // Ignore fixed fields in the record, we're looking for values like:
315 // bits<5> RST = { ?, ?, ?, ?, ? };
316 if (RV.isNonconcreteOK() || RV.getValue()->isComplete())
317 continue;
318
319 AddCodeToMergeInOperand(R, BI, std::string(RV.getName()), NumberedOp,
320 NamedOpIndices, Case, Target);
321 }
322
323 StringRef PostEmitter = R->getValueAsString("PostEncoderMethod");
324 if (!PostEmitter.empty()) {
325 Case += " Value = ";
326 Case += PostEmitter;
327 Case += "(MI, Value";
328 Case += ", STI";
329 Case += ");\n";
330 }
331
332 return Case;
333 }
334
emitInstBits(raw_ostream & OS,const APInt & Bits)335 static void emitInstBits(raw_ostream &OS, const APInt &Bits) {
336 for (unsigned I = 0; I < Bits.getNumWords(); ++I)
337 OS << ((I > 0) ? ", " : "") << "UINT64_C(" << utostr(Bits.getRawData()[I])
338 << ")";
339 }
340
emitInstructionBaseValues(raw_ostream & o,ArrayRef<const CodeGenInstruction * > NumberedInstructions,CodeGenTarget & Target,int HwMode)341 void CodeEmitterGen::emitInstructionBaseValues(
342 raw_ostream &o, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
343 CodeGenTarget &Target, int HwMode) {
344 const CodeGenHwModes &HWM = Target.getHwModes();
345 if (HwMode == -1)
346 o << " static const uint64_t InstBits[] = {\n";
347 else
348 o << " static const uint64_t InstBits_" << HWM.getMode(HwMode).Name
349 << "[] = {\n";
350
351 for (const CodeGenInstruction *CGI : NumberedInstructions) {
352 Record *R = CGI->TheDef;
353
354 if (R->getValueAsString("Namespace") == "TargetOpcode" ||
355 R->getValueAsBit("isPseudo")) {
356 o << " "; emitInstBits(o, APInt(BitWidth, 0)); o << ",\n";
357 continue;
358 }
359
360 Record *EncodingDef = R;
361 if (const RecordVal *RV = R->getValue("EncodingInfos")) {
362 if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
363 EncodingInfoByHwMode EBM(DI->getDef(), HWM);
364 if (EBM.hasMode(HwMode))
365 EncodingDef = EBM.get(HwMode);
366 }
367 }
368 BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst");
369
370 // Start by filling in fixed values.
371 APInt Value(BitWidth, 0);
372 for (unsigned i = 0, e = BI->getNumBits(); i != e; ++i) {
373 if (BitInit *B = dyn_cast<BitInit>(BI->getBit(e - i - 1)))
374 Value |= APInt(BitWidth, (uint64_t)B->getValue()) << (e - i - 1);
375 }
376 o << " ";
377 emitInstBits(o, Value);
378 o << "," << '\t' << "// " << R->getName() << "\n";
379 }
380 o << " UINT64_C(0)\n };\n";
381 }
382
run(raw_ostream & o)383 void CodeEmitterGen::run(raw_ostream &o) {
384 CodeGenTarget Target(Records);
385 std::vector<Record*> Insts = Records.getAllDerivedDefinitions("Instruction");
386
387 // For little-endian instruction bit encodings, reverse the bit order
388 Target.reverseBitsForLittleEndianEncoding();
389
390 ArrayRef<const CodeGenInstruction*> NumberedInstructions =
391 Target.getInstructionsByEnumValue();
392
393 if (any_of(NumberedInstructions, [](const CodeGenInstruction *CGI) {
394 Record *R = CGI->TheDef;
395 return R->getValue("Inst") && isa<DagInit>(R->getValueInit("Inst"));
396 })) {
397 emitVarLenCodeEmitter(Records, o);
398 } else {
399 const CodeGenHwModes &HWM = Target.getHwModes();
400 // The set of HwModes used by instruction encodings.
401 std::set<unsigned> HwModes;
402 BitWidth = 0;
403 for (const CodeGenInstruction *CGI : NumberedInstructions) {
404 Record *R = CGI->TheDef;
405 if (R->getValueAsString("Namespace") == "TargetOpcode" ||
406 R->getValueAsBit("isPseudo"))
407 continue;
408
409 if (const RecordVal *RV = R->getValue("EncodingInfos")) {
410 if (DefInit *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
411 EncodingInfoByHwMode EBM(DI->getDef(), HWM);
412 for (auto &KV : EBM) {
413 BitsInit *BI = KV.second->getValueAsBitsInit("Inst");
414 BitWidth = std::max(BitWidth, BI->getNumBits());
415 HwModes.insert(KV.first);
416 }
417 continue;
418 }
419 }
420 BitsInit *BI = R->getValueAsBitsInit("Inst");
421 BitWidth = std::max(BitWidth, BI->getNumBits());
422 }
423 UseAPInt = BitWidth > 64;
424
425 // Emit function declaration
426 if (UseAPInt) {
427 o << "void " << Target.getName()
428 << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
429 << " SmallVectorImpl<MCFixup> &Fixups,\n"
430 << " APInt &Inst,\n"
431 << " APInt &Scratch,\n"
432 << " const MCSubtargetInfo &STI) const {\n";
433 } else {
434 o << "uint64_t " << Target.getName();
435 o << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
436 << " SmallVectorImpl<MCFixup> &Fixups,\n"
437 << " const MCSubtargetInfo &STI) const {\n";
438 }
439
440 // Emit instruction base values
441 if (HwModes.empty()) {
442 emitInstructionBaseValues(o, NumberedInstructions, Target, -1);
443 } else {
444 for (unsigned HwMode : HwModes)
445 emitInstructionBaseValues(o, NumberedInstructions, Target, (int)HwMode);
446 }
447
448 if (!HwModes.empty()) {
449 o << " const uint64_t *InstBits;\n";
450 o << " unsigned HwMode = STI.getHwMode();\n";
451 o << " switch (HwMode) {\n";
452 o << " default: llvm_unreachable(\"Unknown hardware mode!\"); break;\n";
453 for (unsigned I : HwModes) {
454 o << " case " << I << ": InstBits = InstBits_" << HWM.getMode(I).Name
455 << "; break;\n";
456 }
457 o << " };\n";
458 }
459
460 // Map to accumulate all the cases.
461 std::map<std::string, std::vector<std::string>> CaseMap;
462
463 // Construct all cases statement for each opcode
464 for (Record *R : Insts) {
465 if (R->getValueAsString("Namespace") == "TargetOpcode" ||
466 R->getValueAsBit("isPseudo"))
467 continue;
468 std::string InstName =
469 (R->getValueAsString("Namespace") + "::" + R->getName()).str();
470 std::string Case = getInstructionCase(R, Target);
471
472 CaseMap[Case].push_back(std::move(InstName));
473 }
474
475 // Emit initial function code
476 if (UseAPInt) {
477 int NumWords = APInt::getNumWords(BitWidth);
478 o << " const unsigned opcode = MI.getOpcode();\n"
479 << " if (Scratch.getBitWidth() != " << BitWidth << ")\n"
480 << " Scratch = Scratch.zext(" << BitWidth << ");\n"
481 << " Inst = APInt(" << BitWidth
482 << ", makeArrayRef(InstBits + opcode * " << NumWords << ", " << NumWords
483 << "));\n"
484 << " APInt &Value = Inst;\n"
485 << " APInt &op = Scratch;\n"
486 << " switch (opcode) {\n";
487 } else {
488 o << " const unsigned opcode = MI.getOpcode();\n"
489 << " uint64_t Value = InstBits[opcode];\n"
490 << " uint64_t op = 0;\n"
491 << " (void)op; // suppress warning\n"
492 << " switch (opcode) {\n";
493 }
494
495 // Emit each case statement
496 std::map<std::string, std::vector<std::string>>::iterator IE, EE;
497 for (IE = CaseMap.begin(), EE = CaseMap.end(); IE != EE; ++IE) {
498 const std::string &Case = IE->first;
499 std::vector<std::string> &InstList = IE->second;
500
501 for (int i = 0, N = InstList.size(); i < N; i++) {
502 if (i)
503 o << "\n";
504 o << " case " << InstList[i] << ":";
505 }
506 o << " {\n";
507 o << Case;
508 o << " break;\n"
509 << " }\n";
510 }
511
512 // Default case: unhandled opcode
513 o << " default:\n"
514 << " std::string msg;\n"
515 << " raw_string_ostream Msg(msg);\n"
516 << " Msg << \"Not supported instr: \" << MI;\n"
517 << " report_fatal_error(Msg.str().c_str());\n"
518 << " }\n";
519 if (UseAPInt)
520 o << " Inst = Value;\n";
521 else
522 o << " return Value;\n";
523 o << "}\n\n";
524 }
525 }
526
527 } // end anonymous namespace
528
529 namespace llvm {
530
EmitCodeEmitter(RecordKeeper & RK,raw_ostream & OS)531 void EmitCodeEmitter(RecordKeeper &RK, raw_ostream &OS) {
532 emitSourceFileHeader("Machine Code Emitter", OS);
533 CodeEmitterGen(RK).run(OS);
534 }
535
536 } // end namespace llvm
537