1 //===- CodeEmitterGen.cpp - Code Emitter Generator ------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // CodeEmitterGen uses the descriptions of instructions and their fields to 10 // construct an automated code emitter: a function that, given a MachineInstr, 11 // returns the (currently, 32-bit unsigned) value of the instruction. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "CodeGenInstruction.h" 16 #include "CodeGenTarget.h" 17 #include "SubtargetFeatureInfo.h" 18 #include "Types.h" 19 #include "llvm/ADT/APInt.h" 20 #include "llvm/ADT/ArrayRef.h" 21 #include "llvm/ADT/StringExtras.h" 22 #include "llvm/Support/Casting.h" 23 #include "llvm/Support/raw_ostream.h" 24 #include "llvm/TableGen/Record.h" 25 #include "llvm/TableGen/TableGenBackend.h" 26 #include <cassert> 27 #include <cstdint> 28 #include <map> 29 #include <set> 30 #include <string> 31 #include <utility> 32 #include <vector> 33 34 using namespace llvm; 35 36 namespace { 37 38 class CodeEmitterGen { 39 RecordKeeper &Records; 40 41 public: 42 CodeEmitterGen(RecordKeeper &R) : Records(R) {} 43 44 void run(raw_ostream &o); 45 46 private: 47 int getVariableBit(const std::string &VarName, BitsInit *BI, int bit); 48 std::string getInstructionCase(Record *R, CodeGenTarget &Target); 49 void AddCodeToMergeInOperand(Record *R, BitsInit *BI, 50 const std::string &VarName, 51 unsigned &NumberedOp, 52 std::set<unsigned> &NamedOpIndices, 53 std::string &Case, CodeGenTarget &Target); 54 55 unsigned BitWidth; 56 bool UseAPInt; 57 }; 58 59 // If the VarBitInit at position 'bit' matches the specified variable then 60 // return the variable bit position. Otherwise return -1. 61 int CodeEmitterGen::getVariableBit(const std::string &VarName, 62 BitsInit *BI, int bit) { 63 if (VarBitInit *VBI = dyn_cast<VarBitInit>(BI->getBit(bit))) { 64 if (VarInit *VI = dyn_cast<VarInit>(VBI->getBitVar())) 65 if (VI->getName() == VarName) 66 return VBI->getBitNum(); 67 } else if (VarInit *VI = dyn_cast<VarInit>(BI->getBit(bit))) { 68 if (VI->getName() == VarName) 69 return 0; 70 } 71 72 return -1; 73 } 74 75 void CodeEmitterGen:: 76 AddCodeToMergeInOperand(Record *R, BitsInit *BI, const std::string &VarName, 77 unsigned &NumberedOp, 78 std::set<unsigned> &NamedOpIndices, 79 std::string &Case, CodeGenTarget &Target) { 80 CodeGenInstruction &CGI = Target.getInstruction(R); 81 82 // Determine if VarName actually contributes to the Inst encoding. 83 int bit = BI->getNumBits()-1; 84 85 // Scan for a bit that this contributed to. 86 for (; bit >= 0; ) { 87 if (getVariableBit(VarName, BI, bit) != -1) 88 break; 89 90 --bit; 91 } 92 93 // If we found no bits, ignore this value, otherwise emit the call to get the 94 // operand encoding. 95 if (bit < 0) return; 96 97 // If the operand matches by name, reference according to that 98 // operand number. Non-matching operands are assumed to be in 99 // order. 100 unsigned OpIdx; 101 if (CGI.Operands.hasOperandNamed(VarName, OpIdx)) { 102 // Get the machine operand number for the indicated operand. 103 OpIdx = CGI.Operands[OpIdx].MIOperandNo; 104 assert(!CGI.Operands.isFlatOperandNotEmitted(OpIdx) && 105 "Explicitly used operand also marked as not emitted!"); 106 } else { 107 unsigned NumberOps = CGI.Operands.size(); 108 /// If this operand is not supposed to be emitted by the 109 /// generated emitter, skip it. 110 while (NumberedOp < NumberOps && 111 (CGI.Operands.isFlatOperandNotEmitted(NumberedOp) || 112 (!NamedOpIndices.empty() && NamedOpIndices.count( 113 CGI.Operands.getSubOperandNumber(NumberedOp).first)))) { 114 ++NumberedOp; 115 116 if (NumberedOp >= CGI.Operands.back().MIOperandNo + 117 CGI.Operands.back().MINumOperands) { 118 errs() << "Too few operands in record " << R->getName() << 119 " (no match for variable " << VarName << "):\n"; 120 errs() << *R; 121 errs() << '\n'; 122 123 return; 124 } 125 } 126 127 OpIdx = NumberedOp++; 128 } 129 130 std::pair<unsigned, unsigned> SO = CGI.Operands.getSubOperandNumber(OpIdx); 131 std::string &EncoderMethodName = CGI.Operands[SO.first].EncoderMethodName; 132 133 if (UseAPInt) 134 Case += " op.clearAllBits();\n"; 135 136 // If the source operand has a custom encoder, use it. This will 137 // get the encoding for all of the suboperands. 138 if (!EncoderMethodName.empty()) { 139 // A custom encoder has all of the information for the 140 // sub-operands, if there are more than one, so only 141 // query the encoder once per source operand. 142 if (SO.second == 0) { 143 Case += " // op: " + VarName + "\n"; 144 if (UseAPInt) { 145 Case += " " + EncoderMethodName + "(MI, " + utostr(OpIdx); 146 Case += ", op"; 147 } else { 148 Case += " op = " + EncoderMethodName + "(MI, " + utostr(OpIdx); 149 } 150 Case += ", Fixups, STI);\n"; 151 } 152 } else { 153 Case += " // op: " + VarName + "\n"; 154 if (UseAPInt) { 155 Case += " getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx) + ")"; 156 Case += ", op, Fixups, STI"; 157 } else { 158 Case += " op = getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx) + ")"; 159 Case += ", Fixups, STI"; 160 } 161 Case += ");\n"; 162 } 163 164 // Precalculate the number of lits this variable contributes to in the 165 // operand. If there is a single lit (consecutive range of bits) we can use a 166 // destructive sequence on APInt that reduces memory allocations. 167 int numOperandLits = 0; 168 for (int tmpBit = bit; tmpBit >= 0;) { 169 int varBit = getVariableBit(VarName, BI, tmpBit); 170 171 // If this bit isn't from a variable, skip it. 172 if (varBit == -1) { 173 --tmpBit; 174 continue; 175 } 176 177 // Figure out the consecutive range of bits covered by this operand, in 178 // order to generate better encoding code. 179 int beginVarBit = varBit; 180 int N = 1; 181 for (--tmpBit; tmpBit >= 0;) { 182 varBit = getVariableBit(VarName, BI, tmpBit); 183 if (varBit == -1 || varBit != (beginVarBit - N)) 184 break; 185 ++N; 186 --tmpBit; 187 } 188 ++numOperandLits; 189 } 190 191 for (; bit >= 0; ) { 192 int varBit = getVariableBit(VarName, BI, bit); 193 194 // If this bit isn't from a variable, skip it. 195 if (varBit == -1) { 196 --bit; 197 continue; 198 } 199 200 // Figure out the consecutive range of bits covered by this operand, in 201 // order to generate better encoding code. 202 int beginInstBit = bit; 203 int beginVarBit = varBit; 204 int N = 1; 205 for (--bit; bit >= 0;) { 206 varBit = getVariableBit(VarName, BI, bit); 207 if (varBit == -1 || varBit != (beginVarBit - N)) break; 208 ++N; 209 --bit; 210 } 211 212 std::string maskStr; 213 int opShift; 214 215 if (UseAPInt) { 216 unsigned loBit = beginVarBit - N + 1; 217 unsigned hiBit = loBit + N; 218 maskStr = "M" + itostr(bit); 219 Case += " const APInt " + maskStr + " = APInt::getBitsSet(" + 220 itostr(BitWidth) + ", " + itostr(loBit) + ", " + itostr(hiBit) + 221 ");\n"; 222 } else { 223 uint64_t opMask = ~(uint64_t)0 >> (64 - N); 224 opShift = beginVarBit - N + 1; 225 opMask <<= opShift; 226 maskStr = "UINT64_C(" + utostr(opMask) + ")"; 227 } 228 opShift = beginInstBit - beginVarBit; 229 230 if (numOperandLits == 1) { 231 // Because Op may be an APInt, ensure all arithmetic is done in-place 232 // where possible to elide copies. 233 Case += " op &= " + maskStr + ";\n"; 234 if (opShift > 0) { 235 Case += " op <<= " + itostr(opShift) + ";\n"; 236 } else if (opShift < 0) { 237 Case += " op >>= " + itostr(-opShift) + ";\n"; 238 } 239 Case += " Value |= op;\n"; 240 } else { 241 if (opShift > 0) { 242 Case += " Value |= (op & " + maskStr + ") << " + itostr(opShift) + 243 ";\n"; 244 } else if (opShift < 0) { 245 Case += " Value |= (op & " + maskStr + ") >> " + itostr(-opShift) + 246 ";\n"; 247 } else { 248 Case += " Value |= (op & " + maskStr + ");\n"; 249 } 250 } 251 } 252 } 253 254 std::string CodeEmitterGen::getInstructionCase(Record *R, 255 CodeGenTarget &Target) { 256 std::string Case; 257 BitsInit *BI = R->getValueAsBitsInit("Inst"); 258 unsigned NumberedOp = 0; 259 std::set<unsigned> NamedOpIndices; 260 261 // Collect the set of operand indices that might correspond to named 262 // operand, and skip these when assigning operands based on position. 263 if (Target.getInstructionSet()-> 264 getValueAsBit("noNamedPositionallyEncodedOperands")) { 265 CodeGenInstruction &CGI = Target.getInstruction(R); 266 for (const RecordVal &RV : R->getValues()) { 267 unsigned OpIdx; 268 if (!CGI.Operands.hasOperandNamed(RV.getName(), OpIdx)) 269 continue; 270 271 NamedOpIndices.insert(OpIdx); 272 } 273 } 274 275 // Loop over all of the fields in the instruction, determining which are the 276 // operands to the instruction. 277 for (const RecordVal &RV : R->getValues()) { 278 // Ignore fixed fields in the record, we're looking for values like: 279 // bits<5> RST = { ?, ?, ?, ?, ? }; 280 if (RV.getPrefix() || RV.getValue()->isComplete()) 281 continue; 282 283 AddCodeToMergeInOperand(R, BI, RV.getName(), NumberedOp, 284 NamedOpIndices, Case, Target); 285 } 286 287 StringRef PostEmitter = R->getValueAsString("PostEncoderMethod"); 288 if (!PostEmitter.empty()) { 289 Case += " Value = "; 290 Case += PostEmitter; 291 Case += "(MI, Value"; 292 Case += ", STI"; 293 Case += ");\n"; 294 } 295 296 return Case; 297 } 298 299 static std::string 300 getNameForFeatureBitset(const std::vector<Record *> &FeatureBitset) { 301 std::string Name = "CEFBS"; 302 for (const auto &Feature : FeatureBitset) 303 Name += ("_" + Feature->getName()).str(); 304 return Name; 305 } 306 307 static void emitInstBits(raw_ostream &OS, const APInt &Bits) { 308 for (unsigned I = 0; I < Bits.getNumWords(); ++I) 309 OS << ((I > 0) ? ", " : "") << "UINT64_C(" << utostr(Bits.getRawData()[I]) 310 << ")"; 311 } 312 313 void CodeEmitterGen::run(raw_ostream &o) { 314 CodeGenTarget Target(Records); 315 std::vector<Record*> Insts = Records.getAllDerivedDefinitions("Instruction"); 316 317 // For little-endian instruction bit encodings, reverse the bit order 318 Target.reverseBitsForLittleEndianEncoding(); 319 320 ArrayRef<const CodeGenInstruction*> NumberedInstructions = 321 Target.getInstructionsByEnumValue(); 322 323 // Default to something sensible in case the target doesn't define Inst. 324 BitWidth = 32; 325 for (const CodeGenInstruction *CGI : NumberedInstructions) { 326 Record *R = CGI->TheDef; 327 if (R->getValueAsString("Namespace") == "TargetOpcode" || 328 R->getValueAsBit("isPseudo")) 329 continue; 330 331 BitsInit *BI = R->getValueAsBitsInit("Inst"); 332 BitWidth = BI->getNumBits(); 333 break; 334 } 335 UseAPInt = BitWidth > 64; 336 337 // Emit function declaration 338 if (UseAPInt) { 339 o << "void " << Target.getName() 340 << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n" 341 << " SmallVectorImpl<MCFixup> &Fixups,\n" 342 << " APInt &Inst,\n" 343 << " APInt &Scratch,\n" 344 << " const MCSubtargetInfo &STI) const {\n"; 345 } else { 346 o << "uint64_t " << Target.getName(); 347 o << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n" 348 << " SmallVectorImpl<MCFixup> &Fixups,\n" 349 << " const MCSubtargetInfo &STI) const {\n"; 350 } 351 352 // Emit instruction base values 353 o << " static const uint64_t InstBits[] = {\n"; 354 for (const CodeGenInstruction *CGI : NumberedInstructions) { 355 Record *R = CGI->TheDef; 356 357 if (R->getValueAsString("Namespace") == "TargetOpcode" || 358 R->getValueAsBit("isPseudo")) { 359 o << " "; emitInstBits(o, APInt(BitWidth, 0)); o << ",\n"; 360 continue; 361 } 362 363 BitsInit *BI = R->getValueAsBitsInit("Inst"); 364 BitWidth = BI->getNumBits(); 365 366 // Start by filling in fixed values. 367 APInt Value(BitWidth, 0); 368 for (unsigned i = 0, e = BI->getNumBits(); i != e; ++i) { 369 if (BitInit *B = dyn_cast<BitInit>(BI->getBit(e - i - 1))) 370 Value |= APInt(BitWidth, (uint64_t)B->getValue()) << (e - i - 1); 371 } 372 o << " "; 373 emitInstBits(o, Value); 374 o << "," << '\t' << "// " << R->getName() << "\n"; 375 } 376 o << " UINT64_C(0)\n };\n"; 377 378 // Map to accumulate all the cases. 379 std::map<std::string, std::vector<std::string>> CaseMap; 380 381 // Construct all cases statement for each opcode 382 for (std::vector<Record*>::iterator IC = Insts.begin(), EC = Insts.end(); 383 IC != EC; ++IC) { 384 Record *R = *IC; 385 if (R->getValueAsString("Namespace") == "TargetOpcode" || 386 R->getValueAsBit("isPseudo")) 387 continue; 388 std::string InstName = 389 (R->getValueAsString("Namespace") + "::" + R->getName()).str(); 390 std::string Case = getInstructionCase(R, Target); 391 392 CaseMap[Case].push_back(std::move(InstName)); 393 } 394 395 // Emit initial function code 396 if (UseAPInt) { 397 int NumWords = APInt::getNumWords(BitWidth); 398 int NumBytes = (BitWidth + 7) / 8; 399 o << " const unsigned opcode = MI.getOpcode();\n" 400 << " if (Inst.getBitWidth() != " << BitWidth << ")\n" 401 << " Inst = Inst.zext(" << BitWidth << ");\n" 402 << " if (Scratch.getBitWidth() != " << BitWidth << ")\n" 403 << " Scratch = Scratch.zext(" << BitWidth << ");\n" 404 << " LoadIntFromMemory(Inst, (uint8_t*)&InstBits[opcode * " << NumWords 405 << "], " << NumBytes << ");\n" 406 << " APInt &Value = Inst;\n" 407 << " APInt &op = Scratch;\n" 408 << " switch (opcode) {\n"; 409 } else { 410 o << " const unsigned opcode = MI.getOpcode();\n" 411 << " uint64_t Value = InstBits[opcode];\n" 412 << " uint64_t op = 0;\n" 413 << " (void)op; // suppress warning\n" 414 << " switch (opcode) {\n"; 415 } 416 417 // Emit each case statement 418 std::map<std::string, std::vector<std::string>>::iterator IE, EE; 419 for (IE = CaseMap.begin(), EE = CaseMap.end(); IE != EE; ++IE) { 420 const std::string &Case = IE->first; 421 std::vector<std::string> &InstList = IE->second; 422 423 for (int i = 0, N = InstList.size(); i < N; i++) { 424 if (i) o << "\n"; 425 o << " case " << InstList[i] << ":"; 426 } 427 o << " {\n"; 428 o << Case; 429 o << " break;\n" 430 << " }\n"; 431 } 432 433 // Default case: unhandled opcode 434 o << " default:\n" 435 << " std::string msg;\n" 436 << " raw_string_ostream Msg(msg);\n" 437 << " Msg << \"Not supported instr: \" << MI;\n" 438 << " report_fatal_error(Msg.str());\n" 439 << " }\n" 440 << " return Value;\n" 441 << "}\n\n"; 442 443 const auto &All = SubtargetFeatureInfo::getAll(Records); 444 std::map<Record *, SubtargetFeatureInfo, LessRecordByID> SubtargetFeatures; 445 SubtargetFeatures.insert(All.begin(), All.end()); 446 447 o << "#ifdef ENABLE_INSTR_PREDICATE_VERIFIER\n" 448 << "#undef ENABLE_INSTR_PREDICATE_VERIFIER\n" 449 << "#include <sstream>\n\n"; 450 451 // Emit the subtarget feature enumeration. 452 SubtargetFeatureInfo::emitSubtargetFeatureBitEnumeration(SubtargetFeatures, 453 o); 454 455 // Emit the name table for error messages. 456 o << "#ifndef NDEBUG\n"; 457 SubtargetFeatureInfo::emitNameTable(SubtargetFeatures, o); 458 o << "#endif // NDEBUG\n"; 459 460 // Emit the available features compute function. 461 SubtargetFeatureInfo::emitComputeAssemblerAvailableFeatures( 462 Target.getName(), "MCCodeEmitter", "computeAvailableFeatures", 463 SubtargetFeatures, o); 464 465 std::vector<std::vector<Record *>> FeatureBitsets; 466 for (const CodeGenInstruction *Inst : Target.getInstructionsByEnumValue()) { 467 FeatureBitsets.emplace_back(); 468 for (Record *Predicate : Inst->TheDef->getValueAsListOfDefs("Predicates")) { 469 const auto &I = SubtargetFeatures.find(Predicate); 470 if (I != SubtargetFeatures.end()) 471 FeatureBitsets.back().push_back(I->second.TheDef); 472 } 473 } 474 475 llvm::sort(FeatureBitsets, [&](const std::vector<Record *> &A, 476 const std::vector<Record *> &B) { 477 if (A.size() < B.size()) 478 return true; 479 if (A.size() > B.size()) 480 return false; 481 for (const auto &Pair : zip(A, B)) { 482 if (std::get<0>(Pair)->getName() < std::get<1>(Pair)->getName()) 483 return true; 484 if (std::get<0>(Pair)->getName() > std::get<1>(Pair)->getName()) 485 return false; 486 } 487 return false; 488 }); 489 FeatureBitsets.erase( 490 std::unique(FeatureBitsets.begin(), FeatureBitsets.end()), 491 FeatureBitsets.end()); 492 o << "#ifndef NDEBUG\n" 493 << "// Feature bitsets.\n" 494 << "enum : " << getMinimalTypeForRange(FeatureBitsets.size()) << " {\n" 495 << " CEFBS_None,\n"; 496 for (const auto &FeatureBitset : FeatureBitsets) { 497 if (FeatureBitset.empty()) 498 continue; 499 o << " " << getNameForFeatureBitset(FeatureBitset) << ",\n"; 500 } 501 o << "};\n\n" 502 << "static constexpr FeatureBitset FeatureBitsets[] = {\n" 503 << " {}, // CEFBS_None\n"; 504 for (const auto &FeatureBitset : FeatureBitsets) { 505 if (FeatureBitset.empty()) 506 continue; 507 o << " {"; 508 for (const auto &Feature : FeatureBitset) { 509 const auto &I = SubtargetFeatures.find(Feature); 510 assert(I != SubtargetFeatures.end() && "Didn't import predicate?"); 511 o << I->second.getEnumBitName() << ", "; 512 } 513 o << "},\n"; 514 } 515 o << "};\n" 516 << "#endif // NDEBUG\n\n"; 517 518 519 // Emit the predicate verifier. 520 o << "void " << Target.getName() 521 << "MCCodeEmitter::verifyInstructionPredicates(\n" 522 << " const MCInst &Inst, const FeatureBitset &AvailableFeatures) const {\n" 523 << "#ifndef NDEBUG\n" 524 << " static " << getMinimalTypeForRange(FeatureBitsets.size()) 525 << " RequiredFeaturesRefs[] = {\n"; 526 unsigned InstIdx = 0; 527 for (const CodeGenInstruction *Inst : Target.getInstructionsByEnumValue()) { 528 o << " CEFBS"; 529 unsigned NumPredicates = 0; 530 for (Record *Predicate : Inst->TheDef->getValueAsListOfDefs("Predicates")) { 531 const auto &I = SubtargetFeatures.find(Predicate); 532 if (I != SubtargetFeatures.end()) { 533 o << '_' << I->second.TheDef->getName(); 534 NumPredicates++; 535 } 536 } 537 if (!NumPredicates) 538 o << "_None"; 539 o << ", // " << Inst->TheDef->getName() << " = " << InstIdx << "\n"; 540 InstIdx++; 541 } 542 o << " };\n\n"; 543 o << " assert(Inst.getOpcode() < " << InstIdx << ");\n"; 544 o << " const FeatureBitset &RequiredFeatures = " 545 "FeatureBitsets[RequiredFeaturesRefs[Inst.getOpcode()]];\n"; 546 o << " FeatureBitset MissingFeatures =\n" 547 << " (AvailableFeatures & RequiredFeatures) ^\n" 548 << " RequiredFeatures;\n" 549 << " if (MissingFeatures.any()) {\n" 550 << " std::ostringstream Msg;\n" 551 << " Msg << \"Attempting to emit \" << " 552 "MCII.getName(Inst.getOpcode()).str()\n" 553 << " << \" instruction but the \";\n" 554 << " for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i)\n" 555 << " if (MissingFeatures.test(i))\n" 556 << " Msg << SubtargetFeatureNames[i] << \" \";\n" 557 << " Msg << \"predicate(s) are not met\";\n" 558 << " report_fatal_error(Msg.str());\n" 559 << " }\n" 560 << "#else\n" 561 << "// Silence unused variable warning on targets that don't use MCII for " 562 "other purposes (e.g. BPF).\n" 563 << "(void)MCII;\n" 564 << "#endif // NDEBUG\n"; 565 o << "}\n"; 566 o << "#endif\n"; 567 } 568 569 } // end anonymous namespace 570 571 namespace llvm { 572 573 void EmitCodeEmitter(RecordKeeper &RK, raw_ostream &OS) { 574 emitSourceFileHeader("Machine Code Emitter", OS); 575 CodeEmitterGen(RK).run(OS); 576 } 577 578 } // end namespace llvm 579