1 //===- CodeEmitterGen.cpp - Code Emitter Generator ------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // CodeEmitterGen uses the descriptions of instructions and their fields to 10 // construct an automated code emitter: a function that, given a MachineInstr, 11 // returns the (currently, 32-bit unsigned) value of the instruction. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "CodeGenInstruction.h" 16 #include "CodeGenTarget.h" 17 #include "SubtargetFeatureInfo.h" 18 #include "Types.h" 19 #include "llvm/ADT/APInt.h" 20 #include "llvm/ADT/ArrayRef.h" 21 #include "llvm/ADT/StringExtras.h" 22 #include "llvm/Support/Casting.h" 23 #include "llvm/Support/raw_ostream.h" 24 #include "llvm/TableGen/Record.h" 25 #include "llvm/TableGen/TableGenBackend.h" 26 #include <cassert> 27 #include <cstdint> 28 #include <map> 29 #include <set> 30 #include <string> 31 #include <utility> 32 #include <vector> 33 34 using namespace llvm; 35 36 namespace { 37 38 class CodeEmitterGen { 39 RecordKeeper &Records; 40 41 public: 42 CodeEmitterGen(RecordKeeper &R) : Records(R) {} 43 44 void run(raw_ostream &o); 45 46 private: 47 int getVariableBit(const std::string &VarName, BitsInit *BI, int bit); 48 std::string getInstructionCase(Record *R, CodeGenTarget &Target); 49 void AddCodeToMergeInOperand(Record *R, BitsInit *BI, 50 const std::string &VarName, 51 unsigned &NumberedOp, 52 std::set<unsigned> &NamedOpIndices, 53 std::string &Case, CodeGenTarget &Target); 54 55 unsigned BitWidth; 56 bool UseAPInt; 57 }; 58 59 // If the VarBitInit at position 'bit' matches the specified variable then 60 // return the variable bit position. Otherwise return -1. 61 int CodeEmitterGen::getVariableBit(const std::string &VarName, 62 BitsInit *BI, int bit) { 63 if (VarBitInit *VBI = dyn_cast<VarBitInit>(BI->getBit(bit))) { 64 if (VarInit *VI = dyn_cast<VarInit>(VBI->getBitVar())) 65 if (VI->getName() == VarName) 66 return VBI->getBitNum(); 67 } else if (VarInit *VI = dyn_cast<VarInit>(BI->getBit(bit))) { 68 if (VI->getName() == VarName) 69 return 0; 70 } 71 72 return -1; 73 } 74 75 void CodeEmitterGen:: 76 AddCodeToMergeInOperand(Record *R, BitsInit *BI, const std::string &VarName, 77 unsigned &NumberedOp, 78 std::set<unsigned> &NamedOpIndices, 79 std::string &Case, CodeGenTarget &Target) { 80 CodeGenInstruction &CGI = Target.getInstruction(R); 81 82 // Determine if VarName actually contributes to the Inst encoding. 83 int bit = BI->getNumBits()-1; 84 85 // Scan for a bit that this contributed to. 86 for (; bit >= 0; ) { 87 if (getVariableBit(VarName, BI, bit) != -1) 88 break; 89 90 --bit; 91 } 92 93 // If we found no bits, ignore this value, otherwise emit the call to get the 94 // operand encoding. 95 if (bit < 0) return; 96 97 // If the operand matches by name, reference according to that 98 // operand number. Non-matching operands are assumed to be in 99 // order. 100 unsigned OpIdx; 101 if (CGI.Operands.hasOperandNamed(VarName, OpIdx)) { 102 // Get the machine operand number for the indicated operand. 103 OpIdx = CGI.Operands[OpIdx].MIOperandNo; 104 assert(!CGI.Operands.isFlatOperandNotEmitted(OpIdx) && 105 "Explicitly used operand also marked as not emitted!"); 106 } else { 107 unsigned NumberOps = CGI.Operands.size(); 108 /// If this operand is not supposed to be emitted by the 109 /// generated emitter, skip it. 110 while (NumberedOp < NumberOps && 111 (CGI.Operands.isFlatOperandNotEmitted(NumberedOp) || 112 (!NamedOpIndices.empty() && NamedOpIndices.count( 113 CGI.Operands.getSubOperandNumber(NumberedOp).first)))) { 114 ++NumberedOp; 115 116 if (NumberedOp >= CGI.Operands.back().MIOperandNo + 117 CGI.Operands.back().MINumOperands) { 118 errs() << "Too few operands in record " << R->getName() << 119 " (no match for variable " << VarName << "):\n"; 120 errs() << *R; 121 errs() << '\n'; 122 123 return; 124 } 125 } 126 127 OpIdx = NumberedOp++; 128 } 129 130 std::pair<unsigned, unsigned> SO = CGI.Operands.getSubOperandNumber(OpIdx); 131 std::string &EncoderMethodName = CGI.Operands[SO.first].EncoderMethodName; 132 133 if (UseAPInt) 134 Case += " op.clearAllBits();\n"; 135 136 // If the source operand has a custom encoder, use it. This will 137 // get the encoding for all of the suboperands. 138 if (!EncoderMethodName.empty()) { 139 // A custom encoder has all of the information for the 140 // sub-operands, if there are more than one, so only 141 // query the encoder once per source operand. 142 if (SO.second == 0) { 143 Case += " // op: " + VarName + "\n"; 144 if (UseAPInt) { 145 Case += " " + EncoderMethodName + "(MI, " + utostr(OpIdx); 146 Case += ", op"; 147 } else { 148 Case += " op = " + EncoderMethodName + "(MI, " + utostr(OpIdx); 149 } 150 Case += ", Fixups, STI);\n"; 151 } 152 } else { 153 Case += " // op: " + VarName + "\n"; 154 if (UseAPInt) { 155 Case += " getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx) + ")"; 156 Case += ", op, Fixups, STI"; 157 } else { 158 Case += " op = getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx) + ")"; 159 Case += ", Fixups, STI"; 160 } 161 Case += ");\n"; 162 } 163 164 // Precalculate the number of lits this variable contributes to in the 165 // operand. If there is a single lit (consecutive range of bits) we can use a 166 // destructive sequence on APInt that reduces memory allocations. 167 int numOperandLits = 0; 168 for (int tmpBit = bit; tmpBit >= 0;) { 169 int varBit = getVariableBit(VarName, BI, tmpBit); 170 171 // If this bit isn't from a variable, skip it. 172 if (varBit == -1) { 173 --tmpBit; 174 continue; 175 } 176 177 // Figure out the consecutive range of bits covered by this operand, in 178 // order to generate better encoding code. 179 int beginVarBit = varBit; 180 int N = 1; 181 for (--tmpBit; tmpBit >= 0;) { 182 varBit = getVariableBit(VarName, BI, tmpBit); 183 if (varBit == -1 || varBit != (beginVarBit - N)) 184 break; 185 ++N; 186 --tmpBit; 187 } 188 ++numOperandLits; 189 } 190 191 for (; bit >= 0; ) { 192 int varBit = getVariableBit(VarName, BI, bit); 193 194 // If this bit isn't from a variable, skip it. 195 if (varBit == -1) { 196 --bit; 197 continue; 198 } 199 200 // Figure out the consecutive range of bits covered by this operand, in 201 // order to generate better encoding code. 202 int beginInstBit = bit; 203 int beginVarBit = varBit; 204 int N = 1; 205 for (--bit; bit >= 0;) { 206 varBit = getVariableBit(VarName, BI, bit); 207 if (varBit == -1 || varBit != (beginVarBit - N)) break; 208 ++N; 209 --bit; 210 } 211 212 std::string maskStr; 213 int opShift; 214 215 unsigned loBit = beginVarBit - N + 1; 216 unsigned hiBit = loBit + N; 217 unsigned loInstBit = beginInstBit - N + 1; 218 if (UseAPInt) { 219 std::string extractStr; 220 if (N >= 64) { 221 extractStr = "op.extractBits(" + itostr(hiBit - loBit) + ", " + 222 itostr(loBit) + ")"; 223 Case += " Value.insertBits(" + extractStr + ", " + 224 itostr(loInstBit) + ");\n"; 225 } else { 226 extractStr = "op.extractBitsAsZExtValue(" + itostr(hiBit - loBit) + 227 ", " + itostr(loBit) + ")"; 228 Case += " Value.insertBits(" + extractStr + ", " + 229 itostr(loInstBit) + ", " + itostr(hiBit - loBit) + ");\n"; 230 } 231 } else { 232 uint64_t opMask = ~(uint64_t)0 >> (64 - N); 233 opShift = beginVarBit - N + 1; 234 opMask <<= opShift; 235 maskStr = "UINT64_C(" + utostr(opMask) + ")"; 236 opShift = beginInstBit - beginVarBit; 237 238 if (numOperandLits == 1) { 239 Case += " op &= " + maskStr + ";\n"; 240 if (opShift > 0) { 241 Case += " op <<= " + itostr(opShift) + ";\n"; 242 } else if (opShift < 0) { 243 Case += " op >>= " + itostr(-opShift) + ";\n"; 244 } 245 Case += " Value |= op;\n"; 246 } else { 247 if (opShift > 0) { 248 Case += " Value |= (op & " + maskStr + ") << " + 249 itostr(opShift) + ";\n"; 250 } else if (opShift < 0) { 251 Case += " Value |= (op & " + maskStr + ") >> " + 252 itostr(-opShift) + ";\n"; 253 } else { 254 Case += " Value |= (op & " + maskStr + ");\n"; 255 } 256 } 257 } 258 } 259 } 260 261 std::string CodeEmitterGen::getInstructionCase(Record *R, 262 CodeGenTarget &Target) { 263 std::string Case; 264 BitsInit *BI = R->getValueAsBitsInit("Inst"); 265 unsigned NumberedOp = 0; 266 std::set<unsigned> NamedOpIndices; 267 268 // Collect the set of operand indices that might correspond to named 269 // operand, and skip these when assigning operands based on position. 270 if (Target.getInstructionSet()-> 271 getValueAsBit("noNamedPositionallyEncodedOperands")) { 272 CodeGenInstruction &CGI = Target.getInstruction(R); 273 for (const RecordVal &RV : R->getValues()) { 274 unsigned OpIdx; 275 if (!CGI.Operands.hasOperandNamed(RV.getName(), OpIdx)) 276 continue; 277 278 NamedOpIndices.insert(OpIdx); 279 } 280 } 281 282 // Loop over all of the fields in the instruction, determining which are the 283 // operands to the instruction. 284 for (const RecordVal &RV : R->getValues()) { 285 // Ignore fixed fields in the record, we're looking for values like: 286 // bits<5> RST = { ?, ?, ?, ?, ? }; 287 if (RV.getPrefix() || RV.getValue()->isComplete()) 288 continue; 289 290 AddCodeToMergeInOperand(R, BI, RV.getName(), NumberedOp, 291 NamedOpIndices, Case, Target); 292 } 293 294 StringRef PostEmitter = R->getValueAsString("PostEncoderMethod"); 295 if (!PostEmitter.empty()) { 296 Case += " Value = "; 297 Case += PostEmitter; 298 Case += "(MI, Value"; 299 Case += ", STI"; 300 Case += ");\n"; 301 } 302 303 return Case; 304 } 305 306 static std::string 307 getNameForFeatureBitset(const std::vector<Record *> &FeatureBitset) { 308 std::string Name = "CEFBS"; 309 for (const auto &Feature : FeatureBitset) 310 Name += ("_" + Feature->getName()).str(); 311 return Name; 312 } 313 314 static void emitInstBits(raw_ostream &OS, const APInt &Bits) { 315 for (unsigned I = 0; I < Bits.getNumWords(); ++I) 316 OS << ((I > 0) ? ", " : "") << "UINT64_C(" << utostr(Bits.getRawData()[I]) 317 << ")"; 318 } 319 320 void CodeEmitterGen::run(raw_ostream &o) { 321 CodeGenTarget Target(Records); 322 std::vector<Record*> Insts = Records.getAllDerivedDefinitions("Instruction"); 323 324 // For little-endian instruction bit encodings, reverse the bit order 325 Target.reverseBitsForLittleEndianEncoding(); 326 327 ArrayRef<const CodeGenInstruction*> NumberedInstructions = 328 Target.getInstructionsByEnumValue(); 329 330 // Default to something sensible in case the target doesn't define Inst. 331 BitWidth = 32; 332 for (const CodeGenInstruction *CGI : NumberedInstructions) { 333 Record *R = CGI->TheDef; 334 if (R->getValueAsString("Namespace") == "TargetOpcode" || 335 R->getValueAsBit("isPseudo")) 336 continue; 337 338 BitsInit *BI = R->getValueAsBitsInit("Inst"); 339 BitWidth = BI->getNumBits(); 340 break; 341 } 342 UseAPInt = BitWidth > 64; 343 344 // Emit function declaration 345 if (UseAPInt) { 346 o << "void " << Target.getName() 347 << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n" 348 << " SmallVectorImpl<MCFixup> &Fixups,\n" 349 << " APInt &Inst,\n" 350 << " APInt &Scratch,\n" 351 << " const MCSubtargetInfo &STI) const {\n"; 352 } else { 353 o << "uint64_t " << Target.getName(); 354 o << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n" 355 << " SmallVectorImpl<MCFixup> &Fixups,\n" 356 << " const MCSubtargetInfo &STI) const {\n"; 357 } 358 359 // Emit instruction base values 360 o << " static const uint64_t InstBits[] = {\n"; 361 for (const CodeGenInstruction *CGI : NumberedInstructions) { 362 Record *R = CGI->TheDef; 363 364 if (R->getValueAsString("Namespace") == "TargetOpcode" || 365 R->getValueAsBit("isPseudo")) { 366 o << " "; emitInstBits(o, APInt(BitWidth, 0)); o << ",\n"; 367 continue; 368 } 369 370 BitsInit *BI = R->getValueAsBitsInit("Inst"); 371 BitWidth = BI->getNumBits(); 372 373 // Start by filling in fixed values. 374 APInt Value(BitWidth, 0); 375 for (unsigned i = 0, e = BI->getNumBits(); i != e; ++i) { 376 if (BitInit *B = dyn_cast<BitInit>(BI->getBit(e - i - 1))) 377 Value |= APInt(BitWidth, (uint64_t)B->getValue()) << (e - i - 1); 378 } 379 o << " "; 380 emitInstBits(o, Value); 381 o << "," << '\t' << "// " << R->getName() << "\n"; 382 } 383 o << " UINT64_C(0)\n };\n"; 384 385 // Map to accumulate all the cases. 386 std::map<std::string, std::vector<std::string>> CaseMap; 387 388 // Construct all cases statement for each opcode 389 for (std::vector<Record*>::iterator IC = Insts.begin(), EC = Insts.end(); 390 IC != EC; ++IC) { 391 Record *R = *IC; 392 if (R->getValueAsString("Namespace") == "TargetOpcode" || 393 R->getValueAsBit("isPseudo")) 394 continue; 395 std::string InstName = 396 (R->getValueAsString("Namespace") + "::" + R->getName()).str(); 397 std::string Case = getInstructionCase(R, Target); 398 399 CaseMap[Case].push_back(std::move(InstName)); 400 } 401 402 // Emit initial function code 403 if (UseAPInt) { 404 int NumWords = APInt::getNumWords(BitWidth); 405 int NumBytes = (BitWidth + 7) / 8; 406 o << " const unsigned opcode = MI.getOpcode();\n" 407 << " if (Inst.getBitWidth() != " << BitWidth << ")\n" 408 << " Inst = Inst.zext(" << BitWidth << ");\n" 409 << " if (Scratch.getBitWidth() != " << BitWidth << ")\n" 410 << " Scratch = Scratch.zext(" << BitWidth << ");\n" 411 << " LoadIntFromMemory(Inst, (uint8_t*)&InstBits[opcode * " << NumWords 412 << "], " << NumBytes << ");\n" 413 << " APInt &Value = Inst;\n" 414 << " APInt &op = Scratch;\n" 415 << " switch (opcode) {\n"; 416 } else { 417 o << " const unsigned opcode = MI.getOpcode();\n" 418 << " uint64_t Value = InstBits[opcode];\n" 419 << " uint64_t op = 0;\n" 420 << " (void)op; // suppress warning\n" 421 << " switch (opcode) {\n"; 422 } 423 424 // Emit each case statement 425 std::map<std::string, std::vector<std::string>>::iterator IE, EE; 426 for (IE = CaseMap.begin(), EE = CaseMap.end(); IE != EE; ++IE) { 427 const std::string &Case = IE->first; 428 std::vector<std::string> &InstList = IE->second; 429 430 for (int i = 0, N = InstList.size(); i < N; i++) { 431 if (i) o << "\n"; 432 o << " case " << InstList[i] << ":"; 433 } 434 o << " {\n"; 435 o << Case; 436 o << " break;\n" 437 << " }\n"; 438 } 439 440 // Default case: unhandled opcode 441 o << " default:\n" 442 << " std::string msg;\n" 443 << " raw_string_ostream Msg(msg);\n" 444 << " Msg << \"Not supported instr: \" << MI;\n" 445 << " report_fatal_error(Msg.str());\n" 446 << " }\n"; 447 if (UseAPInt) 448 o << " Inst = Value;\n"; 449 else 450 o << " return Value;\n"; 451 o << "}\n\n"; 452 453 const auto &All = SubtargetFeatureInfo::getAll(Records); 454 std::map<Record *, SubtargetFeatureInfo, LessRecordByID> SubtargetFeatures; 455 SubtargetFeatures.insert(All.begin(), All.end()); 456 457 o << "#ifdef ENABLE_INSTR_PREDICATE_VERIFIER\n" 458 << "#undef ENABLE_INSTR_PREDICATE_VERIFIER\n" 459 << "#include <sstream>\n\n"; 460 461 // Emit the subtarget feature enumeration. 462 SubtargetFeatureInfo::emitSubtargetFeatureBitEnumeration(SubtargetFeatures, 463 o); 464 465 // Emit the name table for error messages. 466 o << "#ifndef NDEBUG\n"; 467 SubtargetFeatureInfo::emitNameTable(SubtargetFeatures, o); 468 o << "#endif // NDEBUG\n"; 469 470 // Emit the available features compute function. 471 SubtargetFeatureInfo::emitComputeAssemblerAvailableFeatures( 472 Target.getName(), "MCCodeEmitter", "computeAvailableFeatures", 473 SubtargetFeatures, o); 474 475 std::vector<std::vector<Record *>> FeatureBitsets; 476 for (const CodeGenInstruction *Inst : Target.getInstructionsByEnumValue()) { 477 FeatureBitsets.emplace_back(); 478 for (Record *Predicate : Inst->TheDef->getValueAsListOfDefs("Predicates")) { 479 const auto &I = SubtargetFeatures.find(Predicate); 480 if (I != SubtargetFeatures.end()) 481 FeatureBitsets.back().push_back(I->second.TheDef); 482 } 483 } 484 485 llvm::sort(FeatureBitsets, [&](const std::vector<Record *> &A, 486 const std::vector<Record *> &B) { 487 if (A.size() < B.size()) 488 return true; 489 if (A.size() > B.size()) 490 return false; 491 for (const auto &Pair : zip(A, B)) { 492 if (std::get<0>(Pair)->getName() < std::get<1>(Pair)->getName()) 493 return true; 494 if (std::get<0>(Pair)->getName() > std::get<1>(Pair)->getName()) 495 return false; 496 } 497 return false; 498 }); 499 FeatureBitsets.erase( 500 std::unique(FeatureBitsets.begin(), FeatureBitsets.end()), 501 FeatureBitsets.end()); 502 o << "#ifndef NDEBUG\n" 503 << "// Feature bitsets.\n" 504 << "enum : " << getMinimalTypeForRange(FeatureBitsets.size()) << " {\n" 505 << " CEFBS_None,\n"; 506 for (const auto &FeatureBitset : FeatureBitsets) { 507 if (FeatureBitset.empty()) 508 continue; 509 o << " " << getNameForFeatureBitset(FeatureBitset) << ",\n"; 510 } 511 o << "};\n\n" 512 << "static constexpr FeatureBitset FeatureBitsets[] = {\n" 513 << " {}, // CEFBS_None\n"; 514 for (const auto &FeatureBitset : FeatureBitsets) { 515 if (FeatureBitset.empty()) 516 continue; 517 o << " {"; 518 for (const auto &Feature : FeatureBitset) { 519 const auto &I = SubtargetFeatures.find(Feature); 520 assert(I != SubtargetFeatures.end() && "Didn't import predicate?"); 521 o << I->second.getEnumBitName() << ", "; 522 } 523 o << "},\n"; 524 } 525 o << "};\n" 526 << "#endif // NDEBUG\n\n"; 527 528 529 // Emit the predicate verifier. 530 o << "void " << Target.getName() 531 << "MCCodeEmitter::verifyInstructionPredicates(\n" 532 << " const MCInst &Inst, const FeatureBitset &AvailableFeatures) const {\n" 533 << "#ifndef NDEBUG\n" 534 << " static " << getMinimalTypeForRange(FeatureBitsets.size()) 535 << " RequiredFeaturesRefs[] = {\n"; 536 unsigned InstIdx = 0; 537 for (const CodeGenInstruction *Inst : Target.getInstructionsByEnumValue()) { 538 o << " CEFBS"; 539 unsigned NumPredicates = 0; 540 for (Record *Predicate : Inst->TheDef->getValueAsListOfDefs("Predicates")) { 541 const auto &I = SubtargetFeatures.find(Predicate); 542 if (I != SubtargetFeatures.end()) { 543 o << '_' << I->second.TheDef->getName(); 544 NumPredicates++; 545 } 546 } 547 if (!NumPredicates) 548 o << "_None"; 549 o << ", // " << Inst->TheDef->getName() << " = " << InstIdx << "\n"; 550 InstIdx++; 551 } 552 o << " };\n\n"; 553 o << " assert(Inst.getOpcode() < " << InstIdx << ");\n"; 554 o << " const FeatureBitset &RequiredFeatures = " 555 "FeatureBitsets[RequiredFeaturesRefs[Inst.getOpcode()]];\n"; 556 o << " FeatureBitset MissingFeatures =\n" 557 << " (AvailableFeatures & RequiredFeatures) ^\n" 558 << " RequiredFeatures;\n" 559 << " if (MissingFeatures.any()) {\n" 560 << " std::ostringstream Msg;\n" 561 << " Msg << \"Attempting to emit \" << " 562 "MCII.getName(Inst.getOpcode()).str()\n" 563 << " << \" instruction but the \";\n" 564 << " for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i)\n" 565 << " if (MissingFeatures.test(i))\n" 566 << " Msg << SubtargetFeatureNames[i] << \" \";\n" 567 << " Msg << \"predicate(s) are not met\";\n" 568 << " report_fatal_error(Msg.str());\n" 569 << " }\n" 570 << "#else\n" 571 << "// Silence unused variable warning on targets that don't use MCII for " 572 "other purposes (e.g. BPF).\n" 573 << "(void)MCII;\n" 574 << "#endif // NDEBUG\n"; 575 o << "}\n"; 576 o << "#endif\n"; 577 } 578 579 } // end anonymous namespace 580 581 namespace llvm { 582 583 void EmitCodeEmitter(RecordKeeper &RK, raw_ostream &OS) { 584 emitSourceFileHeader("Machine Code Emitter", OS); 585 CodeEmitterGen(RK).run(OS); 586 } 587 588 } // end namespace llvm 589