//===- VarLenCodeEmitterGen.cpp - CEG for variable-length insts -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The CodeEmitterGen component for variable-length instructions.
//
// The basic CodeEmitterGen is almost exclusively designed for fixed-
// length instructions. A good analogy for its encoding scheme is how printf
// works: The (immutable) formatting string represents the fixed values in the
// encoded instruction. Placeholders (i.e. %something), on the other hand,
// represent the encoding of instruction operands.
// ```
// printf("1101 %src 1001 %dst", <encoded value for operand `src`>,
//                               <encoded value for operand `dst`>);
// ```
// VarLenCodeEmitterGen in this file provides an alternative encoding scheme
// that works more like a C++ stream operator:
// ```
// OS << 0b1101;
// if (Cond)
//   OS << OperandEncoding0;
// OS << 0b1001 << OperandEncoding1;
// ```
// You are free to concatenate arbitrary types (and sizes) of encoding
// fragments at any bit position, bringing more flexibility in defining
// encodings for variable-length instructions.
//
// More specifically, an instruction encoding is represented by a DAG-typed
// `Inst` field. Here is an example:
// ```
// dag Inst = (descend 0b1101, (operand "$src", 4), 0b1001,
//                     (operand "$dst", 4));
// ```
// It represents the following instruction encoding:
// ```
// MSB                                                     LSB
// 1101<encoding for operand src>1001<encoding for operand dst>
// ```
// For more details about DAG operators in the above snippet, please
// refer to \file include/llvm/Target/Target.td.
45 // 46 // VarLenCodeEmitter will convert the above DAG into the same helper function 47 // generated by CodeEmitter, `MCCodeEmitter::getBinaryCodeForInstr` (except 48 // for few details). 49 // 50 //===----------------------------------------------------------------------===// 51 52 #include "VarLenCodeEmitterGen.h" 53 #include "CodeGenInstruction.h" 54 #include "CodeGenTarget.h" 55 #include "SubtargetFeatureInfo.h" 56 #include "llvm/ADT/ArrayRef.h" 57 #include "llvm/ADT/DenseMap.h" 58 #include "llvm/Support/raw_ostream.h" 59 #include "llvm/TableGen/Error.h" 60 #include "llvm/TableGen/Record.h" 61 62 using namespace llvm; 63 64 namespace { 65 66 class VarLenCodeEmitterGen { 67 RecordKeeper &Records; 68 69 struct EncodingSegment { 70 unsigned BitWidth; 71 const Init *Value; 72 StringRef CustomEncoder = ""; 73 }; 74 75 class VarLenInst { 76 size_t NumBits; 77 78 // Set if any of the segment is not fixed value. 79 bool HasDynamicSegment; 80 81 SmallVector<EncodingSegment, 4> Segments; 82 83 void buildRec(const DagInit *DI); 84 85 StringRef getCustomEncoderName(const Init *EI) const { 86 if (const auto *DI = dyn_cast<DagInit>(EI)) { 87 if (DI->getNumArgs() && isa<StringInit>(DI->getArg(0))) 88 return cast<StringInit>(DI->getArg(0))->getValue(); 89 } 90 return ""; 91 } 92 93 public: 94 VarLenInst() : NumBits(0U), HasDynamicSegment(false) {} 95 96 explicit VarLenInst(const DagInit *DI); 97 98 /// Number of bits 99 size_t size() const { return NumBits; } 100 101 using const_iterator = decltype(Segments)::const_iterator; 102 103 const_iterator begin() const { return Segments.begin(); } 104 const_iterator end() const { return Segments.end(); } 105 size_t getNumSegments() const { return Segments.size(); } 106 107 bool isFixedValueOnly() const { return !HasDynamicSegment; } 108 }; 109 110 DenseMap<Record *, VarLenInst> VarLenInsts; 111 112 // Emit based values (i.e. 
fixed bits in the encoded instructions) 113 void emitInstructionBaseValues( 114 raw_ostream &OS, 115 ArrayRef<const CodeGenInstruction *> NumberedInstructions, 116 CodeGenTarget &Target, int HwMode = -1); 117 118 std::string getInstructionCase(Record *R, CodeGenTarget &Target); 119 std::string getInstructionCaseForEncoding(Record *R, Record *EncodingDef, 120 CodeGenTarget &Target); 121 122 public: 123 explicit VarLenCodeEmitterGen(RecordKeeper &R) : Records(R) {} 124 125 void run(raw_ostream &OS); 126 }; 127 128 } // end anonymous namespace 129 130 VarLenCodeEmitterGen::VarLenInst::VarLenInst(const DagInit *DI) : NumBits(0U) { 131 buildRec(DI); 132 for (const auto &S : Segments) 133 NumBits += S.BitWidth; 134 } 135 136 void VarLenCodeEmitterGen::VarLenInst::buildRec(const DagInit *DI) { 137 std::string Op = DI->getOperator()->getAsString(); 138 139 if (Op == "ascend" || Op == "descend") { 140 bool Reverse = Op == "descend"; 141 int i = Reverse ? DI->getNumArgs() - 1 : 0; 142 int e = Reverse ? -1 : DI->getNumArgs(); 143 int s = Reverse ? 
-1 : 1; 144 for (; i != e; i += s) { 145 const Init *Arg = DI->getArg(i); 146 if (const auto *BI = dyn_cast<BitsInit>(Arg)) { 147 if (!BI->isComplete()) 148 PrintFatalError("Expecting complete bits init in `" + Op + "`"); 149 Segments.push_back({BI->getNumBits(), BI}); 150 } else if (const auto *BI = dyn_cast<BitInit>(Arg)) { 151 if (!BI->isConcrete()) 152 PrintFatalError("Expecting concrete bit init in `" + Op + "`"); 153 Segments.push_back({1, BI}); 154 } else if (const auto *SubDI = dyn_cast<DagInit>(Arg)) { 155 buildRec(SubDI); 156 } else { 157 PrintFatalError("Unrecognized type of argument in `" + Op + 158 "`: " + Arg->getAsString()); 159 } 160 } 161 } else if (Op == "operand") { 162 // (operand <operand name>, <# of bits>, [(encoder <custom encoder>)]) 163 if (DI->getNumArgs() < 2) 164 PrintFatalError("Expecting at least 2 arguments for `operand`"); 165 HasDynamicSegment = true; 166 const Init *OperandName = DI->getArg(0), *NumBits = DI->getArg(1); 167 if (!isa<StringInit>(OperandName) || !isa<IntInit>(NumBits)) 168 PrintFatalError("Invalid argument types for `operand`"); 169 170 auto NumBitsVal = cast<IntInit>(NumBits)->getValue(); 171 if (NumBitsVal <= 0) 172 PrintFatalError("Invalid number of bits for `operand`"); 173 174 StringRef CustomEncoder; 175 if (DI->getNumArgs() >= 3) 176 CustomEncoder = getCustomEncoderName(DI->getArg(2)); 177 Segments.push_back( 178 {static_cast<unsigned>(NumBitsVal), OperandName, CustomEncoder}); 179 } else if (Op == "slice") { 180 // (slice <operand name>, <high / low bit>, <low / high bit>, 181 // [(encoder <custom encoder>)]) 182 if (DI->getNumArgs() < 3) 183 PrintFatalError("Expecting at least 3 arguments for `slice`"); 184 HasDynamicSegment = true; 185 Init *OperandName = DI->getArg(0), *HiBit = DI->getArg(1), 186 *LoBit = DI->getArg(2); 187 if (!isa<StringInit>(OperandName) || !isa<IntInit>(HiBit) || 188 !isa<IntInit>(LoBit)) 189 PrintFatalError("Invalid argument types for `slice`"); 190 191 auto HiBitVal = 
cast<IntInit>(HiBit)->getValue(), 192 LoBitVal = cast<IntInit>(LoBit)->getValue(); 193 if (HiBitVal < 0 || LoBitVal < 0) 194 PrintFatalError("Invalid bit range for `slice`"); 195 bool NeedSwap = false; 196 unsigned NumBits = 0U; 197 if (HiBitVal < LoBitVal) { 198 NeedSwap = true; 199 NumBits = static_cast<unsigned>(LoBitVal - HiBitVal + 1); 200 } else { 201 NumBits = static_cast<unsigned>(HiBitVal - LoBitVal + 1); 202 } 203 204 StringRef CustomEncoder; 205 if (DI->getNumArgs() >= 4) 206 CustomEncoder = getCustomEncoderName(DI->getArg(3)); 207 208 if (NeedSwap) { 209 // Normalization: Hi bit should always be the second argument. 210 Init *const NewArgs[] = {OperandName, LoBit, HiBit}; 211 Segments.push_back({NumBits, 212 DagInit::get(DI->getOperator(), nullptr, NewArgs, {}), 213 CustomEncoder}); 214 } else { 215 Segments.push_back({NumBits, DI, CustomEncoder}); 216 } 217 } 218 } 219 220 void VarLenCodeEmitterGen::run(raw_ostream &OS) { 221 CodeGenTarget Target(Records); 222 auto Insts = Records.getAllDerivedDefinitions("Instruction"); 223 224 auto NumberedInstructions = Target.getInstructionsByEnumValue(); 225 const CodeGenHwModes &HWM = Target.getHwModes(); 226 227 // The set of HwModes used by instruction encodings. 228 std::set<unsigned> HwModes; 229 for (const CodeGenInstruction *CGI : NumberedInstructions) { 230 Record *R = CGI->TheDef; 231 232 // Create the corresponding VarLenInst instance. 
233 if (R->getValueAsString("Namespace") == "TargetOpcode" || 234 R->getValueAsBit("isPseudo")) 235 continue; 236 237 if (const RecordVal *RV = R->getValue("EncodingInfos")) { 238 if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) { 239 EncodingInfoByHwMode EBM(DI->getDef(), HWM); 240 for (auto &KV : EBM) { 241 HwModes.insert(KV.first); 242 Record *EncodingDef = KV.second; 243 auto *DI = EncodingDef->getValueAsDag("Inst"); 244 VarLenInsts.insert({EncodingDef, VarLenInst(DI)}); 245 } 246 continue; 247 } 248 } 249 auto *DI = R->getValueAsDag("Inst"); 250 VarLenInsts.insert({R, VarLenInst(DI)}); 251 } 252 253 // Emit function declaration 254 OS << "void " << Target.getName() 255 << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n" 256 << " SmallVectorImpl<MCFixup> &Fixups,\n" 257 << " APInt &Inst,\n" 258 << " APInt &Scratch,\n" 259 << " const MCSubtargetInfo &STI) const {\n"; 260 261 // Emit instruction base values 262 if (HwModes.empty()) { 263 emitInstructionBaseValues(OS, NumberedInstructions, Target); 264 } else { 265 for (unsigned HwMode : HwModes) 266 emitInstructionBaseValues(OS, NumberedInstructions, Target, (int)HwMode); 267 } 268 269 if (!HwModes.empty()) { 270 OS << " const unsigned **Index;\n"; 271 OS << " const uint64_t *InstBits;\n"; 272 OS << " unsigned HwMode = STI.getHwMode();\n"; 273 OS << " switch (HwMode) {\n"; 274 OS << " default: llvm_unreachable(\"Unknown hardware mode!\"); break;\n"; 275 for (unsigned I : HwModes) { 276 OS << " case " << I << ": InstBits = InstBits_" << HWM.getMode(I).Name 277 << "; Index = Index_" << HWM.getMode(I).Name << "; break;\n"; 278 } 279 OS << " };\n"; 280 } 281 282 // Emit helper function to retrieve base values. 
283 OS << " auto getInstBits = [&](unsigned Opcode) -> APInt {\n" 284 << " unsigned NumBits = Index[Opcode][0];\n" 285 << " if (!NumBits)\n" 286 << " return APInt::getZeroWidth();\n" 287 << " unsigned Idx = Index[Opcode][1];\n" 288 << " ArrayRef<uint64_t> Data(&InstBits[Idx], " 289 << "APInt::getNumWords(NumBits));\n" 290 << " return APInt(NumBits, Data);\n" 291 << " };\n"; 292 293 // Map to accumulate all the cases. 294 std::map<std::string, std::vector<std::string>> CaseMap; 295 296 // Construct all cases statement for each opcode 297 for (Record *R : Insts) { 298 if (R->getValueAsString("Namespace") == "TargetOpcode" || 299 R->getValueAsBit("isPseudo")) 300 continue; 301 std::string InstName = 302 (R->getValueAsString("Namespace") + "::" + R->getName()).str(); 303 std::string Case = getInstructionCase(R, Target); 304 305 CaseMap[Case].push_back(std::move(InstName)); 306 } 307 308 // Emit initial function code 309 OS << " const unsigned opcode = MI.getOpcode();\n" 310 << " switch (opcode) {\n"; 311 312 // Emit each case statement 313 for (const auto &C : CaseMap) { 314 const std::string &Case = C.first; 315 const auto &InstList = C.second; 316 317 ListSeparator LS("\n"); 318 for (const auto &InstName : InstList) 319 OS << LS << " case " << InstName << ":"; 320 321 OS << " {\n"; 322 OS << Case; 323 OS << " break;\n" 324 << " }\n"; 325 } 326 // Default case: unhandled opcode 327 OS << " default:\n" 328 << " std::string msg;\n" 329 << " raw_string_ostream Msg(msg);\n" 330 << " Msg << \"Not supported instr: \" << MI;\n" 331 << " report_fatal_error(Msg.str().c_str());\n" 332 << " }\n"; 333 OS << "}\n\n"; 334 } 335 336 static void emitInstBits(raw_ostream &IS, raw_ostream &SS, const APInt &Bits, 337 unsigned &Index) { 338 if (!Bits.getNumWords()) { 339 IS.indent(4) << "{/*NumBits*/0, /*Index*/0},"; 340 return; 341 } 342 343 IS.indent(4) << "{/*NumBits*/" << Bits.getBitWidth() << ", " 344 << "/*Index*/" << Index << "},"; 345 346 SS.indent(4); 347 for (unsigned I = 0; I 
< Bits.getNumWords(); ++I, ++Index) 348 SS << "UINT64_C(" << utostr(Bits.getRawData()[I]) << "),"; 349 } 350 351 void VarLenCodeEmitterGen::emitInstructionBaseValues( 352 raw_ostream &OS, ArrayRef<const CodeGenInstruction *> NumberedInstructions, 353 CodeGenTarget &Target, int HwMode) { 354 std::string IndexArray, StorageArray; 355 raw_string_ostream IS(IndexArray), SS(StorageArray); 356 357 const CodeGenHwModes &HWM = Target.getHwModes(); 358 if (HwMode == -1) { 359 IS << " static const unsigned Index[][2] = {\n"; 360 SS << " static const uint64_t InstBits[] = {\n"; 361 } else { 362 StringRef Name = HWM.getMode(HwMode).Name; 363 IS << " static const unsigned Index_" << Name << "[][2] = {\n"; 364 SS << " static const uint64_t InstBits_" << Name << "[] = {\n"; 365 } 366 367 unsigned NumFixedValueWords = 0U; 368 for (const CodeGenInstruction *CGI : NumberedInstructions) { 369 Record *R = CGI->TheDef; 370 371 if (R->getValueAsString("Namespace") == "TargetOpcode" || 372 R->getValueAsBit("isPseudo")) { 373 IS.indent(4) << "{/*NumBits*/0, /*Index*/0},\n"; 374 continue; 375 } 376 377 Record *EncodingDef = R; 378 if (const RecordVal *RV = R->getValue("EncodingInfos")) { 379 if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) { 380 EncodingInfoByHwMode EBM(DI->getDef(), HWM); 381 if (EBM.hasMode(HwMode)) 382 EncodingDef = EBM.get(HwMode); 383 } 384 } 385 386 auto It = VarLenInsts.find(EncodingDef); 387 if (It == VarLenInsts.end()) 388 PrintFatalError(EncodingDef, "VarLenInst not found for this record"); 389 const VarLenInst &VLI = It->second; 390 391 unsigned i = 0U, BitWidth = VLI.size(); 392 393 // Start by filling in fixed values. 394 APInt Value(BitWidth, 0); 395 auto SI = VLI.begin(), SE = VLI.end(); 396 // Scan through all the segments that have fixed-bits values. 
397 while (i < BitWidth && SI != SE) { 398 unsigned SegmentNumBits = SI->BitWidth; 399 if (const auto *BI = dyn_cast<BitsInit>(SI->Value)) { 400 for (unsigned Idx = 0U; Idx != SegmentNumBits; ++Idx) { 401 auto *B = cast<BitInit>(BI->getBit(Idx)); 402 Value.setBitVal(i + Idx, B->getValue()); 403 } 404 } 405 if (const auto *BI = dyn_cast<BitInit>(SI->Value)) 406 Value.setBitVal(i, BI->getValue()); 407 408 i += SegmentNumBits; 409 ++SI; 410 } 411 412 emitInstBits(IS, SS, Value, NumFixedValueWords); 413 IS << '\t' << "// " << R->getName() << "\n"; 414 if (Value.getNumWords()) 415 SS << '\t' << "// " << R->getName() << "\n"; 416 } 417 IS.indent(4) << "{/*NumBits*/0, /*Index*/0}\n };\n"; 418 SS.indent(4) << "UINT64_C(0)\n };\n"; 419 420 OS << IS.str() << SS.str(); 421 } 422 423 std::string VarLenCodeEmitterGen::getInstructionCase(Record *R, 424 CodeGenTarget &Target) { 425 std::string Case; 426 if (const RecordVal *RV = R->getValue("EncodingInfos")) { 427 if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) { 428 const CodeGenHwModes &HWM = Target.getHwModes(); 429 EncodingInfoByHwMode EBM(DI->getDef(), HWM); 430 Case += " switch (HwMode) {\n"; 431 Case += " default: llvm_unreachable(\"Unhandled HwMode\");\n"; 432 for (auto &KV : EBM) { 433 Case += " case " + itostr(KV.first) + ": {\n"; 434 Case += getInstructionCaseForEncoding(R, KV.second, Target); 435 Case += " break;\n"; 436 Case += " }\n"; 437 } 438 Case += " }\n"; 439 return Case; 440 } 441 } 442 return getInstructionCaseForEncoding(R, R, Target); 443 } 444 445 std::string VarLenCodeEmitterGen::getInstructionCaseForEncoding( 446 Record *R, Record *EncodingDef, CodeGenTarget &Target) { 447 auto It = VarLenInsts.find(EncodingDef); 448 if (It == VarLenInsts.end()) 449 PrintFatalError(EncodingDef, "Parsed encoding record not found"); 450 const VarLenInst &VLI = It->second; 451 size_t BitWidth = VLI.size(); 452 453 CodeGenInstruction &CGI = Target.getInstruction(R); 454 455 std::string Case; 456 raw_string_ostream 
SS(Case); 457 // Resize the scratch buffer. 458 if (BitWidth && !VLI.isFixedValueOnly()) 459 SS.indent(6) << "Scratch = Scratch.zextOrSelf(" << BitWidth << ");\n"; 460 // Populate based value. 461 SS.indent(6) << "Inst = getInstBits(opcode);\n"; 462 463 // Process each segment in VLI. 464 size_t Offset = 0U; 465 for (const auto &ES : VLI) { 466 unsigned NumBits = ES.BitWidth; 467 const Init *Val = ES.Value; 468 // If it's a StringInit or DagInit, it's a reference to an operand 469 // or part of an operand. 470 if (isa<StringInit>(Val) || isa<DagInit>(Val)) { 471 StringRef OperandName; 472 unsigned LoBit = 0U; 473 if (const auto *SV = dyn_cast<StringInit>(Val)) { 474 OperandName = SV->getValue(); 475 } else { 476 // Normalized: (slice <operand name>, <high bit>, <low bit>) 477 const auto *DV = cast<DagInit>(Val); 478 OperandName = cast<StringInit>(DV->getArg(0))->getValue(); 479 LoBit = static_cast<unsigned>(cast<IntInit>(DV->getArg(2))->getValue()); 480 } 481 482 auto OpIdx = CGI.Operands.ParseOperandName(OperandName); 483 unsigned FlatOpIdx = CGI.Operands.getFlattenedOperandNumber(OpIdx); 484 StringRef CustomEncoder = CGI.Operands[OpIdx.first].EncoderMethodName; 485 if (ES.CustomEncoder.size()) 486 CustomEncoder = ES.CustomEncoder; 487 488 SS.indent(6) << "Scratch.clearAllBits();\n"; 489 SS.indent(6) << "// op: " << OperandName.drop_front(1) << "\n"; 490 if (CustomEncoder.empty()) 491 SS.indent(6) << "getMachineOpValue(MI, MI.getOperand(" 492 << utostr(FlatOpIdx) << ")"; 493 else 494 SS.indent(6) << CustomEncoder << "(MI, /*OpIdx=*/" << utostr(FlatOpIdx); 495 496 SS << ", /*Pos=*/" << utostr(Offset) << ", Scratch, Fixups, STI);\n"; 497 498 SS.indent(6) << "Inst.insertBits(" 499 << "Scratch.extractBits(" << utostr(NumBits) << ", " 500 << utostr(LoBit) << ")" 501 << ", " << Offset << ");\n"; 502 } 503 Offset += NumBits; 504 } 505 506 StringRef PostEmitter = R->getValueAsString("PostEncoderMethod"); 507 if (!PostEmitter.empty()) 508 SS.indent(6) << "Inst = " << 
PostEmitter << "(MI, Inst, STI);\n"; 509 510 return Case; 511 } 512 513 namespace llvm { 514 515 void emitVarLenCodeEmitter(RecordKeeper &R, raw_ostream &OS) { 516 VarLenCodeEmitterGen(R).run(OS); 517 } 518 519 } // end namespace llvm 520