//===- VarLenCodeEmitterGen.cpp - CEG for variable-length insts -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The CodeEmitterGen component for variable-length instructions.
//
// The basic CodeEmitterGen is almost exclusively designed for fixed-
// length instructions. A good analogy for its encoding scheme is how printf
// works: The (immutable) formatting string represents the fixed values in the
// encoded instruction. Placeholders (i.e. %something), on the other hand,
// represent encoding for instruction operands.
// ```
// printf("1101 %src 1001 %dst", <encoded value for operand `src`>,
//                               <encoded value for operand `dst`>);
// ```
// VarLenCodeEmitterGen in this file provides an alternative encoding scheme
// that works more like a C++ stream operator:
// ```
// OS << 0b1101;
// if (Cond)
//   OS << OperandEncoding0;
// OS << 0b1001 << OperandEncoding1;
// ```
// You are free to concatenate arbitrary types (and sizes) of encoding
// fragments on any bit position, bringing more flexibility in defining
// encodings for variable-length instructions.
//
// In a more specific way, instruction encoding is represented by a DAG type
// `Inst` field. Here is an example:
// ```
// dag Inst = (descend 0b1101, (operand "$src", 4), 0b1001,
//                     (operand "$dst", 4));
// ```
// It represents the following instruction encoding:
// ```
// MSB                                                     LSB
// 1101<encoding for operand src>1001<encoding for operand dst>
// ```
// For more details about DAG operators in the above snippet, please
// refer to \file include/llvm/Target/Target.td.
45 // 46 // VarLenCodeEmitter will convert the above DAG into the same helper function 47 // generated by CodeEmitter, `MCCodeEmitter::getBinaryCodeForInstr` (except 48 // for few details). 49 // 50 //===----------------------------------------------------------------------===// 51 52 #include "VarLenCodeEmitterGen.h" 53 #include "CodeGenInstruction.h" 54 #include "CodeGenTarget.h" 55 #include "SubtargetFeatureInfo.h" 56 #include "llvm/ADT/ArrayRef.h" 57 #include "llvm/ADT/DenseMap.h" 58 #include "llvm/Support/raw_ostream.h" 59 #include "llvm/TableGen/Error.h" 60 #include "llvm/TableGen/Record.h" 61 62 using namespace llvm; 63 64 namespace { 65 66 class VarLenCodeEmitterGen { 67 RecordKeeper &Records; 68 69 class VarLenInst { 70 size_t NumBits; 71 72 // Set if any of the segment is not fixed value. 73 bool HasDynamicSegment; 74 75 // {Number of bits, Value} 76 SmallVector<std::pair<unsigned, const Init *>, 4> Segments; 77 78 void buildRec(const DagInit *DI); 79 80 public: 81 VarLenInst() : NumBits(0U), HasDynamicSegment(false) {} 82 83 explicit VarLenInst(const DagInit *DI); 84 85 /// Number of bits 86 size_t size() const { return NumBits; } 87 88 using const_iterator = decltype(Segments)::const_iterator; 89 90 const_iterator begin() const { return Segments.begin(); } 91 const_iterator end() const { return Segments.end(); } 92 size_t getNumSegments() const { return Segments.size(); } 93 94 bool isFixedValueOnly() const { return !HasDynamicSegment; } 95 }; 96 97 DenseMap<Record *, VarLenInst> VarLenInsts; 98 99 // Emit based values (i.e. 
fixed bits in the encoded instructions) 100 void emitInstructionBaseValues( 101 raw_ostream &OS, 102 ArrayRef<const CodeGenInstruction *> NumberedInstructions, 103 CodeGenTarget &Target, int HwMode = -1); 104 105 std::string getInstructionCase(Record *R, CodeGenTarget &Target); 106 std::string getInstructionCaseForEncoding(Record *R, Record *EncodingDef, 107 CodeGenTarget &Target); 108 109 public: 110 explicit VarLenCodeEmitterGen(RecordKeeper &R) : Records(R) {} 111 112 void run(raw_ostream &OS); 113 }; 114 115 } // end anonymous namespace 116 117 VarLenCodeEmitterGen::VarLenInst::VarLenInst(const DagInit *DI) : NumBits(0U) { 118 buildRec(DI); 119 for (const auto &S : Segments) 120 NumBits += S.first; 121 } 122 123 void VarLenCodeEmitterGen::VarLenInst::buildRec(const DagInit *DI) { 124 std::string Op = DI->getOperator()->getAsString(); 125 126 if (Op == "ascend" || Op == "descend") { 127 bool Reverse = Op == "descend"; 128 int i = Reverse ? DI->getNumArgs() - 1 : 0; 129 int e = Reverse ? -1 : DI->getNumArgs(); 130 int s = Reverse ? 
-1 : 1; 131 for (; i != e; i += s) { 132 const Init *Arg = DI->getArg(i); 133 if (const auto *BI = dyn_cast<BitsInit>(Arg)) { 134 if (!BI->isComplete()) 135 PrintFatalError("Expecting complete bits init in `" + Op + "`"); 136 Segments.push_back({BI->getNumBits(), BI}); 137 } else if (const auto *BI = dyn_cast<BitInit>(Arg)) { 138 if (!BI->isConcrete()) 139 PrintFatalError("Expecting concrete bit init in `" + Op + "`"); 140 Segments.push_back({1, BI}); 141 } else if (const auto *SubDI = dyn_cast<DagInit>(Arg)) { 142 buildRec(SubDI); 143 } else { 144 PrintFatalError("Unrecognized type of argument in `" + Op + 145 "`: " + Arg->getAsString()); 146 } 147 } 148 } else if (Op == "operand") { 149 // (operand <operand name>, <# of bits>) 150 if (DI->getNumArgs() != 2) 151 PrintFatalError("Expecting 2 arguments for `operand`"); 152 HasDynamicSegment = true; 153 const Init *OperandName = DI->getArg(0), *NumBits = DI->getArg(1); 154 if (!isa<StringInit>(OperandName) || !isa<IntInit>(NumBits)) 155 PrintFatalError("Invalid argument types for `operand`"); 156 157 auto NumBitsVal = cast<IntInit>(NumBits)->getValue(); 158 if (NumBitsVal <= 0) 159 PrintFatalError("Invalid number of bits for `operand`"); 160 161 Segments.push_back({NumBitsVal, OperandName}); 162 } else if (Op == "slice") { 163 // (slice <operand name>, <high / low bit>, <low / high bit>) 164 if (DI->getNumArgs() != 3) 165 PrintFatalError("Expecting 3 arguments for `slice`"); 166 HasDynamicSegment = true; 167 Init *OperandName = DI->getArg(0), *HiBit = DI->getArg(1), 168 *LoBit = DI->getArg(2); 169 if (!isa<StringInit>(OperandName) || !isa<IntInit>(HiBit) || 170 !isa<IntInit>(LoBit)) 171 PrintFatalError("Invalid argument types for `slice`"); 172 173 auto HiBitVal = cast<IntInit>(HiBit)->getValue(), 174 LoBitVal = cast<IntInit>(LoBit)->getValue(); 175 if (HiBitVal < 0 || LoBitVal < 0) 176 PrintFatalError("Invalid bit range for `slice`"); 177 bool NeedSwap = false; 178 unsigned NumBits = 0U; 179 if (HiBitVal < LoBitVal) 
{ 180 NeedSwap = true; 181 NumBits = static_cast<unsigned>(LoBitVal - HiBitVal + 1); 182 } else { 183 NumBits = static_cast<unsigned>(HiBitVal - LoBitVal + 1); 184 } 185 186 if (NeedSwap) { 187 // Normalization: Hi bit should always be the second argument. 188 Init *const NewArgs[] = {OperandName, LoBit, HiBit}; 189 Segments.push_back( 190 {NumBits, DagInit::get(DI->getOperator(), nullptr, NewArgs, {})}); 191 } else { 192 Segments.push_back({NumBits, DI}); 193 } 194 } 195 } 196 197 void VarLenCodeEmitterGen::run(raw_ostream &OS) { 198 CodeGenTarget Target(Records); 199 auto Insts = Records.getAllDerivedDefinitions("Instruction"); 200 201 auto NumberedInstructions = Target.getInstructionsByEnumValue(); 202 const CodeGenHwModes &HWM = Target.getHwModes(); 203 204 // The set of HwModes used by instruction encodings. 205 std::set<unsigned> HwModes; 206 for (const CodeGenInstruction *CGI : NumberedInstructions) { 207 Record *R = CGI->TheDef; 208 209 // Create the corresponding VarLenInst instance. 
210 if (R->getValueAsString("Namespace") == "TargetOpcode" || 211 R->getValueAsBit("isPseudo")) 212 continue; 213 214 if (const RecordVal *RV = R->getValue("EncodingInfos")) { 215 if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) { 216 EncodingInfoByHwMode EBM(DI->getDef(), HWM); 217 for (auto &KV : EBM) { 218 HwModes.insert(KV.first); 219 Record *EncodingDef = KV.second; 220 auto *DI = EncodingDef->getValueAsDag("Inst"); 221 VarLenInsts.insert({EncodingDef, VarLenInst(DI)}); 222 } 223 continue; 224 } 225 } 226 auto *DI = R->getValueAsDag("Inst"); 227 VarLenInsts.insert({R, VarLenInst(DI)}); 228 } 229 230 // Emit function declaration 231 OS << "void " << Target.getName() 232 << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n" 233 << " SmallVectorImpl<MCFixup> &Fixups,\n" 234 << " APInt &Inst,\n" 235 << " APInt &Scratch,\n" 236 << " const MCSubtargetInfo &STI) const {\n"; 237 238 // Emit instruction base values 239 if (HwModes.empty()) { 240 emitInstructionBaseValues(OS, NumberedInstructions, Target); 241 } else { 242 for (unsigned HwMode : HwModes) 243 emitInstructionBaseValues(OS, NumberedInstructions, Target, (int)HwMode); 244 } 245 246 if (!HwModes.empty()) { 247 OS << " const unsigned **Index;\n"; 248 OS << " const uint64_t *InstBits;\n"; 249 OS << " unsigned HwMode = STI.getHwMode();\n"; 250 OS << " switch (HwMode) {\n"; 251 OS << " default: llvm_unreachable(\"Unknown hardware mode!\"); break;\n"; 252 for (unsigned I : HwModes) { 253 OS << " case " << I << ": InstBits = InstBits_" << HWM.getMode(I).Name 254 << "; Index = Index_" << HWM.getMode(I).Name << "; break;\n"; 255 } 256 OS << " };\n"; 257 } 258 259 // Emit helper function to retrieve base values. 
260 OS << " auto getInstBits = [&](unsigned Opcode) -> APInt {\n" 261 << " unsigned NumBits = Index[Opcode][0];\n" 262 << " if (!NumBits)\n" 263 << " return APInt::getZeroWidth();\n" 264 << " unsigned Idx = Index[Opcode][1];\n" 265 << " ArrayRef<uint64_t> Data(&InstBits[Idx], " 266 << "APInt::getNumWords(NumBits));\n" 267 << " return APInt(NumBits, Data);\n" 268 << " };\n"; 269 270 // Map to accumulate all the cases. 271 std::map<std::string, std::vector<std::string>> CaseMap; 272 273 // Construct all cases statement for each opcode 274 for (Record *R : Insts) { 275 if (R->getValueAsString("Namespace") == "TargetOpcode" || 276 R->getValueAsBit("isPseudo")) 277 continue; 278 std::string InstName = 279 (R->getValueAsString("Namespace") + "::" + R->getName()).str(); 280 std::string Case = getInstructionCase(R, Target); 281 282 CaseMap[Case].push_back(std::move(InstName)); 283 } 284 285 // Emit initial function code 286 OS << " const unsigned opcode = MI.getOpcode();\n" 287 << " switch (opcode) {\n"; 288 289 // Emit each case statement 290 for (const auto &C : CaseMap) { 291 const std::string &Case = C.first; 292 const auto &InstList = C.second; 293 294 ListSeparator LS("\n"); 295 for (const auto &InstName : InstList) 296 OS << LS << " case " << InstName << ":"; 297 298 OS << " {\n"; 299 OS << Case; 300 OS << " break;\n" 301 << " }\n"; 302 } 303 // Default case: unhandled opcode 304 OS << " default:\n" 305 << " std::string msg;\n" 306 << " raw_string_ostream Msg(msg);\n" 307 << " Msg << \"Not supported instr: \" << MI;\n" 308 << " report_fatal_error(Msg.str().c_str());\n" 309 << " }\n"; 310 OS << "}\n\n"; 311 } 312 313 static void emitInstBits(raw_ostream &IS, raw_ostream &SS, const APInt &Bits, 314 unsigned &Index) { 315 if (!Bits.getNumWords()) { 316 IS.indent(4) << "{/*NumBits*/0, /*Index*/0},"; 317 return; 318 } 319 320 IS.indent(4) << "{/*NumBits*/" << Bits.getBitWidth() << ", " 321 << "/*Index*/" << Index << "},"; 322 323 SS.indent(4); 324 for (unsigned I = 0; I 
< Bits.getNumWords(); ++I, ++Index) 325 SS << "UINT64_C(" << utostr(Bits.getRawData()[I]) << "),"; 326 } 327 328 void VarLenCodeEmitterGen::emitInstructionBaseValues( 329 raw_ostream &OS, ArrayRef<const CodeGenInstruction *> NumberedInstructions, 330 CodeGenTarget &Target, int HwMode) { 331 std::string IndexArray, StorageArray; 332 raw_string_ostream IS(IndexArray), SS(StorageArray); 333 334 const CodeGenHwModes &HWM = Target.getHwModes(); 335 if (HwMode == -1) { 336 IS << " static const unsigned Index[][2] = {\n"; 337 SS << " static const uint64_t InstBits[] = {\n"; 338 } else { 339 StringRef Name = HWM.getMode(HwMode).Name; 340 IS << " static const unsigned Index_" << Name << "[][2] = {\n"; 341 SS << " static const uint64_t InstBits_" << Name << "[] = {\n"; 342 } 343 344 unsigned NumFixedValueWords = 0U; 345 for (const CodeGenInstruction *CGI : NumberedInstructions) { 346 Record *R = CGI->TheDef; 347 348 if (R->getValueAsString("Namespace") == "TargetOpcode" || 349 R->getValueAsBit("isPseudo")) { 350 IS.indent(4) << "{/*NumBits*/0, /*Index*/0},\n"; 351 continue; 352 } 353 354 Record *EncodingDef = R; 355 if (const RecordVal *RV = R->getValue("EncodingInfos")) { 356 if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) { 357 EncodingInfoByHwMode EBM(DI->getDef(), HWM); 358 if (EBM.hasMode(HwMode)) 359 EncodingDef = EBM.get(HwMode); 360 } 361 } 362 363 auto It = VarLenInsts.find(EncodingDef); 364 if (It == VarLenInsts.end()) 365 PrintFatalError(EncodingDef, "VarLenInst not found for this record"); 366 const VarLenInst &VLI = It->second; 367 368 unsigned i = 0U, BitWidth = VLI.size(); 369 370 // Start by filling in fixed values. 371 APInt Value(BitWidth, 0); 372 auto SI = VLI.begin(), SE = VLI.end(); 373 // Scan through all the segments that have fixed-bits values. 
374 while (i < BitWidth && SI != SE) { 375 unsigned SegmentNumBits = SI->first; 376 if (const auto *BI = dyn_cast<BitsInit>(SI->second)) { 377 for (unsigned Idx = 0U; Idx != SegmentNumBits; ++Idx) { 378 auto *B = cast<BitInit>(BI->getBit(Idx)); 379 Value.setBitVal(i + Idx, B->getValue()); 380 } 381 } 382 if (const auto *BI = dyn_cast<BitInit>(SI->second)) 383 Value.setBitVal(i, BI->getValue()); 384 385 i += SegmentNumBits; 386 ++SI; 387 } 388 389 emitInstBits(IS, SS, Value, NumFixedValueWords); 390 IS << '\t' << "// " << R->getName() << "\n"; 391 if (Value.getNumWords()) 392 SS << '\t' << "// " << R->getName() << "\n"; 393 } 394 IS.indent(4) << "{/*NumBits*/0, /*Index*/0}\n };\n"; 395 SS.indent(4) << "UINT64_C(0)\n };\n"; 396 397 OS << IS.str() << SS.str(); 398 } 399 400 std::string VarLenCodeEmitterGen::getInstructionCase(Record *R, 401 CodeGenTarget &Target) { 402 std::string Case; 403 if (const RecordVal *RV = R->getValue("EncodingInfos")) { 404 if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) { 405 const CodeGenHwModes &HWM = Target.getHwModes(); 406 EncodingInfoByHwMode EBM(DI->getDef(), HWM); 407 Case += " switch (HwMode) {\n"; 408 Case += " default: llvm_unreachable(\"Unhandled HwMode\");\n"; 409 for (auto &KV : EBM) { 410 Case += " case " + itostr(KV.first) + ": {\n"; 411 Case += getInstructionCaseForEncoding(R, KV.second, Target); 412 Case += " break;\n"; 413 Case += " }\n"; 414 } 415 Case += " }\n"; 416 return Case; 417 } 418 } 419 return getInstructionCaseForEncoding(R, R, Target); 420 } 421 422 std::string VarLenCodeEmitterGen::getInstructionCaseForEncoding( 423 Record *R, Record *EncodingDef, CodeGenTarget &Target) { 424 auto It = VarLenInsts.find(EncodingDef); 425 if (It == VarLenInsts.end()) 426 PrintFatalError(EncodingDef, "Parsed encoding record not found"); 427 const VarLenInst &VLI = It->second; 428 size_t BitWidth = VLI.size(); 429 430 CodeGenInstruction &CGI = Target.getInstruction(R); 431 432 std::string Case; 433 raw_string_ostream 
SS(Case); 434 // Resize the scratch buffer. 435 if (BitWidth && !VLI.isFixedValueOnly()) 436 SS.indent(6) << "Scratch = Scratch.zextOrSelf(" << BitWidth << ");\n"; 437 // Populate based value. 438 SS.indent(6) << "Inst = getInstBits(opcode);\n"; 439 440 // Process each segment in VLI. 441 size_t Offset = 0U; 442 for (const auto &Pair : VLI) { 443 unsigned NumBits = Pair.first; 444 const Init *Val = Pair.second; 445 // If it's a StringInit or DagInit, it's a reference to an operand 446 // or part of an operand. 447 if (isa<StringInit>(Val) || isa<DagInit>(Val)) { 448 StringRef OperandName; 449 unsigned LoBit = 0U; 450 if (const auto *SV = dyn_cast<StringInit>(Val)) { 451 OperandName = SV->getValue(); 452 } else { 453 // Normalized: (slice <operand name>, <high bit>, <low bit>) 454 const auto *DV = cast<DagInit>(Val); 455 OperandName = cast<StringInit>(DV->getArg(0))->getValue(); 456 LoBit = static_cast<unsigned>(cast<IntInit>(DV->getArg(2))->getValue()); 457 } 458 459 auto OpIdx = CGI.Operands.ParseOperandName(OperandName); 460 unsigned FlatOpIdx = CGI.Operands.getFlattenedOperandNumber(OpIdx); 461 StringRef EncoderMethodName = "getMachineOpValue"; 462 auto &CustomEncoder = CGI.Operands[OpIdx.first].EncoderMethodName; 463 if (!CustomEncoder.empty()) 464 EncoderMethodName = CustomEncoder; 465 466 SS.indent(6) << "Scratch.clearAllBits();\n"; 467 SS.indent(6) << "// op: " << OperandName.drop_front(1) << "\n"; 468 SS.indent(6) << EncoderMethodName << "(MI, MI.getOperand(" 469 << utostr(FlatOpIdx) << "), Scratch, Fixups, STI);\n"; 470 SS.indent(6) << "Inst.insertBits(" 471 << "Scratch.extractBits(" << utostr(NumBits) << ", " 472 << utostr(LoBit) << ")" 473 << ", " << Offset << ");\n"; 474 } 475 Offset += NumBits; 476 } 477 478 StringRef PostEmitter = R->getValueAsString("PostEncoderMethod"); 479 if (!PostEmitter.empty()) 480 SS.indent(6) << "Inst = " << PostEmitter << "(MI, Inst, STI);\n"; 481 482 return Case; 483 } 484 485 namespace llvm { 486 487 void 
emitVarLenCodeEmitter(RecordKeeper &R, raw_ostream &OS) { 488 VarLenCodeEmitterGen(R).run(OS); 489 } 490 491 } // end namespace llvm 492