1 //===---------------- DecoderEmitter.cpp - Decoder Generator --------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // It contains the tablegen backend that emits the decoder functions for 10 // targets with fixed/variable length instruction set. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CodeGenInstruction.h" 15 #include "CodeGenTarget.h" 16 #include "InfoByHwMode.h" 17 #include "VarLenCodeEmitterGen.h" 18 #include "llvm/ADT/APInt.h" 19 #include "llvm/ADT/ArrayRef.h" 20 #include "llvm/ADT/CachedHashString.h" 21 #include "llvm/ADT/STLExtras.h" 22 #include "llvm/ADT/SetVector.h" 23 #include "llvm/ADT/SmallString.h" 24 #include "llvm/ADT/Statistic.h" 25 #include "llvm/ADT/StringExtras.h" 26 #include "llvm/ADT/StringRef.h" 27 #include "llvm/MC/MCFixedLenDisassembler.h" 28 #include "llvm/Support/Casting.h" 29 #include "llvm/Support/Debug.h" 30 #include "llvm/Support/ErrorHandling.h" 31 #include "llvm/Support/FormattedStream.h" 32 #include "llvm/Support/LEB128.h" 33 #include "llvm/Support/raw_ostream.h" 34 #include "llvm/TableGen/Error.h" 35 #include "llvm/TableGen/Record.h" 36 #include <algorithm> 37 #include <cassert> 38 #include <cstddef> 39 #include <cstdint> 40 #include <map> 41 #include <memory> 42 #include <set> 43 #include <string> 44 #include <utility> 45 #include <vector> 46 47 using namespace llvm; 48 49 #define DEBUG_TYPE "decoder-emitter" 50 51 namespace { 52 53 STATISTIC(NumEncodings, "Number of encodings considered"); 54 STATISTIC(NumEncodingsLackingDisasm, "Number of encodings without disassembler info"); 55 STATISTIC(NumInstructions, "Number of instructions considered"); 56 STATISTIC(NumEncodingsSupported, "Number of 
encodings supported"); 57 STATISTIC(NumEncodingsOmitted, "Number of encodings omitted"); 58 59 struct EncodingField { 60 unsigned Base, Width, Offset; 61 EncodingField(unsigned B, unsigned W, unsigned O) 62 : Base(B), Width(W), Offset(O) { } 63 }; 64 65 struct OperandInfo { 66 std::vector<EncodingField> Fields; 67 std::string Decoder; 68 bool HasCompleteDecoder; 69 uint64_t InitValue; 70 71 OperandInfo(std::string D, bool HCD) 72 : Decoder(std::move(D)), HasCompleteDecoder(HCD), InitValue(0) {} 73 74 void addField(unsigned Base, unsigned Width, unsigned Offset) { 75 Fields.push_back(EncodingField(Base, Width, Offset)); 76 } 77 78 unsigned numFields() const { return Fields.size(); } 79 80 typedef std::vector<EncodingField>::const_iterator const_iterator; 81 82 const_iterator begin() const { return Fields.begin(); } 83 const_iterator end() const { return Fields.end(); } 84 }; 85 86 typedef std::vector<uint8_t> DecoderTable; 87 typedef uint32_t DecoderFixup; 88 typedef std::vector<DecoderFixup> FixupList; 89 typedef std::vector<FixupList> FixupScopeList; 90 typedef SmallSetVector<CachedHashString, 16> PredicateSet; 91 typedef SmallSetVector<CachedHashString, 16> DecoderSet; 92 struct DecoderTableInfo { 93 DecoderTable Table; 94 FixupScopeList FixupStack; 95 PredicateSet Predicates; 96 DecoderSet Decoders; 97 }; 98 99 struct EncodingAndInst { 100 const Record *EncodingDef; 101 const CodeGenInstruction *Inst; 102 StringRef HwModeName; 103 104 EncodingAndInst(const Record *EncodingDef, const CodeGenInstruction *Inst, 105 StringRef HwModeName = "") 106 : EncodingDef(EncodingDef), Inst(Inst), HwModeName(HwModeName) {} 107 }; 108 109 struct EncodingIDAndOpcode { 110 unsigned EncodingID; 111 unsigned Opcode; 112 113 EncodingIDAndOpcode() : EncodingID(0), Opcode(0) {} 114 EncodingIDAndOpcode(unsigned EncodingID, unsigned Opcode) 115 : EncodingID(EncodingID), Opcode(Opcode) {} 116 }; 117 118 raw_ostream &operator<<(raw_ostream &OS, const EncodingAndInst &Value) { 119 if 
(Value.EncodingDef != Value.Inst->TheDef) 120 OS << Value.EncodingDef->getName() << ":"; 121 OS << Value.Inst->TheDef->getName(); 122 return OS; 123 } 124 125 class DecoderEmitter { 126 RecordKeeper &RK; 127 std::vector<EncodingAndInst> NumberedEncodings; 128 129 public: 130 // Defaults preserved here for documentation, even though they aren't 131 // strictly necessary given the way that this is currently being called. 132 DecoderEmitter(RecordKeeper &R, std::string PredicateNamespace, 133 std::string GPrefix = "if (", 134 std::string GPostfix = " == MCDisassembler::Fail)", 135 std::string ROK = "MCDisassembler::Success", 136 std::string RFail = "MCDisassembler::Fail", std::string L = "") 137 : RK(R), Target(R), PredicateNamespace(std::move(PredicateNamespace)), 138 GuardPrefix(std::move(GPrefix)), GuardPostfix(std::move(GPostfix)), 139 ReturnOK(std::move(ROK)), ReturnFail(std::move(RFail)), 140 Locals(std::move(L)) {} 141 142 // Emit the decoder state machine table. 143 void emitTable(formatted_raw_ostream &o, DecoderTable &Table, 144 unsigned Indentation, unsigned BitWidth, 145 StringRef Namespace) const; 146 void emitInstrLenTable(formatted_raw_ostream &OS, 147 std::vector<unsigned> &InstrLen) const; 148 void emitPredicateFunction(formatted_raw_ostream &OS, 149 PredicateSet &Predicates, 150 unsigned Indentation) const; 151 void emitDecoderFunction(formatted_raw_ostream &OS, 152 DecoderSet &Decoders, 153 unsigned Indentation) const; 154 155 // run - Output the code emitter 156 void run(raw_ostream &o); 157 158 private: 159 CodeGenTarget Target; 160 161 public: 162 std::string PredicateNamespace; 163 std::string GuardPrefix, GuardPostfix; 164 std::string ReturnOK, ReturnFail; 165 std::string Locals; 166 }; 167 168 } // end anonymous namespace 169 170 // The set (BIT_TRUE, BIT_FALSE, BIT_UNSET) represents a ternary logic system 171 // for a bit value. 172 // 173 // BIT_UNFILTERED is used as the init value for a filter position. 
It is used 174 // only for filter processings. 175 typedef enum { 176 BIT_TRUE, // '1' 177 BIT_FALSE, // '0' 178 BIT_UNSET, // '?' 179 BIT_UNFILTERED // unfiltered 180 } bit_value_t; 181 182 static bool ValueSet(bit_value_t V) { 183 return (V == BIT_TRUE || V == BIT_FALSE); 184 } 185 186 static bool ValueNotSet(bit_value_t V) { 187 return (V == BIT_UNSET); 188 } 189 190 static int Value(bit_value_t V) { 191 return ValueNotSet(V) ? -1 : (V == BIT_FALSE ? 0 : 1); 192 } 193 194 static bit_value_t bitFromBits(const BitsInit &bits, unsigned index) { 195 if (BitInit *bit = dyn_cast<BitInit>(bits.getBit(index))) 196 return bit->getValue() ? BIT_TRUE : BIT_FALSE; 197 198 // The bit is uninitialized. 199 return BIT_UNSET; 200 } 201 202 // Prints the bit value for each position. 203 static void dumpBits(raw_ostream &o, const BitsInit &bits) { 204 for (unsigned index = bits.getNumBits(); index > 0; --index) { 205 switch (bitFromBits(bits, index - 1)) { 206 case BIT_TRUE: 207 o << "1"; 208 break; 209 case BIT_FALSE: 210 o << "0"; 211 break; 212 case BIT_UNSET: 213 o << "_"; 214 break; 215 default: 216 llvm_unreachable("unexpected return value from bitFromBits"); 217 } 218 } 219 } 220 221 static BitsInit &getBitsField(const Record &def, StringRef str) { 222 const RecordVal *RV = def.getValue(str); 223 if (BitsInit *Bits = dyn_cast<BitsInit>(RV->getValue())) 224 return *Bits; 225 226 // variable length instruction 227 VarLenInst VLI = VarLenInst(cast<DagInit>(RV->getValue()), RV); 228 SmallVector<Init *, 16> Bits; 229 230 for (auto &SI : VLI) { 231 if (const BitsInit *BI = dyn_cast<BitsInit>(SI.Value)) { 232 for (unsigned Idx = 0U; Idx < BI->getNumBits(); ++Idx) { 233 Bits.push_back(BI->getBit(Idx)); 234 } 235 } else if (const BitInit *BI = dyn_cast<BitInit>(SI.Value)) { 236 Bits.push_back(const_cast<BitInit *>(BI)); 237 } else { 238 for (unsigned Idx = 0U; Idx < SI.BitWidth; ++Idx) 239 Bits.push_back(UnsetInit::get()); 240 } 241 } 242 243 return *BitsInit::get(Bits); 244 } 245 
// Representation of the instruction to work on: one bit_value_t per encoding
// bit position.
typedef std::vector<bit_value_t> insn_t;

namespace {

// Key used in Filter::FilterChooserMap for the sub-chooser that handles the
// instructions whose segment value is not a known constant (see
// Filter::getVariableFC()).
static const uint64_t NO_FIXED_SEGMENTS_SENTINEL = -1ULL;

class FilterChooser;

/// Filter - Filter works with FilterChooser to produce the decoding tree for
/// the ISA.
///
/// It is useful to think of a Filter as governing the switch stmts of the
/// decoding tree in a certain level.  Each case stmt delegates to an inferior
/// FilterChooser to decide what further decoding logic to employ, or in other
/// words, what other remaining bits to look at.  The FilterChooser eventually
/// chooses a best Filter to do its job.
///
/// This recursive scheme ends when the number of Opcodes assigned to the
/// FilterChooser becomes 1 or if there is a conflict.  A conflict happens when
/// the Filter/FilterChooser combo does not know how to distinguish among the
/// Opcodes assigned.
///
/// An example of a conflict is
///
/// Conflict:
///                     111101000.00........00010000....
///                     111101000.00........0001........
///                     1111010...00........0001........
///                     1111010...00....................
///                     1111010.........................
///                     1111............................
///                     ................................
///     VST4q8a         111101000_00________00010000____
///     VST4q8b         111101000_00________00010000____
///
/// The Debug output shows the path that the decoding tree follows to reach
/// the conclusion that there is a conflict.  VST4q8a is a vst4 to double-spaced
/// even registers, while VST4q8b is a vst4 to double-spaced odd registers.
///
/// The encoding info in the .td files does not specify this meta information,
/// which could have been used by the decoder to resolve the conflict.  The
/// decoder could try to decode the even/odd register numbering and assign to
/// VST4q8a or VST4q8b, but for the time being, the decoder chooses the "a"
/// version and returns the Opcode since the two have the same Asm format
/// string.
class Filter {
protected:
  const FilterChooser *Owner;// points to the FilterChooser who owns this filter
  unsigned StartBit; // the starting bit position
  unsigned NumBits; // number of bits to filter
  bool Mixed; // a mixed region contains both set and unset bits

  // Map of well-known segment value to the set of uid's with that value.
  std::map<uint64_t, std::vector<EncodingIDAndOpcode>>
      FilteredInstructions;

  // Set of uid's with non-constant segment values.
  std::vector<EncodingIDAndOpcode> VariableInstructions;

  // Map of well-known segment value to its delegate.
  std::map<uint64_t, std::unique_ptr<const FilterChooser>> FilterChooserMap;

  // Number of instructions which fall under FilteredInstructions category.
  unsigned NumFiltered;

  // Keeps track of the last opcode in the filtered bucket.
  EncodingIDAndOpcode LastOpcFiltered;

public:
  // Move constructor; transfers the buckets and sub-choosers out of f.
  Filter(Filter &&f);

  // Builds the filter over bits [startBit, startBit + numBits) of owner's
  // opcodes, sorting each opcode into FilteredInstructions or
  // VariableInstructions.
  Filter(FilterChooser &owner, unsigned startBit, unsigned numBits, bool mixed);

  ~Filter() = default;

  unsigned getNumFiltered() const { return NumFiltered; }

  // Returns the only filtered opcode.  Only valid when exactly one instruction
  // was filtered (asserted).
  EncodingIDAndOpcode getSingletonOpc() const {
    assert(NumFiltered == 1);
    return LastOpcFiltered;
  }

  // Return the filter chooser for the group of instructions without constant
  // segment values.  Only valid in the singleton case, where the map holds
  // exactly the NO_FIXED_SEGMENTS_SENTINEL entry (both asserted).
  const FilterChooser &getVariableFC() const {
    assert(NumFiltered == 1);
    assert(FilterChooserMap.size() == 1);
    return *(FilterChooserMap.find(NO_FIXED_SEGMENTS_SENTINEL)->second);
  }

  // Divides the decoding task into sub tasks and delegates them to the
  // inferior FilterChooser's.
  //
  // A special case arises when there's only one entry in the filtered
  // instructions.  In order to unambiguously decode the singleton, we need to
  // match the remaining undecoded encoding bits against the singleton.
  void recurse();

  // Emit table entries to decode instructions given a segment or segments of
  // bits.
  void emitTableEntry(DecoderTableInfo &TableInfo) const;

  // Returns the number of fanout produced by the filter.  More fanout implies
  // the filter distinguishes more categories of instructions.
  unsigned usefulness() const;
}; // end class Filter

} // end anonymous namespace

// These are states of our finite state machines used in FilterChooser's
// filterProcessor() which produces the filter candidates to use.
typedef enum {
  ATTR_NONE,
  ATTR_FILTERED,
  ATTR_ALL_SET,
  ATTR_ALL_UNSET,
  ATTR_MIXED
} bitAttr_t;

/// FilterChooser - FilterChooser chooses the best filter among a set of Filters
/// in order to perform the decoding of instructions at the current level.
///
/// Decoding proceeds from the top down.  Based on the well-known encoding bits
/// of instructions available, FilterChooser builds up the possible Filters that
/// can further the task of decoding by distinguishing among the remaining
/// candidate instructions.
///
/// Once a filter has been chosen, it is called upon to divide the decoding task
/// into sub-tasks and delegates them to its inferior FilterChoosers for further
/// processing.
///
/// It is useful to think of a Filter as governing the switch stmts of the
/// decoding tree.  And each case is delegated to an inferior FilterChooser to
/// decide what further remaining bits to look at.
namespace {

class FilterChooser {
protected:
  friend class Filter;

  // Vector of codegen instructions to choose our filter.
  ArrayRef<EncodingAndInst> AllInstructions;

  // Vector of uid's for this filter chooser to work on.
  // The first member of the pair is the opcode id being decoded, the second is
  // the opcode id that should be emitted.
  const std::vector<EncodingIDAndOpcode> &Opcodes;

  // Lookup table for the operand decoding of instructions.
  const std::map<unsigned, std::vector<OperandInfo>> &Operands;

  // Vector of candidate filters.
  std::vector<Filter> Filters;

  // Array of bit values passed down from our parent.
  // Set to all BIT_UNFILTERED's for Parent == NULL.
  std::vector<bit_value_t> FilterBitValues;

  // Links to the FilterChooser above us in the decoding tree.
  const FilterChooser *Parent;

  // Index of the best filter from Filters; -1 until one has been chosen.
  int BestIndex;

  // Width of instructions
  unsigned BitWidth;

  // Parent emitter
  const DecoderEmitter *Emitter;

public:
  // Top-level chooser: has no parent and starts with every bit position
  // BIT_UNFILTERED.  Filtering runs immediately from the constructor.
  FilterChooser(ArrayRef<EncodingAndInst> Insts,
                const std::vector<EncodingIDAndOpcode> &IDs,
                const std::map<unsigned, std::vector<OperandInfo>> &Ops,
                unsigned BW, const DecoderEmitter *E)
      : AllInstructions(Insts), Opcodes(IDs), Operands(Ops),
        FilterBitValues(BW, BIT_UNFILTERED), Parent(nullptr), BestIndex(-1),
        BitWidth(BW), Emitter(E) {
    doFilter();
  }

  // Inferior chooser: takes its bit values from the caller and its bit width
  // and emitter from `parent`.  Filtering runs immediately from the
  // constructor.
  FilterChooser(ArrayRef<EncodingAndInst> Insts,
                const std::vector<EncodingIDAndOpcode> &IDs,
                const std::map<unsigned, std::vector<OperandInfo>> &Ops,
                const std::vector<bit_value_t> &ParentFilterBitValues,
                const FilterChooser &parent)
      : AllInstructions(Insts), Opcodes(IDs), Operands(Ops),
        FilterBitValues(ParentFilterBitValues), Parent(&parent), BestIndex(-1),
        BitWidth(parent.BitWidth), Emitter(parent.Emitter) {
    doFilter();
  }

  FilterChooser(const FilterChooser &) = delete;
  void operator=(const FilterChooser &) = delete;

  unsigned getBitWidth() const { return BitWidth; }

protected:
  // Populates the insn given the uid.
  //
  // Insn is resized to the larger of BitWidth and the encoding's bit count;
  // positions added by the resize are BIT_UNSET.
  void insnWithID(insn_t &Insn, unsigned Opcode) const {
    BitsInit &Bits = getBitsField(*AllInstructions[Opcode].EncodingDef, "Inst");
    Insn.resize(BitWidth > Bits.getNumBits() ? BitWidth : Bits.getNumBits(),
                BIT_UNSET);
    // We may have a SoftFail bitmask, which specifies a mask where an encoding
    // may differ from the value in "Inst" and yet still be valid, but the
    // disassembler should return SoftFail instead of Success.
    //
    // This is used for marking UNPREDICTABLE instructions in the ARM world.
    const RecordVal *RV =
        AllInstructions[Opcode].EncodingDef->getValue("SoftFail");
    const BitsInit *SFBits = RV ? dyn_cast<BitsInit>(RV->getValue()) : nullptr;
    for (unsigned i = 0; i < Bits.getNumBits(); ++i) {
      // A bit flagged in SoftFail is treated as unknown for filtering.
      if (SFBits && bitFromBits(*SFBits, i) == BIT_TRUE)
        Insn[i] = BIT_UNSET;
      else
        Insn[i] = bitFromBits(Bits, i);
    }
  }

  // Emit the name of the encoding/instruction pair: "<Encoding>:<Inst>" when
  // the two records differ, "<Inst>" otherwise.
  void emitNameWithID(raw_ostream &OS, unsigned Opcode) const {
    const Record *EncodingDef = AllInstructions[Opcode].EncodingDef;
    const Record *InstDef = AllInstructions[Opcode].Inst->TheDef;
    if (EncodingDef != InstDef)
      OS << EncodingDef->getName() << ":";
    OS << InstDef->getName();
  }

  // Populates the field of the insn given the start position and the number of
  // consecutive bits to scan for.
  //
  // Returns false if there exists any uninitialized bit value in the range.
  // Returns true, otherwise.
  bool fieldFromInsn(uint64_t &Field, insn_t &Insn, unsigned StartBit,
                     unsigned NumBits) const;

  /// dumpFilterArray - dumpFilterArray prints out debugging info for the given
  /// filter array as a series of chars.
  void dumpFilterArray(raw_ostream &o,
                       const std::vector<bit_value_t> & filter) const;

  /// dumpStack - dumpStack traverses the filter chooser chain and calls
  /// dumpFilterArray on each filter chooser up to the top level one.
  void dumpStack(raw_ostream &o, const char *prefix) const;

  // Returns the chosen filter; BestIndex must already be set (asserted).
  Filter &bestFilter() {
    assert(BestIndex != -1 && "BestIndex not set");
    return Filters[BestIndex];
  }

  // True when bit position i already has a definite value (BIT_TRUE or
  // BIT_FALSE) in FilterBitValues.
  bool PositionFiltered(unsigned i) const {
    return ValueSet(FilterBitValues[i]);
  }

  // Calculates the island(s) needed to decode the instruction.
  // This returns a list of undecoded bits of an instruction, for example,
  // Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be
  // decoded bits in order to verify that the instruction matches the Opcode.
  unsigned getIslands(std::vector<unsigned> &StartBits,
                      std::vector<unsigned> &EndBits,
                      std::vector<uint64_t> &FieldVals,
                      const insn_t &Insn) const;

  // Emits code to check the Predicates member of an instruction are true.
  // Returns true if predicate matches were emitted, false otherwise.
  bool emitPredicateMatch(raw_ostream &o, unsigned &Indentation,
                          unsigned Opc) const;

  bool doesOpcodeNeedPredicate(unsigned Opc) const;
  unsigned getPredicateIndex(DecoderTableInfo &TableInfo, StringRef P) const;
  void emitPredicateTableEntry(DecoderTableInfo &TableInfo,
                               unsigned Opc) const;

  void emitSoftFailTableEntry(DecoderTableInfo &TableInfo,
                              unsigned Opc) const;

  // Emits table entries to decode the singleton.
  void emitSingletonTableEntry(DecoderTableInfo &TableInfo,
                               EncodingIDAndOpcode Opc) const;

  // Emits code to decode the singleton, and then to decode the rest.
  void emitSingletonTableEntry(DecoderTableInfo &TableInfo,
                               const Filter &Best) const;

  void emitBinaryParser(raw_ostream &o, unsigned &Indentation,
                        const OperandInfo &OpInfo,
                        bool &OpHasCompleteDecoder) const;

  void emitDecoder(raw_ostream &OS, unsigned Indentation, unsigned Opc,
                   bool &HasCompleteDecoder) const;
  unsigned getDecoderIndex(DecoderSet &Decoders, unsigned Opc,
                           bool &HasCompleteDecoder) const;

  // Assign a single filter and run with it.
  void runSingleFilter(unsigned startBit, unsigned numBit, bool mixed);

  // reportRegion is a helper function for filterProcessor to mark a region as
  // eligible for use as a filter region.
  void reportRegion(bitAttr_t RA, unsigned StartBit, unsigned BitIndex,
                    bool AllowMixed);

  // FilterProcessor scans the well-known encoding bits of the instructions and
  // builds up a list of candidate filters.  It chooses the best filter and
  // recursively descends down the decoding tree.
  bool filterProcessor(bool AllowMixed, bool Greedy = true);

  // Decides on the best configuration of filter(s) to use in order to decode
  // the instructions.  A conflict of instructions may occur, in which case we
  // dump the conflict set to the standard error.
  void doFilter();

public:
  // emitTableEntries - Emit state machine entries to decode our share of
  //  instructions.
  void emitTableEntries(DecoderTableInfo &TableInfo) const;
};

} // end anonymous namespace

///////////////////////////
//                       //
// Filter Implementation //
//                       //
///////////////////////////

// Moves the containers out of f and copies its scalar members.
Filter::Filter(Filter &&f)
  : Owner(f.Owner), StartBit(f.StartBit), NumBits(f.NumBits), Mixed(f.Mixed),
    FilteredInstructions(std::move(f.FilteredInstructions)),
    VariableInstructions(std::move(f.VariableInstructions)),
    FilterChooserMap(std::move(f.FilterChooserMap)), NumFiltered(f.NumFiltered),
    LastOpcFiltered(f.LastOpcFiltered) {
}

// Buckets every opcode of `owner` by the value of its encoding bits in
// [startBit, startBit + numBits).  Opcodes whose bits in that range are all
// known go into FilteredInstructions keyed by the field value; the rest go
// into VariableInstructions.
Filter::Filter(FilterChooser &owner, unsigned startBit, unsigned numBits,
               bool mixed)
  : Owner(&owner), StartBit(startBit), NumBits(numBits), Mixed(mixed) {
  // The filtered segment must lie within the instruction width.
  assert(StartBit + NumBits - 1 < Owner->BitWidth);

  NumFiltered = 0;
  LastOpcFiltered = {0, 0};

  for (unsigned i = 0, e = Owner->Opcodes.size(); i != e; ++i) {
    insn_t Insn;

    // Populates the insn given the uid.
    Owner->insnWithID(Insn, Owner->Opcodes[i].EncodingID);

    uint64_t Field;
    // Scans the segment for possibly well-specified encoding bits.
    bool ok = Owner->fieldFromInsn(Field, Insn, StartBit, NumBits);

    if (ok) {
      // The encoding bits are well-known.  Lets add the uid of the
      // instruction into the bucket keyed off the constant field value.
      LastOpcFiltered = Owner->Opcodes[i];
      FilteredInstructions[Field].push_back(LastOpcFiltered);
      ++NumFiltered;
    } else {
      // Some of the encoding bit(s) are unspecified.  This contributes to
      // one additional member of "Variable" instructions.
      VariableInstructions.push_back(Owner->Opcodes[i]);
    }
  }

  assert((FilteredInstructions.size() + VariableInstructions.size() > 0)
         && "Filter returns no instruction categories");
}

// Divides the decoding task into sub tasks and delegates them to the
// inferior FilterChooser's.
615 // 616 // A special case arises when there's only one entry in the filtered 617 // instructions. In order to unambiguously decode the singleton, we need to 618 // match the remaining undecoded encoding bits against the singleton. 619 void Filter::recurse() { 620 // Starts by inheriting our parent filter chooser's filter bit values. 621 std::vector<bit_value_t> BitValueArray(Owner->FilterBitValues); 622 623 if (!VariableInstructions.empty()) { 624 // Conservatively marks each segment position as BIT_UNSET. 625 for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex) 626 BitValueArray[StartBit + bitIndex] = BIT_UNSET; 627 628 // Delegates to an inferior filter chooser for further processing on this 629 // group of instructions whose segment values are variable. 630 FilterChooserMap.insert(std::make_pair(NO_FIXED_SEGMENTS_SENTINEL, 631 std::make_unique<FilterChooser>(Owner->AllInstructions, 632 VariableInstructions, Owner->Operands, BitValueArray, *Owner))); 633 } 634 635 // No need to recurse for a singleton filtered instruction. 636 // See also Filter::emit*(). 637 if (getNumFiltered() == 1) { 638 assert(FilterChooserMap.size() == 1); 639 return; 640 } 641 642 // Otherwise, create sub choosers. 643 for (const auto &Inst : FilteredInstructions) { 644 645 // Marks all the segment positions with either BIT_TRUE or BIT_FALSE. 646 for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex) { 647 if (Inst.first & (1ULL << bitIndex)) 648 BitValueArray[StartBit + bitIndex] = BIT_TRUE; 649 else 650 BitValueArray[StartBit + bitIndex] = BIT_FALSE; 651 } 652 653 // Delegates to an inferior filter chooser for further processing on this 654 // category of instructions. 
655 FilterChooserMap.insert(std::make_pair( 656 Inst.first, std::make_unique<FilterChooser>( 657 Owner->AllInstructions, Inst.second, 658 Owner->Operands, BitValueArray, *Owner))); 659 } 660 } 661 662 static void resolveTableFixups(DecoderTable &Table, const FixupList &Fixups, 663 uint32_t DestIdx) { 664 // Any NumToSkip fixups in the current scope can resolve to the 665 // current location. 666 for (FixupList::const_reverse_iterator I = Fixups.rbegin(), 667 E = Fixups.rend(); 668 I != E; ++I) { 669 // Calculate the distance from the byte following the fixup entry byte 670 // to the destination. The Target is calculated from after the 16-bit 671 // NumToSkip entry itself, so subtract two from the displacement here 672 // to account for that. 673 uint32_t FixupIdx = *I; 674 uint32_t Delta = DestIdx - FixupIdx - 3; 675 // Our NumToSkip entries are 24-bits. Make sure our table isn't too 676 // big. 677 assert(Delta < (1u << 24)); 678 Table[FixupIdx] = (uint8_t)Delta; 679 Table[FixupIdx + 1] = (uint8_t)(Delta >> 8); 680 Table[FixupIdx + 2] = (uint8_t)(Delta >> 16); 681 } 682 } 683 684 // Emit table entries to decode instructions given a segment or segments 685 // of bits. 686 void Filter::emitTableEntry(DecoderTableInfo &TableInfo) const { 687 TableInfo.Table.push_back(MCD::OPC_ExtractField); 688 TableInfo.Table.push_back(StartBit); 689 TableInfo.Table.push_back(NumBits); 690 691 // A new filter entry begins a new scope for fixup resolution. 692 TableInfo.FixupStack.emplace_back(); 693 694 DecoderTable &Table = TableInfo.Table; 695 696 size_t PrevFilter = 0; 697 bool HasFallthrough = false; 698 for (auto &Filter : FilterChooserMap) { 699 // Field value -1 implies a non-empty set of variable instructions. 700 // See also recurse(). 701 if (Filter.first == NO_FIXED_SEGMENTS_SENTINEL) { 702 HasFallthrough = true; 703 704 // Each scope should always have at least one filter value to check 705 // for. 
706 assert(PrevFilter != 0 && "empty filter set!"); 707 FixupList &CurScope = TableInfo.FixupStack.back(); 708 // Resolve any NumToSkip fixups in the current scope. 709 resolveTableFixups(Table, CurScope, Table.size()); 710 CurScope.clear(); 711 PrevFilter = 0; // Don't re-process the filter's fallthrough. 712 } else { 713 Table.push_back(MCD::OPC_FilterValue); 714 // Encode and emit the value to filter against. 715 uint8_t Buffer[16]; 716 unsigned Len = encodeULEB128(Filter.first, Buffer); 717 Table.insert(Table.end(), Buffer, Buffer + Len); 718 // Reserve space for the NumToSkip entry. We'll backpatch the value 719 // later. 720 PrevFilter = Table.size(); 721 Table.push_back(0); 722 Table.push_back(0); 723 Table.push_back(0); 724 } 725 726 // We arrive at a category of instructions with the same segment value. 727 // Now delegate to the sub filter chooser for further decodings. 728 // The case may fallthrough, which happens if the remaining well-known 729 // encoding bits do not match exactly. 730 Filter.second->emitTableEntries(TableInfo); 731 732 // Now that we've emitted the body of the handler, update the NumToSkip 733 // of the filter itself to be able to skip forward when false. Subtract 734 // two as to account for the width of the NumToSkip field itself. 735 if (PrevFilter) { 736 uint32_t NumToSkip = Table.size() - PrevFilter - 3; 737 assert(NumToSkip < (1u << 24) && "disassembler decoding table too large!"); 738 Table[PrevFilter] = (uint8_t)NumToSkip; 739 Table[PrevFilter + 1] = (uint8_t)(NumToSkip >> 8); 740 Table[PrevFilter + 2] = (uint8_t)(NumToSkip >> 16); 741 } 742 } 743 744 // Any remaining unresolved fixups bubble up to the parent fixup scope. 
745 assert(TableInfo.FixupStack.size() > 1 && "fixup stack underflow!"); 746 FixupScopeList::iterator Source = TableInfo.FixupStack.end() - 1; 747 FixupScopeList::iterator Dest = Source - 1; 748 llvm::append_range(*Dest, *Source); 749 TableInfo.FixupStack.pop_back(); 750 751 // If there is no fallthrough, then the final filter should get fixed 752 // up according to the enclosing scope rather than the current position. 753 if (!HasFallthrough) 754 TableInfo.FixupStack.back().push_back(PrevFilter); 755 } 756 757 // Returns the number of fanout produced by the filter. More fanout implies 758 // the filter distinguishes more categories of instructions. 759 unsigned Filter::usefulness() const { 760 if (!VariableInstructions.empty()) 761 return FilteredInstructions.size(); 762 else 763 return FilteredInstructions.size() + 1; 764 } 765 766 ////////////////////////////////// 767 // // 768 // Filterchooser Implementation // 769 // // 770 ////////////////////////////////// 771 772 // Emit the decoder state machine table. 773 void DecoderEmitter::emitTable(formatted_raw_ostream &OS, DecoderTable &Table, 774 unsigned Indentation, unsigned BitWidth, 775 StringRef Namespace) const { 776 OS.indent(Indentation) << "static const uint8_t DecoderTable" << Namespace 777 << BitWidth << "[] = {\n"; 778 779 Indentation += 2; 780 781 // FIXME: We may be able to use the NumToSkip values to recover 782 // appropriate indentation levels. 
783 DecoderTable::const_iterator I = Table.begin(); 784 DecoderTable::const_iterator E = Table.end(); 785 while (I != E) { 786 assert (I < E && "incomplete decode table entry!"); 787 788 uint64_t Pos = I - Table.begin(); 789 OS << "/* " << Pos << " */"; 790 OS.PadToColumn(12); 791 792 switch (*I) { 793 default: 794 PrintFatalError("invalid decode table opcode"); 795 case MCD::OPC_ExtractField: { 796 ++I; 797 unsigned Start = *I++; 798 unsigned Len = *I++; 799 OS.indent(Indentation) << "MCD::OPC_ExtractField, " << Start << ", " 800 << Len << ", // Inst{"; 801 if (Len > 1) 802 OS << (Start + Len - 1) << "-"; 803 OS << Start << "} ...\n"; 804 break; 805 } 806 case MCD::OPC_FilterValue: { 807 ++I; 808 OS.indent(Indentation) << "MCD::OPC_FilterValue, "; 809 // The filter value is ULEB128 encoded. 810 while (*I >= 128) 811 OS << (unsigned)*I++ << ", "; 812 OS << (unsigned)*I++ << ", "; 813 814 // 24-bit numtoskip value. 815 uint8_t Byte = *I++; 816 uint32_t NumToSkip = Byte; 817 OS << (unsigned)Byte << ", "; 818 Byte = *I++; 819 OS << (unsigned)Byte << ", "; 820 NumToSkip |= Byte << 8; 821 Byte = *I++; 822 OS << utostr(Byte) << ", "; 823 NumToSkip |= Byte << 16; 824 OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; 825 break; 826 } 827 case MCD::OPC_CheckField: { 828 ++I; 829 unsigned Start = *I++; 830 unsigned Len = *I++; 831 OS.indent(Indentation) << "MCD::OPC_CheckField, " << Start << ", " 832 << Len << ", ";// << Val << ", " << NumToSkip << ",\n"; 833 // ULEB128 encoded field value. 834 for (; *I >= 128; ++I) 835 OS << (unsigned)*I << ", "; 836 OS << (unsigned)*I++ << ", "; 837 // 24-bit numtoskip value. 
838 uint8_t Byte = *I++; 839 uint32_t NumToSkip = Byte; 840 OS << (unsigned)Byte << ", "; 841 Byte = *I++; 842 OS << (unsigned)Byte << ", "; 843 NumToSkip |= Byte << 8; 844 Byte = *I++; 845 OS << utostr(Byte) << ", "; 846 NumToSkip |= Byte << 16; 847 OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; 848 break; 849 } 850 case MCD::OPC_CheckPredicate: { 851 ++I; 852 OS.indent(Indentation) << "MCD::OPC_CheckPredicate, "; 853 for (; *I >= 128; ++I) 854 OS << (unsigned)*I << ", "; 855 OS << (unsigned)*I++ << ", "; 856 857 // 24-bit numtoskip value. 858 uint8_t Byte = *I++; 859 uint32_t NumToSkip = Byte; 860 OS << (unsigned)Byte << ", "; 861 Byte = *I++; 862 OS << (unsigned)Byte << ", "; 863 NumToSkip |= Byte << 8; 864 Byte = *I++; 865 OS << utostr(Byte) << ", "; 866 NumToSkip |= Byte << 16; 867 OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; 868 break; 869 } 870 case MCD::OPC_Decode: 871 case MCD::OPC_TryDecode: { 872 bool IsTry = *I == MCD::OPC_TryDecode; 873 ++I; 874 // Extract the ULEB128 encoded Opcode to a buffer. 875 uint8_t Buffer[16], *p = Buffer; 876 while ((*p++ = *I++) >= 128) 877 assert((p - Buffer) <= (ptrdiff_t)sizeof(Buffer) 878 && "ULEB128 value too large!"); 879 // Decode the Opcode value. 880 unsigned Opc = decodeULEB128(Buffer); 881 OS.indent(Indentation) << "MCD::OPC_" << (IsTry ? "Try" : "") 882 << "Decode, "; 883 for (p = Buffer; *p >= 128; ++p) 884 OS << (unsigned)*p << ", "; 885 OS << (unsigned)*p << ", "; 886 887 // Decoder index. 888 for (; *I >= 128; ++I) 889 OS << (unsigned)*I << ", "; 890 OS << (unsigned)*I++ << ", "; 891 892 if (!IsTry) { 893 OS << "// Opcode: " << NumberedEncodings[Opc] << "\n"; 894 break; 895 } 896 897 // Fallthrough for OPC_TryDecode. 898 899 // 24-bit numtoskip value. 
900 uint8_t Byte = *I++; 901 uint32_t NumToSkip = Byte; 902 OS << (unsigned)Byte << ", "; 903 Byte = *I++; 904 OS << (unsigned)Byte << ", "; 905 NumToSkip |= Byte << 8; 906 Byte = *I++; 907 OS << utostr(Byte) << ", "; 908 NumToSkip |= Byte << 16; 909 910 OS << "// Opcode: " << NumberedEncodings[Opc] 911 << ", skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; 912 break; 913 } 914 case MCD::OPC_SoftFail: { 915 ++I; 916 OS.indent(Indentation) << "MCD::OPC_SoftFail"; 917 // Positive mask 918 uint64_t Value = 0; 919 unsigned Shift = 0; 920 do { 921 OS << ", " << (unsigned)*I; 922 Value += (*I & 0x7f) << Shift; 923 Shift += 7; 924 } while (*I++ >= 128); 925 if (Value > 127) { 926 OS << " /* 0x"; 927 OS.write_hex(Value); 928 OS << " */"; 929 } 930 // Negative mask 931 Value = 0; 932 Shift = 0; 933 do { 934 OS << ", " << (unsigned)*I; 935 Value += (*I & 0x7f) << Shift; 936 Shift += 7; 937 } while (*I++ >= 128); 938 if (Value > 127) { 939 OS << " /* 0x"; 940 OS.write_hex(Value); 941 OS << " */"; 942 } 943 OS << ",\n"; 944 break; 945 } 946 case MCD::OPC_Fail: { 947 ++I; 948 OS.indent(Indentation) << "MCD::OPC_Fail,\n"; 949 break; 950 } 951 } 952 } 953 OS.indent(Indentation) << "0\n"; 954 955 Indentation -= 2; 956 957 OS.indent(Indentation) << "};\n\n"; 958 } 959 960 void DecoderEmitter::emitInstrLenTable(formatted_raw_ostream &OS, 961 std::vector<unsigned> &InstrLen) const { 962 OS << "static const uint8_t InstrLenTable[] = {\n"; 963 for (unsigned &Len : InstrLen) { 964 OS << Len << ",\n"; 965 } 966 OS << "};\n\n"; 967 } 968 969 void DecoderEmitter::emitPredicateFunction(formatted_raw_ostream &OS, 970 PredicateSet &Predicates, 971 unsigned Indentation) const { 972 // The predicate function is just a big switch statement based on the 973 // input predicate index. 
974 OS.indent(Indentation) << "static bool checkDecoderPredicate(unsigned Idx, " 975 << "const FeatureBitset &Bits) {\n"; 976 Indentation += 2; 977 if (!Predicates.empty()) { 978 OS.indent(Indentation) << "switch (Idx) {\n"; 979 OS.indent(Indentation) << "default: llvm_unreachable(\"Invalid index!\");\n"; 980 unsigned Index = 0; 981 for (const auto &Predicate : Predicates) { 982 OS.indent(Indentation) << "case " << Index++ << ":\n"; 983 OS.indent(Indentation+2) << "return (" << Predicate << ");\n"; 984 } 985 OS.indent(Indentation) << "}\n"; 986 } else { 987 // No case statement to emit 988 OS.indent(Indentation) << "llvm_unreachable(\"Invalid index!\");\n"; 989 } 990 Indentation -= 2; 991 OS.indent(Indentation) << "}\n\n"; 992 } 993 994 void DecoderEmitter::emitDecoderFunction(formatted_raw_ostream &OS, 995 DecoderSet &Decoders, 996 unsigned Indentation) const { 997 // The decoder function is just a big switch statement based on the 998 // input decoder index. 999 OS.indent(Indentation) << "template <typename InsnType>\n"; 1000 OS.indent(Indentation) << "static DecodeStatus decodeToMCInst(DecodeStatus S," 1001 << " unsigned Idx, InsnType insn, MCInst &MI,\n"; 1002 OS.indent(Indentation) 1003 << " uint64_t " 1004 << "Address, const MCDisassembler *Decoder, bool &DecodeComplete) {\n"; 1005 Indentation += 2; 1006 OS.indent(Indentation) << "DecodeComplete = true;\n"; 1007 // TODO: When InsnType is large, using uint64_t limits all fields to 64 bits 1008 // It would be better for emitBinaryParser to use a 64-bit tmp whenever 1009 // possible but fall back to an InsnType-sized tmp for truly large fields. 
1010 OS.indent(Indentation) << "using TmpType = " 1011 "std::conditional_t<std::is_integral<InsnType>::" 1012 "value, InsnType, uint64_t>;\n"; 1013 OS.indent(Indentation) << "TmpType tmp;\n"; 1014 OS.indent(Indentation) << "switch (Idx) {\n"; 1015 OS.indent(Indentation) << "default: llvm_unreachable(\"Invalid index!\");\n"; 1016 unsigned Index = 0; 1017 for (const auto &Decoder : Decoders) { 1018 OS.indent(Indentation) << "case " << Index++ << ":\n"; 1019 OS << Decoder; 1020 OS.indent(Indentation+2) << "return S;\n"; 1021 } 1022 OS.indent(Indentation) << "}\n"; 1023 Indentation -= 2; 1024 OS.indent(Indentation) << "}\n\n"; 1025 } 1026 1027 // Populates the field of the insn given the start position and the number of 1028 // consecutive bits to scan for. 1029 // 1030 // Returns false if and on the first uninitialized bit value encountered. 1031 // Returns true, otherwise. 1032 bool FilterChooser::fieldFromInsn(uint64_t &Field, insn_t &Insn, 1033 unsigned StartBit, unsigned NumBits) const { 1034 Field = 0; 1035 1036 for (unsigned i = 0; i < NumBits; ++i) { 1037 if (Insn[StartBit + i] == BIT_UNSET) 1038 return false; 1039 1040 if (Insn[StartBit + i] == BIT_TRUE) 1041 Field = Field | (1ULL << i); 1042 } 1043 1044 return true; 1045 } 1046 1047 /// dumpFilterArray - dumpFilterArray prints out debugging info for the given 1048 /// filter array as a series of chars. 1049 void FilterChooser::dumpFilterArray(raw_ostream &o, 1050 const std::vector<bit_value_t> &filter) const { 1051 for (unsigned bitIndex = BitWidth; bitIndex > 0; bitIndex--) { 1052 switch (filter[bitIndex - 1]) { 1053 case BIT_UNFILTERED: 1054 o << "."; 1055 break; 1056 case BIT_UNSET: 1057 o << "_"; 1058 break; 1059 case BIT_TRUE: 1060 o << "1"; 1061 break; 1062 case BIT_FALSE: 1063 o << "0"; 1064 break; 1065 } 1066 } 1067 } 1068 1069 /// dumpStack - dumpStack traverses the filter chooser chain and calls 1070 /// dumpFilterArray on each filter chooser up to the top level one. 
1071 void FilterChooser::dumpStack(raw_ostream &o, const char *prefix) const { 1072 const FilterChooser *current = this; 1073 1074 while (current) { 1075 o << prefix; 1076 dumpFilterArray(o, current->FilterBitValues); 1077 o << '\n'; 1078 current = current->Parent; 1079 } 1080 } 1081 1082 // Calculates the island(s) needed to decode the instruction. 1083 // This returns a list of undecoded bits of an instructions, for example, 1084 // Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be 1085 // decoded bits in order to verify that the instruction matches the Opcode. 1086 unsigned FilterChooser::getIslands(std::vector<unsigned> &StartBits, 1087 std::vector<unsigned> &EndBits, 1088 std::vector<uint64_t> &FieldVals, 1089 const insn_t &Insn) const { 1090 unsigned Num, BitNo; 1091 Num = BitNo = 0; 1092 1093 uint64_t FieldVal = 0; 1094 1095 // 0: Init 1096 // 1: Water (the bit value does not affect decoding) 1097 // 2: Island (well-known bit value needed for decoding) 1098 int State = 0; 1099 1100 for (unsigned i = 0; i < BitWidth; ++i) { 1101 int64_t Val = Value(Insn[i]); 1102 bool Filtered = PositionFiltered(i); 1103 switch (State) { 1104 default: llvm_unreachable("Unreachable code!"); 1105 case 0: 1106 case 1: 1107 if (Filtered || Val == -1) 1108 State = 1; // Still in Water 1109 else { 1110 State = 2; // Into the Island 1111 BitNo = 0; 1112 StartBits.push_back(i); 1113 FieldVal = Val; 1114 } 1115 break; 1116 case 2: 1117 if (Filtered || Val == -1) { 1118 State = 1; // Into the Water 1119 EndBits.push_back(i - 1); 1120 FieldVals.push_back(FieldVal); 1121 ++Num; 1122 } else { 1123 State = 2; // Still in Island 1124 ++BitNo; 1125 FieldVal = FieldVal | Val << BitNo; 1126 } 1127 break; 1128 } 1129 } 1130 // If we are still in Island after the loop, do some housekeeping. 
1131 if (State == 2) { 1132 EndBits.push_back(BitWidth - 1); 1133 FieldVals.push_back(FieldVal); 1134 ++Num; 1135 } 1136 1137 assert(StartBits.size() == Num && EndBits.size() == Num && 1138 FieldVals.size() == Num); 1139 return Num; 1140 } 1141 1142 void FilterChooser::emitBinaryParser(raw_ostream &o, unsigned &Indentation, 1143 const OperandInfo &OpInfo, 1144 bool &OpHasCompleteDecoder) const { 1145 const std::string &Decoder = OpInfo.Decoder; 1146 1147 bool UseInsertBits = OpInfo.numFields() != 1 || OpInfo.InitValue != 0; 1148 1149 if (UseInsertBits) { 1150 o.indent(Indentation) << "tmp = 0x"; 1151 o.write_hex(OpInfo.InitValue); 1152 o << ";\n"; 1153 } 1154 1155 for (const EncodingField &EF : OpInfo) { 1156 o.indent(Indentation); 1157 if (UseInsertBits) 1158 o << "insertBits(tmp, "; 1159 else 1160 o << "tmp = "; 1161 o << "fieldFromInstruction(insn, " << EF.Base << ", " << EF.Width << ')'; 1162 if (UseInsertBits) 1163 o << ", " << EF.Offset << ", " << EF.Width << ')'; 1164 else if (EF.Offset != 0) 1165 o << " << " << EF.Offset; 1166 o << ";\n"; 1167 } 1168 1169 if (Decoder != "") { 1170 OpHasCompleteDecoder = OpInfo.HasCompleteDecoder; 1171 o.indent(Indentation) << Emitter->GuardPrefix << Decoder 1172 << "(MI, tmp, Address, Decoder)" 1173 << Emitter->GuardPostfix 1174 << " { " << (OpHasCompleteDecoder ? "" : "DecodeComplete = false; ") 1175 << "return MCDisassembler::Fail; }\n"; 1176 } else { 1177 OpHasCompleteDecoder = true; 1178 o.indent(Indentation) << "MI.addOperand(MCOperand::createImm(tmp));\n"; 1179 } 1180 } 1181 1182 void FilterChooser::emitDecoder(raw_ostream &OS, unsigned Indentation, 1183 unsigned Opc, bool &HasCompleteDecoder) const { 1184 HasCompleteDecoder = true; 1185 1186 for (const auto &Op : Operands.find(Opc)->second) { 1187 // If a custom instruction decoder was specified, use that. 
1188 if (Op.numFields() == 0 && !Op.Decoder.empty()) { 1189 HasCompleteDecoder = Op.HasCompleteDecoder; 1190 OS.indent(Indentation) << Emitter->GuardPrefix << Op.Decoder 1191 << "(MI, insn, Address, Decoder)" 1192 << Emitter->GuardPostfix 1193 << " { " << (HasCompleteDecoder ? "" : "DecodeComplete = false; ") 1194 << "return MCDisassembler::Fail; }\n"; 1195 break; 1196 } 1197 1198 bool OpHasCompleteDecoder; 1199 emitBinaryParser(OS, Indentation, Op, OpHasCompleteDecoder); 1200 if (!OpHasCompleteDecoder) 1201 HasCompleteDecoder = false; 1202 } 1203 } 1204 1205 unsigned FilterChooser::getDecoderIndex(DecoderSet &Decoders, 1206 unsigned Opc, 1207 bool &HasCompleteDecoder) const { 1208 // Build up the predicate string. 1209 SmallString<256> Decoder; 1210 // FIXME: emitDecoder() function can take a buffer directly rather than 1211 // a stream. 1212 raw_svector_ostream S(Decoder); 1213 unsigned I = 4; 1214 emitDecoder(S, I, Opc, HasCompleteDecoder); 1215 1216 // Using the full decoder string as the key value here is a bit 1217 // heavyweight, but is effective. If the string comparisons become a 1218 // performance concern, we can implement a mangling of the predicate 1219 // data easily enough with a map back to the actual string. That's 1220 // overkill for now, though. 1221 1222 // Make sure the predicate is in the table. 1223 Decoders.insert(CachedHashString(Decoder)); 1224 // Now figure out the index for when we write out the table. 
1225 DecoderSet::const_iterator P = find(Decoders, Decoder.str()); 1226 return (unsigned)(P - Decoders.begin()); 1227 } 1228 1229 bool FilterChooser::emitPredicateMatch(raw_ostream &o, unsigned &Indentation, 1230 unsigned Opc) const { 1231 ListInit *Predicates = 1232 AllInstructions[Opc].EncodingDef->getValueAsListInit("Predicates"); 1233 bool IsFirstEmission = true; 1234 for (unsigned i = 0; i < Predicates->size(); ++i) { 1235 Record *Pred = Predicates->getElementAsRecord(i); 1236 if (!Pred->getValue("AssemblerMatcherPredicate")) 1237 continue; 1238 1239 if (!isa<DagInit>(Pred->getValue("AssemblerCondDag")->getValue())) 1240 continue; 1241 1242 const DagInit *D = Pred->getValueAsDag("AssemblerCondDag"); 1243 std::string CombineType = D->getOperator()->getAsString(); 1244 if (CombineType != "any_of" && CombineType != "all_of") 1245 PrintFatalError(Pred->getLoc(), "Invalid AssemblerCondDag!"); 1246 if (D->getNumArgs() == 0) 1247 PrintFatalError(Pred->getLoc(), "Invalid AssemblerCondDag!"); 1248 bool IsOr = CombineType == "any_of"; 1249 1250 if (!IsFirstEmission) 1251 o << " && "; 1252 1253 if (IsOr) 1254 o << "("; 1255 1256 ListSeparator LS(IsOr ? 
" || " : " && "); 1257 for (auto *Arg : D->getArgs()) { 1258 o << LS; 1259 if (auto *NotArg = dyn_cast<DagInit>(Arg)) { 1260 if (NotArg->getOperator()->getAsString() != "not" || 1261 NotArg->getNumArgs() != 1) 1262 PrintFatalError(Pred->getLoc(), "Invalid AssemblerCondDag!"); 1263 Arg = NotArg->getArg(0); 1264 o << "!"; 1265 } 1266 if (!isa<DefInit>(Arg) || 1267 !cast<DefInit>(Arg)->getDef()->isSubClassOf("SubtargetFeature")) 1268 PrintFatalError(Pred->getLoc(), "Invalid AssemblerCondDag!"); 1269 o << "Bits[" << Emitter->PredicateNamespace << "::" << Arg->getAsString() 1270 << "]"; 1271 } 1272 1273 if (IsOr) 1274 o << ")"; 1275 1276 IsFirstEmission = false; 1277 } 1278 return !Predicates->empty(); 1279 } 1280 1281 bool FilterChooser::doesOpcodeNeedPredicate(unsigned Opc) const { 1282 ListInit *Predicates = 1283 AllInstructions[Opc].EncodingDef->getValueAsListInit("Predicates"); 1284 for (unsigned i = 0; i < Predicates->size(); ++i) { 1285 Record *Pred = Predicates->getElementAsRecord(i); 1286 if (!Pred->getValue("AssemblerMatcherPredicate")) 1287 continue; 1288 1289 if (isa<DagInit>(Pred->getValue("AssemblerCondDag")->getValue())) 1290 return true; 1291 } 1292 return false; 1293 } 1294 1295 unsigned FilterChooser::getPredicateIndex(DecoderTableInfo &TableInfo, 1296 StringRef Predicate) const { 1297 // Using the full predicate string as the key value here is a bit 1298 // heavyweight, but is effective. If the string comparisons become a 1299 // performance concern, we can implement a mangling of the predicate 1300 // data easily enough with a map back to the actual string. That's 1301 // overkill for now, though. 1302 1303 // Make sure the predicate is in the table. 1304 TableInfo.Predicates.insert(CachedHashString(Predicate)); 1305 // Now figure out the index for when we write out the table. 
1306 PredicateSet::const_iterator P = find(TableInfo.Predicates, Predicate); 1307 return (unsigned)(P - TableInfo.Predicates.begin()); 1308 } 1309 1310 void FilterChooser::emitPredicateTableEntry(DecoderTableInfo &TableInfo, 1311 unsigned Opc) const { 1312 if (!doesOpcodeNeedPredicate(Opc)) 1313 return; 1314 1315 // Build up the predicate string. 1316 SmallString<256> Predicate; 1317 // FIXME: emitPredicateMatch() functions can take a buffer directly rather 1318 // than a stream. 1319 raw_svector_ostream PS(Predicate); 1320 unsigned I = 0; 1321 emitPredicateMatch(PS, I, Opc); 1322 1323 // Figure out the index into the predicate table for the predicate just 1324 // computed. 1325 unsigned PIdx = getPredicateIndex(TableInfo, PS.str()); 1326 SmallString<16> PBytes; 1327 raw_svector_ostream S(PBytes); 1328 encodeULEB128(PIdx, S); 1329 1330 TableInfo.Table.push_back(MCD::OPC_CheckPredicate); 1331 // Predicate index 1332 for (unsigned i = 0, e = PBytes.size(); i != e; ++i) 1333 TableInfo.Table.push_back(PBytes[i]); 1334 // Push location for NumToSkip backpatching. 1335 TableInfo.FixupStack.back().push_back(TableInfo.Table.size()); 1336 TableInfo.Table.push_back(0); 1337 TableInfo.Table.push_back(0); 1338 TableInfo.Table.push_back(0); 1339 } 1340 1341 void FilterChooser::emitSoftFailTableEntry(DecoderTableInfo &TableInfo, 1342 unsigned Opc) const { 1343 const RecordVal *RV = AllInstructions[Opc].EncodingDef->getValue("SoftFail"); 1344 BitsInit *SFBits = RV ? 
dyn_cast<BitsInit>(RV->getValue()) : nullptr; 1345 1346 if (!SFBits) return; 1347 BitsInit *InstBits = 1348 AllInstructions[Opc].EncodingDef->getValueAsBitsInit("Inst"); 1349 1350 APInt PositiveMask(BitWidth, 0ULL); 1351 APInt NegativeMask(BitWidth, 0ULL); 1352 for (unsigned i = 0; i < BitWidth; ++i) { 1353 bit_value_t B = bitFromBits(*SFBits, i); 1354 bit_value_t IB = bitFromBits(*InstBits, i); 1355 1356 if (B != BIT_TRUE) continue; 1357 1358 switch (IB) { 1359 case BIT_FALSE: 1360 // The bit is meant to be false, so emit a check to see if it is true. 1361 PositiveMask.setBit(i); 1362 break; 1363 case BIT_TRUE: 1364 // The bit is meant to be true, so emit a check to see if it is false. 1365 NegativeMask.setBit(i); 1366 break; 1367 default: 1368 // The bit is not set; this must be an error! 1369 errs() << "SoftFail Conflict: bit SoftFail{" << i << "} in " 1370 << AllInstructions[Opc] << " is set but Inst{" << i 1371 << "} is unset!\n" 1372 << " - You can only mark a bit as SoftFail if it is fully defined" 1373 << " (1/0 - not '?') in Inst\n"; 1374 return; 1375 } 1376 } 1377 1378 bool NeedPositiveMask = PositiveMask.getBoolValue(); 1379 bool NeedNegativeMask = NegativeMask.getBoolValue(); 1380 1381 if (!NeedPositiveMask && !NeedNegativeMask) 1382 return; 1383 1384 TableInfo.Table.push_back(MCD::OPC_SoftFail); 1385 1386 SmallString<16> MaskBytes; 1387 raw_svector_ostream S(MaskBytes); 1388 if (NeedPositiveMask) { 1389 encodeULEB128(PositiveMask.getZExtValue(), S); 1390 for (unsigned i = 0, e = MaskBytes.size(); i != e; ++i) 1391 TableInfo.Table.push_back(MaskBytes[i]); 1392 } else 1393 TableInfo.Table.push_back(0); 1394 if (NeedNegativeMask) { 1395 MaskBytes.clear(); 1396 encodeULEB128(NegativeMask.getZExtValue(), S); 1397 for (unsigned i = 0, e = MaskBytes.size(); i != e; ++i) 1398 TableInfo.Table.push_back(MaskBytes[i]); 1399 } else 1400 TableInfo.Table.push_back(0); 1401 } 1402 1403 // Emits table entries to decode the singleton. 
1404 void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo, 1405 EncodingIDAndOpcode Opc) const { 1406 std::vector<unsigned> StartBits; 1407 std::vector<unsigned> EndBits; 1408 std::vector<uint64_t> FieldVals; 1409 insn_t Insn; 1410 insnWithID(Insn, Opc.EncodingID); 1411 1412 // Look for islands of undecoded bits of the singleton. 1413 getIslands(StartBits, EndBits, FieldVals, Insn); 1414 1415 unsigned Size = StartBits.size(); 1416 1417 // Emit the predicate table entry if one is needed. 1418 emitPredicateTableEntry(TableInfo, Opc.EncodingID); 1419 1420 // Check any additional encoding fields needed. 1421 for (unsigned I = Size; I != 0; --I) { 1422 unsigned NumBits = EndBits[I-1] - StartBits[I-1] + 1; 1423 TableInfo.Table.push_back(MCD::OPC_CheckField); 1424 TableInfo.Table.push_back(StartBits[I-1]); 1425 TableInfo.Table.push_back(NumBits); 1426 uint8_t Buffer[16], *p; 1427 encodeULEB128(FieldVals[I-1], Buffer); 1428 for (p = Buffer; *p >= 128 ; ++p) 1429 TableInfo.Table.push_back(*p); 1430 TableInfo.Table.push_back(*p); 1431 // Push location for NumToSkip backpatching. 1432 TableInfo.FixupStack.back().push_back(TableInfo.Table.size()); 1433 // The fixup is always 24-bits, so go ahead and allocate the space 1434 // in the table so all our relative position calculations work OK even 1435 // before we fully resolve the real value here. 1436 TableInfo.Table.push_back(0); 1437 TableInfo.Table.push_back(0); 1438 TableInfo.Table.push_back(0); 1439 } 1440 1441 // Check for soft failure of the match. 1442 emitSoftFailTableEntry(TableInfo, Opc.EncodingID); 1443 1444 bool HasCompleteDecoder; 1445 unsigned DIdx = 1446 getDecoderIndex(TableInfo.Decoders, Opc.EncodingID, HasCompleteDecoder); 1447 1448 // Produce OPC_Decode or OPC_TryDecode opcode based on the information 1449 // whether the instruction decoder is complete or not. 
If it is complete 1450 // then it handles all possible values of remaining variable/unfiltered bits 1451 // and for any value can determine if the bitpattern is a valid instruction 1452 // or not. This means OPC_Decode will be the final step in the decoding 1453 // process. If it is not complete, then the Fail return code from the 1454 // decoder method indicates that additional processing should be done to see 1455 // if there is any other instruction that also matches the bitpattern and 1456 // can decode it. 1457 TableInfo.Table.push_back(HasCompleteDecoder ? MCD::OPC_Decode : 1458 MCD::OPC_TryDecode); 1459 NumEncodingsSupported++; 1460 uint8_t Buffer[16], *p; 1461 encodeULEB128(Opc.Opcode, Buffer); 1462 for (p = Buffer; *p >= 128 ; ++p) 1463 TableInfo.Table.push_back(*p); 1464 TableInfo.Table.push_back(*p); 1465 1466 SmallString<16> Bytes; 1467 raw_svector_ostream S(Bytes); 1468 encodeULEB128(DIdx, S); 1469 1470 // Decoder index 1471 for (unsigned i = 0, e = Bytes.size(); i != e; ++i) 1472 TableInfo.Table.push_back(Bytes[i]); 1473 1474 if (!HasCompleteDecoder) { 1475 // Push location for NumToSkip backpatching. 1476 TableInfo.FixupStack.back().push_back(TableInfo.Table.size()); 1477 // Allocate the space for the fixup. 1478 TableInfo.Table.push_back(0); 1479 TableInfo.Table.push_back(0); 1480 TableInfo.Table.push_back(0); 1481 } 1482 } 1483 1484 // Emits table entries to decode the singleton, and then to decode the rest. 1485 void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo, 1486 const Filter &Best) const { 1487 EncodingIDAndOpcode Opc = Best.getSingletonOpc(); 1488 1489 // complex singletons need predicate checks from the first singleton 1490 // to refer forward to the variable filterchooser that follows. 
1491 TableInfo.FixupStack.emplace_back(); 1492 1493 emitSingletonTableEntry(TableInfo, Opc); 1494 1495 resolveTableFixups(TableInfo.Table, TableInfo.FixupStack.back(), 1496 TableInfo.Table.size()); 1497 TableInfo.FixupStack.pop_back(); 1498 1499 Best.getVariableFC().emitTableEntries(TableInfo); 1500 } 1501 1502 // Assign a single filter and run with it. Top level API client can initialize 1503 // with a single filter to start the filtering process. 1504 void FilterChooser::runSingleFilter(unsigned startBit, unsigned numBit, 1505 bool mixed) { 1506 Filters.clear(); 1507 Filters.emplace_back(*this, startBit, numBit, true); 1508 BestIndex = 0; // Sole Filter instance to choose from. 1509 bestFilter().recurse(); 1510 } 1511 1512 // reportRegion is a helper function for filterProcessor to mark a region as 1513 // eligible for use as a filter region. 1514 void FilterChooser::reportRegion(bitAttr_t RA, unsigned StartBit, 1515 unsigned BitIndex, bool AllowMixed) { 1516 if (RA == ATTR_MIXED && AllowMixed) 1517 Filters.emplace_back(*this, StartBit, BitIndex - StartBit, true); 1518 else if (RA == ATTR_ALL_SET && !AllowMixed) 1519 Filters.emplace_back(*this, StartBit, BitIndex - StartBit, false); 1520 } 1521 1522 // FilterProcessor scans the well-known encoding bits of the instructions and 1523 // builds up a list of candidate filters. It chooses the best filter and 1524 // recursively descends down the decoding tree. 1525 bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) { 1526 Filters.clear(); 1527 BestIndex = -1; 1528 unsigned numInstructions = Opcodes.size(); 1529 1530 assert(numInstructions && "Filter created with no instructions"); 1531 1532 // No further filtering is necessary. 1533 if (numInstructions == 1) 1534 return true; 1535 1536 // Heuristics. See also doFilter()'s "Heuristics" comment when num of 1537 // instructions is 3. 
1538 if (AllowMixed && !Greedy) { 1539 assert(numInstructions == 3); 1540 1541 for (auto Opcode : Opcodes) { 1542 std::vector<unsigned> StartBits; 1543 std::vector<unsigned> EndBits; 1544 std::vector<uint64_t> FieldVals; 1545 insn_t Insn; 1546 1547 insnWithID(Insn, Opcode.EncodingID); 1548 1549 // Look for islands of undecoded bits of any instruction. 1550 if (getIslands(StartBits, EndBits, FieldVals, Insn) > 0) { 1551 // Found an instruction with island(s). Now just assign a filter. 1552 runSingleFilter(StartBits[0], EndBits[0] - StartBits[0] + 1, true); 1553 return true; 1554 } 1555 } 1556 } 1557 1558 unsigned BitIndex; 1559 1560 // We maintain BIT_WIDTH copies of the bitAttrs automaton. 1561 // The automaton consumes the corresponding bit from each 1562 // instruction. 1563 // 1564 // Input symbols: 0, 1, and _ (unset). 1565 // States: NONE, FILTERED, ALL_SET, ALL_UNSET, and MIXED. 1566 // Initial state: NONE. 1567 // 1568 // (NONE) ------- [01] -> (ALL_SET) 1569 // (NONE) ------- _ ----> (ALL_UNSET) 1570 // (ALL_SET) ---- [01] -> (ALL_SET) 1571 // (ALL_SET) ---- _ ----> (MIXED) 1572 // (ALL_UNSET) -- [01] -> (MIXED) 1573 // (ALL_UNSET) -- _ ----> (ALL_UNSET) 1574 // (MIXED) ------ . ----> (MIXED) 1575 // (FILTERED)---- . ----> (FILTERED) 1576 1577 std::vector<bitAttr_t> bitAttrs; 1578 1579 // FILTERED bit positions provide no entropy and are not worthy of pursuing. 1580 // Filter::recurse() set either BIT_TRUE or BIT_FALSE for each position. 
1581 for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) 1582 if (FilterBitValues[BitIndex] == BIT_TRUE || 1583 FilterBitValues[BitIndex] == BIT_FALSE) 1584 bitAttrs.push_back(ATTR_FILTERED); 1585 else 1586 bitAttrs.push_back(ATTR_NONE); 1587 1588 for (unsigned InsnIndex = 0; InsnIndex < numInstructions; ++InsnIndex) { 1589 insn_t insn; 1590 1591 insnWithID(insn, Opcodes[InsnIndex].EncodingID); 1592 1593 for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) { 1594 switch (bitAttrs[BitIndex]) { 1595 case ATTR_NONE: 1596 if (insn[BitIndex] == BIT_UNSET) 1597 bitAttrs[BitIndex] = ATTR_ALL_UNSET; 1598 else 1599 bitAttrs[BitIndex] = ATTR_ALL_SET; 1600 break; 1601 case ATTR_ALL_SET: 1602 if (insn[BitIndex] == BIT_UNSET) 1603 bitAttrs[BitIndex] = ATTR_MIXED; 1604 break; 1605 case ATTR_ALL_UNSET: 1606 if (insn[BitIndex] != BIT_UNSET) 1607 bitAttrs[BitIndex] = ATTR_MIXED; 1608 break; 1609 case ATTR_MIXED: 1610 case ATTR_FILTERED: 1611 break; 1612 } 1613 } 1614 } 1615 1616 // The regionAttr automaton consumes the bitAttrs automatons' state, 1617 // lowest-to-highest. 
1618 // 1619 // Input symbols: F(iltered), (all_)S(et), (all_)U(nset), M(ixed) 1620 // States: NONE, ALL_SET, MIXED 1621 // Initial state: NONE 1622 // 1623 // (NONE) ----- F --> (NONE) 1624 // (NONE) ----- S --> (ALL_SET) ; and set region start 1625 // (NONE) ----- U --> (NONE) 1626 // (NONE) ----- M --> (MIXED) ; and set region start 1627 // (ALL_SET) -- F --> (NONE) ; and report an ALL_SET region 1628 // (ALL_SET) -- S --> (ALL_SET) 1629 // (ALL_SET) -- U --> (NONE) ; and report an ALL_SET region 1630 // (ALL_SET) -- M --> (MIXED) ; and report an ALL_SET region 1631 // (MIXED) ---- F --> (NONE) ; and report a MIXED region 1632 // (MIXED) ---- S --> (ALL_SET) ; and report a MIXED region 1633 // (MIXED) ---- U --> (NONE) ; and report a MIXED region 1634 // (MIXED) ---- M --> (MIXED) 1635 1636 bitAttr_t RA = ATTR_NONE; 1637 unsigned StartBit = 0; 1638 1639 for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) { 1640 bitAttr_t bitAttr = bitAttrs[BitIndex]; 1641 1642 assert(bitAttr != ATTR_NONE && "Bit without attributes"); 1643 1644 switch (RA) { 1645 case ATTR_NONE: 1646 switch (bitAttr) { 1647 case ATTR_FILTERED: 1648 break; 1649 case ATTR_ALL_SET: 1650 StartBit = BitIndex; 1651 RA = ATTR_ALL_SET; 1652 break; 1653 case ATTR_ALL_UNSET: 1654 break; 1655 case ATTR_MIXED: 1656 StartBit = BitIndex; 1657 RA = ATTR_MIXED; 1658 break; 1659 default: 1660 llvm_unreachable("Unexpected bitAttr!"); 1661 } 1662 break; 1663 case ATTR_ALL_SET: 1664 switch (bitAttr) { 1665 case ATTR_FILTERED: 1666 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1667 RA = ATTR_NONE; 1668 break; 1669 case ATTR_ALL_SET: 1670 break; 1671 case ATTR_ALL_UNSET: 1672 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1673 RA = ATTR_NONE; 1674 break; 1675 case ATTR_MIXED: 1676 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1677 StartBit = BitIndex; 1678 RA = ATTR_MIXED; 1679 break; 1680 default: 1681 llvm_unreachable("Unexpected bitAttr!"); 1682 } 1683 break; 1684 case ATTR_MIXED: 1685 switch (bitAttr) { 
1686 case ATTR_FILTERED: 1687 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1688 StartBit = BitIndex; 1689 RA = ATTR_NONE; 1690 break; 1691 case ATTR_ALL_SET: 1692 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1693 StartBit = BitIndex; 1694 RA = ATTR_ALL_SET; 1695 break; 1696 case ATTR_ALL_UNSET: 1697 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1698 RA = ATTR_NONE; 1699 break; 1700 case ATTR_MIXED: 1701 break; 1702 default: 1703 llvm_unreachable("Unexpected bitAttr!"); 1704 } 1705 break; 1706 case ATTR_ALL_UNSET: 1707 llvm_unreachable("regionAttr state machine has no ATTR_UNSET state"); 1708 case ATTR_FILTERED: 1709 llvm_unreachable("regionAttr state machine has no ATTR_FILTERED state"); 1710 } 1711 } 1712 1713 // At the end, if we're still in ALL_SET or MIXED states, report a region 1714 switch (RA) { 1715 case ATTR_NONE: 1716 break; 1717 case ATTR_FILTERED: 1718 break; 1719 case ATTR_ALL_SET: 1720 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1721 break; 1722 case ATTR_ALL_UNSET: 1723 break; 1724 case ATTR_MIXED: 1725 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1726 break; 1727 } 1728 1729 // We have finished with the filter processings. Now it's time to choose 1730 // the best performing filter. 1731 BestIndex = 0; 1732 bool AllUseless = true; 1733 unsigned BestScore = 0; 1734 1735 for (unsigned i = 0, e = Filters.size(); i != e; ++i) { 1736 unsigned Usefulness = Filters[i].usefulness(); 1737 1738 if (Usefulness) 1739 AllUseless = false; 1740 1741 if (Usefulness > BestScore) { 1742 BestIndex = i; 1743 BestScore = Usefulness; 1744 } 1745 } 1746 1747 if (!AllUseless) 1748 bestFilter().recurse(); 1749 1750 return !AllUseless; 1751 } // end of FilterChooser::filterProcessor(bool) 1752 1753 // Decides on the best configuration of filter(s) to use in order to decode 1754 // the instructions. A conflict of instructions may occur, in which case we 1755 // dump the conflict set to the standard error. 
1756 void FilterChooser::doFilter() { 1757 unsigned Num = Opcodes.size(); 1758 assert(Num && "FilterChooser created with no instructions"); 1759 1760 // Try regions of consecutive known bit values first. 1761 if (filterProcessor(false)) 1762 return; 1763 1764 // Then regions of mixed bits (both known and unitialized bit values allowed). 1765 if (filterProcessor(true)) 1766 return; 1767 1768 // Heuristics to cope with conflict set {t2CMPrs, t2SUBSrr, t2SUBSrs} where 1769 // no single instruction for the maximum ATTR_MIXED region Inst{14-4} has a 1770 // well-known encoding pattern. In such case, we backtrack and scan for the 1771 // the very first consecutive ATTR_ALL_SET region and assign a filter to it. 1772 if (Num == 3 && filterProcessor(true, false)) 1773 return; 1774 1775 // If we come to here, the instruction decoding has failed. 1776 // Set the BestIndex to -1 to indicate so. 1777 BestIndex = -1; 1778 } 1779 1780 // emitTableEntries - Emit state machine entries to decode our share of 1781 // instructions. 1782 void FilterChooser::emitTableEntries(DecoderTableInfo &TableInfo) const { 1783 if (Opcodes.size() == 1) { 1784 // There is only one instruction in the set, which is great! 1785 // Call emitSingletonDecoder() to see whether there are any remaining 1786 // encodings bits. 1787 emitSingletonTableEntry(TableInfo, Opcodes[0]); 1788 return; 1789 } 1790 1791 // Choose the best filter to do the decodings! 1792 if (BestIndex != -1) { 1793 const Filter &Best = Filters[BestIndex]; 1794 if (Best.getNumFiltered() == 1) 1795 emitSingletonTableEntry(TableInfo, Best); 1796 else 1797 Best.emitTableEntry(TableInfo); 1798 return; 1799 } 1800 1801 // We don't know how to decode these instructions! Dump the 1802 // conflict set and bail. 1803 1804 // Print out useful conflict information for postmortem analysis. 
1805 errs() << "Decoding Conflict:\n"; 1806 1807 dumpStack(errs(), "\t\t"); 1808 1809 for (auto Opcode : Opcodes) { 1810 errs() << '\t'; 1811 emitNameWithID(errs(), Opcode.EncodingID); 1812 errs() << " "; 1813 dumpBits( 1814 errs(), 1815 getBitsField(*AllInstructions[Opcode.EncodingID].EncodingDef, "Inst")); 1816 errs() << '\n'; 1817 } 1818 } 1819 1820 static std::string findOperandDecoderMethod(Record *Record) { 1821 std::string Decoder; 1822 1823 RecordVal *DecoderString = Record->getValue("DecoderMethod"); 1824 StringInit *String = DecoderString ? 1825 dyn_cast<StringInit>(DecoderString->getValue()) : nullptr; 1826 if (String) { 1827 Decoder = std::string(String->getValue()); 1828 if (!Decoder.empty()) 1829 return Decoder; 1830 } 1831 1832 if (Record->isSubClassOf("RegisterOperand")) 1833 Record = Record->getValueAsDef("RegClass"); 1834 1835 if (Record->isSubClassOf("RegisterClass")) { 1836 Decoder = "Decode" + Record->getName().str() + "RegisterClass"; 1837 } else if (Record->isSubClassOf("PointerLikeRegClass")) { 1838 Decoder = "DecodePointerLikeRegClass" + 1839 utostr(Record->getValueAsInt("RegClassKind")); 1840 } 1841 1842 return Decoder; 1843 } 1844 1845 OperandInfo getOpInfo(Record *TypeRecord) { 1846 std::string Decoder = findOperandDecoderMethod(TypeRecord); 1847 1848 RecordVal *HasCompleteDecoderVal = TypeRecord->getValue("hasCompleteDecoder"); 1849 BitInit *HasCompleteDecoderBit = 1850 HasCompleteDecoderVal 1851 ? dyn_cast<BitInit>(HasCompleteDecoderVal->getValue()) 1852 : nullptr; 1853 bool HasCompleteDecoder = 1854 HasCompleteDecoderBit ? 
HasCompleteDecoderBit->getValue() : true; 1855 1856 return OperandInfo(Decoder, HasCompleteDecoder); 1857 } 1858 1859 void parseVarLenInstOperand(const Record &Def, 1860 std::vector<OperandInfo> &Operands, 1861 const CodeGenInstruction &CGI) { 1862 1863 const RecordVal *RV = Def.getValue("Inst"); 1864 VarLenInst VLI(cast<DagInit>(RV->getValue()), RV); 1865 SmallVector<int> TiedTo; 1866 1867 for (unsigned Idx = 0; Idx < CGI.Operands.size(); ++Idx) { 1868 auto &Op = CGI.Operands[Idx]; 1869 if (Op.MIOperandInfo && Op.MIOperandInfo->getNumArgs() > 0) 1870 for (auto *Arg : Op.MIOperandInfo->getArgs()) 1871 Operands.push_back(getOpInfo(cast<DefInit>(Arg)->getDef())); 1872 else 1873 Operands.push_back(getOpInfo(Op.Rec)); 1874 1875 int TiedReg = Op.getTiedRegister(); 1876 TiedTo.push_back(-1); 1877 if (TiedReg != -1) { 1878 TiedTo[Idx] = TiedReg; 1879 TiedTo[TiedReg] = Idx; 1880 } 1881 } 1882 1883 unsigned CurrBitPos = 0; 1884 for (auto &EncodingSegment : VLI) { 1885 unsigned Offset = 0; 1886 StringRef OpName; 1887 1888 if (const StringInit *SI = dyn_cast<StringInit>(EncodingSegment.Value)) { 1889 OpName = SI->getValue(); 1890 } else if (const DagInit *DI = dyn_cast<DagInit>(EncodingSegment.Value)) { 1891 OpName = cast<StringInit>(DI->getArg(0))->getValue(); 1892 Offset = cast<IntInit>(DI->getArg(2))->getValue(); 1893 } 1894 1895 if (!OpName.empty()) { 1896 auto OpSubOpPair = 1897 const_cast<CodeGenInstruction &>(CGI).Operands.ParseOperandName( 1898 OpName); 1899 unsigned OpIdx = CGI.Operands.getFlattenedOperandNumber(OpSubOpPair); 1900 Operands[OpIdx].addField(CurrBitPos, EncodingSegment.BitWidth, Offset); 1901 1902 int TiedReg = TiedTo[OpSubOpPair.first]; 1903 if (TiedReg != -1) { 1904 unsigned OpIdx = CGI.Operands.getFlattenedOperandNumber( 1905 std::make_pair(TiedReg, OpSubOpPair.second)); 1906 Operands[OpIdx].addField(CurrBitPos, EncodingSegment.BitWidth, Offset); 1907 } 1908 } 1909 1910 CurrBitPos += EncodingSegment.BitWidth; 1911 } 1912 } 1913 1914 static unsigned 
1915 populateInstruction(CodeGenTarget &Target, const Record &EncodingDef, 1916 const CodeGenInstruction &CGI, unsigned Opc, 1917 std::map<unsigned, std::vector<OperandInfo>> &Operands, 1918 bool IsVarLenInst) { 1919 const Record &Def = *CGI.TheDef; 1920 // If all the bit positions are not specified; do not decode this instruction. 1921 // We are bound to fail! For proper disassembly, the well-known encoding bits 1922 // of the instruction must be fully specified. 1923 1924 BitsInit &Bits = getBitsField(EncodingDef, "Inst"); 1925 if (Bits.allInComplete()) 1926 return 0; 1927 1928 std::vector<OperandInfo> InsnOperands; 1929 1930 // If the instruction has specified a custom decoding hook, use that instead 1931 // of trying to auto-generate the decoder. 1932 StringRef InstDecoder = EncodingDef.getValueAsString("DecoderMethod"); 1933 if (InstDecoder != "") { 1934 bool HasCompleteInstDecoder = EncodingDef.getValueAsBit("hasCompleteDecoder"); 1935 InsnOperands.push_back( 1936 OperandInfo(std::string(InstDecoder), HasCompleteInstDecoder)); 1937 Operands[Opc] = InsnOperands; 1938 return Bits.getNumBits(); 1939 } 1940 1941 // Generate a description of the operand of the instruction that we know 1942 // how to decode automatically. 1943 // FIXME: We'll need to have a way to manually override this as needed. 1944 1945 // Gather the outputs/inputs of the instruction, so we can find their 1946 // positions in the encoding. This assumes for now that they appear in the 1947 // MCInst in the order that they're listed. 
1948 std::vector<std::pair<Init*, StringRef>> InOutOperands; 1949 DagInit *Out = Def.getValueAsDag("OutOperandList"); 1950 DagInit *In = Def.getValueAsDag("InOperandList"); 1951 for (unsigned i = 0; i < Out->getNumArgs(); ++i) 1952 InOutOperands.push_back( 1953 std::make_pair(Out->getArg(i), Out->getArgNameStr(i))); 1954 for (unsigned i = 0; i < In->getNumArgs(); ++i) 1955 InOutOperands.push_back( 1956 std::make_pair(In->getArg(i), In->getArgNameStr(i))); 1957 1958 // Search for tied operands, so that we can correctly instantiate 1959 // operands that are not explicitly represented in the encoding. 1960 std::map<std::string, std::string> TiedNames; 1961 for (unsigned i = 0; i < CGI.Operands.size(); ++i) { 1962 int tiedTo = CGI.Operands[i].getTiedRegister(); 1963 if (tiedTo != -1) { 1964 std::pair<unsigned, unsigned> SO = 1965 CGI.Operands.getSubOperandNumber(tiedTo); 1966 TiedNames[std::string(InOutOperands[i].second)] = 1967 std::string(InOutOperands[SO.first].second); 1968 TiedNames[std::string(InOutOperands[SO.first].second)] = 1969 std::string(InOutOperands[i].second); 1970 } 1971 } 1972 1973 if (IsVarLenInst) { 1974 parseVarLenInstOperand(EncodingDef, InsnOperands, CGI); 1975 } else { 1976 std::map<std::string, std::vector<OperandInfo>> NumberedInsnOperands; 1977 std::set<std::string> NumberedInsnOperandsNoTie; 1978 if (Target.getInstructionSet()->getValueAsBit( 1979 "decodePositionallyEncodedOperands")) { 1980 const std::vector<RecordVal> &Vals = Def.getValues(); 1981 unsigned NumberedOp = 0; 1982 1983 std::set<unsigned> NamedOpIndices; 1984 if (Target.getInstructionSet()->getValueAsBit( 1985 "noNamedPositionallyEncodedOperands")) 1986 // Collect the set of operand indices that might correspond to named 1987 // operand, and skip these when assigning operands based on position. 
1988 for (unsigned i = 0, e = Vals.size(); i != e; ++i) { 1989 unsigned OpIdx; 1990 if (!CGI.Operands.hasOperandNamed(Vals[i].getName(), OpIdx)) 1991 continue; 1992 1993 NamedOpIndices.insert(OpIdx); 1994 } 1995 1996 for (unsigned i = 0, e = Vals.size(); i != e; ++i) { 1997 // Ignore fixed fields in the record, we're looking for values like: 1998 // bits<5> RST = { ?, ?, ?, ?, ? }; 1999 if (Vals[i].isNonconcreteOK() || Vals[i].getValue()->isComplete()) 2000 continue; 2001 2002 // Determine if Vals[i] actually contributes to the Inst encoding. 2003 unsigned bi = 0; 2004 for (; bi < Bits.getNumBits(); ++bi) { 2005 VarInit *Var = nullptr; 2006 VarBitInit *BI = dyn_cast<VarBitInit>(Bits.getBit(bi)); 2007 if (BI) 2008 Var = dyn_cast<VarInit>(BI->getBitVar()); 2009 else 2010 Var = dyn_cast<VarInit>(Bits.getBit(bi)); 2011 2012 if (Var && Var->getName() == Vals[i].getName()) 2013 break; 2014 } 2015 2016 if (bi == Bits.getNumBits()) 2017 continue; 2018 2019 // Skip variables that correspond to explicitly-named operands. 2020 unsigned OpIdx; 2021 if (CGI.Operands.hasOperandNamed(Vals[i].getName(), OpIdx)) 2022 continue; 2023 2024 // Get the bit range for this operand: 2025 unsigned bitStart = bi++, bitWidth = 1; 2026 for (; bi < Bits.getNumBits(); ++bi) { 2027 VarInit *Var = nullptr; 2028 VarBitInit *BI = dyn_cast<VarBitInit>(Bits.getBit(bi)); 2029 if (BI) 2030 Var = dyn_cast<VarInit>(BI->getBitVar()); 2031 else 2032 Var = dyn_cast<VarInit>(Bits.getBit(bi)); 2033 2034 if (!Var) 2035 break; 2036 2037 if (Var->getName() != Vals[i].getName()) 2038 break; 2039 2040 ++bitWidth; 2041 } 2042 2043 unsigned NumberOps = CGI.Operands.size(); 2044 while (NumberedOp < NumberOps && 2045 (CGI.Operands.isFlatOperandNotEmitted(NumberedOp) || 2046 (!NamedOpIndices.empty() && 2047 NamedOpIndices.count( 2048 CGI.Operands.getSubOperandNumber(NumberedOp).first)))) 2049 ++NumberedOp; 2050 2051 OpIdx = NumberedOp++; 2052 2053 // OpIdx now holds the ordered operand number of Vals[i]. 
2054 std::pair<unsigned, unsigned> SO = 2055 CGI.Operands.getSubOperandNumber(OpIdx); 2056 const std::string &Name = CGI.Operands[SO.first].Name; 2057 2058 LLVM_DEBUG(dbgs() << "Numbered operand mapping for " << Def.getName() 2059 << ": " << Name << "(" << SO.first << ", " 2060 << SO.second << ") => " << Vals[i].getName() << "\n"); 2061 2062 std::string Decoder; 2063 Record *TypeRecord = CGI.Operands[SO.first].Rec; 2064 2065 RecordVal *DecoderString = TypeRecord->getValue("DecoderMethod"); 2066 StringInit *String = 2067 DecoderString ? dyn_cast<StringInit>(DecoderString->getValue()) 2068 : nullptr; 2069 if (String && String->getValue() != "") 2070 Decoder = std::string(String->getValue()); 2071 2072 if (Decoder == "" && CGI.Operands[SO.first].MIOperandInfo && 2073 CGI.Operands[SO.first].MIOperandInfo->getNumArgs()) { 2074 Init *Arg = CGI.Operands[SO.first].MIOperandInfo->getArg(SO.second); 2075 if (DefInit *DI = cast<DefInit>(Arg)) 2076 TypeRecord = DI->getDef(); 2077 } 2078 2079 bool isReg = false; 2080 if (TypeRecord->isSubClassOf("RegisterOperand")) 2081 TypeRecord = TypeRecord->getValueAsDef("RegClass"); 2082 if (TypeRecord->isSubClassOf("RegisterClass")) { 2083 Decoder = "Decode" + TypeRecord->getName().str() + "RegisterClass"; 2084 isReg = true; 2085 } else if (TypeRecord->isSubClassOf("PointerLikeRegClass")) { 2086 Decoder = "DecodePointerLikeRegClass" + 2087 utostr(TypeRecord->getValueAsInt("RegClassKind")); 2088 isReg = true; 2089 } 2090 2091 DecoderString = TypeRecord->getValue("DecoderMethod"); 2092 String = DecoderString ? dyn_cast<StringInit>(DecoderString->getValue()) 2093 : nullptr; 2094 if (!isReg && String && String->getValue() != "") 2095 Decoder = std::string(String->getValue()); 2096 2097 RecordVal *HasCompleteDecoderVal = 2098 TypeRecord->getValue("hasCompleteDecoder"); 2099 BitInit *HasCompleteDecoderBit = 2100 HasCompleteDecoderVal 2101 ? 
dyn_cast<BitInit>(HasCompleteDecoderVal->getValue()) 2102 : nullptr; 2103 bool HasCompleteDecoder = 2104 HasCompleteDecoderBit ? HasCompleteDecoderBit->getValue() : true; 2105 2106 OperandInfo OpInfo(Decoder, HasCompleteDecoder); 2107 OpInfo.addField(bitStart, bitWidth, 0); 2108 2109 NumberedInsnOperands[Name].push_back(OpInfo); 2110 2111 // FIXME: For complex operands with custom decoders we can't handle tied 2112 // sub-operands automatically. Skip those here and assume that this is 2113 // fixed up elsewhere. 2114 if (CGI.Operands[SO.first].MIOperandInfo && 2115 CGI.Operands[SO.first].MIOperandInfo->getNumArgs() > 1 && String && 2116 String->getValue() != "") 2117 NumberedInsnOperandsNoTie.insert(Name); 2118 } 2119 } 2120 2121 // For each operand, see if we can figure out where it is encoded. 2122 for (const auto &Op : InOutOperands) { 2123 if (!NumberedInsnOperands[std::string(Op.second)].empty()) { 2124 llvm::append_range(InsnOperands, 2125 NumberedInsnOperands[std::string(Op.second)]); 2126 continue; 2127 } 2128 if (!NumberedInsnOperands[TiedNames[std::string(Op.second)]].empty()) { 2129 if (!NumberedInsnOperandsNoTie.count( 2130 TiedNames[std::string(Op.second)])) { 2131 // Figure out to which (sub)operand we're tied. 2132 unsigned i = 2133 CGI.Operands.getOperandNamed(TiedNames[std::string(Op.second)]); 2134 int tiedTo = CGI.Operands[i].getTiedRegister(); 2135 if (tiedTo == -1) { 2136 i = CGI.Operands.getOperandNamed(Op.second); 2137 tiedTo = CGI.Operands[i].getTiedRegister(); 2138 } 2139 2140 if (tiedTo != -1) { 2141 std::pair<unsigned, unsigned> SO = 2142 CGI.Operands.getSubOperandNumber(tiedTo); 2143 2144 InsnOperands.push_back( 2145 NumberedInsnOperands[TiedNames[std::string(Op.second)]] 2146 [SO.second]); 2147 } 2148 } 2149 continue; 2150 } 2151 2152 // At this point, we can locate the decoder field, but we need to know how 2153 // to interpret it. As a first step, require the target to provide 2154 // callbacks for decoding register classes. 
2155 2156 OperandInfo OpInfo = getOpInfo(cast<DefInit>(Op.first)->getDef()); 2157 2158 // Some bits of the operand may be required to be 1 depending on the 2159 // instruction's encoding. Collect those bits. 2160 if (const RecordVal *EncodedValue = EncodingDef.getValue(Op.second)) 2161 if (const BitsInit *OpBits = 2162 dyn_cast<BitsInit>(EncodedValue->getValue())) 2163 for (unsigned I = 0; I < OpBits->getNumBits(); ++I) 2164 if (const BitInit *OpBit = dyn_cast<BitInit>(OpBits->getBit(I))) 2165 if (OpBit->getValue()) 2166 OpInfo.InitValue |= 1ULL << I; 2167 2168 unsigned Base = ~0U; 2169 unsigned Width = 0; 2170 unsigned Offset = 0; 2171 2172 for (unsigned bi = 0; bi < Bits.getNumBits(); ++bi) { 2173 VarInit *Var = nullptr; 2174 VarBitInit *BI = dyn_cast<VarBitInit>(Bits.getBit(bi)); 2175 if (BI) 2176 Var = dyn_cast<VarInit>(BI->getBitVar()); 2177 else 2178 Var = dyn_cast<VarInit>(Bits.getBit(bi)); 2179 2180 if (!Var) { 2181 if (Base != ~0U) { 2182 OpInfo.addField(Base, Width, Offset); 2183 Base = ~0U; 2184 Width = 0; 2185 Offset = 0; 2186 } 2187 continue; 2188 } 2189 2190 if ((Var->getName() != Op.second && 2191 Var->getName() != TiedNames[std::string(Op.second)])) { 2192 if (Base != ~0U) { 2193 OpInfo.addField(Base, Width, Offset); 2194 Base = ~0U; 2195 Width = 0; 2196 Offset = 0; 2197 } 2198 continue; 2199 } 2200 2201 if (Base == ~0U) { 2202 Base = bi; 2203 Width = 1; 2204 Offset = BI ? BI->getBitNum() : 0; 2205 } else if (BI && BI->getBitNum() != Offset + Width) { 2206 OpInfo.addField(Base, Width, Offset); 2207 Base = bi; 2208 Width = 1; 2209 Offset = BI->getBitNum(); 2210 } else { 2211 ++Width; 2212 } 2213 } 2214 2215 if (Base != ~0U) 2216 OpInfo.addField(Base, Width, Offset); 2217 2218 if (OpInfo.numFields() > 0) 2219 InsnOperands.push_back(OpInfo); 2220 } 2221 } 2222 2223 Operands[Opc] = InsnOperands; 2224 2225 #if 0 2226 LLVM_DEBUG({ 2227 // Dumps the instruction encoding bits. 
2228 dumpBits(errs(), Bits); 2229 2230 errs() << '\n'; 2231 2232 // Dumps the list of operand info. 2233 for (unsigned i = 0, e = CGI.Operands.size(); i != e; ++i) { 2234 const CGIOperandList::OperandInfo &Info = CGI.Operands[i]; 2235 const std::string &OperandName = Info.Name; 2236 const Record &OperandDef = *Info.Rec; 2237 2238 errs() << "\t" << OperandName << " (" << OperandDef.getName() << ")\n"; 2239 } 2240 }); 2241 #endif 2242 2243 return Bits.getNumBits(); 2244 } 2245 2246 // emitFieldFromInstruction - Emit the templated helper function 2247 // fieldFromInstruction(). 2248 // On Windows we make sure that this function is not inlined when 2249 // using the VS compiler. It has a bug which causes the function 2250 // to be optimized out in some circustances. See llvm.org/pr38292 2251 static void emitFieldFromInstruction(formatted_raw_ostream &OS) { 2252 OS << "// Helper functions for extracting fields from encoded instructions.\n" 2253 << "// InsnType must either be integral or an APInt-like object that " 2254 "must:\n" 2255 << "// * be default-constructible and copy-constructible\n" 2256 << "// * be constructible from a uint64_t\n" 2257 << "// * be constructible from an APInt (this can be private)\n" 2258 << "// * Support insertBits(bits, startBit, numBits)\n" 2259 << "// * Support extractBitsAsZExtValue(numBits, startBit)\n" 2260 << "// * be convertible to bool\n" 2261 << "// * Support the ~, &, ==, and != operators with other objects of " 2262 "the same type\n" 2263 << "// * Support put (<<) to raw_ostream&\n" 2264 << "template <typename InsnType>\n" 2265 << "#if defined(_MSC_VER) && !defined(__clang__)\n" 2266 << "__declspec(noinline)\n" 2267 << "#endif\n" 2268 << "static std::enable_if_t<std::is_integral<InsnType>::value, InsnType>\n" 2269 << "fieldFromInstruction(const InsnType &insn, unsigned startBit,\n" 2270 << " unsigned numBits) {\n" 2271 << " assert(startBit + numBits <= 64 && \"Cannot support >64-bit " 2272 "extractions!\");\n" 2273 << " 
assert(startBit + numBits <= (sizeof(InsnType) * 8) &&\n" 2274 << " \"Instruction field out of bounds!\");\n" 2275 << " InsnType fieldMask;\n" 2276 << " if (numBits == sizeof(InsnType) * 8)\n" 2277 << " fieldMask = (InsnType)(-1LL);\n" 2278 << " else\n" 2279 << " fieldMask = (((InsnType)1 << numBits) - 1) << startBit;\n" 2280 << " return (insn & fieldMask) >> startBit;\n" 2281 << "}\n" 2282 << "\n" 2283 << "template <typename InsnType>\n" 2284 << "static std::enable_if_t<!std::is_integral<InsnType>::value, " 2285 "uint64_t>\n" 2286 << "fieldFromInstruction(const InsnType &insn, unsigned startBit,\n" 2287 << " unsigned numBits) {\n" 2288 << " return insn.extractBitsAsZExtValue(numBits, startBit);\n" 2289 << "}\n\n"; 2290 } 2291 2292 // emitInsertBits - Emit the templated helper function insertBits(). 2293 static void emitInsertBits(formatted_raw_ostream &OS) { 2294 OS << "// Helper function for inserting bits extracted from an encoded " 2295 "instruction into\n" 2296 << "// a field.\n" 2297 << "template <typename InsnType>\n" 2298 << "static std::enable_if_t<std::is_integral<InsnType>::value>\n" 2299 << "insertBits(InsnType &field, InsnType bits, unsigned startBit, " 2300 "unsigned numBits) {\n" 2301 << " assert(startBit + numBits <= sizeof field * 8);\n" 2302 << " field |= (InsnType)bits << startBit;\n" 2303 << "}\n" 2304 << "\n" 2305 << "template <typename InsnType>\n" 2306 << "static std::enable_if_t<!std::is_integral<InsnType>::value>\n" 2307 << "insertBits(InsnType &field, uint64_t bits, unsigned startBit, " 2308 "unsigned numBits) {\n" 2309 << " field.insertBits(bits, startBit, numBits);\n" 2310 << "}\n\n"; 2311 } 2312 2313 // emitDecodeInstruction - Emit the templated helper function 2314 // decodeInstruction(). 
2315 static void emitDecodeInstruction(formatted_raw_ostream &OS, 2316 bool IsVarLenInst) { 2317 OS << "template <typename InsnType>\n" 2318 << "static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], " 2319 "MCInst &MI,\n" 2320 << " InsnType insn, uint64_t " 2321 "Address,\n" 2322 << " const MCDisassembler *DisAsm,\n" 2323 << " const MCSubtargetInfo &STI"; 2324 if (IsVarLenInst) { 2325 OS << ",\n" 2326 << " llvm::function_ref<void(APInt " 2327 "&," 2328 << " uint64_t)> makeUp"; 2329 } 2330 OS << ") {\n" 2331 << " const FeatureBitset &Bits = STI.getFeatureBits();\n" 2332 << "\n" 2333 << " const uint8_t *Ptr = DecodeTable;\n" 2334 << " uint64_t CurFieldValue = 0;\n" 2335 << " DecodeStatus S = MCDisassembler::Success;\n" 2336 << " while (true) {\n" 2337 << " ptrdiff_t Loc = Ptr - DecodeTable;\n" 2338 << " switch (*Ptr) {\n" 2339 << " default:\n" 2340 << " errs() << Loc << \": Unexpected decode table opcode!\\n\";\n" 2341 << " return MCDisassembler::Fail;\n" 2342 << " case MCD::OPC_ExtractField: {\n" 2343 << " unsigned Start = *++Ptr;\n" 2344 << " unsigned Len = *++Ptr;\n" 2345 << " ++Ptr;\n"; 2346 if (IsVarLenInst) 2347 OS << " makeUp(insn, Start + Len);\n"; 2348 OS << " CurFieldValue = fieldFromInstruction(insn, Start, Len);\n" 2349 << " LLVM_DEBUG(dbgs() << Loc << \": OPC_ExtractField(\" << Start << " 2350 "\", \"\n" 2351 << " << Len << \"): \" << CurFieldValue << \"\\n\");\n" 2352 << " break;\n" 2353 << " }\n" 2354 << " case MCD::OPC_FilterValue: {\n" 2355 << " // Decode the field value.\n" 2356 << " unsigned Len;\n" 2357 << " uint64_t Val = decodeULEB128(++Ptr, &Len);\n" 2358 << " Ptr += Len;\n" 2359 << " // NumToSkip is a plain 24-bit integer.\n" 2360 << " unsigned NumToSkip = *Ptr++;\n" 2361 << " NumToSkip |= (*Ptr++) << 8;\n" 2362 << " NumToSkip |= (*Ptr++) << 16;\n" 2363 << "\n" 2364 << " // Perform the filter operation.\n" 2365 << " if (Val != CurFieldValue)\n" 2366 << " Ptr += NumToSkip;\n" 2367 << " LLVM_DEBUG(dbgs() << Loc << \": 
OPC_FilterValue(\" << Val << " 2368 "\", \" << NumToSkip\n" 2369 << " << \"): \" << ((Val != CurFieldValue) ? \"FAIL:\" " 2370 ": \"PASS:\")\n" 2371 << " << \" continuing at \" << (Ptr - DecodeTable) << " 2372 "\"\\n\");\n" 2373 << "\n" 2374 << " break;\n" 2375 << " }\n" 2376 << " case MCD::OPC_CheckField: {\n" 2377 << " unsigned Start = *++Ptr;\n" 2378 << " unsigned Len = *++Ptr;\n"; 2379 if (IsVarLenInst) 2380 OS << " makeUp(insn, Start + Len);\n"; 2381 OS << " uint64_t FieldValue = fieldFromInstruction(insn, Start, Len);\n" 2382 << " // Decode the field value.\n" 2383 << " unsigned PtrLen = 0;\n" 2384 << " uint64_t ExpectedValue = decodeULEB128(++Ptr, &PtrLen);\n" 2385 << " Ptr += PtrLen;\n" 2386 << " // NumToSkip is a plain 24-bit integer.\n" 2387 << " unsigned NumToSkip = *Ptr++;\n" 2388 << " NumToSkip |= (*Ptr++) << 8;\n" 2389 << " NumToSkip |= (*Ptr++) << 16;\n" 2390 << "\n" 2391 << " // If the actual and expected values don't match, skip.\n" 2392 << " if (ExpectedValue != FieldValue)\n" 2393 << " Ptr += NumToSkip;\n" 2394 << " LLVM_DEBUG(dbgs() << Loc << \": OPC_CheckField(\" << Start << " 2395 "\", \"\n" 2396 << " << Len << \", \" << ExpectedValue << \", \" << " 2397 "NumToSkip\n" 2398 << " << \"): FieldValue = \" << FieldValue << \", " 2399 "ExpectedValue = \"\n" 2400 << " << ExpectedValue << \": \"\n" 2401 << " << ((ExpectedValue == FieldValue) ? 
\"PASS\\n\" : " 2402 "\"FAIL\\n\"));\n" 2403 << " break;\n" 2404 << " }\n" 2405 << " case MCD::OPC_CheckPredicate: {\n" 2406 << " unsigned Len;\n" 2407 << " // Decode the Predicate Index value.\n" 2408 << " unsigned PIdx = decodeULEB128(++Ptr, &Len);\n" 2409 << " Ptr += Len;\n" 2410 << " // NumToSkip is a plain 24-bit integer.\n" 2411 << " unsigned NumToSkip = *Ptr++;\n" 2412 << " NumToSkip |= (*Ptr++) << 8;\n" 2413 << " NumToSkip |= (*Ptr++) << 16;\n" 2414 << " // Check the predicate.\n" 2415 << " bool Pred;\n" 2416 << " if (!(Pred = checkDecoderPredicate(PIdx, Bits)))\n" 2417 << " Ptr += NumToSkip;\n" 2418 << " (void)Pred;\n" 2419 << " LLVM_DEBUG(dbgs() << Loc << \": OPC_CheckPredicate(\" << PIdx " 2420 "<< \"): \"\n" 2421 << " << (Pred ? \"PASS\\n\" : \"FAIL\\n\"));\n" 2422 << "\n" 2423 << " break;\n" 2424 << " }\n" 2425 << " case MCD::OPC_Decode: {\n" 2426 << " unsigned Len;\n" 2427 << " // Decode the Opcode value.\n" 2428 << " unsigned Opc = decodeULEB128(++Ptr, &Len);\n" 2429 << " Ptr += Len;\n" 2430 << " unsigned DecodeIdx = decodeULEB128(Ptr, &Len);\n" 2431 << " Ptr += Len;\n" 2432 << "\n" 2433 << " MI.clear();\n" 2434 << " MI.setOpcode(Opc);\n" 2435 << " bool DecodeComplete;\n"; 2436 if (IsVarLenInst) { 2437 OS << " Len = InstrLenTable[Opc];\n" 2438 << " makeUp(insn, Len);\n"; 2439 } 2440 OS << " S = decodeToMCInst(S, DecodeIdx, insn, MI, Address, DisAsm, " 2441 "DecodeComplete);\n" 2442 << " assert(DecodeComplete);\n" 2443 << "\n" 2444 << " LLVM_DEBUG(dbgs() << Loc << \": OPC_Decode: opcode \" << Opc\n" 2445 << " << \", using decoder \" << DecodeIdx << \": \"\n" 2446 << " << (S != MCDisassembler::Fail ? 
\"PASS\" : " 2447 "\"FAIL\") << \"\\n\");\n" 2448 << " return S;\n" 2449 << " }\n" 2450 << " case MCD::OPC_TryDecode: {\n" 2451 << " unsigned Len;\n" 2452 << " // Decode the Opcode value.\n" 2453 << " unsigned Opc = decodeULEB128(++Ptr, &Len);\n" 2454 << " Ptr += Len;\n" 2455 << " unsigned DecodeIdx = decodeULEB128(Ptr, &Len);\n" 2456 << " Ptr += Len;\n" 2457 << " // NumToSkip is a plain 24-bit integer.\n" 2458 << " unsigned NumToSkip = *Ptr++;\n" 2459 << " NumToSkip |= (*Ptr++) << 8;\n" 2460 << " NumToSkip |= (*Ptr++) << 16;\n" 2461 << "\n" 2462 << " // Perform the decode operation.\n" 2463 << " MCInst TmpMI;\n" 2464 << " TmpMI.setOpcode(Opc);\n" 2465 << " bool DecodeComplete;\n" 2466 << " S = decodeToMCInst(S, DecodeIdx, insn, TmpMI, Address, DisAsm, " 2467 "DecodeComplete);\n" 2468 << " LLVM_DEBUG(dbgs() << Loc << \": OPC_TryDecode: opcode \" << " 2469 "Opc\n" 2470 << " << \", using decoder \" << DecodeIdx << \": \");\n" 2471 << "\n" 2472 << " if (DecodeComplete) {\n" 2473 << " // Decoding complete.\n" 2474 << " LLVM_DEBUG(dbgs() << (S != MCDisassembler::Fail ? \"PASS\" : " 2475 "\"FAIL\") << \"\\n\");\n" 2476 << " MI = TmpMI;\n" 2477 << " return S;\n" 2478 << " } else {\n" 2479 << " assert(S == MCDisassembler::Fail);\n" 2480 << " // If the decoding was incomplete, skip.\n" 2481 << " Ptr += NumToSkip;\n" 2482 << " LLVM_DEBUG(dbgs() << \"FAIL: continuing at \" << (Ptr - " 2483 "DecodeTable) << \"\\n\");\n" 2484 << " // Reset decode status. 
This also drops a SoftFail status " 2485 "that could be\n" 2486 << " // set before the decode attempt.\n" 2487 << " S = MCDisassembler::Success;\n" 2488 << " }\n" 2489 << " break;\n" 2490 << " }\n" 2491 << " case MCD::OPC_SoftFail: {\n" 2492 << " // Decode the mask values.\n" 2493 << " unsigned Len;\n" 2494 << " uint64_t PositiveMask = decodeULEB128(++Ptr, &Len);\n" 2495 << " Ptr += Len;\n" 2496 << " uint64_t NegativeMask = decodeULEB128(Ptr, &Len);\n" 2497 << " Ptr += Len;\n" 2498 << " bool Fail = (insn & PositiveMask) != 0 || (~insn & " 2499 "NegativeMask) != 0;\n" 2500 << " if (Fail)\n" 2501 << " S = MCDisassembler::SoftFail;\n" 2502 << " LLVM_DEBUG(dbgs() << Loc << \": OPC_SoftFail: \" << (Fail ? " 2503 "\"FAIL\\n\" : \"PASS\\n\"));\n" 2504 << " break;\n" 2505 << " }\n" 2506 << " case MCD::OPC_Fail: {\n" 2507 << " LLVM_DEBUG(dbgs() << Loc << \": OPC_Fail\\n\");\n" 2508 << " return MCDisassembler::Fail;\n" 2509 << " }\n" 2510 << " }\n" 2511 << " }\n" 2512 << " llvm_unreachable(\"bogosity detected in disassembler state " 2513 "machine!\");\n" 2514 << "}\n\n"; 2515 } 2516 2517 // Emits disassembler code for instruction decoding. 2518 void DecoderEmitter::run(raw_ostream &o) { 2519 formatted_raw_ostream OS(o); 2520 OS << "#include \"llvm/MC/MCInst.h\"\n"; 2521 OS << "#include \"llvm/MC/MCSubtargetInfo.h\"\n"; 2522 OS << "#include \"llvm/MC/SubtargetFeature.h\"\n"; 2523 OS << "#include \"llvm/Support/DataTypes.h\"\n"; 2524 OS << "#include \"llvm/Support/Debug.h\"\n"; 2525 OS << "#include \"llvm/Support/LEB128.h\"\n"; 2526 OS << "#include \"llvm/Support/raw_ostream.h\"\n"; 2527 OS << "#include <assert.h>\n"; 2528 OS << '\n'; 2529 OS << "namespace llvm {\n\n"; 2530 2531 emitFieldFromInstruction(OS); 2532 emitInsertBits(OS); 2533 2534 Target.reverseBitsForLittleEndianEncoding(); 2535 2536 // Parameterize the decoders based on namespace and instruction width. 
2537 std::set<StringRef> HwModeNames; 2538 const auto &NumberedInstructions = Target.getInstructionsByEnumValue(); 2539 NumberedEncodings.reserve(NumberedInstructions.size()); 2540 DenseMap<Record *, unsigned> IndexOfInstruction; 2541 // First, collect all HwModes referenced by the target. 2542 for (const auto &NumberedInstruction : NumberedInstructions) { 2543 IndexOfInstruction[NumberedInstruction->TheDef] = NumberedEncodings.size(); 2544 2545 if (const RecordVal *RV = 2546 NumberedInstruction->TheDef->getValue("EncodingInfos")) { 2547 if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) { 2548 const CodeGenHwModes &HWM = Target.getHwModes(); 2549 EncodingInfoByHwMode EBM(DI->getDef(), HWM); 2550 for (auto &KV : EBM) 2551 HwModeNames.insert(HWM.getMode(KV.first).Name); 2552 } 2553 } 2554 } 2555 2556 // If HwModeNames is empty, add the empty string so we always have one HwMode. 2557 if (HwModeNames.empty()) 2558 HwModeNames.insert(""); 2559 2560 for (const auto &NumberedInstruction : NumberedInstructions) { 2561 IndexOfInstruction[NumberedInstruction->TheDef] = NumberedEncodings.size(); 2562 2563 if (const RecordVal *RV = 2564 NumberedInstruction->TheDef->getValue("EncodingInfos")) { 2565 if (DefInit *DI = dyn_cast_or_null<DefInit>(RV->getValue())) { 2566 const CodeGenHwModes &HWM = Target.getHwModes(); 2567 EncodingInfoByHwMode EBM(DI->getDef(), HWM); 2568 for (auto &KV : EBM) { 2569 NumberedEncodings.emplace_back(KV.second, NumberedInstruction, 2570 HWM.getMode(KV.first).Name); 2571 HwModeNames.insert(HWM.getMode(KV.first).Name); 2572 } 2573 continue; 2574 } 2575 } 2576 // This instruction is encoded the same on all HwModes. Emit it for all 2577 // HwModes. 
2578 for (StringRef HwModeName : HwModeNames) 2579 NumberedEncodings.emplace_back(NumberedInstruction->TheDef, 2580 NumberedInstruction, HwModeName); 2581 } 2582 for (const auto &NumberedAlias : RK.getAllDerivedDefinitions("AdditionalEncoding")) 2583 NumberedEncodings.emplace_back( 2584 NumberedAlias, 2585 &Target.getInstruction(NumberedAlias->getValueAsDef("AliasOf"))); 2586 2587 std::map<std::pair<std::string, unsigned>, std::vector<EncodingIDAndOpcode>> 2588 OpcMap; 2589 std::map<unsigned, std::vector<OperandInfo>> Operands; 2590 std::vector<unsigned> InstrLen; 2591 2592 bool IsVarLenInst = 2593 any_of(NumberedInstructions, [](const CodeGenInstruction *CGI) { 2594 RecordVal *RV = CGI->TheDef->getValue("Inst"); 2595 return RV && isa<DagInit>(RV->getValue()); 2596 }); 2597 unsigned MaxInstLen = 0; 2598 2599 for (unsigned i = 0; i < NumberedEncodings.size(); ++i) { 2600 const Record *EncodingDef = NumberedEncodings[i].EncodingDef; 2601 const CodeGenInstruction *Inst = NumberedEncodings[i].Inst; 2602 const Record *Def = Inst->TheDef; 2603 unsigned Size = EncodingDef->getValueAsInt("Size"); 2604 if (Def->getValueAsString("Namespace") == "TargetOpcode" || 2605 Def->getValueAsBit("isPseudo") || 2606 Def->getValueAsBit("isAsmParserOnly") || 2607 Def->getValueAsBit("isCodeGenOnly")) { 2608 NumEncodingsLackingDisasm++; 2609 continue; 2610 } 2611 2612 if (i < NumberedInstructions.size()) 2613 NumInstructions++; 2614 NumEncodings++; 2615 2616 if (!Size && !IsVarLenInst) 2617 continue; 2618 2619 if (IsVarLenInst) 2620 InstrLen.resize(NumberedInstructions.size(), 0); 2621 2622 if (unsigned Len = populateInstruction(Target, *EncodingDef, *Inst, i, 2623 Operands, IsVarLenInst)) { 2624 if (IsVarLenInst) { 2625 MaxInstLen = std::max(MaxInstLen, Len); 2626 InstrLen[i] = Len; 2627 } 2628 std::string DecoderNamespace = 2629 std::string(EncodingDef->getValueAsString("DecoderNamespace")); 2630 if (!NumberedEncodings[i].HwModeName.empty()) 2631 DecoderNamespace += 2632 std::string("_") 
+ NumberedEncodings[i].HwModeName.str(); 2633 OpcMap[std::make_pair(DecoderNamespace, Size)].emplace_back( 2634 i, IndexOfInstruction.find(Def)->second); 2635 } else { 2636 NumEncodingsOmitted++; 2637 } 2638 } 2639 2640 DecoderTableInfo TableInfo; 2641 for (const auto &Opc : OpcMap) { 2642 // Emit the decoder for this namespace+width combination. 2643 ArrayRef<EncodingAndInst> NumberedEncodingsRef( 2644 NumberedEncodings.data(), NumberedEncodings.size()); 2645 FilterChooser FC(NumberedEncodingsRef, Opc.second, Operands, 2646 IsVarLenInst ? MaxInstLen : 8 * Opc.first.second, this); 2647 2648 // The decode table is cleared for each top level decoder function. The 2649 // predicates and decoders themselves, however, are shared across all 2650 // decoders to give more opportunities for uniqueing. 2651 TableInfo.Table.clear(); 2652 TableInfo.FixupStack.clear(); 2653 TableInfo.Table.reserve(16384); 2654 TableInfo.FixupStack.emplace_back(); 2655 FC.emitTableEntries(TableInfo); 2656 // Any NumToSkip fixups in the top level scope can resolve to the 2657 // OPC_Fail at the end of the table. 2658 assert(TableInfo.FixupStack.size() == 1 && "fixup stack phasing error!"); 2659 // Resolve any NumToSkip fixups in the current scope. 2660 resolveTableFixups(TableInfo.Table, TableInfo.FixupStack.back(), 2661 TableInfo.Table.size()); 2662 TableInfo.FixupStack.clear(); 2663 2664 TableInfo.Table.push_back(MCD::OPC_Fail); 2665 2666 // Print the table to the output stream. 2667 emitTable(OS, TableInfo.Table, 0, FC.getBitWidth(), Opc.first.first); 2668 OS.flush(); 2669 } 2670 2671 // For variable instruction, we emit a instruction length table 2672 // to let the decoder know how long the instructions are. 2673 // You can see example usage in M68k's disassembler. 2674 if (IsVarLenInst) 2675 emitInstrLenTable(OS, InstrLen); 2676 // Emit the predicate function. 2677 emitPredicateFunction(OS, TableInfo.Predicates, 0); 2678 2679 // Emit the decoder function. 
2680 emitDecoderFunction(OS, TableInfo.Decoders, 0); 2681 2682 // Emit the main entry point for the decoder, decodeInstruction(). 2683 emitDecodeInstruction(OS, IsVarLenInst); 2684 2685 OS << "\n} // end namespace llvm\n"; 2686 } 2687 2688 namespace llvm { 2689 2690 void EmitDecoder(RecordKeeper &RK, raw_ostream &OS, 2691 const std::string &PredicateNamespace, 2692 const std::string &GPrefix, const std::string &GPostfix, 2693 const std::string &ROK, const std::string &RFail, 2694 const std::string &L) { 2695 DecoderEmitter(RK, PredicateNamespace, GPrefix, GPostfix, ROK, RFail, L) 2696 .run(OS); 2697 } 2698 2699 } // end namespace llvm 2700