//===- X86DisassemblerTables.cpp - Disassembler tables ----------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is part of the X86 Disassembler Emitter.
// It contains the implementation of the disassembler tables.
// Documentation for the disassembler emitter in general can be found in
//  X86DisassemblerEmitter.h.
//
//===----------------------------------------------------------------------===//

#include "X86DisassemblerShared.h"
#include "X86DisassemblerTables.h"

#include "llvm/TableGen/TableGenBackend.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"

using namespace llvm;
using namespace X86Disassembler;

/// inheritsFrom - Indicates whether all instructions in one class also belong
///   to another class.
///
/// @param child  - The class that may be the subset
/// @param parent - The class that may be the superset
/// @return       - True if child is a subset of parent, false otherwise.
34 static inline bool inheritsFrom(InstructionContext child, 35 InstructionContext parent) { 36 if (child == parent) 37 return true; 38 39 switch (parent) { 40 case IC: 41 return(inheritsFrom(child, IC_64BIT) || 42 inheritsFrom(child, IC_OPSIZE) || 43 inheritsFrom(child, IC_XD) || 44 inheritsFrom(child, IC_XS)); 45 case IC_64BIT: 46 return(inheritsFrom(child, IC_64BIT_REXW) || 47 inheritsFrom(child, IC_64BIT_OPSIZE) || 48 inheritsFrom(child, IC_64BIT_XD) || 49 inheritsFrom(child, IC_64BIT_XS)); 50 case IC_OPSIZE: 51 return inheritsFrom(child, IC_64BIT_OPSIZE); 52 case IC_XD: 53 return inheritsFrom(child, IC_64BIT_XD); 54 case IC_XS: 55 return inheritsFrom(child, IC_64BIT_XS); 56 case IC_XD_OPSIZE: 57 return inheritsFrom(child, IC_64BIT_XD_OPSIZE); 58 case IC_64BIT_REXW: 59 return(inheritsFrom(child, IC_64BIT_REXW_XS) || 60 inheritsFrom(child, IC_64BIT_REXW_XD) || 61 inheritsFrom(child, IC_64BIT_REXW_OPSIZE)); 62 case IC_64BIT_OPSIZE: 63 return(inheritsFrom(child, IC_64BIT_REXW_OPSIZE)); 64 case IC_64BIT_XD: 65 return(inheritsFrom(child, IC_64BIT_REXW_XD)); 66 case IC_64BIT_XS: 67 return(inheritsFrom(child, IC_64BIT_REXW_XS)); 68 case IC_64BIT_XD_OPSIZE: 69 return false; 70 case IC_64BIT_REXW_XD: 71 return false; 72 case IC_64BIT_REXW_XS: 73 return false; 74 case IC_64BIT_REXW_OPSIZE: 75 return false; 76 case IC_VEX: 77 return inheritsFrom(child, IC_VEX_W); 78 case IC_VEX_XS: 79 return inheritsFrom(child, IC_VEX_W_XS); 80 case IC_VEX_XD: 81 return inheritsFrom(child, IC_VEX_W_XD); 82 case IC_VEX_OPSIZE: 83 return inheritsFrom(child, IC_VEX_W_OPSIZE); 84 case IC_VEX_W: 85 return false; 86 case IC_VEX_W_XS: 87 return false; 88 case IC_VEX_W_XD: 89 return false; 90 case IC_VEX_W_OPSIZE: 91 return false; 92 case IC_VEX_L: 93 return false; 94 case IC_VEX_L_XS: 95 return false; 96 case IC_VEX_L_XD: 97 return false; 98 case IC_VEX_L_OPSIZE: 99 return false; 100 default: 101 llvm_unreachable("Unknown instruction class"); 102 return false; 103 } 104 } 105 106 /// outranks - 
Indicates whether, if an instruction has two different applicable 107 /// classes, which class should be preferred when performing decode. This 108 /// imposes a total ordering (ties are resolved toward "lower") 109 /// 110 /// @param upper - The class that may be preferable 111 /// @param lower - The class that may be less preferable 112 /// @return - True if upper is to be preferred, false otherwise. 113 static inline bool outranks(InstructionContext upper, 114 InstructionContext lower) { 115 assert(upper < IC_max); 116 assert(lower < IC_max); 117 118 #define ENUM_ENTRY(n, r, d) r, 119 static int ranks[IC_max] = { 120 INSTRUCTION_CONTEXTS 121 }; 122 #undef ENUM_ENTRY 123 124 return (ranks[upper] > ranks[lower]); 125 } 126 127 /// stringForContext - Returns a string containing the name of a particular 128 /// InstructionContext, usually for diagnostic purposes. 129 /// 130 /// @param insnContext - The instruction class to transform to a string. 131 /// @return - A statically-allocated string constant that contains the 132 /// name of the instruction class. 133 static inline const char* stringForContext(InstructionContext insnContext) { 134 switch (insnContext) { 135 default: 136 llvm_unreachable("Unhandled instruction class"); 137 #define ENUM_ENTRY(n, r, d) case n: return #n; break; 138 INSTRUCTION_CONTEXTS 139 #undef ENUM_ENTRY 140 } 141 142 return 0; 143 } 144 145 /// stringForOperandType - Like stringForContext, but for OperandTypes. 146 static inline const char* stringForOperandType(OperandType type) { 147 switch (type) { 148 default: 149 llvm_unreachable("Unhandled type"); 150 #define ENUM_ENTRY(i, d) case i: return #i; 151 TYPES 152 #undef ENUM_ENTRY 153 } 154 } 155 156 /// stringForOperandEncoding - like stringForContext, but for 157 /// OperandEncodings. 
158 static inline const char* stringForOperandEncoding(OperandEncoding encoding) { 159 switch (encoding) { 160 default: 161 llvm_unreachable("Unhandled encoding"); 162 #define ENUM_ENTRY(i, d) case i: return #i; 163 ENCODINGS 164 #undef ENUM_ENTRY 165 } 166 } 167 168 void DisassemblerTables::emitOneID(raw_ostream &o, 169 uint32_t &i, 170 InstrUID id, 171 bool addComma) const { 172 if (id) 173 o.indent(i * 2) << format("0x%hx", id); 174 else 175 o.indent(i * 2) << 0; 176 177 if (addComma) 178 o << ", "; 179 else 180 o << " "; 181 182 o << "/* "; 183 o << InstructionSpecifiers[id].name; 184 o << "*/"; 185 186 o << "\n"; 187 } 188 189 /// emitEmptyTable - Emits the modRMEmptyTable, which is used as a ID table by 190 /// all ModR/M decisions for instructions that are invalid for all possible 191 /// ModR/M byte values. 192 /// 193 /// @param o - The output stream on which to emit the table. 194 /// @param i - The indentation level for that output stream. 195 static void emitEmptyTable(raw_ostream &o, uint32_t &i) 196 { 197 o.indent(i * 2) << "static const InstrUID modRMEmptyTable[1] = { 0 };\n"; 198 o << "\n"; 199 } 200 201 /// getDecisionType - Determines whether a ModRM decision with 255 entries can 202 /// be compacted by eliminating redundant information. 203 /// 204 /// @param decision - The decision to be compacted. 205 /// @return - The compactest available representation for the decision. 
206 static ModRMDecisionType getDecisionType(ModRMDecision &decision) 207 { 208 bool satisfiesOneEntry = true; 209 bool satisfiesSplitRM = true; 210 211 uint16_t index; 212 213 for (index = 0; index < 256; ++index) { 214 if (decision.instructionIDs[index] != decision.instructionIDs[0]) 215 satisfiesOneEntry = false; 216 217 if (((index & 0xc0) == 0xc0) && 218 (decision.instructionIDs[index] != decision.instructionIDs[0xc0])) 219 satisfiesSplitRM = false; 220 221 if (((index & 0xc0) != 0xc0) && 222 (decision.instructionIDs[index] != decision.instructionIDs[0x00])) 223 satisfiesSplitRM = false; 224 } 225 226 if (satisfiesOneEntry) 227 return MODRM_ONEENTRY; 228 229 if (satisfiesSplitRM) 230 return MODRM_SPLITRM; 231 232 return MODRM_FULL; 233 } 234 235 /// stringForDecisionType - Returns a statically-allocated string corresponding 236 /// to a particular decision type. 237 /// 238 /// @param dt - The decision type. 239 /// @return - A pointer to the statically-allocated string (e.g., 240 /// "MODRM_ONEENTRY" for MODRM_ONEENTRY). 241 static const char* stringForDecisionType(ModRMDecisionType dt) 242 { 243 #define ENUM_ENTRY(n) case n: return #n; 244 switch (dt) { 245 default: 246 llvm_unreachable("Unknown decision type"); 247 MODRMTYPES 248 }; 249 #undef ENUM_ENTRY 250 } 251 252 /// stringForModifierType - Returns a statically-allocated string corresponding 253 /// to an opcode modifier type. 254 /// 255 /// @param mt - The modifier type. 256 /// @return - A pointer to the statically-allocated string (e.g., 257 /// "MODIFIER_NONE" for MODIFIER_NONE). 
258 static const char* stringForModifierType(ModifierType mt) 259 { 260 #define ENUM_ENTRY(n) case n: return #n; 261 switch(mt) { 262 default: 263 llvm_unreachable("Unknown modifier type"); 264 MODIFIER_TYPES 265 }; 266 #undef ENUM_ENTRY 267 } 268 269 DisassemblerTables::DisassemblerTables() { 270 unsigned i; 271 272 for (i = 0; i < array_lengthof(Tables); i++) { 273 Tables[i] = new ContextDecision; 274 memset(Tables[i], 0, sizeof(ContextDecision)); 275 } 276 277 HasConflicts = false; 278 } 279 280 DisassemblerTables::~DisassemblerTables() { 281 unsigned i; 282 283 for (i = 0; i < array_lengthof(Tables); i++) 284 delete Tables[i]; 285 } 286 287 void DisassemblerTables::emitModRMDecision(raw_ostream &o1, 288 raw_ostream &o2, 289 uint32_t &i1, 290 uint32_t &i2, 291 ModRMDecision &decision) 292 const { 293 static uint64_t sTableNumber = 0; 294 uint64_t thisTableNumber = sTableNumber; 295 ModRMDecisionType dt = getDecisionType(decision); 296 uint16_t index; 297 298 if (dt == MODRM_ONEENTRY && decision.instructionIDs[0] == 0) 299 { 300 o2.indent(i2) << "{ /* ModRMDecision */" << "\n"; 301 i2++; 302 303 o2.indent(i2) << stringForDecisionType(dt) << "," << "\n"; 304 o2.indent(i2) << "modRMEmptyTable"; 305 306 i2--; 307 o2.indent(i2) << "}"; 308 return; 309 } 310 311 o1.indent(i1) << "static const InstrUID modRMTable" << thisTableNumber; 312 313 switch (dt) { 314 default: 315 llvm_unreachable("Unknown decision type"); 316 case MODRM_ONEENTRY: 317 o1 << "[1]"; 318 break; 319 case MODRM_SPLITRM: 320 o1 << "[2]"; 321 break; 322 case MODRM_FULL: 323 o1 << "[256]"; 324 break; 325 } 326 327 o1 << " = {" << "\n"; 328 i1++; 329 330 switch (dt) { 331 default: 332 llvm_unreachable("Unknown decision type"); 333 case MODRM_ONEENTRY: 334 emitOneID(o1, i1, decision.instructionIDs[0], false); 335 break; 336 case MODRM_SPLITRM: 337 emitOneID(o1, i1, decision.instructionIDs[0x00], true); // mod = 0b00 338 emitOneID(o1, i1, decision.instructionIDs[0xc0], false); // mod = 0b11 339 break; 340 
case MODRM_FULL: 341 for (index = 0; index < 256; ++index) 342 emitOneID(o1, i1, decision.instructionIDs[index], index < 255); 343 break; 344 } 345 346 i1--; 347 o1.indent(i1) << "};" << "\n"; 348 o1 << "\n"; 349 350 o2.indent(i2) << "{ /* struct ModRMDecision */" << "\n"; 351 i2++; 352 353 o2.indent(i2) << stringForDecisionType(dt) << "," << "\n"; 354 o2.indent(i2) << "modRMTable" << sTableNumber << "\n"; 355 356 i2--; 357 o2.indent(i2) << "}"; 358 359 ++sTableNumber; 360 } 361 362 void DisassemblerTables::emitOpcodeDecision( 363 raw_ostream &o1, 364 raw_ostream &o2, 365 uint32_t &i1, 366 uint32_t &i2, 367 OpcodeDecision &decision) const { 368 uint16_t index; 369 370 o2.indent(i2) << "{ /* struct OpcodeDecision */" << "\n"; 371 i2++; 372 o2.indent(i2) << "{" << "\n"; 373 i2++; 374 375 for (index = 0; index < 256; ++index) { 376 o2.indent(i2); 377 378 o2 << "/* 0x" << format("%02hhx", index) << " */" << "\n"; 379 380 emitModRMDecision(o1, o2, i1, i2, decision.modRMDecisions[index]); 381 382 if (index < 255) 383 o2 << ","; 384 385 o2 << "\n"; 386 } 387 388 i2--; 389 o2.indent(i2) << "}" << "\n"; 390 i2--; 391 o2.indent(i2) << "}" << "\n"; 392 } 393 394 void DisassemblerTables::emitContextDecision( 395 raw_ostream &o1, 396 raw_ostream &o2, 397 uint32_t &i1, 398 uint32_t &i2, 399 ContextDecision &decision, 400 const char* name) const { 401 o2.indent(i2) << "static const struct ContextDecision " << name << " = {\n"; 402 i2++; 403 o2.indent(i2) << "{ /* opcodeDecisions */" << "\n"; 404 i2++; 405 406 unsigned index; 407 408 for (index = 0; index < IC_max; ++index) { 409 o2.indent(i2) << "/* "; 410 o2 << stringForContext((InstructionContext)index); 411 o2 << " */"; 412 o2 << "\n"; 413 414 emitOpcodeDecision(o1, o2, i1, i2, decision.opcodeDecisions[index]); 415 416 if (index + 1 < IC_max) 417 o2 << ", "; 418 } 419 420 i2--; 421 o2.indent(i2) << "}" << "\n"; 422 i2--; 423 o2.indent(i2) << "};" << "\n"; 424 } 425 426 void DisassemblerTables::emitInstructionInfo(raw_ostream 
&o, uint32_t &i) 427 const { 428 o.indent(i * 2) << "static const struct InstructionSpecifier "; 429 o << INSTRUCTIONS_STR "[" << InstructionSpecifiers.size() << "] = {\n"; 430 431 i++; 432 433 uint16_t numInstructions = InstructionSpecifiers.size(); 434 uint16_t index, operandIndex; 435 436 for (index = 0; index < numInstructions; ++index) { 437 o.indent(i * 2) << "{ /* " << index << " */" << "\n"; 438 i++; 439 440 o.indent(i * 2) << 441 stringForModifierType(InstructionSpecifiers[index].modifierType); 442 o << "," << "\n"; 443 444 o.indent(i * 2) << "0x"; 445 o << format("%02hhx", (uint16_t)InstructionSpecifiers[index].modifierBase); 446 o << "," << "\n"; 447 448 o.indent(i * 2) << "{" << "\n"; 449 i++; 450 451 for (operandIndex = 0; operandIndex < X86_MAX_OPERANDS; ++operandIndex) { 452 o.indent(i * 2) << "{ "; 453 o << stringForOperandEncoding(InstructionSpecifiers[index] 454 .operands[operandIndex] 455 .encoding); 456 o << ", "; 457 o << stringForOperandType(InstructionSpecifiers[index] 458 .operands[operandIndex] 459 .type); 460 o << " }"; 461 462 if (operandIndex < X86_MAX_OPERANDS - 1) 463 o << ","; 464 465 o << "\n"; 466 } 467 468 i--; 469 o.indent(i * 2) << "}," << "\n"; 470 471 o.indent(i * 2) << "\"" << InstructionSpecifiers[index].name << "\""; 472 o << "\n"; 473 474 i--; 475 o.indent(i * 2) << "}"; 476 477 if (index + 1 < numInstructions) 478 o << ","; 479 480 o << "\n"; 481 } 482 483 i--; 484 o.indent(i * 2) << "};" << "\n"; 485 } 486 487 void DisassemblerTables::emitContextTable(raw_ostream &o, uint32_t &i) const { 488 uint16_t index; 489 490 o.indent(i * 2) << "static const InstructionContext " CONTEXTS_STR 491 "[256] = {\n"; 492 i++; 493 494 for (index = 0; index < 256; ++index) { 495 o.indent(i * 2); 496 497 if ((index & ATTR_VEXL) && (index & ATTR_OPSIZE)) 498 o << "IC_VEX_L_OPSIZE"; 499 else if ((index & ATTR_VEXL) && (index & ATTR_XD)) 500 o << "IC_VEX_L_XD"; 501 else if ((index & ATTR_VEXL) && (index & ATTR_XS)) 502 o << "IC_VEX_L_XS"; 503 
else if ((index & ATTR_VEX) && (index & ATTR_REXW) && (index & ATTR_OPSIZE)) 504 o << "IC_VEX_W_OPSIZE"; 505 else if ((index & ATTR_VEX) && (index & ATTR_REXW) && (index & ATTR_XD)) 506 o << "IC_VEX_W_XD"; 507 else if ((index & ATTR_VEX) && (index & ATTR_REXW) && (index & ATTR_XS)) 508 o << "IC_VEX_W_XS"; 509 else if (index & ATTR_VEXL) 510 o << "IC_VEX_L"; 511 else if ((index & ATTR_VEX) && (index & ATTR_REXW)) 512 o << "IC_VEX_W"; 513 else if ((index & ATTR_VEX) && (index & ATTR_OPSIZE)) 514 o << "IC_VEX_OPSIZE"; 515 else if ((index & ATTR_VEX) && (index & ATTR_XD)) 516 o << "IC_VEX_XD"; 517 else if ((index & ATTR_VEX) && (index & ATTR_XS)) 518 o << "IC_VEX_XS"; 519 else if (index & ATTR_VEX) 520 o << "IC_VEX"; 521 else if ((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_XS)) 522 o << "IC_64BIT_REXW_XS"; 523 else if ((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_XD)) 524 o << "IC_64BIT_REXW_XD"; 525 else if ((index & ATTR_64BIT) && (index & ATTR_REXW) && 526 (index & ATTR_OPSIZE)) 527 o << "IC_64BIT_REXW_OPSIZE"; 528 else if ((index & ATTR_64BIT) && (index & ATTR_XD) && (index & ATTR_OPSIZE)) 529 o << "IC_64BIT_XD_OPSIZE"; 530 else if ((index & ATTR_64BIT) && (index & ATTR_XS)) 531 o << "IC_64BIT_XS"; 532 else if ((index & ATTR_64BIT) && (index & ATTR_XD)) 533 o << "IC_64BIT_XD"; 534 else if ((index & ATTR_64BIT) && (index & ATTR_OPSIZE)) 535 o << "IC_64BIT_OPSIZE"; 536 else if ((index & ATTR_64BIT) && (index & ATTR_REXW)) 537 o << "IC_64BIT_REXW"; 538 else if ((index & ATTR_64BIT)) 539 o << "IC_64BIT"; 540 else if ((index & ATTR_XD) && (index & ATTR_OPSIZE)) 541 o << "IC_XD_OPSIZE"; 542 else if (index & ATTR_XS) 543 o << "IC_XS"; 544 else if (index & ATTR_XD) 545 o << "IC_XD"; 546 else if (index & ATTR_OPSIZE) 547 o << "IC_OPSIZE"; 548 else 549 o << "IC"; 550 551 if (index < 255) 552 o << ","; 553 else 554 o << " "; 555 556 o << " /* " << index << " */"; 557 558 o << "\n"; 559 } 560 561 i--; 562 o.indent(i * 2) << "};" << "\n"; 563 } 
564 565 void DisassemblerTables::emitContextDecisions(raw_ostream &o1, 566 raw_ostream &o2, 567 uint32_t &i1, 568 uint32_t &i2) 569 const { 570 emitContextDecision(o1, o2, i1, i2, *Tables[0], ONEBYTE_STR); 571 emitContextDecision(o1, o2, i1, i2, *Tables[1], TWOBYTE_STR); 572 emitContextDecision(o1, o2, i1, i2, *Tables[2], THREEBYTE38_STR); 573 emitContextDecision(o1, o2, i1, i2, *Tables[3], THREEBYTE3A_STR); 574 emitContextDecision(o1, o2, i1, i2, *Tables[4], THREEBYTEA6_STR); 575 emitContextDecision(o1, o2, i1, i2, *Tables[5], THREEBYTEA7_STR); 576 } 577 578 void DisassemblerTables::emit(raw_ostream &o) const { 579 uint32_t i1 = 0; 580 uint32_t i2 = 0; 581 582 std::string s1; 583 std::string s2; 584 585 raw_string_ostream o1(s1); 586 raw_string_ostream o2(s2); 587 588 emitInstructionInfo(o, i2); 589 o << "\n"; 590 591 emitContextTable(o, i2); 592 o << "\n"; 593 594 emitEmptyTable(o1, i1); 595 emitContextDecisions(o1, o2, i1, i2); 596 597 o << o1.str(); 598 o << "\n"; 599 o << o2.str(); 600 o << "\n"; 601 o << "\n"; 602 } 603 604 void DisassemblerTables::setTableFields(ModRMDecision &decision, 605 const ModRMFilter &filter, 606 InstrUID uid, 607 uint8_t opcode) { 608 unsigned index; 609 610 for (index = 0; index < 256; ++index) { 611 if (filter.accepts(index)) { 612 if (decision.instructionIDs[index] == uid) 613 continue; 614 615 if (decision.instructionIDs[index] != 0) { 616 InstructionSpecifier &newInfo = 617 InstructionSpecifiers[uid]; 618 InstructionSpecifier &previousInfo = 619 InstructionSpecifiers[decision.instructionIDs[index]]; 620 621 if(newInfo.filtered) 622 continue; // filtered instructions get lowest priority 623 624 if(previousInfo.name == "NOOP" && (newInfo.name == "XCHG16ar" || 625 newInfo.name == "XCHG32ar" || 626 newInfo.name == "XCHG64ar")) 627 continue; // special case for XCHG*ar and NOOP 628 629 if (outranks(previousInfo.insnContext, newInfo.insnContext)) 630 continue; 631 632 if (previousInfo.insnContext == newInfo.insnContext && 633 
!previousInfo.filtered) { 634 errs() << "Error: Primary decode conflict: "; 635 errs() << newInfo.name << " would overwrite " << previousInfo.name; 636 errs() << "\n"; 637 errs() << "ModRM " << index << "\n"; 638 errs() << "Opcode " << (uint16_t)opcode << "\n"; 639 errs() << "Context " << stringForContext(newInfo.insnContext) << "\n"; 640 HasConflicts = true; 641 } 642 } 643 644 decision.instructionIDs[index] = uid; 645 } 646 } 647 } 648 649 void DisassemblerTables::setTableFields(OpcodeType type, 650 InstructionContext insnContext, 651 uint8_t opcode, 652 const ModRMFilter &filter, 653 InstrUID uid, 654 bool is32bit) { 655 unsigned index; 656 657 ContextDecision &decision = *Tables[type]; 658 659 for (index = 0; index < IC_max; ++index) { 660 if (is32bit && inheritsFrom((InstructionContext)index, IC_64BIT)) 661 continue; 662 663 if (inheritsFrom((InstructionContext)index, 664 InstructionSpecifiers[uid].insnContext)) 665 setTableFields(decision.opcodeDecisions[index].modRMDecisions[opcode], 666 filter, 667 uid, 668 opcode); 669 } 670 } 671