1 //===- X86DisassemblerTables.cpp - Disassembler tables ----------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file is part of the X86 Disassembler Emitter. 11 // It contains the implementation of the disassembler tables. 12 // Documentation for the disassembler emitter in general can be found in 13 // X86DisasemblerEmitter.h. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "X86DisassemblerTables.h" 18 #include "X86DisassemblerShared.h" 19 #include "llvm/ADT/STLExtras.h" 20 #include "llvm/Support/ErrorHandling.h" 21 #include "llvm/Support/Format.h" 22 #include <map> 23 24 using namespace llvm; 25 using namespace X86Disassembler; 26 27 /// stringForContext - Returns a string containing the name of a particular 28 /// InstructionContext, usually for diagnostic purposes. 29 /// 30 /// @param insnContext - The instruction class to transform to a string. 31 /// @return - A statically-allocated string constant that contains the 32 /// name of the instruction class. 33 static inline const char* stringForContext(InstructionContext insnContext) { 34 switch (insnContext) { 35 default: 36 llvm_unreachable("Unhandled instruction class"); 37 #define ENUM_ENTRY(n, r, d) case n: return #n; break; 38 #define ENUM_ENTRY_K_B(n, r, d) ENUM_ENTRY(n, r, d) ENUM_ENTRY(n##_K_B, r, d)\ 39 ENUM_ENTRY(n##_KZ, r, d) ENUM_ENTRY(n##_K, r, d) ENUM_ENTRY(n##_B, r, d)\ 40 ENUM_ENTRY(n##_KZ_B, r, d) 41 INSTRUCTION_CONTEXTS 42 #undef ENUM_ENTRY 43 #undef ENUM_ENTRY_K_B 44 } 45 } 46 47 /// stringForOperandType - Like stringForContext, but for OperandTypes. 48 static inline const char* stringForOperandType(OperandType type) { 49 switch (type) { 50 default: 51 llvm_unreachable("Unhandled type"); 52 #define ENUM_ENTRY(i, d) case i: return #i; 53 TYPES 54 #undef ENUM_ENTRY 55 } 56 } 57 58 /// stringForOperandEncoding - like stringForContext, but for 59 /// OperandEncodings. 60 static inline const char* stringForOperandEncoding(OperandEncoding encoding) { 61 switch (encoding) { 62 default: 63 llvm_unreachable("Unhandled encoding"); 64 #define ENUM_ENTRY(i, d) case i: return #i; 65 ENCODINGS 66 #undef ENUM_ENTRY 67 } 68 } 69 70 /// inheritsFrom - Indicates whether all instructions in one class also belong 71 /// to another class. 72 /// 73 /// @param child - The class that may be the subset 74 /// @param parent - The class that may be the superset 75 /// @return - True if child is a subset of parent, false otherwise. 76 static inline bool inheritsFrom(InstructionContext child, 77 InstructionContext parent, 78 bool VEX_LIG = false) { 79 if (child == parent) 80 return true; 81 82 switch (parent) { 83 case IC: 84 return(inheritsFrom(child, IC_64BIT) || 85 inheritsFrom(child, IC_OPSIZE) || 86 inheritsFrom(child, IC_ADSIZE) || 87 inheritsFrom(child, IC_XD) || 88 inheritsFrom(child, IC_XS)); 89 case IC_64BIT: 90 return(inheritsFrom(child, IC_64BIT_REXW) || 91 inheritsFrom(child, IC_64BIT_OPSIZE) || 92 inheritsFrom(child, IC_64BIT_ADSIZE) || 93 inheritsFrom(child, IC_64BIT_XD) || 94 inheritsFrom(child, IC_64BIT_XS)); 95 case IC_OPSIZE: 96 return inheritsFrom(child, IC_64BIT_OPSIZE); 97 case IC_ADSIZE: 98 case IC_64BIT_ADSIZE: 99 return false; 100 case IC_XD: 101 return inheritsFrom(child, IC_64BIT_XD); 102 case IC_XS: 103 return inheritsFrom(child, IC_64BIT_XS); 104 case IC_XD_OPSIZE: 105 return inheritsFrom(child, IC_64BIT_XD_OPSIZE); 106 case IC_XS_OPSIZE: 107 return inheritsFrom(child, IC_64BIT_XS_OPSIZE); 108 case IC_64BIT_REXW: 109 return(inheritsFrom(child, IC_64BIT_REXW_XS) || 110 inheritsFrom(child, IC_64BIT_REXW_XD) || 111 inheritsFrom(child, IC_64BIT_REXW_OPSIZE)); 112 case IC_64BIT_OPSIZE: 113 return(inheritsFrom(child, IC_64BIT_REXW_OPSIZE)); 114 case IC_64BIT_XD: 115 return(inheritsFrom(child, IC_64BIT_REXW_XD)); 116 case IC_64BIT_XS: 117 return(inheritsFrom(child, IC_64BIT_REXW_XS)); 118 case IC_64BIT_XD_OPSIZE: 119 case IC_64BIT_XS_OPSIZE: 120 return false; 121 case IC_64BIT_REXW_XD: 122 case IC_64BIT_REXW_XS: 123 case IC_64BIT_REXW_OPSIZE: 124 return false; 125 case IC_VEX: 126 return (VEX_LIG && inheritsFrom(child, IC_VEX_L_W)) || 127 inheritsFrom(child, IC_VEX_W) || 128 (VEX_LIG && inheritsFrom(child, IC_VEX_L)); 129 case IC_VEX_XS: 130 return (VEX_LIG && inheritsFrom(child, IC_VEX_L_W_XS)) || 131 inheritsFrom(child, IC_VEX_W_XS) || 132 (VEX_LIG && inheritsFrom(child, IC_VEX_L_XS)); 133 case IC_VEX_XD: 134 return (VEX_LIG && inheritsFrom(child, IC_VEX_L_W_XD)) || 135 inheritsFrom(child, IC_VEX_W_XD) || 136 (VEX_LIG && inheritsFrom(child, IC_VEX_L_XD)); 137 case IC_VEX_OPSIZE: 138 return (VEX_LIG && inheritsFrom(child, IC_VEX_L_W_OPSIZE)) || 139 inheritsFrom(child, IC_VEX_W_OPSIZE) || 140 (VEX_LIG && inheritsFrom(child, IC_VEX_L_OPSIZE)); 141 case IC_VEX_W: 142 return VEX_LIG && inheritsFrom(child, IC_VEX_L_W); 143 case IC_VEX_W_XS: 144 return VEX_LIG && inheritsFrom(child, IC_VEX_L_W_XS); 145 case IC_VEX_W_XD: 146 return VEX_LIG && inheritsFrom(child, IC_VEX_L_W_XD); 147 case IC_VEX_W_OPSIZE: 148 return VEX_LIG && inheritsFrom(child, IC_VEX_L_W_OPSIZE); 149 case IC_VEX_L: 150 return inheritsFrom(child, IC_VEX_L_W); 151 case IC_VEX_L_XS: 152 return inheritsFrom(child, IC_VEX_L_W_XS); 153 case IC_VEX_L_XD: 154 return inheritsFrom(child, IC_VEX_L_W_XD); 155 case IC_VEX_L_OPSIZE: 156 return inheritsFrom(child, IC_VEX_L_W_OPSIZE); 157 case IC_VEX_L_W: 158 case IC_VEX_L_W_XS: 159 case IC_VEX_L_W_XD: 160 case IC_VEX_L_W_OPSIZE: 161 return false; 162 case IC_EVEX: 163 return inheritsFrom(child, IC_EVEX_W) || 164 inheritsFrom(child, IC_EVEX_L_W); 165 case IC_EVEX_XS: 166 return inheritsFrom(child, IC_EVEX_W_XS) || 167 inheritsFrom(child, IC_EVEX_L_W_XS); 168 case IC_EVEX_XD: 169 return inheritsFrom(child, IC_EVEX_W_XD) || 170 inheritsFrom(child, IC_EVEX_L_W_XD); 171 case IC_EVEX_OPSIZE: 172 return inheritsFrom(child, IC_EVEX_W_OPSIZE) || 173 inheritsFrom(child, IC_EVEX_L_W_OPSIZE); 174 case IC_EVEX_B: 175 return false; 176 case IC_EVEX_W: 177 case IC_EVEX_W_XS: 178 case IC_EVEX_W_XD: 179 case IC_EVEX_W_OPSIZE: 180 return false; 181 case IC_EVEX_L: 182 case IC_EVEX_L_K_B: 183 case IC_EVEX_L_KZ_B: 184 case IC_EVEX_L_B: 185 case IC_EVEX_L_XS: 186 case IC_EVEX_L_XD: 187 case IC_EVEX_L_OPSIZE: 188 return false; 189 case IC_EVEX_L_W: 190 case IC_EVEX_L_W_XS: 191 case IC_EVEX_L_W_XD: 192 case IC_EVEX_L_W_OPSIZE: 193 return false; 194 case IC_EVEX_L2: 195 case IC_EVEX_L2_XS: 196 case IC_EVEX_L2_XD: 197 case IC_EVEX_L2_OPSIZE: 198 return false; 199 case IC_EVEX_L2_W: 200 case IC_EVEX_L2_W_XS: 201 case IC_EVEX_L2_W_XD: 202 case IC_EVEX_L2_W_OPSIZE: 203 return false; 204 case IC_EVEX_K: 205 return inheritsFrom(child, IC_EVEX_W_K) || 206 inheritsFrom(child, IC_EVEX_L_W_K); 207 case IC_EVEX_XS_K: 208 return inheritsFrom(child, IC_EVEX_W_XS_K) || 209 inheritsFrom(child, IC_EVEX_L_W_XS_K); 210 case IC_EVEX_XD_K: 211 return inheritsFrom(child, IC_EVEX_W_XD_K) || 212 inheritsFrom(child, IC_EVEX_L_W_XD_K); 213 case IC_EVEX_K_B: 214 case IC_EVEX_KZ: 215 return false; 216 case IC_EVEX_XS_KZ: 217 return inheritsFrom(child, IC_EVEX_W_XS_KZ) || 218 inheritsFrom(child, IC_EVEX_L_W_XS_KZ); 219 case IC_EVEX_XD_KZ: 220 return inheritsFrom(child, IC_EVEX_W_XD_KZ) || 221 inheritsFrom(child, IC_EVEX_L_W_XD_KZ); 222 case IC_EVEX_KZ_B: 223 case IC_EVEX_OPSIZE_K: 224 case IC_EVEX_OPSIZE_B: 225 case IC_EVEX_OPSIZE_K_B: 226 case IC_EVEX_OPSIZE_KZ: 227 case IC_EVEX_OPSIZE_KZ_B: 228 return false; 229 case IC_EVEX_W_K: 230 case IC_EVEX_W_XS_K: 231 case IC_EVEX_W_XD_K: 232 case IC_EVEX_W_OPSIZE_K: 233 case IC_EVEX_W_OPSIZE_B: 234 case IC_EVEX_W_OPSIZE_K_B: 235 return false; 236 case IC_EVEX_L_K: 237 case IC_EVEX_L_XS_K: 238 case IC_EVEX_L_XD_K: 239 case IC_EVEX_L_OPSIZE_K: 240 case IC_EVEX_L_OPSIZE_B: 241 case IC_EVEX_L_OPSIZE_K_B: 242 return false; 243 case IC_EVEX_W_KZ: 244 case IC_EVEX_W_XS_KZ: 245 case IC_EVEX_W_XD_KZ: 246 case IC_EVEX_W_OPSIZE_KZ: 247 case IC_EVEX_W_OPSIZE_KZ_B: 248 return false; 249 case IC_EVEX_L_KZ: 250 case IC_EVEX_L_XS_KZ: 251 case IC_EVEX_L_XD_KZ: 252 case IC_EVEX_L_OPSIZE_KZ: 253 case IC_EVEX_L_OPSIZE_KZ_B: 254 return false; 255 case IC_EVEX_L_W_K: 256 case IC_EVEX_L_W_XS_K: 257 case IC_EVEX_L_W_XD_K: 258 case IC_EVEX_L_W_OPSIZE_K: 259 case IC_EVEX_L_W_OPSIZE_B: 260 case IC_EVEX_L_W_OPSIZE_K_B: 261 case IC_EVEX_L_W_KZ: 262 case IC_EVEX_L_W_XS_KZ: 263 case IC_EVEX_L_W_XD_KZ: 264 case IC_EVEX_L_W_OPSIZE_KZ: 265 case IC_EVEX_L_W_OPSIZE_KZ_B: 266 return false; 267 case IC_EVEX_L2_K: 268 case IC_EVEX_L2_B: 269 case IC_EVEX_L2_K_B: 270 case IC_EVEX_L2_KZ_B: 271 case IC_EVEX_L2_XS_K: 272 case IC_EVEX_L2_XS_B: 273 case IC_EVEX_L2_XD_B: 274 case IC_EVEX_L2_XD_K: 275 case IC_EVEX_L2_OPSIZE_K: 276 case IC_EVEX_L2_OPSIZE_B: 277 case IC_EVEX_L2_OPSIZE_K_B: 278 case IC_EVEX_L2_KZ: 279 case IC_EVEX_L2_XS_KZ: 280 case IC_EVEX_L2_XD_KZ: 281 case IC_EVEX_L2_OPSIZE_KZ: 282 case IC_EVEX_L2_OPSIZE_KZ_B: 283 return false; 284 case IC_EVEX_L2_W_K: 285 case IC_EVEX_L2_W_B: 286 case IC_EVEX_L2_W_XS_K: 287 case IC_EVEX_L2_W_XD_K: 288 case IC_EVEX_L2_W_XD_B: 289 case IC_EVEX_L2_W_OPSIZE_K: 290 case IC_EVEX_L2_W_OPSIZE_B: 291 case IC_EVEX_L2_W_OPSIZE_K_B: 292 case IC_EVEX_L2_W_KZ: 293 case IC_EVEX_L2_W_XS_KZ: 294 case IC_EVEX_L2_W_XD_KZ: 295 case IC_EVEX_L2_W_OPSIZE_KZ: 296 case IC_EVEX_L2_W_OPSIZE_KZ_B: 297 return false; 298 default: 299 errs() << "Unknown instruction class: " << 300 stringForContext((InstructionContext)parent) << "\n"; 301 llvm_unreachable("Unknown instruction class"); 302 } 303 } 304 305 /// outranks - Indicates whether, if an instruction has two different applicable 306 /// classes, which class should be preferred when performing decode. This 307 /// imposes a total ordering (ties are resolved toward "lower") 308 /// 309 /// @param upper - The class that may be preferable 310 /// @param lower - The class that may be less preferable 311 /// @return - True if upper is to be preferred, false otherwise. 312 static inline bool outranks(InstructionContext upper, 313 InstructionContext lower) { 314 assert(upper < IC_max); 315 assert(lower < IC_max); 316 317 #define ENUM_ENTRY(n, r, d) r, 318 #define ENUM_ENTRY_K_B(n, r, d) ENUM_ENTRY(n, r, d) \ 319 ENUM_ENTRY(n##_K_B, r, d) ENUM_ENTRY(n##_KZ_B, r, d) \ 320 ENUM_ENTRY(n##_KZ, r, d) ENUM_ENTRY(n##_K, r, d) ENUM_ENTRY(n##_B, r, d) 321 static int ranks[IC_max] = { 322 INSTRUCTION_CONTEXTS 323 }; 324 #undef ENUM_ENTRY 325 #undef ENUM_ENTRY_K_B 326 327 return (ranks[upper] > ranks[lower]); 328 } 329 330 /// getDecisionType - Determines whether a ModRM decision with 255 entries can 331 /// be compacted by eliminating redundant information. 332 /// 333 /// @param decision - The decision to be compacted. 334 /// @return - The compactest available representation for the decision. 335 static ModRMDecisionType getDecisionType(ModRMDecision &decision) { 336 bool satisfiesOneEntry = true; 337 bool satisfiesSplitRM = true; 338 bool satisfiesSplitReg = true; 339 bool satisfiesSplitMisc = true; 340 341 for (unsigned index = 0; index < 256; ++index) { 342 if (decision.instructionIDs[index] != decision.instructionIDs[0]) 343 satisfiesOneEntry = false; 344 345 if (((index & 0xc0) == 0xc0) && 346 (decision.instructionIDs[index] != decision.instructionIDs[0xc0])) 347 satisfiesSplitRM = false; 348 349 if (((index & 0xc0) != 0xc0) && 350 (decision.instructionIDs[index] != decision.instructionIDs[0x00])) 351 satisfiesSplitRM = false; 352 353 if (((index & 0xc0) == 0xc0) && 354 (decision.instructionIDs[index] != decision.instructionIDs[index&0xf8])) 355 satisfiesSplitReg = false; 356 357 if (((index & 0xc0) != 0xc0) && 358 (decision.instructionIDs[index] != decision.instructionIDs[index&0x38])) 359 satisfiesSplitMisc = false; 360 } 361 362 if (satisfiesOneEntry) 363 return MODRM_ONEENTRY; 364 365 if (satisfiesSplitRM) 366 return MODRM_SPLITRM; 367 368 if (satisfiesSplitReg && satisfiesSplitMisc) 369 return MODRM_SPLITREG; 370 371 if (satisfiesSplitMisc) 372 return MODRM_SPLITMISC; 373 374 return MODRM_FULL; 375 } 376 377 /// stringForDecisionType - Returns a statically-allocated string corresponding 378 /// to a particular decision type. 379 /// 380 /// @param dt - The decision type. 381 /// @return - A pointer to the statically-allocated string (e.g., 382 /// "MODRM_ONEENTRY" for MODRM_ONEENTRY). 383 static const char* stringForDecisionType(ModRMDecisionType dt) { 384 #define ENUM_ENTRY(n) case n: return #n; 385 switch (dt) { 386 default: 387 llvm_unreachable("Unknown decision type"); 388 MODRMTYPES 389 }; 390 #undef ENUM_ENTRY 391 } 392 393 DisassemblerTables::DisassemblerTables() { 394 unsigned i; 395 396 for (i = 0; i < array_lengthof(Tables); i++) { 397 Tables[i] = new ContextDecision; 398 memset(Tables[i], 0, sizeof(ContextDecision)); 399 } 400 401 HasConflicts = false; 402 } 403 404 DisassemblerTables::~DisassemblerTables() { 405 unsigned i; 406 407 for (i = 0; i < array_lengthof(Tables); i++) 408 delete Tables[i]; 409 } 410 411 void DisassemblerTables::emitModRMDecision(raw_ostream &o1, raw_ostream &o2, 412 unsigned &i1, unsigned &i2, 413 unsigned &ModRMTableNum, 414 ModRMDecision &decision) const { 415 static uint32_t sTableNumber = 0; 416 static uint32_t sEntryNumber = 1; 417 ModRMDecisionType dt = getDecisionType(decision); 418 419 if (dt == MODRM_ONEENTRY && decision.instructionIDs[0] == 0) 420 { 421 o2.indent(i2) << "{ /* ModRMDecision */" << "\n"; 422 i2++; 423 424 o2.indent(i2) << stringForDecisionType(dt) << "," << "\n"; 425 o2.indent(i2) << 0 << " /* EmptyTable */\n"; 426 427 i2--; 428 o2.indent(i2) << "}"; 429 return; 430 } 431 432 std::vector<unsigned> ModRMDecision; 433 434 switch (dt) { 435 default: 436 llvm_unreachable("Unknown decision type"); 437 case MODRM_ONEENTRY: 438 ModRMDecision.push_back(decision.instructionIDs[0]); 439 break; 440 case MODRM_SPLITRM: 441 ModRMDecision.push_back(decision.instructionIDs[0x00]); 442 ModRMDecision.push_back(decision.instructionIDs[0xc0]); 443 break; 444 case MODRM_SPLITREG: 445 for (unsigned index = 0; index < 64; index += 8) 446 ModRMDecision.push_back(decision.instructionIDs[index]); 447 for (unsigned index = 0xc0; index < 256; index += 8) 448 ModRMDecision.push_back(decision.instructionIDs[index]); 449 break; 450 case MODRM_SPLITMISC: 451 for (unsigned index = 0; index < 64; index += 8) 452 ModRMDecision.push_back(decision.instructionIDs[index]); 453 for (unsigned index = 0xc0; index < 256; ++index) 454 ModRMDecision.push_back(decision.instructionIDs[index]); 455 break; 456 case MODRM_FULL: 457 for (unsigned index = 0; index < 256; ++index) 458 ModRMDecision.push_back(decision.instructionIDs[index]); 459 break; 460 } 461 462 unsigned &EntryNumber = ModRMTable[ModRMDecision]; 463 if (EntryNumber == 0) { 464 EntryNumber = ModRMTableNum; 465 466 ModRMTableNum += ModRMDecision.size(); 467 o1 << "/* Table" << EntryNumber << " */\n"; 468 i1++; 469 for (std::vector<unsigned>::const_iterator I = ModRMDecision.begin(), 470 E = ModRMDecision.end(); I != E; ++I) { 471 o1.indent(i1 * 2) << format("0x%hx", *I) << ", /* " 472 << InstructionSpecifiers[*I].name << " */\n"; 473 } 474 i1--; 475 } 476 477 o2.indent(i2) << "{ /* struct ModRMDecision */" << "\n"; 478 i2++; 479 480 o2.indent(i2) << stringForDecisionType(dt) << "," << "\n"; 481 o2.indent(i2) << EntryNumber << " /* Table" << EntryNumber << " */\n"; 482 483 i2--; 484 o2.indent(i2) << "}"; 485 486 switch (dt) { 487 default: 488 llvm_unreachable("Unknown decision type"); 489 case MODRM_ONEENTRY: 490 sEntryNumber += 1; 491 break; 492 case MODRM_SPLITRM: 493 sEntryNumber += 2; 494 break; 495 case MODRM_SPLITREG: 496 sEntryNumber += 16; 497 break; 498 case MODRM_SPLITMISC: 499 sEntryNumber += 8 + 64; 500 break; 501 case MODRM_FULL: 502 sEntryNumber += 256; 503 break; 504 } 505 506 // We assume that the index can fit into uint16_t. 507 assert(sEntryNumber < 65536U && 508 "Index into ModRMDecision is too large for uint16_t!"); 509 510 ++sTableNumber; 511 } 512 513 void DisassemblerTables::emitOpcodeDecision(raw_ostream &o1, raw_ostream &o2, 514 unsigned &i1, unsigned &i2, 515 unsigned &ModRMTableNum, 516 OpcodeDecision &decision) const { 517 o2.indent(i2) << "{ /* struct OpcodeDecision */" << "\n"; 518 i2++; 519 o2.indent(i2) << "{" << "\n"; 520 i2++; 521 522 for (unsigned index = 0; index < 256; ++index) { 523 o2.indent(i2); 524 525 o2 << "/* 0x" << format("%02hhx", index) << " */" << "\n"; 526 527 emitModRMDecision(o1, o2, i1, i2, ModRMTableNum, 528 decision.modRMDecisions[index]); 529 530 if (index < 255) 531 o2 << ","; 532 533 o2 << "\n"; 534 } 535 536 i2--; 537 o2.indent(i2) << "}" << "\n"; 538 i2--; 539 o2.indent(i2) << "}" << "\n"; 540 } 541 542 void DisassemblerTables::emitContextDecision(raw_ostream &o1, raw_ostream &o2, 543 unsigned &i1, unsigned &i2, 544 unsigned &ModRMTableNum, 545 ContextDecision &decision, 546 const char* name) const { 547 o2.indent(i2) << "static const struct ContextDecision " << name << " = {\n"; 548 i2++; 549 o2.indent(i2) << "{ /* opcodeDecisions */" << "\n"; 550 i2++; 551 552 for (unsigned index = 0; index < IC_max; ++index) { 553 o2.indent(i2) << "/* "; 554 o2 << stringForContext((InstructionContext)index); 555 o2 << " */"; 556 o2 << "\n"; 557 558 emitOpcodeDecision(o1, o2, i1, i2, ModRMTableNum, 559 decision.opcodeDecisions[index]); 560 561 if (index + 1 < IC_max) 562 o2 << ", "; 563 } 564 565 i2--; 566 o2.indent(i2) << "}" << "\n"; 567 i2--; 568 o2.indent(i2) << "};" << "\n"; 569 } 570 571 void DisassemblerTables::emitInstructionInfo(raw_ostream &o, 572 unsigned &i) const { 573 unsigned NumInstructions = InstructionSpecifiers.size(); 574 575 o << "static const struct OperandSpecifier x86OperandSets[][" 576 << X86_MAX_OPERANDS << "] = {\n"; 577 578 typedef std::vector<std::pair<const char *, const char *> > OperandListTy; 579 std::map<OperandListTy, unsigned> OperandSets; 580 581 unsigned OperandSetNum = 0; 582 for (unsigned Index = 0; Index < NumInstructions; ++Index) { 583 OperandListTy OperandList; 584 585 for (unsigned OperandIndex = 0; OperandIndex < X86_MAX_OPERANDS; 586 ++OperandIndex) { 587 const char *Encoding = 588 stringForOperandEncoding((OperandEncoding)InstructionSpecifiers[Index] 589 .operands[OperandIndex].encoding); 590 const char *Type = 591 stringForOperandType((OperandType)InstructionSpecifiers[Index] 592 .operands[OperandIndex].type); 593 OperandList.push_back(std::make_pair(Encoding, Type)); 594 } 595 unsigned &N = OperandSets[OperandList]; 596 if (N != 0) continue; 597 598 N = ++OperandSetNum; 599 600 o << " { /* " << (OperandSetNum - 1) << " */\n"; 601 for (unsigned i = 0, e = OperandList.size(); i != e; ++i) { 602 o << " { " << OperandList[i].first << ", " 603 << OperandList[i].second << " },\n"; 604 } 605 o << " },\n"; 606 } 607 o << "};" << "\n\n"; 608 609 o.indent(i * 2) << "static const struct InstructionSpecifier "; 610 o << INSTRUCTIONS_STR "[" << InstructionSpecifiers.size() << "] = {\n"; 611 612 i++; 613 614 for (unsigned index = 0; index < NumInstructions; ++index) { 615 o.indent(i * 2) << "{ /* " << index << " */" << "\n"; 616 i++; 617 618 OperandListTy OperandList; 619 for (unsigned OperandIndex = 0; OperandIndex < X86_MAX_OPERANDS; 620 ++OperandIndex) { 621 const char *Encoding = 622 stringForOperandEncoding((OperandEncoding)InstructionSpecifiers[index] 623 .operands[OperandIndex].encoding); 624 const char *Type = 625 stringForOperandType((OperandType)InstructionSpecifiers[index] 626 .operands[OperandIndex].type); 627 OperandList.push_back(std::make_pair(Encoding, Type)); 628 } 629 o.indent(i * 2) << (OperandSets[OperandList] - 1) << ",\n"; 630 631 o.indent(i * 2) << "/* " << InstructionSpecifiers[index].name << " */"; 632 o << "\n"; 633 634 i--; 635 o.indent(i * 2) << "}"; 636 637 if (index + 1 < NumInstructions) 638 o << ","; 639 640 o << "\n"; 641 } 642 643 i--; 644 o.indent(i * 2) << "};" << "\n"; 645 } 646 647 void DisassemblerTables::emitContextTable(raw_ostream &o, unsigned &i) const { 648 const unsigned int tableSize = 16384; 649 o.indent(i * 2) << "static const uint8_t " CONTEXTS_STR 650 "[" << tableSize << "] = {\n"; 651 i++; 652 653 for (unsigned index = 0; index < tableSize; ++index) { 654 o.indent(i * 2); 655 656 if (index & ATTR_EVEX) { 657 o << "IC_EVEX"; 658 if (index & ATTR_EVEXL2) 659 o << "_L2"; 660 else if (index & ATTR_EVEXL) 661 o << "_L"; 662 if (index & ATTR_REXW) 663 o << "_W"; 664 if (index & ATTR_OPSIZE) 665 o << "_OPSIZE"; 666 else if (index & ATTR_XD) 667 o << "_XD"; 668 else if (index & ATTR_XS) 669 o << "_XS"; 670 if (index & ATTR_EVEXKZ) 671 o << "_KZ"; 672 else if (index & ATTR_EVEXK) 673 o << "_K"; 674 if (index & ATTR_EVEXB) 675 o << "_B"; 676 } 677 else if ((index & ATTR_VEXL) && (index & ATTR_REXW) && (index & ATTR_OPSIZE)) 678 o << "IC_VEX_L_W_OPSIZE"; 679 else if ((index & ATTR_VEXL) && (index & ATTR_REXW) && (index & ATTR_XD)) 680 o << "IC_VEX_L_W_XD"; 681 else if ((index & ATTR_VEXL) && (index & ATTR_REXW) && (index & ATTR_XS)) 682 o << "IC_VEX_L_W_XS"; 683 else if ((index & ATTR_VEXL) && (index & ATTR_REXW)) 684 o << "IC_VEX_L_W"; 685 else if ((index & ATTR_VEXL) && (index & ATTR_OPSIZE)) 686 o << "IC_VEX_L_OPSIZE"; 687 else if ((index & ATTR_VEXL) && (index & ATTR_XD)) 688 o << "IC_VEX_L_XD"; 689 else if ((index & ATTR_VEXL) && (index & ATTR_XS)) 690 o << "IC_VEX_L_XS"; 691 else if ((index & ATTR_VEX) && (index & ATTR_REXW) && (index & ATTR_OPSIZE)) 692 o << "IC_VEX_W_OPSIZE"; 693 else if ((index & ATTR_VEX) && (index & ATTR_REXW) && (index & ATTR_XD)) 694 o << "IC_VEX_W_XD"; 695 else if ((index & ATTR_VEX) && (index & ATTR_REXW) && (index & ATTR_XS)) 696 o << "IC_VEX_W_XS"; 697 else if (index & ATTR_VEXL) 698 o << "IC_VEX_L"; 699 else if ((index & ATTR_VEX) && (index & ATTR_REXW)) 700 o << "IC_VEX_W"; 701 else if ((index & ATTR_VEX) && (index & ATTR_OPSIZE)) 702 o << "IC_VEX_OPSIZE"; 703 else if ((index & ATTR_VEX) && (index & ATTR_XD)) 704 o << "IC_VEX_XD"; 705 else if ((index & ATTR_VEX) && (index & ATTR_XS)) 706 o << "IC_VEX_XS"; 707 else if (index & ATTR_VEX) 708 o << "IC_VEX"; 709 else if ((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_XS)) 710 o << "IC_64BIT_REXW_XS"; 711 else if ((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_XD)) 712 o << "IC_64BIT_REXW_XD"; 713 else if ((index & ATTR_64BIT) && (index & ATTR_REXW) && 714 (index & ATTR_OPSIZE)) 715 o << "IC_64BIT_REXW_OPSIZE"; 716 else if ((index & ATTR_64BIT) && (index & ATTR_XD) && (index & ATTR_OPSIZE)) 717 o << "IC_64BIT_XD_OPSIZE"; 718 else if ((index & ATTR_64BIT) && (index & ATTR_XS) && (index & ATTR_OPSIZE)) 719 o << "IC_64BIT_XS_OPSIZE"; 720 else if ((index & ATTR_64BIT) && (index & ATTR_XS)) 721 o << "IC_64BIT_XS"; 722 else if ((index & ATTR_64BIT) && (index & ATTR_XD)) 723 o << "IC_64BIT_XD"; 724 else if ((index & ATTR_64BIT) && (index & ATTR_OPSIZE)) 725 o << "IC_64BIT_OPSIZE"; 726 else if ((index & ATTR_64BIT) && (index & ATTR_ADSIZE)) 727 o << "IC_64BIT_ADSIZE"; 728 else if ((index & ATTR_64BIT) && (index & ATTR_REXW)) 729 o << "IC_64BIT_REXW"; 730 else if ((index & ATTR_64BIT)) 731 o << "IC_64BIT"; 732 else if ((index & ATTR_XS) && (index & ATTR_OPSIZE)) 733 o << "IC_XS_OPSIZE"; 734 else if ((index & ATTR_XD) && (index & ATTR_OPSIZE)) 735 o << "IC_XD_OPSIZE"; 736 else if (index & ATTR_XS) 737 o << "IC_XS"; 738 else if (index & ATTR_XD) 739 o << "IC_XD"; 740 else if (index & ATTR_OPSIZE) 741 o << "IC_OPSIZE"; 742 else if (index & ATTR_ADSIZE) 743 o << "IC_ADSIZE"; 744 else 745 o << "IC"; 746 747 if (index < tableSize - 1) 748 o << ","; 749 else 750 o << " "; 751 752 o << " /* " << index << " */"; 753 754 o << "\n"; 755 } 756 757 i--; 758 o.indent(i * 2) << "};" << "\n"; 759 } 760 761 void DisassemblerTables::emitContextDecisions(raw_ostream &o1, raw_ostream &o2, 762 unsigned &i1, unsigned &i2, 763 unsigned &ModRMTableNum) const { 764 emitContextDecision(o1, o2, i1, i2, ModRMTableNum, *Tables[0], ONEBYTE_STR); 765 emitContextDecision(o1, o2, i1, i2, ModRMTableNum, *Tables[1], TWOBYTE_STR); 766 emitContextDecision(o1, o2, i1, i2, ModRMTableNum, *Tables[2], THREEBYTE38_STR); 767 emitContextDecision(o1, o2, i1, i2, ModRMTableNum, *Tables[3], THREEBYTE3A_STR); 768 emitContextDecision(o1, o2, i1, i2, ModRMTableNum, *Tables[4], XOP8_MAP_STR); 769 emitContextDecision(o1, o2, i1, i2, ModRMTableNum, *Tables[5], XOP9_MAP_STR); 770 emitContextDecision(o1, o2, i1, i2, ModRMTableNum, *Tables[6], XOPA_MAP_STR); 771 } 772 773 void DisassemblerTables::emit(raw_ostream &o) const { 774 unsigned i1 = 0; 775 unsigned i2 = 0; 776 777 std::string s1; 778 std::string s2; 779 780 raw_string_ostream o1(s1); 781 raw_string_ostream o2(s2); 782 783 emitInstructionInfo(o, i2); 784 o << "\n"; 785 786 emitContextTable(o, i2); 787 o << "\n"; 788 789 unsigned ModRMTableNum = 0; 790 791 o << "static const InstrUID modRMTable[] = {\n"; 792 i1++; 793 std::vector<unsigned> EmptyTable(1, 0); 794 ModRMTable[EmptyTable] = ModRMTableNum; 795 ModRMTableNum += EmptyTable.size(); 796 o1 << "/* EmptyTable */\n"; 797 o1.indent(i1 * 2) << "0x0,\n"; 798 i1--; 799 emitContextDecisions(o1, o2, i1, i2, ModRMTableNum); 800 801 o << o1.str(); 802 o << " 0x0\n"; 803 o << "};\n"; 804 o << "\n"; 805 o << o2.str(); 806 o << "\n"; 807 o << "\n"; 808 } 809 810 void DisassemblerTables::setTableFields(ModRMDecision &decision, 811 const ModRMFilter &filter, 812 InstrUID uid, 813 uint8_t opcode) { 814 for (unsigned index = 0; index < 256; ++index) { 815 if (filter.accepts(index)) { 816 if (decision.instructionIDs[index] == uid) 817 continue; 818 819 if (decision.instructionIDs[index] != 0) { 820 InstructionSpecifier &newInfo = 821 InstructionSpecifiers[uid]; 822 InstructionSpecifier &previousInfo = 823 InstructionSpecifiers[decision.instructionIDs[index]]; 824 825 // Instructions such as MOV8ao8 and MOV8ao8_16 differ only in the 826 // presence of the AdSize prefix. However, the disassembler doesn't 827 // care about that difference in the instruction definition; it 828 // handles 16-bit vs. 32-bit addressing for itself based purely 829 // on the 0x67 prefix and the CPU mode. So there's no need to 830 // disambiguate between them; just let them conflict/coexist. 831 if (previousInfo.name + "_16" == newInfo.name) 832 continue; 833 834 if(previousInfo.name == "NOOP" && (newInfo.name == "XCHG16ar" || 835 newInfo.name == "XCHG32ar" || 836 newInfo.name == "XCHG32ar64" || 837 newInfo.name == "XCHG64ar")) 838 continue; // special case for XCHG*ar and NOOP 839 840 if (outranks(previousInfo.insnContext, newInfo.insnContext)) 841 continue; 842 843 if (previousInfo.insnContext == newInfo.insnContext) { 844 errs() << "Error: Primary decode conflict: "; 845 errs() << newInfo.name << " would overwrite " << previousInfo.name; 846 errs() << "\n"; 847 errs() << "ModRM " << index << "\n"; 848 errs() << "Opcode " << (uint16_t)opcode << "\n"; 849 errs() << "Context " << stringForContext(newInfo.insnContext) << "\n"; 850 HasConflicts = true; 851 } 852 } 853 854 decision.instructionIDs[index] = uid; 855 } 856 } 857 } 858 859 void DisassemblerTables::setTableFields(OpcodeType type, 860 InstructionContext insnContext, 861 uint8_t opcode, 862 const ModRMFilter &filter, 863 InstrUID uid, 864 bool is32bit, 865 bool ignoresVEX_L) { 866 ContextDecision &decision = *Tables[type]; 867 868 for (unsigned index = 0; index < IC_max; ++index) { 869 if (is32bit && inheritsFrom((InstructionContext)index, IC_64BIT)) 870 continue; 871 872 if (inheritsFrom((InstructionContext)index, 873 InstructionSpecifiers[uid].insnContext, ignoresVEX_L)) 874 setTableFields(decision.opcodeDecisions[index].modRMDecisions[opcode], 875 filter, 876 uid, 877 opcode); 878 } 879 } 880