1 /*===-- X86DisassemblerDecoderInternal.h - Disassembler decoder ---*- C -*-===* 2 * 3 * The LLVM Compiler Infrastructure 4 * 5 * This file is distributed under the University of Illinois Open Source 6 * License. See LICENSE.TXT for details. 7 * 8 *===----------------------------------------------------------------------===* 9 * 10 * This file is part of the X86 Disassembler. 11 * It contains the public interface of the instruction decoder. 12 * Documentation for the disassembler can be found in X86Disassembler.h. 13 * 14 *===----------------------------------------------------------------------===*/ 15 16 #ifndef X86DISASSEMBLERDECODER_H 17 #define X86DISASSEMBLERDECODER_H 18 19 #ifdef __cplusplus 20 extern "C" { 21 #endif 22 23 #define INSTRUCTION_SPECIFIER_FIELDS 24 25 #define INSTRUCTION_IDS \ 26 unsigned instructionIDs; 27 28 #include "X86DisassemblerDecoderCommon.h" 29 30 #undef INSTRUCTION_SPECIFIER_FIELDS 31 #undef INSTRUCTION_IDS 32 33 /* 34 * Accessor functions for various fields of an Intel instruction 35 */ 36 #define modFromModRM(modRM) (((modRM) & 0xc0) >> 6) 37 #define regFromModRM(modRM) (((modRM) & 0x38) >> 3) 38 #define rmFromModRM(modRM) ((modRM) & 0x7) 39 #define scaleFromSIB(sib) (((sib) & 0xc0) >> 6) 40 #define indexFromSIB(sib) (((sib) & 0x38) >> 3) 41 #define baseFromSIB(sib) ((sib) & 0x7) 42 #define wFromREX(rex) (((rex) & 0x8) >> 3) 43 #define rFromREX(rex) (((rex) & 0x4) >> 2) 44 #define xFromREX(rex) (((rex) & 0x2) >> 1) 45 #define bFromREX(rex) ((rex) & 0x1) 46 47 #define rFromVEX2of3(vex) (((~(vex)) & 0x80) >> 7) 48 #define xFromVEX2of3(vex) (((~(vex)) & 0x40) >> 6) 49 #define bFromVEX2of3(vex) (((~(vex)) & 0x20) >> 5) 50 #define mmmmmFromVEX2of3(vex) ((vex) & 0x1f) 51 #define wFromVEX3of3(vex) (((vex) & 0x80) >> 7) 52 #define vvvvFromVEX3of3(vex) (((~(vex)) & 0x78) >> 3) 53 #define lFromVEX3of3(vex) (((vex) & 0x4) >> 2) 54 #define ppFromVEX3of3(vex) ((vex) & 0x3) 55 56 #define rFromVEX2of2(vex) (((~(vex)) & 0x80) >> 7) 57 #define vvvvFromVEX2of2(vex) (((~(vex)) & 0x78) >> 3) 58 #define lFromVEX2of2(vex) (((vex) & 0x4) >> 2) 59 #define ppFromVEX2of2(vex) ((vex) & 0x3) 60 61 /* 62 * These enums represent Intel registers for use by the decoder. 63 */ 64 65 #define REGS_8BIT \ 66 ENTRY(AL) \ 67 ENTRY(CL) \ 68 ENTRY(DL) \ 69 ENTRY(BL) \ 70 ENTRY(AH) \ 71 ENTRY(CH) \ 72 ENTRY(DH) \ 73 ENTRY(BH) \ 74 ENTRY(R8B) \ 75 ENTRY(R9B) \ 76 ENTRY(R10B) \ 77 ENTRY(R11B) \ 78 ENTRY(R12B) \ 79 ENTRY(R13B) \ 80 ENTRY(R14B) \ 81 ENTRY(R15B) \ 82 ENTRY(SPL) \ 83 ENTRY(BPL) \ 84 ENTRY(SIL) \ 85 ENTRY(DIL) 86 87 #define EA_BASES_16BIT \ 88 ENTRY(BX_SI) \ 89 ENTRY(BX_DI) \ 90 ENTRY(BP_SI) \ 91 ENTRY(BP_DI) \ 92 ENTRY(SI) \ 93 ENTRY(DI) \ 94 ENTRY(BP) \ 95 ENTRY(BX) \ 96 ENTRY(R8W) \ 97 ENTRY(R9W) \ 98 ENTRY(R10W) \ 99 ENTRY(R11W) \ 100 ENTRY(R12W) \ 101 ENTRY(R13W) \ 102 ENTRY(R14W) \ 103 ENTRY(R15W) 104 105 #define REGS_16BIT \ 106 ENTRY(AX) \ 107 ENTRY(CX) \ 108 ENTRY(DX) \ 109 ENTRY(BX) \ 110 ENTRY(SP) \ 111 ENTRY(BP) \ 112 ENTRY(SI) \ 113 ENTRY(DI) \ 114 ENTRY(R8W) \ 115 ENTRY(R9W) \ 116 ENTRY(R10W) \ 117 ENTRY(R11W) \ 118 ENTRY(R12W) \ 119 ENTRY(R13W) \ 120 ENTRY(R14W) \ 121 ENTRY(R15W) 122 123 #define EA_BASES_32BIT \ 124 ENTRY(EAX) \ 125 ENTRY(ECX) \ 126 ENTRY(EDX) \ 127 ENTRY(EBX) \ 128 ENTRY(sib) \ 129 ENTRY(EBP) \ 130 ENTRY(ESI) \ 131 ENTRY(EDI) \ 132 ENTRY(R8D) \ 133 ENTRY(R9D) \ 134 ENTRY(R10D) \ 135 ENTRY(R11D) \ 136 ENTRY(R12D) \ 137 ENTRY(R13D) \ 138 ENTRY(R14D) \ 139 ENTRY(R15D) 140 141 #define REGS_32BIT \ 142 ENTRY(EAX) \ 143 ENTRY(ECX) \ 144 ENTRY(EDX) \ 145 ENTRY(EBX) \ 146 ENTRY(ESP) \ 147 ENTRY(EBP) \ 148 ENTRY(ESI) \ 149 ENTRY(EDI) \ 150 ENTRY(R8D) \ 151 ENTRY(R9D) \ 152 ENTRY(R10D) \ 153 ENTRY(R11D) \ 154 ENTRY(R12D) \ 155 ENTRY(R13D) \ 156 ENTRY(R14D) \ 157 ENTRY(R15D) 158 159 #define EA_BASES_64BIT \ 160 ENTRY(RAX) \ 161 ENTRY(RCX) \ 162 ENTRY(RDX) \ 163 ENTRY(RBX) \ 164 ENTRY(sib64) \ 165 ENTRY(RBP) \ 166 ENTRY(RSI) \ 167 ENTRY(RDI) \ 168 ENTRY(R8) \ 169 ENTRY(R9) \ 170 ENTRY(R10) \ 171 ENTRY(R11) \ 172 ENTRY(R12) \ 173 ENTRY(R13) \ 174 ENTRY(R14) \ 175 ENTRY(R15) 176 177 #define REGS_64BIT \ 178 ENTRY(RAX) \ 179 ENTRY(RCX) \ 180 ENTRY(RDX) \ 181 ENTRY(RBX) \ 182 ENTRY(RSP) \ 183 ENTRY(RBP) \ 184 ENTRY(RSI) \ 185 ENTRY(RDI) \ 186 ENTRY(R8) \ 187 ENTRY(R9) \ 188 ENTRY(R10) \ 189 ENTRY(R11) \ 190 ENTRY(R12) \ 191 ENTRY(R13) \ 192 ENTRY(R14) \ 193 ENTRY(R15) 194 195 #define REGS_MMX \ 196 ENTRY(MM0) \ 197 ENTRY(MM1) \ 198 ENTRY(MM2) \ 199 ENTRY(MM3) \ 200 ENTRY(MM4) \ 201 ENTRY(MM5) \ 202 ENTRY(MM6) \ 203 ENTRY(MM7) 204 205 #define REGS_XMM \ 206 ENTRY(XMM0) \ 207 ENTRY(XMM1) \ 208 ENTRY(XMM2) \ 209 ENTRY(XMM3) \ 210 ENTRY(XMM4) \ 211 ENTRY(XMM5) \ 212 ENTRY(XMM6) \ 213 ENTRY(XMM7) \ 214 ENTRY(XMM8) \ 215 ENTRY(XMM9) \ 216 ENTRY(XMM10) \ 217 ENTRY(XMM11) \ 218 ENTRY(XMM12) \ 219 ENTRY(XMM13) \ 220 ENTRY(XMM14) \ 221 ENTRY(XMM15) 222 223 #define REGS_YMM \ 224 ENTRY(YMM0) \ 225 ENTRY(YMM1) \ 226 ENTRY(YMM2) \ 227 ENTRY(YMM3) \ 228 ENTRY(YMM4) \ 229 ENTRY(YMM5) \ 230 ENTRY(YMM6) \ 231 ENTRY(YMM7) \ 232 ENTRY(YMM8) \ 233 ENTRY(YMM9) \ 234 ENTRY(YMM10) \ 235 ENTRY(YMM11) \ 236 ENTRY(YMM12) \ 237 ENTRY(YMM13) \ 238 ENTRY(YMM14) \ 239 ENTRY(YMM15) 240 241 #define REGS_SEGMENT \ 242 ENTRY(ES) \ 243 ENTRY(CS) \ 244 ENTRY(SS) \ 245 ENTRY(DS) \ 246 ENTRY(FS) \ 247 ENTRY(GS) 248 249 #define REGS_DEBUG \ 250 ENTRY(DR0) \ 251 ENTRY(DR1) \ 252 ENTRY(DR2) \ 253 ENTRY(DR3) \ 254 ENTRY(DR4) \ 255 ENTRY(DR5) \ 256 ENTRY(DR6) \ 257 ENTRY(DR7) 258 259 #define REGS_CONTROL \ 260 ENTRY(CR0) \ 261 ENTRY(CR1) \ 262 ENTRY(CR2) \ 263 ENTRY(CR3) \ 264 ENTRY(CR4) \ 265 ENTRY(CR5) \ 266 ENTRY(CR6) \ 267 ENTRY(CR7) \ 268 ENTRY(CR8) 269 270 #define ALL_EA_BASES \ 271 EA_BASES_16BIT \ 272 EA_BASES_32BIT \ 273 EA_BASES_64BIT 274 275 #define ALL_SIB_BASES \ 276 REGS_32BIT \ 277 REGS_64BIT 278 279 #define ALL_REGS \ 280 REGS_8BIT \ 281 REGS_16BIT \ 282 REGS_32BIT \ 283 REGS_64BIT \ 284 REGS_MMX \ 285 REGS_XMM \ 286 REGS_YMM \ 287 REGS_SEGMENT \ 288 REGS_DEBUG \ 289 REGS_CONTROL \ 290 ENTRY(RIP) 291 292 /* 293 * EABase - All possible values of the base field for effective-address 294 * computations, a.k.a. the Mod and R/M fields of the ModR/M byte. We 295 * distinguish between bases (EA_BASE_*) and registers that just happen to be 296 * referred to when Mod == 0b11 (EA_REG_*). 297 */ 298 typedef enum { 299 EA_BASE_NONE, 300 #define ENTRY(x) EA_BASE_##x, 301 ALL_EA_BASES 302 #undef ENTRY 303 #define ENTRY(x) EA_REG_##x, 304 ALL_REGS 305 #undef ENTRY 306 EA_max 307 } EABase; 308 309 /* 310 * SIBIndex - All possible values of the SIB index field. 311 * Borrows entries from ALL_EA_BASES with the special case that 312 * sib is synonymous with NONE. 313 * Vector SIB: index can be XMM or YMM. 314 */ 315 typedef enum { 316 SIB_INDEX_NONE, 317 #define ENTRY(x) SIB_INDEX_##x, 318 ALL_EA_BASES 319 REGS_XMM 320 REGS_YMM 321 #undef ENTRY 322 SIB_INDEX_max 323 } SIBIndex; 324 325 /* 326 * SIBBase - All possible values of the SIB base field. 327 */ 328 typedef enum { 329 SIB_BASE_NONE, 330 #define ENTRY(x) SIB_BASE_##x, 331 ALL_SIB_BASES 332 #undef ENTRY 333 SIB_BASE_max 334 } SIBBase; 335 336 /* 337 * EADisplacement - Possible displacement types for effective-address 338 * computations. 339 */ 340 typedef enum { 341 EA_DISP_NONE, 342 EA_DISP_8, 343 EA_DISP_16, 344 EA_DISP_32 345 } EADisplacement; 346 347 /* 348 * Reg - All possible values of the reg field in the ModR/M byte. 349 */ 350 typedef enum { 351 #define ENTRY(x) MODRM_REG_##x, 352 ALL_REGS 353 #undef ENTRY 354 MODRM_REG_max 355 } Reg; 356 357 /* 358 * SegmentOverride - All possible segment overrides. 359 */ 360 typedef enum { 361 SEG_OVERRIDE_NONE, 362 SEG_OVERRIDE_CS, 363 SEG_OVERRIDE_SS, 364 SEG_OVERRIDE_DS, 365 SEG_OVERRIDE_ES, 366 SEG_OVERRIDE_FS, 367 SEG_OVERRIDE_GS, 368 SEG_OVERRIDE_max 369 } SegmentOverride; 370 371 /* 372 * VEXLeadingOpcodeByte - Possible values for the VEX.m-mmmm field 373 */ 374 375 typedef enum { 376 VEX_LOB_0F = 0x1, 377 VEX_LOB_0F38 = 0x2, 378 VEX_LOB_0F3A = 0x3 379 } VEXLeadingOpcodeByte; 380 381 /* 382 * VEXPrefixCode - Possible values for the VEX.pp field 383 */ 384 385 typedef enum { 386 VEX_PREFIX_NONE = 0x0, 387 VEX_PREFIX_66 = 0x1, 388 VEX_PREFIX_F3 = 0x2, 389 VEX_PREFIX_F2 = 0x3 390 } VEXPrefixCode; 391 392 typedef uint8_t BOOL; 393 394 /* 395 * byteReader_t - Type for the byte reader that the consumer must provide to 396 * the decoder. Reads a single byte from the instruction's address space. 397 * @param arg - A baton that the consumer can associate with any internal 398 * state that it needs. 399 * @param byte - A pointer to a single byte in memory that should be set to 400 * contain the value at address. 401 * @param address - The address in the instruction's address space that should 402 * be read from. 403 * @return - -1 if the byte cannot be read for any reason; 0 otherwise. 404 */ 405 typedef int (*byteReader_t)(void* arg, uint8_t* byte, uint64_t address); 406 407 /* 408 * dlog_t - Type for the logging function that the consumer can provide to 409 * get debugging output from the decoder. 410 * @param arg - A baton that the consumer can associate with any internal 411 * state that it needs. 412 * @param log - A string that contains the message. Will be reused after 413 * the logger returns. 414 */ 415 typedef void (*dlog_t)(void* arg, const char *log); 416 417 /* 418 * The x86 internal instruction, which is produced by the decoder. 419 */ 420 struct InternalInstruction { 421 /* Reader interface (C) */ 422 byteReader_t reader; 423 /* Opaque value passed to the reader */ 424 void* readerArg; 425 /* The address of the next byte to read via the reader */ 426 uint64_t readerCursor; 427 428 /* Logger interface (C) */ 429 dlog_t dlog; 430 /* Opaque value passed to the logger */ 431 void* dlogArg; 432 433 /* General instruction information */ 434 435 /* The mode to disassemble for (64-bit, protected, real) */ 436 DisassemblerMode mode; 437 /* The start of the instruction, usable with the reader */ 438 uint64_t startLocation; 439 /* The length of the instruction, in bytes */ 440 size_t length; 441 442 /* Prefix state */ 443 444 /* 1 if the prefix byte corresponding to the entry is present; 0 if not */ 445 uint8_t prefixPresent[0x100]; 446 /* contains the location (for use with the reader) of the prefix byte */ 447 uint64_t prefixLocations[0x100]; 448 /* The value of the VEX prefix, if present */ 449 uint8_t vexPrefix[3]; 450 /* The length of the VEX prefix (0 if not present) */ 451 uint8_t vexSize; 452 /* The value of the REX prefix, if present */ 453 uint8_t rexPrefix; 454 /* The location where a mandatory prefix would have to be (i.e., right before 455 the opcode, or right before the REX prefix if one is present) */ 456 uint64_t necessaryPrefixLocation; 457 /* The segment override type */ 458 SegmentOverride segmentOverride; 459 460 /* Sizes of various critical pieces of data, in bytes */ 461 uint8_t registerSize; 462 uint8_t addressSize; 463 uint8_t displacementSize; 464 uint8_t immediateSize; 465 466 /* Offsets from the start of the instruction to the pieces of data, which is 467 needed to find relocation entries for adding symbolic operands */ 468 uint8_t displacementOffset; 469 uint8_t immediateOffset; 470 471 /* opcode state */ 472 473 /* The value of the two-byte escape prefix (usually 0x0f) */ 474 uint8_t twoByteEscape; 475 /* The value of the three-byte escape prefix (usually 0x38 or 0x3a) */ 476 uint8_t threeByteEscape; 477 /* The last byte of the opcode, not counting any ModR/M extension */ 478 uint8_t opcode; 479 /* The ModR/M byte of the instruction, if it is an opcode extension */ 480 uint8_t modRMExtension; 481 482 /* decode state */ 483 484 /* The type of opcode, used for indexing into the array of decode tables */ 485 OpcodeType opcodeType; 486 /* The instruction ID, extracted from the decode table */ 487 uint16_t instructionID; 488 /* The specifier for the instruction, from the instruction info table */ 489 const struct InstructionSpecifier *spec; 490 491 /* state for additional bytes, consumed during operand decode. Pattern: 492 consumed___ indicates that the byte was already consumed and does not 493 need to be consumed again */ 494 495 /* The VEX.vvvv field, which contains a third register operand for some AVX 496 instructions */ 497 Reg vvvv; 498 499 /* The ModR/M byte, which contains most register operands and some portion of 500 all memory operands */ 501 BOOL consumedModRM; 502 uint8_t modRM; 503 504 /* The SIB byte, used for more complex 32- or 64-bit memory operands */ 505 BOOL consumedSIB; 506 uint8_t sib; 507 508 /* The displacement, used for memory operands */ 509 BOOL consumedDisplacement; 510 int32_t displacement; 511 512 /* Immediates. There can be two in some cases */ 513 uint8_t numImmediatesConsumed; 514 uint8_t numImmediatesTranslated; 515 uint64_t immediates[2]; 516 517 /* A register or immediate operand encoded into the opcode */ 518 BOOL consumedOpcodeModifier; 519 uint8_t opcodeModifier; 520 Reg opcodeRegister; 521 522 /* Portions of the ModR/M byte */ 523 524 /* These fields determine the allowable values for the ModR/M fields, which 525 depend on operand and address widths */ 526 EABase eaBaseBase; 527 EABase eaRegBase; 528 Reg regBase; 529 530 /* The Mod and R/M fields can encode a base for an effective address, or a 531 register. These are separated into two fields here */ 532 EABase eaBase; 533 EADisplacement eaDisplacement; 534 /* The reg field always encodes a register */ 535 Reg reg; 536 537 /* SIB state */ 538 SIBIndex sibIndex; 539 uint8_t sibScale; 540 SIBBase sibBase; 541 }; 542 543 /* decodeInstruction - Decode one instruction and store the decoding results in 544 * a buffer provided by the consumer. 545 * @param insn - The buffer to store the instruction in. Allocated by the 546 * consumer. 547 * @param reader - The byteReader_t for the bytes to be read. 548 * @param readerArg - An argument to pass to the reader for storing context 549 * specific to the consumer. May be NULL. 550 * @param logger - The dlog_t to be used in printing status messages from the 551 * disassembler. May be NULL. 552 * @param loggerArg - An argument to pass to the logger for storing context 553 * specific to the logger. May be NULL. 554 * @param startLoc - The address (in the reader's address space) of the first 555 * byte in the instruction. 556 * @param mode - The mode (16-bit, 32-bit, 64-bit) to decode in. 557 * @return - Nonzero if there was an error during decode, 0 otherwise. 558 */ 559 int decodeInstruction(struct InternalInstruction* insn, 560 byteReader_t reader, 561 void* readerArg, 562 dlog_t logger, 563 void* loggerArg, 564 void* miiArg, 565 uint64_t startLoc, 566 DisassemblerMode mode); 567 568 /* x86DisassemblerDebug - C-accessible function for printing a message to 569 * debugs() 570 * @param file - The name of the file printing the debug message. 571 * @param line - The line number that printed the debug message. 572 * @param s - The message to print. 573 */ 574 575 void x86DisassemblerDebug(const char *file, 576 unsigned line, 577 const char *s); 578 579 const char *x86DisassemblerGetInstrName(unsigned Opcode, void *mii); 580 581 #ifdef __cplusplus 582 } 583 #endif 584 585 #endif 586