//===-- X86DisassemblerDecoder.cpp - Disassembler decoder -----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is part of the X86 Disassembler.
// It contains the implementation of the instruction decoder.
// Documentation for the disassembler can be found in X86Disassembler.h.
//
//===----------------------------------------------------------------------===//

#include <cstdarg>   /* for va_*()        */
#include <cstdio>    /* for vsnprintf()   */
#include <cstdlib>   /* for exit()        */
#include <cstring>   /* for memset()      */

#include "X86DisassemblerDecoder.h"

using namespace llvm::X86Disassembler;

/// Specifies whether a ModR/M byte is needed and (if so) which
/// instruction each possible value of the ModR/M byte corresponds to.  Once
/// this information is known, we have narrowed down to a single instruction.
struct ModRMDecision {
  /// One of the MODRM_* kinds.  modRMRequired() treats anything other than
  /// MODRM_ONEENTRY as "a ModR/M byte is needed"; decode() switches on this
  /// value to choose how the ModR/M byte selects an entry.
  uint8_t modrm_type;
  /// Index of this decision's first entry in modRMTable.  decode() adds an
  /// offset derived from the ModR/M byte (none for MODRM_ONEENTRY).
  uint16_t instructionIDs;
};

/// Specifies which set of ModR/M->instruction tables to look at
/// given a particular opcode.
struct OpcodeDecision {
  /// Indexed by the opcode byte in modRMRequired() and decode().
  ModRMDecision modRMDecisions[256];
};

/// Specifies which opcode->instruction tables to look at given
/// a particular context (set of attributes).  Since there are many possible
/// contexts, the decoder first uses CONTEXTS_SYM to determine which context
/// applies given a specific set of attributes.  Hence there are only IC_max
/// entries in this table, rather than 2^(ATTR_max).
44 struct ContextDecision { 45 OpcodeDecision opcodeDecisions[IC_max]; 46 }; 47 48 #include "X86GenDisassemblerTables.inc" 49 50 #ifndef NDEBUG 51 #define debug(s) do { Debug(__FILE__, __LINE__, s); } while (0) 52 #else 53 #define debug(s) do { } while (0) 54 #endif 55 56 /* 57 * contextForAttrs - Client for the instruction context table. Takes a set of 58 * attributes and returns the appropriate decode context. 59 * 60 * @param attrMask - Attributes, from the enumeration attributeBits. 61 * @return - The InstructionContext to use when looking up an 62 * an instruction with these attributes. 63 */ 64 static InstructionContext contextForAttrs(uint16_t attrMask) { 65 return static_cast<InstructionContext>(CONTEXTS_SYM[attrMask]); 66 } 67 68 /* 69 * modRMRequired - Reads the appropriate instruction table to determine whether 70 * the ModR/M byte is required to decode a particular instruction. 71 * 72 * @param type - The opcode type (i.e., how many bytes it has). 73 * @param insnContext - The context for the instruction, as returned by 74 * contextForAttrs. 75 * @param opcode - The last byte of the instruction's opcode, not counting 76 * ModR/M extensions and escapes. 77 * @return - true if the ModR/M byte is required, false otherwise. 78 */ 79 static int modRMRequired(OpcodeType type, 80 InstructionContext insnContext, 81 uint16_t opcode) { 82 const struct ContextDecision* decision = nullptr; 83 84 switch (type) { 85 case ONEBYTE: 86 decision = &ONEBYTE_SYM; 87 break; 88 case TWOBYTE: 89 decision = &TWOBYTE_SYM; 90 break; 91 case THREEBYTE_38: 92 decision = &THREEBYTE38_SYM; 93 break; 94 case THREEBYTE_3A: 95 decision = &THREEBYTE3A_SYM; 96 break; 97 case XOP8_MAP: 98 decision = &XOP8_MAP_SYM; 99 break; 100 case XOP9_MAP: 101 decision = &XOP9_MAP_SYM; 102 break; 103 case XOPA_MAP: 104 decision = &XOPA_MAP_SYM; 105 break; 106 } 107 108 return decision->opcodeDecisions[insnContext].modRMDecisions[opcode]. 
109 modrm_type != MODRM_ONEENTRY; 110 } 111 112 /* 113 * decode - Reads the appropriate instruction table to obtain the unique ID of 114 * an instruction. 115 * 116 * @param type - See modRMRequired(). 117 * @param insnContext - See modRMRequired(). 118 * @param opcode - See modRMRequired(). 119 * @param modRM - The ModR/M byte if required, or any value if not. 120 * @return - The UID of the instruction, or 0 on failure. 121 */ 122 static InstrUID decode(OpcodeType type, 123 InstructionContext insnContext, 124 uint8_t opcode, 125 uint8_t modRM) { 126 const struct ModRMDecision* dec = nullptr; 127 128 switch (type) { 129 case ONEBYTE: 130 dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 131 break; 132 case TWOBYTE: 133 dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 134 break; 135 case THREEBYTE_38: 136 dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 137 break; 138 case THREEBYTE_3A: 139 dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 140 break; 141 case XOP8_MAP: 142 dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 143 break; 144 case XOP9_MAP: 145 dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 146 break; 147 case XOPA_MAP: 148 dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 149 break; 150 } 151 152 switch (dec->modrm_type) { 153 default: 154 debug("Corrupt table! 
Unknown modrm_type"); 155 return 0; 156 case MODRM_ONEENTRY: 157 return modRMTable[dec->instructionIDs]; 158 case MODRM_SPLITRM: 159 if (modFromModRM(modRM) == 0x3) 160 return modRMTable[dec->instructionIDs+1]; 161 return modRMTable[dec->instructionIDs]; 162 case MODRM_SPLITREG: 163 if (modFromModRM(modRM) == 0x3) 164 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8]; 165 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)]; 166 case MODRM_SPLITMISC: 167 if (modFromModRM(modRM) == 0x3) 168 return modRMTable[dec->instructionIDs+(modRM & 0x3f)+8]; 169 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)]; 170 case MODRM_FULL: 171 return modRMTable[dec->instructionIDs+modRM]; 172 } 173 } 174 175 /* 176 * specifierForUID - Given a UID, returns the name and operand specification for 177 * that instruction. 178 * 179 * @param uid - The unique ID for the instruction. This should be returned by 180 * decode(); specifierForUID will not check bounds. 181 * @return - A pointer to the specification for that instruction. 182 */ 183 static const struct InstructionSpecifier *specifierForUID(InstrUID uid) { 184 return &INSTRUCTIONS_SYM[uid]; 185 } 186 187 /* 188 * consumeByte - Uses the reader function provided by the user to consume one 189 * byte from the instruction's memory and advance the cursor. 190 * 191 * @param insn - The instruction with the reader function to use. The cursor 192 * for this instruction is advanced. 193 * @param byte - A pointer to a pre-allocated memory buffer to be populated 194 * with the data read. 195 * @return - 0 if the read was successful; nonzero otherwise. 196 */ 197 static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) { 198 int ret = insn->reader(insn->readerArg, byte, insn->readerCursor); 199 200 if (!ret) 201 ++(insn->readerCursor); 202 203 return ret; 204 } 205 206 /* 207 * lookAtByte - Like consumeByte, but does not advance the cursor. 208 * 209 * @param insn - See consumeByte(). 
210 * @param byte - See consumeByte(). 211 * @return - See consumeByte(). 212 */ 213 static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) { 214 return insn->reader(insn->readerArg, byte, insn->readerCursor); 215 } 216 217 static void unconsumeByte(struct InternalInstruction* insn) { 218 insn->readerCursor--; 219 } 220 221 #define CONSUME_FUNC(name, type) \ 222 static int name(struct InternalInstruction* insn, type* ptr) { \ 223 type combined = 0; \ 224 unsigned offset; \ 225 for (offset = 0; offset < sizeof(type); ++offset) { \ 226 uint8_t byte; \ 227 int ret = insn->reader(insn->readerArg, \ 228 &byte, \ 229 insn->readerCursor + offset); \ 230 if (ret) \ 231 return ret; \ 232 combined = combined | ((uint64_t)byte << (offset * 8)); \ 233 } \ 234 *ptr = combined; \ 235 insn->readerCursor += sizeof(type); \ 236 return 0; \ 237 } 238 239 /* 240 * consume* - Use the reader function provided by the user to consume data 241 * values of various sizes from the instruction's memory and advance the 242 * cursor appropriately. These readers perform endian conversion. 243 * 244 * @param insn - See consumeByte(). 245 * @param ptr - A pointer to a pre-allocated memory of appropriate size to 246 * be populated with the data read. 247 * @return - See consumeByte(). 248 */ 249 CONSUME_FUNC(consumeInt8, int8_t) 250 CONSUME_FUNC(consumeInt16, int16_t) 251 CONSUME_FUNC(consumeInt32, int32_t) 252 CONSUME_FUNC(consumeUInt16, uint16_t) 253 CONSUME_FUNC(consumeUInt32, uint32_t) 254 CONSUME_FUNC(consumeUInt64, uint64_t) 255 256 /* 257 * dbgprintf - Uses the logging function provided by the user to log a single 258 * message, typically without a carriage-return. 259 * 260 * @param insn - The instruction containing the logging function. 261 * @param format - See printf(). 262 * @param ... - See printf(). 263 */ 264 static void dbgprintf(struct InternalInstruction* insn, 265 const char* format, 266 ...) 
{ 267 char buffer[256]; 268 va_list ap; 269 270 if (!insn->dlog) 271 return; 272 273 va_start(ap, format); 274 (void)vsnprintf(buffer, sizeof(buffer), format, ap); 275 va_end(ap); 276 277 insn->dlog(insn->dlogArg, buffer); 278 } 279 280 /* 281 * setPrefixPresent - Marks that a particular prefix is present at a particular 282 * location. 283 * 284 * @param insn - The instruction to be marked as having the prefix. 285 * @param prefix - The prefix that is present. 286 * @param location - The location where the prefix is located (in the address 287 * space of the instruction's reader). 288 */ 289 static void setPrefixPresent(struct InternalInstruction* insn, 290 uint8_t prefix, 291 uint64_t location) 292 { 293 insn->prefixPresent[prefix] = 1; 294 insn->prefixLocations[prefix] = location; 295 } 296 297 /* 298 * isPrefixAtLocation - Queries an instruction to determine whether a prefix is 299 * present at a given location. 300 * 301 * @param insn - The instruction to be queried. 302 * @param prefix - The prefix. 303 * @param location - The location to query. 304 * @return - Whether the prefix is at that location. 305 */ 306 static bool isPrefixAtLocation(struct InternalInstruction* insn, 307 uint8_t prefix, 308 uint64_t location) 309 { 310 return insn->prefixPresent[prefix] == 1 && 311 insn->prefixLocations[prefix] == location; 312 } 313 314 /* 315 * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the 316 * instruction as having them. Also sets the instruction's default operand, 317 * address, and other relevant data sizes to report operands correctly. 318 * 319 * @param insn - The instruction whose prefixes are to be read. 320 * @return - 0 if the instruction could be read until the end of the prefix 321 * bytes, and no prefixes conflicted; nonzero otherwise. 
322 */ 323 static int readPrefixes(struct InternalInstruction* insn) { 324 bool isPrefix = true; 325 bool prefixGroups[4] = { false }; 326 uint64_t prefixLocation; 327 uint8_t byte = 0; 328 uint8_t nextByte; 329 330 bool hasAdSize = false; 331 bool hasOpSize = false; 332 333 dbgprintf(insn, "readPrefixes()"); 334 335 while (isPrefix) { 336 prefixLocation = insn->readerCursor; 337 338 /* If we fail reading prefixes, just stop here and let the opcode reader deal with it */ 339 if (consumeByte(insn, &byte)) 340 break; 341 342 /* 343 * If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then 344 * break and let it be disassembled as a normal "instruction". 345 */ 346 if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) 347 break; 348 349 if (insn->readerCursor - 1 == insn->startLocation 350 && (byte == 0xf2 || byte == 0xf3) 351 && !lookAtByte(insn, &nextByte)) 352 { 353 /* 354 * If the byte is 0xf2 or 0xf3, and any of the following conditions are 355 * met: 356 * - it is followed by a LOCK (0xf0) prefix 357 * - it is followed by an xchg instruction 358 * then it should be disassembled as a xacquire/xrelease not repne/rep. 359 */ 360 if ((byte == 0xf2 || byte == 0xf3) && 361 ((nextByte == 0xf0) || 362 ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) 363 insn->xAcquireRelease = true; 364 /* 365 * Also if the byte is 0xf3, and the following condition is met: 366 * - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or 367 * "mov mem, imm" (opcode 0xc6/0xc7) instructions. 368 * then it should be disassembled as an xrelease not rep. 
369 */ 370 if (byte == 0xf3 && 371 (nextByte == 0x88 || nextByte == 0x89 || 372 nextByte == 0xc6 || nextByte == 0xc7)) 373 insn->xAcquireRelease = true; 374 if (insn->mode == MODE_64BIT && (nextByte & 0xf0) == 0x40) { 375 if (consumeByte(insn, &nextByte)) 376 return -1; 377 if (lookAtByte(insn, &nextByte)) 378 return -1; 379 unconsumeByte(insn); 380 } 381 if (nextByte != 0x0f && nextByte != 0x90) 382 break; 383 } 384 385 switch (byte) { 386 case 0xf0: /* LOCK */ 387 case 0xf2: /* REPNE/REPNZ */ 388 case 0xf3: /* REP or REPE/REPZ */ 389 if (prefixGroups[0]) 390 dbgprintf(insn, "Redundant Group 1 prefix"); 391 prefixGroups[0] = true; 392 setPrefixPresent(insn, byte, prefixLocation); 393 break; 394 case 0x2e: /* CS segment override -OR- Branch not taken */ 395 case 0x36: /* SS segment override -OR- Branch taken */ 396 case 0x3e: /* DS segment override */ 397 case 0x26: /* ES segment override */ 398 case 0x64: /* FS segment override */ 399 case 0x65: /* GS segment override */ 400 switch (byte) { 401 case 0x2e: 402 insn->segmentOverride = SEG_OVERRIDE_CS; 403 break; 404 case 0x36: 405 insn->segmentOverride = SEG_OVERRIDE_SS; 406 break; 407 case 0x3e: 408 insn->segmentOverride = SEG_OVERRIDE_DS; 409 break; 410 case 0x26: 411 insn->segmentOverride = SEG_OVERRIDE_ES; 412 break; 413 case 0x64: 414 insn->segmentOverride = SEG_OVERRIDE_FS; 415 break; 416 case 0x65: 417 insn->segmentOverride = SEG_OVERRIDE_GS; 418 break; 419 default: 420 debug("Unhandled override"); 421 return -1; 422 } 423 if (prefixGroups[1]) 424 dbgprintf(insn, "Redundant Group 2 prefix"); 425 prefixGroups[1] = true; 426 setPrefixPresent(insn, byte, prefixLocation); 427 break; 428 case 0x66: /* Operand-size override */ 429 if (prefixGroups[2]) 430 dbgprintf(insn, "Redundant Group 3 prefix"); 431 prefixGroups[2] = true; 432 hasOpSize = true; 433 setPrefixPresent(insn, byte, prefixLocation); 434 break; 435 case 0x67: /* Address-size override */ 436 if (prefixGroups[3]) 437 dbgprintf(insn, "Redundant Group 4 
prefix"); 438 prefixGroups[3] = true; 439 hasAdSize = true; 440 setPrefixPresent(insn, byte, prefixLocation); 441 break; 442 default: /* Not a prefix byte */ 443 isPrefix = false; 444 break; 445 } 446 447 if (isPrefix) 448 dbgprintf(insn, "Found prefix 0x%hhx", byte); 449 } 450 451 insn->vectorExtensionType = TYPE_NO_VEX_XOP; 452 453 if (byte == 0x62) { 454 uint8_t byte1, byte2; 455 456 if (consumeByte(insn, &byte1)) { 457 dbgprintf(insn, "Couldn't read second byte of EVEX prefix"); 458 return -1; 459 } 460 461 if (lookAtByte(insn, &byte2)) { 462 dbgprintf(insn, "Couldn't read third byte of EVEX prefix"); 463 return -1; 464 } 465 466 if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) && 467 ((~byte1 & 0xc) == 0xc) && ((byte2 & 0x4) == 0x4)) { 468 insn->vectorExtensionType = TYPE_EVEX; 469 } else { 470 unconsumeByte(insn); /* unconsume byte1 */ 471 unconsumeByte(insn); /* unconsume byte */ 472 insn->necessaryPrefixLocation = insn->readerCursor - 2; 473 } 474 475 if (insn->vectorExtensionType == TYPE_EVEX) { 476 insn->vectorExtensionPrefix[0] = byte; 477 insn->vectorExtensionPrefix[1] = byte1; 478 if (consumeByte(insn, &insn->vectorExtensionPrefix[2])) { 479 dbgprintf(insn, "Couldn't read third byte of EVEX prefix"); 480 return -1; 481 } 482 if (consumeByte(insn, &insn->vectorExtensionPrefix[3])) { 483 dbgprintf(insn, "Couldn't read fourth byte of EVEX prefix"); 484 return -1; 485 } 486 487 /* We simulate the REX prefix for simplicity's sake */ 488 if (insn->mode == MODE_64BIT) { 489 insn->rexPrefix = 0x40 490 | (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3) 491 | (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2) 492 | (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1) 493 | (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0); 494 } 495 496 dbgprintf(insn, "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx", 497 insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1], 498 insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]); 499 } 500 
} else if (byte == 0xc4) { 501 uint8_t byte1; 502 503 if (lookAtByte(insn, &byte1)) { 504 dbgprintf(insn, "Couldn't read second byte of VEX"); 505 return -1; 506 } 507 508 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { 509 insn->vectorExtensionType = TYPE_VEX_3B; 510 insn->necessaryPrefixLocation = insn->readerCursor - 1; 511 } else { 512 unconsumeByte(insn); 513 insn->necessaryPrefixLocation = insn->readerCursor - 1; 514 } 515 516 if (insn->vectorExtensionType == TYPE_VEX_3B) { 517 insn->vectorExtensionPrefix[0] = byte; 518 consumeByte(insn, &insn->vectorExtensionPrefix[1]); 519 consumeByte(insn, &insn->vectorExtensionPrefix[2]); 520 521 /* We simulate the REX prefix for simplicity's sake */ 522 523 if (insn->mode == MODE_64BIT) { 524 insn->rexPrefix = 0x40 525 | (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3) 526 | (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2) 527 | (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1) 528 | (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0); 529 } 530 531 dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", 532 insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1], 533 insn->vectorExtensionPrefix[2]); 534 } 535 } else if (byte == 0xc5) { 536 uint8_t byte1; 537 538 if (lookAtByte(insn, &byte1)) { 539 dbgprintf(insn, "Couldn't read second byte of VEX"); 540 return -1; 541 } 542 543 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { 544 insn->vectorExtensionType = TYPE_VEX_2B; 545 } else { 546 unconsumeByte(insn); 547 } 548 549 if (insn->vectorExtensionType == TYPE_VEX_2B) { 550 insn->vectorExtensionPrefix[0] = byte; 551 consumeByte(insn, &insn->vectorExtensionPrefix[1]); 552 553 if (insn->mode == MODE_64BIT) { 554 insn->rexPrefix = 0x40 555 | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2); 556 } 557 558 switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) { 559 default: 560 break; 561 case VEX_PREFIX_66: 562 hasOpSize = true; 563 break; 564 } 565 566 dbgprintf(insn, "Found VEX prefix 
0x%hhx 0x%hhx", 567 insn->vectorExtensionPrefix[0], 568 insn->vectorExtensionPrefix[1]); 569 } 570 } else if (byte == 0x8f) { 571 uint8_t byte1; 572 573 if (lookAtByte(insn, &byte1)) { 574 dbgprintf(insn, "Couldn't read second byte of XOP"); 575 return -1; 576 } 577 578 if ((byte1 & 0x38) != 0x0) { /* 0 in these 3 bits is a POP instruction. */ 579 insn->vectorExtensionType = TYPE_XOP; 580 insn->necessaryPrefixLocation = insn->readerCursor - 1; 581 } else { 582 unconsumeByte(insn); 583 insn->necessaryPrefixLocation = insn->readerCursor - 1; 584 } 585 586 if (insn->vectorExtensionType == TYPE_XOP) { 587 insn->vectorExtensionPrefix[0] = byte; 588 consumeByte(insn, &insn->vectorExtensionPrefix[1]); 589 consumeByte(insn, &insn->vectorExtensionPrefix[2]); 590 591 /* We simulate the REX prefix for simplicity's sake */ 592 593 if (insn->mode == MODE_64BIT) { 594 insn->rexPrefix = 0x40 595 | (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3) 596 | (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2) 597 | (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1) 598 | (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0); 599 } 600 601 switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) { 602 default: 603 break; 604 case VEX_PREFIX_66: 605 hasOpSize = true; 606 break; 607 } 608 609 dbgprintf(insn, "Found XOP prefix 0x%hhx 0x%hhx 0x%hhx", 610 insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1], 611 insn->vectorExtensionPrefix[2]); 612 } 613 } else { 614 if (insn->mode == MODE_64BIT) { 615 if ((byte & 0xf0) == 0x40) { 616 uint8_t opcodeByte; 617 618 if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) { 619 dbgprintf(insn, "Redundant REX prefix"); 620 return -1; 621 } 622 623 insn->rexPrefix = byte; 624 insn->necessaryPrefixLocation = insn->readerCursor - 2; 625 626 dbgprintf(insn, "Found REX prefix 0x%hhx", byte); 627 } else { 628 unconsumeByte(insn); 629 insn->necessaryPrefixLocation = insn->readerCursor - 1; 630 } 631 } else { 632 unconsumeByte(insn); 
633 insn->necessaryPrefixLocation = insn->readerCursor - 1; 634 } 635 } 636 637 if (insn->mode == MODE_16BIT) { 638 insn->registerSize = (hasOpSize ? 4 : 2); 639 insn->addressSize = (hasAdSize ? 4 : 2); 640 insn->displacementSize = (hasAdSize ? 4 : 2); 641 insn->immediateSize = (hasOpSize ? 4 : 2); 642 } else if (insn->mode == MODE_32BIT) { 643 insn->registerSize = (hasOpSize ? 2 : 4); 644 insn->addressSize = (hasAdSize ? 2 : 4); 645 insn->displacementSize = (hasAdSize ? 2 : 4); 646 insn->immediateSize = (hasOpSize ? 2 : 4); 647 } else if (insn->mode == MODE_64BIT) { 648 if (insn->rexPrefix && wFromREX(insn->rexPrefix)) { 649 insn->registerSize = 8; 650 insn->addressSize = (hasAdSize ? 4 : 8); 651 insn->displacementSize = 4; 652 insn->immediateSize = 4; 653 } else { 654 insn->registerSize = (hasOpSize ? 2 : 4); 655 insn->addressSize = (hasAdSize ? 4 : 8); 656 insn->displacementSize = (hasOpSize ? 2 : 4); 657 insn->immediateSize = (hasOpSize ? 2 : 4); 658 } 659 } 660 661 return 0; 662 } 663 664 /* 665 * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of 666 * extended or escape opcodes). 667 * 668 * @param insn - The instruction whose opcode is to be read. 669 * @return - 0 if the opcode could be read successfully; nonzero otherwise. 
670 */ 671 static int readOpcode(struct InternalInstruction* insn) { 672 /* Determine the length of the primary opcode */ 673 674 uint8_t current; 675 676 dbgprintf(insn, "readOpcode()"); 677 678 insn->opcodeType = ONEBYTE; 679 680 if (insn->vectorExtensionType == TYPE_EVEX) { 681 switch (mmFromEVEX2of4(insn->vectorExtensionPrefix[1])) { 682 default: 683 dbgprintf(insn, "Unhandled mm field for instruction (0x%hhx)", 684 mmFromEVEX2of4(insn->vectorExtensionPrefix[1])); 685 return -1; 686 case VEX_LOB_0F: 687 insn->opcodeType = TWOBYTE; 688 return consumeByte(insn, &insn->opcode); 689 case VEX_LOB_0F38: 690 insn->opcodeType = THREEBYTE_38; 691 return consumeByte(insn, &insn->opcode); 692 case VEX_LOB_0F3A: 693 insn->opcodeType = THREEBYTE_3A; 694 return consumeByte(insn, &insn->opcode); 695 } 696 } else if (insn->vectorExtensionType == TYPE_VEX_3B) { 697 switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) { 698 default: 699 dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", 700 mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])); 701 return -1; 702 case VEX_LOB_0F: 703 insn->opcodeType = TWOBYTE; 704 return consumeByte(insn, &insn->opcode); 705 case VEX_LOB_0F38: 706 insn->opcodeType = THREEBYTE_38; 707 return consumeByte(insn, &insn->opcode); 708 case VEX_LOB_0F3A: 709 insn->opcodeType = THREEBYTE_3A; 710 return consumeByte(insn, &insn->opcode); 711 } 712 } else if (insn->vectorExtensionType == TYPE_VEX_2B) { 713 insn->opcodeType = TWOBYTE; 714 return consumeByte(insn, &insn->opcode); 715 } else if (insn->vectorExtensionType == TYPE_XOP) { 716 switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) { 717 default: 718 dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", 719 mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])); 720 return -1; 721 case XOP_MAP_SELECT_8: 722 insn->opcodeType = XOP8_MAP; 723 return consumeByte(insn, &insn->opcode); 724 case XOP_MAP_SELECT_9: 725 insn->opcodeType = XOP9_MAP; 726 return consumeByte(insn, 
&insn->opcode); 727 case XOP_MAP_SELECT_A: 728 insn->opcodeType = XOPA_MAP; 729 return consumeByte(insn, &insn->opcode); 730 } 731 } 732 733 if (consumeByte(insn, ¤t)) 734 return -1; 735 736 if (current == 0x0f) { 737 dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current); 738 739 if (consumeByte(insn, ¤t)) 740 return -1; 741 742 if (current == 0x38) { 743 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 744 745 if (consumeByte(insn, ¤t)) 746 return -1; 747 748 insn->opcodeType = THREEBYTE_38; 749 } else if (current == 0x3a) { 750 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 751 752 if (consumeByte(insn, ¤t)) 753 return -1; 754 755 insn->opcodeType = THREEBYTE_3A; 756 } else { 757 dbgprintf(insn, "Didn't find a three-byte escape prefix"); 758 759 insn->opcodeType = TWOBYTE; 760 } 761 } 762 763 /* 764 * At this point we have consumed the full opcode. 765 * Anything we consume from here on must be unconsumed. 766 */ 767 768 insn->opcode = current; 769 770 return 0; 771 } 772 773 static int readModRM(struct InternalInstruction* insn); 774 775 /* 776 * getIDWithAttrMask - Determines the ID of an instruction, consuming 777 * the ModR/M byte as appropriate for extended and escape opcodes, 778 * and using a supplied attribute mask. 779 * 780 * @param instructionID - A pointer whose target is filled in with the ID of the 781 * instruction. 782 * @param insn - The instruction whose ID is to be determined. 783 * @param attrMask - The attribute mask to search. 784 * @return - 0 if the ModR/M could be read when needed or was not 785 * needed; nonzero otherwise. 
786 */ 787 static int getIDWithAttrMask(uint16_t* instructionID, 788 struct InternalInstruction* insn, 789 uint16_t attrMask) { 790 bool hasModRMExtension; 791 792 InstructionContext instructionClass = contextForAttrs(attrMask); 793 794 hasModRMExtension = modRMRequired(insn->opcodeType, 795 instructionClass, 796 insn->opcode); 797 798 if (hasModRMExtension) { 799 if (readModRM(insn)) 800 return -1; 801 802 *instructionID = decode(insn->opcodeType, 803 instructionClass, 804 insn->opcode, 805 insn->modRM); 806 } else { 807 *instructionID = decode(insn->opcodeType, 808 instructionClass, 809 insn->opcode, 810 0); 811 } 812 813 return 0; 814 } 815 816 /* 817 * is16BitEquivalent - Determines whether two instruction names refer to 818 * equivalent instructions but one is 16-bit whereas the other is not. 819 * 820 * @param orig - The instruction that is not 16-bit 821 * @param equiv - The instruction that is 16-bit 822 */ 823 static bool is16BitEquivalent(const char *orig, const char *equiv) { 824 off_t i; 825 826 for (i = 0;; i++) { 827 if (orig[i] == '\0' && equiv[i] == '\0') 828 return true; 829 if (orig[i] == '\0' || equiv[i] == '\0') 830 return false; 831 if (orig[i] != equiv[i]) { 832 if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W') 833 continue; 834 if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1') 835 continue; 836 if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6') 837 continue; 838 return false; 839 } 840 } 841 } 842 843 /* 844 * is64Bit - Determines whether this instruction is a 64-bit instruction. 845 * 846 * @param name - The instruction that is not 16-bit 847 */ 848 static bool is64Bit(const char *name) { 849 off_t i; 850 851 for (i = 0;; ++i) { 852 if (name[i] == '\0') 853 return false; 854 if (name[i] == '6' && name[i+1] == '4') 855 return true; 856 } 857 } 858 859 /* 860 * getID - Determines the ID of an instruction, consuming the ModR/M byte as 861 * appropriate for extended and escape opcodes. 
Determines the attributes and 862 * context for the instruction before doing so. 863 * 864 * @param insn - The instruction whose ID is to be determined. 865 * @return - 0 if the ModR/M could be read when needed or was not needed; 866 * nonzero otherwise. 867 */ 868 static int getID(struct InternalInstruction* insn, const void *miiArg) { 869 uint16_t attrMask; 870 uint16_t instructionID; 871 872 dbgprintf(insn, "getID()"); 873 874 attrMask = ATTR_NONE; 875 876 if (insn->mode == MODE_64BIT) 877 attrMask |= ATTR_64BIT; 878 879 if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) { 880 attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? ATTR_EVEX : ATTR_VEX; 881 882 if (insn->vectorExtensionType == TYPE_EVEX) { 883 switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) { 884 case VEX_PREFIX_66: 885 attrMask |= ATTR_OPSIZE; 886 break; 887 case VEX_PREFIX_F3: 888 attrMask |= ATTR_XS; 889 break; 890 case VEX_PREFIX_F2: 891 attrMask |= ATTR_XD; 892 break; 893 } 894 895 if (zFromEVEX4of4(insn->vectorExtensionPrefix[3])) 896 attrMask |= ATTR_EVEXKZ; 897 if (bFromEVEX4of4(insn->vectorExtensionPrefix[3])) 898 attrMask |= ATTR_EVEXB; 899 if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3])) 900 attrMask |= ATTR_EVEXK; 901 if (lFromEVEX4of4(insn->vectorExtensionPrefix[3])) 902 attrMask |= ATTR_EVEXL; 903 if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3])) 904 attrMask |= ATTR_EVEXL2; 905 } else if (insn->vectorExtensionType == TYPE_VEX_3B) { 906 switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) { 907 case VEX_PREFIX_66: 908 attrMask |= ATTR_OPSIZE; 909 break; 910 case VEX_PREFIX_F3: 911 attrMask |= ATTR_XS; 912 break; 913 case VEX_PREFIX_F2: 914 attrMask |= ATTR_XD; 915 break; 916 } 917 918 if (lFromVEX3of3(insn->vectorExtensionPrefix[2])) 919 attrMask |= ATTR_VEXL; 920 } else if (insn->vectorExtensionType == TYPE_VEX_2B) { 921 switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) { 922 case VEX_PREFIX_66: 923 attrMask |= ATTR_OPSIZE; 924 break; 925 case VEX_PREFIX_F3: 926 
attrMask |= ATTR_XS; 927 break; 928 case VEX_PREFIX_F2: 929 attrMask |= ATTR_XD; 930 break; 931 } 932 933 if (lFromVEX2of2(insn->vectorExtensionPrefix[1])) 934 attrMask |= ATTR_VEXL; 935 } else if (insn->vectorExtensionType == TYPE_XOP) { 936 switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) { 937 case VEX_PREFIX_66: 938 attrMask |= ATTR_OPSIZE; 939 break; 940 case VEX_PREFIX_F3: 941 attrMask |= ATTR_XS; 942 break; 943 case VEX_PREFIX_F2: 944 attrMask |= ATTR_XD; 945 break; 946 } 947 948 if (lFromXOP3of3(insn->vectorExtensionPrefix[2])) 949 attrMask |= ATTR_VEXL; 950 } else { 951 return -1; 952 } 953 } else { 954 if (insn->mode != MODE_16BIT && isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) 955 attrMask |= ATTR_OPSIZE; 956 else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation)) 957 attrMask |= ATTR_ADSIZE; 958 else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation)) 959 attrMask |= ATTR_XS; 960 else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation)) 961 attrMask |= ATTR_XD; 962 } 963 964 if (insn->rexPrefix & 0x08) 965 attrMask |= ATTR_REXW; 966 967 /* 968 * JCXZ/JECXZ need special handling for 16-bit mode because the meaning 969 * of the AdSize prefix is inverted w.r.t. 32-bit mode. 970 */ 971 if (insn->mode == MODE_16BIT && insn->opcodeType == ONEBYTE && 972 insn->opcode == 0xE3) 973 attrMask ^= ATTR_ADSIZE; 974 975 /* 976 * In 64-bit mode all f64 superscripted opcodes ignore opcode size prefix 977 * CALL/JMP/JCC instructions need to ignore 0x66 and consume 4 bytes 978 */ 979 980 if (insn->mode == MODE_64BIT && 981 isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) { 982 switch (insn->opcode) { 983 case 0xE8: 984 case 0xE9: 985 // Take care of psubsb and other mmx instructions. 
986 if (insn->opcodeType == ONEBYTE) { 987 attrMask ^= ATTR_OPSIZE; 988 insn->immediateSize = 4; 989 insn->displacementSize = 4; 990 } 991 break; 992 case 0x82: 993 case 0x83: 994 case 0x84: 995 case 0x85: 996 case 0x86: 997 case 0x87: 998 case 0x88: 999 case 0x89: 1000 case 0x8A: 1001 case 0x8B: 1002 case 0x8C: 1003 case 0x8D: 1004 case 0x8E: 1005 case 0x8F: 1006 // Take care of lea and three byte ops. 1007 if (insn->opcodeType == TWOBYTE) { 1008 attrMask ^= ATTR_OPSIZE; 1009 insn->immediateSize = 4; 1010 insn->displacementSize = 4; 1011 } 1012 break; 1013 } 1014 } 1015 1016 if (getIDWithAttrMask(&instructionID, insn, attrMask)) 1017 return -1; 1018 1019 /* The following clauses compensate for limitations of the tables. */ 1020 1021 if (insn->mode != MODE_64BIT && 1022 insn->vectorExtensionType != TYPE_NO_VEX_XOP) { 1023 /* 1024 * The tables can't distinquish between cases where the W-bit is used to 1025 * select register size and cases where its a required part of the opcode. 1026 */ 1027 if ((insn->vectorExtensionType == TYPE_EVEX && 1028 wFromEVEX3of4(insn->vectorExtensionPrefix[2])) || 1029 (insn->vectorExtensionType == TYPE_VEX_3B && 1030 wFromVEX3of3(insn->vectorExtensionPrefix[2])) || 1031 (insn->vectorExtensionType == TYPE_XOP && 1032 wFromXOP3of3(insn->vectorExtensionPrefix[2]))) { 1033 1034 uint16_t instructionIDWithREXW; 1035 if (getIDWithAttrMask(&instructionIDWithREXW, 1036 insn, attrMask | ATTR_REXW)) { 1037 insn->instructionID = instructionID; 1038 insn->spec = specifierForUID(instructionID); 1039 return 0; 1040 } 1041 1042 auto SpecName = GetInstrName(instructionIDWithREXW, miiArg); 1043 // If not a 64-bit instruction. Switch the opcode. 1044 if (!is64Bit(SpecName.data())) { 1045 insn->instructionID = instructionIDWithREXW; 1046 insn->spec = specifierForUID(instructionIDWithREXW); 1047 return 0; 1048 } 1049 } 1050 } 1051 1052 /* 1053 * Absolute moves need special handling. 
1054 * -For 16-bit mode because the meaning of the AdSize and OpSize prefixes are 1055 * inverted w.r.t. 1056 * -For 32-bit mode we need to ensure the ADSIZE prefix is observed in 1057 * any position. 1058 */ 1059 if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) { 1060 /* Make sure we observed the prefixes in any position. */ 1061 if (insn->prefixPresent[0x67]) 1062 attrMask |= ATTR_ADSIZE; 1063 if (insn->prefixPresent[0x66]) 1064 attrMask |= ATTR_OPSIZE; 1065 1066 /* In 16-bit, invert the attributes. */ 1067 if (insn->mode == MODE_16BIT) 1068 attrMask ^= ATTR_ADSIZE | ATTR_OPSIZE; 1069 1070 if (getIDWithAttrMask(&instructionID, insn, attrMask)) 1071 return -1; 1072 1073 insn->instructionID = instructionID; 1074 insn->spec = specifierForUID(instructionID); 1075 return 0; 1076 } 1077 1078 if ((insn->mode == MODE_16BIT || insn->prefixPresent[0x66]) && 1079 !(attrMask & ATTR_OPSIZE)) { 1080 /* 1081 * The instruction tables make no distinction between instructions that 1082 * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a 1083 * particular spot (i.e., many MMX operations). In general we're 1084 * conservative, but in the specific case where OpSize is present but not 1085 * in the right place we check if there's a 16-bit operation. 
1086 */ 1087 1088 const struct InstructionSpecifier *spec; 1089 uint16_t instructionIDWithOpsize; 1090 llvm::StringRef specName, specWithOpSizeName; 1091 1092 spec = specifierForUID(instructionID); 1093 1094 if (getIDWithAttrMask(&instructionIDWithOpsize, 1095 insn, 1096 attrMask | ATTR_OPSIZE)) { 1097 /* 1098 * ModRM required with OpSize but not present; give up and return version 1099 * without OpSize set 1100 */ 1101 1102 insn->instructionID = instructionID; 1103 insn->spec = spec; 1104 return 0; 1105 } 1106 1107 specName = GetInstrName(instructionID, miiArg); 1108 specWithOpSizeName = GetInstrName(instructionIDWithOpsize, miiArg); 1109 1110 if (is16BitEquivalent(specName.data(), specWithOpSizeName.data()) && 1111 (insn->mode == MODE_16BIT) ^ insn->prefixPresent[0x66]) { 1112 insn->instructionID = instructionIDWithOpsize; 1113 insn->spec = specifierForUID(instructionIDWithOpsize); 1114 } else { 1115 insn->instructionID = instructionID; 1116 insn->spec = spec; 1117 } 1118 return 0; 1119 } 1120 1121 if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 && 1122 insn->rexPrefix & 0x01) { 1123 /* 1124 * NOOP shouldn't decode as NOOP if REX.b is set. Instead 1125 * it should decode as XCHG %r8, %eax. 
1126 */ 1127 1128 const struct InstructionSpecifier *spec; 1129 uint16_t instructionIDWithNewOpcode; 1130 const struct InstructionSpecifier *specWithNewOpcode; 1131 1132 spec = specifierForUID(instructionID); 1133 1134 /* Borrow opcode from one of the other XCHGar opcodes */ 1135 insn->opcode = 0x91; 1136 1137 if (getIDWithAttrMask(&instructionIDWithNewOpcode, 1138 insn, 1139 attrMask)) { 1140 insn->opcode = 0x90; 1141 1142 insn->instructionID = instructionID; 1143 insn->spec = spec; 1144 return 0; 1145 } 1146 1147 specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode); 1148 1149 /* Change back */ 1150 insn->opcode = 0x90; 1151 1152 insn->instructionID = instructionIDWithNewOpcode; 1153 insn->spec = specWithNewOpcode; 1154 1155 return 0; 1156 } 1157 1158 insn->instructionID = instructionID; 1159 insn->spec = specifierForUID(insn->instructionID); 1160 1161 return 0; 1162 } 1163 1164 /* 1165 * readSIB - Consumes the SIB byte to determine addressing information for an 1166 * instruction. 1167 * 1168 * @param insn - The instruction whose SIB byte is to be read. 1169 * @return - 0 if the SIB byte was successfully read; nonzero otherwise. 
1170 */ 1171 static int readSIB(struct InternalInstruction* insn) { 1172 SIBIndex sibIndexBase = SIB_INDEX_NONE; 1173 SIBBase sibBaseBase = SIB_BASE_NONE; 1174 uint8_t index, base; 1175 1176 dbgprintf(insn, "readSIB()"); 1177 1178 if (insn->consumedSIB) 1179 return 0; 1180 1181 insn->consumedSIB = true; 1182 1183 switch (insn->addressSize) { 1184 case 2: 1185 dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode"); 1186 return -1; 1187 case 4: 1188 sibIndexBase = SIB_INDEX_EAX; 1189 sibBaseBase = SIB_BASE_EAX; 1190 break; 1191 case 8: 1192 sibIndexBase = SIB_INDEX_RAX; 1193 sibBaseBase = SIB_BASE_RAX; 1194 break; 1195 } 1196 1197 if (consumeByte(insn, &insn->sib)) 1198 return -1; 1199 1200 index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3); 1201 1202 // FIXME: The fifth bit (bit index 4) is only to be used for instructions 1203 // that understand VSIB indexing. ORing the bit in here is mildy dangerous 1204 // because performing math on an 'enum SIBIndex' can produce garbage. 1205 // Excluding the "none" value, it should cover 6 spaces of register names: 1206 // - 16 possibilities for 16-bit GPR starting at SIB_INDEX_BX_SI 1207 // - 16 possibilities for 32-bit GPR starting at SIB_INDEX_EAX 1208 // - 16 possibilities for 64-bit GPR starting at SIB_INDEX_RAX 1209 // - 32 possibilities for each of XMM, YMM, ZMM registers 1210 // When sibIndexBase gets assigned SIB_INDEX_RAX as it does in 64-bit mode, 1211 // summing in a fully decoded index between 0 and 31 can end up with a value 1212 // that looks like something in the low half of the XMM range. 
1213 // translateRMMemory() tries to reverse the damage, with only partial success, 1214 // as evidenced by known bugs in "test/MC/Disassembler/X86/x86-64.txt" 1215 if (insn->vectorExtensionType == TYPE_EVEX) 1216 index |= v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4; 1217 1218 if (index == 0x4) { 1219 insn->sibIndex = SIB_INDEX_NONE; 1220 } else { 1221 insn->sibIndex = (SIBIndex)(sibIndexBase + index); 1222 } 1223 1224 insn->sibScale = 1 << scaleFromSIB(insn->sib); 1225 1226 base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3); 1227 1228 switch (base) { 1229 case 0x5: 1230 case 0xd: 1231 switch (modFromModRM(insn->modRM)) { 1232 case 0x0: 1233 insn->eaDisplacement = EA_DISP_32; 1234 insn->sibBase = SIB_BASE_NONE; 1235 break; 1236 case 0x1: 1237 insn->eaDisplacement = EA_DISP_8; 1238 insn->sibBase = (SIBBase)(sibBaseBase + base); 1239 break; 1240 case 0x2: 1241 insn->eaDisplacement = EA_DISP_32; 1242 insn->sibBase = (SIBBase)(sibBaseBase + base); 1243 break; 1244 case 0x3: 1245 debug("Cannot have Mod = 0b11 and a SIB byte"); 1246 return -1; 1247 } 1248 break; 1249 default: 1250 insn->sibBase = (SIBBase)(sibBaseBase + base); 1251 break; 1252 } 1253 1254 return 0; 1255 } 1256 1257 /* 1258 * readDisplacement - Consumes the displacement of an instruction. 1259 * 1260 * @param insn - The instruction whose displacement is to be read. 1261 * @return - 0 if the displacement byte was successfully read; nonzero 1262 * otherwise. 
1263 */ 1264 static int readDisplacement(struct InternalInstruction* insn) { 1265 int8_t d8; 1266 int16_t d16; 1267 int32_t d32; 1268 1269 dbgprintf(insn, "readDisplacement()"); 1270 1271 if (insn->consumedDisplacement) 1272 return 0; 1273 1274 insn->consumedDisplacement = true; 1275 insn->displacementOffset = insn->readerCursor - insn->startLocation; 1276 1277 switch (insn->eaDisplacement) { 1278 case EA_DISP_NONE: 1279 insn->consumedDisplacement = false; 1280 break; 1281 case EA_DISP_8: 1282 if (consumeInt8(insn, &d8)) 1283 return -1; 1284 insn->displacement = d8; 1285 break; 1286 case EA_DISP_16: 1287 if (consumeInt16(insn, &d16)) 1288 return -1; 1289 insn->displacement = d16; 1290 break; 1291 case EA_DISP_32: 1292 if (consumeInt32(insn, &d32)) 1293 return -1; 1294 insn->displacement = d32; 1295 break; 1296 } 1297 1298 insn->consumedDisplacement = true; 1299 return 0; 1300 } 1301 1302 /* 1303 * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and 1304 * displacement) for an instruction and interprets it. 1305 * 1306 * @param insn - The instruction whose addressing information is to be read. 1307 * @return - 0 if the information was successfully read; nonzero otherwise. 1308 */ 1309 static int readModRM(struct InternalInstruction* insn) { 1310 uint8_t mod, rm, reg; 1311 1312 dbgprintf(insn, "readModRM()"); 1313 1314 if (insn->consumedModRM) 1315 return 0; 1316 1317 if (consumeByte(insn, &insn->modRM)) 1318 return -1; 1319 insn->consumedModRM = true; 1320 1321 mod = modFromModRM(insn->modRM); 1322 rm = rmFromModRM(insn->modRM); 1323 reg = regFromModRM(insn->modRM); 1324 1325 /* 1326 * This goes by insn->registerSize to pick the correct register, which messes 1327 * up if we're using (say) XMM or 8-bit register operands. That gets fixed in 1328 * fixupReg(). 
1329 */ 1330 switch (insn->registerSize) { 1331 case 2: 1332 insn->regBase = MODRM_REG_AX; 1333 insn->eaRegBase = EA_REG_AX; 1334 break; 1335 case 4: 1336 insn->regBase = MODRM_REG_EAX; 1337 insn->eaRegBase = EA_REG_EAX; 1338 break; 1339 case 8: 1340 insn->regBase = MODRM_REG_RAX; 1341 insn->eaRegBase = EA_REG_RAX; 1342 break; 1343 } 1344 1345 reg |= rFromREX(insn->rexPrefix) << 3; 1346 rm |= bFromREX(insn->rexPrefix) << 3; 1347 if (insn->vectorExtensionType == TYPE_EVEX) { 1348 reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4; 1349 rm |= xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4; 1350 } 1351 1352 insn->reg = (Reg)(insn->regBase + reg); 1353 1354 switch (insn->addressSize) { 1355 case 2: 1356 insn->eaBaseBase = EA_BASE_BX_SI; 1357 1358 switch (mod) { 1359 case 0x0: 1360 if (rm == 0x6) { 1361 insn->eaBase = EA_BASE_NONE; 1362 insn->eaDisplacement = EA_DISP_16; 1363 if (readDisplacement(insn)) 1364 return -1; 1365 } else { 1366 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1367 insn->eaDisplacement = EA_DISP_NONE; 1368 } 1369 break; 1370 case 0x1: 1371 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1372 insn->eaDisplacement = EA_DISP_8; 1373 insn->displacementSize = 1; 1374 if (readDisplacement(insn)) 1375 return -1; 1376 break; 1377 case 0x2: 1378 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1379 insn->eaDisplacement = EA_DISP_16; 1380 if (readDisplacement(insn)) 1381 return -1; 1382 break; 1383 case 0x3: 1384 insn->eaBase = (EABase)(insn->eaRegBase + rm); 1385 if (readDisplacement(insn)) 1386 return -1; 1387 break; 1388 } 1389 break; 1390 case 4: 1391 case 8: 1392 insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX); 1393 1394 switch (mod) { 1395 case 0x0: 1396 insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */ 1397 // In determining whether RIP-relative mode is used (rm=5), 1398 // or whether a SIB byte is present (rm=4), 1399 // the extension bits (REX.b and EVEX.x) are ignored. 
1400 switch (rm & 7) { 1401 case 0x4: // SIB byte is present 1402 insn->eaBase = (insn->addressSize == 4 ? 1403 EA_BASE_sib : EA_BASE_sib64); 1404 if (readSIB(insn) || readDisplacement(insn)) 1405 return -1; 1406 break; 1407 case 0x5: // RIP-relative 1408 insn->eaBase = EA_BASE_NONE; 1409 insn->eaDisplacement = EA_DISP_32; 1410 if (readDisplacement(insn)) 1411 return -1; 1412 break; 1413 default: 1414 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1415 break; 1416 } 1417 break; 1418 case 0x1: 1419 insn->displacementSize = 1; 1420 /* FALLTHROUGH */ 1421 case 0x2: 1422 insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32); 1423 switch (rm & 7) { 1424 case 0x4: // SIB byte is present 1425 insn->eaBase = EA_BASE_sib; 1426 if (readSIB(insn) || readDisplacement(insn)) 1427 return -1; 1428 break; 1429 default: 1430 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1431 if (readDisplacement(insn)) 1432 return -1; 1433 break; 1434 } 1435 break; 1436 case 0x3: 1437 insn->eaDisplacement = EA_DISP_NONE; 1438 insn->eaBase = (EABase)(insn->eaRegBase + rm); 1439 break; 1440 } 1441 break; 1442 } /* switch (insn->addressSize) */ 1443 1444 return 0; 1445 } 1446 1447 #define GENERIC_FIXUP_FUNC(name, base, prefix) \ 1448 static uint16_t name(struct InternalInstruction *insn, \ 1449 OperandType type, \ 1450 uint8_t index, \ 1451 uint8_t *valid) { \ 1452 *valid = 1; \ 1453 switch (type) { \ 1454 default: \ 1455 debug("Unhandled register type"); \ 1456 *valid = 0; \ 1457 return 0; \ 1458 case TYPE_Rv: \ 1459 return base + index; \ 1460 case TYPE_R8: \ 1461 if (insn->rexPrefix && \ 1462 index >= 4 && index <= 7) { \ 1463 return prefix##_SPL + (index - 4); \ 1464 } else { \ 1465 return prefix##_AL + index; \ 1466 } \ 1467 case TYPE_R16: \ 1468 return prefix##_AX + index; \ 1469 case TYPE_R32: \ 1470 return prefix##_EAX + index; \ 1471 case TYPE_R64: \ 1472 return prefix##_RAX + index; \ 1473 case TYPE_ZMM: \ 1474 return prefix##_ZMM0 + index; \ 1475 case TYPE_YMM: \ 1476 return 
prefix##_YMM0 + index; \ 1477 case TYPE_XMM: \ 1478 return prefix##_XMM0 + index; \ 1479 case TYPE_VK: \ 1480 if (index > 7) \ 1481 *valid = 0; \ 1482 return prefix##_K0 + index; \ 1483 case TYPE_MM64: \ 1484 return prefix##_MM0 + (index & 0x7); \ 1485 case TYPE_SEGMENTREG: \ 1486 if (index > 5) \ 1487 *valid = 0; \ 1488 return prefix##_ES + index; \ 1489 case TYPE_DEBUGREG: \ 1490 return prefix##_DR0 + index; \ 1491 case TYPE_CONTROLREG: \ 1492 return prefix##_CR0 + index; \ 1493 case TYPE_BNDR: \ 1494 if (index > 3) \ 1495 *valid = 0; \ 1496 return prefix##_BND0 + index; \ 1497 } \ 1498 } 1499 1500 /* 1501 * fixup*Value - Consults an operand type to determine the meaning of the 1502 * reg or R/M field. If the operand is an XMM operand, for example, an 1503 * operand would be XMM0 instead of AX, which readModRM() would otherwise 1504 * misinterpret it as. 1505 * 1506 * @param insn - The instruction containing the operand. 1507 * @param type - The operand type. 1508 * @param index - The existing value of the field as reported by readModRM(). 1509 * @param valid - The address of a uint8_t. The target is set to 1 if the 1510 * field is valid for the register class; 0 if not. 1511 * @return - The proper value. 1512 */ 1513 GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG) 1514 GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG) 1515 1516 /* 1517 * fixupReg - Consults an operand specifier to determine which of the 1518 * fixup*Value functions to use in correcting readModRM()'ss interpretation. 1519 * 1520 * @param insn - See fixup*Value(). 1521 * @param op - The operand specifier. 1522 * @return - 0 if fixup was successful; -1 if the register returned was 1523 * invalid for its class. 
1524 */ 1525 static int fixupReg(struct InternalInstruction *insn, 1526 const struct OperandSpecifier *op) { 1527 uint8_t valid; 1528 1529 dbgprintf(insn, "fixupReg()"); 1530 1531 switch ((OperandEncoding)op->encoding) { 1532 default: 1533 debug("Expected a REG or R/M encoding in fixupReg"); 1534 return -1; 1535 case ENCODING_VVVV: 1536 insn->vvvv = (Reg)fixupRegValue(insn, 1537 (OperandType)op->type, 1538 insn->vvvv, 1539 &valid); 1540 if (!valid) 1541 return -1; 1542 break; 1543 case ENCODING_REG: 1544 insn->reg = (Reg)fixupRegValue(insn, 1545 (OperandType)op->type, 1546 insn->reg - insn->regBase, 1547 &valid); 1548 if (!valid) 1549 return -1; 1550 break; 1551 CASE_ENCODING_RM: 1552 CASE_ENCODING_VSIB: 1553 if (insn->eaBase >= insn->eaRegBase) { 1554 insn->eaBase = (EABase)fixupRMValue(insn, 1555 (OperandType)op->type, 1556 insn->eaBase - insn->eaRegBase, 1557 &valid); 1558 if (!valid) 1559 return -1; 1560 } 1561 break; 1562 } 1563 1564 return 0; 1565 } 1566 1567 /* 1568 * readOpcodeRegister - Reads an operand from the opcode field of an 1569 * instruction and interprets it appropriately given the operand width. 1570 * Handles AddRegFrm instructions. 1571 * 1572 * @param insn - the instruction whose opcode field is to be read. 1573 * @param size - The width (in bytes) of the register being specified. 1574 * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means 1575 * RAX. 1576 * @return - 0 on success; nonzero otherwise. 
1577 */ 1578 static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) { 1579 dbgprintf(insn, "readOpcodeRegister()"); 1580 1581 if (size == 0) 1582 size = insn->registerSize; 1583 1584 switch (size) { 1585 case 1: 1586 insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3) 1587 | (insn->opcode & 7))); 1588 if (insn->rexPrefix && 1589 insn->opcodeRegister >= MODRM_REG_AL + 0x4 && 1590 insn->opcodeRegister < MODRM_REG_AL + 0x8) { 1591 insn->opcodeRegister = (Reg)(MODRM_REG_SPL 1592 + (insn->opcodeRegister - MODRM_REG_AL - 4)); 1593 } 1594 1595 break; 1596 case 2: 1597 insn->opcodeRegister = (Reg)(MODRM_REG_AX 1598 + ((bFromREX(insn->rexPrefix) << 3) 1599 | (insn->opcode & 7))); 1600 break; 1601 case 4: 1602 insn->opcodeRegister = (Reg)(MODRM_REG_EAX 1603 + ((bFromREX(insn->rexPrefix) << 3) 1604 | (insn->opcode & 7))); 1605 break; 1606 case 8: 1607 insn->opcodeRegister = (Reg)(MODRM_REG_RAX 1608 + ((bFromREX(insn->rexPrefix) << 3) 1609 | (insn->opcode & 7))); 1610 break; 1611 } 1612 1613 return 0; 1614 } 1615 1616 /* 1617 * readImmediate - Consumes an immediate operand from an instruction, given the 1618 * desired operand size. 1619 * 1620 * @param insn - The instruction whose operand is to be read. 1621 * @param size - The width (in bytes) of the operand. 1622 * @return - 0 if the immediate was successfully consumed; nonzero 1623 * otherwise. 
1624 */ 1625 static int readImmediate(struct InternalInstruction* insn, uint8_t size) { 1626 uint8_t imm8; 1627 uint16_t imm16; 1628 uint32_t imm32; 1629 uint64_t imm64; 1630 1631 dbgprintf(insn, "readImmediate()"); 1632 1633 if (insn->numImmediatesConsumed == 2) { 1634 debug("Already consumed two immediates"); 1635 return -1; 1636 } 1637 1638 if (size == 0) 1639 size = insn->immediateSize; 1640 else 1641 insn->immediateSize = size; 1642 insn->immediateOffset = insn->readerCursor - insn->startLocation; 1643 1644 switch (size) { 1645 case 1: 1646 if (consumeByte(insn, &imm8)) 1647 return -1; 1648 insn->immediates[insn->numImmediatesConsumed] = imm8; 1649 break; 1650 case 2: 1651 if (consumeUInt16(insn, &imm16)) 1652 return -1; 1653 insn->immediates[insn->numImmediatesConsumed] = imm16; 1654 break; 1655 case 4: 1656 if (consumeUInt32(insn, &imm32)) 1657 return -1; 1658 insn->immediates[insn->numImmediatesConsumed] = imm32; 1659 break; 1660 case 8: 1661 if (consumeUInt64(insn, &imm64)) 1662 return -1; 1663 insn->immediates[insn->numImmediatesConsumed] = imm64; 1664 break; 1665 } 1666 1667 insn->numImmediatesConsumed++; 1668 1669 return 0; 1670 } 1671 1672 /* 1673 * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix. 1674 * 1675 * @param insn - The instruction whose operand is to be read. 1676 * @return - 0 if the vvvv was successfully consumed; nonzero 1677 * otherwise. 
1678 */ 1679 static int readVVVV(struct InternalInstruction* insn) { 1680 dbgprintf(insn, "readVVVV()"); 1681 1682 int vvvv; 1683 if (insn->vectorExtensionType == TYPE_EVEX) 1684 vvvv = (v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4 | 1685 vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2])); 1686 else if (insn->vectorExtensionType == TYPE_VEX_3B) 1687 vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]); 1688 else if (insn->vectorExtensionType == TYPE_VEX_2B) 1689 vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]); 1690 else if (insn->vectorExtensionType == TYPE_XOP) 1691 vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]); 1692 else 1693 return -1; 1694 1695 if (insn->mode != MODE_64BIT) 1696 vvvv &= 0x7; 1697 1698 insn->vvvv = static_cast<Reg>(vvvv); 1699 return 0; 1700 } 1701 1702 /* 1703 * readMaskRegister - Reads an mask register from the opcode field of an 1704 * instruction. 1705 * 1706 * @param insn - The instruction whose opcode field is to be read. 1707 * @return - 0 on success; nonzero otherwise. 1708 */ 1709 static int readMaskRegister(struct InternalInstruction* insn) { 1710 dbgprintf(insn, "readMaskRegister()"); 1711 1712 if (insn->vectorExtensionType != TYPE_EVEX) 1713 return -1; 1714 1715 insn->writemask = 1716 static_cast<Reg>(aaaFromEVEX4of4(insn->vectorExtensionPrefix[3])); 1717 return 0; 1718 } 1719 1720 /* 1721 * readOperands - Consults the specifier for an instruction and consumes all 1722 * operands for that instruction, interpreting them as it goes. 1723 * 1724 * @param insn - The instruction whose operands are to be read and interpreted. 1725 * @return - 0 if all operands could be read; nonzero otherwise. 1726 */ 1727 static int readOperands(struct InternalInstruction* insn) { 1728 int hasVVVV, needVVVV; 1729 int sawRegImm = 0; 1730 1731 dbgprintf(insn, "readOperands()"); 1732 1733 /* If non-zero vvvv specified, need to make sure one of the operands 1734 uses it. 
*/ 1735 hasVVVV = !readVVVV(insn); 1736 needVVVV = hasVVVV && (insn->vvvv != 0); 1737 1738 for (const auto &Op : x86OperandSets[insn->spec->operands]) { 1739 switch (Op.encoding) { 1740 case ENCODING_NONE: 1741 case ENCODING_SI: 1742 case ENCODING_DI: 1743 break; 1744 CASE_ENCODING_VSIB: 1745 // VSIB can use the V2 bit so check only the other bits. 1746 if (needVVVV) 1747 needVVVV = hasVVVV & ((insn->vvvv & 0xf) != 0); 1748 if (readModRM(insn)) 1749 return -1; 1750 if (fixupReg(insn, &Op)) 1751 return -1; 1752 // Apply the AVX512 compressed displacement scaling factor. 1753 if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8) 1754 insn->displacement *= 1 << (Op.encoding - ENCODING_VSIB); 1755 break; 1756 case ENCODING_REG: 1757 CASE_ENCODING_RM: 1758 if (readModRM(insn)) 1759 return -1; 1760 if (fixupReg(insn, &Op)) 1761 return -1; 1762 // Apply the AVX512 compressed displacement scaling factor. 1763 if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8) 1764 insn->displacement *= 1 << (Op.encoding - ENCODING_RM); 1765 break; 1766 case ENCODING_IB: 1767 if (sawRegImm) { 1768 /* Saw a register immediate so don't read again and instead split the 1769 previous immediate. FIXME: This is a hack. 
*/ 1770 insn->immediates[insn->numImmediatesConsumed] = 1771 insn->immediates[insn->numImmediatesConsumed - 1] & 0xf; 1772 ++insn->numImmediatesConsumed; 1773 break; 1774 } 1775 if (readImmediate(insn, 1)) 1776 return -1; 1777 if (Op.type == TYPE_XMM || Op.type == TYPE_YMM) 1778 sawRegImm = 1; 1779 break; 1780 case ENCODING_IW: 1781 if (readImmediate(insn, 2)) 1782 return -1; 1783 break; 1784 case ENCODING_ID: 1785 if (readImmediate(insn, 4)) 1786 return -1; 1787 break; 1788 case ENCODING_IO: 1789 if (readImmediate(insn, 8)) 1790 return -1; 1791 break; 1792 case ENCODING_Iv: 1793 if (readImmediate(insn, insn->immediateSize)) 1794 return -1; 1795 break; 1796 case ENCODING_Ia: 1797 if (readImmediate(insn, insn->addressSize)) 1798 return -1; 1799 break; 1800 case ENCODING_RB: 1801 if (readOpcodeRegister(insn, 1)) 1802 return -1; 1803 break; 1804 case ENCODING_RW: 1805 if (readOpcodeRegister(insn, 2)) 1806 return -1; 1807 break; 1808 case ENCODING_RD: 1809 if (readOpcodeRegister(insn, 4)) 1810 return -1; 1811 break; 1812 case ENCODING_RO: 1813 if (readOpcodeRegister(insn, 8)) 1814 return -1; 1815 break; 1816 case ENCODING_Rv: 1817 if (readOpcodeRegister(insn, 0)) 1818 return -1; 1819 break; 1820 case ENCODING_FP: 1821 break; 1822 case ENCODING_VVVV: 1823 needVVVV = 0; /* Mark that we have found a VVVV operand. */ 1824 if (!hasVVVV) 1825 return -1; 1826 if (fixupReg(insn, &Op)) 1827 return -1; 1828 break; 1829 case ENCODING_WRITEMASK: 1830 if (readMaskRegister(insn)) 1831 return -1; 1832 break; 1833 case ENCODING_DUP: 1834 break; 1835 default: 1836 dbgprintf(insn, "Encountered an operand with an unknown encoding."); 1837 return -1; 1838 } 1839 } 1840 1841 /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */ 1842 if (needVVVV) return -1; 1843 1844 return 0; 1845 } 1846 1847 /* 1848 * decodeInstruction - Reads and interprets a full instruction provided by the 1849 * user. 1850 * 1851 * @param insn - A pointer to the instruction to be populated. 
Must be 1852 * pre-allocated. 1853 * @param reader - The function to be used to read the instruction's bytes. 1854 * @param readerArg - A generic argument to be passed to the reader to store 1855 * any internal state. 1856 * @param logger - If non-NULL, the function to be used to write log messages 1857 * and warnings. 1858 * @param loggerArg - A generic argument to be passed to the logger to store 1859 * any internal state. 1860 * @param startLoc - The address (in the reader's address space) of the first 1861 * byte in the instruction. 1862 * @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to 1863 * decode the instruction in. 1864 * @return - 0 if the instruction's memory could be read; nonzero if 1865 * not. 1866 */ 1867 int llvm::X86Disassembler::decodeInstruction( 1868 struct InternalInstruction *insn, byteReader_t reader, 1869 const void *readerArg, dlog_t logger, void *loggerArg, const void *miiArg, 1870 uint64_t startLoc, DisassemblerMode mode) { 1871 memset(insn, 0, sizeof(struct InternalInstruction)); 1872 1873 insn->reader = reader; 1874 insn->readerArg = readerArg; 1875 insn->dlog = logger; 1876 insn->dlogArg = loggerArg; 1877 insn->startLocation = startLoc; 1878 insn->readerCursor = startLoc; 1879 insn->mode = mode; 1880 insn->numImmediatesConsumed = 0; 1881 1882 if (readPrefixes(insn) || 1883 readOpcode(insn) || 1884 getID(insn, miiArg) || 1885 insn->instructionID == 0 || 1886 readOperands(insn)) 1887 return -1; 1888 1889 insn->operands = x86OperandSets[insn->spec->operands]; 1890 1891 insn->length = insn->readerCursor - insn->startLocation; 1892 1893 dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu", 1894 startLoc, insn->readerCursor, insn->length); 1895 1896 if (insn->length > 15) 1897 dbgprintf(insn, "Instruction exceeds 15-byte limit"); 1898 1899 return 0; 1900 } 1901