1 //===-- X86DisassemblerDecoder.cpp - Disassembler decoder -----------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file is part of the X86 Disassembler. 11 // It contains the implementation of the instruction decoder. 12 // Documentation for the disassembler can be found in X86Disassembler.h. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include <cstdarg> /* for va_*() */ 17 #include <cstdio> /* for vsnprintf() */ 18 #include <cstdlib> /* for exit() */ 19 #include <cstring> /* for memset() */ 20 21 #include "X86DisassemblerDecoder.h" 22 23 using namespace llvm::X86Disassembler; 24 25 /// Specifies whether a ModR/M byte is needed and (if so) which 26 /// instruction each possible value of the ModR/M byte corresponds to. Once 27 /// this information is known, we have narrowed down to a single instruction. 28 struct ModRMDecision { 29 uint8_t modrm_type; 30 uint16_t instructionIDs; 31 }; 32 33 /// Specifies which set of ModR/M->instruction tables to look at 34 /// given a particular opcode. 35 struct OpcodeDecision { 36 ModRMDecision modRMDecisions[256]; 37 }; 38 39 /// Specifies which opcode->instruction tables to look at given 40 /// a particular context (set of attributes). Since there are many possible 41 /// contexts, the decoder first uses CONTEXTS_SYM to determine which context 42 /// applies given a specific set of attributes. Hence there are only IC_max 43 /// entries in this table, rather than 2^(ATTR_max). 44 struct ContextDecision { 45 OpcodeDecision opcodeDecisions[IC_max]; 46 }; 47 48 #include "X86GenDisassemblerTables.inc" 49 50 #ifndef NDEBUG 51 #define debug(s) do { Debug(__FILE__, __LINE__, s); } while (0) 52 #else 53 #define debug(s) do { } while (0) 54 #endif 55 56 /* 57 * contextForAttrs - Client for the instruction context table. Takes a set of 58 * attributes and returns the appropriate decode context. 59 * 60 * @param attrMask - Attributes, from the enumeration attributeBits. 61 * @return - The InstructionContext to use when looking up an 62 * an instruction with these attributes. 63 */ 64 static InstructionContext contextForAttrs(uint16_t attrMask) { 65 return static_cast<InstructionContext>(CONTEXTS_SYM[attrMask]); 66 } 67 68 /* 69 * modRMRequired - Reads the appropriate instruction table to determine whether 70 * the ModR/M byte is required to decode a particular instruction. 71 * 72 * @param type - The opcode type (i.e., how many bytes it has). 73 * @param insnContext - The context for the instruction, as returned by 74 * contextForAttrs. 75 * @param opcode - The last byte of the instruction's opcode, not counting 76 * ModR/M extensions and escapes. 77 * @return - true if the ModR/M byte is required, false otherwise. 78 */ 79 static int modRMRequired(OpcodeType type, 80 InstructionContext insnContext, 81 uint16_t opcode) { 82 const struct ContextDecision* decision = nullptr; 83 84 switch (type) { 85 case ONEBYTE: 86 decision = &ONEBYTE_SYM; 87 break; 88 case TWOBYTE: 89 decision = &TWOBYTE_SYM; 90 break; 91 case THREEBYTE_38: 92 decision = &THREEBYTE38_SYM; 93 break; 94 case THREEBYTE_3A: 95 decision = &THREEBYTE3A_SYM; 96 break; 97 case XOP8_MAP: 98 decision = &XOP8_MAP_SYM; 99 break; 100 case XOP9_MAP: 101 decision = &XOP9_MAP_SYM; 102 break; 103 case XOPA_MAP: 104 decision = &XOPA_MAP_SYM; 105 break; 106 } 107 108 return decision->opcodeDecisions[insnContext].modRMDecisions[opcode]. 109 modrm_type != MODRM_ONEENTRY; 110 } 111 112 /* 113 * decode - Reads the appropriate instruction table to obtain the unique ID of 114 * an instruction. 115 * 116 * @param type - See modRMRequired(). 117 * @param insnContext - See modRMRequired(). 118 * @param opcode - See modRMRequired(). 119 * @param modRM - The ModR/M byte if required, or any value if not. 120 * @return - The UID of the instruction, or 0 on failure. 121 */ 122 static InstrUID decode(OpcodeType type, 123 InstructionContext insnContext, 124 uint8_t opcode, 125 uint8_t modRM) { 126 const struct ModRMDecision* dec = nullptr; 127 128 switch (type) { 129 case ONEBYTE: 130 dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 131 break; 132 case TWOBYTE: 133 dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 134 break; 135 case THREEBYTE_38: 136 dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 137 break; 138 case THREEBYTE_3A: 139 dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 140 break; 141 case XOP8_MAP: 142 dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 143 break; 144 case XOP9_MAP: 145 dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 146 break; 147 case XOPA_MAP: 148 dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 149 break; 150 } 151 152 switch (dec->modrm_type) { 153 default: 154 debug("Corrupt table! Unknown modrm_type"); 155 return 0; 156 case MODRM_ONEENTRY: 157 return modRMTable[dec->instructionIDs]; 158 case MODRM_SPLITRM: 159 if (modFromModRM(modRM) == 0x3) 160 return modRMTable[dec->instructionIDs+1]; 161 return modRMTable[dec->instructionIDs]; 162 case MODRM_SPLITREG: 163 if (modFromModRM(modRM) == 0x3) 164 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8]; 165 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)]; 166 case MODRM_SPLITMISC: 167 if (modFromModRM(modRM) == 0x3) 168 return modRMTable[dec->instructionIDs+(modRM & 0x3f)+8]; 169 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)]; 170 case MODRM_FULL: 171 return modRMTable[dec->instructionIDs+modRM]; 172 } 173 } 174 175 /* 176 * specifierForUID - Given a UID, returns the name and operand specification for 177 * that instruction. 178 * 179 * @param uid - The unique ID for the instruction. This should be returned by 180 * decode(); specifierForUID will not check bounds. 181 * @return - A pointer to the specification for that instruction. 182 */ 183 static const struct InstructionSpecifier *specifierForUID(InstrUID uid) { 184 return &INSTRUCTIONS_SYM[uid]; 185 } 186 187 /* 188 * consumeByte - Uses the reader function provided by the user to consume one 189 * byte from the instruction's memory and advance the cursor. 190 * 191 * @param insn - The instruction with the reader function to use. The cursor 192 * for this instruction is advanced. 193 * @param byte - A pointer to a pre-allocated memory buffer to be populated 194 * with the data read. 195 * @return - 0 if the read was successful; nonzero otherwise. 196 */ 197 static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) { 198 int ret = insn->reader(insn->readerArg, byte, insn->readerCursor); 199 200 if (!ret) 201 ++(insn->readerCursor); 202 203 return ret; 204 } 205 206 /* 207 * lookAtByte - Like consumeByte, but does not advance the cursor. 208 * 209 * @param insn - See consumeByte(). 210 * @param byte - See consumeByte(). 211 * @return - See consumeByte(). 212 */ 213 static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) { 214 return insn->reader(insn->readerArg, byte, insn->readerCursor); 215 } 216 217 static void unconsumeByte(struct InternalInstruction* insn) { 218 insn->readerCursor--; 219 } 220 221 #define CONSUME_FUNC(name, type) \ 222 static int name(struct InternalInstruction* insn, type* ptr) { \ 223 type combined = 0; \ 224 unsigned offset; \ 225 for (offset = 0; offset < sizeof(type); ++offset) { \ 226 uint8_t byte; \ 227 int ret = insn->reader(insn->readerArg, \ 228 &byte, \ 229 insn->readerCursor + offset); \ 230 if (ret) \ 231 return ret; \ 232 combined = combined | ((uint64_t)byte << (offset * 8)); \ 233 } \ 234 *ptr = combined; \ 235 insn->readerCursor += sizeof(type); \ 236 return 0; \ 237 } 238 239 /* 240 * consume* - Use the reader function provided by the user to consume data 241 * values of various sizes from the instruction's memory and advance the 242 * cursor appropriately. These readers perform endian conversion. 243 * 244 * @param insn - See consumeByte(). 245 * @param ptr - A pointer to a pre-allocated memory of appropriate size to 246 * be populated with the data read. 247 * @return - See consumeByte(). 248 */ 249 CONSUME_FUNC(consumeInt8, int8_t) 250 CONSUME_FUNC(consumeInt16, int16_t) 251 CONSUME_FUNC(consumeInt32, int32_t) 252 CONSUME_FUNC(consumeUInt16, uint16_t) 253 CONSUME_FUNC(consumeUInt32, uint32_t) 254 CONSUME_FUNC(consumeUInt64, uint64_t) 255 256 /* 257 * dbgprintf - Uses the logging function provided by the user to log a single 258 * message, typically without a carriage-return. 259 * 260 * @param insn - The instruction containing the logging function. 261 * @param format - See printf(). 262 * @param ... - See printf(). 263 */ 264 static void dbgprintf(struct InternalInstruction* insn, 265 const char* format, 266 ...) { 267 char buffer[256]; 268 va_list ap; 269 270 if (!insn->dlog) 271 return; 272 273 va_start(ap, format); 274 (void)vsnprintf(buffer, sizeof(buffer), format, ap); 275 va_end(ap); 276 277 insn->dlog(insn->dlogArg, buffer); 278 } 279 280 static bool isREX(struct InternalInstruction *insn, uint8_t prefix) { 281 if (insn->mode == MODE_64BIT) 282 return prefix >= 0x40 && prefix <= 0x4f; 283 return false; 284 } 285 286 /* 287 * setPrefixPresent - Marks that a particular prefix is present as mandatory 288 * 289 * @param insn - The instruction to be marked as having the prefix. 290 * @param prefix - The prefix that is present. 291 */ 292 static void setPrefixPresent(struct InternalInstruction *insn, uint8_t prefix) { 293 uint8_t nextByte; 294 switch (prefix) { 295 case 0xf2: 296 case 0xf3: 297 if (lookAtByte(insn, &nextByte)) 298 break; 299 // TODO: 300 // 1. There could be several 0x66 301 // 2. if (nextByte == 0x66) and nextNextByte != 0x0f then 302 // it's not mandatory prefix 303 // 3. if (nextByte >= 0x40 && nextByte <= 0x4f) it's REX and we need 304 // 0x0f exactly after it to be mandatory prefix 305 if (isREX(insn, nextByte) || nextByte == 0x0f || nextByte == 0x66) 306 // The last of 0xf2 /0xf3 is mandatory prefix 307 insn->mandatoryPrefix = prefix; 308 insn->repeatPrefix = prefix; 309 break; 310 case 0x66: 311 if (lookAtByte(insn, &nextByte)) 312 break; 313 // 0x66 can't overwrite existing mandatory prefix and should be ignored 314 if (!insn->mandatoryPrefix && (nextByte == 0x0f || isREX(insn, nextByte))) 315 insn->mandatoryPrefix = prefix; 316 break; 317 } 318 } 319 320 /* 321 * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the 322 * instruction as having them. Also sets the instruction's default operand, 323 * address, and other relevant data sizes to report operands correctly. 324 * 325 * @param insn - The instruction whose prefixes are to be read. 326 * @return - 0 if the instruction could be read until the end of the prefix 327 * bytes, and no prefixes conflicted; nonzero otherwise. 328 */ 329 static int readPrefixes(struct InternalInstruction* insn) { 330 bool isPrefix = true; 331 uint8_t byte = 0; 332 uint8_t nextByte; 333 334 dbgprintf(insn, "readPrefixes()"); 335 336 while (isPrefix) { 337 /* If we fail reading prefixes, just stop here and let the opcode reader deal with it */ 338 if (consumeByte(insn, &byte)) 339 break; 340 341 /* 342 * If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then 343 * break and let it be disassembled as a normal "instruction". 344 */ 345 if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) // LOCK 346 break; 347 348 if ((byte == 0xf2 || byte == 0xf3) && !lookAtByte(insn, &nextByte)) { 349 /* 350 * If the byte is 0xf2 or 0xf3, and any of the following conditions are 351 * met: 352 * - it is followed by a LOCK (0xf0) prefix 353 * - it is followed by an xchg instruction 354 * then it should be disassembled as a xacquire/xrelease not repne/rep. 355 */ 356 if (((nextByte == 0xf0) || 357 ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) { 358 insn->xAcquireRelease = true; 359 if (!(byte == 0xf3 && nextByte == 0x90)) // PAUSE instruction support 360 break; 361 } 362 /* 363 * Also if the byte is 0xf3, and the following condition is met: 364 * - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or 365 * "mov mem, imm" (opcode 0xc6/0xc7) instructions. 366 * then it should be disassembled as an xrelease not rep. 367 */ 368 if (byte == 0xf3 && (nextByte == 0x88 || nextByte == 0x89 || 369 nextByte == 0xc6 || nextByte == 0xc7)) { 370 insn->xAcquireRelease = true; 371 if (nextByte != 0x90) // PAUSE instruction support 372 break; 373 } 374 if (isREX(insn, nextByte)) { 375 uint8_t nnextByte; 376 // Go to REX prefix after the current one 377 if (consumeByte(insn, &nnextByte)) 378 return -1; 379 // We should be able to read next byte after REX prefix 380 if (lookAtByte(insn, &nnextByte)) 381 return -1; 382 unconsumeByte(insn); 383 } 384 } 385 386 switch (byte) { 387 case 0xf0: /* LOCK */ 388 case 0xf2: /* REPNE/REPNZ */ 389 case 0xf3: /* REP or REPE/REPZ */ 390 setPrefixPresent(insn, byte); 391 break; 392 case 0x2e: /* CS segment override -OR- Branch not taken */ 393 case 0x36: /* SS segment override -OR- Branch taken */ 394 case 0x3e: /* DS segment override */ 395 case 0x26: /* ES segment override */ 396 case 0x64: /* FS segment override */ 397 case 0x65: /* GS segment override */ 398 switch (byte) { 399 case 0x2e: 400 insn->segmentOverride = SEG_OVERRIDE_CS; 401 break; 402 case 0x36: 403 insn->segmentOverride = SEG_OVERRIDE_SS; 404 break; 405 case 0x3e: 406 insn->segmentOverride = SEG_OVERRIDE_DS; 407 break; 408 case 0x26: 409 insn->segmentOverride = SEG_OVERRIDE_ES; 410 break; 411 case 0x64: 412 insn->segmentOverride = SEG_OVERRIDE_FS; 413 break; 414 case 0x65: 415 insn->segmentOverride = SEG_OVERRIDE_GS; 416 break; 417 default: 418 debug("Unhandled override"); 419 return -1; 420 } 421 setPrefixPresent(insn, byte); 422 break; 423 case 0x66: /* Operand-size override */ 424 insn->hasOpSize = true; 425 setPrefixPresent(insn, byte); 426 break; 427 case 0x67: /* Address-size override */ 428 insn->hasAdSize = true; 429 setPrefixPresent(insn, byte); 430 break; 431 default: /* Not a prefix byte */ 432 isPrefix = false; 433 break; 434 } 435 436 if (isPrefix) 437 dbgprintf(insn, "Found prefix 0x%hhx", byte); 438 } 439 440 insn->vectorExtensionType = TYPE_NO_VEX_XOP; 441 442 if (byte == 0x62) { 443 uint8_t byte1, byte2; 444 445 if (consumeByte(insn, &byte1)) { 446 dbgprintf(insn, "Couldn't read second byte of EVEX prefix"); 447 return -1; 448 } 449 450 if (lookAtByte(insn, &byte2)) { 451 dbgprintf(insn, "Couldn't read third byte of EVEX prefix"); 452 return -1; 453 } 454 455 if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) && 456 ((~byte1 & 0xc) == 0xc) && ((byte2 & 0x4) == 0x4)) { 457 insn->vectorExtensionType = TYPE_EVEX; 458 } else { 459 unconsumeByte(insn); /* unconsume byte1 */ 460 unconsumeByte(insn); /* unconsume byte */ 461 } 462 463 if (insn->vectorExtensionType == TYPE_EVEX) { 464 insn->vectorExtensionPrefix[0] = byte; 465 insn->vectorExtensionPrefix[1] = byte1; 466 if (consumeByte(insn, &insn->vectorExtensionPrefix[2])) { 467 dbgprintf(insn, "Couldn't read third byte of EVEX prefix"); 468 return -1; 469 } 470 if (consumeByte(insn, &insn->vectorExtensionPrefix[3])) { 471 dbgprintf(insn, "Couldn't read fourth byte of EVEX prefix"); 472 return -1; 473 } 474 475 /* We simulate the REX prefix for simplicity's sake */ 476 if (insn->mode == MODE_64BIT) { 477 insn->rexPrefix = 0x40 478 | (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3) 479 | (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2) 480 | (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1) 481 | (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0); 482 } 483 484 dbgprintf(insn, "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx", 485 insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1], 486 insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]); 487 } 488 } else if (byte == 0xc4) { 489 uint8_t byte1; 490 491 if (lookAtByte(insn, &byte1)) { 492 dbgprintf(insn, "Couldn't read second byte of VEX"); 493 return -1; 494 } 495 496 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) 497 insn->vectorExtensionType = TYPE_VEX_3B; 498 else 499 unconsumeByte(insn); 500 501 if (insn->vectorExtensionType == TYPE_VEX_3B) { 502 insn->vectorExtensionPrefix[0] = byte; 503 consumeByte(insn, &insn->vectorExtensionPrefix[1]); 504 consumeByte(insn, &insn->vectorExtensionPrefix[2]); 505 506 /* We simulate the REX prefix for simplicity's sake */ 507 508 if (insn->mode == MODE_64BIT) 509 insn->rexPrefix = 0x40 510 | (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3) 511 | (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2) 512 | (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1) 513 | (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0); 514 515 dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", 516 insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1], 517 insn->vectorExtensionPrefix[2]); 518 } 519 } else if (byte == 0xc5) { 520 uint8_t byte1; 521 522 if (lookAtByte(insn, &byte1)) { 523 dbgprintf(insn, "Couldn't read second byte of VEX"); 524 return -1; 525 } 526 527 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) 528 insn->vectorExtensionType = TYPE_VEX_2B; 529 else 530 unconsumeByte(insn); 531 532 if (insn->vectorExtensionType == TYPE_VEX_2B) { 533 insn->vectorExtensionPrefix[0] = byte; 534 consumeByte(insn, &insn->vectorExtensionPrefix[1]); 535 536 if (insn->mode == MODE_64BIT) 537 insn->rexPrefix = 0x40 538 | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2); 539 540 switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) { 541 default: 542 break; 543 case VEX_PREFIX_66: 544 insn->hasOpSize = true; 545 break; 546 } 547 548 dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", 549 insn->vectorExtensionPrefix[0], 550 insn->vectorExtensionPrefix[1]); 551 } 552 } else if (byte == 0x8f) { 553 uint8_t byte1; 554 555 if (lookAtByte(insn, &byte1)) { 556 dbgprintf(insn, "Couldn't read second byte of XOP"); 557 return -1; 558 } 559 560 if ((byte1 & 0x38) != 0x0) /* 0 in these 3 bits is a POP instruction. */ 561 insn->vectorExtensionType = TYPE_XOP; 562 else 563 unconsumeByte(insn); 564 565 if (insn->vectorExtensionType == TYPE_XOP) { 566 insn->vectorExtensionPrefix[0] = byte; 567 consumeByte(insn, &insn->vectorExtensionPrefix[1]); 568 consumeByte(insn, &insn->vectorExtensionPrefix[2]); 569 570 /* We simulate the REX prefix for simplicity's sake */ 571 572 if (insn->mode == MODE_64BIT) 573 insn->rexPrefix = 0x40 574 | (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3) 575 | (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2) 576 | (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1) 577 | (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0); 578 579 switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) { 580 default: 581 break; 582 case VEX_PREFIX_66: 583 insn->hasOpSize = true; 584 break; 585 } 586 587 dbgprintf(insn, "Found XOP prefix 0x%hhx 0x%hhx 0x%hhx", 588 insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1], 589 insn->vectorExtensionPrefix[2]); 590 } 591 } else if (isREX(insn, byte)) { 592 if (lookAtByte(insn, &nextByte)) 593 return -1; 594 insn->rexPrefix = byte; 595 dbgprintf(insn, "Found REX prefix 0x%hhx", byte); 596 } else 597 unconsumeByte(insn); 598 599 if (insn->mode == MODE_16BIT) { 600 insn->registerSize = (insn->hasOpSize ? 4 : 2); 601 insn->addressSize = (insn->hasAdSize ? 4 : 2); 602 insn->displacementSize = (insn->hasAdSize ? 4 : 2); 603 insn->immediateSize = (insn->hasOpSize ? 4 : 2); 604 } else if (insn->mode == MODE_32BIT) { 605 insn->registerSize = (insn->hasOpSize ? 2 : 4); 606 insn->addressSize = (insn->hasAdSize ? 2 : 4); 607 insn->displacementSize = (insn->hasAdSize ? 2 : 4); 608 insn->immediateSize = (insn->hasOpSize ? 2 : 4); 609 } else if (insn->mode == MODE_64BIT) { 610 if (insn->rexPrefix && wFromREX(insn->rexPrefix)) { 611 insn->registerSize = 8; 612 insn->addressSize = (insn->hasAdSize ? 4 : 8); 613 insn->displacementSize = 4; 614 insn->immediateSize = 4; 615 } else { 616 insn->registerSize = (insn->hasOpSize ? 2 : 4); 617 insn->addressSize = (insn->hasAdSize ? 4 : 8); 618 insn->displacementSize = (insn->hasOpSize ? 2 : 4); 619 insn->immediateSize = (insn->hasOpSize ? 2 : 4); 620 } 621 } 622 623 return 0; 624 } 625 626 /* 627 * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of 628 * extended or escape opcodes). 629 * 630 * @param insn - The instruction whose opcode is to be read. 631 * @return - 0 if the opcode could be read successfully; nonzero otherwise. 632 */ 633 static int readOpcode(struct InternalInstruction* insn) { 634 /* Determine the length of the primary opcode */ 635 636 uint8_t current; 637 638 dbgprintf(insn, "readOpcode()"); 639 640 insn->opcodeType = ONEBYTE; 641 642 if (insn->vectorExtensionType == TYPE_EVEX) { 643 switch (mmFromEVEX2of4(insn->vectorExtensionPrefix[1])) { 644 default: 645 dbgprintf(insn, "Unhandled mm field for instruction (0x%hhx)", 646 mmFromEVEX2of4(insn->vectorExtensionPrefix[1])); 647 return -1; 648 case VEX_LOB_0F: 649 insn->opcodeType = TWOBYTE; 650 return consumeByte(insn, &insn->opcode); 651 case VEX_LOB_0F38: 652 insn->opcodeType = THREEBYTE_38; 653 return consumeByte(insn, &insn->opcode); 654 case VEX_LOB_0F3A: 655 insn->opcodeType = THREEBYTE_3A; 656 return consumeByte(insn, &insn->opcode); 657 } 658 } else if (insn->vectorExtensionType == TYPE_VEX_3B) { 659 switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) { 660 default: 661 dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", 662 mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])); 663 return -1; 664 case VEX_LOB_0F: 665 insn->opcodeType = TWOBYTE; 666 return consumeByte(insn, &insn->opcode); 667 case VEX_LOB_0F38: 668 insn->opcodeType = THREEBYTE_38; 669 return consumeByte(insn, &insn->opcode); 670 case VEX_LOB_0F3A: 671 insn->opcodeType = THREEBYTE_3A; 672 return consumeByte(insn, &insn->opcode); 673 } 674 } else if (insn->vectorExtensionType == TYPE_VEX_2B) { 675 insn->opcodeType = TWOBYTE; 676 return consumeByte(insn, &insn->opcode); 677 } else if (insn->vectorExtensionType == TYPE_XOP) { 678 switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) { 679 default: 680 dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", 681 mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])); 682 return -1; 683 case XOP_MAP_SELECT_8: 684 insn->opcodeType = XOP8_MAP; 685 return consumeByte(insn, &insn->opcode); 686 case XOP_MAP_SELECT_9: 687 insn->opcodeType = XOP9_MAP; 688 return consumeByte(insn, &insn->opcode); 689 case XOP_MAP_SELECT_A: 690 insn->opcodeType = XOPA_MAP; 691 return consumeByte(insn, &insn->opcode); 692 } 693 } 694 695 if (consumeByte(insn, ¤t)) 696 return -1; 697 698 if (current == 0x0f) { 699 dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current); 700 701 if (consumeByte(insn, ¤t)) 702 return -1; 703 704 if (current == 0x38) { 705 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 706 707 if (consumeByte(insn, ¤t)) 708 return -1; 709 710 insn->opcodeType = THREEBYTE_38; 711 } else if (current == 0x3a) { 712 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 713 714 if (consumeByte(insn, ¤t)) 715 return -1; 716 717 insn->opcodeType = THREEBYTE_3A; 718 } else { 719 dbgprintf(insn, "Didn't find a three-byte escape prefix"); 720 721 insn->opcodeType = TWOBYTE; 722 } 723 } else if (insn->mandatoryPrefix) 724 // The opcode with mandatory prefix must start with opcode escape. 725 // If not it's legacy repeat prefix 726 insn->mandatoryPrefix = 0; 727 728 /* 729 * At this point we have consumed the full opcode. 730 * Anything we consume from here on must be unconsumed. 731 */ 732 733 insn->opcode = current; 734 735 return 0; 736 } 737 738 static int readModRM(struct InternalInstruction* insn); 739 740 /* 741 * getIDWithAttrMask - Determines the ID of an instruction, consuming 742 * the ModR/M byte as appropriate for extended and escape opcodes, 743 * and using a supplied attribute mask. 744 * 745 * @param instructionID - A pointer whose target is filled in with the ID of the 746 * instruction. 747 * @param insn - The instruction whose ID is to be determined. 748 * @param attrMask - The attribute mask to search. 749 * @return - 0 if the ModR/M could be read when needed or was not 750 * needed; nonzero otherwise. 751 */ 752 static int getIDWithAttrMask(uint16_t* instructionID, 753 struct InternalInstruction* insn, 754 uint16_t attrMask) { 755 bool hasModRMExtension; 756 757 InstructionContext instructionClass = contextForAttrs(attrMask); 758 759 hasModRMExtension = modRMRequired(insn->opcodeType, 760 instructionClass, 761 insn->opcode); 762 763 if (hasModRMExtension) { 764 if (readModRM(insn)) 765 return -1; 766 767 *instructionID = decode(insn->opcodeType, 768 instructionClass, 769 insn->opcode, 770 insn->modRM); 771 } else { 772 *instructionID = decode(insn->opcodeType, 773 instructionClass, 774 insn->opcode, 775 0); 776 } 777 778 return 0; 779 } 780 781 /* 782 * is16BitEquivalent - Determines whether two instruction names refer to 783 * equivalent instructions but one is 16-bit whereas the other is not. 784 * 785 * @param orig - The instruction that is not 16-bit 786 * @param equiv - The instruction that is 16-bit 787 */ 788 static bool is16BitEquivalent(const char *orig, const char *equiv) { 789 off_t i; 790 791 for (i = 0;; i++) { 792 if (orig[i] == '\0' && equiv[i] == '\0') 793 return true; 794 if (orig[i] == '\0' || equiv[i] == '\0') 795 return false; 796 if (orig[i] != equiv[i]) { 797 if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W') 798 continue; 799 if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1') 800 continue; 801 if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6') 802 continue; 803 return false; 804 } 805 } 806 } 807 808 /* 809 * is64Bit - Determines whether this instruction is a 64-bit instruction. 810 * 811 * @param name - The instruction that is not 16-bit 812 */ 813 static bool is64Bit(const char *name) { 814 off_t i; 815 816 for (i = 0;; ++i) { 817 if (name[i] == '\0') 818 return false; 819 if (name[i] == '6' && name[i+1] == '4') 820 return true; 821 } 822 } 823 824 /* 825 * getID - Determines the ID of an instruction, consuming the ModR/M byte as 826 * appropriate for extended and escape opcodes. Determines the attributes and 827 * context for the instruction before doing so. 828 * 829 * @param insn - The instruction whose ID is to be determined. 830 * @return - 0 if the ModR/M could be read when needed or was not needed; 831 * nonzero otherwise. 832 */ 833 static int getID(struct InternalInstruction* insn, const void *miiArg) { 834 uint16_t attrMask; 835 uint16_t instructionID; 836 837 dbgprintf(insn, "getID()"); 838 839 attrMask = ATTR_NONE; 840 841 if (insn->mode == MODE_64BIT) 842 attrMask |= ATTR_64BIT; 843 844 if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) { 845 attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? ATTR_EVEX : ATTR_VEX; 846 847 if (insn->vectorExtensionType == TYPE_EVEX) { 848 switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) { 849 case VEX_PREFIX_66: 850 attrMask |= ATTR_OPSIZE; 851 break; 852 case VEX_PREFIX_F3: 853 attrMask |= ATTR_XS; 854 break; 855 case VEX_PREFIX_F2: 856 attrMask |= ATTR_XD; 857 break; 858 } 859 860 if (zFromEVEX4of4(insn->vectorExtensionPrefix[3])) 861 attrMask |= ATTR_EVEXKZ; 862 if (bFromEVEX4of4(insn->vectorExtensionPrefix[3])) 863 attrMask |= ATTR_EVEXB; 864 if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3])) 865 attrMask |= ATTR_EVEXK; 866 if (lFromEVEX4of4(insn->vectorExtensionPrefix[3])) 867 attrMask |= ATTR_EVEXL; 868 if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3])) 869 attrMask |= ATTR_EVEXL2; 870 } else if (insn->vectorExtensionType == TYPE_VEX_3B) { 871 switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) { 872 case VEX_PREFIX_66: 873 attrMask |= ATTR_OPSIZE; 874 break; 875 case VEX_PREFIX_F3: 876 attrMask |= ATTR_XS; 877 break; 878 case VEX_PREFIX_F2: 879 attrMask |= ATTR_XD; 880 break; 881 } 882 883 if (lFromVEX3of3(insn->vectorExtensionPrefix[2])) 884 attrMask |= ATTR_VEXL; 885 } else if (insn->vectorExtensionType == TYPE_VEX_2B) { 886 switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) { 887 case VEX_PREFIX_66: 888 attrMask |= ATTR_OPSIZE; 889 break; 890 case VEX_PREFIX_F3: 891 attrMask |= ATTR_XS; 892 break; 893 case VEX_PREFIX_F2: 894 attrMask |= ATTR_XD; 895 break; 896 } 897 898 if (lFromVEX2of2(insn->vectorExtensionPrefix[1])) 899 attrMask |= ATTR_VEXL; 900 } else if (insn->vectorExtensionType == TYPE_XOP) { 901 switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) { 902 case VEX_PREFIX_66: 903 attrMask |= ATTR_OPSIZE; 904 break; 905 case VEX_PREFIX_F3: 906 attrMask |= ATTR_XS; 907 break; 908 case VEX_PREFIX_F2: 909 attrMask |= ATTR_XD; 910 break; 911 } 912 913 if (lFromXOP3of3(insn->vectorExtensionPrefix[2])) 914 attrMask |= ATTR_VEXL; 915 } else { 916 return -1; 917 } 918 } else if (!insn->mandatoryPrefix) { 919 // If we don't have mandatory prefix we should use legacy prefixes here 920 if (insn->hasOpSize && (insn->mode != MODE_16BIT)) 921 attrMask |= ATTR_OPSIZE; 922 if (insn->hasAdSize) 923 attrMask |= ATTR_ADSIZE; 924 if (insn->opcodeType == ONEBYTE) { 925 if (insn->repeatPrefix == 0xf3 && (insn->opcode == 0x90)) 926 // Special support for PAUSE 927 attrMask |= ATTR_XS; 928 } else { 929 if (insn->repeatPrefix == 0xf2) 930 attrMask |= ATTR_XD; 931 else if (insn->repeatPrefix == 0xf3) 932 attrMask |= ATTR_XS; 933 } 934 } else { 935 switch (insn->mandatoryPrefix) { 936 case 0xf2: 937 attrMask |= ATTR_XD; 938 break; 939 case 0xf3: 940 attrMask |= ATTR_XS; 941 break; 942 case 0x66: 943 if (insn->mode != MODE_16BIT) 944 attrMask |= ATTR_OPSIZE; 945 break; 946 case 0x67: 947 attrMask |= ATTR_ADSIZE; 948 break; 949 } 950 } 951 952 if (insn->rexPrefix & 0x08) { 953 attrMask |= ATTR_REXW; 954 attrMask &= ~ATTR_ADSIZE; 955 } 956 957 /* 958 * JCXZ/JECXZ need special handling for 16-bit mode because the meaning 959 * of the AdSize prefix is inverted w.r.t. 32-bit mode. 960 */ 961 if (insn->mode == MODE_16BIT && insn->opcodeType == ONEBYTE && 962 insn->opcode == 0xE3) 963 attrMask ^= ATTR_ADSIZE; 964 965 /* 966 * In 64-bit mode all f64 superscripted opcodes ignore opcode size prefix 967 * CALL/JMP/JCC instructions need to ignore 0x66 and consume 4 bytes 968 */ 969 970 if ((insn->mode == MODE_64BIT) && insn->hasOpSize) { 971 switch (insn->opcode) { 972 case 0xE8: 973 case 0xE9: 974 // Take care of psubsb and other mmx instructions. 975 if (insn->opcodeType == ONEBYTE) { 976 attrMask ^= ATTR_OPSIZE; 977 insn->immediateSize = 4; 978 insn->displacementSize = 4; 979 } 980 break; 981 case 0x82: 982 case 0x83: 983 case 0x84: 984 case 0x85: 985 case 0x86: 986 case 0x87: 987 case 0x88: 988 case 0x89: 989 case 0x8A: 990 case 0x8B: 991 case 0x8C: 992 case 0x8D: 993 case 0x8E: 994 case 0x8F: 995 // Take care of lea and three byte ops. 996 if (insn->opcodeType == TWOBYTE) { 997 attrMask ^= ATTR_OPSIZE; 998 insn->immediateSize = 4; 999 insn->displacementSize = 4; 1000 } 1001 break; 1002 } 1003 } 1004 1005 if (getIDWithAttrMask(&instructionID, insn, attrMask)) 1006 return -1; 1007 1008 /* The following clauses compensate for limitations of the tables. */ 1009 1010 if (insn->mode != MODE_64BIT && 1011 insn->vectorExtensionType != TYPE_NO_VEX_XOP) { 1012 /* 1013 * The tables can't distinquish between cases where the W-bit is used to 1014 * select register size and cases where its a required part of the opcode. 1015 */ 1016 if ((insn->vectorExtensionType == TYPE_EVEX && 1017 wFromEVEX3of4(insn->vectorExtensionPrefix[2])) || 1018 (insn->vectorExtensionType == TYPE_VEX_3B && 1019 wFromVEX3of3(insn->vectorExtensionPrefix[2])) || 1020 (insn->vectorExtensionType == TYPE_XOP && 1021 wFromXOP3of3(insn->vectorExtensionPrefix[2]))) { 1022 1023 uint16_t instructionIDWithREXW; 1024 if (getIDWithAttrMask(&instructionIDWithREXW, 1025 insn, attrMask | ATTR_REXW)) { 1026 insn->instructionID = instructionID; 1027 insn->spec = specifierForUID(instructionID); 1028 return 0; 1029 } 1030 1031 auto SpecName = GetInstrName(instructionIDWithREXW, miiArg); 1032 // If not a 64-bit instruction. Switch the opcode. 1033 if (!is64Bit(SpecName.data())) { 1034 insn->instructionID = instructionIDWithREXW; 1035 insn->spec = specifierForUID(instructionIDWithREXW); 1036 return 0; 1037 } 1038 } 1039 } 1040 1041 /* 1042 * Absolute moves need special handling. 1043 * -For 16-bit mode because the meaning of the AdSize and OpSize prefixes are 1044 * inverted w.r.t. 1045 * -For 32-bit mode we need to ensure the ADSIZE prefix is observed in 1046 * any position. 1047 */ 1048 if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) { 1049 /* Make sure we observed the prefixes in any position. */ 1050 if (insn->hasAdSize) 1051 attrMask |= ATTR_ADSIZE; 1052 if (insn->hasOpSize) 1053 attrMask |= ATTR_OPSIZE; 1054 1055 /* In 16-bit, invert the attributes. */ 1056 if (insn->mode == MODE_16BIT) 1057 attrMask ^= ATTR_ADSIZE | ATTR_OPSIZE; 1058 1059 if (getIDWithAttrMask(&instructionID, insn, attrMask)) 1060 return -1; 1061 1062 insn->instructionID = instructionID; 1063 insn->spec = specifierForUID(instructionID); 1064 return 0; 1065 } 1066 1067 if ((insn->mode == MODE_16BIT || insn->hasOpSize) && 1068 !(attrMask & ATTR_OPSIZE)) { 1069 /* 1070 * The instruction tables make no distinction between instructions that 1071 * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a 1072 * particular spot (i.e., many MMX operations). In general we're 1073 * conservative, but in the specific case where OpSize is present but not 1074 * in the right place we check if there's a 16-bit operation. 1075 */ 1076 1077 const struct InstructionSpecifier *spec; 1078 uint16_t instructionIDWithOpsize; 1079 llvm::StringRef specName, specWithOpSizeName; 1080 1081 spec = specifierForUID(instructionID); 1082 1083 if (getIDWithAttrMask(&instructionIDWithOpsize, 1084 insn, 1085 attrMask | ATTR_OPSIZE)) { 1086 /* 1087 * ModRM required with OpSize but not present; give up and return version 1088 * without OpSize set 1089 */ 1090 1091 insn->instructionID = instructionID; 1092 insn->spec = spec; 1093 return 0; 1094 } 1095 1096 specName = GetInstrName(instructionID, miiArg); 1097 specWithOpSizeName = GetInstrName(instructionIDWithOpsize, miiArg); 1098 1099 if (is16BitEquivalent(specName.data(), specWithOpSizeName.data()) && 1100 (insn->mode == MODE_16BIT) ^ insn->hasOpSize) { 1101 insn->instructionID = instructionIDWithOpsize; 1102 insn->spec = specifierForUID(instructionIDWithOpsize); 1103 } else { 1104 insn->instructionID = instructionID; 1105 insn->spec = spec; 1106 } 1107 return 0; 1108 } 1109 1110 if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 && 1111 insn->rexPrefix & 0x01) { 1112 /* 1113 * NOOP shouldn't decode as NOOP if REX.b is set. Instead 1114 * it should decode as XCHG %r8, %eax. 1115 */ 1116 1117 const struct InstructionSpecifier *spec; 1118 uint16_t instructionIDWithNewOpcode; 1119 const struct InstructionSpecifier *specWithNewOpcode; 1120 1121 spec = specifierForUID(instructionID); 1122 1123 /* Borrow opcode from one of the other XCHGar opcodes */ 1124 insn->opcode = 0x91; 1125 1126 if (getIDWithAttrMask(&instructionIDWithNewOpcode, 1127 insn, 1128 attrMask)) { 1129 insn->opcode = 0x90; 1130 1131 insn->instructionID = instructionID; 1132 insn->spec = spec; 1133 return 0; 1134 } 1135 1136 specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode); 1137 1138 /* Change back */ 1139 insn->opcode = 0x90; 1140 1141 insn->instructionID = instructionIDWithNewOpcode; 1142 insn->spec = specWithNewOpcode; 1143 1144 return 0; 1145 } 1146 1147 insn->instructionID = instructionID; 1148 insn->spec = specifierForUID(insn->instructionID); 1149 1150 return 0; 1151 } 1152 1153 /* 1154 * readSIB - Consumes the SIB byte to determine addressing information for an 1155 * instruction. 1156 * 1157 * @param insn - The instruction whose SIB byte is to be read. 1158 * @return - 0 if the SIB byte was successfully read; nonzero otherwise. 1159 */ 1160 static int readSIB(struct InternalInstruction* insn) { 1161 SIBBase sibBaseBase = SIB_BASE_NONE; 1162 uint8_t index, base; 1163 1164 dbgprintf(insn, "readSIB()"); 1165 1166 if (insn->consumedSIB) 1167 return 0; 1168 1169 insn->consumedSIB = true; 1170 1171 switch (insn->addressSize) { 1172 case 2: 1173 dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode"); 1174 return -1; 1175 case 4: 1176 insn->sibIndexBase = SIB_INDEX_EAX; 1177 sibBaseBase = SIB_BASE_EAX; 1178 break; 1179 case 8: 1180 insn->sibIndexBase = SIB_INDEX_RAX; 1181 sibBaseBase = SIB_BASE_RAX; 1182 break; 1183 } 1184 1185 if (consumeByte(insn, &insn->sib)) 1186 return -1; 1187 1188 index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3); 1189 1190 if (index == 0x4) { 1191 insn->sibIndex = SIB_INDEX_NONE; 1192 } else { 1193 insn->sibIndex = (SIBIndex)(insn->sibIndexBase + index); 1194 } 1195 1196 insn->sibScale = 1 << scaleFromSIB(insn->sib); 1197 1198 base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3); 1199 1200 switch (base) { 1201 case 0x5: 1202 case 0xd: 1203 switch (modFromModRM(insn->modRM)) { 1204 case 0x0: 1205 insn->eaDisplacement = EA_DISP_32; 1206 insn->sibBase = SIB_BASE_NONE; 1207 break; 1208 case 0x1: 1209 insn->eaDisplacement = EA_DISP_8; 1210 insn->sibBase = (SIBBase)(sibBaseBase + base); 1211 break; 1212 case 0x2: 1213 insn->eaDisplacement = EA_DISP_32; 1214 insn->sibBase = (SIBBase)(sibBaseBase + base); 1215 break; 1216 case 0x3: 1217 debug("Cannot have Mod = 0b11 and a SIB byte"); 1218 return -1; 1219 } 1220 break; 1221 default: 1222 insn->sibBase = (SIBBase)(sibBaseBase + base); 1223 break; 1224 } 1225 1226 return 0; 1227 } 1228 1229 /* 1230 * readDisplacement - Consumes the displacement of an instruction. 1231 * 1232 * @param insn - The instruction whose displacement is to be read. 1233 * @return - 0 if the displacement byte was successfully read; nonzero 1234 * otherwise. 1235 */ 1236 static int readDisplacement(struct InternalInstruction* insn) { 1237 int8_t d8; 1238 int16_t d16; 1239 int32_t d32; 1240 1241 dbgprintf(insn, "readDisplacement()"); 1242 1243 if (insn->consumedDisplacement) 1244 return 0; 1245 1246 insn->consumedDisplacement = true; 1247 insn->displacementOffset = insn->readerCursor - insn->startLocation; 1248 1249 switch (insn->eaDisplacement) { 1250 case EA_DISP_NONE: 1251 insn->consumedDisplacement = false; 1252 break; 1253 case EA_DISP_8: 1254 if (consumeInt8(insn, &d8)) 1255 return -1; 1256 insn->displacement = d8; 1257 break; 1258 case EA_DISP_16: 1259 if (consumeInt16(insn, &d16)) 1260 return -1; 1261 insn->displacement = d16; 1262 break; 1263 case EA_DISP_32: 1264 if (consumeInt32(insn, &d32)) 1265 return -1; 1266 insn->displacement = d32; 1267 break; 1268 } 1269 1270 insn->consumedDisplacement = true; 1271 return 0; 1272 } 1273 1274 /* 1275 * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and 1276 * displacement) for an instruction and interprets it. 1277 * 1278 * @param insn - The instruction whose addressing information is to be read. 1279 * @return - 0 if the information was successfully read; nonzero otherwise. 1280 */ 1281 static int readModRM(struct InternalInstruction* insn) { 1282 uint8_t mod, rm, reg; 1283 1284 dbgprintf(insn, "readModRM()"); 1285 1286 if (insn->consumedModRM) 1287 return 0; 1288 1289 if (consumeByte(insn, &insn->modRM)) 1290 return -1; 1291 insn->consumedModRM = true; 1292 1293 mod = modFromModRM(insn->modRM); 1294 rm = rmFromModRM(insn->modRM); 1295 reg = regFromModRM(insn->modRM); 1296 1297 /* 1298 * This goes by insn->registerSize to pick the correct register, which messes 1299 * up if we're using (say) XMM or 8-bit register operands. That gets fixed in 1300 * fixupReg(). 1301 */ 1302 switch (insn->registerSize) { 1303 case 2: 1304 insn->regBase = MODRM_REG_AX; 1305 insn->eaRegBase = EA_REG_AX; 1306 break; 1307 case 4: 1308 insn->regBase = MODRM_REG_EAX; 1309 insn->eaRegBase = EA_REG_EAX; 1310 break; 1311 case 8: 1312 insn->regBase = MODRM_REG_RAX; 1313 insn->eaRegBase = EA_REG_RAX; 1314 break; 1315 } 1316 1317 reg |= rFromREX(insn->rexPrefix) << 3; 1318 rm |= bFromREX(insn->rexPrefix) << 3; 1319 if (insn->vectorExtensionType == TYPE_EVEX) { 1320 reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4; 1321 rm |= xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4; 1322 } 1323 1324 insn->reg = (Reg)(insn->regBase + reg); 1325 1326 switch (insn->addressSize) { 1327 case 2: 1328 insn->eaBaseBase = EA_BASE_BX_SI; 1329 1330 switch (mod) { 1331 case 0x0: 1332 if (rm == 0x6) { 1333 insn->eaBase = EA_BASE_NONE; 1334 insn->eaDisplacement = EA_DISP_16; 1335 if (readDisplacement(insn)) 1336 return -1; 1337 } else { 1338 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1339 insn->eaDisplacement = EA_DISP_NONE; 1340 } 1341 break; 1342 case 0x1: 1343 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1344 insn->eaDisplacement = EA_DISP_8; 1345 insn->displacementSize = 1; 1346 if (readDisplacement(insn)) 1347 return -1; 1348 break; 1349 case 0x2: 1350 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1351 insn->eaDisplacement = EA_DISP_16; 1352 if (readDisplacement(insn)) 1353 return -1; 1354 break; 1355 case 0x3: 1356 insn->eaBase = (EABase)(insn->eaRegBase + rm); 1357 if (readDisplacement(insn)) 1358 return -1; 1359 break; 1360 } 1361 break; 1362 case 4: 1363 case 8: 1364 insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX); 1365 1366 switch (mod) { 1367 case 0x0: 1368 insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */ 1369 // In determining whether RIP-relative mode is used (rm=5), 1370 // or whether a SIB byte is present (rm=4), 1371 // the extension bits (REX.b and EVEX.x) are ignored. 1372 switch (rm & 7) { 1373 case 0x4: // SIB byte is present 1374 insn->eaBase = (insn->addressSize == 4 ? 1375 EA_BASE_sib : EA_BASE_sib64); 1376 if (readSIB(insn) || readDisplacement(insn)) 1377 return -1; 1378 break; 1379 case 0x5: // RIP-relative 1380 insn->eaBase = EA_BASE_NONE; 1381 insn->eaDisplacement = EA_DISP_32; 1382 if (readDisplacement(insn)) 1383 return -1; 1384 break; 1385 default: 1386 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1387 break; 1388 } 1389 break; 1390 case 0x1: 1391 insn->displacementSize = 1; 1392 /* FALLTHROUGH */ 1393 case 0x2: 1394 insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32); 1395 switch (rm & 7) { 1396 case 0x4: // SIB byte is present 1397 insn->eaBase = EA_BASE_sib; 1398 if (readSIB(insn) || readDisplacement(insn)) 1399 return -1; 1400 break; 1401 default: 1402 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1403 if (readDisplacement(insn)) 1404 return -1; 1405 break; 1406 } 1407 break; 1408 case 0x3: 1409 insn->eaDisplacement = EA_DISP_NONE; 1410 insn->eaBase = (EABase)(insn->eaRegBase + rm); 1411 break; 1412 } 1413 break; 1414 } /* switch (insn->addressSize) */ 1415 1416 return 0; 1417 } 1418 1419 #define GENERIC_FIXUP_FUNC(name, base, prefix) \ 1420 static uint16_t name(struct InternalInstruction *insn, \ 1421 OperandType type, \ 1422 uint8_t index, \ 1423 uint8_t *valid) { \ 1424 *valid = 1; \ 1425 switch (type) { \ 1426 default: \ 1427 debug("Unhandled register type"); \ 1428 *valid = 0; \ 1429 return 0; \ 1430 case TYPE_Rv: \ 1431 return base + index; \ 1432 case TYPE_R8: \ 1433 if (insn->rexPrefix && \ 1434 index >= 4 && index <= 7) { \ 1435 return prefix##_SPL + (index - 4); \ 1436 } else { \ 1437 return prefix##_AL + index; \ 1438 } \ 1439 case TYPE_R16: \ 1440 return prefix##_AX + index; \ 1441 case TYPE_R32: \ 1442 return prefix##_EAX + index; \ 1443 case TYPE_R64: \ 1444 return prefix##_RAX + index; \ 1445 case TYPE_ZMM: \ 1446 return prefix##_ZMM0 + index; \ 1447 case TYPE_YMM: \ 1448 return prefix##_YMM0 + index; \ 1449 case TYPE_XMM: \ 1450 return prefix##_XMM0 + index; \ 1451 case TYPE_VK: \ 1452 if (index > 7) \ 1453 *valid = 0; \ 1454 return prefix##_K0 + index; \ 1455 case TYPE_MM64: \ 1456 return prefix##_MM0 + (index & 0x7); \ 1457 case TYPE_SEGMENTREG: \ 1458 if ((index & 7) > 5) \ 1459 *valid = 0; \ 1460 return prefix##_ES + (index & 7); \ 1461 case TYPE_DEBUGREG: \ 1462 return prefix##_DR0 + index; \ 1463 case TYPE_CONTROLREG: \ 1464 return prefix##_CR0 + index; \ 1465 case TYPE_BNDR: \ 1466 if (index > 3) \ 1467 *valid = 0; \ 1468 return prefix##_BND0 + index; \ 1469 case TYPE_MVSIBX: \ 1470 return prefix##_XMM0 + index; \ 1471 case TYPE_MVSIBY: \ 1472 return prefix##_YMM0 + index; \ 1473 case TYPE_MVSIBZ: \ 1474 return prefix##_ZMM0 + index; \ 1475 } \ 1476 } 1477 1478 /* 1479 * fixup*Value - Consults an operand type to determine the meaning of the 1480 * reg or R/M field. If the operand is an XMM operand, for example, an 1481 * operand would be XMM0 instead of AX, which readModRM() would otherwise 1482 * misinterpret it as. 1483 * 1484 * @param insn - The instruction containing the operand. 1485 * @param type - The operand type. 1486 * @param index - The existing value of the field as reported by readModRM(). 1487 * @param valid - The address of a uint8_t. The target is set to 1 if the 1488 * field is valid for the register class; 0 if not. 1489 * @return - The proper value. 1490 */ 1491 GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG) 1492 GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG) 1493 1494 /* 1495 * fixupReg - Consults an operand specifier to determine which of the 1496 * fixup*Value functions to use in correcting readModRM()'ss interpretation. 1497 * 1498 * @param insn - See fixup*Value(). 1499 * @param op - The operand specifier. 1500 * @return - 0 if fixup was successful; -1 if the register returned was 1501 * invalid for its class. 1502 */ 1503 static int fixupReg(struct InternalInstruction *insn, 1504 const struct OperandSpecifier *op) { 1505 uint8_t valid; 1506 1507 dbgprintf(insn, "fixupReg()"); 1508 1509 switch ((OperandEncoding)op->encoding) { 1510 default: 1511 debug("Expected a REG or R/M encoding in fixupReg"); 1512 return -1; 1513 case ENCODING_VVVV: 1514 insn->vvvv = (Reg)fixupRegValue(insn, 1515 (OperandType)op->type, 1516 insn->vvvv, 1517 &valid); 1518 if (!valid) 1519 return -1; 1520 break; 1521 case ENCODING_REG: 1522 insn->reg = (Reg)fixupRegValue(insn, 1523 (OperandType)op->type, 1524 insn->reg - insn->regBase, 1525 &valid); 1526 if (!valid) 1527 return -1; 1528 break; 1529 CASE_ENCODING_RM: 1530 if (insn->eaBase >= insn->eaRegBase) { 1531 insn->eaBase = (EABase)fixupRMValue(insn, 1532 (OperandType)op->type, 1533 insn->eaBase - insn->eaRegBase, 1534 &valid); 1535 if (!valid) 1536 return -1; 1537 } 1538 break; 1539 } 1540 1541 return 0; 1542 } 1543 1544 /* 1545 * readOpcodeRegister - Reads an operand from the opcode field of an 1546 * instruction and interprets it appropriately given the operand width. 1547 * Handles AddRegFrm instructions. 1548 * 1549 * @param insn - the instruction whose opcode field is to be read. 1550 * @param size - The width (in bytes) of the register being specified. 1551 * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means 1552 * RAX. 1553 * @return - 0 on success; nonzero otherwise. 1554 */ 1555 static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) { 1556 dbgprintf(insn, "readOpcodeRegister()"); 1557 1558 if (size == 0) 1559 size = insn->registerSize; 1560 1561 switch (size) { 1562 case 1: 1563 insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3) 1564 | (insn->opcode & 7))); 1565 if (insn->rexPrefix && 1566 insn->opcodeRegister >= MODRM_REG_AL + 0x4 && 1567 insn->opcodeRegister < MODRM_REG_AL + 0x8) { 1568 insn->opcodeRegister = (Reg)(MODRM_REG_SPL 1569 + (insn->opcodeRegister - MODRM_REG_AL - 4)); 1570 } 1571 1572 break; 1573 case 2: 1574 insn->opcodeRegister = (Reg)(MODRM_REG_AX 1575 + ((bFromREX(insn->rexPrefix) << 3) 1576 | (insn->opcode & 7))); 1577 break; 1578 case 4: 1579 insn->opcodeRegister = (Reg)(MODRM_REG_EAX 1580 + ((bFromREX(insn->rexPrefix) << 3) 1581 | (insn->opcode & 7))); 1582 break; 1583 case 8: 1584 insn->opcodeRegister = (Reg)(MODRM_REG_RAX 1585 + ((bFromREX(insn->rexPrefix) << 3) 1586 | (insn->opcode & 7))); 1587 break; 1588 } 1589 1590 return 0; 1591 } 1592 1593 /* 1594 * readImmediate - Consumes an immediate operand from an instruction, given the 1595 * desired operand size. 1596 * 1597 * @param insn - The instruction whose operand is to be read. 1598 * @param size - The width (in bytes) of the operand. 1599 * @return - 0 if the immediate was successfully consumed; nonzero 1600 * otherwise. 1601 */ 1602 static int readImmediate(struct InternalInstruction* insn, uint8_t size) { 1603 uint8_t imm8; 1604 uint16_t imm16; 1605 uint32_t imm32; 1606 uint64_t imm64; 1607 1608 dbgprintf(insn, "readImmediate()"); 1609 1610 if (insn->numImmediatesConsumed == 2) { 1611 debug("Already consumed two immediates"); 1612 return -1; 1613 } 1614 1615 if (size == 0) 1616 size = insn->immediateSize; 1617 else 1618 insn->immediateSize = size; 1619 insn->immediateOffset = insn->readerCursor - insn->startLocation; 1620 1621 switch (size) { 1622 case 1: 1623 if (consumeByte(insn, &imm8)) 1624 return -1; 1625 insn->immediates[insn->numImmediatesConsumed] = imm8; 1626 break; 1627 case 2: 1628 if (consumeUInt16(insn, &imm16)) 1629 return -1; 1630 insn->immediates[insn->numImmediatesConsumed] = imm16; 1631 break; 1632 case 4: 1633 if (consumeUInt32(insn, &imm32)) 1634 return -1; 1635 insn->immediates[insn->numImmediatesConsumed] = imm32; 1636 break; 1637 case 8: 1638 if (consumeUInt64(insn, &imm64)) 1639 return -1; 1640 insn->immediates[insn->numImmediatesConsumed] = imm64; 1641 break; 1642 } 1643 1644 insn->numImmediatesConsumed++; 1645 1646 return 0; 1647 } 1648 1649 /* 1650 * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix. 1651 * 1652 * @param insn - The instruction whose operand is to be read. 1653 * @return - 0 if the vvvv was successfully consumed; nonzero 1654 * otherwise. 1655 */ 1656 static int readVVVV(struct InternalInstruction* insn) { 1657 dbgprintf(insn, "readVVVV()"); 1658 1659 int vvvv; 1660 if (insn->vectorExtensionType == TYPE_EVEX) 1661 vvvv = (v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4 | 1662 vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2])); 1663 else if (insn->vectorExtensionType == TYPE_VEX_3B) 1664 vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]); 1665 else if (insn->vectorExtensionType == TYPE_VEX_2B) 1666 vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]); 1667 else if (insn->vectorExtensionType == TYPE_XOP) 1668 vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]); 1669 else 1670 return -1; 1671 1672 if (insn->mode != MODE_64BIT) 1673 vvvv &= 0x7; 1674 1675 insn->vvvv = static_cast<Reg>(vvvv); 1676 return 0; 1677 } 1678 1679 /* 1680 * readMaskRegister - Reads an mask register from the opcode field of an 1681 * instruction. 1682 * 1683 * @param insn - The instruction whose opcode field is to be read. 1684 * @return - 0 on success; nonzero otherwise. 1685 */ 1686 static int readMaskRegister(struct InternalInstruction* insn) { 1687 dbgprintf(insn, "readMaskRegister()"); 1688 1689 if (insn->vectorExtensionType != TYPE_EVEX) 1690 return -1; 1691 1692 insn->writemask = 1693 static_cast<Reg>(aaaFromEVEX4of4(insn->vectorExtensionPrefix[3])); 1694 return 0; 1695 } 1696 1697 /* 1698 * readOperands - Consults the specifier for an instruction and consumes all 1699 * operands for that instruction, interpreting them as it goes. 1700 * 1701 * @param insn - The instruction whose operands are to be read and interpreted. 1702 * @return - 0 if all operands could be read; nonzero otherwise. 1703 */ 1704 static int readOperands(struct InternalInstruction* insn) { 1705 int hasVVVV, needVVVV; 1706 int sawRegImm = 0; 1707 1708 dbgprintf(insn, "readOperands()"); 1709 1710 /* If non-zero vvvv specified, need to make sure one of the operands 1711 uses it. */ 1712 hasVVVV = !readVVVV(insn); 1713 needVVVV = hasVVVV && (insn->vvvv != 0); 1714 1715 for (const auto &Op : x86OperandSets[insn->spec->operands]) { 1716 switch (Op.encoding) { 1717 case ENCODING_NONE: 1718 case ENCODING_SI: 1719 case ENCODING_DI: 1720 break; 1721 CASE_ENCODING_VSIB: 1722 // VSIB can use the V2 bit so check only the other bits. 1723 if (needVVVV) 1724 needVVVV = hasVVVV & ((insn->vvvv & 0xf) != 0); 1725 if (readModRM(insn)) 1726 return -1; 1727 1728 // Reject if SIB wasn't used. 1729 if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64) 1730 return -1; 1731 1732 // If sibIndex was set to SIB_INDEX_NONE, index offset is 4. 1733 if (insn->sibIndex == SIB_INDEX_NONE) 1734 insn->sibIndex = (SIBIndex)4; 1735 1736 // If EVEX.v2 is set this is one of the 16-31 registers. 1737 if (insn->vectorExtensionType == TYPE_EVEX && 1738 v2FromEVEX4of4(insn->vectorExtensionPrefix[3])) 1739 insn->sibIndex = (SIBIndex)(insn->sibIndex + 16); 1740 1741 // Adjust the index register to the correct size. 1742 switch ((OperandType)Op.type) { 1743 default: 1744 debug("Unhandled VSIB index type"); 1745 return -1; 1746 case TYPE_MVSIBX: 1747 insn->sibIndex = (SIBIndex)(SIB_INDEX_XMM0 + 1748 (insn->sibIndex - insn->sibIndexBase)); 1749 break; 1750 case TYPE_MVSIBY: 1751 insn->sibIndex = (SIBIndex)(SIB_INDEX_YMM0 + 1752 (insn->sibIndex - insn->sibIndexBase)); 1753 break; 1754 case TYPE_MVSIBZ: 1755 insn->sibIndex = (SIBIndex)(SIB_INDEX_ZMM0 + 1756 (insn->sibIndex - insn->sibIndexBase)); 1757 break; 1758 } 1759 1760 // Apply the AVX512 compressed displacement scaling factor. 1761 if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8) 1762 insn->displacement *= 1 << (Op.encoding - ENCODING_VSIB); 1763 break; 1764 case ENCODING_REG: 1765 CASE_ENCODING_RM: 1766 if (readModRM(insn)) 1767 return -1; 1768 if (fixupReg(insn, &Op)) 1769 return -1; 1770 // Apply the AVX512 compressed displacement scaling factor. 1771 if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8) 1772 insn->displacement *= 1 << (Op.encoding - ENCODING_RM); 1773 break; 1774 case ENCODING_IB: 1775 if (sawRegImm) { 1776 /* Saw a register immediate so don't read again and instead split the 1777 previous immediate. FIXME: This is a hack. */ 1778 insn->immediates[insn->numImmediatesConsumed] = 1779 insn->immediates[insn->numImmediatesConsumed - 1] & 0xf; 1780 ++insn->numImmediatesConsumed; 1781 break; 1782 } 1783 if (readImmediate(insn, 1)) 1784 return -1; 1785 if (Op.type == TYPE_XMM || Op.type == TYPE_YMM) 1786 sawRegImm = 1; 1787 break; 1788 case ENCODING_IW: 1789 if (readImmediate(insn, 2)) 1790 return -1; 1791 break; 1792 case ENCODING_ID: 1793 if (readImmediate(insn, 4)) 1794 return -1; 1795 break; 1796 case ENCODING_IO: 1797 if (readImmediate(insn, 8)) 1798 return -1; 1799 break; 1800 case ENCODING_Iv: 1801 if (readImmediate(insn, insn->immediateSize)) 1802 return -1; 1803 break; 1804 case ENCODING_Ia: 1805 if (readImmediate(insn, insn->addressSize)) 1806 return -1; 1807 break; 1808 case ENCODING_IRC: 1809 insn->RC = (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 1) | 1810 lFromEVEX4of4(insn->vectorExtensionPrefix[3]); 1811 break; 1812 case ENCODING_RB: 1813 if (readOpcodeRegister(insn, 1)) 1814 return -1; 1815 break; 1816 case ENCODING_RW: 1817 if (readOpcodeRegister(insn, 2)) 1818 return -1; 1819 break; 1820 case ENCODING_RD: 1821 if (readOpcodeRegister(insn, 4)) 1822 return -1; 1823 break; 1824 case ENCODING_RO: 1825 if (readOpcodeRegister(insn, 8)) 1826 return -1; 1827 break; 1828 case ENCODING_Rv: 1829 if (readOpcodeRegister(insn, 0)) 1830 return -1; 1831 break; 1832 case ENCODING_FP: 1833 break; 1834 case ENCODING_VVVV: 1835 needVVVV = 0; /* Mark that we have found a VVVV operand. */ 1836 if (!hasVVVV) 1837 return -1; 1838 if (fixupReg(insn, &Op)) 1839 return -1; 1840 break; 1841 case ENCODING_WRITEMASK: 1842 if (readMaskRegister(insn)) 1843 return -1; 1844 break; 1845 case ENCODING_DUP: 1846 break; 1847 default: 1848 dbgprintf(insn, "Encountered an operand with an unknown encoding."); 1849 return -1; 1850 } 1851 } 1852 1853 /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */ 1854 if (needVVVV) return -1; 1855 1856 return 0; 1857 } 1858 1859 /* 1860 * decodeInstruction - Reads and interprets a full instruction provided by the 1861 * user. 1862 * 1863 * @param insn - A pointer to the instruction to be populated. Must be 1864 * pre-allocated. 1865 * @param reader - The function to be used to read the instruction's bytes. 1866 * @param readerArg - A generic argument to be passed to the reader to store 1867 * any internal state. 1868 * @param logger - If non-NULL, the function to be used to write log messages 1869 * and warnings. 1870 * @param loggerArg - A generic argument to be passed to the logger to store 1871 * any internal state. 1872 * @param startLoc - The address (in the reader's address space) of the first 1873 * byte in the instruction. 1874 * @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to 1875 * decode the instruction in. 1876 * @return - 0 if the instruction's memory could be read; nonzero if 1877 * not. 1878 */ 1879 int llvm::X86Disassembler::decodeInstruction( 1880 struct InternalInstruction *insn, byteReader_t reader, 1881 const void *readerArg, dlog_t logger, void *loggerArg, const void *miiArg, 1882 uint64_t startLoc, DisassemblerMode mode) { 1883 memset(insn, 0, sizeof(struct InternalInstruction)); 1884 1885 insn->reader = reader; 1886 insn->readerArg = readerArg; 1887 insn->dlog = logger; 1888 insn->dlogArg = loggerArg; 1889 insn->startLocation = startLoc; 1890 insn->readerCursor = startLoc; 1891 insn->mode = mode; 1892 insn->numImmediatesConsumed = 0; 1893 1894 if (readPrefixes(insn) || 1895 readOpcode(insn) || 1896 getID(insn, miiArg) || 1897 insn->instructionID == 0 || 1898 readOperands(insn)) 1899 return -1; 1900 1901 insn->operands = x86OperandSets[insn->spec->operands]; 1902 1903 insn->length = insn->readerCursor - insn->startLocation; 1904 1905 dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu", 1906 startLoc, insn->readerCursor, insn->length); 1907 1908 if (insn->length > 15) 1909 dbgprintf(insn, "Instruction exceeds 15-byte limit"); 1910 1911 return 0; 1912 } 1913