//===- X86Disassembler.cpp - Disassembler for x86 and x86_64 ----*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is part of the X86 Disassembler.
// It contains code to translate the data produced by the decoder into
// MCInsts.
// Documentation for the disassembler can be found in X86Disassembler.h.
//
//===----------------------------------------------------------------------===//

#include "X86Disassembler.h"
#include "X86DisassemblerDecoder.h"

#include "llvm/MC/EDInstInfo.h"
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"

#define GET_REGINFO_ENUM
#include "X86GenRegisterInfo.inc"
#define GET_INSTRINFO_ENUM
#include "X86GenInstrInfo.inc"
#include "X86GenEDInfo.inc"

using namespace llvm;
using namespace llvm::X86Disassembler;

/// x86DisassemblerDebug - Writes a diagnostic of the form "file:line: s" to
///   the dbgs() stream.  No newline is appended.
///
/// @param file - The name of the source file reporting the message.
/// @param line - The line number reporting the message.
/// @param s    - The message text.
void x86DisassemblerDebug(const char *file,
                          unsigned line,
                          const char *s) {
  dbgs() << file << ":" << line << ": " << s;
}

/// x86DisassemblerGetInstrName - Looks up the name of an opcode.
///
/// @param Opcode - The opcode to look up.
/// @param mii    - An opaque pointer that is cast to a const MCInstrInfo *;
///                 the caller must pass a pointer to an MCInstrInfo.
/// @return       - The result of MCInstrInfo::getName() for Opcode.
const char *x86DisassemblerGetInstrName(unsigned Opcode, void *mii) {
  const MCInstrInfo *MII = static_cast<const MCInstrInfo *>(mii);
  return MII->getName(Opcode);
}

// debug - Reports s through x86DisassemblerDebug, tagged with the current file
// and line, wrapped in the DEBUG() macro.  The expansion already ends in ';',
// so a call site written as `debug("...");` produces an extra empty statement.
#define debug(s) DEBUG(x86DisassemblerDebug(__FILE__, __LINE__, s));

namespace llvm {

// Fill-ins to make the compiler happy.  These constants are never actually
//   assigned; they are just filler to make an automatically-generated switch
//   statement work.
namespace X86 {
  enum {
    BX_SI = 500,
    BX_DI = 501,
    BP_SI = 502,
    BP_DI = 503,
    sib   = 504,
    sib64 = 505
  };
}

// Target objects defined elsewhere; used below when registering the
// disassembler factories.
extern Target TheX86_32Target, TheX86_64Target;

}

// Forward declaration; the definition appears later in this file.
static bool translateInstruction(MCInst &target,
                                InternalInstruction &source);

/// Constructs a disassembler for the given mode.
///
/// @param STI  - Subtarget info, forwarded to the MCDisassembler base class.
/// @param mode - The decoding mode, stored in fMode.
/// @param MII  - Instruction info, stored in the MII member.  The destructor
///               deletes it, so this object takes ownership of the pointer.
X86GenericDisassembler::X86GenericDisassembler(const MCSubtargetInfo &STI,
                                               DisassemblerMode mode,
                                               const MCInstrInfo *MII)
  : MCDisassembler(STI), MII(MII), fMode(mode) {}

/// Deletes the MCInstrInfo that was handed to the constructor.
X86GenericDisassembler::~X86GenericDisassembler() {
  delete MII;
}

/// getEDInfo - Returns the instInfoX86 table (presumably the one pulled in by
///   X86GenEDInfo.inc above -- TODO confirm).
const EDInstInfo *X86GenericDisassembler::getEDInfo() const {
  return instInfoX86;
}

/// regionReader - a callback function that wraps the readByte method from
///   MemoryObject.
///
/// @param arg      - The generic callback parameter.  In this case, this should
///                   be a pointer to a MemoryObject.
/// @param byte     - A pointer to the byte to be read.
/// @param address  - The address to be read.
/// @return         - Whatever MemoryObject::readByte() returns.
static int regionReader(void* arg, uint8_t* byte, uint64_t address) {
  MemoryObject* region = static_cast<MemoryObject*>(arg);
  return region->readByte(address, byte);
}

/// logger - a callback function that wraps the operator<< method from
///   raw_ostream.
///
/// @param arg      - The generic callback parameter.  This should be a pointer
///                   to a raw_ostream.
/// @param log      - A string to be logged.  logger() adds a newline.
107 static void logger(void* arg, const char* log) { 108 if (!arg) 109 return; 110 111 raw_ostream &vStream = *(static_cast<raw_ostream*>(arg)); 112 vStream << log << "\n"; 113 } 114 115 // 116 // Public interface for the disassembler 117 // 118 119 MCDisassembler::DecodeStatus 120 X86GenericDisassembler::getInstruction(MCInst &instr, 121 uint64_t &size, 122 MemoryObject ®ion, 123 uint64_t address, 124 raw_ostream &vStream, 125 raw_ostream &cStream) const { 126 InternalInstruction internalInstr; 127 128 dlog_t loggerFn = logger; 129 if (&vStream == &nulls()) 130 loggerFn = 0; // Disable logging completely if it's going to nulls(). 131 132 int ret = decodeInstruction(&internalInstr, 133 regionReader, 134 (void*)®ion, 135 loggerFn, 136 (void*)&vStream, 137 (void*)MII, 138 address, 139 fMode); 140 141 if (ret) { 142 size = internalInstr.readerCursor - address; 143 return Fail; 144 } 145 else { 146 size = internalInstr.length; 147 return (!translateInstruction(instr, internalInstr)) ? Success : Fail; 148 } 149 } 150 151 // 152 // Private code that translates from struct InternalInstructions to MCInsts. 153 // 154 155 /// translateRegister - Translates an internal register to the appropriate LLVM 156 /// register, and appends it as an operand to an MCInst. 157 /// 158 /// @param mcInst - The MCInst to append to. 159 /// @param reg - The Reg to append. 160 static void translateRegister(MCInst &mcInst, Reg reg) { 161 #define ENTRY(x) X86::x, 162 uint8_t llvmRegnums[] = { 163 ALL_REGS 164 0 165 }; 166 #undef ENTRY 167 168 uint8_t llvmRegnum = llvmRegnums[reg]; 169 mcInst.addOperand(MCOperand::CreateReg(llvmRegnum)); 170 } 171 172 /// translateImmediate - Appends an immediate operand to an MCInst. 173 /// 174 /// @param mcInst - The MCInst to append to. 175 /// @param immediate - The immediate value to append. 176 /// @param operand - The operand, as stored in the descriptor table. 177 /// @param insn - The internal instruction. 
178 static void translateImmediate(MCInst &mcInst, uint64_t immediate, 179 const OperandSpecifier &operand, 180 InternalInstruction &insn) { 181 // Sign-extend the immediate if necessary. 182 183 OperandType type = operand.type; 184 185 if (type == TYPE_RELv) { 186 switch (insn.displacementSize) { 187 default: 188 break; 189 case 1: 190 type = TYPE_MOFFS8; 191 break; 192 case 2: 193 type = TYPE_MOFFS16; 194 break; 195 case 4: 196 type = TYPE_MOFFS32; 197 break; 198 case 8: 199 type = TYPE_MOFFS64; 200 break; 201 } 202 } 203 // By default sign-extend all X86 immediates based on their encoding. 204 else if (type == TYPE_IMM8 || type == TYPE_IMM16 || type == TYPE_IMM32 || 205 type == TYPE_IMM64) { 206 uint32_t Opcode = mcInst.getOpcode(); 207 switch (operand.encoding) { 208 default: 209 break; 210 case ENCODING_IB: 211 // Special case those X86 instructions that use the imm8 as a set of 212 // bits, bit count, etc. and are not sign-extend. 213 if (Opcode != X86::BLENDPSrri && Opcode != X86::BLENDPDrri && 214 Opcode != X86::PBLENDWrri && Opcode != X86::MPSADBWrri && 215 Opcode != X86::DPPSrri && Opcode != X86::DPPDrri && 216 Opcode != X86::INSERTPSrr && Opcode != X86::VBLENDPSYrri && 217 Opcode != X86::VBLENDPSYrmi && Opcode != X86::VBLENDPDYrri && 218 Opcode != X86::VBLENDPDYrmi && Opcode != X86::VPBLENDWrri && 219 Opcode != X86::VMPSADBWrri && Opcode != X86::VDPPSYrri && 220 Opcode != X86::VDPPSYrmi && Opcode != X86::VDPPDrri && 221 Opcode != X86::VINSERTPSrr) 222 type = TYPE_MOFFS8; 223 break; 224 case ENCODING_IW: 225 type = TYPE_MOFFS16; 226 break; 227 case ENCODING_ID: 228 type = TYPE_MOFFS32; 229 break; 230 case ENCODING_IO: 231 type = TYPE_MOFFS64; 232 break; 233 } 234 } 235 236 switch (type) { 237 case TYPE_XMM128: 238 mcInst.addOperand(MCOperand::CreateReg(X86::XMM0 + (immediate >> 4))); 239 return; 240 case TYPE_XMM256: 241 mcInst.addOperand(MCOperand::CreateReg(X86::YMM0 + (immediate >> 4))); 242 return; 243 case TYPE_MOFFS8: 244 case TYPE_REL8: 245 
if(immediate & 0x80) 246 immediate |= ~(0xffull); 247 break; 248 case TYPE_MOFFS16: 249 if(immediate & 0x8000) 250 immediate |= ~(0xffffull); 251 break; 252 case TYPE_MOFFS32: 253 case TYPE_REL32: 254 case TYPE_REL64: 255 if(immediate & 0x80000000) 256 immediate |= ~(0xffffffffull); 257 break; 258 case TYPE_MOFFS64: 259 default: 260 // operand is 64 bits wide. Do nothing. 261 break; 262 } 263 264 mcInst.addOperand(MCOperand::CreateImm(immediate)); 265 } 266 267 /// translateRMRegister - Translates a register stored in the R/M field of the 268 /// ModR/M byte to its LLVM equivalent and appends it to an MCInst. 269 /// @param mcInst - The MCInst to append to. 270 /// @param insn - The internal instruction to extract the R/M field 271 /// from. 272 /// @return - 0 on success; -1 otherwise 273 static bool translateRMRegister(MCInst &mcInst, 274 InternalInstruction &insn) { 275 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) { 276 debug("A R/M register operand may not have a SIB byte"); 277 return true; 278 } 279 280 switch (insn.eaBase) { 281 default: 282 debug("Unexpected EA base register"); 283 return true; 284 case EA_BASE_NONE: 285 debug("EA_BASE_NONE for ModR/M base"); 286 return true; 287 #define ENTRY(x) case EA_BASE_##x: 288 ALL_EA_BASES 289 #undef ENTRY 290 debug("A R/M register operand may not have a base; " 291 "the operand must be a register."); 292 return true; 293 #define ENTRY(x) \ 294 case EA_REG_##x: \ 295 mcInst.addOperand(MCOperand::CreateReg(X86::x)); break; 296 ALL_REGS 297 #undef ENTRY 298 } 299 300 return false; 301 } 302 303 /// translateRMMemory - Translates a memory operand stored in the Mod and R/M 304 /// fields of an internal instruction (and possibly its SIB byte) to a memory 305 /// operand in LLVM's format, and appends it to an MCInst. 306 /// 307 /// @param mcInst - The MCInst to append to. 308 /// @param insn - The instruction to extract Mod, R/M, and SIB fields 309 /// from. 
310 /// @return - 0 on success; nonzero otherwise 311 static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) { 312 // Addresses in an MCInst are represented as five operands: 313 // 1. basereg (register) The R/M base, or (if there is a SIB) the 314 // SIB base 315 // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified 316 // scale amount 317 // 3. indexreg (register) x86_registerNONE, or (if there is a SIB) 318 // the index (which is multiplied by the 319 // scale amount) 320 // 4. displacement (immediate) 0, or the displacement if there is one 321 // 5. segmentreg (register) x86_registerNONE for now, but could be set 322 // if we have segment overrides 323 324 MCOperand baseReg; 325 MCOperand scaleAmount; 326 MCOperand indexReg; 327 MCOperand displacement; 328 MCOperand segmentReg; 329 330 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) { 331 if (insn.sibBase != SIB_BASE_NONE) { 332 switch (insn.sibBase) { 333 default: 334 debug("Unexpected sibBase"); 335 return true; 336 #define ENTRY(x) \ 337 case SIB_BASE_##x: \ 338 baseReg = MCOperand::CreateReg(X86::x); break; 339 ALL_SIB_BASES 340 #undef ENTRY 341 } 342 } else { 343 baseReg = MCOperand::CreateReg(0); 344 } 345 346 if (insn.sibIndex != SIB_INDEX_NONE) { 347 switch (insn.sibIndex) { 348 default: 349 debug("Unexpected sibIndex"); 350 return true; 351 #define ENTRY(x) \ 352 case SIB_INDEX_##x: \ 353 indexReg = MCOperand::CreateReg(X86::x); break; 354 EA_BASES_32BIT 355 EA_BASES_64BIT 356 #undef ENTRY 357 } 358 } else { 359 indexReg = MCOperand::CreateReg(0); 360 } 361 362 scaleAmount = MCOperand::CreateImm(insn.sibScale); 363 } else { 364 switch (insn.eaBase) { 365 case EA_BASE_NONE: 366 if (insn.eaDisplacement == EA_DISP_NONE) { 367 debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base"); 368 return true; 369 } 370 if (insn.mode == MODE_64BIT) 371 baseReg = MCOperand::CreateReg(X86::RIP); // Section 2.2.1.6 372 else 373 baseReg = MCOperand::CreateReg(0); 374 375 
indexReg = MCOperand::CreateReg(0); 376 break; 377 case EA_BASE_BX_SI: 378 baseReg = MCOperand::CreateReg(X86::BX); 379 indexReg = MCOperand::CreateReg(X86::SI); 380 break; 381 case EA_BASE_BX_DI: 382 baseReg = MCOperand::CreateReg(X86::BX); 383 indexReg = MCOperand::CreateReg(X86::DI); 384 break; 385 case EA_BASE_BP_SI: 386 baseReg = MCOperand::CreateReg(X86::BP); 387 indexReg = MCOperand::CreateReg(X86::SI); 388 break; 389 case EA_BASE_BP_DI: 390 baseReg = MCOperand::CreateReg(X86::BP); 391 indexReg = MCOperand::CreateReg(X86::DI); 392 break; 393 default: 394 indexReg = MCOperand::CreateReg(0); 395 switch (insn.eaBase) { 396 default: 397 debug("Unexpected eaBase"); 398 return true; 399 // Here, we will use the fill-ins defined above. However, 400 // BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and 401 // sib and sib64 were handled in the top-level if, so they're only 402 // placeholders to keep the compiler happy. 403 #define ENTRY(x) \ 404 case EA_BASE_##x: \ 405 baseReg = MCOperand::CreateReg(X86::x); break; 406 ALL_EA_BASES 407 #undef ENTRY 408 #define ENTRY(x) case EA_REG_##x: 409 ALL_REGS 410 #undef ENTRY 411 debug("A R/M memory operand may not be a register; " 412 "the base field must be a base."); 413 return true; 414 } 415 } 416 417 scaleAmount = MCOperand::CreateImm(1); 418 } 419 420 displacement = MCOperand::CreateImm(insn.displacement); 421 422 static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = { 423 0, // SEG_OVERRIDE_NONE 424 X86::CS, 425 X86::SS, 426 X86::DS, 427 X86::ES, 428 X86::FS, 429 X86::GS 430 }; 431 432 segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]); 433 434 mcInst.addOperand(baseReg); 435 mcInst.addOperand(scaleAmount); 436 mcInst.addOperand(indexReg); 437 mcInst.addOperand(displacement); 438 mcInst.addOperand(segmentReg); 439 return false; 440 } 441 442 /// translateRM - Translates an operand stored in the R/M (and possibly SIB) 443 /// byte of an instruction to LLVM form, and appends it to an MCInst. 
444 /// 445 /// @param mcInst - The MCInst to append to. 446 /// @param operand - The operand, as stored in the descriptor table. 447 /// @param insn - The instruction to extract Mod, R/M, and SIB fields 448 /// from. 449 /// @return - 0 on success; nonzero otherwise 450 static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, 451 InternalInstruction &insn) { 452 switch (operand.type) { 453 default: 454 debug("Unexpected type for a R/M operand"); 455 return true; 456 case TYPE_R8: 457 case TYPE_R16: 458 case TYPE_R32: 459 case TYPE_R64: 460 case TYPE_Rv: 461 case TYPE_MM: 462 case TYPE_MM32: 463 case TYPE_MM64: 464 case TYPE_XMM: 465 case TYPE_XMM32: 466 case TYPE_XMM64: 467 case TYPE_XMM128: 468 case TYPE_XMM256: 469 case TYPE_DEBUGREG: 470 case TYPE_CONTROLREG: 471 return translateRMRegister(mcInst, insn); 472 case TYPE_M: 473 case TYPE_M8: 474 case TYPE_M16: 475 case TYPE_M32: 476 case TYPE_M64: 477 case TYPE_M128: 478 case TYPE_M256: 479 case TYPE_M512: 480 case TYPE_Mv: 481 case TYPE_M32FP: 482 case TYPE_M64FP: 483 case TYPE_M80FP: 484 case TYPE_M16INT: 485 case TYPE_M32INT: 486 case TYPE_M64INT: 487 case TYPE_M1616: 488 case TYPE_M1632: 489 case TYPE_M1664: 490 case TYPE_LEA: 491 return translateRMMemory(mcInst, insn); 492 } 493 } 494 495 /// translateFPRegister - Translates a stack position on the FPU stack to its 496 /// LLVM form, and appends it to an MCInst. 497 /// 498 /// @param mcInst - The MCInst to append to. 499 /// @param stackPos - The stack position to translate. 500 /// @return - 0 on success; nonzero otherwise. 501 static bool translateFPRegister(MCInst &mcInst, 502 uint8_t stackPos) { 503 if (stackPos >= 8) { 504 debug("Invalid FP stack position"); 505 return true; 506 } 507 508 mcInst.addOperand(MCOperand::CreateReg(X86::ST0 + stackPos)); 509 510 return false; 511 } 512 513 /// translateOperand - Translates an operand stored in an internal instruction 514 /// to LLVM's format and appends it to an MCInst. 
515 /// 516 /// @param mcInst - The MCInst to append to. 517 /// @param operand - The operand, as stored in the descriptor table. 518 /// @param insn - The internal instruction. 519 /// @return - false on success; true otherwise. 520 static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, 521 InternalInstruction &insn) { 522 switch (operand.encoding) { 523 default: 524 debug("Unhandled operand encoding during translation"); 525 return true; 526 case ENCODING_REG: 527 translateRegister(mcInst, insn.reg); 528 return false; 529 case ENCODING_RM: 530 return translateRM(mcInst, operand, insn); 531 case ENCODING_CB: 532 case ENCODING_CW: 533 case ENCODING_CD: 534 case ENCODING_CP: 535 case ENCODING_CO: 536 case ENCODING_CT: 537 debug("Translation of code offsets isn't supported."); 538 return true; 539 case ENCODING_IB: 540 case ENCODING_IW: 541 case ENCODING_ID: 542 case ENCODING_IO: 543 case ENCODING_Iv: 544 case ENCODING_Ia: 545 translateImmediate(mcInst, 546 insn.immediates[insn.numImmediatesTranslated++], 547 operand, 548 insn); 549 return false; 550 case ENCODING_RB: 551 case ENCODING_RW: 552 case ENCODING_RD: 553 case ENCODING_RO: 554 translateRegister(mcInst, insn.opcodeRegister); 555 return false; 556 case ENCODING_I: 557 return translateFPRegister(mcInst, insn.opcodeModifier); 558 case ENCODING_Rv: 559 translateRegister(mcInst, insn.opcodeRegister); 560 return false; 561 case ENCODING_VVVV: 562 translateRegister(mcInst, insn.vvvv); 563 return false; 564 case ENCODING_DUP: 565 return translateOperand(mcInst, 566 insn.spec->operands[operand.type - TYPE_DUP0], 567 insn); 568 } 569 } 570 571 /// translateInstruction - Translates an internal instruction and all its 572 /// operands to an MCInst. 573 /// 574 /// @param mcInst - The MCInst to populate with the instruction's data. 575 /// @param insn - The internal instruction. 576 /// @return - false on success; true otherwise. 
577 static bool translateInstruction(MCInst &mcInst, 578 InternalInstruction &insn) { 579 if (!insn.spec) { 580 debug("Instruction has no specification"); 581 return true; 582 } 583 584 mcInst.setOpcode(insn.instructionID); 585 586 int index; 587 588 insn.numImmediatesTranslated = 0; 589 590 for (index = 0; index < X86_MAX_OPERANDS; ++index) { 591 if (insn.spec->operands[index].encoding != ENCODING_NONE) { 592 if (translateOperand(mcInst, insn.spec->operands[index], insn)) { 593 return true; 594 } 595 } 596 } 597 598 return false; 599 } 600 601 static MCDisassembler *createX86_32Disassembler(const Target &T, 602 const MCSubtargetInfo &STI) { 603 return new X86Disassembler::X86GenericDisassembler(STI, MODE_32BIT, 604 T.createMCInstrInfo()); 605 } 606 607 static MCDisassembler *createX86_64Disassembler(const Target &T, 608 const MCSubtargetInfo &STI) { 609 return new X86Disassembler::X86GenericDisassembler(STI, MODE_64BIT, 610 T.createMCInstrInfo()); 611 } 612 613 extern "C" void LLVMInitializeX86Disassembler() { 614 // Register the disassembler. 615 TargetRegistry::RegisterMCDisassembler(TheX86_32Target, 616 createX86_32Disassembler); 617 TargetRegistry::RegisterMCDisassembler(TheX86_64Target, 618 createX86_64Disassembler); 619 } 620