1 //===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file is part of the X86 Disassembler. 11 // It contains code to translate the data produced by the decoder into 12 // MCInsts. 13 // Documentation for the disassembler can be found in X86Disassembler.h. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "X86Disassembler.h" 18 #include "X86DisassemblerDecoder.h" 19 #include "llvm/MC/EDInstInfo.h" 20 #include "llvm/MC/MCContext.h" 21 #include "llvm/MC/MCDisassembler.h" 22 #include "llvm/MC/MCExpr.h" 23 #include "llvm/MC/MCInst.h" 24 #include "llvm/MC/MCInstrInfo.h" 25 #include "llvm/MC/MCSubtargetInfo.h" 26 #include "llvm/Support/Debug.h" 27 #include "llvm/Support/MemoryObject.h" 28 #include "llvm/Support/TargetRegistry.h" 29 #include "llvm/Support/raw_ostream.h" 30 31 #define GET_REGINFO_ENUM 32 #include "X86GenRegisterInfo.inc" 33 #define GET_INSTRINFO_ENUM 34 #include "X86GenInstrInfo.inc" 35 #include "X86GenEDInfo.inc" 36 37 using namespace llvm; 38 using namespace llvm::X86Disassembler; 39 40 void x86DisassemblerDebug(const char *file, 41 unsigned line, 42 const char *s) { 43 dbgs() << file << ":" << line << ": " << s; 44 } 45 46 const char *x86DisassemblerGetInstrName(unsigned Opcode, const void *mii) { 47 const MCInstrInfo *MII = static_cast<const MCInstrInfo *>(mii); 48 return MII->getName(Opcode); 49 } 50 51 #define debug(s) DEBUG(x86DisassemblerDebug(__FILE__, __LINE__, s)); 52 53 namespace llvm { 54 55 // Fill-ins to make the compiler happy. These constants are never actually 56 // assigned; they are just filler to make an automatically-generated switch 57 // statement work. 58 namespace X86 { 59 enum { 60 BX_SI = 500, 61 BX_DI = 501, 62 BP_SI = 502, 63 BP_DI = 503, 64 sib = 504, 65 sib64 = 505 66 }; 67 } 68 69 extern Target TheX86_32Target, TheX86_64Target; 70 71 } 72 73 static bool translateInstruction(MCInst &target, 74 InternalInstruction &source, 75 const MCDisassembler *Dis); 76 77 X86GenericDisassembler::X86GenericDisassembler(const MCSubtargetInfo &STI, 78 DisassemblerMode mode, 79 const MCInstrInfo *MII) 80 : MCDisassembler(STI), MII(MII), fMode(mode) {} 81 82 X86GenericDisassembler::~X86GenericDisassembler() { 83 delete MII; 84 } 85 86 const EDInstInfo *X86GenericDisassembler::getEDInfo() const { 87 return instInfoX86; 88 } 89 90 /// regionReader - a callback function that wraps the readByte method from 91 /// MemoryObject. 92 /// 93 /// @param arg - The generic callback parameter. In this case, this should 94 /// be a pointer to a MemoryObject. 95 /// @param byte - A pointer to the byte to be read. 96 /// @param address - The address to be read. 97 static int regionReader(const void* arg, uint8_t* byte, uint64_t address) { 98 const MemoryObject* region = static_cast<const MemoryObject*>(arg); 99 return region->readByte(address, byte); 100 } 101 102 /// logger - a callback function that wraps the operator<< method from 103 /// raw_ostream. 104 /// 105 /// @param arg - The generic callback parameter. This should be a pointe 106 /// to a raw_ostream. 107 /// @param log - A string to be logged. logger() adds a newline. 108 static void logger(void* arg, const char* log) { 109 if (!arg) 110 return; 111 112 raw_ostream &vStream = *(static_cast<raw_ostream*>(arg)); 113 vStream << log << "\n"; 114 } 115 116 // 117 // Public interface for the disassembler 118 // 119 120 MCDisassembler::DecodeStatus 121 X86GenericDisassembler::getInstruction(MCInst &instr, 122 uint64_t &size, 123 const MemoryObject ®ion, 124 uint64_t address, 125 raw_ostream &vStream, 126 raw_ostream &cStream) const { 127 CommentStream = &cStream; 128 129 InternalInstruction internalInstr; 130 131 dlog_t loggerFn = logger; 132 if (&vStream == &nulls()) 133 loggerFn = 0; // Disable logging completely if it's going to nulls(). 134 135 int ret = decodeInstruction(&internalInstr, 136 regionReader, 137 (const void*)®ion, 138 loggerFn, 139 (void*)&vStream, 140 (const void*)MII, 141 address, 142 fMode); 143 144 if (ret) { 145 size = internalInstr.readerCursor - address; 146 return Fail; 147 } 148 else { 149 size = internalInstr.length; 150 return (!translateInstruction(instr, internalInstr, this)) ? 151 Success : Fail; 152 } 153 } 154 155 // 156 // Private code that translates from struct InternalInstructions to MCInsts. 157 // 158 159 /// translateRegister - Translates an internal register to the appropriate LLVM 160 /// register, and appends it as an operand to an MCInst. 161 /// 162 /// @param mcInst - The MCInst to append to. 163 /// @param reg - The Reg to append. 164 static void translateRegister(MCInst &mcInst, Reg reg) { 165 #define ENTRY(x) X86::x, 166 uint8_t llvmRegnums[] = { 167 ALL_REGS 168 0 169 }; 170 #undef ENTRY 171 172 uint8_t llvmRegnum = llvmRegnums[reg]; 173 mcInst.addOperand(MCOperand::CreateReg(llvmRegnum)); 174 } 175 176 /// tryAddingSymbolicOperand - trys to add a symbolic operand in place of the 177 /// immediate Value in the MCInst. 178 /// 179 /// @param Value - The immediate Value, has had any PC adjustment made by 180 /// the caller. 181 /// @param isBranch - If the instruction is a branch instruction 182 /// @param Address - The starting address of the instruction 183 /// @param Offset - The byte offset to this immediate in the instruction 184 /// @param Width - The byte width of this immediate in the instruction 185 /// 186 /// If the getOpInfo() function was set when setupForSymbolicDisassembly() was 187 /// called then that function is called to get any symbolic information for the 188 /// immediate in the instruction using the Address, Offset and Width. If that 189 /// returns non-zero then the symbolic information it returns is used to create 190 /// an MCExpr and that is added as an operand to the MCInst. If getOpInfo() 191 /// returns zero and isBranch is true then a symbol look up for immediate Value 192 /// is done and if a symbol is found an MCExpr is created with that, else 193 /// an MCExpr with the immediate Value is created. This function returns true 194 /// if it adds an operand to the MCInst and false otherwise. 195 static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch, 196 uint64_t Address, uint64_t Offset, 197 uint64_t Width, MCInst &MI, 198 const MCDisassembler *Dis) { 199 LLVMOpInfoCallback getOpInfo = Dis->getLLVMOpInfoCallback(); 200 struct LLVMOpInfo1 SymbolicOp; 201 memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); 202 SymbolicOp.Value = Value; 203 void *DisInfo = Dis->getDisInfoBlock(); 204 205 if (!getOpInfo || 206 !getOpInfo(DisInfo, Address, Offset, Width, 1, &SymbolicOp)) { 207 // Clear SymbolicOp.Value from above and also all other fields. 208 memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); 209 LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback(); 210 if (!SymbolLookUp) 211 return false; 212 uint64_t ReferenceType; 213 if (isBranch) 214 ReferenceType = LLVMDisassembler_ReferenceType_In_Branch; 215 else 216 ReferenceType = LLVMDisassembler_ReferenceType_InOut_None; 217 const char *ReferenceName; 218 const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address, 219 &ReferenceName); 220 if (Name) { 221 SymbolicOp.AddSymbol.Name = Name; 222 SymbolicOp.AddSymbol.Present = true; 223 } 224 // For branches always create an MCExpr so it gets printed as hex address. 225 else if (isBranch) { 226 SymbolicOp.Value = Value; 227 } 228 if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub) 229 (*Dis->CommentStream) << "symbol stub for: " << ReferenceName; 230 if (!Name && !isBranch) 231 return false; 232 } 233 234 MCContext *Ctx = Dis->getMCContext(); 235 const MCExpr *Add = NULL; 236 if (SymbolicOp.AddSymbol.Present) { 237 if (SymbolicOp.AddSymbol.Name) { 238 StringRef Name(SymbolicOp.AddSymbol.Name); 239 MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name); 240 Add = MCSymbolRefExpr::Create(Sym, *Ctx); 241 } else { 242 Add = MCConstantExpr::Create((int)SymbolicOp.AddSymbol.Value, *Ctx); 243 } 244 } 245 246 const MCExpr *Sub = NULL; 247 if (SymbolicOp.SubtractSymbol.Present) { 248 if (SymbolicOp.SubtractSymbol.Name) { 249 StringRef Name(SymbolicOp.SubtractSymbol.Name); 250 MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name); 251 Sub = MCSymbolRefExpr::Create(Sym, *Ctx); 252 } else { 253 Sub = MCConstantExpr::Create((int)SymbolicOp.SubtractSymbol.Value, *Ctx); 254 } 255 } 256 257 const MCExpr *Off = NULL; 258 if (SymbolicOp.Value != 0) 259 Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx); 260 261 const MCExpr *Expr; 262 if (Sub) { 263 const MCExpr *LHS; 264 if (Add) 265 LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx); 266 else 267 LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx); 268 if (Off != 0) 269 Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx); 270 else 271 Expr = LHS; 272 } else if (Add) { 273 if (Off != 0) 274 Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx); 275 else 276 Expr = Add; 277 } else { 278 if (Off != 0) 279 Expr = Off; 280 else 281 Expr = MCConstantExpr::Create(0, *Ctx); 282 } 283 284 MI.addOperand(MCOperand::CreateExpr(Expr)); 285 286 return true; 287 } 288 289 /// tryAddingPcLoadReferenceComment - trys to add a comment as to what is being 290 /// referenced by a load instruction with the base register that is the rip. 291 /// These can often be addresses in a literal pool. The Address of the 292 /// instruction and its immediate Value are used to determine the address 293 /// being referenced in the literal pool entry. The SymbolLookUp call back will 294 /// return a pointer to a literal 'C' string if the referenced address is an 295 /// address into a section with 'C' string literals. 296 static void tryAddingPcLoadReferenceComment(uint64_t Address, uint64_t Value, 297 const void *Decoder) { 298 const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder); 299 LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback(); 300 if (SymbolLookUp) { 301 void *DisInfo = Dis->getDisInfoBlock(); 302 uint64_t ReferenceType = LLVMDisassembler_ReferenceType_In_PCrel_Load; 303 const char *ReferenceName; 304 (void)SymbolLookUp(DisInfo, Value, &ReferenceType, Address, &ReferenceName); 305 if(ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr) 306 (*Dis->CommentStream) << "literal pool for: " << ReferenceName; 307 } 308 } 309 310 /// translateImmediate - Appends an immediate operand to an MCInst. 311 /// 312 /// @param mcInst - The MCInst to append to. 313 /// @param immediate - The immediate value to append. 314 /// @param operand - The operand, as stored in the descriptor table. 315 /// @param insn - The internal instruction. 316 static void translateImmediate(MCInst &mcInst, uint64_t immediate, 317 const OperandSpecifier &operand, 318 InternalInstruction &insn, 319 const MCDisassembler *Dis) { 320 // Sign-extend the immediate if necessary. 321 322 OperandType type = (OperandType)operand.type; 323 324 bool isBranch = false; 325 uint64_t pcrel = 0; 326 if (type == TYPE_RELv) { 327 isBranch = true; 328 pcrel = insn.startLocation + 329 insn.immediateOffset + insn.immediateSize; 330 switch (insn.displacementSize) { 331 default: 332 break; 333 case 1: 334 type = TYPE_MOFFS8; 335 break; 336 case 2: 337 type = TYPE_MOFFS16; 338 break; 339 case 4: 340 type = TYPE_MOFFS32; 341 break; 342 case 8: 343 type = TYPE_MOFFS64; 344 break; 345 } 346 } 347 // By default sign-extend all X86 immediates based on their encoding. 348 else if (type == TYPE_IMM8 || type == TYPE_IMM16 || type == TYPE_IMM32 || 349 type == TYPE_IMM64) { 350 uint32_t Opcode = mcInst.getOpcode(); 351 switch (operand.encoding) { 352 default: 353 break; 354 case ENCODING_IB: 355 // Special case those X86 instructions that use the imm8 as a set of 356 // bits, bit count, etc. and are not sign-extend. 357 if (Opcode != X86::BLENDPSrri && Opcode != X86::BLENDPDrri && 358 Opcode != X86::PBLENDWrri && Opcode != X86::MPSADBWrri && 359 Opcode != X86::DPPSrri && Opcode != X86::DPPDrri && 360 Opcode != X86::INSERTPSrr && Opcode != X86::VBLENDPSYrri && 361 Opcode != X86::VBLENDPSYrmi && Opcode != X86::VBLENDPDYrri && 362 Opcode != X86::VBLENDPDYrmi && Opcode != X86::VPBLENDWrri && 363 Opcode != X86::VMPSADBWrri && Opcode != X86::VDPPSYrri && 364 Opcode != X86::VDPPSYrmi && Opcode != X86::VDPPDrri && 365 Opcode != X86::VINSERTPSrr) 366 type = TYPE_MOFFS8; 367 break; 368 case ENCODING_IW: 369 type = TYPE_MOFFS16; 370 break; 371 case ENCODING_ID: 372 type = TYPE_MOFFS32; 373 break; 374 case ENCODING_IO: 375 type = TYPE_MOFFS64; 376 break; 377 } 378 } 379 380 switch (type) { 381 case TYPE_XMM32: 382 case TYPE_XMM64: 383 case TYPE_XMM128: 384 mcInst.addOperand(MCOperand::CreateReg(X86::XMM0 + (immediate >> 4))); 385 return; 386 case TYPE_XMM256: 387 mcInst.addOperand(MCOperand::CreateReg(X86::YMM0 + (immediate >> 4))); 388 return; 389 case TYPE_REL8: 390 isBranch = true; 391 pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize; 392 // fall through to sign extend the immediate if needed. 393 case TYPE_MOFFS8: 394 if(immediate & 0x80) 395 immediate |= ~(0xffull); 396 break; 397 case TYPE_MOFFS16: 398 if(immediate & 0x8000) 399 immediate |= ~(0xffffull); 400 break; 401 case TYPE_REL32: 402 case TYPE_REL64: 403 isBranch = true; 404 pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize; 405 // fall through to sign extend the immediate if needed. 406 case TYPE_MOFFS32: 407 if(immediate & 0x80000000) 408 immediate |= ~(0xffffffffull); 409 break; 410 case TYPE_MOFFS64: 411 default: 412 // operand is 64 bits wide. Do nothing. 413 break; 414 } 415 416 if(!tryAddingSymbolicOperand(immediate + pcrel, isBranch, insn.startLocation, 417 insn.immediateOffset, insn.immediateSize, 418 mcInst, Dis)) 419 mcInst.addOperand(MCOperand::CreateImm(immediate)); 420 } 421 422 /// translateRMRegister - Translates a register stored in the R/M field of the 423 /// ModR/M byte to its LLVM equivalent and appends it to an MCInst. 424 /// @param mcInst - The MCInst to append to. 425 /// @param insn - The internal instruction to extract the R/M field 426 /// from. 427 /// @return - 0 on success; -1 otherwise 428 static bool translateRMRegister(MCInst &mcInst, 429 InternalInstruction &insn) { 430 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) { 431 debug("A R/M register operand may not have a SIB byte"); 432 return true; 433 } 434 435 switch (insn.eaBase) { 436 default: 437 debug("Unexpected EA base register"); 438 return true; 439 case EA_BASE_NONE: 440 debug("EA_BASE_NONE for ModR/M base"); 441 return true; 442 #define ENTRY(x) case EA_BASE_##x: 443 ALL_EA_BASES 444 #undef ENTRY 445 debug("A R/M register operand may not have a base; " 446 "the operand must be a register."); 447 return true; 448 #define ENTRY(x) \ 449 case EA_REG_##x: \ 450 mcInst.addOperand(MCOperand::CreateReg(X86::x)); break; 451 ALL_REGS 452 #undef ENTRY 453 } 454 455 return false; 456 } 457 458 /// translateRMMemory - Translates a memory operand stored in the Mod and R/M 459 /// fields of an internal instruction (and possibly its SIB byte) to a memory 460 /// operand in LLVM's format, and appends it to an MCInst. 461 /// 462 /// @param mcInst - The MCInst to append to. 463 /// @param insn - The instruction to extract Mod, R/M, and SIB fields 464 /// from. 465 /// @return - 0 on success; nonzero otherwise 466 static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, 467 const MCDisassembler *Dis) { 468 // Addresses in an MCInst are represented as five operands: 469 // 1. basereg (register) The R/M base, or (if there is a SIB) the 470 // SIB base 471 // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified 472 // scale amount 473 // 3. indexreg (register) x86_registerNONE, or (if there is a SIB) 474 // the index (which is multiplied by the 475 // scale amount) 476 // 4. displacement (immediate) 0, or the displacement if there is one 477 // 5. segmentreg (register) x86_registerNONE for now, but could be set 478 // if we have segment overrides 479 480 MCOperand baseReg; 481 MCOperand scaleAmount; 482 MCOperand indexReg; 483 MCOperand displacement; 484 MCOperand segmentReg; 485 uint64_t pcrel = 0; 486 487 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) { 488 if (insn.sibBase != SIB_BASE_NONE) { 489 switch (insn.sibBase) { 490 default: 491 debug("Unexpected sibBase"); 492 return true; 493 #define ENTRY(x) \ 494 case SIB_BASE_##x: \ 495 baseReg = MCOperand::CreateReg(X86::x); break; 496 ALL_SIB_BASES 497 #undef ENTRY 498 } 499 } else { 500 baseReg = MCOperand::CreateReg(0); 501 } 502 503 // Check whether we are handling VSIB addressing mode for GATHER. 504 // If sibIndex was set to SIB_INDEX_NONE, index offset is 4 and 505 // we should use SIB_INDEX_XMM4|YMM4 for VSIB. 506 // I don't see a way to get the correct IndexReg in readSIB: 507 // We can tell whether it is VSIB or SIB after instruction ID is decoded, 508 // but instruction ID may not be decoded yet when calling readSIB. 509 uint32_t Opcode = mcInst.getOpcode(); 510 bool IndexIs128 = (Opcode == X86::VGATHERDPDrm || 511 Opcode == X86::VGATHERDPDYrm || 512 Opcode == X86::VGATHERQPDrm || 513 Opcode == X86::VGATHERDPSrm || 514 Opcode == X86::VGATHERQPSrm || 515 Opcode == X86::VPGATHERDQrm || 516 Opcode == X86::VPGATHERDQYrm || 517 Opcode == X86::VPGATHERQQrm || 518 Opcode == X86::VPGATHERDDrm || 519 Opcode == X86::VPGATHERQDrm); 520 bool IndexIs256 = (Opcode == X86::VGATHERQPDYrm || 521 Opcode == X86::VGATHERDPSYrm || 522 Opcode == X86::VGATHERQPSYrm || 523 Opcode == X86::VPGATHERQQYrm || 524 Opcode == X86::VPGATHERDDYrm || 525 Opcode == X86::VPGATHERQDYrm); 526 if (IndexIs128 || IndexIs256) { 527 unsigned IndexOffset = insn.sibIndex - 528 (insn.addressSize == 8 ? SIB_INDEX_RAX:SIB_INDEX_EAX); 529 SIBIndex IndexBase = IndexIs256 ? SIB_INDEX_YMM0 : SIB_INDEX_XMM0; 530 insn.sibIndex = (SIBIndex)(IndexBase + 531 (insn.sibIndex == SIB_INDEX_NONE ? 4 : IndexOffset)); 532 } 533 534 if (insn.sibIndex != SIB_INDEX_NONE) { 535 switch (insn.sibIndex) { 536 default: 537 debug("Unexpected sibIndex"); 538 return true; 539 #define ENTRY(x) \ 540 case SIB_INDEX_##x: \ 541 indexReg = MCOperand::CreateReg(X86::x); break; 542 EA_BASES_32BIT 543 EA_BASES_64BIT 544 REGS_XMM 545 REGS_YMM 546 #undef ENTRY 547 } 548 } else { 549 indexReg = MCOperand::CreateReg(0); 550 } 551 552 scaleAmount = MCOperand::CreateImm(insn.sibScale); 553 } else { 554 switch (insn.eaBase) { 555 case EA_BASE_NONE: 556 if (insn.eaDisplacement == EA_DISP_NONE) { 557 debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base"); 558 return true; 559 } 560 if (insn.mode == MODE_64BIT){ 561 pcrel = insn.startLocation + 562 insn.displacementOffset + insn.displacementSize; 563 tryAddingPcLoadReferenceComment(insn.startLocation + 564 insn.displacementOffset, 565 insn.displacement + pcrel, Dis); 566 baseReg = MCOperand::CreateReg(X86::RIP); // Section 2.2.1.6 567 } 568 else 569 baseReg = MCOperand::CreateReg(0); 570 571 indexReg = MCOperand::CreateReg(0); 572 break; 573 case EA_BASE_BX_SI: 574 baseReg = MCOperand::CreateReg(X86::BX); 575 indexReg = MCOperand::CreateReg(X86::SI); 576 break; 577 case EA_BASE_BX_DI: 578 baseReg = MCOperand::CreateReg(X86::BX); 579 indexReg = MCOperand::CreateReg(X86::DI); 580 break; 581 case EA_BASE_BP_SI: 582 baseReg = MCOperand::CreateReg(X86::BP); 583 indexReg = MCOperand::CreateReg(X86::SI); 584 break; 585 case EA_BASE_BP_DI: 586 baseReg = MCOperand::CreateReg(X86::BP); 587 indexReg = MCOperand::CreateReg(X86::DI); 588 break; 589 default: 590 indexReg = MCOperand::CreateReg(0); 591 switch (insn.eaBase) { 592 default: 593 debug("Unexpected eaBase"); 594 return true; 595 // Here, we will use the fill-ins defined above. However, 596 // BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and 597 // sib and sib64 were handled in the top-level if, so they're only 598 // placeholders to keep the compiler happy. 599 #define ENTRY(x) \ 600 case EA_BASE_##x: \ 601 baseReg = MCOperand::CreateReg(X86::x); break; 602 ALL_EA_BASES 603 #undef ENTRY 604 #define ENTRY(x) case EA_REG_##x: 605 ALL_REGS 606 #undef ENTRY 607 debug("A R/M memory operand may not be a register; " 608 "the base field must be a base."); 609 return true; 610 } 611 } 612 613 scaleAmount = MCOperand::CreateImm(1); 614 } 615 616 displacement = MCOperand::CreateImm(insn.displacement); 617 618 static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = { 619 0, // SEG_OVERRIDE_NONE 620 X86::CS, 621 X86::SS, 622 X86::DS, 623 X86::ES, 624 X86::FS, 625 X86::GS 626 }; 627 628 segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]); 629 630 mcInst.addOperand(baseReg); 631 mcInst.addOperand(scaleAmount); 632 mcInst.addOperand(indexReg); 633 if(!tryAddingSymbolicOperand(insn.displacement + pcrel, false, 634 insn.startLocation, insn.displacementOffset, 635 insn.displacementSize, mcInst, Dis)) 636 mcInst.addOperand(displacement); 637 mcInst.addOperand(segmentReg); 638 return false; 639 } 640 641 /// translateRM - Translates an operand stored in the R/M (and possibly SIB) 642 /// byte of an instruction to LLVM form, and appends it to an MCInst. 643 /// 644 /// @param mcInst - The MCInst to append to. 645 /// @param operand - The operand, as stored in the descriptor table. 646 /// @param insn - The instruction to extract Mod, R/M, and SIB fields 647 /// from. 648 /// @return - 0 on success; nonzero otherwise 649 static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, 650 InternalInstruction &insn, const MCDisassembler *Dis) { 651 switch (operand.type) { 652 default: 653 debug("Unexpected type for a R/M operand"); 654 return true; 655 case TYPE_R8: 656 case TYPE_R16: 657 case TYPE_R32: 658 case TYPE_R64: 659 case TYPE_Rv: 660 case TYPE_MM: 661 case TYPE_MM32: 662 case TYPE_MM64: 663 case TYPE_XMM: 664 case TYPE_XMM32: 665 case TYPE_XMM64: 666 case TYPE_XMM128: 667 case TYPE_XMM256: 668 case TYPE_DEBUGREG: 669 case TYPE_CONTROLREG: 670 return translateRMRegister(mcInst, insn); 671 case TYPE_M: 672 case TYPE_M8: 673 case TYPE_M16: 674 case TYPE_M32: 675 case TYPE_M64: 676 case TYPE_M128: 677 case TYPE_M256: 678 case TYPE_M512: 679 case TYPE_Mv: 680 case TYPE_M32FP: 681 case TYPE_M64FP: 682 case TYPE_M80FP: 683 case TYPE_M16INT: 684 case TYPE_M32INT: 685 case TYPE_M64INT: 686 case TYPE_M1616: 687 case TYPE_M1632: 688 case TYPE_M1664: 689 case TYPE_LEA: 690 return translateRMMemory(mcInst, insn, Dis); 691 } 692 } 693 694 /// translateFPRegister - Translates a stack position on the FPU stack to its 695 /// LLVM form, and appends it to an MCInst. 696 /// 697 /// @param mcInst - The MCInst to append to. 698 /// @param stackPos - The stack position to translate. 699 /// @return - 0 on success; nonzero otherwise. 700 static bool translateFPRegister(MCInst &mcInst, 701 uint8_t stackPos) { 702 if (stackPos >= 8) { 703 debug("Invalid FP stack position"); 704 return true; 705 } 706 707 mcInst.addOperand(MCOperand::CreateReg(X86::ST0 + stackPos)); 708 709 return false; 710 } 711 712 /// translateOperand - Translates an operand stored in an internal instruction 713 /// to LLVM's format and appends it to an MCInst. 714 /// 715 /// @param mcInst - The MCInst to append to. 716 /// @param operand - The operand, as stored in the descriptor table. 717 /// @param insn - The internal instruction. 718 /// @return - false on success; true otherwise. 719 static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, 720 InternalInstruction &insn, 721 const MCDisassembler *Dis) { 722 switch (operand.encoding) { 723 default: 724 debug("Unhandled operand encoding during translation"); 725 return true; 726 case ENCODING_REG: 727 translateRegister(mcInst, insn.reg); 728 return false; 729 case ENCODING_RM: 730 return translateRM(mcInst, operand, insn, Dis); 731 case ENCODING_CB: 732 case ENCODING_CW: 733 case ENCODING_CD: 734 case ENCODING_CP: 735 case ENCODING_CO: 736 case ENCODING_CT: 737 debug("Translation of code offsets isn't supported."); 738 return true; 739 case ENCODING_IB: 740 case ENCODING_IW: 741 case ENCODING_ID: 742 case ENCODING_IO: 743 case ENCODING_Iv: 744 case ENCODING_Ia: 745 translateImmediate(mcInst, 746 insn.immediates[insn.numImmediatesTranslated++], 747 operand, 748 insn, 749 Dis); 750 return false; 751 case ENCODING_RB: 752 case ENCODING_RW: 753 case ENCODING_RD: 754 case ENCODING_RO: 755 translateRegister(mcInst, insn.opcodeRegister); 756 return false; 757 case ENCODING_I: 758 return translateFPRegister(mcInst, insn.opcodeModifier); 759 case ENCODING_Rv: 760 translateRegister(mcInst, insn.opcodeRegister); 761 return false; 762 case ENCODING_VVVV: 763 translateRegister(mcInst, insn.vvvv); 764 return false; 765 case ENCODING_DUP: 766 return translateOperand(mcInst, insn.operands[operand.type - TYPE_DUP0], 767 insn, Dis); 768 } 769 } 770 771 /// translateInstruction - Translates an internal instruction and all its 772 /// operands to an MCInst. 773 /// 774 /// @param mcInst - The MCInst to populate with the instruction's data. 775 /// @param insn - The internal instruction. 776 /// @return - false on success; true otherwise. 777 static bool translateInstruction(MCInst &mcInst, 778 InternalInstruction &insn, 779 const MCDisassembler *Dis) { 780 if (!insn.spec) { 781 debug("Instruction has no specification"); 782 return true; 783 } 784 785 mcInst.setOpcode(insn.instructionID); 786 787 int index; 788 789 insn.numImmediatesTranslated = 0; 790 791 for (index = 0; index < X86_MAX_OPERANDS; ++index) { 792 if (insn.operands[index].encoding != ENCODING_NONE) { 793 if (translateOperand(mcInst, insn.operands[index], insn, Dis)) { 794 return true; 795 } 796 } 797 } 798 799 return false; 800 } 801 802 static MCDisassembler *createX86_32Disassembler(const Target &T, 803 const MCSubtargetInfo &STI) { 804 return new X86Disassembler::X86GenericDisassembler(STI, MODE_32BIT, 805 T.createMCInstrInfo()); 806 } 807 808 static MCDisassembler *createX86_64Disassembler(const Target &T, 809 const MCSubtargetInfo &STI) { 810 return new X86Disassembler::X86GenericDisassembler(STI, MODE_64BIT, 811 T.createMCInstrInfo()); 812 } 813 814 extern "C" void LLVMInitializeX86Disassembler() { 815 // Register the disassembler. 816 TargetRegistry::RegisterMCDisassembler(TheX86_32Target, 817 createX86_32Disassembler); 818 TargetRegistry::RegisterMCDisassembler(TheX86_64Target, 819 createX86_64Disassembler); 820 } 821