1 //===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file is part of the X86 Disassembler. 11 // It contains code to translate the data produced by the decoder into 12 // MCInsts. 13 // Documentation for the disassembler can be found in X86Disassembler.h. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "X86Disassembler.h" 18 #include "X86DisassemblerDecoder.h" 19 #include "llvm/MC/MCContext.h" 20 #include "llvm/MC/MCDisassembler.h" 21 #include "llvm/MC/MCExpr.h" 22 #include "llvm/MC/MCInst.h" 23 #include "llvm/MC/MCInstrInfo.h" 24 #include "llvm/MC/MCSubtargetInfo.h" 25 #include "llvm/Support/Debug.h" 26 #include "llvm/Support/TargetRegistry.h" 27 #include "llvm/Support/raw_ostream.h" 28 29 using namespace llvm; 30 using namespace llvm::X86Disassembler; 31 32 #define DEBUG_TYPE "x86-disassembler" 33 34 #define GET_REGINFO_ENUM 35 #include "X86GenRegisterInfo.inc" 36 #define GET_INSTRINFO_ENUM 37 #include "X86GenInstrInfo.inc" 38 #define GET_SUBTARGETINFO_ENUM 39 #include "X86GenSubtargetInfo.inc" 40 41 void llvm::X86Disassembler::Debug(const char *file, unsigned line, 42 const char *s) { 43 dbgs() << file << ":" << line << ": " << s; 44 } 45 46 const char *llvm::X86Disassembler::GetInstrName(unsigned Opcode, 47 const void *mii) { 48 const MCInstrInfo *MII = static_cast<const MCInstrInfo *>(mii); 49 return MII->getName(Opcode); 50 } 51 52 #define debug(s) DEBUG(Debug(__FILE__, __LINE__, s)); 53 54 namespace llvm { 55 56 // Fill-ins to make the compiler happy. These constants are never actually 57 // assigned; they are just filler to make an automatically-generated switch 58 // statement work. 59 namespace X86 { 60 enum { 61 BX_SI = 500, 62 BX_DI = 501, 63 BP_SI = 502, 64 BP_DI = 503, 65 sib = 504, 66 sib64 = 505 67 }; 68 } 69 70 extern Target TheX86_32Target, TheX86_64Target; 71 72 } 73 74 static bool translateInstruction(MCInst &target, 75 InternalInstruction &source, 76 const MCDisassembler *Dis); 77 78 X86GenericDisassembler::X86GenericDisassembler( 79 const MCSubtargetInfo &STI, 80 MCContext &Ctx, 81 std::unique_ptr<const MCInstrInfo> MII) 82 : MCDisassembler(STI, Ctx), MII(std::move(MII)) { 83 switch (STI.getFeatureBits() & 84 (X86::Mode16Bit | X86::Mode32Bit | X86::Mode64Bit)) { 85 case X86::Mode16Bit: 86 fMode = MODE_16BIT; 87 break; 88 case X86::Mode32Bit: 89 fMode = MODE_32BIT; 90 break; 91 case X86::Mode64Bit: 92 fMode = MODE_64BIT; 93 break; 94 default: 95 llvm_unreachable("Invalid CPU mode"); 96 } 97 } 98 99 struct Region { 100 ArrayRef<uint8_t> Bytes; 101 uint64_t Base; 102 Region(ArrayRef<uint8_t> Bytes, uint64_t Base) : Bytes(Bytes), Base(Base) {} 103 }; 104 105 /// A callback function that wraps the readByte method from Region. 106 /// 107 /// @param Arg - The generic callback parameter. In this case, this should 108 /// be a pointer to a Region. 109 /// @param Byte - A pointer to the byte to be read. 110 /// @param Address - The address to be read. 111 static int regionReader(const void *Arg, uint8_t *Byte, uint64_t Address) { 112 auto *R = static_cast<const Region *>(Arg); 113 ArrayRef<uint8_t> Bytes = R->Bytes; 114 unsigned Index = Address - R->Base; 115 if (Bytes.size() <= Index) 116 return -1; 117 *Byte = Bytes[Index]; 118 return 0; 119 } 120 121 /// logger - a callback function that wraps the operator<< method from 122 /// raw_ostream. 123 /// 124 /// @param arg - The generic callback parameter. This should be a pointe 125 /// to a raw_ostream. 126 /// @param log - A string to be logged. logger() adds a newline. 127 static void logger(void* arg, const char* log) { 128 if (!arg) 129 return; 130 131 raw_ostream &vStream = *(static_cast<raw_ostream*>(arg)); 132 vStream << log << "\n"; 133 } 134 135 // 136 // Public interface for the disassembler 137 // 138 139 MCDisassembler::DecodeStatus X86GenericDisassembler::getInstruction( 140 MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, 141 raw_ostream &VStream, raw_ostream &CStream) const { 142 CommentStream = &CStream; 143 144 InternalInstruction InternalInstr; 145 146 dlog_t LoggerFn = logger; 147 if (&VStream == &nulls()) 148 LoggerFn = nullptr; // Disable logging completely if it's going to nulls(). 149 150 Region R(Bytes, Address); 151 152 int Ret = decodeInstruction(&InternalInstr, regionReader, (const void *)&R, 153 LoggerFn, (void *)&VStream, 154 (const void *)MII.get(), Address, fMode); 155 156 if (Ret) { 157 Size = InternalInstr.readerCursor - Address; 158 return Fail; 159 } else { 160 Size = InternalInstr.length; 161 return (!translateInstruction(Instr, InternalInstr, this)) ? Success : Fail; 162 } 163 } 164 165 // 166 // Private code that translates from struct InternalInstructions to MCInsts. 167 // 168 169 /// translateRegister - Translates an internal register to the appropriate LLVM 170 /// register, and appends it as an operand to an MCInst. 171 /// 172 /// @param mcInst - The MCInst to append to. 173 /// @param reg - The Reg to append. 174 static void translateRegister(MCInst &mcInst, Reg reg) { 175 #define ENTRY(x) X86::x, 176 uint8_t llvmRegnums[] = { 177 ALL_REGS 178 0 179 }; 180 #undef ENTRY 181 182 uint8_t llvmRegnum = llvmRegnums[reg]; 183 mcInst.addOperand(MCOperand::CreateReg(llvmRegnum)); 184 } 185 186 /// tryAddingSymbolicOperand - trys to add a symbolic operand in place of the 187 /// immediate Value in the MCInst. 188 /// 189 /// @param Value - The immediate Value, has had any PC adjustment made by 190 /// the caller. 191 /// @param isBranch - If the instruction is a branch instruction 192 /// @param Address - The starting address of the instruction 193 /// @param Offset - The byte offset to this immediate in the instruction 194 /// @param Width - The byte width of this immediate in the instruction 195 /// 196 /// If the getOpInfo() function was set when setupForSymbolicDisassembly() was 197 /// called then that function is called to get any symbolic information for the 198 /// immediate in the instruction using the Address, Offset and Width. If that 199 /// returns non-zero then the symbolic information it returns is used to create 200 /// an MCExpr and that is added as an operand to the MCInst. If getOpInfo() 201 /// returns zero and isBranch is true then a symbol look up for immediate Value 202 /// is done and if a symbol is found an MCExpr is created with that, else 203 /// an MCExpr with the immediate Value is created. This function returns true 204 /// if it adds an operand to the MCInst and false otherwise. 205 static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch, 206 uint64_t Address, uint64_t Offset, 207 uint64_t Width, MCInst &MI, 208 const MCDisassembler *Dis) { 209 return Dis->tryAddingSymbolicOperand(MI, Value, Address, isBranch, 210 Offset, Width); 211 } 212 213 /// tryAddingPcLoadReferenceComment - trys to add a comment as to what is being 214 /// referenced by a load instruction with the base register that is the rip. 215 /// These can often be addresses in a literal pool. The Address of the 216 /// instruction and its immediate Value are used to determine the address 217 /// being referenced in the literal pool entry. The SymbolLookUp call back will 218 /// return a pointer to a literal 'C' string if the referenced address is an 219 /// address into a section with 'C' string literals. 220 static void tryAddingPcLoadReferenceComment(uint64_t Address, uint64_t Value, 221 const void *Decoder) { 222 const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder); 223 Dis->tryAddingPcLoadReferenceComment(Value, Address); 224 } 225 226 static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = { 227 0, // SEG_OVERRIDE_NONE 228 X86::CS, 229 X86::SS, 230 X86::DS, 231 X86::ES, 232 X86::FS, 233 X86::GS 234 }; 235 236 /// translateSrcIndex - Appends a source index operand to an MCInst. 237 /// 238 /// @param mcInst - The MCInst to append to. 239 /// @param insn - The internal instruction. 240 static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn) { 241 unsigned baseRegNo; 242 243 if (insn.mode == MODE_64BIT) 244 baseRegNo = insn.prefixPresent[0x67] ? X86::ESI : X86::RSI; 245 else if (insn.mode == MODE_32BIT) 246 baseRegNo = insn.prefixPresent[0x67] ? X86::SI : X86::ESI; 247 else { 248 assert(insn.mode == MODE_16BIT); 249 baseRegNo = insn.prefixPresent[0x67] ? X86::ESI : X86::SI; 250 } 251 MCOperand baseReg = MCOperand::CreateReg(baseRegNo); 252 mcInst.addOperand(baseReg); 253 254 MCOperand segmentReg; 255 segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]); 256 mcInst.addOperand(segmentReg); 257 return false; 258 } 259 260 /// translateDstIndex - Appends a destination index operand to an MCInst. 261 /// 262 /// @param mcInst - The MCInst to append to. 263 /// @param insn - The internal instruction. 264 265 static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn) { 266 unsigned baseRegNo; 267 268 if (insn.mode == MODE_64BIT) 269 baseRegNo = insn.prefixPresent[0x67] ? X86::EDI : X86::RDI; 270 else if (insn.mode == MODE_32BIT) 271 baseRegNo = insn.prefixPresent[0x67] ? X86::DI : X86::EDI; 272 else { 273 assert(insn.mode == MODE_16BIT); 274 baseRegNo = insn.prefixPresent[0x67] ? X86::EDI : X86::DI; 275 } 276 MCOperand baseReg = MCOperand::CreateReg(baseRegNo); 277 mcInst.addOperand(baseReg); 278 return false; 279 } 280 281 /// translateImmediate - Appends an immediate operand to an MCInst. 282 /// 283 /// @param mcInst - The MCInst to append to. 284 /// @param immediate - The immediate value to append. 285 /// @param operand - The operand, as stored in the descriptor table. 286 /// @param insn - The internal instruction. 287 static void translateImmediate(MCInst &mcInst, uint64_t immediate, 288 const OperandSpecifier &operand, 289 InternalInstruction &insn, 290 const MCDisassembler *Dis) { 291 // Sign-extend the immediate if necessary. 292 293 OperandType type = (OperandType)operand.type; 294 295 bool isBranch = false; 296 uint64_t pcrel = 0; 297 if (type == TYPE_RELv) { 298 isBranch = true; 299 pcrel = insn.startLocation + 300 insn.immediateOffset + insn.immediateSize; 301 switch (insn.displacementSize) { 302 default: 303 break; 304 case 1: 305 if(immediate & 0x80) 306 immediate |= ~(0xffull); 307 break; 308 case 2: 309 if(immediate & 0x8000) 310 immediate |= ~(0xffffull); 311 break; 312 case 4: 313 if(immediate & 0x80000000) 314 immediate |= ~(0xffffffffull); 315 break; 316 case 8: 317 break; 318 } 319 } 320 // By default sign-extend all X86 immediates based on their encoding. 321 else if (type == TYPE_IMM8 || type == TYPE_IMM16 || type == TYPE_IMM32 || 322 type == TYPE_IMM64 || type == TYPE_IMMv) { 323 switch (operand.encoding) { 324 default: 325 break; 326 case ENCODING_IB: 327 if(immediate & 0x80) 328 immediate |= ~(0xffull); 329 break; 330 case ENCODING_IW: 331 if(immediate & 0x8000) 332 immediate |= ~(0xffffull); 333 break; 334 case ENCODING_ID: 335 if(immediate & 0x80000000) 336 immediate |= ~(0xffffffffull); 337 break; 338 case ENCODING_IO: 339 break; 340 } 341 } else if (type == TYPE_IMM3) { 342 // Check for immediates that printSSECC can't handle. 343 if (immediate >= 8) { 344 unsigned NewOpc; 345 switch (mcInst.getOpcode()) { 346 default: llvm_unreachable("unexpected opcode"); 347 case X86::CMPPDrmi: NewOpc = X86::CMPPDrmi_alt; break; 348 case X86::CMPPDrri: NewOpc = X86::CMPPDrri_alt; break; 349 case X86::CMPPSrmi: NewOpc = X86::CMPPSrmi_alt; break; 350 case X86::CMPPSrri: NewOpc = X86::CMPPSrri_alt; break; 351 case X86::CMPSDrm: NewOpc = X86::CMPSDrm_alt; break; 352 case X86::CMPSDrr: NewOpc = X86::CMPSDrr_alt; break; 353 case X86::CMPSSrm: NewOpc = X86::CMPSSrm_alt; break; 354 case X86::CMPSSrr: NewOpc = X86::CMPSSrr_alt; break; 355 } 356 // Switch opcode to the one that doesn't get special printing. 357 mcInst.setOpcode(NewOpc); 358 } 359 } else if (type == TYPE_IMM5) { 360 // Check for immediates that printAVXCC can't handle. 361 if (immediate >= 32) { 362 unsigned NewOpc; 363 switch (mcInst.getOpcode()) { 364 default: llvm_unreachable("unexpected opcode"); 365 case X86::VCMPPDrmi: NewOpc = X86::VCMPPDrmi_alt; break; 366 case X86::VCMPPDrri: NewOpc = X86::VCMPPDrri_alt; break; 367 case X86::VCMPPSrmi: NewOpc = X86::VCMPPSrmi_alt; break; 368 case X86::VCMPPSrri: NewOpc = X86::VCMPPSrri_alt; break; 369 case X86::VCMPSDrm: NewOpc = X86::VCMPSDrm_alt; break; 370 case X86::VCMPSDrr: NewOpc = X86::VCMPSDrr_alt; break; 371 case X86::VCMPSSrm: NewOpc = X86::VCMPSSrm_alt; break; 372 case X86::VCMPSSrr: NewOpc = X86::VCMPSSrr_alt; break; 373 case X86::VCMPPDYrmi: NewOpc = X86::VCMPPDYrmi_alt; break; 374 case X86::VCMPPDYrri: NewOpc = X86::VCMPPDYrri_alt; break; 375 case X86::VCMPPSYrmi: NewOpc = X86::VCMPPSYrmi_alt; break; 376 case X86::VCMPPSYrri: NewOpc = X86::VCMPPSYrri_alt; break; 377 case X86::VCMPPDZrmi: NewOpc = X86::VCMPPDZrmi_alt; break; 378 case X86::VCMPPDZrri: NewOpc = X86::VCMPPDZrri_alt; break; 379 case X86::VCMPPSZrmi: NewOpc = X86::VCMPPSZrmi_alt; break; 380 case X86::VCMPPSZrri: NewOpc = X86::VCMPPSZrri_alt; break; 381 case X86::VCMPSDZrm: NewOpc = X86::VCMPSDZrmi_alt; break; 382 case X86::VCMPSDZrr: NewOpc = X86::VCMPSDZrri_alt; break; 383 case X86::VCMPSSZrm: NewOpc = X86::VCMPSSZrmi_alt; break; 384 case X86::VCMPSSZrr: NewOpc = X86::VCMPSSZrri_alt; break; 385 } 386 // Switch opcode to the one that doesn't get special printing. 387 mcInst.setOpcode(NewOpc); 388 } 389 } 390 391 switch (type) { 392 case TYPE_XMM32: 393 case TYPE_XMM64: 394 case TYPE_XMM128: 395 mcInst.addOperand(MCOperand::CreateReg(X86::XMM0 + (immediate >> 4))); 396 return; 397 case TYPE_XMM256: 398 mcInst.addOperand(MCOperand::CreateReg(X86::YMM0 + (immediate >> 4))); 399 return; 400 case TYPE_XMM512: 401 mcInst.addOperand(MCOperand::CreateReg(X86::ZMM0 + (immediate >> 4))); 402 return; 403 case TYPE_REL8: 404 isBranch = true; 405 pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize; 406 if(immediate & 0x80) 407 immediate |= ~(0xffull); 408 break; 409 case TYPE_REL32: 410 case TYPE_REL64: 411 isBranch = true; 412 pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize; 413 if(immediate & 0x80000000) 414 immediate |= ~(0xffffffffull); 415 break; 416 default: 417 // operand is 64 bits wide. Do nothing. 418 break; 419 } 420 421 if(!tryAddingSymbolicOperand(immediate + pcrel, isBranch, insn.startLocation, 422 insn.immediateOffset, insn.immediateSize, 423 mcInst, Dis)) 424 mcInst.addOperand(MCOperand::CreateImm(immediate)); 425 426 if (type == TYPE_MOFFS8 || type == TYPE_MOFFS16 || 427 type == TYPE_MOFFS32 || type == TYPE_MOFFS64) { 428 MCOperand segmentReg; 429 segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]); 430 mcInst.addOperand(segmentReg); 431 } 432 } 433 434 /// translateRMRegister - Translates a register stored in the R/M field of the 435 /// ModR/M byte to its LLVM equivalent and appends it to an MCInst. 436 /// @param mcInst - The MCInst to append to. 437 /// @param insn - The internal instruction to extract the R/M field 438 /// from. 439 /// @return - 0 on success; -1 otherwise 440 static bool translateRMRegister(MCInst &mcInst, 441 InternalInstruction &insn) { 442 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) { 443 debug("A R/M register operand may not have a SIB byte"); 444 return true; 445 } 446 447 switch (insn.eaBase) { 448 default: 449 debug("Unexpected EA base register"); 450 return true; 451 case EA_BASE_NONE: 452 debug("EA_BASE_NONE for ModR/M base"); 453 return true; 454 #define ENTRY(x) case EA_BASE_##x: 455 ALL_EA_BASES 456 #undef ENTRY 457 debug("A R/M register operand may not have a base; " 458 "the operand must be a register."); 459 return true; 460 #define ENTRY(x) \ 461 case EA_REG_##x: \ 462 mcInst.addOperand(MCOperand::CreateReg(X86::x)); break; 463 ALL_REGS 464 #undef ENTRY 465 } 466 467 return false; 468 } 469 470 /// translateRMMemory - Translates a memory operand stored in the Mod and R/M 471 /// fields of an internal instruction (and possibly its SIB byte) to a memory 472 /// operand in LLVM's format, and appends it to an MCInst. 473 /// 474 /// @param mcInst - The MCInst to append to. 475 /// @param insn - The instruction to extract Mod, R/M, and SIB fields 476 /// from. 477 /// @return - 0 on success; nonzero otherwise 478 static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, 479 const MCDisassembler *Dis) { 480 // Addresses in an MCInst are represented as five operands: 481 // 1. basereg (register) The R/M base, or (if there is a SIB) the 482 // SIB base 483 // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified 484 // scale amount 485 // 3. indexreg (register) x86_registerNONE, or (if there is a SIB) 486 // the index (which is multiplied by the 487 // scale amount) 488 // 4. displacement (immediate) 0, or the displacement if there is one 489 // 5. segmentreg (register) x86_registerNONE for now, but could be set 490 // if we have segment overrides 491 492 MCOperand baseReg; 493 MCOperand scaleAmount; 494 MCOperand indexReg; 495 MCOperand displacement; 496 MCOperand segmentReg; 497 uint64_t pcrel = 0; 498 499 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) { 500 if (insn.sibBase != SIB_BASE_NONE) { 501 switch (insn.sibBase) { 502 default: 503 debug("Unexpected sibBase"); 504 return true; 505 #define ENTRY(x) \ 506 case SIB_BASE_##x: \ 507 baseReg = MCOperand::CreateReg(X86::x); break; 508 ALL_SIB_BASES 509 #undef ENTRY 510 } 511 } else { 512 baseReg = MCOperand::CreateReg(0); 513 } 514 515 // Check whether we are handling VSIB addressing mode for GATHER. 516 // If sibIndex was set to SIB_INDEX_NONE, index offset is 4 and 517 // we should use SIB_INDEX_XMM4|YMM4 for VSIB. 518 // I don't see a way to get the correct IndexReg in readSIB: 519 // We can tell whether it is VSIB or SIB after instruction ID is decoded, 520 // but instruction ID may not be decoded yet when calling readSIB. 521 uint32_t Opcode = mcInst.getOpcode(); 522 bool IndexIs128 = (Opcode == X86::VGATHERDPDrm || 523 Opcode == X86::VGATHERDPDYrm || 524 Opcode == X86::VGATHERQPDrm || 525 Opcode == X86::VGATHERDPSrm || 526 Opcode == X86::VGATHERQPSrm || 527 Opcode == X86::VPGATHERDQrm || 528 Opcode == X86::VPGATHERDQYrm || 529 Opcode == X86::VPGATHERQQrm || 530 Opcode == X86::VPGATHERDDrm || 531 Opcode == X86::VPGATHERQDrm); 532 bool IndexIs256 = (Opcode == X86::VGATHERQPDYrm || 533 Opcode == X86::VGATHERDPSYrm || 534 Opcode == X86::VGATHERQPSYrm || 535 Opcode == X86::VGATHERDPDZrm || 536 Opcode == X86::VPGATHERDQZrm || 537 Opcode == X86::VPGATHERQQYrm || 538 Opcode == X86::VPGATHERDDYrm || 539 Opcode == X86::VPGATHERQDYrm); 540 bool IndexIs512 = (Opcode == X86::VGATHERQPDZrm || 541 Opcode == X86::VGATHERDPSZrm || 542 Opcode == X86::VGATHERQPSZrm || 543 Opcode == X86::VPGATHERQQZrm || 544 Opcode == X86::VPGATHERDDZrm || 545 Opcode == X86::VPGATHERQDZrm); 546 if (IndexIs128 || IndexIs256 || IndexIs512) { 547 unsigned IndexOffset = insn.sibIndex - 548 (insn.addressSize == 8 ? SIB_INDEX_RAX:SIB_INDEX_EAX); 549 SIBIndex IndexBase = IndexIs512 ? SIB_INDEX_ZMM0 : 550 IndexIs256 ? SIB_INDEX_YMM0 : SIB_INDEX_XMM0; 551 insn.sibIndex = (SIBIndex)(IndexBase + 552 (insn.sibIndex == SIB_INDEX_NONE ? 4 : IndexOffset)); 553 } 554 555 if (insn.sibIndex != SIB_INDEX_NONE) { 556 switch (insn.sibIndex) { 557 default: 558 debug("Unexpected sibIndex"); 559 return true; 560 #define ENTRY(x) \ 561 case SIB_INDEX_##x: \ 562 indexReg = MCOperand::CreateReg(X86::x); break; 563 EA_BASES_32BIT 564 EA_BASES_64BIT 565 REGS_XMM 566 REGS_YMM 567 REGS_ZMM 568 #undef ENTRY 569 } 570 } else { 571 indexReg = MCOperand::CreateReg(0); 572 } 573 574 scaleAmount = MCOperand::CreateImm(insn.sibScale); 575 } else { 576 switch (insn.eaBase) { 577 case EA_BASE_NONE: 578 if (insn.eaDisplacement == EA_DISP_NONE) { 579 debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base"); 580 return true; 581 } 582 if (insn.mode == MODE_64BIT){ 583 pcrel = insn.startLocation + 584 insn.displacementOffset + insn.displacementSize; 585 tryAddingPcLoadReferenceComment(insn.startLocation + 586 insn.displacementOffset, 587 insn.displacement + pcrel, Dis); 588 baseReg = MCOperand::CreateReg(X86::RIP); // Section 2.2.1.6 589 } 590 else 591 baseReg = MCOperand::CreateReg(0); 592 593 indexReg = MCOperand::CreateReg(0); 594 break; 595 case EA_BASE_BX_SI: 596 baseReg = MCOperand::CreateReg(X86::BX); 597 indexReg = MCOperand::CreateReg(X86::SI); 598 break; 599 case EA_BASE_BX_DI: 600 baseReg = MCOperand::CreateReg(X86::BX); 601 indexReg = MCOperand::CreateReg(X86::DI); 602 break; 603 case EA_BASE_BP_SI: 604 baseReg = MCOperand::CreateReg(X86::BP); 605 indexReg = MCOperand::CreateReg(X86::SI); 606 break; 607 case EA_BASE_BP_DI: 608 baseReg = MCOperand::CreateReg(X86::BP); 609 indexReg = MCOperand::CreateReg(X86::DI); 610 break; 611 default: 612 indexReg = MCOperand::CreateReg(0); 613 switch (insn.eaBase) { 614 default: 615 debug("Unexpected eaBase"); 616 return true; 617 // Here, we will use the fill-ins defined above. However, 618 // BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and 619 // sib and sib64 were handled in the top-level if, so they're only 620 // placeholders to keep the compiler happy. 621 #define ENTRY(x) \ 622 case EA_BASE_##x: \ 623 baseReg = MCOperand::CreateReg(X86::x); break; 624 ALL_EA_BASES 625 #undef ENTRY 626 #define ENTRY(x) case EA_REG_##x: 627 ALL_REGS 628 #undef ENTRY 629 debug("A R/M memory operand may not be a register; " 630 "the base field must be a base."); 631 return true; 632 } 633 } 634 635 scaleAmount = MCOperand::CreateImm(1); 636 } 637 638 displacement = MCOperand::CreateImm(insn.displacement); 639 640 segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]); 641 642 mcInst.addOperand(baseReg); 643 mcInst.addOperand(scaleAmount); 644 mcInst.addOperand(indexReg); 645 if(!tryAddingSymbolicOperand(insn.displacement + pcrel, false, 646 insn.startLocation, insn.displacementOffset, 647 insn.displacementSize, mcInst, Dis)) 648 mcInst.addOperand(displacement); 649 mcInst.addOperand(segmentReg); 650 return false; 651 } 652 653 /// translateRM - Translates an operand stored in the R/M (and possibly SIB) 654 /// byte of an instruction to LLVM form, and appends it to an MCInst. 655 /// 656 /// @param mcInst - The MCInst to append to. 657 /// @param operand - The operand, as stored in the descriptor table. 658 /// @param insn - The instruction to extract Mod, R/M, and SIB fields 659 /// from. 660 /// @return - 0 on success; nonzero otherwise 661 static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, 662 InternalInstruction &insn, const MCDisassembler *Dis) { 663 switch (operand.type) { 664 default: 665 debug("Unexpected type for a R/M operand"); 666 return true; 667 case TYPE_R8: 668 case TYPE_R16: 669 case TYPE_R32: 670 case TYPE_R64: 671 case TYPE_Rv: 672 case TYPE_MM64: 673 case TYPE_XMM: 674 case TYPE_XMM32: 675 case TYPE_XMM64: 676 case TYPE_XMM128: 677 case TYPE_XMM256: 678 case TYPE_XMM512: 679 case TYPE_VK1: 680 case TYPE_VK8: 681 case TYPE_VK16: 682 case TYPE_DEBUGREG: 683 case TYPE_CONTROLREG: 684 return translateRMRegister(mcInst, insn); 685 case TYPE_M: 686 case TYPE_M8: 687 case TYPE_M16: 688 case TYPE_M32: 689 case TYPE_M64: 690 case TYPE_M128: 691 case TYPE_M256: 692 case TYPE_M512: 693 case TYPE_Mv: 694 case TYPE_M32FP: 695 case TYPE_M64FP: 696 case TYPE_M80FP: 697 case TYPE_M1616: 698 case TYPE_M1632: 699 case TYPE_M1664: 700 case TYPE_LEA: 701 return translateRMMemory(mcInst, insn, Dis); 702 } 703 } 704 705 /// translateFPRegister - Translates a stack position on the FPU stack to its 706 /// LLVM form, and appends it to an MCInst. 707 /// 708 /// @param mcInst - The MCInst to append to. 709 /// @param stackPos - The stack position to translate. 710 static void translateFPRegister(MCInst &mcInst, 711 uint8_t stackPos) { 712 mcInst.addOperand(MCOperand::CreateReg(X86::ST0 + stackPos)); 713 } 714 715 /// translateMaskRegister - Translates a 3-bit mask register number to 716 /// LLVM form, and appends it to an MCInst. 717 /// 718 /// @param mcInst - The MCInst to append to. 719 /// @param maskRegNum - Number of mask register from 0 to 7. 720 /// @return - false on success; true otherwise. 721 static bool translateMaskRegister(MCInst &mcInst, 722 uint8_t maskRegNum) { 723 if (maskRegNum >= 8) { 724 debug("Invalid mask register number"); 725 return true; 726 } 727 728 mcInst.addOperand(MCOperand::CreateReg(X86::K0 + maskRegNum)); 729 return false; 730 } 731 732 /// translateOperand - Translates an operand stored in an internal instruction 733 /// to LLVM's format and appends it to an MCInst. 734 /// 735 /// @param mcInst - The MCInst to append to. 736 /// @param operand - The operand, as stored in the descriptor table. 737 /// @param insn - The internal instruction. 738 /// @return - false on success; true otherwise. 739 static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, 740 InternalInstruction &insn, 741 const MCDisassembler *Dis) { 742 switch (operand.encoding) { 743 default: 744 debug("Unhandled operand encoding during translation"); 745 return true; 746 case ENCODING_REG: 747 translateRegister(mcInst, insn.reg); 748 return false; 749 case ENCODING_WRITEMASK: 750 return translateMaskRegister(mcInst, insn.writemask); 751 CASE_ENCODING_RM: 752 return translateRM(mcInst, operand, insn, Dis); 753 case ENCODING_CB: 754 case ENCODING_CW: 755 case ENCODING_CD: 756 case ENCODING_CP: 757 case ENCODING_CO: 758 case ENCODING_CT: 759 debug("Translation of code offsets isn't supported."); 760 return true; 761 case ENCODING_IB: 762 case ENCODING_IW: 763 case ENCODING_ID: 764 case ENCODING_IO: 765 case ENCODING_Iv: 766 case ENCODING_Ia: 767 translateImmediate(mcInst, 768 insn.immediates[insn.numImmediatesTranslated++], 769 operand, 770 insn, 771 Dis); 772 return false; 773 case ENCODING_SI: 774 return translateSrcIndex(mcInst, insn); 775 case ENCODING_DI: 776 return translateDstIndex(mcInst, insn); 777 case ENCODING_RB: 778 case ENCODING_RW: 779 case ENCODING_RD: 780 case ENCODING_RO: 781 case ENCODING_Rv: 782 translateRegister(mcInst, insn.opcodeRegister); 783 return false; 784 case ENCODING_FP: 785 translateFPRegister(mcInst, insn.modRM & 7); 786 return false; 787 case ENCODING_VVVV: 788 translateRegister(mcInst, insn.vvvv); 789 return false; 790 case ENCODING_DUP: 791 return translateOperand(mcInst, insn.operands[operand.type - TYPE_DUP0], 792 insn, Dis); 793 } 794 } 795 796 /// translateInstruction - Translates an internal instruction and all its 797 /// operands to an MCInst. 798 /// 799 /// @param mcInst - The MCInst to populate with the instruction's data. 800 /// @param insn - The internal instruction. 801 /// @return - false on success; true otherwise. 802 static bool translateInstruction(MCInst &mcInst, 803 InternalInstruction &insn, 804 const MCDisassembler *Dis) { 805 if (!insn.spec) { 806 debug("Instruction has no specification"); 807 return true; 808 } 809 810 mcInst.setOpcode(insn.instructionID); 811 // If when reading the prefix bytes we determined the overlapping 0xf2 or 0xf3 812 // prefix bytes should be disassembled as xrelease and xacquire then set the 813 // opcode to those instead of the rep and repne opcodes. 814 if (insn.xAcquireRelease) { 815 if(mcInst.getOpcode() == X86::REP_PREFIX) 816 mcInst.setOpcode(X86::XRELEASE_PREFIX); 817 else if(mcInst.getOpcode() == X86::REPNE_PREFIX) 818 mcInst.setOpcode(X86::XACQUIRE_PREFIX); 819 } 820 821 insn.numImmediatesTranslated = 0; 822 823 for (const auto &Op : insn.operands) { 824 if (Op.encoding != ENCODING_NONE) { 825 if (translateOperand(mcInst, Op, insn, Dis)) { 826 return true; 827 } 828 } 829 } 830 831 return false; 832 } 833 834 static MCDisassembler *createX86Disassembler(const Target &T, 835 const MCSubtargetInfo &STI, 836 MCContext &Ctx) { 837 std::unique_ptr<const MCInstrInfo> MII(T.createMCInstrInfo()); 838 return new X86Disassembler::X86GenericDisassembler(STI, Ctx, std::move(MII)); 839 } 840 841 extern "C" void LLVMInitializeX86Disassembler() { 842 // Register the disassembler. 843 TargetRegistry::RegisterMCDisassembler(TheX86_32Target, 844 createX86Disassembler); 845 TargetRegistry::RegisterMCDisassembler(TheX86_64Target, 846 createX86Disassembler); 847 } 848