1 //===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file is part of the X86 Disassembler. 11 // It contains code to translate the data produced by the decoder into 12 // MCInsts. 13 // Documentation for the disassembler can be found in X86Disassembler.h. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "X86Disassembler.h" 18 #include "X86DisassemblerDecoder.h" 19 #include "llvm/MC/MCContext.h" 20 #include "llvm/MC/MCDisassembler.h" 21 #include "llvm/MC/MCExpr.h" 22 #include "llvm/MC/MCInst.h" 23 #include "llvm/MC/MCInstrInfo.h" 24 #include "llvm/MC/MCSubtargetInfo.h" 25 #include "llvm/Support/Debug.h" 26 #include "llvm/Support/TargetRegistry.h" 27 #include "llvm/Support/raw_ostream.h" 28 29 using namespace llvm; 30 using namespace llvm::X86Disassembler; 31 32 #define DEBUG_TYPE "x86-disassembler" 33 34 #define GET_REGINFO_ENUM 35 #include "X86GenRegisterInfo.inc" 36 #define GET_INSTRINFO_ENUM 37 #include "X86GenInstrInfo.inc" 38 #define GET_SUBTARGETINFO_ENUM 39 #include "X86GenSubtargetInfo.inc" 40 41 void llvm::X86Disassembler::Debug(const char *file, unsigned line, 42 const char *s) { 43 dbgs() << file << ":" << line << ": " << s; 44 } 45 46 const char *llvm::X86Disassembler::GetInstrName(unsigned Opcode, 47 const void *mii) { 48 const MCInstrInfo *MII = static_cast<const MCInstrInfo *>(mii); 49 return MII->getName(Opcode); 50 } 51 52 #define debug(s) DEBUG(Debug(__FILE__, __LINE__, s)); 53 54 namespace llvm { 55 56 // Fill-ins to make the compiler happy. These constants are never actually 57 // assigned; they are just filler to make an automatically-generated switch 58 // statement work. 59 namespace X86 { 60 enum { 61 BX_SI = 500, 62 BX_DI = 501, 63 BP_SI = 502, 64 BP_DI = 503, 65 sib = 504, 66 sib64 = 505 67 }; 68 } 69 70 extern Target TheX86_32Target, TheX86_64Target; 71 72 } 73 74 static bool translateInstruction(MCInst &target, 75 InternalInstruction &source, 76 const MCDisassembler *Dis); 77 78 X86GenericDisassembler::X86GenericDisassembler( 79 const MCSubtargetInfo &STI, 80 MCContext &Ctx, 81 std::unique_ptr<const MCInstrInfo> MII) 82 : MCDisassembler(STI, Ctx), MII(std::move(MII)) { 83 switch (STI.getFeatureBits() & 84 (X86::Mode16Bit | X86::Mode32Bit | X86::Mode64Bit)) { 85 case X86::Mode16Bit: 86 fMode = MODE_16BIT; 87 break; 88 case X86::Mode32Bit: 89 fMode = MODE_32BIT; 90 break; 91 case X86::Mode64Bit: 92 fMode = MODE_64BIT; 93 break; 94 default: 95 llvm_unreachable("Invalid CPU mode"); 96 } 97 } 98 99 struct Region { 100 ArrayRef<uint8_t> Bytes; 101 uint64_t Base; 102 Region(ArrayRef<uint8_t> Bytes, uint64_t Base) : Bytes(Bytes), Base(Base) {} 103 }; 104 105 /// A callback function that wraps the readByte method from Region. 106 /// 107 /// @param Arg - The generic callback parameter. In this case, this should 108 /// be a pointer to a Region. 109 /// @param Byte - A pointer to the byte to be read. 110 /// @param Address - The address to be read. 111 static int regionReader(const void *Arg, uint8_t *Byte, uint64_t Address) { 112 auto *R = static_cast<const Region *>(Arg); 113 ArrayRef<uint8_t> Bytes = R->Bytes; 114 unsigned Index = Address - R->Base; 115 if (Bytes.size() <= Index) 116 return -1; 117 *Byte = Bytes[Index]; 118 return 0; 119 } 120 121 /// logger - a callback function that wraps the operator<< method from 122 /// raw_ostream. 123 /// 124 /// @param arg - The generic callback parameter. This should be a pointe 125 /// to a raw_ostream. 126 /// @param log - A string to be logged. logger() adds a newline. 127 static void logger(void* arg, const char* log) { 128 if (!arg) 129 return; 130 131 raw_ostream &vStream = *(static_cast<raw_ostream*>(arg)); 132 vStream << log << "\n"; 133 } 134 135 // 136 // Public interface for the disassembler 137 // 138 139 MCDisassembler::DecodeStatus X86GenericDisassembler::getInstruction( 140 MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, 141 raw_ostream &VStream, raw_ostream &CStream) const { 142 CommentStream = &CStream; 143 144 InternalInstruction InternalInstr; 145 146 dlog_t LoggerFn = logger; 147 if (&VStream == &nulls()) 148 LoggerFn = nullptr; // Disable logging completely if it's going to nulls(). 149 150 Region R(Bytes, Address); 151 152 int Ret = decodeInstruction(&InternalInstr, regionReader, (const void *)&R, 153 LoggerFn, (void *)&VStream, 154 (const void *)MII.get(), Address, fMode); 155 156 if (Ret) { 157 Size = InternalInstr.readerCursor - Address; 158 return Fail; 159 } else { 160 Size = InternalInstr.length; 161 return (!translateInstruction(Instr, InternalInstr, this)) ? Success : Fail; 162 } 163 } 164 165 // 166 // Private code that translates from struct InternalInstructions to MCInsts. 167 // 168 169 /// translateRegister - Translates an internal register to the appropriate LLVM 170 /// register, and appends it as an operand to an MCInst. 171 /// 172 /// @param mcInst - The MCInst to append to. 173 /// @param reg - The Reg to append. 174 static void translateRegister(MCInst &mcInst, Reg reg) { 175 #define ENTRY(x) X86::x, 176 uint8_t llvmRegnums[] = { 177 ALL_REGS 178 0 179 }; 180 #undef ENTRY 181 182 uint8_t llvmRegnum = llvmRegnums[reg]; 183 mcInst.addOperand(MCOperand::CreateReg(llvmRegnum)); 184 } 185 186 /// tryAddingSymbolicOperand - trys to add a symbolic operand in place of the 187 /// immediate Value in the MCInst. 188 /// 189 /// @param Value - The immediate Value, has had any PC adjustment made by 190 /// the caller. 191 /// @param isBranch - If the instruction is a branch instruction 192 /// @param Address - The starting address of the instruction 193 /// @param Offset - The byte offset to this immediate in the instruction 194 /// @param Width - The byte width of this immediate in the instruction 195 /// 196 /// If the getOpInfo() function was set when setupForSymbolicDisassembly() was 197 /// called then that function is called to get any symbolic information for the 198 /// immediate in the instruction using the Address, Offset and Width. If that 199 /// returns non-zero then the symbolic information it returns is used to create 200 /// an MCExpr and that is added as an operand to the MCInst. If getOpInfo() 201 /// returns zero and isBranch is true then a symbol look up for immediate Value 202 /// is done and if a symbol is found an MCExpr is created with that, else 203 /// an MCExpr with the immediate Value is created. This function returns true 204 /// if it adds an operand to the MCInst and false otherwise. 205 static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch, 206 uint64_t Address, uint64_t Offset, 207 uint64_t Width, MCInst &MI, 208 const MCDisassembler *Dis) { 209 return Dis->tryAddingSymbolicOperand(MI, Value, Address, isBranch, 210 Offset, Width); 211 } 212 213 /// tryAddingPcLoadReferenceComment - trys to add a comment as to what is being 214 /// referenced by a load instruction with the base register that is the rip. 215 /// These can often be addresses in a literal pool. The Address of the 216 /// instruction and its immediate Value are used to determine the address 217 /// being referenced in the literal pool entry. The SymbolLookUp call back will 218 /// return a pointer to a literal 'C' string if the referenced address is an 219 /// address into a section with 'C' string literals. 220 static void tryAddingPcLoadReferenceComment(uint64_t Address, uint64_t Value, 221 const void *Decoder) { 222 const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder); 223 Dis->tryAddingPcLoadReferenceComment(Value, Address); 224 } 225 226 static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = { 227 0, // SEG_OVERRIDE_NONE 228 X86::CS, 229 X86::SS, 230 X86::DS, 231 X86::ES, 232 X86::FS, 233 X86::GS 234 }; 235 236 /// translateSrcIndex - Appends a source index operand to an MCInst. 237 /// 238 /// @param mcInst - The MCInst to append to. 239 /// @param insn - The internal instruction. 240 static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn) { 241 unsigned baseRegNo; 242 243 if (insn.mode == MODE_64BIT) 244 baseRegNo = insn.prefixPresent[0x67] ? X86::ESI : X86::RSI; 245 else if (insn.mode == MODE_32BIT) 246 baseRegNo = insn.prefixPresent[0x67] ? X86::SI : X86::ESI; 247 else { 248 assert(insn.mode == MODE_16BIT); 249 baseRegNo = insn.prefixPresent[0x67] ? X86::ESI : X86::SI; 250 } 251 MCOperand baseReg = MCOperand::CreateReg(baseRegNo); 252 mcInst.addOperand(baseReg); 253 254 MCOperand segmentReg; 255 segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]); 256 mcInst.addOperand(segmentReg); 257 return false; 258 } 259 260 /// translateDstIndex - Appends a destination index operand to an MCInst. 261 /// 262 /// @param mcInst - The MCInst to append to. 263 /// @param insn - The internal instruction. 264 265 static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn) { 266 unsigned baseRegNo; 267 268 if (insn.mode == MODE_64BIT) 269 baseRegNo = insn.prefixPresent[0x67] ? X86::EDI : X86::RDI; 270 else if (insn.mode == MODE_32BIT) 271 baseRegNo = insn.prefixPresent[0x67] ? X86::DI : X86::EDI; 272 else { 273 assert(insn.mode == MODE_16BIT); 274 baseRegNo = insn.prefixPresent[0x67] ? X86::EDI : X86::DI; 275 } 276 MCOperand baseReg = MCOperand::CreateReg(baseRegNo); 277 mcInst.addOperand(baseReg); 278 return false; 279 } 280 281 /// translateImmediate - Appends an immediate operand to an MCInst. 282 /// 283 /// @param mcInst - The MCInst to append to. 284 /// @param immediate - The immediate value to append. 285 /// @param operand - The operand, as stored in the descriptor table. 286 /// @param insn - The internal instruction. 287 static void translateImmediate(MCInst &mcInst, uint64_t immediate, 288 const OperandSpecifier &operand, 289 InternalInstruction &insn, 290 const MCDisassembler *Dis) { 291 // Sign-extend the immediate if necessary. 292 293 OperandType type = (OperandType)operand.type; 294 295 bool isBranch = false; 296 uint64_t pcrel = 0; 297 if (type == TYPE_RELv) { 298 isBranch = true; 299 pcrel = insn.startLocation + 300 insn.immediateOffset + insn.immediateSize; 301 switch (insn.displacementSize) { 302 default: 303 break; 304 case 1: 305 if(immediate & 0x80) 306 immediate |= ~(0xffull); 307 break; 308 case 2: 309 if(immediate & 0x8000) 310 immediate |= ~(0xffffull); 311 break; 312 case 4: 313 if(immediate & 0x80000000) 314 immediate |= ~(0xffffffffull); 315 break; 316 case 8: 317 break; 318 } 319 } 320 // By default sign-extend all X86 immediates based on their encoding. 321 else if (type == TYPE_IMM8 || type == TYPE_IMM16 || type == TYPE_IMM32 || 322 type == TYPE_IMM64 || type == TYPE_IMMv) { 323 uint32_t Opcode = mcInst.getOpcode(); 324 switch (operand.encoding) { 325 default: 326 break; 327 case ENCODING_IB: 328 // Special case those X86 instructions that use the imm8 as a set of 329 // bits, bit count, etc. and are not sign-extend. 330 if (Opcode != X86::BLENDPSrri && Opcode != X86::BLENDPDrri && 331 Opcode != X86::PBLENDWrri && Opcode != X86::MPSADBWrri && 332 Opcode != X86::DPPSrri && Opcode != X86::DPPDrri && 333 Opcode != X86::INSERTPSrr && Opcode != X86::VBLENDPSYrri && 334 Opcode != X86::VBLENDPSYrmi && Opcode != X86::VBLENDPDYrri && 335 Opcode != X86::VBLENDPDYrmi && Opcode != X86::VPBLENDWrri && 336 Opcode != X86::VMPSADBWrri && Opcode != X86::VDPPSYrri && 337 Opcode != X86::VDPPSYrmi && Opcode != X86::VDPPDrri && 338 Opcode != X86::VINSERTPSrr) 339 if(immediate & 0x80) 340 immediate |= ~(0xffull); 341 break; 342 case ENCODING_IW: 343 if(immediate & 0x8000) 344 immediate |= ~(0xffffull); 345 break; 346 case ENCODING_ID: 347 if(immediate & 0x80000000) 348 immediate |= ~(0xffffffffull); 349 break; 350 case ENCODING_IO: 351 break; 352 } 353 } else if (type == TYPE_IMM3) { 354 // Check for immediates that printSSECC can't handle. 355 if (immediate >= 8) { 356 unsigned NewOpc; 357 switch (mcInst.getOpcode()) { 358 default: llvm_unreachable("unexpected opcode"); 359 case X86::CMPPDrmi: NewOpc = X86::CMPPDrmi_alt; break; 360 case X86::CMPPDrri: NewOpc = X86::CMPPDrri_alt; break; 361 case X86::CMPPSrmi: NewOpc = X86::CMPPSrmi_alt; break; 362 case X86::CMPPSrri: NewOpc = X86::CMPPSrri_alt; break; 363 case X86::CMPSDrm: NewOpc = X86::CMPSDrm_alt; break; 364 case X86::CMPSDrr: NewOpc = X86::CMPSDrr_alt; break; 365 case X86::CMPSSrm: NewOpc = X86::CMPSSrm_alt; break; 366 case X86::CMPSSrr: NewOpc = X86::CMPSSrr_alt; break; 367 } 368 // Switch opcode to the one that doesn't get special printing. 369 mcInst.setOpcode(NewOpc); 370 } 371 } else if (type == TYPE_IMM5) { 372 // Check for immediates that printAVXCC can't handle. 373 if (immediate >= 32) { 374 unsigned NewOpc; 375 switch (mcInst.getOpcode()) { 376 default: llvm_unreachable("unexpected opcode"); 377 case X86::VCMPPDrmi: NewOpc = X86::VCMPPDrmi_alt; break; 378 case X86::VCMPPDrri: NewOpc = X86::VCMPPDrri_alt; break; 379 case X86::VCMPPSrmi: NewOpc = X86::VCMPPSrmi_alt; break; 380 case X86::VCMPPSrri: NewOpc = X86::VCMPPSrri_alt; break; 381 case X86::VCMPSDrm: NewOpc = X86::VCMPSDrm_alt; break; 382 case X86::VCMPSDrr: NewOpc = X86::VCMPSDrr_alt; break; 383 case X86::VCMPSSrm: NewOpc = X86::VCMPSSrm_alt; break; 384 case X86::VCMPSSrr: NewOpc = X86::VCMPSSrr_alt; break; 385 case X86::VCMPPDYrmi: NewOpc = X86::VCMPPDYrmi_alt; break; 386 case X86::VCMPPDYrri: NewOpc = X86::VCMPPDYrri_alt; break; 387 case X86::VCMPPSYrmi: NewOpc = X86::VCMPPSYrmi_alt; break; 388 case X86::VCMPPSYrri: NewOpc = X86::VCMPPSYrri_alt; break; 389 case X86::VCMPPDZrmi: NewOpc = X86::VCMPPDZrmi_alt; break; 390 case X86::VCMPPDZrri: NewOpc = X86::VCMPPDZrri_alt; break; 391 case X86::VCMPPSZrmi: NewOpc = X86::VCMPPSZrmi_alt; break; 392 case X86::VCMPPSZrri: NewOpc = X86::VCMPPSZrri_alt; break; 393 case X86::VCMPSDZrm: NewOpc = X86::VCMPSDZrmi_alt; break; 394 case X86::VCMPSDZrr: NewOpc = X86::VCMPSDZrri_alt; break; 395 case X86::VCMPSSZrm: NewOpc = X86::VCMPSSZrmi_alt; break; 396 case X86::VCMPSSZrr: NewOpc = X86::VCMPSSZrri_alt; break; 397 } 398 // Switch opcode to the one that doesn't get special printing. 399 mcInst.setOpcode(NewOpc); 400 } 401 } 402 403 switch (type) { 404 case TYPE_XMM32: 405 case TYPE_XMM64: 406 case TYPE_XMM128: 407 mcInst.addOperand(MCOperand::CreateReg(X86::XMM0 + (immediate >> 4))); 408 return; 409 case TYPE_XMM256: 410 mcInst.addOperand(MCOperand::CreateReg(X86::YMM0 + (immediate >> 4))); 411 return; 412 case TYPE_XMM512: 413 mcInst.addOperand(MCOperand::CreateReg(X86::ZMM0 + (immediate >> 4))); 414 return; 415 case TYPE_REL8: 416 isBranch = true; 417 pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize; 418 if(immediate & 0x80) 419 immediate |= ~(0xffull); 420 break; 421 case TYPE_REL32: 422 case TYPE_REL64: 423 isBranch = true; 424 pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize; 425 if(immediate & 0x80000000) 426 immediate |= ~(0xffffffffull); 427 break; 428 default: 429 // operand is 64 bits wide. Do nothing. 430 break; 431 } 432 433 if(!tryAddingSymbolicOperand(immediate + pcrel, isBranch, insn.startLocation, 434 insn.immediateOffset, insn.immediateSize, 435 mcInst, Dis)) 436 mcInst.addOperand(MCOperand::CreateImm(immediate)); 437 438 if (type == TYPE_MOFFS8 || type == TYPE_MOFFS16 || 439 type == TYPE_MOFFS32 || type == TYPE_MOFFS64) { 440 MCOperand segmentReg; 441 segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]); 442 mcInst.addOperand(segmentReg); 443 } 444 } 445 446 /// translateRMRegister - Translates a register stored in the R/M field of the 447 /// ModR/M byte to its LLVM equivalent and appends it to an MCInst. 448 /// @param mcInst - The MCInst to append to. 449 /// @param insn - The internal instruction to extract the R/M field 450 /// from. 451 /// @return - 0 on success; -1 otherwise 452 static bool translateRMRegister(MCInst &mcInst, 453 InternalInstruction &insn) { 454 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) { 455 debug("A R/M register operand may not have a SIB byte"); 456 return true; 457 } 458 459 switch (insn.eaBase) { 460 default: 461 debug("Unexpected EA base register"); 462 return true; 463 case EA_BASE_NONE: 464 debug("EA_BASE_NONE for ModR/M base"); 465 return true; 466 #define ENTRY(x) case EA_BASE_##x: 467 ALL_EA_BASES 468 #undef ENTRY 469 debug("A R/M register operand may not have a base; " 470 "the operand must be a register."); 471 return true; 472 #define ENTRY(x) \ 473 case EA_REG_##x: \ 474 mcInst.addOperand(MCOperand::CreateReg(X86::x)); break; 475 ALL_REGS 476 #undef ENTRY 477 } 478 479 return false; 480 } 481 482 /// translateRMMemory - Translates a memory operand stored in the Mod and R/M 483 /// fields of an internal instruction (and possibly its SIB byte) to a memory 484 /// operand in LLVM's format, and appends it to an MCInst. 485 /// 486 /// @param mcInst - The MCInst to append to. 487 /// @param insn - The instruction to extract Mod, R/M, and SIB fields 488 /// from. 489 /// @return - 0 on success; nonzero otherwise 490 static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, 491 const MCDisassembler *Dis) { 492 // Addresses in an MCInst are represented as five operands: 493 // 1. basereg (register) The R/M base, or (if there is a SIB) the 494 // SIB base 495 // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified 496 // scale amount 497 // 3. indexreg (register) x86_registerNONE, or (if there is a SIB) 498 // the index (which is multiplied by the 499 // scale amount) 500 // 4. displacement (immediate) 0, or the displacement if there is one 501 // 5. segmentreg (register) x86_registerNONE for now, but could be set 502 // if we have segment overrides 503 504 MCOperand baseReg; 505 MCOperand scaleAmount; 506 MCOperand indexReg; 507 MCOperand displacement; 508 MCOperand segmentReg; 509 uint64_t pcrel = 0; 510 511 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) { 512 if (insn.sibBase != SIB_BASE_NONE) { 513 switch (insn.sibBase) { 514 default: 515 debug("Unexpected sibBase"); 516 return true; 517 #define ENTRY(x) \ 518 case SIB_BASE_##x: \ 519 baseReg = MCOperand::CreateReg(X86::x); break; 520 ALL_SIB_BASES 521 #undef ENTRY 522 } 523 } else { 524 baseReg = MCOperand::CreateReg(0); 525 } 526 527 // Check whether we are handling VSIB addressing mode for GATHER. 528 // If sibIndex was set to SIB_INDEX_NONE, index offset is 4 and 529 // we should use SIB_INDEX_XMM4|YMM4 for VSIB. 530 // I don't see a way to get the correct IndexReg in readSIB: 531 // We can tell whether it is VSIB or SIB after instruction ID is decoded, 532 // but instruction ID may not be decoded yet when calling readSIB. 533 uint32_t Opcode = mcInst.getOpcode(); 534 bool IndexIs128 = (Opcode == X86::VGATHERDPDrm || 535 Opcode == X86::VGATHERDPDYrm || 536 Opcode == X86::VGATHERQPDrm || 537 Opcode == X86::VGATHERDPSrm || 538 Opcode == X86::VGATHERQPSrm || 539 Opcode == X86::VPGATHERDQrm || 540 Opcode == X86::VPGATHERDQYrm || 541 Opcode == X86::VPGATHERQQrm || 542 Opcode == X86::VPGATHERDDrm || 543 Opcode == X86::VPGATHERQDrm); 544 bool IndexIs256 = (Opcode == X86::VGATHERQPDYrm || 545 Opcode == X86::VGATHERDPSYrm || 546 Opcode == X86::VGATHERQPSYrm || 547 Opcode == X86::VGATHERDPDZrm || 548 Opcode == X86::VPGATHERDQZrm || 549 Opcode == X86::VPGATHERQQYrm || 550 Opcode == X86::VPGATHERDDYrm || 551 Opcode == X86::VPGATHERQDYrm); 552 bool IndexIs512 = (Opcode == X86::VGATHERQPDZrm || 553 Opcode == X86::VGATHERDPSZrm || 554 Opcode == X86::VGATHERQPSZrm || 555 Opcode == X86::VPGATHERQQZrm || 556 Opcode == X86::VPGATHERDDZrm || 557 Opcode == X86::VPGATHERQDZrm); 558 if (IndexIs128 || IndexIs256 || IndexIs512) { 559 unsigned IndexOffset = insn.sibIndex - 560 (insn.addressSize == 8 ? SIB_INDEX_RAX:SIB_INDEX_EAX); 561 SIBIndex IndexBase = IndexIs512 ? SIB_INDEX_ZMM0 : 562 IndexIs256 ? SIB_INDEX_YMM0 : SIB_INDEX_XMM0; 563 insn.sibIndex = (SIBIndex)(IndexBase + 564 (insn.sibIndex == SIB_INDEX_NONE ? 4 : IndexOffset)); 565 } 566 567 if (insn.sibIndex != SIB_INDEX_NONE) { 568 switch (insn.sibIndex) { 569 default: 570 debug("Unexpected sibIndex"); 571 return true; 572 #define ENTRY(x) \ 573 case SIB_INDEX_##x: \ 574 indexReg = MCOperand::CreateReg(X86::x); break; 575 EA_BASES_32BIT 576 EA_BASES_64BIT 577 REGS_XMM 578 REGS_YMM 579 REGS_ZMM 580 #undef ENTRY 581 } 582 } else { 583 indexReg = MCOperand::CreateReg(0); 584 } 585 586 scaleAmount = MCOperand::CreateImm(insn.sibScale); 587 } else { 588 switch (insn.eaBase) { 589 case EA_BASE_NONE: 590 if (insn.eaDisplacement == EA_DISP_NONE) { 591 debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base"); 592 return true; 593 } 594 if (insn.mode == MODE_64BIT){ 595 pcrel = insn.startLocation + 596 insn.displacementOffset + insn.displacementSize; 597 tryAddingPcLoadReferenceComment(insn.startLocation + 598 insn.displacementOffset, 599 insn.displacement + pcrel, Dis); 600 baseReg = MCOperand::CreateReg(X86::RIP); // Section 2.2.1.6 601 } 602 else 603 baseReg = MCOperand::CreateReg(0); 604 605 indexReg = MCOperand::CreateReg(0); 606 break; 607 case EA_BASE_BX_SI: 608 baseReg = MCOperand::CreateReg(X86::BX); 609 indexReg = MCOperand::CreateReg(X86::SI); 610 break; 611 case EA_BASE_BX_DI: 612 baseReg = MCOperand::CreateReg(X86::BX); 613 indexReg = MCOperand::CreateReg(X86::DI); 614 break; 615 case EA_BASE_BP_SI: 616 baseReg = MCOperand::CreateReg(X86::BP); 617 indexReg = MCOperand::CreateReg(X86::SI); 618 break; 619 case EA_BASE_BP_DI: 620 baseReg = MCOperand::CreateReg(X86::BP); 621 indexReg = MCOperand::CreateReg(X86::DI); 622 break; 623 default: 624 indexReg = MCOperand::CreateReg(0); 625 switch (insn.eaBase) { 626 default: 627 debug("Unexpected eaBase"); 628 return true; 629 // Here, we will use the fill-ins defined above. However, 630 // BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and 631 // sib and sib64 were handled in the top-level if, so they're only 632 // placeholders to keep the compiler happy. 633 #define ENTRY(x) \ 634 case EA_BASE_##x: \ 635 baseReg = MCOperand::CreateReg(X86::x); break; 636 ALL_EA_BASES 637 #undef ENTRY 638 #define ENTRY(x) case EA_REG_##x: 639 ALL_REGS 640 #undef ENTRY 641 debug("A R/M memory operand may not be a register; " 642 "the base field must be a base."); 643 return true; 644 } 645 } 646 647 scaleAmount = MCOperand::CreateImm(1); 648 } 649 650 displacement = MCOperand::CreateImm(insn.displacement); 651 652 segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]); 653 654 mcInst.addOperand(baseReg); 655 mcInst.addOperand(scaleAmount); 656 mcInst.addOperand(indexReg); 657 if(!tryAddingSymbolicOperand(insn.displacement + pcrel, false, 658 insn.startLocation, insn.displacementOffset, 659 insn.displacementSize, mcInst, Dis)) 660 mcInst.addOperand(displacement); 661 mcInst.addOperand(segmentReg); 662 return false; 663 } 664 665 /// translateRM - Translates an operand stored in the R/M (and possibly SIB) 666 /// byte of an instruction to LLVM form, and appends it to an MCInst. 667 /// 668 /// @param mcInst - The MCInst to append to. 669 /// @param operand - The operand, as stored in the descriptor table. 670 /// @param insn - The instruction to extract Mod, R/M, and SIB fields 671 /// from. 672 /// @return - 0 on success; nonzero otherwise 673 static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, 674 InternalInstruction &insn, const MCDisassembler *Dis) { 675 switch (operand.type) { 676 default: 677 debug("Unexpected type for a R/M operand"); 678 return true; 679 case TYPE_R8: 680 case TYPE_R16: 681 case TYPE_R32: 682 case TYPE_R64: 683 case TYPE_Rv: 684 case TYPE_MM64: 685 case TYPE_XMM: 686 case TYPE_XMM32: 687 case TYPE_XMM64: 688 case TYPE_XMM128: 689 case TYPE_XMM256: 690 case TYPE_XMM512: 691 case TYPE_VK1: 692 case TYPE_VK8: 693 case TYPE_VK16: 694 case TYPE_DEBUGREG: 695 case TYPE_CONTROLREG: 696 return translateRMRegister(mcInst, insn); 697 case TYPE_M: 698 case TYPE_M8: 699 case TYPE_M16: 700 case TYPE_M32: 701 case TYPE_M64: 702 case TYPE_M128: 703 case TYPE_M256: 704 case TYPE_M512: 705 case TYPE_Mv: 706 case TYPE_M32FP: 707 case TYPE_M64FP: 708 case TYPE_M80FP: 709 case TYPE_M1616: 710 case TYPE_M1632: 711 case TYPE_M1664: 712 case TYPE_LEA: 713 return translateRMMemory(mcInst, insn, Dis); 714 } 715 } 716 717 /// translateFPRegister - Translates a stack position on the FPU stack to its 718 /// LLVM form, and appends it to an MCInst. 719 /// 720 /// @param mcInst - The MCInst to append to. 721 /// @param stackPos - The stack position to translate. 722 static void translateFPRegister(MCInst &mcInst, 723 uint8_t stackPos) { 724 mcInst.addOperand(MCOperand::CreateReg(X86::ST0 + stackPos)); 725 } 726 727 /// translateMaskRegister - Translates a 3-bit mask register number to 728 /// LLVM form, and appends it to an MCInst. 729 /// 730 /// @param mcInst - The MCInst to append to. 731 /// @param maskRegNum - Number of mask register from 0 to 7. 732 /// @return - false on success; true otherwise. 733 static bool translateMaskRegister(MCInst &mcInst, 734 uint8_t maskRegNum) { 735 if (maskRegNum >= 8) { 736 debug("Invalid mask register number"); 737 return true; 738 } 739 740 mcInst.addOperand(MCOperand::CreateReg(X86::K0 + maskRegNum)); 741 return false; 742 } 743 744 /// translateOperand - Translates an operand stored in an internal instruction 745 /// to LLVM's format and appends it to an MCInst. 746 /// 747 /// @param mcInst - The MCInst to append to. 748 /// @param operand - The operand, as stored in the descriptor table. 749 /// @param insn - The internal instruction. 750 /// @return - false on success; true otherwise. 751 static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, 752 InternalInstruction &insn, 753 const MCDisassembler *Dis) { 754 switch (operand.encoding) { 755 default: 756 debug("Unhandled operand encoding during translation"); 757 return true; 758 case ENCODING_REG: 759 translateRegister(mcInst, insn.reg); 760 return false; 761 case ENCODING_WRITEMASK: 762 return translateMaskRegister(mcInst, insn.writemask); 763 CASE_ENCODING_RM: 764 return translateRM(mcInst, operand, insn, Dis); 765 case ENCODING_CB: 766 case ENCODING_CW: 767 case ENCODING_CD: 768 case ENCODING_CP: 769 case ENCODING_CO: 770 case ENCODING_CT: 771 debug("Translation of code offsets isn't supported."); 772 return true; 773 case ENCODING_IB: 774 case ENCODING_IW: 775 case ENCODING_ID: 776 case ENCODING_IO: 777 case ENCODING_Iv: 778 case ENCODING_Ia: 779 translateImmediate(mcInst, 780 insn.immediates[insn.numImmediatesTranslated++], 781 operand, 782 insn, 783 Dis); 784 return false; 785 case ENCODING_SI: 786 return translateSrcIndex(mcInst, insn); 787 case ENCODING_DI: 788 return translateDstIndex(mcInst, insn); 789 case ENCODING_RB: 790 case ENCODING_RW: 791 case ENCODING_RD: 792 case ENCODING_RO: 793 case ENCODING_Rv: 794 translateRegister(mcInst, insn.opcodeRegister); 795 return false; 796 case ENCODING_FP: 797 translateFPRegister(mcInst, insn.modRM & 7); 798 return false; 799 case ENCODING_VVVV: 800 translateRegister(mcInst, insn.vvvv); 801 return false; 802 case ENCODING_DUP: 803 return translateOperand(mcInst, insn.operands[operand.type - TYPE_DUP0], 804 insn, Dis); 805 } 806 } 807 808 /// translateInstruction - Translates an internal instruction and all its 809 /// operands to an MCInst. 810 /// 811 /// @param mcInst - The MCInst to populate with the instruction's data. 812 /// @param insn - The internal instruction. 813 /// @return - false on success; true otherwise. 814 static bool translateInstruction(MCInst &mcInst, 815 InternalInstruction &insn, 816 const MCDisassembler *Dis) { 817 if (!insn.spec) { 818 debug("Instruction has no specification"); 819 return true; 820 } 821 822 mcInst.setOpcode(insn.instructionID); 823 // If when reading the prefix bytes we determined the overlapping 0xf2 or 0xf3 824 // prefix bytes should be disassembled as xrelease and xacquire then set the 825 // opcode to those instead of the rep and repne opcodes. 826 if (insn.xAcquireRelease) { 827 if(mcInst.getOpcode() == X86::REP_PREFIX) 828 mcInst.setOpcode(X86::XRELEASE_PREFIX); 829 else if(mcInst.getOpcode() == X86::REPNE_PREFIX) 830 mcInst.setOpcode(X86::XACQUIRE_PREFIX); 831 } 832 833 insn.numImmediatesTranslated = 0; 834 835 for (const auto &Op : insn.operands) { 836 if (Op.encoding != ENCODING_NONE) { 837 if (translateOperand(mcInst, Op, insn, Dis)) { 838 return true; 839 } 840 } 841 } 842 843 return false; 844 } 845 846 static MCDisassembler *createX86Disassembler(const Target &T, 847 const MCSubtargetInfo &STI, 848 MCContext &Ctx) { 849 std::unique_ptr<const MCInstrInfo> MII(T.createMCInstrInfo()); 850 return new X86Disassembler::X86GenericDisassembler(STI, Ctx, std::move(MII)); 851 } 852 853 extern "C" void LLVMInitializeX86Disassembler() { 854 // Register the disassembler. 855 TargetRegistry::RegisterMCDisassembler(TheX86_32Target, 856 createX86Disassembler); 857 TargetRegistry::RegisterMCDisassembler(TheX86_64Target, 858 createX86Disassembler); 859 } 860