1 //===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file is part of the X86 Disassembler. 11 // It contains code to translate the data produced by the decoder into 12 // MCInsts. 13 // Documentation for the disassembler can be found in X86Disassembler.h. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "X86Disassembler.h" 18 #include "X86DisassemblerDecoder.h" 19 #include "llvm/MC/MCContext.h" 20 #include "llvm/MC/MCDisassembler.h" 21 #include "llvm/MC/MCExpr.h" 22 #include "llvm/MC/MCInst.h" 23 #include "llvm/MC/MCInstrInfo.h" 24 #include "llvm/MC/MCSubtargetInfo.h" 25 #include "llvm/Support/Debug.h" 26 #include "llvm/Support/MemoryObject.h" 27 #include "llvm/Support/TargetRegistry.h" 28 #include "llvm/Support/raw_ostream.h" 29 30 #define GET_REGINFO_ENUM 31 #include "X86GenRegisterInfo.inc" 32 #define GET_INSTRINFO_ENUM 33 #include "X86GenInstrInfo.inc" 34 #define GET_SUBTARGETINFO_ENUM 35 #include "X86GenSubtargetInfo.inc" 36 37 using namespace llvm; 38 using namespace llvm::X86Disassembler; 39 40 void llvm::X86Disassembler::Debug(const char *file, unsigned line, 41 const char *s) { 42 dbgs() << file << ":" << line << ": " << s; 43 } 44 45 const char *llvm::X86Disassembler::GetInstrName(unsigned Opcode, 46 const void *mii) { 47 const MCInstrInfo *MII = static_cast<const MCInstrInfo *>(mii); 48 return MII->getName(Opcode); 49 } 50 51 #define debug(s) DEBUG(Debug(__FILE__, __LINE__, s)); 52 53 namespace llvm { 54 55 // Fill-ins to make the compiler happy. These constants are never actually 56 // assigned; they are just filler to make an automatically-generated switch 57 // statement work. 58 namespace X86 { 59 enum { 60 BX_SI = 500, 61 BX_DI = 501, 62 BP_SI = 502, 63 BP_DI = 503, 64 sib = 504, 65 sib64 = 505 66 }; 67 } 68 69 extern Target TheX86_32Target, TheX86_64Target; 70 71 } 72 73 static bool translateInstruction(MCInst &target, 74 InternalInstruction &source, 75 const MCDisassembler *Dis); 76 77 X86GenericDisassembler::X86GenericDisassembler( 78 const MCSubtargetInfo &STI, 79 MCContext &Ctx, 80 std::unique_ptr<const MCInstrInfo> MII) 81 : MCDisassembler(STI, Ctx), MII(std::move(MII)) { 82 switch (STI.getFeatureBits() & 83 (X86::Mode16Bit | X86::Mode32Bit | X86::Mode64Bit)) { 84 case X86::Mode16Bit: 85 fMode = MODE_16BIT; 86 break; 87 case X86::Mode32Bit: 88 fMode = MODE_32BIT; 89 break; 90 case X86::Mode64Bit: 91 fMode = MODE_64BIT; 92 break; 93 default: 94 llvm_unreachable("Invalid CPU mode"); 95 } 96 } 97 98 /// regionReader - a callback function that wraps the readByte method from 99 /// MemoryObject. 100 /// 101 /// @param arg - The generic callback parameter. In this case, this should 102 /// be a pointer to a MemoryObject. 103 /// @param byte - A pointer to the byte to be read. 104 /// @param address - The address to be read. 105 static int regionReader(const void* arg, uint8_t* byte, uint64_t address) { 106 const MemoryObject* region = static_cast<const MemoryObject*>(arg); 107 return region->readByte(address, byte); 108 } 109 110 /// logger - a callback function that wraps the operator<< method from 111 /// raw_ostream. 112 /// 113 /// @param arg - The generic callback parameter. This should be a pointe 114 /// to a raw_ostream. 115 /// @param log - A string to be logged. logger() adds a newline. 116 static void logger(void* arg, const char* log) { 117 if (!arg) 118 return; 119 120 raw_ostream &vStream = *(static_cast<raw_ostream*>(arg)); 121 vStream << log << "\n"; 122 } 123 124 // 125 // Public interface for the disassembler 126 // 127 128 MCDisassembler::DecodeStatus 129 X86GenericDisassembler::getInstruction(MCInst &instr, 130 uint64_t &size, 131 const MemoryObject ®ion, 132 uint64_t address, 133 raw_ostream &vStream, 134 raw_ostream &cStream) const { 135 CommentStream = &cStream; 136 137 InternalInstruction internalInstr; 138 139 dlog_t loggerFn = logger; 140 if (&vStream == &nulls()) 141 loggerFn = 0; // Disable logging completely if it's going to nulls(). 142 143 int ret = decodeInstruction(&internalInstr, 144 regionReader, 145 (const void*)®ion, 146 loggerFn, 147 (void*)&vStream, 148 (const void*)MII.get(), 149 address, 150 fMode); 151 152 if (ret) { 153 size = internalInstr.readerCursor - address; 154 return Fail; 155 } 156 else { 157 size = internalInstr.length; 158 return (!translateInstruction(instr, internalInstr, this)) ? 159 Success : Fail; 160 } 161 } 162 163 // 164 // Private code that translates from struct InternalInstructions to MCInsts. 165 // 166 167 /// translateRegister - Translates an internal register to the appropriate LLVM 168 /// register, and appends it as an operand to an MCInst. 169 /// 170 /// @param mcInst - The MCInst to append to. 171 /// @param reg - The Reg to append. 172 static void translateRegister(MCInst &mcInst, Reg reg) { 173 #define ENTRY(x) X86::x, 174 uint8_t llvmRegnums[] = { 175 ALL_REGS 176 0 177 }; 178 #undef ENTRY 179 180 uint8_t llvmRegnum = llvmRegnums[reg]; 181 mcInst.addOperand(MCOperand::CreateReg(llvmRegnum)); 182 } 183 184 /// tryAddingSymbolicOperand - trys to add a symbolic operand in place of the 185 /// immediate Value in the MCInst. 186 /// 187 /// @param Value - The immediate Value, has had any PC adjustment made by 188 /// the caller. 189 /// @param isBranch - If the instruction is a branch instruction 190 /// @param Address - The starting address of the instruction 191 /// @param Offset - The byte offset to this immediate in the instruction 192 /// @param Width - The byte width of this immediate in the instruction 193 /// 194 /// If the getOpInfo() function was set when setupForSymbolicDisassembly() was 195 /// called then that function is called to get any symbolic information for the 196 /// immediate in the instruction using the Address, Offset and Width. If that 197 /// returns non-zero then the symbolic information it returns is used to create 198 /// an MCExpr and that is added as an operand to the MCInst. If getOpInfo() 199 /// returns zero and isBranch is true then a symbol look up for immediate Value 200 /// is done and if a symbol is found an MCExpr is created with that, else 201 /// an MCExpr with the immediate Value is created. This function returns true 202 /// if it adds an operand to the MCInst and false otherwise. 203 static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch, 204 uint64_t Address, uint64_t Offset, 205 uint64_t Width, MCInst &MI, 206 const MCDisassembler *Dis) { 207 return Dis->tryAddingSymbolicOperand(MI, Value, Address, isBranch, 208 Offset, Width); 209 } 210 211 /// tryAddingPcLoadReferenceComment - trys to add a comment as to what is being 212 /// referenced by a load instruction with the base register that is the rip. 213 /// These can often be addresses in a literal pool. The Address of the 214 /// instruction and its immediate Value are used to determine the address 215 /// being referenced in the literal pool entry. The SymbolLookUp call back will 216 /// return a pointer to a literal 'C' string if the referenced address is an 217 /// address into a section with 'C' string literals. 218 static void tryAddingPcLoadReferenceComment(uint64_t Address, uint64_t Value, 219 const void *Decoder) { 220 const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder); 221 Dis->tryAddingPcLoadReferenceComment(Value, Address); 222 } 223 224 static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = { 225 0, // SEG_OVERRIDE_NONE 226 X86::CS, 227 X86::SS, 228 X86::DS, 229 X86::ES, 230 X86::FS, 231 X86::GS 232 }; 233 234 /// translateSrcIndex - Appends a source index operand to an MCInst. 235 /// 236 /// @param mcInst - The MCInst to append to. 237 /// @param insn - The internal instruction. 238 static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn) { 239 unsigned baseRegNo; 240 241 if (insn.mode == MODE_64BIT) 242 baseRegNo = insn.prefixPresent[0x67] ? X86::ESI : X86::RSI; 243 else if (insn.mode == MODE_32BIT) 244 baseRegNo = insn.prefixPresent[0x67] ? X86::SI : X86::ESI; 245 else { 246 assert(insn.mode == MODE_16BIT); 247 baseRegNo = insn.prefixPresent[0x67] ? X86::ESI : X86::SI; 248 } 249 MCOperand baseReg = MCOperand::CreateReg(baseRegNo); 250 mcInst.addOperand(baseReg); 251 252 MCOperand segmentReg; 253 segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]); 254 mcInst.addOperand(segmentReg); 255 return false; 256 } 257 258 /// translateDstIndex - Appends a destination index operand to an MCInst. 259 /// 260 /// @param mcInst - The MCInst to append to. 261 /// @param insn - The internal instruction. 262 263 static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn) { 264 unsigned baseRegNo; 265 266 if (insn.mode == MODE_64BIT) 267 baseRegNo = insn.prefixPresent[0x67] ? X86::EDI : X86::RDI; 268 else if (insn.mode == MODE_32BIT) 269 baseRegNo = insn.prefixPresent[0x67] ? X86::DI : X86::EDI; 270 else { 271 assert(insn.mode == MODE_16BIT); 272 baseRegNo = insn.prefixPresent[0x67] ? X86::EDI : X86::DI; 273 } 274 MCOperand baseReg = MCOperand::CreateReg(baseRegNo); 275 mcInst.addOperand(baseReg); 276 return false; 277 } 278 279 /// translateImmediate - Appends an immediate operand to an MCInst. 280 /// 281 /// @param mcInst - The MCInst to append to. 282 /// @param immediate - The immediate value to append. 283 /// @param operand - The operand, as stored in the descriptor table. 284 /// @param insn - The internal instruction. 285 static void translateImmediate(MCInst &mcInst, uint64_t immediate, 286 const OperandSpecifier &operand, 287 InternalInstruction &insn, 288 const MCDisassembler *Dis) { 289 // Sign-extend the immediate if necessary. 290 291 OperandType type = (OperandType)operand.type; 292 293 bool isBranch = false; 294 uint64_t pcrel = 0; 295 if (type == TYPE_RELv) { 296 isBranch = true; 297 pcrel = insn.startLocation + 298 insn.immediateOffset + insn.immediateSize; 299 switch (insn.displacementSize) { 300 default: 301 break; 302 case 1: 303 if(immediate & 0x80) 304 immediate |= ~(0xffull); 305 break; 306 case 2: 307 if(immediate & 0x8000) 308 immediate |= ~(0xffffull); 309 break; 310 case 4: 311 if(immediate & 0x80000000) 312 immediate |= ~(0xffffffffull); 313 break; 314 case 8: 315 break; 316 } 317 } 318 // By default sign-extend all X86 immediates based on their encoding. 319 else if (type == TYPE_IMM8 || type == TYPE_IMM16 || type == TYPE_IMM32 || 320 type == TYPE_IMM64) { 321 uint32_t Opcode = mcInst.getOpcode(); 322 switch (operand.encoding) { 323 default: 324 break; 325 case ENCODING_IB: 326 // Special case those X86 instructions that use the imm8 as a set of 327 // bits, bit count, etc. and are not sign-extend. 328 if (Opcode != X86::BLENDPSrri && Opcode != X86::BLENDPDrri && 329 Opcode != X86::PBLENDWrri && Opcode != X86::MPSADBWrri && 330 Opcode != X86::DPPSrri && Opcode != X86::DPPDrri && 331 Opcode != X86::INSERTPSrr && Opcode != X86::VBLENDPSYrri && 332 Opcode != X86::VBLENDPSYrmi && Opcode != X86::VBLENDPDYrri && 333 Opcode != X86::VBLENDPDYrmi && Opcode != X86::VPBLENDWrri && 334 Opcode != X86::VMPSADBWrri && Opcode != X86::VDPPSYrri && 335 Opcode != X86::VDPPSYrmi && Opcode != X86::VDPPDrri && 336 Opcode != X86::VINSERTPSrr) 337 if(immediate & 0x80) 338 immediate |= ~(0xffull); 339 break; 340 case ENCODING_IW: 341 if(immediate & 0x8000) 342 immediate |= ~(0xffffull); 343 break; 344 case ENCODING_ID: 345 if(immediate & 0x80000000) 346 immediate |= ~(0xffffffffull); 347 break; 348 case ENCODING_IO: 349 break; 350 } 351 } 352 353 switch (type) { 354 case TYPE_XMM32: 355 case TYPE_XMM64: 356 case TYPE_XMM128: 357 mcInst.addOperand(MCOperand::CreateReg(X86::XMM0 + (immediate >> 4))); 358 return; 359 case TYPE_XMM256: 360 mcInst.addOperand(MCOperand::CreateReg(X86::YMM0 + (immediate >> 4))); 361 return; 362 case TYPE_XMM512: 363 mcInst.addOperand(MCOperand::CreateReg(X86::ZMM0 + (immediate >> 4))); 364 return; 365 case TYPE_REL8: 366 isBranch = true; 367 pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize; 368 if(immediate & 0x80) 369 immediate |= ~(0xffull); 370 break; 371 case TYPE_REL32: 372 case TYPE_REL64: 373 isBranch = true; 374 pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize; 375 if(immediate & 0x80000000) 376 immediate |= ~(0xffffffffull); 377 break; 378 default: 379 // operand is 64 bits wide. Do nothing. 380 break; 381 } 382 383 if(!tryAddingSymbolicOperand(immediate + pcrel, isBranch, insn.startLocation, 384 insn.immediateOffset, insn.immediateSize, 385 mcInst, Dis)) 386 mcInst.addOperand(MCOperand::CreateImm(immediate)); 387 388 if (type == TYPE_MOFFS8 || type == TYPE_MOFFS16 || 389 type == TYPE_MOFFS32 || type == TYPE_MOFFS64) { 390 MCOperand segmentReg; 391 segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]); 392 mcInst.addOperand(segmentReg); 393 } 394 } 395 396 /// translateRMRegister - Translates a register stored in the R/M field of the 397 /// ModR/M byte to its LLVM equivalent and appends it to an MCInst. 398 /// @param mcInst - The MCInst to append to. 399 /// @param insn - The internal instruction to extract the R/M field 400 /// from. 401 /// @return - 0 on success; -1 otherwise 402 static bool translateRMRegister(MCInst &mcInst, 403 InternalInstruction &insn) { 404 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) { 405 debug("A R/M register operand may not have a SIB byte"); 406 return true; 407 } 408 409 switch (insn.eaBase) { 410 default: 411 debug("Unexpected EA base register"); 412 return true; 413 case EA_BASE_NONE: 414 debug("EA_BASE_NONE for ModR/M base"); 415 return true; 416 #define ENTRY(x) case EA_BASE_##x: 417 ALL_EA_BASES 418 #undef ENTRY 419 debug("A R/M register operand may not have a base; " 420 "the operand must be a register."); 421 return true; 422 #define ENTRY(x) \ 423 case EA_REG_##x: \ 424 mcInst.addOperand(MCOperand::CreateReg(X86::x)); break; 425 ALL_REGS 426 #undef ENTRY 427 } 428 429 return false; 430 } 431 432 /// translateRMMemory - Translates a memory operand stored in the Mod and R/M 433 /// fields of an internal instruction (and possibly its SIB byte) to a memory 434 /// operand in LLVM's format, and appends it to an MCInst. 435 /// 436 /// @param mcInst - The MCInst to append to. 437 /// @param insn - The instruction to extract Mod, R/M, and SIB fields 438 /// from. 439 /// @return - 0 on success; nonzero otherwise 440 static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, 441 const MCDisassembler *Dis) { 442 // Addresses in an MCInst are represented as five operands: 443 // 1. basereg (register) The R/M base, or (if there is a SIB) the 444 // SIB base 445 // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified 446 // scale amount 447 // 3. indexreg (register) x86_registerNONE, or (if there is a SIB) 448 // the index (which is multiplied by the 449 // scale amount) 450 // 4. displacement (immediate) 0, or the displacement if there is one 451 // 5. segmentreg (register) x86_registerNONE for now, but could be set 452 // if we have segment overrides 453 454 MCOperand baseReg; 455 MCOperand scaleAmount; 456 MCOperand indexReg; 457 MCOperand displacement; 458 MCOperand segmentReg; 459 uint64_t pcrel = 0; 460 461 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) { 462 if (insn.sibBase != SIB_BASE_NONE) { 463 switch (insn.sibBase) { 464 default: 465 debug("Unexpected sibBase"); 466 return true; 467 #define ENTRY(x) \ 468 case SIB_BASE_##x: \ 469 baseReg = MCOperand::CreateReg(X86::x); break; 470 ALL_SIB_BASES 471 #undef ENTRY 472 } 473 } else { 474 baseReg = MCOperand::CreateReg(0); 475 } 476 477 // Check whether we are handling VSIB addressing mode for GATHER. 478 // If sibIndex was set to SIB_INDEX_NONE, index offset is 4 and 479 // we should use SIB_INDEX_XMM4|YMM4 for VSIB. 480 // I don't see a way to get the correct IndexReg in readSIB: 481 // We can tell whether it is VSIB or SIB after instruction ID is decoded, 482 // but instruction ID may not be decoded yet when calling readSIB. 483 uint32_t Opcode = mcInst.getOpcode(); 484 bool IndexIs128 = (Opcode == X86::VGATHERDPDrm || 485 Opcode == X86::VGATHERDPDYrm || 486 Opcode == X86::VGATHERQPDrm || 487 Opcode == X86::VGATHERDPSrm || 488 Opcode == X86::VGATHERQPSrm || 489 Opcode == X86::VPGATHERDQrm || 490 Opcode == X86::VPGATHERDQYrm || 491 Opcode == X86::VPGATHERQQrm || 492 Opcode == X86::VPGATHERDDrm || 493 Opcode == X86::VPGATHERQDrm); 494 bool IndexIs256 = (Opcode == X86::VGATHERQPDYrm || 495 Opcode == X86::VGATHERDPSYrm || 496 Opcode == X86::VGATHERQPSYrm || 497 Opcode == X86::VGATHERDPDZrm || 498 Opcode == X86::VPGATHERDQZrm || 499 Opcode == X86::VPGATHERQQYrm || 500 Opcode == X86::VPGATHERDDYrm || 501 Opcode == X86::VPGATHERQDYrm); 502 bool IndexIs512 = (Opcode == X86::VGATHERQPDZrm || 503 Opcode == X86::VGATHERDPSZrm || 504 Opcode == X86::VGATHERQPSZrm || 505 Opcode == X86::VPGATHERQQZrm || 506 Opcode == X86::VPGATHERDDZrm || 507 Opcode == X86::VPGATHERQDZrm); 508 if (IndexIs128 || IndexIs256 || IndexIs512) { 509 unsigned IndexOffset = insn.sibIndex - 510 (insn.addressSize == 8 ? SIB_INDEX_RAX:SIB_INDEX_EAX); 511 SIBIndex IndexBase = IndexIs512 ? SIB_INDEX_ZMM0 : 512 IndexIs256 ? SIB_INDEX_YMM0 : SIB_INDEX_XMM0; 513 insn.sibIndex = (SIBIndex)(IndexBase + 514 (insn.sibIndex == SIB_INDEX_NONE ? 4 : IndexOffset)); 515 } 516 517 if (insn.sibIndex != SIB_INDEX_NONE) { 518 switch (insn.sibIndex) { 519 default: 520 debug("Unexpected sibIndex"); 521 return true; 522 #define ENTRY(x) \ 523 case SIB_INDEX_##x: \ 524 indexReg = MCOperand::CreateReg(X86::x); break; 525 EA_BASES_32BIT 526 EA_BASES_64BIT 527 REGS_XMM 528 REGS_YMM 529 REGS_ZMM 530 #undef ENTRY 531 } 532 } else { 533 indexReg = MCOperand::CreateReg(0); 534 } 535 536 scaleAmount = MCOperand::CreateImm(insn.sibScale); 537 } else { 538 switch (insn.eaBase) { 539 case EA_BASE_NONE: 540 if (insn.eaDisplacement == EA_DISP_NONE) { 541 debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base"); 542 return true; 543 } 544 if (insn.mode == MODE_64BIT){ 545 pcrel = insn.startLocation + 546 insn.displacementOffset + insn.displacementSize; 547 tryAddingPcLoadReferenceComment(insn.startLocation + 548 insn.displacementOffset, 549 insn.displacement + pcrel, Dis); 550 baseReg = MCOperand::CreateReg(X86::RIP); // Section 2.2.1.6 551 } 552 else 553 baseReg = MCOperand::CreateReg(0); 554 555 indexReg = MCOperand::CreateReg(0); 556 break; 557 case EA_BASE_BX_SI: 558 baseReg = MCOperand::CreateReg(X86::BX); 559 indexReg = MCOperand::CreateReg(X86::SI); 560 break; 561 case EA_BASE_BX_DI: 562 baseReg = MCOperand::CreateReg(X86::BX); 563 indexReg = MCOperand::CreateReg(X86::DI); 564 break; 565 case EA_BASE_BP_SI: 566 baseReg = MCOperand::CreateReg(X86::BP); 567 indexReg = MCOperand::CreateReg(X86::SI); 568 break; 569 case EA_BASE_BP_DI: 570 baseReg = MCOperand::CreateReg(X86::BP); 571 indexReg = MCOperand::CreateReg(X86::DI); 572 break; 573 default: 574 indexReg = MCOperand::CreateReg(0); 575 switch (insn.eaBase) { 576 default: 577 debug("Unexpected eaBase"); 578 return true; 579 // Here, we will use the fill-ins defined above. However, 580 // BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and 581 // sib and sib64 were handled in the top-level if, so they're only 582 // placeholders to keep the compiler happy. 583 #define ENTRY(x) \ 584 case EA_BASE_##x: \ 585 baseReg = MCOperand::CreateReg(X86::x); break; 586 ALL_EA_BASES 587 #undef ENTRY 588 #define ENTRY(x) case EA_REG_##x: 589 ALL_REGS 590 #undef ENTRY 591 debug("A R/M memory operand may not be a register; " 592 "the base field must be a base."); 593 return true; 594 } 595 } 596 597 scaleAmount = MCOperand::CreateImm(1); 598 } 599 600 displacement = MCOperand::CreateImm(insn.displacement); 601 602 segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]); 603 604 mcInst.addOperand(baseReg); 605 mcInst.addOperand(scaleAmount); 606 mcInst.addOperand(indexReg); 607 if(!tryAddingSymbolicOperand(insn.displacement + pcrel, false, 608 insn.startLocation, insn.displacementOffset, 609 insn.displacementSize, mcInst, Dis)) 610 mcInst.addOperand(displacement); 611 mcInst.addOperand(segmentReg); 612 return false; 613 } 614 615 /// translateRM - Translates an operand stored in the R/M (and possibly SIB) 616 /// byte of an instruction to LLVM form, and appends it to an MCInst. 617 /// 618 /// @param mcInst - The MCInst to append to. 619 /// @param operand - The operand, as stored in the descriptor table. 620 /// @param insn - The instruction to extract Mod, R/M, and SIB fields 621 /// from. 622 /// @return - 0 on success; nonzero otherwise 623 static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, 624 InternalInstruction &insn, const MCDisassembler *Dis) { 625 switch (operand.type) { 626 default: 627 debug("Unexpected type for a R/M operand"); 628 return true; 629 case TYPE_R8: 630 case TYPE_R16: 631 case TYPE_R32: 632 case TYPE_R64: 633 case TYPE_Rv: 634 case TYPE_MM: 635 case TYPE_MM32: 636 case TYPE_MM64: 637 case TYPE_XMM: 638 case TYPE_XMM32: 639 case TYPE_XMM64: 640 case TYPE_XMM128: 641 case TYPE_XMM256: 642 case TYPE_XMM512: 643 case TYPE_VK1: 644 case TYPE_VK8: 645 case TYPE_VK16: 646 case TYPE_DEBUGREG: 647 case TYPE_CONTROLREG: 648 return translateRMRegister(mcInst, insn); 649 case TYPE_M: 650 case TYPE_M8: 651 case TYPE_M16: 652 case TYPE_M32: 653 case TYPE_M64: 654 case TYPE_M128: 655 case TYPE_M256: 656 case TYPE_M512: 657 case TYPE_Mv: 658 case TYPE_M32FP: 659 case TYPE_M64FP: 660 case TYPE_M80FP: 661 case TYPE_M16INT: 662 case TYPE_M32INT: 663 case TYPE_M64INT: 664 case TYPE_M1616: 665 case TYPE_M1632: 666 case TYPE_M1664: 667 case TYPE_LEA: 668 return translateRMMemory(mcInst, insn, Dis); 669 } 670 } 671 672 /// translateFPRegister - Translates a stack position on the FPU stack to its 673 /// LLVM form, and appends it to an MCInst. 674 /// 675 /// @param mcInst - The MCInst to append to. 676 /// @param stackPos - The stack position to translate. 677 static void translateFPRegister(MCInst &mcInst, 678 uint8_t stackPos) { 679 mcInst.addOperand(MCOperand::CreateReg(X86::ST0 + stackPos)); 680 } 681 682 /// translateMaskRegister - Translates a 3-bit mask register number to 683 /// LLVM form, and appends it to an MCInst. 684 /// 685 /// @param mcInst - The MCInst to append to. 686 /// @param maskRegNum - Number of mask register from 0 to 7. 687 /// @return - false on success; true otherwise. 688 static bool translateMaskRegister(MCInst &mcInst, 689 uint8_t maskRegNum) { 690 if (maskRegNum >= 8) { 691 debug("Invalid mask register number"); 692 return true; 693 } 694 695 mcInst.addOperand(MCOperand::CreateReg(X86::K0 + maskRegNum)); 696 return false; 697 } 698 699 /// translateOperand - Translates an operand stored in an internal instruction 700 /// to LLVM's format and appends it to an MCInst. 701 /// 702 /// @param mcInst - The MCInst to append to. 703 /// @param operand - The operand, as stored in the descriptor table. 704 /// @param insn - The internal instruction. 705 /// @return - false on success; true otherwise. 706 static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, 707 InternalInstruction &insn, 708 const MCDisassembler *Dis) { 709 switch (operand.encoding) { 710 default: 711 debug("Unhandled operand encoding during translation"); 712 return true; 713 case ENCODING_REG: 714 translateRegister(mcInst, insn.reg); 715 return false; 716 case ENCODING_WRITEMASK: 717 return translateMaskRegister(mcInst, insn.writemask); 718 case ENCODING_RM: 719 return translateRM(mcInst, operand, insn, Dis); 720 case ENCODING_CB: 721 case ENCODING_CW: 722 case ENCODING_CD: 723 case ENCODING_CP: 724 case ENCODING_CO: 725 case ENCODING_CT: 726 debug("Translation of code offsets isn't supported."); 727 return true; 728 case ENCODING_IB: 729 case ENCODING_IW: 730 case ENCODING_ID: 731 case ENCODING_IO: 732 case ENCODING_Iv: 733 case ENCODING_Ia: 734 translateImmediate(mcInst, 735 insn.immediates[insn.numImmediatesTranslated++], 736 operand, 737 insn, 738 Dis); 739 return false; 740 case ENCODING_SI: 741 return translateSrcIndex(mcInst, insn); 742 case ENCODING_DI: 743 return translateDstIndex(mcInst, insn); 744 case ENCODING_RB: 745 case ENCODING_RW: 746 case ENCODING_RD: 747 case ENCODING_RO: 748 case ENCODING_Rv: 749 translateRegister(mcInst, insn.opcodeRegister); 750 return false; 751 case ENCODING_FP: 752 translateFPRegister(mcInst, insn.modRM & 7); 753 return false; 754 case ENCODING_VVVV: 755 translateRegister(mcInst, insn.vvvv); 756 return false; 757 case ENCODING_DUP: 758 return translateOperand(mcInst, insn.operands[operand.type - TYPE_DUP0], 759 insn, Dis); 760 } 761 } 762 763 /// translateInstruction - Translates an internal instruction and all its 764 /// operands to an MCInst. 765 /// 766 /// @param mcInst - The MCInst to populate with the instruction's data. 767 /// @param insn - The internal instruction. 768 /// @return - false on success; true otherwise. 769 static bool translateInstruction(MCInst &mcInst, 770 InternalInstruction &insn, 771 const MCDisassembler *Dis) { 772 if (!insn.spec) { 773 debug("Instruction has no specification"); 774 return true; 775 } 776 777 mcInst.setOpcode(insn.instructionID); 778 // If when reading the prefix bytes we determined the overlapping 0xf2 or 0xf3 779 // prefix bytes should be disassembled as xrelease and xacquire then set the 780 // opcode to those instead of the rep and repne opcodes. 781 if (insn.xAcquireRelease) { 782 if(mcInst.getOpcode() == X86::REP_PREFIX) 783 mcInst.setOpcode(X86::XRELEASE_PREFIX); 784 else if(mcInst.getOpcode() == X86::REPNE_PREFIX) 785 mcInst.setOpcode(X86::XACQUIRE_PREFIX); 786 } 787 788 int index; 789 790 insn.numImmediatesTranslated = 0; 791 792 for (index = 0; index < X86_MAX_OPERANDS; ++index) { 793 if (insn.operands[index].encoding != ENCODING_NONE) { 794 if (translateOperand(mcInst, insn.operands[index], insn, Dis)) { 795 return true; 796 } 797 } 798 } 799 800 return false; 801 } 802 803 static MCDisassembler *createX86Disassembler(const Target &T, 804 const MCSubtargetInfo &STI, 805 MCContext &Ctx) { 806 std::unique_ptr<const MCInstrInfo> MII(T.createMCInstrInfo()); 807 return new X86Disassembler::X86GenericDisassembler(STI, Ctx, std::move(MII)); 808 } 809 810 extern "C" void LLVMInitializeX86Disassembler() { 811 // Register the disassembler. 812 TargetRegistry::RegisterMCDisassembler(TheX86_32Target, 813 createX86Disassembler); 814 TargetRegistry::RegisterMCDisassembler(TheX86_64Target, 815 createX86Disassembler); 816 } 817