1 //===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file is part of the X86 Disassembler. 11 // It contains code to translate the data produced by the decoder into 12 // MCInsts. 13 // Documentation for the disassembler can be found in X86Disassembler.h. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "X86Disassembler.h" 18 #include "X86DisassemblerDecoder.h" 19 #include "llvm/MC/MCContext.h" 20 #include "llvm/MC/MCDisassembler.h" 21 #include "llvm/MC/MCExpr.h" 22 #include "llvm/MC/MCInst.h" 23 #include "llvm/MC/MCInstrInfo.h" 24 #include "llvm/MC/MCSubtargetInfo.h" 25 #include "llvm/Support/Debug.h" 26 #include "llvm/Support/TargetRegistry.h" 27 #include "llvm/Support/raw_ostream.h" 28 29 using namespace llvm; 30 using namespace llvm::X86Disassembler; 31 32 #define DEBUG_TYPE "x86-disassembler" 33 34 #define GET_REGINFO_ENUM 35 #include "X86GenRegisterInfo.inc" 36 #define GET_INSTRINFO_ENUM 37 #include "X86GenInstrInfo.inc" 38 #define GET_SUBTARGETINFO_ENUM 39 #include "X86GenSubtargetInfo.inc" 40 41 void llvm::X86Disassembler::Debug(const char *file, unsigned line, 42 const char *s) { 43 dbgs() << file << ":" << line << ": " << s; 44 } 45 46 const char *llvm::X86Disassembler::GetInstrName(unsigned Opcode, 47 const void *mii) { 48 const MCInstrInfo *MII = static_cast<const MCInstrInfo *>(mii); 49 return MII->getName(Opcode); 50 } 51 52 #define debug(s) DEBUG(Debug(__FILE__, __LINE__, s)); 53 54 namespace llvm { 55 56 // Fill-ins to make the compiler happy. These constants are never actually 57 // assigned; they are just filler to make an automatically-generated switch 58 // statement work. 59 namespace X86 { 60 enum { 61 BX_SI = 500, 62 BX_DI = 501, 63 BP_SI = 502, 64 BP_DI = 503, 65 sib = 504, 66 sib64 = 505 67 }; 68 } 69 70 extern Target TheX86_32Target, TheX86_64Target; 71 72 } 73 74 static bool translateInstruction(MCInst &target, 75 InternalInstruction &source, 76 const MCDisassembler *Dis); 77 78 X86GenericDisassembler::X86GenericDisassembler( 79 const MCSubtargetInfo &STI, 80 MCContext &Ctx, 81 std::unique_ptr<const MCInstrInfo> MII) 82 : MCDisassembler(STI, Ctx), MII(std::move(MII)) { 83 switch (STI.getFeatureBits() & 84 (X86::Mode16Bit | X86::Mode32Bit | X86::Mode64Bit)) { 85 case X86::Mode16Bit: 86 fMode = MODE_16BIT; 87 break; 88 case X86::Mode32Bit: 89 fMode = MODE_32BIT; 90 break; 91 case X86::Mode64Bit: 92 fMode = MODE_64BIT; 93 break; 94 default: 95 llvm_unreachable("Invalid CPU mode"); 96 } 97 } 98 99 struct Region { 100 ArrayRef<uint8_t> Bytes; 101 uint64_t Base; 102 Region(ArrayRef<uint8_t> Bytes, uint64_t Base) : Bytes(Bytes), Base(Base) {} 103 }; 104 105 /// A callback function that wraps the readByte method from Region. 106 /// 107 /// @param Arg - The generic callback parameter. In this case, this should 108 /// be a pointer to a Region. 109 /// @param Byte - A pointer to the byte to be read. 110 /// @param Address - The address to be read. 111 static int regionReader(const void *Arg, uint8_t *Byte, uint64_t Address) { 112 auto *R = static_cast<const Region *>(Arg); 113 ArrayRef<uint8_t> Bytes = R->Bytes; 114 unsigned Index = Address - R->Base; 115 if (Bytes.size() <= Index) 116 return -1; 117 *Byte = Bytes[Index]; 118 return 0; 119 } 120 121 /// logger - a callback function that wraps the operator<< method from 122 /// raw_ostream. 123 /// 124 /// @param arg - The generic callback parameter. This should be a pointe 125 /// to a raw_ostream. 126 /// @param log - A string to be logged. logger() adds a newline. 127 static void logger(void* arg, const char* log) { 128 if (!arg) 129 return; 130 131 raw_ostream &vStream = *(static_cast<raw_ostream*>(arg)); 132 vStream << log << "\n"; 133 } 134 135 // 136 // Public interface for the disassembler 137 // 138 139 MCDisassembler::DecodeStatus X86GenericDisassembler::getInstruction( 140 MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, 141 raw_ostream &VStream, raw_ostream &CStream) const { 142 CommentStream = &CStream; 143 144 InternalInstruction InternalInstr; 145 146 dlog_t LoggerFn = logger; 147 if (&VStream == &nulls()) 148 LoggerFn = nullptr; // Disable logging completely if it's going to nulls(). 149 150 Region R(Bytes, Address); 151 152 int Ret = decodeInstruction(&InternalInstr, regionReader, (const void *)&R, 153 LoggerFn, (void *)&VStream, 154 (const void *)MII.get(), Address, fMode); 155 156 if (Ret) { 157 Size = InternalInstr.readerCursor - Address; 158 return Fail; 159 } else { 160 Size = InternalInstr.length; 161 return (!translateInstruction(Instr, InternalInstr, this)) ? Success : Fail; 162 } 163 } 164 165 // 166 // Private code that translates from struct InternalInstructions to MCInsts. 167 // 168 169 /// translateRegister - Translates an internal register to the appropriate LLVM 170 /// register, and appends it as an operand to an MCInst. 171 /// 172 /// @param mcInst - The MCInst to append to. 173 /// @param reg - The Reg to append. 174 static void translateRegister(MCInst &mcInst, Reg reg) { 175 #define ENTRY(x) X86::x, 176 uint8_t llvmRegnums[] = { 177 ALL_REGS 178 0 179 }; 180 #undef ENTRY 181 182 uint8_t llvmRegnum = llvmRegnums[reg]; 183 mcInst.addOperand(MCOperand::CreateReg(llvmRegnum)); 184 } 185 186 /// tryAddingSymbolicOperand - trys to add a symbolic operand in place of the 187 /// immediate Value in the MCInst. 188 /// 189 /// @param Value - The immediate Value, has had any PC adjustment made by 190 /// the caller. 191 /// @param isBranch - If the instruction is a branch instruction 192 /// @param Address - The starting address of the instruction 193 /// @param Offset - The byte offset to this immediate in the instruction 194 /// @param Width - The byte width of this immediate in the instruction 195 /// 196 /// If the getOpInfo() function was set when setupForSymbolicDisassembly() was 197 /// called then that function is called to get any symbolic information for the 198 /// immediate in the instruction using the Address, Offset and Width. If that 199 /// returns non-zero then the symbolic information it returns is used to create 200 /// an MCExpr and that is added as an operand to the MCInst. If getOpInfo() 201 /// returns zero and isBranch is true then a symbol look up for immediate Value 202 /// is done and if a symbol is found an MCExpr is created with that, else 203 /// an MCExpr with the immediate Value is created. This function returns true 204 /// if it adds an operand to the MCInst and false otherwise. 205 static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch, 206 uint64_t Address, uint64_t Offset, 207 uint64_t Width, MCInst &MI, 208 const MCDisassembler *Dis) { 209 return Dis->tryAddingSymbolicOperand(MI, Value, Address, isBranch, 210 Offset, Width); 211 } 212 213 /// tryAddingPcLoadReferenceComment - trys to add a comment as to what is being 214 /// referenced by a load instruction with the base register that is the rip. 215 /// These can often be addresses in a literal pool. The Address of the 216 /// instruction and its immediate Value are used to determine the address 217 /// being referenced in the literal pool entry. The SymbolLookUp call back will 218 /// return a pointer to a literal 'C' string if the referenced address is an 219 /// address into a section with 'C' string literals. 220 static void tryAddingPcLoadReferenceComment(uint64_t Address, uint64_t Value, 221 const void *Decoder) { 222 const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder); 223 Dis->tryAddingPcLoadReferenceComment(Value, Address); 224 } 225 226 static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = { 227 0, // SEG_OVERRIDE_NONE 228 X86::CS, 229 X86::SS, 230 X86::DS, 231 X86::ES, 232 X86::FS, 233 X86::GS 234 }; 235 236 /// translateSrcIndex - Appends a source index operand to an MCInst. 237 /// 238 /// @param mcInst - The MCInst to append to. 239 /// @param insn - The internal instruction. 240 static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn) { 241 unsigned baseRegNo; 242 243 if (insn.mode == MODE_64BIT) 244 baseRegNo = insn.prefixPresent[0x67] ? X86::ESI : X86::RSI; 245 else if (insn.mode == MODE_32BIT) 246 baseRegNo = insn.prefixPresent[0x67] ? X86::SI : X86::ESI; 247 else { 248 assert(insn.mode == MODE_16BIT); 249 baseRegNo = insn.prefixPresent[0x67] ? X86::ESI : X86::SI; 250 } 251 MCOperand baseReg = MCOperand::CreateReg(baseRegNo); 252 mcInst.addOperand(baseReg); 253 254 MCOperand segmentReg; 255 segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]); 256 mcInst.addOperand(segmentReg); 257 return false; 258 } 259 260 /// translateDstIndex - Appends a destination index operand to an MCInst. 261 /// 262 /// @param mcInst - The MCInst to append to. 263 /// @param insn - The internal instruction. 264 265 static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn) { 266 unsigned baseRegNo; 267 268 if (insn.mode == MODE_64BIT) 269 baseRegNo = insn.prefixPresent[0x67] ? X86::EDI : X86::RDI; 270 else if (insn.mode == MODE_32BIT) 271 baseRegNo = insn.prefixPresent[0x67] ? X86::DI : X86::EDI; 272 else { 273 assert(insn.mode == MODE_16BIT); 274 baseRegNo = insn.prefixPresent[0x67] ? X86::EDI : X86::DI; 275 } 276 MCOperand baseReg = MCOperand::CreateReg(baseRegNo); 277 mcInst.addOperand(baseReg); 278 return false; 279 } 280 281 /// translateImmediate - Appends an immediate operand to an MCInst. 282 /// 283 /// @param mcInst - The MCInst to append to. 284 /// @param immediate - The immediate value to append. 285 /// @param operand - The operand, as stored in the descriptor table. 286 /// @param insn - The internal instruction. 287 static void translateImmediate(MCInst &mcInst, uint64_t immediate, 288 const OperandSpecifier &operand, 289 InternalInstruction &insn, 290 const MCDisassembler *Dis) { 291 // Sign-extend the immediate if necessary. 292 293 OperandType type = (OperandType)operand.type; 294 295 bool isBranch = false; 296 uint64_t pcrel = 0; 297 if (type == TYPE_RELv) { 298 isBranch = true; 299 pcrel = insn.startLocation + 300 insn.immediateOffset + insn.immediateSize; 301 switch (insn.displacementSize) { 302 default: 303 break; 304 case 1: 305 if(immediate & 0x80) 306 immediate |= ~(0xffull); 307 break; 308 case 2: 309 if(immediate & 0x8000) 310 immediate |= ~(0xffffull); 311 break; 312 case 4: 313 if(immediate & 0x80000000) 314 immediate |= ~(0xffffffffull); 315 break; 316 case 8: 317 break; 318 } 319 } 320 // By default sign-extend all X86 immediates based on their encoding. 321 else if (type == TYPE_IMM8 || type == TYPE_IMM16 || type == TYPE_IMM32 || 322 type == TYPE_IMM64 || type == TYPE_IMMv) { 323 switch (operand.encoding) { 324 default: 325 break; 326 case ENCODING_IB: 327 if(immediate & 0x80) 328 immediate |= ~(0xffull); 329 break; 330 case ENCODING_IW: 331 if(immediate & 0x8000) 332 immediate |= ~(0xffffull); 333 break; 334 case ENCODING_ID: 335 if(immediate & 0x80000000) 336 immediate |= ~(0xffffffffull); 337 break; 338 case ENCODING_IO: 339 break; 340 } 341 } else if (type == TYPE_IMM3) { 342 // Check for immediates that printSSECC can't handle. 343 if (immediate >= 8) { 344 unsigned NewOpc; 345 switch (mcInst.getOpcode()) { 346 default: llvm_unreachable("unexpected opcode"); 347 case X86::CMPPDrmi: NewOpc = X86::CMPPDrmi_alt; break; 348 case X86::CMPPDrri: NewOpc = X86::CMPPDrri_alt; break; 349 case X86::CMPPSrmi: NewOpc = X86::CMPPSrmi_alt; break; 350 case X86::CMPPSrri: NewOpc = X86::CMPPSrri_alt; break; 351 case X86::CMPSDrm: NewOpc = X86::CMPSDrm_alt; break; 352 case X86::CMPSDrr: NewOpc = X86::CMPSDrr_alt; break; 353 case X86::CMPSSrm: NewOpc = X86::CMPSSrm_alt; break; 354 case X86::CMPSSrr: NewOpc = X86::CMPSSrr_alt; break; 355 } 356 // Switch opcode to the one that doesn't get special printing. 357 mcInst.setOpcode(NewOpc); 358 } 359 } else if (type == TYPE_IMM5) { 360 // Check for immediates that printAVXCC can't handle. 361 if (immediate >= 32) { 362 unsigned NewOpc; 363 switch (mcInst.getOpcode()) { 364 default: llvm_unreachable("unexpected opcode"); 365 case X86::VCMPPDrmi: NewOpc = X86::VCMPPDrmi_alt; break; 366 case X86::VCMPPDrri: NewOpc = X86::VCMPPDrri_alt; break; 367 case X86::VCMPPSrmi: NewOpc = X86::VCMPPSrmi_alt; break; 368 case X86::VCMPPSrri: NewOpc = X86::VCMPPSrri_alt; break; 369 case X86::VCMPSDrm: NewOpc = X86::VCMPSDrm_alt; break; 370 case X86::VCMPSDrr: NewOpc = X86::VCMPSDrr_alt; break; 371 case X86::VCMPSSrm: NewOpc = X86::VCMPSSrm_alt; break; 372 case X86::VCMPSSrr: NewOpc = X86::VCMPSSrr_alt; break; 373 case X86::VCMPPDYrmi: NewOpc = X86::VCMPPDYrmi_alt; break; 374 case X86::VCMPPDYrri: NewOpc = X86::VCMPPDYrri_alt; break; 375 case X86::VCMPPSYrmi: NewOpc = X86::VCMPPSYrmi_alt; break; 376 case X86::VCMPPSYrri: NewOpc = X86::VCMPPSYrri_alt; break; 377 case X86::VCMPPDZrmi: NewOpc = X86::VCMPPDZrmi_alt; break; 378 case X86::VCMPPDZrri: NewOpc = X86::VCMPPDZrri_alt; break; 379 case X86::VCMPPSZrmi: NewOpc = X86::VCMPPSZrmi_alt; break; 380 case X86::VCMPPSZrri: NewOpc = X86::VCMPPSZrri_alt; break; 381 case X86::VCMPSDZrm: NewOpc = X86::VCMPSDZrmi_alt; break; 382 case X86::VCMPSDZrr: NewOpc = X86::VCMPSDZrri_alt; break; 383 case X86::VCMPSSZrm: NewOpc = X86::VCMPSSZrmi_alt; break; 384 case X86::VCMPSSZrr: NewOpc = X86::VCMPSSZrri_alt; break; 385 } 386 // Switch opcode to the one that doesn't get special printing. 387 mcInst.setOpcode(NewOpc); 388 } 389 } else if (type == TYPE_AVX512ICC) { 390 if (immediate >= 8 || ((immediate & 0x3) == 3)) { 391 unsigned NewOpc; 392 switch (mcInst.getOpcode()) { 393 default: llvm_unreachable("unexpected opcode"); 394 case X86::VPCMPBZ128rmi: NewOpc = X86::VPCMPBZ128rmi_alt; break; 395 case X86::VPCMPBZ128rmik: NewOpc = X86::VPCMPBZ128rmik_alt; break; 396 case X86::VPCMPBZ128rri: NewOpc = X86::VPCMPBZ128rri_alt; break; 397 case X86::VPCMPBZ128rrik: NewOpc = X86::VPCMPBZ128rrik_alt; break; 398 case X86::VPCMPBZ256rmi: NewOpc = X86::VPCMPBZ256rmi_alt; break; 399 case X86::VPCMPBZ256rmik: NewOpc = X86::VPCMPBZ256rmik_alt; break; 400 case X86::VPCMPBZ256rri: NewOpc = X86::VPCMPBZ256rri_alt; break; 401 case X86::VPCMPBZ256rrik: NewOpc = X86::VPCMPBZ256rrik_alt; break; 402 case X86::VPCMPBZrmi: NewOpc = X86::VPCMPBZrmi_alt; break; 403 case X86::VPCMPBZrmik: NewOpc = X86::VPCMPBZrmik_alt; break; 404 case X86::VPCMPBZrri: NewOpc = X86::VPCMPBZrri_alt; break; 405 case X86::VPCMPBZrrik: NewOpc = X86::VPCMPBZrrik_alt; break; 406 case X86::VPCMPDZ128rmi: NewOpc = X86::VPCMPDZ128rmi_alt; break; 407 case X86::VPCMPDZ128rmib: NewOpc = X86::VPCMPDZ128rmib_alt; break; 408 case X86::VPCMPDZ128rmibk: NewOpc = X86::VPCMPDZ128rmibk_alt; break; 409 case X86::VPCMPDZ128rmik: NewOpc = X86::VPCMPDZ128rmik_alt; break; 410 case X86::VPCMPDZ128rri: NewOpc = X86::VPCMPDZ128rri_alt; break; 411 case X86::VPCMPDZ128rrik: NewOpc = X86::VPCMPDZ128rrik_alt; break; 412 case X86::VPCMPDZ256rmi: NewOpc = X86::VPCMPDZ256rmi_alt; break; 413 case X86::VPCMPDZ256rmib: NewOpc = X86::VPCMPDZ256rmib_alt; break; 414 case X86::VPCMPDZ256rmibk: NewOpc = X86::VPCMPDZ256rmibk_alt; break; 415 case X86::VPCMPDZ256rmik: NewOpc = X86::VPCMPDZ256rmik_alt; break; 416 case X86::VPCMPDZ256rri: NewOpc = X86::VPCMPDZ256rri_alt; break; 417 case X86::VPCMPDZ256rrik: NewOpc = X86::VPCMPDZ256rrik_alt; break; 418 case X86::VPCMPDZrmi: NewOpc = X86::VPCMPDZrmi_alt; break; 419 case X86::VPCMPDZrmib: NewOpc = X86::VPCMPDZrmib_alt; break; 420 case X86::VPCMPDZrmibk: NewOpc = X86::VPCMPDZrmibk_alt; break; 421 case X86::VPCMPDZrmik: NewOpc = X86::VPCMPDZrmik_alt; break; 422 case X86::VPCMPDZrri: NewOpc = X86::VPCMPDZrri_alt; break; 423 case X86::VPCMPDZrrik: NewOpc = X86::VPCMPDZrrik_alt; break; 424 case X86::VPCMPQZ128rmi: NewOpc = X86::VPCMPQZ128rmi_alt; break; 425 case X86::VPCMPQZ128rmib: NewOpc = X86::VPCMPQZ128rmib_alt; break; 426 case X86::VPCMPQZ128rmibk: NewOpc = X86::VPCMPQZ128rmibk_alt; break; 427 case X86::VPCMPQZ128rmik: NewOpc = X86::VPCMPQZ128rmik_alt; break; 428 case X86::VPCMPQZ128rri: NewOpc = X86::VPCMPQZ128rri_alt; break; 429 case X86::VPCMPQZ128rrik: NewOpc = X86::VPCMPQZ128rrik_alt; break; 430 case X86::VPCMPQZ256rmi: NewOpc = X86::VPCMPQZ256rmi_alt; break; 431 case X86::VPCMPQZ256rmib: NewOpc = X86::VPCMPQZ256rmib_alt; break; 432 case X86::VPCMPQZ256rmibk: NewOpc = X86::VPCMPQZ256rmibk_alt; break; 433 case X86::VPCMPQZ256rmik: NewOpc = X86::VPCMPQZ256rmik_alt; break; 434 case X86::VPCMPQZ256rri: NewOpc = X86::VPCMPQZ256rri_alt; break; 435 case X86::VPCMPQZ256rrik: NewOpc = X86::VPCMPQZ256rrik_alt; break; 436 case X86::VPCMPQZrmi: NewOpc = X86::VPCMPQZrmi_alt; break; 437 case X86::VPCMPQZrmib: NewOpc = X86::VPCMPQZrmib_alt; break; 438 case X86::VPCMPQZrmibk: NewOpc = X86::VPCMPQZrmibk_alt; break; 439 case X86::VPCMPQZrmik: NewOpc = X86::VPCMPQZrmik_alt; break; 440 case X86::VPCMPQZrri: NewOpc = X86::VPCMPQZrri_alt; break; 441 case X86::VPCMPQZrrik: NewOpc = X86::VPCMPQZrrik_alt; break; 442 case X86::VPCMPUBZ128rmi: NewOpc = X86::VPCMPUBZ128rmi_alt; break; 443 case X86::VPCMPUBZ128rmik: NewOpc = X86::VPCMPUBZ128rmik_alt; break; 444 case X86::VPCMPUBZ128rri: NewOpc = X86::VPCMPUBZ128rri_alt; break; 445 case X86::VPCMPUBZ128rrik: NewOpc = X86::VPCMPUBZ128rrik_alt; break; 446 case X86::VPCMPUBZ256rmi: NewOpc = X86::VPCMPUBZ256rmi_alt; break; 447 case X86::VPCMPUBZ256rmik: NewOpc = X86::VPCMPUBZ256rmik_alt; break; 448 case X86::VPCMPUBZ256rri: NewOpc = X86::VPCMPUBZ256rri_alt; break; 449 case X86::VPCMPUBZ256rrik: NewOpc = X86::VPCMPUBZ256rrik_alt; break; 450 case X86::VPCMPUBZrmi: NewOpc = X86::VPCMPUBZrmi_alt; break; 451 case X86::VPCMPUBZrmik: NewOpc = X86::VPCMPUBZrmik_alt; break; 452 case X86::VPCMPUBZrri: NewOpc = X86::VPCMPUBZrri_alt; break; 453 case X86::VPCMPUBZrrik: NewOpc = X86::VPCMPUBZrrik_alt; break; 454 case X86::VPCMPUDZ128rmi: NewOpc = X86::VPCMPUDZ128rmi_alt; break; 455 case X86::VPCMPUDZ128rmib: NewOpc = X86::VPCMPUDZ128rmib_alt; break; 456 case X86::VPCMPUDZ128rmibk: NewOpc = X86::VPCMPUDZ128rmibk_alt; break; 457 case X86::VPCMPUDZ128rmik: NewOpc = X86::VPCMPUDZ128rmik_alt; break; 458 case X86::VPCMPUDZ128rri: NewOpc = X86::VPCMPUDZ128rri_alt; break; 459 case X86::VPCMPUDZ128rrik: NewOpc = X86::VPCMPUDZ128rrik_alt; break; 460 case X86::VPCMPUDZ256rmi: NewOpc = X86::VPCMPUDZ256rmi_alt; break; 461 case X86::VPCMPUDZ256rmib: NewOpc = X86::VPCMPUDZ256rmib_alt; break; 462 case X86::VPCMPUDZ256rmibk: NewOpc = X86::VPCMPUDZ256rmibk_alt; break; 463 case X86::VPCMPUDZ256rmik: NewOpc = X86::VPCMPUDZ256rmik_alt; break; 464 case X86::VPCMPUDZ256rri: NewOpc = X86::VPCMPUDZ256rri_alt; break; 465 case X86::VPCMPUDZ256rrik: NewOpc = X86::VPCMPUDZ256rrik_alt; break; 466 case X86::VPCMPUDZrmi: NewOpc = X86::VPCMPUDZrmi_alt; break; 467 case X86::VPCMPUDZrmib: NewOpc = X86::VPCMPUDZrmib_alt; break; 468 case X86::VPCMPUDZrmibk: NewOpc = X86::VPCMPUDZrmibk_alt; break; 469 case X86::VPCMPUDZrmik: NewOpc = X86::VPCMPUDZrmik_alt; break; 470 case X86::VPCMPUDZrri: NewOpc = X86::VPCMPUDZrri_alt; break; 471 case X86::VPCMPUDZrrik: NewOpc = X86::VPCMPUDZrrik_alt; break; 472 case X86::VPCMPUQZ128rmi: NewOpc = X86::VPCMPUQZ128rmi_alt; break; 473 case X86::VPCMPUQZ128rmib: NewOpc = X86::VPCMPUQZ128rmib_alt; break; 474 case X86::VPCMPUQZ128rmibk: NewOpc = X86::VPCMPUQZ128rmibk_alt; break; 475 case X86::VPCMPUQZ128rmik: NewOpc = X86::VPCMPUQZ128rmik_alt; break; 476 case X86::VPCMPUQZ128rri: NewOpc = X86::VPCMPUQZ128rri_alt; break; 477 case X86::VPCMPUQZ128rrik: NewOpc = X86::VPCMPUQZ128rrik_alt; break; 478 case X86::VPCMPUQZ256rmi: NewOpc = X86::VPCMPUQZ256rmi_alt; break; 479 case X86::VPCMPUQZ256rmib: NewOpc = X86::VPCMPUQZ256rmib_alt; break; 480 case X86::VPCMPUQZ256rmibk: NewOpc = X86::VPCMPUQZ256rmibk_alt; break; 481 case X86::VPCMPUQZ256rmik: NewOpc = X86::VPCMPUQZ256rmik_alt; break; 482 case X86::VPCMPUQZ256rri: NewOpc = X86::VPCMPUQZ256rri_alt; break; 483 case X86::VPCMPUQZ256rrik: NewOpc = X86::VPCMPUQZ256rrik_alt; break; 484 case X86::VPCMPUQZrmi: NewOpc = X86::VPCMPUQZrmi_alt; break; 485 case X86::VPCMPUQZrmib: NewOpc = X86::VPCMPUQZrmib_alt; break; 486 case X86::VPCMPUQZrmibk: NewOpc = X86::VPCMPUQZrmibk_alt; break; 487 case X86::VPCMPUQZrmik: NewOpc = X86::VPCMPUQZrmik_alt; break; 488 case X86::VPCMPUQZrri: NewOpc = X86::VPCMPUQZrri_alt; break; 489 case X86::VPCMPUQZrrik: NewOpc = X86::VPCMPUQZrrik_alt; break; 490 case X86::VPCMPUWZ128rmi: NewOpc = X86::VPCMPUWZ128rmi_alt; break; 491 case X86::VPCMPUWZ128rmik: NewOpc = X86::VPCMPUWZ128rmik_alt; break; 492 case X86::VPCMPUWZ128rri: NewOpc = X86::VPCMPUWZ128rri_alt; break; 493 case X86::VPCMPUWZ128rrik: NewOpc = X86::VPCMPUWZ128rrik_alt; break; 494 case X86::VPCMPUWZ256rmi: NewOpc = X86::VPCMPUWZ256rmi_alt; break; 495 case X86::VPCMPUWZ256rmik: NewOpc = X86::VPCMPUWZ256rmik_alt; break; 496 case X86::VPCMPUWZ256rri: NewOpc = X86::VPCMPUWZ256rri_alt; break; 497 case X86::VPCMPUWZ256rrik: NewOpc = X86::VPCMPUWZ256rrik_alt; break; 498 case X86::VPCMPUWZrmi: NewOpc = X86::VPCMPUWZrmi_alt; break; 499 case X86::VPCMPUWZrmik: NewOpc = X86::VPCMPUWZrmik_alt; break; 500 case X86::VPCMPUWZrri: NewOpc = X86::VPCMPUWZrri_alt; break; 501 case X86::VPCMPUWZrrik: NewOpc = X86::VPCMPUWZrrik_alt; break; 502 case X86::VPCMPWZ128rmi: NewOpc = X86::VPCMPWZ128rmi_alt; break; 503 case X86::VPCMPWZ128rmik: NewOpc = X86::VPCMPWZ128rmik_alt; break; 504 case X86::VPCMPWZ128rri: NewOpc = X86::VPCMPWZ128rri_alt; break; 505 case X86::VPCMPWZ128rrik: NewOpc = X86::VPCMPWZ128rrik_alt; break; 506 case X86::VPCMPWZ256rmi: NewOpc = X86::VPCMPWZ256rmi_alt; break; 507 case X86::VPCMPWZ256rmik: NewOpc = X86::VPCMPWZ256rmik_alt; break; 508 case X86::VPCMPWZ256rri: NewOpc = X86::VPCMPWZ256rri_alt; break; 509 case X86::VPCMPWZ256rrik: NewOpc = X86::VPCMPWZ256rrik_alt; break; 510 case X86::VPCMPWZrmi: NewOpc = X86::VPCMPWZrmi_alt; break; 511 case X86::VPCMPWZrmik: NewOpc = X86::VPCMPWZrmik_alt; break; 512 case X86::VPCMPWZrri: NewOpc = X86::VPCMPWZrri_alt; break; 513 case X86::VPCMPWZrrik: NewOpc = X86::VPCMPWZrrik_alt; break; 514 } 515 // Switch opcode to the one that doesn't get special printing. 516 mcInst.setOpcode(NewOpc); 517 } 518 } 519 520 switch (type) { 521 case TYPE_XMM32: 522 case TYPE_XMM64: 523 case TYPE_XMM128: 524 mcInst.addOperand(MCOperand::CreateReg(X86::XMM0 + (immediate >> 4))); 525 return; 526 case TYPE_XMM256: 527 mcInst.addOperand(MCOperand::CreateReg(X86::YMM0 + (immediate >> 4))); 528 return; 529 case TYPE_XMM512: 530 mcInst.addOperand(MCOperand::CreateReg(X86::ZMM0 + (immediate >> 4))); 531 return; 532 case TYPE_REL8: 533 isBranch = true; 534 pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize; 535 if(immediate & 0x80) 536 immediate |= ~(0xffull); 537 break; 538 case TYPE_REL32: 539 case TYPE_REL64: 540 isBranch = true; 541 pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize; 542 if(immediate & 0x80000000) 543 immediate |= ~(0xffffffffull); 544 break; 545 default: 546 // operand is 64 bits wide. Do nothing. 547 break; 548 } 549 550 if(!tryAddingSymbolicOperand(immediate + pcrel, isBranch, insn.startLocation, 551 insn.immediateOffset, insn.immediateSize, 552 mcInst, Dis)) 553 mcInst.addOperand(MCOperand::CreateImm(immediate)); 554 555 if (type == TYPE_MOFFS8 || type == TYPE_MOFFS16 || 556 type == TYPE_MOFFS32 || type == TYPE_MOFFS64) { 557 MCOperand segmentReg; 558 segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]); 559 mcInst.addOperand(segmentReg); 560 } 561 } 562 563 /// translateRMRegister - Translates a register stored in the R/M field of the 564 /// ModR/M byte to its LLVM equivalent and appends it to an MCInst. 565 /// @param mcInst - The MCInst to append to. 566 /// @param insn - The internal instruction to extract the R/M field 567 /// from. 568 /// @return - 0 on success; -1 otherwise 569 static bool translateRMRegister(MCInst &mcInst, 570 InternalInstruction &insn) { 571 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) { 572 debug("A R/M register operand may not have a SIB byte"); 573 return true; 574 } 575 576 switch (insn.eaBase) { 577 default: 578 debug("Unexpected EA base register"); 579 return true; 580 case EA_BASE_NONE: 581 debug("EA_BASE_NONE for ModR/M base"); 582 return true; 583 #define ENTRY(x) case EA_BASE_##x: 584 ALL_EA_BASES 585 #undef ENTRY 586 debug("A R/M register operand may not have a base; " 587 "the operand must be a register."); 588 return true; 589 #define ENTRY(x) \ 590 case EA_REG_##x: \ 591 mcInst.addOperand(MCOperand::CreateReg(X86::x)); break; 592 ALL_REGS 593 #undef ENTRY 594 } 595 596 return false; 597 } 598 599 /// translateRMMemory - Translates a memory operand stored in the Mod and R/M 600 /// fields of an internal instruction (and possibly its SIB byte) to a memory 601 /// operand in LLVM's format, and appends it to an MCInst. 602 /// 603 /// @param mcInst - The MCInst to append to. 604 /// @param insn - The instruction to extract Mod, R/M, and SIB fields 605 /// from. 606 /// @return - 0 on success; nonzero otherwise 607 static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, 608 const MCDisassembler *Dis) { 609 // Addresses in an MCInst are represented as five operands: 610 // 1. basereg (register) The R/M base, or (if there is a SIB) the 611 // SIB base 612 // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified 613 // scale amount 614 // 3. indexreg (register) x86_registerNONE, or (if there is a SIB) 615 // the index (which is multiplied by the 616 // scale amount) 617 // 4. displacement (immediate) 0, or the displacement if there is one 618 // 5. segmentreg (register) x86_registerNONE for now, but could be set 619 // if we have segment overrides 620 621 MCOperand baseReg; 622 MCOperand scaleAmount; 623 MCOperand indexReg; 624 MCOperand displacement; 625 MCOperand segmentReg; 626 uint64_t pcrel = 0; 627 628 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) { 629 if (insn.sibBase != SIB_BASE_NONE) { 630 switch (insn.sibBase) { 631 default: 632 debug("Unexpected sibBase"); 633 return true; 634 #define ENTRY(x) \ 635 case SIB_BASE_##x: \ 636 baseReg = MCOperand::CreateReg(X86::x); break; 637 ALL_SIB_BASES 638 #undef ENTRY 639 } 640 } else { 641 baseReg = MCOperand::CreateReg(0); 642 } 643 644 // Check whether we are handling VSIB addressing mode for GATHER. 645 // If sibIndex was set to SIB_INDEX_NONE, index offset is 4 and 646 // we should use SIB_INDEX_XMM4|YMM4 for VSIB. 647 // I don't see a way to get the correct IndexReg in readSIB: 648 // We can tell whether it is VSIB or SIB after instruction ID is decoded, 649 // but instruction ID may not be decoded yet when calling readSIB. 650 uint32_t Opcode = mcInst.getOpcode(); 651 bool IndexIs128 = (Opcode == X86::VGATHERDPDrm || 652 Opcode == X86::VGATHERDPDYrm || 653 Opcode == X86::VGATHERQPDrm || 654 Opcode == X86::VGATHERDPSrm || 655 Opcode == X86::VGATHERQPSrm || 656 Opcode == X86::VPGATHERDQrm || 657 Opcode == X86::VPGATHERDQYrm || 658 Opcode == X86::VPGATHERQQrm || 659 Opcode == X86::VPGATHERDDrm || 660 Opcode == X86::VPGATHERQDrm); 661 bool IndexIs256 = (Opcode == X86::VGATHERQPDYrm || 662 Opcode == X86::VGATHERDPSYrm || 663 Opcode == X86::VGATHERQPSYrm || 664 Opcode == X86::VGATHERDPDZrm || 665 Opcode == X86::VPGATHERDQZrm || 666 Opcode == X86::VPGATHERQQYrm || 667 Opcode == X86::VPGATHERDDYrm || 668 Opcode == X86::VPGATHERQDYrm); 669 bool IndexIs512 = (Opcode == X86::VGATHERQPDZrm || 670 Opcode == X86::VGATHERDPSZrm || 671 Opcode == X86::VGATHERQPSZrm || 672 Opcode == X86::VPGATHERQQZrm || 673 Opcode == X86::VPGATHERDDZrm || 674 Opcode == X86::VPGATHERQDZrm); 675 if (IndexIs128 || IndexIs256 || IndexIs512) { 676 unsigned IndexOffset = insn.sibIndex - 677 (insn.addressSize == 8 ? SIB_INDEX_RAX:SIB_INDEX_EAX); 678 SIBIndex IndexBase = IndexIs512 ? SIB_INDEX_ZMM0 : 679 IndexIs256 ? SIB_INDEX_YMM0 : SIB_INDEX_XMM0; 680 insn.sibIndex = (SIBIndex)(IndexBase + 681 (insn.sibIndex == SIB_INDEX_NONE ? 4 : IndexOffset)); 682 } 683 684 if (insn.sibIndex != SIB_INDEX_NONE) { 685 switch (insn.sibIndex) { 686 default: 687 debug("Unexpected sibIndex"); 688 return true; 689 #define ENTRY(x) \ 690 case SIB_INDEX_##x: \ 691 indexReg = MCOperand::CreateReg(X86::x); break; 692 EA_BASES_32BIT 693 EA_BASES_64BIT 694 REGS_XMM 695 REGS_YMM 696 REGS_ZMM 697 #undef ENTRY 698 } 699 } else { 700 indexReg = MCOperand::CreateReg(0); 701 } 702 703 scaleAmount = MCOperand::CreateImm(insn.sibScale); 704 } else { 705 switch (insn.eaBase) { 706 case EA_BASE_NONE: 707 if (insn.eaDisplacement == EA_DISP_NONE) { 708 debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base"); 709 return true; 710 } 711 if (insn.mode == MODE_64BIT){ 712 pcrel = insn.startLocation + 713 insn.displacementOffset + insn.displacementSize; 714 tryAddingPcLoadReferenceComment(insn.startLocation + 715 insn.displacementOffset, 716 insn.displacement + pcrel, Dis); 717 baseReg = MCOperand::CreateReg(X86::RIP); // Section 2.2.1.6 718 } 719 else 720 baseReg = MCOperand::CreateReg(0); 721 722 indexReg = MCOperand::CreateReg(0); 723 break; 724 case EA_BASE_BX_SI: 725 baseReg = MCOperand::CreateReg(X86::BX); 726 indexReg = MCOperand::CreateReg(X86::SI); 727 break; 728 case EA_BASE_BX_DI: 729 baseReg = MCOperand::CreateReg(X86::BX); 730 indexReg = MCOperand::CreateReg(X86::DI); 731 break; 732 case EA_BASE_BP_SI: 733 baseReg = MCOperand::CreateReg(X86::BP); 734 indexReg = MCOperand::CreateReg(X86::SI); 735 break; 736 case EA_BASE_BP_DI: 737 baseReg = MCOperand::CreateReg(X86::BP); 738 indexReg = MCOperand::CreateReg(X86::DI); 739 break; 740 default: 741 indexReg = MCOperand::CreateReg(0); 742 switch (insn.eaBase) { 743 default: 744 debug("Unexpected eaBase"); 745 return true; 746 // Here, we will use the fill-ins defined above. However, 747 // BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and 748 // sib and sib64 were handled in the top-level if, so they're only 749 // placeholders to keep the compiler happy. 750 #define ENTRY(x) \ 751 case EA_BASE_##x: \ 752 baseReg = MCOperand::CreateReg(X86::x); break; 753 ALL_EA_BASES 754 #undef ENTRY 755 #define ENTRY(x) case EA_REG_##x: 756 ALL_REGS 757 #undef ENTRY 758 debug("A R/M memory operand may not be a register; " 759 "the base field must be a base."); 760 return true; 761 } 762 } 763 764 scaleAmount = MCOperand::CreateImm(1); 765 } 766 767 displacement = MCOperand::CreateImm(insn.displacement); 768 769 segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]); 770 771 mcInst.addOperand(baseReg); 772 mcInst.addOperand(scaleAmount); 773 mcInst.addOperand(indexReg); 774 if(!tryAddingSymbolicOperand(insn.displacement + pcrel, false, 775 insn.startLocation, insn.displacementOffset, 776 insn.displacementSize, mcInst, Dis)) 777 mcInst.addOperand(displacement); 778 mcInst.addOperand(segmentReg); 779 return false; 780 } 781 782 /// translateRM - Translates an operand stored in the R/M (and possibly SIB) 783 /// byte of an instruction to LLVM form, and appends it to an MCInst. 784 /// 785 /// @param mcInst - The MCInst to append to. 786 /// @param operand - The operand, as stored in the descriptor table. 787 /// @param insn - The instruction to extract Mod, R/M, and SIB fields 788 /// from. 789 /// @return - 0 on success; nonzero otherwise 790 static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, 791 InternalInstruction &insn, const MCDisassembler *Dis) { 792 switch (operand.type) { 793 default: 794 debug("Unexpected type for a R/M operand"); 795 return true; 796 case TYPE_R8: 797 case TYPE_R16: 798 case TYPE_R32: 799 case TYPE_R64: 800 case TYPE_Rv: 801 case TYPE_MM64: 802 case TYPE_XMM: 803 case TYPE_XMM32: 804 case TYPE_XMM64: 805 case TYPE_XMM128: 806 case TYPE_XMM256: 807 case TYPE_XMM512: 808 case TYPE_VK1: 809 case TYPE_VK8: 810 case TYPE_VK16: 811 case TYPE_DEBUGREG: 812 case TYPE_CONTROLREG: 813 return translateRMRegister(mcInst, insn); 814 case TYPE_M: 815 case TYPE_M8: 816 case TYPE_M16: 817 case TYPE_M32: 818 case TYPE_M64: 819 case TYPE_M128: 820 case TYPE_M256: 821 case TYPE_M512: 822 case TYPE_Mv: 823 case TYPE_M32FP: 824 case TYPE_M64FP: 825 case TYPE_M80FP: 826 case TYPE_M1616: 827 case TYPE_M1632: 828 case TYPE_M1664: 829 case TYPE_LEA: 830 return translateRMMemory(mcInst, insn, Dis); 831 } 832 } 833 834 /// translateFPRegister - Translates a stack position on the FPU stack to its 835 /// LLVM form, and appends it to an MCInst. 836 /// 837 /// @param mcInst - The MCInst to append to. 838 /// @param stackPos - The stack position to translate. 839 static void translateFPRegister(MCInst &mcInst, 840 uint8_t stackPos) { 841 mcInst.addOperand(MCOperand::CreateReg(X86::ST0 + stackPos)); 842 } 843 844 /// translateMaskRegister - Translates a 3-bit mask register number to 845 /// LLVM form, and appends it to an MCInst. 846 /// 847 /// @param mcInst - The MCInst to append to. 848 /// @param maskRegNum - Number of mask register from 0 to 7. 849 /// @return - false on success; true otherwise. 850 static bool translateMaskRegister(MCInst &mcInst, 851 uint8_t maskRegNum) { 852 if (maskRegNum >= 8) { 853 debug("Invalid mask register number"); 854 return true; 855 } 856 857 mcInst.addOperand(MCOperand::CreateReg(X86::K0 + maskRegNum)); 858 return false; 859 } 860 861 /// translateOperand - Translates an operand stored in an internal instruction 862 /// to LLVM's format and appends it to an MCInst. 863 /// 864 /// @param mcInst - The MCInst to append to. 865 /// @param operand - The operand, as stored in the descriptor table. 866 /// @param insn - The internal instruction. 867 /// @return - false on success; true otherwise. 868 static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, 869 InternalInstruction &insn, 870 const MCDisassembler *Dis) { 871 switch (operand.encoding) { 872 default: 873 debug("Unhandled operand encoding during translation"); 874 return true; 875 case ENCODING_REG: 876 translateRegister(mcInst, insn.reg); 877 return false; 878 case ENCODING_WRITEMASK: 879 return translateMaskRegister(mcInst, insn.writemask); 880 CASE_ENCODING_RM: 881 return translateRM(mcInst, operand, insn, Dis); 882 case ENCODING_CB: 883 case ENCODING_CW: 884 case ENCODING_CD: 885 case ENCODING_CP: 886 case ENCODING_CO: 887 case ENCODING_CT: 888 debug("Translation of code offsets isn't supported."); 889 return true; 890 case ENCODING_IB: 891 case ENCODING_IW: 892 case ENCODING_ID: 893 case ENCODING_IO: 894 case ENCODING_Iv: 895 case ENCODING_Ia: 896 translateImmediate(mcInst, 897 insn.immediates[insn.numImmediatesTranslated++], 898 operand, 899 insn, 900 Dis); 901 return false; 902 case ENCODING_SI: 903 return translateSrcIndex(mcInst, insn); 904 case ENCODING_DI: 905 return translateDstIndex(mcInst, insn); 906 case ENCODING_RB: 907 case ENCODING_RW: 908 case ENCODING_RD: 909 case ENCODING_RO: 910 case ENCODING_Rv: 911 translateRegister(mcInst, insn.opcodeRegister); 912 return false; 913 case ENCODING_FP: 914 translateFPRegister(mcInst, insn.modRM & 7); 915 return false; 916 case ENCODING_VVVV: 917 translateRegister(mcInst, insn.vvvv); 918 return false; 919 case ENCODING_DUP: 920 return translateOperand(mcInst, insn.operands[operand.type - TYPE_DUP0], 921 insn, Dis); 922 } 923 } 924 925 /// translateInstruction - Translates an internal instruction and all its 926 /// operands to an MCInst. 927 /// 928 /// @param mcInst - The MCInst to populate with the instruction's data. 929 /// @param insn - The internal instruction. 930 /// @return - false on success; true otherwise. 931 static bool translateInstruction(MCInst &mcInst, 932 InternalInstruction &insn, 933 const MCDisassembler *Dis) { 934 if (!insn.spec) { 935 debug("Instruction has no specification"); 936 return true; 937 } 938 939 mcInst.setOpcode(insn.instructionID); 940 // If when reading the prefix bytes we determined the overlapping 0xf2 or 0xf3 941 // prefix bytes should be disassembled as xrelease and xacquire then set the 942 // opcode to those instead of the rep and repne opcodes. 943 if (insn.xAcquireRelease) { 944 if(mcInst.getOpcode() == X86::REP_PREFIX) 945 mcInst.setOpcode(X86::XRELEASE_PREFIX); 946 else if(mcInst.getOpcode() == X86::REPNE_PREFIX) 947 mcInst.setOpcode(X86::XACQUIRE_PREFIX); 948 } 949 950 insn.numImmediatesTranslated = 0; 951 952 for (const auto &Op : insn.operands) { 953 if (Op.encoding != ENCODING_NONE) { 954 if (translateOperand(mcInst, Op, insn, Dis)) { 955 return true; 956 } 957 } 958 } 959 960 return false; 961 } 962 963 static MCDisassembler *createX86Disassembler(const Target &T, 964 const MCSubtargetInfo &STI, 965 MCContext &Ctx) { 966 std::unique_ptr<const MCInstrInfo> MII(T.createMCInstrInfo()); 967 return new X86Disassembler::X86GenericDisassembler(STI, Ctx, std::move(MII)); 968 } 969 970 extern "C" void LLVMInitializeX86Disassembler() { 971 // Register the disassembler. 972 TargetRegistry::RegisterMCDisassembler(TheX86_32Target, 973 createX86Disassembler); 974 TargetRegistry::RegisterMCDisassembler(TheX86_64Target, 975 createX86Disassembler); 976 } 977