1 //===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file is part of the X86 Disassembler.
10 // It contains code to translate the data produced by the decoder into
11 // MCInsts.
12 //
13 //
14 // The X86 disassembler is a table-driven disassembler for the 16-, 32-, and
15 // 64-bit X86 instruction sets. The main decode sequence for an assembly
16 // instruction in this disassembler is:
17 //
18 // 1. Read the prefix bytes and determine the attributes of the instruction.
19 // These attributes, recorded in enum attributeBits
20 // (X86DisassemblerDecoderCommon.h), form a bitmask. The table CONTEXTS_SYM
21 // provides a mapping from bitmasks to contexts, which are represented by
22 // enum InstructionContext (ibid.).
23 //
24 // 2. Read the opcode, and determine what kind of opcode it is. The
25 // disassembler distinguishes four kinds of opcodes, which are enumerated in
26 // OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte
27 // (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a
28 // (0x0f 0x3a 0xnn). Mandatory prefixes are treated as part of the context.
29 //
30 // 3. Depending on the opcode type, look in one of four ClassDecision structures
31 // (X86DisassemblerDecoderCommon.h). Use the opcode class to determine which
// OpcodeDecision (ibid.) to look the opcode up in. Look up the opcode to get
33 // a ModRMDecision (ibid.).
34 //
35 // 4. Some instructions, such as escape opcodes or extended opcodes, or even
36 // instructions that have ModRM*Reg / ModRM*Mem forms in LLVM, need the
37 // ModR/M byte to complete decode. The ModRMDecision's type is an entry from
38 // ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if the
39 // ModR/M byte is required and how to interpret it.
40 //
41 // 5. After resolving the ModRMDecision, the disassembler has a unique ID
42 // of type InstrUID (X86DisassemblerDecoderCommon.h). Looking this ID up in
43 // INSTRUCTIONS_SYM yields the name of the instruction and the encodings and
44 // meanings of its operands.
45 //
46 // 6. For each operand, its encoding is an entry from OperandEncoding
47 // (X86DisassemblerDecoderCommon.h) and its type is an entry from
48 // OperandType (ibid.). The encoding indicates how to read it from the
49 // instruction; the type indicates how to interpret the value once it has
50 // been read. For example, a register operand could be stored in the R/M
51 // field of the ModR/M byte, the REG field of the ModR/M byte, or added to
// the main opcode. This is orthogonal to its meaning (a GPR or an XMM
53 // register, for instance). Given this information, the operands can be
54 // extracted and interpreted.
55 //
56 // 7. As the last step, the disassembler translates the instruction information
57 // and operands into a format understandable by the client - in this case, an
58 // MCInst for use by the MC infrastructure.
59 //
60 // The disassembler is broken broadly into two parts: the table emitter that
61 // emits the instruction decode tables discussed above during compilation, and
62 // the disassembler itself. The table emitter is documented in more detail in
63 // utils/TableGen/X86DisassemblerEmitter.h.
64 //
65 // X86Disassembler.cpp contains the code responsible for step 7, and for
66 // invoking the decoder to execute steps 1-6.
67 // X86DisassemblerDecoderCommon.h contains the definitions needed by both the
68 // table emitter and the disassembler.
69 // X86DisassemblerDecoder.h contains the public interface of the decoder,
70 // factored out into C for possible use by other projects.
71 // X86DisassemblerDecoder.c contains the source code of the decoder, which is
72 // responsible for steps 1-6.
73 //
74 //===----------------------------------------------------------------------===//
75
76 #include "MCTargetDesc/X86BaseInfo.h"
77 #include "MCTargetDesc/X86MCTargetDesc.h"
78 #include "TargetInfo/X86TargetInfo.h"
79 #include "X86DisassemblerDecoder.h"
80 #include "llvm/MC/MCContext.h"
81 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
82 #include "llvm/MC/MCExpr.h"
83 #include "llvm/MC/MCInst.h"
84 #include "llvm/MC/MCInstrInfo.h"
85 #include "llvm/MC/MCSubtargetInfo.h"
86 #include "llvm/MC/TargetRegistry.h"
87 #include "llvm/Support/Debug.h"
88 #include "llvm/Support/Format.h"
89 #include "llvm/Support/raw_ostream.h"
90
91 using namespace llvm;
92 using namespace llvm::X86Disassembler;
93
94 #define DEBUG_TYPE "x86-disassembler"
95
// Streams the current source line number, ": " and the message s to dbgs()
// through LLVM_DEBUG. Note that the expansion already ends with a semicolon.
#define debug(s) LLVM_DEBUG(dbgs() << __LINE__ << ": " << s);
97
// Specifies whether a ModR/M byte is needed and (if so) which
// instruction each possible value of the ModR/M byte corresponds to. Once
// this information is known, we have narrowed down to a single instruction.
struct ModRMDecision {
  // One of the MODRM_* kinds; decode() switches on it to decide how the
  // ModR/M byte contributes to the lookup.
  uint8_t modrm_type;
  // Base index into modRMTable; decode() adds a ModR/M-derived offset to it
  // (no offset at all for MODRM_ONEENTRY).
  uint16_t instructionIDs;
};
105
// Specifies which set of ModR/M->instruction tables to look at
// given a particular opcode.
struct OpcodeDecision {
  // One entry per possible opcode byte value; decode() indexes this array
  // directly with the opcode byte.
  ModRMDecision modRMDecisions[256];
};
111
// Specifies which opcode->instruction tables to look at given
// a particular context (set of attributes). Since there are many possible
// contexts, the decoder first uses CONTEXTS_SYM to determine which context
// applies given a specific set of attributes. Hence there are only IC_max
// entries in this table, rather than 2^(ATTR_max).
struct ContextDecision {
  // One OpcodeDecision per instruction context; decode() indexes this array
  // with the InstructionContext value.
  OpcodeDecision opcodeDecisions[IC_max];
};
120
121 #include "X86GenDisassemblerTables.inc"
122
decode(OpcodeType type,InstructionContext insnContext,uint8_t opcode,uint8_t modRM)123 static InstrUID decode(OpcodeType type, InstructionContext insnContext,
124 uint8_t opcode, uint8_t modRM) {
125 const struct ModRMDecision *dec;
126
127 switch (type) {
128 case ONEBYTE:
129 dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
130 break;
131 case TWOBYTE:
132 dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
133 break;
134 case THREEBYTE_38:
135 dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
136 break;
137 case THREEBYTE_3A:
138 dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
139 break;
140 case XOP8_MAP:
141 dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
142 break;
143 case XOP9_MAP:
144 dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
145 break;
146 case XOPA_MAP:
147 dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
148 break;
149 case THREEDNOW_MAP:
150 dec =
151 &THREEDNOW_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
152 break;
153 case MAP5:
154 dec = &MAP5_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
155 break;
156 case MAP6:
157 dec = &MAP6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
158 break;
159 }
160
161 switch (dec->modrm_type) {
162 default:
163 llvm_unreachable("Corrupt table! Unknown modrm_type");
164 return 0;
165 case MODRM_ONEENTRY:
166 return modRMTable[dec->instructionIDs];
167 case MODRM_SPLITRM:
168 if (modFromModRM(modRM) == 0x3)
169 return modRMTable[dec->instructionIDs + 1];
170 return modRMTable[dec->instructionIDs];
171 case MODRM_SPLITREG:
172 if (modFromModRM(modRM) == 0x3)
173 return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3) + 8];
174 return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)];
175 case MODRM_SPLITMISC:
176 if (modFromModRM(modRM) == 0x3)
177 return modRMTable[dec->instructionIDs + (modRM & 0x3f) + 8];
178 return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)];
179 case MODRM_FULL:
180 return modRMTable[dec->instructionIDs + modRM];
181 }
182 }
183
peek(struct InternalInstruction * insn,uint8_t & byte)184 static bool peek(struct InternalInstruction *insn, uint8_t &byte) {
185 uint64_t offset = insn->readerCursor - insn->startLocation;
186 if (offset >= insn->bytes.size())
187 return true;
188 byte = insn->bytes[offset];
189 return false;
190 }
191
consume(InternalInstruction * insn,T & ptr)192 template <typename T> static bool consume(InternalInstruction *insn, T &ptr) {
193 auto r = insn->bytes;
194 uint64_t offset = insn->readerCursor - insn->startLocation;
195 if (offset + sizeof(T) > r.size())
196 return true;
197 T ret = 0;
198 for (unsigned i = 0; i < sizeof(T); ++i)
199 ret |= (uint64_t)r[offset + i] << (i * 8);
200 ptr = ret;
201 insn->readerCursor += sizeof(T);
202 return false;
203 }
204
isREX(struct InternalInstruction * insn,uint8_t prefix)205 static bool isREX(struct InternalInstruction *insn, uint8_t prefix) {
206 return insn->mode == MODE_64BIT && prefix >= 0x40 && prefix <= 0x4f;
207 }
208
209 // Consumes all of an instruction's prefix bytes, and marks the
210 // instruction as having them. Also sets the instruction's default operand,
211 // address, and other relevant data sizes to report operands correctly.
212 //
213 // insn must not be empty.
readPrefixes(struct InternalInstruction * insn)214 static int readPrefixes(struct InternalInstruction *insn) {
215 bool isPrefix = true;
216 uint8_t byte = 0;
217 uint8_t nextByte;
218
219 LLVM_DEBUG(dbgs() << "readPrefixes()");
220
221 while (isPrefix) {
222 // If we fail reading prefixes, just stop here and let the opcode reader
223 // deal with it.
224 if (consume(insn, byte))
225 break;
226
227 // If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then
228 // break and let it be disassembled as a normal "instruction".
229 if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) // LOCK
230 break;
231
232 if ((byte == 0xf2 || byte == 0xf3) && !peek(insn, nextByte)) {
233 // If the byte is 0xf2 or 0xf3, and any of the following conditions are
234 // met:
235 // - it is followed by a LOCK (0xf0) prefix
236 // - it is followed by an xchg instruction
237 // then it should be disassembled as a xacquire/xrelease not repne/rep.
238 if (((nextByte == 0xf0) ||
239 ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) {
240 insn->xAcquireRelease = true;
241 if (!(byte == 0xf3 && nextByte == 0x90)) // PAUSE instruction support
242 break;
243 }
244 // Also if the byte is 0xf3, and the following condition is met:
245 // - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or
246 // "mov mem, imm" (opcode 0xc6/0xc7) instructions.
247 // then it should be disassembled as an xrelease not rep.
248 if (byte == 0xf3 && (nextByte == 0x88 || nextByte == 0x89 ||
249 nextByte == 0xc6 || nextByte == 0xc7)) {
250 insn->xAcquireRelease = true;
251 break;
252 }
253 if (isREX(insn, nextByte)) {
254 uint8_t nnextByte;
255 // Go to REX prefix after the current one
256 if (consume(insn, nnextByte))
257 return -1;
258 // We should be able to read next byte after REX prefix
259 if (peek(insn, nnextByte))
260 return -1;
261 --insn->readerCursor;
262 }
263 }
264
265 switch (byte) {
266 case 0xf0: // LOCK
267 insn->hasLockPrefix = true;
268 break;
269 case 0xf2: // REPNE/REPNZ
270 case 0xf3: { // REP or REPE/REPZ
271 uint8_t nextByte;
272 if (peek(insn, nextByte))
273 break;
274 // TODO:
275 // 1. There could be several 0x66
276 // 2. if (nextByte == 0x66) and nextNextByte != 0x0f then
277 // it's not mandatory prefix
278 // 3. if (nextByte >= 0x40 && nextByte <= 0x4f) it's REX and we need
279 // 0x0f exactly after it to be mandatory prefix
280 if (isREX(insn, nextByte) || nextByte == 0x0f || nextByte == 0x66)
281 // The last of 0xf2 /0xf3 is mandatory prefix
282 insn->mandatoryPrefix = byte;
283 insn->repeatPrefix = byte;
284 break;
285 }
286 case 0x2e: // CS segment override -OR- Branch not taken
287 insn->segmentOverride = SEG_OVERRIDE_CS;
288 break;
289 case 0x36: // SS segment override -OR- Branch taken
290 insn->segmentOverride = SEG_OVERRIDE_SS;
291 break;
292 case 0x3e: // DS segment override
293 insn->segmentOverride = SEG_OVERRIDE_DS;
294 break;
295 case 0x26: // ES segment override
296 insn->segmentOverride = SEG_OVERRIDE_ES;
297 break;
298 case 0x64: // FS segment override
299 insn->segmentOverride = SEG_OVERRIDE_FS;
300 break;
301 case 0x65: // GS segment override
302 insn->segmentOverride = SEG_OVERRIDE_GS;
303 break;
304 case 0x66: { // Operand-size override {
305 uint8_t nextByte;
306 insn->hasOpSize = true;
307 if (peek(insn, nextByte))
308 break;
309 // 0x66 can't overwrite existing mandatory prefix and should be ignored
310 if (!insn->mandatoryPrefix && (nextByte == 0x0f || isREX(insn, nextByte)))
311 insn->mandatoryPrefix = byte;
312 break;
313 }
314 case 0x67: // Address-size override
315 insn->hasAdSize = true;
316 break;
317 default: // Not a prefix byte
318 isPrefix = false;
319 break;
320 }
321
322 if (isPrefix)
323 LLVM_DEBUG(dbgs() << format("Found prefix 0x%hhx", byte));
324 }
325
326 insn->vectorExtensionType = TYPE_NO_VEX_XOP;
327
328 if (byte == 0x62) {
329 uint8_t byte1, byte2;
330 if (consume(insn, byte1)) {
331 LLVM_DEBUG(dbgs() << "Couldn't read second byte of EVEX prefix");
332 return -1;
333 }
334
335 if (peek(insn, byte2)) {
336 LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix");
337 return -1;
338 }
339
340 if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) &&
341 ((~byte1 & 0x8) == 0x8) && ((byte2 & 0x4) == 0x4)) {
342 insn->vectorExtensionType = TYPE_EVEX;
343 } else {
344 --insn->readerCursor; // unconsume byte1
345 --insn->readerCursor; // unconsume byte
346 }
347
348 if (insn->vectorExtensionType == TYPE_EVEX) {
349 insn->vectorExtensionPrefix[0] = byte;
350 insn->vectorExtensionPrefix[1] = byte1;
351 if (consume(insn, insn->vectorExtensionPrefix[2])) {
352 LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix");
353 return -1;
354 }
355 if (consume(insn, insn->vectorExtensionPrefix[3])) {
356 LLVM_DEBUG(dbgs() << "Couldn't read fourth byte of EVEX prefix");
357 return -1;
358 }
359
360 // We simulate the REX prefix for simplicity's sake
361 if (insn->mode == MODE_64BIT) {
362 insn->rexPrefix = 0x40 |
363 (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3) |
364 (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2) |
365 (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1) |
366 (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0);
367 }
368
369 LLVM_DEBUG(
370 dbgs() << format(
371 "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",
372 insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
373 insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]));
374 }
375 } else if (byte == 0xc4) {
376 uint8_t byte1;
377 if (peek(insn, byte1)) {
378 LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX");
379 return -1;
380 }
381
382 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)
383 insn->vectorExtensionType = TYPE_VEX_3B;
384 else
385 --insn->readerCursor;
386
387 if (insn->vectorExtensionType == TYPE_VEX_3B) {
388 insn->vectorExtensionPrefix[0] = byte;
389 consume(insn, insn->vectorExtensionPrefix[1]);
390 consume(insn, insn->vectorExtensionPrefix[2]);
391
392 // We simulate the REX prefix for simplicity's sake
393
394 if (insn->mode == MODE_64BIT)
395 insn->rexPrefix = 0x40 |
396 (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3) |
397 (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2) |
398 (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1) |
399 (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0);
400
401 LLVM_DEBUG(dbgs() << format("Found VEX prefix 0x%hhx 0x%hhx 0x%hhx",
402 insn->vectorExtensionPrefix[0],
403 insn->vectorExtensionPrefix[1],
404 insn->vectorExtensionPrefix[2]));
405 }
406 } else if (byte == 0xc5) {
407 uint8_t byte1;
408 if (peek(insn, byte1)) {
409 LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX");
410 return -1;
411 }
412
413 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)
414 insn->vectorExtensionType = TYPE_VEX_2B;
415 else
416 --insn->readerCursor;
417
418 if (insn->vectorExtensionType == TYPE_VEX_2B) {
419 insn->vectorExtensionPrefix[0] = byte;
420 consume(insn, insn->vectorExtensionPrefix[1]);
421
422 if (insn->mode == MODE_64BIT)
423 insn->rexPrefix =
424 0x40 | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2);
425
426 switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
427 default:
428 break;
429 case VEX_PREFIX_66:
430 insn->hasOpSize = true;
431 break;
432 }
433
434 LLVM_DEBUG(dbgs() << format("Found VEX prefix 0x%hhx 0x%hhx",
435 insn->vectorExtensionPrefix[0],
436 insn->vectorExtensionPrefix[1]));
437 }
438 } else if (byte == 0x8f) {
439 uint8_t byte1;
440 if (peek(insn, byte1)) {
441 LLVM_DEBUG(dbgs() << "Couldn't read second byte of XOP");
442 return -1;
443 }
444
445 if ((byte1 & 0x38) != 0x0) // 0 in these 3 bits is a POP instruction.
446 insn->vectorExtensionType = TYPE_XOP;
447 else
448 --insn->readerCursor;
449
450 if (insn->vectorExtensionType == TYPE_XOP) {
451 insn->vectorExtensionPrefix[0] = byte;
452 consume(insn, insn->vectorExtensionPrefix[1]);
453 consume(insn, insn->vectorExtensionPrefix[2]);
454
455 // We simulate the REX prefix for simplicity's sake
456
457 if (insn->mode == MODE_64BIT)
458 insn->rexPrefix = 0x40 |
459 (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3) |
460 (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2) |
461 (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1) |
462 (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0);
463
464 switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
465 default:
466 break;
467 case VEX_PREFIX_66:
468 insn->hasOpSize = true;
469 break;
470 }
471
472 LLVM_DEBUG(dbgs() << format("Found XOP prefix 0x%hhx 0x%hhx 0x%hhx",
473 insn->vectorExtensionPrefix[0],
474 insn->vectorExtensionPrefix[1],
475 insn->vectorExtensionPrefix[2]));
476 }
477 } else if (isREX(insn, byte)) {
478 if (peek(insn, nextByte))
479 return -1;
480 insn->rexPrefix = byte;
481 LLVM_DEBUG(dbgs() << format("Found REX prefix 0x%hhx", byte));
482 } else
483 --insn->readerCursor;
484
485 if (insn->mode == MODE_16BIT) {
486 insn->registerSize = (insn->hasOpSize ? 4 : 2);
487 insn->addressSize = (insn->hasAdSize ? 4 : 2);
488 insn->displacementSize = (insn->hasAdSize ? 4 : 2);
489 insn->immediateSize = (insn->hasOpSize ? 4 : 2);
490 } else if (insn->mode == MODE_32BIT) {
491 insn->registerSize = (insn->hasOpSize ? 2 : 4);
492 insn->addressSize = (insn->hasAdSize ? 2 : 4);
493 insn->displacementSize = (insn->hasAdSize ? 2 : 4);
494 insn->immediateSize = (insn->hasOpSize ? 2 : 4);
495 } else if (insn->mode == MODE_64BIT) {
496 insn->displacementSize = 4;
497 if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
498 insn->registerSize = 8;
499 insn->addressSize = (insn->hasAdSize ? 4 : 8);
500 insn->immediateSize = 4;
501 insn->hasOpSize = false;
502 } else {
503 insn->registerSize = (insn->hasOpSize ? 2 : 4);
504 insn->addressSize = (insn->hasAdSize ? 4 : 8);
505 insn->immediateSize = (insn->hasOpSize ? 2 : 4);
506 }
507 }
508
509 return 0;
510 }
511
512 // Consumes the SIB byte to determine addressing information.
readSIB(struct InternalInstruction * insn)513 static int readSIB(struct InternalInstruction *insn) {
514 SIBBase sibBaseBase = SIB_BASE_NONE;
515 uint8_t index, base;
516
517 LLVM_DEBUG(dbgs() << "readSIB()");
518 switch (insn->addressSize) {
519 case 2:
520 default:
521 llvm_unreachable("SIB-based addressing doesn't work in 16-bit mode");
522 case 4:
523 insn->sibIndexBase = SIB_INDEX_EAX;
524 sibBaseBase = SIB_BASE_EAX;
525 break;
526 case 8:
527 insn->sibIndexBase = SIB_INDEX_RAX;
528 sibBaseBase = SIB_BASE_RAX;
529 break;
530 }
531
532 if (consume(insn, insn->sib))
533 return -1;
534
535 index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
536
537 if (index == 0x4) {
538 insn->sibIndex = SIB_INDEX_NONE;
539 } else {
540 insn->sibIndex = (SIBIndex)(insn->sibIndexBase + index);
541 }
542
543 insn->sibScale = 1 << scaleFromSIB(insn->sib);
544
545 base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
546
547 switch (base) {
548 case 0x5:
549 case 0xd:
550 switch (modFromModRM(insn->modRM)) {
551 case 0x0:
552 insn->eaDisplacement = EA_DISP_32;
553 insn->sibBase = SIB_BASE_NONE;
554 break;
555 case 0x1:
556 insn->eaDisplacement = EA_DISP_8;
557 insn->sibBase = (SIBBase)(sibBaseBase + base);
558 break;
559 case 0x2:
560 insn->eaDisplacement = EA_DISP_32;
561 insn->sibBase = (SIBBase)(sibBaseBase + base);
562 break;
563 default:
564 llvm_unreachable("Cannot have Mod = 0b11 and a SIB byte");
565 }
566 break;
567 default:
568 insn->sibBase = (SIBBase)(sibBaseBase + base);
569 break;
570 }
571
572 return 0;
573 }
574
readDisplacement(struct InternalInstruction * insn)575 static int readDisplacement(struct InternalInstruction *insn) {
576 int8_t d8;
577 int16_t d16;
578 int32_t d32;
579 LLVM_DEBUG(dbgs() << "readDisplacement()");
580
581 insn->displacementOffset = insn->readerCursor - insn->startLocation;
582 switch (insn->eaDisplacement) {
583 case EA_DISP_NONE:
584 break;
585 case EA_DISP_8:
586 if (consume(insn, d8))
587 return -1;
588 insn->displacement = d8;
589 break;
590 case EA_DISP_16:
591 if (consume(insn, d16))
592 return -1;
593 insn->displacement = d16;
594 break;
595 case EA_DISP_32:
596 if (consume(insn, d32))
597 return -1;
598 insn->displacement = d32;
599 break;
600 }
601
602 return 0;
603 }
604
605 // Consumes all addressing information (ModR/M byte, SIB byte, and displacement.
readModRM(struct InternalInstruction * insn)606 static int readModRM(struct InternalInstruction *insn) {
607 uint8_t mod, rm, reg, evexrm;
608 LLVM_DEBUG(dbgs() << "readModRM()");
609
610 if (insn->consumedModRM)
611 return 0;
612
613 if (consume(insn, insn->modRM))
614 return -1;
615 insn->consumedModRM = true;
616
617 mod = modFromModRM(insn->modRM);
618 rm = rmFromModRM(insn->modRM);
619 reg = regFromModRM(insn->modRM);
620
621 // This goes by insn->registerSize to pick the correct register, which messes
622 // up if we're using (say) XMM or 8-bit register operands. That gets fixed in
623 // fixupReg().
624 switch (insn->registerSize) {
625 case 2:
626 insn->regBase = MODRM_REG_AX;
627 insn->eaRegBase = EA_REG_AX;
628 break;
629 case 4:
630 insn->regBase = MODRM_REG_EAX;
631 insn->eaRegBase = EA_REG_EAX;
632 break;
633 case 8:
634 insn->regBase = MODRM_REG_RAX;
635 insn->eaRegBase = EA_REG_RAX;
636 break;
637 }
638
639 reg |= rFromREX(insn->rexPrefix) << 3;
640 rm |= bFromREX(insn->rexPrefix) << 3;
641
642 evexrm = 0;
643 if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT) {
644 reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
645 evexrm = xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
646 }
647
648 insn->reg = (Reg)(insn->regBase + reg);
649
650 switch (insn->addressSize) {
651 case 2: {
652 EABase eaBaseBase = EA_BASE_BX_SI;
653
654 switch (mod) {
655 case 0x0:
656 if (rm == 0x6) {
657 insn->eaBase = EA_BASE_NONE;
658 insn->eaDisplacement = EA_DISP_16;
659 if (readDisplacement(insn))
660 return -1;
661 } else {
662 insn->eaBase = (EABase)(eaBaseBase + rm);
663 insn->eaDisplacement = EA_DISP_NONE;
664 }
665 break;
666 case 0x1:
667 insn->eaBase = (EABase)(eaBaseBase + rm);
668 insn->eaDisplacement = EA_DISP_8;
669 insn->displacementSize = 1;
670 if (readDisplacement(insn))
671 return -1;
672 break;
673 case 0x2:
674 insn->eaBase = (EABase)(eaBaseBase + rm);
675 insn->eaDisplacement = EA_DISP_16;
676 if (readDisplacement(insn))
677 return -1;
678 break;
679 case 0x3:
680 insn->eaBase = (EABase)(insn->eaRegBase + rm);
681 if (readDisplacement(insn))
682 return -1;
683 break;
684 }
685 break;
686 }
687 case 4:
688 case 8: {
689 EABase eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
690
691 switch (mod) {
692 case 0x0:
693 insn->eaDisplacement = EA_DISP_NONE; // readSIB may override this
694 // In determining whether RIP-relative mode is used (rm=5),
695 // or whether a SIB byte is present (rm=4),
696 // the extension bits (REX.b and EVEX.x) are ignored.
697 switch (rm & 7) {
698 case 0x4: // SIB byte is present
699 insn->eaBase = (insn->addressSize == 4 ? EA_BASE_sib : EA_BASE_sib64);
700 if (readSIB(insn) || readDisplacement(insn))
701 return -1;
702 break;
703 case 0x5: // RIP-relative
704 insn->eaBase = EA_BASE_NONE;
705 insn->eaDisplacement = EA_DISP_32;
706 if (readDisplacement(insn))
707 return -1;
708 break;
709 default:
710 insn->eaBase = (EABase)(eaBaseBase + rm);
711 break;
712 }
713 break;
714 case 0x1:
715 insn->displacementSize = 1;
716 LLVM_FALLTHROUGH;
717 case 0x2:
718 insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
719 switch (rm & 7) {
720 case 0x4: // SIB byte is present
721 insn->eaBase = EA_BASE_sib;
722 if (readSIB(insn) || readDisplacement(insn))
723 return -1;
724 break;
725 default:
726 insn->eaBase = (EABase)(eaBaseBase + rm);
727 if (readDisplacement(insn))
728 return -1;
729 break;
730 }
731 break;
732 case 0x3:
733 insn->eaDisplacement = EA_DISP_NONE;
734 insn->eaBase = (EABase)(insn->eaRegBase + rm + evexrm);
735 break;
736 }
737 break;
738 }
739 } // switch (insn->addressSize)
740
741 return 0;
742 }
743
// Generates a static function `name(insn, type, index, valid)` that maps a raw
// register index onto the register value for the operand type `type`.
//
//   name   - Name of the generated function.
//   base   - Expression added to the index for TYPE_Rv operands.
//   prefix - Token pasted in front of the register names (for example
//            prefix##_AL, prefix##_XMM0) to select the enumeration to use.
//   mask   - Mask applied to the index for TYPE_R8/R16/R32/R64 operands.
//
// The generated function sets *valid to 1 on entry and clears it when the
// type is not handled or the index is outside the range checked for that
// type; a register value is returned either way. For TYPE_R8, indexes 4-7 map
// to prefix##_SPL onwards when a REX prefix is present.
//
// The explanatory comments live up here, outside the macro body, which is a
// single run of backslash-continued lines.
#define GENERIC_FIXUP_FUNC(name, base, prefix, mask)                           \
  static uint16_t name(struct InternalInstruction *insn, OperandType type,     \
                       uint8_t index, uint8_t *valid) {                        \
    *valid = 1;                                                                \
    switch (type) {                                                            \
    default:                                                                   \
      debug("Unhandled register type");                                        \
      *valid = 0;                                                              \
      return 0;                                                                \
    case TYPE_Rv:                                                              \
      return base + index;                                                     \
    case TYPE_R8:                                                              \
      index &= mask;                                                           \
      if (index > 0xf)                                                         \
        *valid = 0;                                                            \
      if (insn->rexPrefix && index >= 4 && index <= 7) {                       \
        return prefix##_SPL + (index - 4);                                     \
      } else {                                                                 \
        return prefix##_AL + index;                                            \
      }                                                                        \
    case TYPE_R16:                                                             \
      index &= mask;                                                           \
      if (index > 0xf)                                                         \
        *valid = 0;                                                            \
      return prefix##_AX + index;                                              \
    case TYPE_R32:                                                             \
      index &= mask;                                                           \
      if (index > 0xf)                                                         \
        *valid = 0;                                                            \
      return prefix##_EAX + index;                                             \
    case TYPE_R64:                                                             \
      index &= mask;                                                           \
      if (index > 0xf)                                                         \
        *valid = 0;                                                            \
      return prefix##_RAX + index;                                             \
    case TYPE_ZMM:                                                             \
      return prefix##_ZMM0 + index;                                            \
    case TYPE_YMM:                                                             \
      return prefix##_YMM0 + index;                                            \
    case TYPE_XMM:                                                             \
      return prefix##_XMM0 + index;                                            \
    case TYPE_TMM:                                                             \
      if (index > 7)                                                           \
        *valid = 0;                                                            \
      return prefix##_TMM0 + index;                                            \
    case TYPE_VK:                                                              \
      index &= 0xf;                                                            \
      if (index > 7)                                                           \
        *valid = 0;                                                            \
      return prefix##_K0 + index;                                              \
    case TYPE_VK_PAIR:                                                         \
      if (index > 7)                                                           \
        *valid = 0;                                                            \
      return prefix##_K0_K1 + (index / 2);                                     \
    case TYPE_MM64:                                                            \
      return prefix##_MM0 + (index & 0x7);                                     \
    case TYPE_SEGMENTREG:                                                      \
      if ((index & 7) > 5)                                                     \
        *valid = 0;                                                            \
      return prefix##_ES + (index & 7);                                        \
    case TYPE_DEBUGREG:                                                        \
      return prefix##_DR0 + index;                                             \
    case TYPE_CONTROLREG:                                                      \
      return prefix##_CR0 + index;                                             \
    case TYPE_MVSIBX:                                                          \
      return prefix##_XMM0 + index;                                            \
    case TYPE_MVSIBY:                                                          \
      return prefix##_YMM0 + index;                                            \
    case TYPE_MVSIBZ:                                                          \
      return prefix##_ZMM0 + index;                                            \
    }                                                                          \
  }

// Consult an operand type to determine the meaning of the reg or R/M field. If
// the operand is an XMM operand, for example, an operand would be XMM0 instead
// of AX, which readModRM() would otherwise misinterpret it as.
//
// @param insn  - The instruction containing the operand.
// @param type  - The operand type.
// @param index - The existing value of the field as reported by readModRM().
// @param valid - The address of a uint8_t.  The target is set to 1 if the
//                field is valid for the register class; 0 if not.
// @return      - The proper value.
//
// fixupRegValue works on the reg (and vvvv) field and keeps five index bits
// for general-purpose registers; fixupRMValue works on the R/M field and keeps
// four.
GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG, 0x1f)
GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG, 0xf)
829
830 // Consult an operand specifier to determine which of the fixup*Value functions
831 // to use in correcting readModRM()'ss interpretation.
832 //
833 // @param insn - See fixup*Value().
834 // @param op - The operand specifier.
835 // @return - 0 if fixup was successful; -1 if the register returned was
836 // invalid for its class.
fixupReg(struct InternalInstruction * insn,const struct OperandSpecifier * op)837 static int fixupReg(struct InternalInstruction *insn,
838 const struct OperandSpecifier *op) {
839 uint8_t valid;
840 LLVM_DEBUG(dbgs() << "fixupReg()");
841
842 switch ((OperandEncoding)op->encoding) {
843 default:
844 debug("Expected a REG or R/M encoding in fixupReg");
845 return -1;
846 case ENCODING_VVVV:
847 insn->vvvv =
848 (Reg)fixupRegValue(insn, (OperandType)op->type, insn->vvvv, &valid);
849 if (!valid)
850 return -1;
851 break;
852 case ENCODING_REG:
853 insn->reg = (Reg)fixupRegValue(insn, (OperandType)op->type,
854 insn->reg - insn->regBase, &valid);
855 if (!valid)
856 return -1;
857 break;
858 case ENCODING_SIB:
859 CASE_ENCODING_RM:
860 if (insn->eaBase >= insn->eaRegBase) {
861 insn->eaBase = (EABase)fixupRMValue(
862 insn, (OperandType)op->type, insn->eaBase - insn->eaRegBase, &valid);
863 if (!valid)
864 return -1;
865 }
866 break;
867 }
868
869 return 0;
870 }
871
872 // Read the opcode (except the ModR/M byte in the case of extended or escape
873 // opcodes).
readOpcode(struct InternalInstruction * insn)874 static bool readOpcode(struct InternalInstruction *insn) {
875 uint8_t current;
876 LLVM_DEBUG(dbgs() << "readOpcode()");
877
878 insn->opcodeType = ONEBYTE;
879 if (insn->vectorExtensionType == TYPE_EVEX) {
880 switch (mmmFromEVEX2of4(insn->vectorExtensionPrefix[1])) {
881 default:
882 LLVM_DEBUG(
883 dbgs() << format("Unhandled mmm field for instruction (0x%hhx)",
884 mmmFromEVEX2of4(insn->vectorExtensionPrefix[1])));
885 return true;
886 case VEX_LOB_0F:
887 insn->opcodeType = TWOBYTE;
888 return consume(insn, insn->opcode);
889 case VEX_LOB_0F38:
890 insn->opcodeType = THREEBYTE_38;
891 return consume(insn, insn->opcode);
892 case VEX_LOB_0F3A:
893 insn->opcodeType = THREEBYTE_3A;
894 return consume(insn, insn->opcode);
895 case VEX_LOB_MAP5:
896 insn->opcodeType = MAP5;
897 return consume(insn, insn->opcode);
898 case VEX_LOB_MAP6:
899 insn->opcodeType = MAP6;
900 return consume(insn, insn->opcode);
901 }
902 } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
903 switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) {
904 default:
905 LLVM_DEBUG(
906 dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)",
907 mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])));
908 return true;
909 case VEX_LOB_0F:
910 insn->opcodeType = TWOBYTE;
911 return consume(insn, insn->opcode);
912 case VEX_LOB_0F38:
913 insn->opcodeType = THREEBYTE_38;
914 return consume(insn, insn->opcode);
915 case VEX_LOB_0F3A:
916 insn->opcodeType = THREEBYTE_3A;
917 return consume(insn, insn->opcode);
918 case VEX_LOB_MAP5:
919 insn->opcodeType = MAP5;
920 return consume(insn, insn->opcode);
921 case VEX_LOB_MAP6:
922 insn->opcodeType = MAP6;
923 return consume(insn, insn->opcode);
924 }
925 } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
926 insn->opcodeType = TWOBYTE;
927 return consume(insn, insn->opcode);
928 } else if (insn->vectorExtensionType == TYPE_XOP) {
929 switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) {
930 default:
931 LLVM_DEBUG(
932 dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)",
933 mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])));
934 return true;
935 case XOP_MAP_SELECT_8:
936 insn->opcodeType = XOP8_MAP;
937 return consume(insn, insn->opcode);
938 case XOP_MAP_SELECT_9:
939 insn->opcodeType = XOP9_MAP;
940 return consume(insn, insn->opcode);
941 case XOP_MAP_SELECT_A:
942 insn->opcodeType = XOPA_MAP;
943 return consume(insn, insn->opcode);
944 }
945 }
946
947 if (consume(insn, current))
948 return true;
949
950 if (current == 0x0f) {
951 LLVM_DEBUG(
952 dbgs() << format("Found a two-byte escape prefix (0x%hhx)", current));
953 if (consume(insn, current))
954 return true;
955
956 if (current == 0x38) {
957 LLVM_DEBUG(dbgs() << format("Found a three-byte escape prefix (0x%hhx)",
958 current));
959 if (consume(insn, current))
960 return true;
961
962 insn->opcodeType = THREEBYTE_38;
963 } else if (current == 0x3a) {
964 LLVM_DEBUG(dbgs() << format("Found a three-byte escape prefix (0x%hhx)",
965 current));
966 if (consume(insn, current))
967 return true;
968
969 insn->opcodeType = THREEBYTE_3A;
970 } else if (current == 0x0f) {
971 LLVM_DEBUG(
972 dbgs() << format("Found a 3dnow escape prefix (0x%hhx)", current));
973
974 // Consume operands before the opcode to comply with the 3DNow encoding
975 if (readModRM(insn))
976 return true;
977
978 if (consume(insn, current))
979 return true;
980
981 insn->opcodeType = THREEDNOW_MAP;
982 } else {
983 LLVM_DEBUG(dbgs() << "Didn't find a three-byte escape prefix");
984 insn->opcodeType = TWOBYTE;
985 }
986 } else if (insn->mandatoryPrefix)
987 // The opcode with mandatory prefix must start with opcode escape.
988 // If not it's legacy repeat prefix
989 insn->mandatoryPrefix = 0;
990
991 // At this point we have consumed the full opcode.
992 // Anything we consume from here on must be unconsumed.
993 insn->opcode = current;
994
995 return false;
996 }
997
// Returns true when equiv names the 16-bit counterpart of the 32- or 64-bit
// instruction named by orig.
//
// The two names must have the same length and agree character by character,
// except that a width marker in orig may appear in its 16-bit spelling in
// equiv: 'Q' or 'L' becomes 'W', '6' or '3' becomes '1', and '4' or '2'
// becomes '6' (so both "64" and "32" may turn into "16").
static bool is16BitEquivalent(const char *orig, const char *equiv) {
  const char *wide = orig;
  const char *narrow = equiv;

  while (*wide != '\0' && *narrow != '\0') {
    const char w = *wide++;
    const char n = *narrow++;
    if (w == n)
      continue;

    const bool suffixSwap = (w == 'Q' || w == 'L') && n == 'W';
    const bool leadDigitSwap = (w == '6' || w == '3') && n == '1';
    const bool trailDigitSwap = (w == '4' || w == '2') && n == '6';
    if (!(suffixSwap || leadDigitSwap || trailDigitSwap))
      return false;
  }

  // At least one name is exhausted; they match only if both are.
  return *wide == '\0' && *narrow == '\0';
}
1016
// Returns true when the instruction name contains the substring "64", which
// is how this file recognises a 64-bit instruction.
static bool is64Bit(const char *name) {
  for (const char *cursor = name; *cursor != '\0'; ++cursor) {
    // cursor[0] is not the terminator here, so cursor[1] is at worst the
    // terminating NUL and the lookahead stays inside the string.
    if (cursor[0] == '6' && cursor[1] == '4')
      return true;
  }
  return false;
}
1026
1027 // Determine the ID of an instruction, consuming the ModR/M byte as appropriate
1028 // for extended and escape opcodes, and using a supplied attribute mask.
getInstructionIDWithAttrMask(uint16_t * instructionID,struct InternalInstruction * insn,uint16_t attrMask)1029 static int getInstructionIDWithAttrMask(uint16_t *instructionID,
1030 struct InternalInstruction *insn,
1031 uint16_t attrMask) {
1032 auto insnCtx = InstructionContext(x86DisassemblerContexts[attrMask]);
1033 const ContextDecision *decision;
1034 switch (insn->opcodeType) {
1035 case ONEBYTE:
1036 decision = &ONEBYTE_SYM;
1037 break;
1038 case TWOBYTE:
1039 decision = &TWOBYTE_SYM;
1040 break;
1041 case THREEBYTE_38:
1042 decision = &THREEBYTE38_SYM;
1043 break;
1044 case THREEBYTE_3A:
1045 decision = &THREEBYTE3A_SYM;
1046 break;
1047 case XOP8_MAP:
1048 decision = &XOP8_MAP_SYM;
1049 break;
1050 case XOP9_MAP:
1051 decision = &XOP9_MAP_SYM;
1052 break;
1053 case XOPA_MAP:
1054 decision = &XOPA_MAP_SYM;
1055 break;
1056 case THREEDNOW_MAP:
1057 decision = &THREEDNOW_MAP_SYM;
1058 break;
1059 case MAP5:
1060 decision = &MAP5_SYM;
1061 break;
1062 case MAP6:
1063 decision = &MAP6_SYM;
1064 break;
1065 }
1066
1067 if (decision->opcodeDecisions[insnCtx]
1068 .modRMDecisions[insn->opcode]
1069 .modrm_type != MODRM_ONEENTRY) {
1070 if (readModRM(insn))
1071 return -1;
1072 *instructionID =
1073 decode(insn->opcodeType, insnCtx, insn->opcode, insn->modRM);
1074 } else {
1075 *instructionID = decode(insn->opcodeType, insnCtx, insn->opcode, 0);
1076 }
1077
1078 return 0;
1079 }
1080
1081 // Determine the ID of an instruction, consuming the ModR/M byte as appropriate
1082 // for extended and escape opcodes. Determines the attributes and context for
1083 // the instruction before doing so.
getInstructionID(struct InternalInstruction * insn,const MCInstrInfo * mii)1084 static int getInstructionID(struct InternalInstruction *insn,
1085 const MCInstrInfo *mii) {
1086 uint16_t attrMask;
1087 uint16_t instructionID;
1088
1089 LLVM_DEBUG(dbgs() << "getID()");
1090
1091 attrMask = ATTR_NONE;
1092
1093 if (insn->mode == MODE_64BIT)
1094 attrMask |= ATTR_64BIT;
1095
1096 if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
1097 attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? ATTR_EVEX : ATTR_VEX;
1098
1099 if (insn->vectorExtensionType == TYPE_EVEX) {
1100 switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) {
1101 case VEX_PREFIX_66:
1102 attrMask |= ATTR_OPSIZE;
1103 break;
1104 case VEX_PREFIX_F3:
1105 attrMask |= ATTR_XS;
1106 break;
1107 case VEX_PREFIX_F2:
1108 attrMask |= ATTR_XD;
1109 break;
1110 }
1111
1112 if (zFromEVEX4of4(insn->vectorExtensionPrefix[3]))
1113 attrMask |= ATTR_EVEXKZ;
1114 if (bFromEVEX4of4(insn->vectorExtensionPrefix[3]))
1115 attrMask |= ATTR_EVEXB;
1116 if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]))
1117 attrMask |= ATTR_EVEXK;
1118 if (lFromEVEX4of4(insn->vectorExtensionPrefix[3]))
1119 attrMask |= ATTR_VEXL;
1120 if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
1121 attrMask |= ATTR_EVEXL2;
1122 } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
1123 switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) {
1124 case VEX_PREFIX_66:
1125 attrMask |= ATTR_OPSIZE;
1126 break;
1127 case VEX_PREFIX_F3:
1128 attrMask |= ATTR_XS;
1129 break;
1130 case VEX_PREFIX_F2:
1131 attrMask |= ATTR_XD;
1132 break;
1133 }
1134
1135 if (lFromVEX3of3(insn->vectorExtensionPrefix[2]))
1136 attrMask |= ATTR_VEXL;
1137 } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
1138 switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
1139 case VEX_PREFIX_66:
1140 attrMask |= ATTR_OPSIZE;
1141 if (insn->hasAdSize)
1142 attrMask |= ATTR_ADSIZE;
1143 break;
1144 case VEX_PREFIX_F3:
1145 attrMask |= ATTR_XS;
1146 break;
1147 case VEX_PREFIX_F2:
1148 attrMask |= ATTR_XD;
1149 break;
1150 }
1151
1152 if (lFromVEX2of2(insn->vectorExtensionPrefix[1]))
1153 attrMask |= ATTR_VEXL;
1154 } else if (insn->vectorExtensionType == TYPE_XOP) {
1155 switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
1156 case VEX_PREFIX_66:
1157 attrMask |= ATTR_OPSIZE;
1158 break;
1159 case VEX_PREFIX_F3:
1160 attrMask |= ATTR_XS;
1161 break;
1162 case VEX_PREFIX_F2:
1163 attrMask |= ATTR_XD;
1164 break;
1165 }
1166
1167 if (lFromXOP3of3(insn->vectorExtensionPrefix[2]))
1168 attrMask |= ATTR_VEXL;
1169 } else {
1170 return -1;
1171 }
1172 } else if (!insn->mandatoryPrefix) {
1173 // If we don't have mandatory prefix we should use legacy prefixes here
1174 if (insn->hasOpSize && (insn->mode != MODE_16BIT))
1175 attrMask |= ATTR_OPSIZE;
1176 if (insn->hasAdSize)
1177 attrMask |= ATTR_ADSIZE;
1178 if (insn->opcodeType == ONEBYTE) {
1179 if (insn->repeatPrefix == 0xf3 && (insn->opcode == 0x90))
1180 // Special support for PAUSE
1181 attrMask |= ATTR_XS;
1182 } else {
1183 if (insn->repeatPrefix == 0xf2)
1184 attrMask |= ATTR_XD;
1185 else if (insn->repeatPrefix == 0xf3)
1186 attrMask |= ATTR_XS;
1187 }
1188 } else {
1189 switch (insn->mandatoryPrefix) {
1190 case 0xf2:
1191 attrMask |= ATTR_XD;
1192 break;
1193 case 0xf3:
1194 attrMask |= ATTR_XS;
1195 break;
1196 case 0x66:
1197 if (insn->mode != MODE_16BIT)
1198 attrMask |= ATTR_OPSIZE;
1199 if (insn->hasAdSize)
1200 attrMask |= ATTR_ADSIZE;
1201 break;
1202 case 0x67:
1203 attrMask |= ATTR_ADSIZE;
1204 break;
1205 }
1206 }
1207
1208 if (insn->rexPrefix & 0x08) {
1209 attrMask |= ATTR_REXW;
1210 attrMask &= ~ATTR_ADSIZE;
1211 }
1212
1213 if (insn->mode == MODE_16BIT) {
1214 // JCXZ/JECXZ need special handling for 16-bit mode because the meaning
1215 // of the AdSize prefix is inverted w.r.t. 32-bit mode.
1216 if (insn->opcodeType == ONEBYTE && insn->opcode == 0xE3)
1217 attrMask ^= ATTR_ADSIZE;
1218 // If we're in 16-bit mode and this is one of the relative jumps and opsize
1219 // prefix isn't present, we need to force the opsize attribute since the
1220 // prefix is inverted relative to 32-bit mode.
1221 if (!insn->hasOpSize && insn->opcodeType == ONEBYTE &&
1222 (insn->opcode == 0xE8 || insn->opcode == 0xE9))
1223 attrMask |= ATTR_OPSIZE;
1224
1225 if (!insn->hasOpSize && insn->opcodeType == TWOBYTE &&
1226 insn->opcode >= 0x80 && insn->opcode <= 0x8F)
1227 attrMask |= ATTR_OPSIZE;
1228 }
1229
1230
1231 if (getInstructionIDWithAttrMask(&instructionID, insn, attrMask))
1232 return -1;
1233
1234 // The following clauses compensate for limitations of the tables.
1235
1236 if (insn->mode != MODE_64BIT &&
1237 insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
1238 // The tables can't distinquish between cases where the W-bit is used to
1239 // select register size and cases where its a required part of the opcode.
1240 if ((insn->vectorExtensionType == TYPE_EVEX &&
1241 wFromEVEX3of4(insn->vectorExtensionPrefix[2])) ||
1242 (insn->vectorExtensionType == TYPE_VEX_3B &&
1243 wFromVEX3of3(insn->vectorExtensionPrefix[2])) ||
1244 (insn->vectorExtensionType == TYPE_XOP &&
1245 wFromXOP3of3(insn->vectorExtensionPrefix[2]))) {
1246
1247 uint16_t instructionIDWithREXW;
1248 if (getInstructionIDWithAttrMask(&instructionIDWithREXW, insn,
1249 attrMask | ATTR_REXW)) {
1250 insn->instructionID = instructionID;
1251 insn->spec = &INSTRUCTIONS_SYM[instructionID];
1252 return 0;
1253 }
1254
1255 auto SpecName = mii->getName(instructionIDWithREXW);
1256 // If not a 64-bit instruction. Switch the opcode.
1257 if (!is64Bit(SpecName.data())) {
1258 insn->instructionID = instructionIDWithREXW;
1259 insn->spec = &INSTRUCTIONS_SYM[instructionIDWithREXW];
1260 return 0;
1261 }
1262 }
1263 }
1264
1265 // Absolute moves, umonitor, and movdir64b need special handling.
1266 // -For 16-bit mode because the meaning of the AdSize and OpSize prefixes are
1267 // inverted w.r.t.
1268 // -For 32-bit mode we need to ensure the ADSIZE prefix is observed in
1269 // any position.
1270 if ((insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) ||
1271 (insn->opcodeType == TWOBYTE && (insn->opcode == 0xAE)) ||
1272 (insn->opcodeType == THREEBYTE_38 && insn->opcode == 0xF8)) {
1273 // Make sure we observed the prefixes in any position.
1274 if (insn->hasAdSize)
1275 attrMask |= ATTR_ADSIZE;
1276 if (insn->hasOpSize)
1277 attrMask |= ATTR_OPSIZE;
1278
1279 // In 16-bit, invert the attributes.
1280 if (insn->mode == MODE_16BIT) {
1281 attrMask ^= ATTR_ADSIZE;
1282
1283 // The OpSize attribute is only valid with the absolute moves.
1284 if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0))
1285 attrMask ^= ATTR_OPSIZE;
1286 }
1287
1288 if (getInstructionIDWithAttrMask(&instructionID, insn, attrMask))
1289 return -1;
1290
1291 insn->instructionID = instructionID;
1292 insn->spec = &INSTRUCTIONS_SYM[instructionID];
1293 return 0;
1294 }
1295
1296 if ((insn->mode == MODE_16BIT || insn->hasOpSize) &&
1297 !(attrMask & ATTR_OPSIZE)) {
1298 // The instruction tables make no distinction between instructions that
1299 // allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
1300 // particular spot (i.e., many MMX operations). In general we're
1301 // conservative, but in the specific case where OpSize is present but not in
1302 // the right place we check if there's a 16-bit operation.
1303 const struct InstructionSpecifier *spec;
1304 uint16_t instructionIDWithOpsize;
1305 llvm::StringRef specName, specWithOpSizeName;
1306
1307 spec = &INSTRUCTIONS_SYM[instructionID];
1308
1309 if (getInstructionIDWithAttrMask(&instructionIDWithOpsize, insn,
1310 attrMask | ATTR_OPSIZE)) {
1311 // ModRM required with OpSize but not present. Give up and return the
1312 // version without OpSize set.
1313 insn->instructionID = instructionID;
1314 insn->spec = spec;
1315 return 0;
1316 }
1317
1318 specName = mii->getName(instructionID);
1319 specWithOpSizeName = mii->getName(instructionIDWithOpsize);
1320
1321 if (is16BitEquivalent(specName.data(), specWithOpSizeName.data()) &&
1322 (insn->mode == MODE_16BIT) ^ insn->hasOpSize) {
1323 insn->instructionID = instructionIDWithOpsize;
1324 insn->spec = &INSTRUCTIONS_SYM[instructionIDWithOpsize];
1325 } else {
1326 insn->instructionID = instructionID;
1327 insn->spec = spec;
1328 }
1329 return 0;
1330 }
1331
1332 if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
1333 insn->rexPrefix & 0x01) {
1334 // NOOP shouldn't decode as NOOP if REX.b is set. Instead it should decode
1335 // as XCHG %r8, %eax.
1336 const struct InstructionSpecifier *spec;
1337 uint16_t instructionIDWithNewOpcode;
1338 const struct InstructionSpecifier *specWithNewOpcode;
1339
1340 spec = &INSTRUCTIONS_SYM[instructionID];
1341
1342 // Borrow opcode from one of the other XCHGar opcodes
1343 insn->opcode = 0x91;
1344
1345 if (getInstructionIDWithAttrMask(&instructionIDWithNewOpcode, insn,
1346 attrMask)) {
1347 insn->opcode = 0x90;
1348
1349 insn->instructionID = instructionID;
1350 insn->spec = spec;
1351 return 0;
1352 }
1353
1354 specWithNewOpcode = &INSTRUCTIONS_SYM[instructionIDWithNewOpcode];
1355
1356 // Change back
1357 insn->opcode = 0x90;
1358
1359 insn->instructionID = instructionIDWithNewOpcode;
1360 insn->spec = specWithNewOpcode;
1361
1362 return 0;
1363 }
1364
1365 insn->instructionID = instructionID;
1366 insn->spec = &INSTRUCTIONS_SYM[insn->instructionID];
1367
1368 return 0;
1369 }
1370
1371 // Read an operand from the opcode field of an instruction and interprets it
1372 // appropriately given the operand width. Handles AddRegFrm instructions.
1373 //
1374 // @param insn - the instruction whose opcode field is to be read.
1375 // @param size - The width (in bytes) of the register being specified.
1376 // 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
1377 // RAX.
1378 // @return - 0 on success; nonzero otherwise.
readOpcodeRegister(struct InternalInstruction * insn,uint8_t size)1379 static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size) {
1380 LLVM_DEBUG(dbgs() << "readOpcodeRegister()");
1381
1382 if (size == 0)
1383 size = insn->registerSize;
1384
1385 switch (size) {
1386 case 1:
1387 insn->opcodeRegister = (Reg)(
1388 MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
1389 if (insn->rexPrefix && insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
1390 insn->opcodeRegister < MODRM_REG_AL + 0x8) {
1391 insn->opcodeRegister =
1392 (Reg)(MODRM_REG_SPL + (insn->opcodeRegister - MODRM_REG_AL - 4));
1393 }
1394
1395 break;
1396 case 2:
1397 insn->opcodeRegister = (Reg)(
1398 MODRM_REG_AX + ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
1399 break;
1400 case 4:
1401 insn->opcodeRegister =
1402 (Reg)(MODRM_REG_EAX +
1403 ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
1404 break;
1405 case 8:
1406 insn->opcodeRegister =
1407 (Reg)(MODRM_REG_RAX +
1408 ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
1409 break;
1410 }
1411
1412 return 0;
1413 }
1414
1415 // Consume an immediate operand from an instruction, given the desired operand
1416 // size.
1417 //
1418 // @param insn - The instruction whose operand is to be read.
1419 // @param size - The width (in bytes) of the operand.
1420 // @return - 0 if the immediate was successfully consumed; nonzero
1421 // otherwise.
readImmediate(struct InternalInstruction * insn,uint8_t size)1422 static int readImmediate(struct InternalInstruction *insn, uint8_t size) {
1423 uint8_t imm8;
1424 uint16_t imm16;
1425 uint32_t imm32;
1426 uint64_t imm64;
1427
1428 LLVM_DEBUG(dbgs() << "readImmediate()");
1429
1430 assert(insn->numImmediatesConsumed < 2 && "Already consumed two immediates");
1431
1432 insn->immediateSize = size;
1433 insn->immediateOffset = insn->readerCursor - insn->startLocation;
1434
1435 switch (size) {
1436 case 1:
1437 if (consume(insn, imm8))
1438 return -1;
1439 insn->immediates[insn->numImmediatesConsumed] = imm8;
1440 break;
1441 case 2:
1442 if (consume(insn, imm16))
1443 return -1;
1444 insn->immediates[insn->numImmediatesConsumed] = imm16;
1445 break;
1446 case 4:
1447 if (consume(insn, imm32))
1448 return -1;
1449 insn->immediates[insn->numImmediatesConsumed] = imm32;
1450 break;
1451 case 8:
1452 if (consume(insn, imm64))
1453 return -1;
1454 insn->immediates[insn->numImmediatesConsumed] = imm64;
1455 break;
1456 default:
1457 llvm_unreachable("invalid size");
1458 }
1459
1460 insn->numImmediatesConsumed++;
1461
1462 return 0;
1463 }
1464
1465 // Consume vvvv from an instruction if it has a VEX prefix.
readVVVV(struct InternalInstruction * insn)1466 static int readVVVV(struct InternalInstruction *insn) {
1467 LLVM_DEBUG(dbgs() << "readVVVV()");
1468
1469 int vvvv;
1470 if (insn->vectorExtensionType == TYPE_EVEX)
1471 vvvv = (v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4 |
1472 vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2]));
1473 else if (insn->vectorExtensionType == TYPE_VEX_3B)
1474 vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]);
1475 else if (insn->vectorExtensionType == TYPE_VEX_2B)
1476 vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]);
1477 else if (insn->vectorExtensionType == TYPE_XOP)
1478 vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]);
1479 else
1480 return -1;
1481
1482 if (insn->mode != MODE_64BIT)
1483 vvvv &= 0xf; // Can only clear bit 4. Bit 3 must be cleared later.
1484
1485 insn->vvvv = static_cast<Reg>(vvvv);
1486 return 0;
1487 }
1488
1489 // Read an mask register from the opcode field of an instruction.
1490 //
1491 // @param insn - The instruction whose opcode field is to be read.
1492 // @return - 0 on success; nonzero otherwise.
readMaskRegister(struct InternalInstruction * insn)1493 static int readMaskRegister(struct InternalInstruction *insn) {
1494 LLVM_DEBUG(dbgs() << "readMaskRegister()");
1495
1496 if (insn->vectorExtensionType != TYPE_EVEX)
1497 return -1;
1498
1499 insn->writemask =
1500 static_cast<Reg>(aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]));
1501 return 0;
1502 }
1503
1504 // Consults the specifier for an instruction and consumes all
1505 // operands for that instruction, interpreting them as it goes.
readOperands(struct InternalInstruction * insn)1506 static int readOperands(struct InternalInstruction *insn) {
1507 int hasVVVV, needVVVV;
1508 int sawRegImm = 0;
1509
1510 LLVM_DEBUG(dbgs() << "readOperands()");
1511
1512 // If non-zero vvvv specified, make sure one of the operands uses it.
1513 hasVVVV = !readVVVV(insn);
1514 needVVVV = hasVVVV && (insn->vvvv != 0);
1515
1516 for (const auto &Op : x86OperandSets[insn->spec->operands]) {
1517 switch (Op.encoding) {
1518 case ENCODING_NONE:
1519 case ENCODING_SI:
1520 case ENCODING_DI:
1521 break;
1522 CASE_ENCODING_VSIB:
1523 // VSIB can use the V2 bit so check only the other bits.
1524 if (needVVVV)
1525 needVVVV = hasVVVV & ((insn->vvvv & 0xf) != 0);
1526 if (readModRM(insn))
1527 return -1;
1528
1529 // Reject if SIB wasn't used.
1530 if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
1531 return -1;
1532
1533 // If sibIndex was set to SIB_INDEX_NONE, index offset is 4.
1534 if (insn->sibIndex == SIB_INDEX_NONE)
1535 insn->sibIndex = (SIBIndex)(insn->sibIndexBase + 4);
1536
1537 // If EVEX.v2 is set this is one of the 16-31 registers.
1538 if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT &&
1539 v2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
1540 insn->sibIndex = (SIBIndex)(insn->sibIndex + 16);
1541
1542 // Adjust the index register to the correct size.
1543 switch ((OperandType)Op.type) {
1544 default:
1545 debug("Unhandled VSIB index type");
1546 return -1;
1547 case TYPE_MVSIBX:
1548 insn->sibIndex =
1549 (SIBIndex)(SIB_INDEX_XMM0 + (insn->sibIndex - insn->sibIndexBase));
1550 break;
1551 case TYPE_MVSIBY:
1552 insn->sibIndex =
1553 (SIBIndex)(SIB_INDEX_YMM0 + (insn->sibIndex - insn->sibIndexBase));
1554 break;
1555 case TYPE_MVSIBZ:
1556 insn->sibIndex =
1557 (SIBIndex)(SIB_INDEX_ZMM0 + (insn->sibIndex - insn->sibIndexBase));
1558 break;
1559 }
1560
1561 // Apply the AVX512 compressed displacement scaling factor.
1562 if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
1563 insn->displacement *= 1 << (Op.encoding - ENCODING_VSIB);
1564 break;
1565 case ENCODING_SIB:
1566 // Reject if SIB wasn't used.
1567 if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
1568 return -1;
1569 if (readModRM(insn))
1570 return -1;
1571 if (fixupReg(insn, &Op))
1572 return -1;
1573 break;
1574 case ENCODING_REG:
1575 CASE_ENCODING_RM:
1576 if (readModRM(insn))
1577 return -1;
1578 if (fixupReg(insn, &Op))
1579 return -1;
1580 // Apply the AVX512 compressed displacement scaling factor.
1581 if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
1582 insn->displacement *= 1 << (Op.encoding - ENCODING_RM);
1583 break;
1584 case ENCODING_IB:
1585 if (sawRegImm) {
1586 // Saw a register immediate so don't read again and instead split the
1587 // previous immediate. FIXME: This is a hack.
1588 insn->immediates[insn->numImmediatesConsumed] =
1589 insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
1590 ++insn->numImmediatesConsumed;
1591 break;
1592 }
1593 if (readImmediate(insn, 1))
1594 return -1;
1595 if (Op.type == TYPE_XMM || Op.type == TYPE_YMM)
1596 sawRegImm = 1;
1597 break;
1598 case ENCODING_IW:
1599 if (readImmediate(insn, 2))
1600 return -1;
1601 break;
1602 case ENCODING_ID:
1603 if (readImmediate(insn, 4))
1604 return -1;
1605 break;
1606 case ENCODING_IO:
1607 if (readImmediate(insn, 8))
1608 return -1;
1609 break;
1610 case ENCODING_Iv:
1611 if (readImmediate(insn, insn->immediateSize))
1612 return -1;
1613 break;
1614 case ENCODING_Ia:
1615 if (readImmediate(insn, insn->addressSize))
1616 return -1;
1617 break;
1618 case ENCODING_IRC:
1619 insn->RC = (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 1) |
1620 lFromEVEX4of4(insn->vectorExtensionPrefix[3]);
1621 break;
1622 case ENCODING_RB:
1623 if (readOpcodeRegister(insn, 1))
1624 return -1;
1625 break;
1626 case ENCODING_RW:
1627 if (readOpcodeRegister(insn, 2))
1628 return -1;
1629 break;
1630 case ENCODING_RD:
1631 if (readOpcodeRegister(insn, 4))
1632 return -1;
1633 break;
1634 case ENCODING_RO:
1635 if (readOpcodeRegister(insn, 8))
1636 return -1;
1637 break;
1638 case ENCODING_Rv:
1639 if (readOpcodeRegister(insn, 0))
1640 return -1;
1641 break;
1642 case ENCODING_CC:
1643 insn->immediates[1] = insn->opcode & 0xf;
1644 break;
1645 case ENCODING_FP:
1646 break;
1647 case ENCODING_VVVV:
1648 needVVVV = 0; // Mark that we have found a VVVV operand.
1649 if (!hasVVVV)
1650 return -1;
1651 if (insn->mode != MODE_64BIT)
1652 insn->vvvv = static_cast<Reg>(insn->vvvv & 0x7);
1653 if (fixupReg(insn, &Op))
1654 return -1;
1655 break;
1656 case ENCODING_WRITEMASK:
1657 if (readMaskRegister(insn))
1658 return -1;
1659 break;
1660 case ENCODING_DUP:
1661 break;
1662 default:
1663 LLVM_DEBUG(dbgs() << "Encountered an operand with an unknown encoding.");
1664 return -1;
1665 }
1666 }
1667
1668 // If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail
1669 if (needVVVV)
1670 return -1;
1671
1672 return 0;
1673 }
1674
namespace llvm {

// Placeholder enumerators that exist only so that an automatically-generated
// switch statement compiles. They are never assigned to anything.
namespace X86 {
enum {
  BX_SI = 500, // 500
  BX_DI,       // 501
  BP_SI,       // 502
  BP_DI,       // 503
  sib,         // 504
  sib64        // 505
};
} // namespace X86

} // namespace llvm
1692
1693 static bool translateInstruction(MCInst &target,
1694 InternalInstruction &source,
1695 const MCDisassembler *Dis);
1696
1697 namespace {
1698
1699 /// Generic disassembler for all X86 platforms. All each platform class should
1700 /// have to do is subclass the constructor, and provide a different
1701 /// disassemblerMode value.
1702 class X86GenericDisassembler : public MCDisassembler {
1703 std::unique_ptr<const MCInstrInfo> MII;
1704 public:
1705 X86GenericDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
1706 std::unique_ptr<const MCInstrInfo> MII);
1707 public:
1708 DecodeStatus getInstruction(MCInst &instr, uint64_t &size,
1709 ArrayRef<uint8_t> Bytes, uint64_t Address,
1710 raw_ostream &cStream) const override;
1711
1712 private:
1713 DisassemblerMode fMode;
1714 };
1715
1716 } // namespace
1717
X86GenericDisassembler(const MCSubtargetInfo & STI,MCContext & Ctx,std::unique_ptr<const MCInstrInfo> MII)1718 X86GenericDisassembler::X86GenericDisassembler(
1719 const MCSubtargetInfo &STI,
1720 MCContext &Ctx,
1721 std::unique_ptr<const MCInstrInfo> MII)
1722 : MCDisassembler(STI, Ctx), MII(std::move(MII)) {
1723 const FeatureBitset &FB = STI.getFeatureBits();
1724 if (FB[X86::Is16Bit]) {
1725 fMode = MODE_16BIT;
1726 return;
1727 } else if (FB[X86::Is32Bit]) {
1728 fMode = MODE_32BIT;
1729 return;
1730 } else if (FB[X86::Is64Bit]) {
1731 fMode = MODE_64BIT;
1732 return;
1733 }
1734
1735 llvm_unreachable("Invalid CPU mode");
1736 }
1737
getInstruction(MCInst & Instr,uint64_t & Size,ArrayRef<uint8_t> Bytes,uint64_t Address,raw_ostream & CStream) const1738 MCDisassembler::DecodeStatus X86GenericDisassembler::getInstruction(
1739 MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address,
1740 raw_ostream &CStream) const {
1741 CommentStream = &CStream;
1742
1743 InternalInstruction Insn;
1744 memset(&Insn, 0, sizeof(InternalInstruction));
1745 Insn.bytes = Bytes;
1746 Insn.startLocation = Address;
1747 Insn.readerCursor = Address;
1748 Insn.mode = fMode;
1749
1750 if (Bytes.empty() || readPrefixes(&Insn) || readOpcode(&Insn) ||
1751 getInstructionID(&Insn, MII.get()) || Insn.instructionID == 0 ||
1752 readOperands(&Insn)) {
1753 Size = Insn.readerCursor - Address;
1754 return Fail;
1755 }
1756
1757 Insn.operands = x86OperandSets[Insn.spec->operands];
1758 Insn.length = Insn.readerCursor - Insn.startLocation;
1759 Size = Insn.length;
1760 if (Size > 15)
1761 LLVM_DEBUG(dbgs() << "Instruction exceeds 15-byte limit");
1762
1763 bool Ret = translateInstruction(Instr, Insn, this);
1764 if (!Ret) {
1765 unsigned Flags = X86::IP_NO_PREFIX;
1766 if (Insn.hasAdSize)
1767 Flags |= X86::IP_HAS_AD_SIZE;
1768 if (!Insn.mandatoryPrefix) {
1769 if (Insn.hasOpSize)
1770 Flags |= X86::IP_HAS_OP_SIZE;
1771 if (Insn.repeatPrefix == 0xf2)
1772 Flags |= X86::IP_HAS_REPEAT_NE;
1773 else if (Insn.repeatPrefix == 0xf3 &&
1774 // It should not be 'pause' f3 90
1775 Insn.opcode != 0x90)
1776 Flags |= X86::IP_HAS_REPEAT;
1777 if (Insn.hasLockPrefix)
1778 Flags |= X86::IP_HAS_LOCK;
1779 }
1780 Instr.setFlags(Flags);
1781 }
1782 return (!Ret) ? Success : Fail;
1783 }
1784
1785 //
1786 // Private code that translates from struct InternalInstructions to MCInsts.
1787 //
1788
1789 /// translateRegister - Translates an internal register to the appropriate LLVM
1790 /// register, and appends it as an operand to an MCInst.
1791 ///
1792 /// @param mcInst - The MCInst to append to.
1793 /// @param reg - The Reg to append.
translateRegister(MCInst & mcInst,Reg reg)1794 static void translateRegister(MCInst &mcInst, Reg reg) {
1795 #define ENTRY(x) X86::x,
1796 static constexpr MCPhysReg llvmRegnums[] = {ALL_REGS};
1797 #undef ENTRY
1798
1799 MCPhysReg llvmRegnum = llvmRegnums[reg];
1800 mcInst.addOperand(MCOperand::createReg(llvmRegnum));
1801 }
1802
1803 static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = {
1804 0, // SEG_OVERRIDE_NONE
1805 X86::CS,
1806 X86::SS,
1807 X86::DS,
1808 X86::ES,
1809 X86::FS,
1810 X86::GS
1811 };
1812
1813 /// translateSrcIndex - Appends a source index operand to an MCInst.
1814 ///
1815 /// @param mcInst - The MCInst to append to.
1816 /// @param insn - The internal instruction.
translateSrcIndex(MCInst & mcInst,InternalInstruction & insn)1817 static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn) {
1818 unsigned baseRegNo;
1819
1820 if (insn.mode == MODE_64BIT)
1821 baseRegNo = insn.hasAdSize ? X86::ESI : X86::RSI;
1822 else if (insn.mode == MODE_32BIT)
1823 baseRegNo = insn.hasAdSize ? X86::SI : X86::ESI;
1824 else {
1825 assert(insn.mode == MODE_16BIT);
1826 baseRegNo = insn.hasAdSize ? X86::ESI : X86::SI;
1827 }
1828 MCOperand baseReg = MCOperand::createReg(baseRegNo);
1829 mcInst.addOperand(baseReg);
1830
1831 MCOperand segmentReg;
1832 segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]);
1833 mcInst.addOperand(segmentReg);
1834 return false;
1835 }
1836
1837 /// translateDstIndex - Appends a destination index operand to an MCInst.
1838 ///
1839 /// @param mcInst - The MCInst to append to.
1840 /// @param insn - The internal instruction.
1841
translateDstIndex(MCInst & mcInst,InternalInstruction & insn)1842 static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn) {
1843 unsigned baseRegNo;
1844
1845 if (insn.mode == MODE_64BIT)
1846 baseRegNo = insn.hasAdSize ? X86::EDI : X86::RDI;
1847 else if (insn.mode == MODE_32BIT)
1848 baseRegNo = insn.hasAdSize ? X86::DI : X86::EDI;
1849 else {
1850 assert(insn.mode == MODE_16BIT);
1851 baseRegNo = insn.hasAdSize ? X86::EDI : X86::DI;
1852 }
1853 MCOperand baseReg = MCOperand::createReg(baseRegNo);
1854 mcInst.addOperand(baseReg);
1855 return false;
1856 }
1857
1858 /// translateImmediate - Appends an immediate operand to an MCInst.
1859 ///
1860 /// @param mcInst - The MCInst to append to.
1861 /// @param immediate - The immediate value to append.
1862 /// @param operand - The operand, as stored in the descriptor table.
1863 /// @param insn - The internal instruction.
translateImmediate(MCInst & mcInst,uint64_t immediate,const OperandSpecifier & operand,InternalInstruction & insn,const MCDisassembler * Dis)1864 static void translateImmediate(MCInst &mcInst, uint64_t immediate,
1865 const OperandSpecifier &operand,
1866 InternalInstruction &insn,
1867 const MCDisassembler *Dis) {
1868 // Sign-extend the immediate if necessary.
1869
1870 OperandType type = (OperandType)operand.type;
1871
1872 bool isBranch = false;
1873 uint64_t pcrel = 0;
1874 if (type == TYPE_REL) {
1875 isBranch = true;
1876 pcrel = insn.startLocation + insn.length;
1877 switch (operand.encoding) {
1878 default:
1879 break;
1880 case ENCODING_Iv:
1881 switch (insn.displacementSize) {
1882 default:
1883 break;
1884 case 1:
1885 if(immediate & 0x80)
1886 immediate |= ~(0xffull);
1887 break;
1888 case 2:
1889 if(immediate & 0x8000)
1890 immediate |= ~(0xffffull);
1891 break;
1892 case 4:
1893 if(immediate & 0x80000000)
1894 immediate |= ~(0xffffffffull);
1895 break;
1896 case 8:
1897 break;
1898 }
1899 break;
1900 case ENCODING_IB:
1901 if(immediate & 0x80)
1902 immediate |= ~(0xffull);
1903 break;
1904 case ENCODING_IW:
1905 if(immediate & 0x8000)
1906 immediate |= ~(0xffffull);
1907 break;
1908 case ENCODING_ID:
1909 if(immediate & 0x80000000)
1910 immediate |= ~(0xffffffffull);
1911 break;
1912 }
1913 }
1914 // By default sign-extend all X86 immediates based on their encoding.
1915 else if (type == TYPE_IMM) {
1916 switch (operand.encoding) {
1917 default:
1918 break;
1919 case ENCODING_IB:
1920 if(immediate & 0x80)
1921 immediate |= ~(0xffull);
1922 break;
1923 case ENCODING_IW:
1924 if(immediate & 0x8000)
1925 immediate |= ~(0xffffull);
1926 break;
1927 case ENCODING_ID:
1928 if(immediate & 0x80000000)
1929 immediate |= ~(0xffffffffull);
1930 break;
1931 case ENCODING_IO:
1932 break;
1933 }
1934 }
1935
1936 switch (type) {
1937 case TYPE_XMM:
1938 mcInst.addOperand(MCOperand::createReg(X86::XMM0 + (immediate >> 4)));
1939 return;
1940 case TYPE_YMM:
1941 mcInst.addOperand(MCOperand::createReg(X86::YMM0 + (immediate >> 4)));
1942 return;
1943 case TYPE_ZMM:
1944 mcInst.addOperand(MCOperand::createReg(X86::ZMM0 + (immediate >> 4)));
1945 return;
1946 default:
1947 // operand is 64 bits wide. Do nothing.
1948 break;
1949 }
1950
1951 if (!Dis->tryAddingSymbolicOperand(
1952 mcInst, immediate + pcrel, insn.startLocation, isBranch,
1953 insn.immediateOffset, insn.immediateSize, insn.length))
1954 mcInst.addOperand(MCOperand::createImm(immediate));
1955
1956 if (type == TYPE_MOFFS) {
1957 MCOperand segmentReg;
1958 segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]);
1959 mcInst.addOperand(segmentReg);
1960 }
1961 }
1962
1963 /// translateRMRegister - Translates a register stored in the R/M field of the
1964 /// ModR/M byte to its LLVM equivalent and appends it to an MCInst.
1965 /// @param mcInst - The MCInst to append to.
1966 /// @param insn - The internal instruction to extract the R/M field
1967 /// from.
1968 /// @return - 0 on success; -1 otherwise
translateRMRegister(MCInst & mcInst,InternalInstruction & insn)1969 static bool translateRMRegister(MCInst &mcInst,
1970 InternalInstruction &insn) {
1971 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
1972 debug("A R/M register operand may not have a SIB byte");
1973 return true;
1974 }
1975
1976 switch (insn.eaBase) {
1977 default:
1978 debug("Unexpected EA base register");
1979 return true;
1980 case EA_BASE_NONE:
1981 debug("EA_BASE_NONE for ModR/M base");
1982 return true;
1983 #define ENTRY(x) case EA_BASE_##x:
1984 ALL_EA_BASES
1985 #undef ENTRY
1986 debug("A R/M register operand may not have a base; "
1987 "the operand must be a register.");
1988 return true;
1989 #define ENTRY(x) \
1990 case EA_REG_##x: \
1991 mcInst.addOperand(MCOperand::createReg(X86::x)); break;
1992 ALL_REGS
1993 #undef ENTRY
1994 }
1995
1996 return false;
1997 }
1998
1999 /// translateRMMemory - Translates a memory operand stored in the Mod and R/M
2000 /// fields of an internal instruction (and possibly its SIB byte) to a memory
2001 /// operand in LLVM's format, and appends it to an MCInst.
2002 ///
2003 /// @param mcInst - The MCInst to append to.
2004 /// @param insn - The instruction to extract Mod, R/M, and SIB fields
2005 /// from.
2006 /// @param ForceSIB - The instruction must use SIB.
2007 /// @return - 0 on success; nonzero otherwise
translateRMMemory(MCInst & mcInst,InternalInstruction & insn,const MCDisassembler * Dis,bool ForceSIB=false)2008 static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
2009 const MCDisassembler *Dis,
2010 bool ForceSIB = false) {
2011 // Addresses in an MCInst are represented as five operands:
2012 // 1. basereg (register) The R/M base, or (if there is a SIB) the
2013 // SIB base
2014 // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified
2015 // scale amount
2016 // 3. indexreg (register) x86_registerNONE, or (if there is a SIB)
2017 // the index (which is multiplied by the
2018 // scale amount)
2019 // 4. displacement (immediate) 0, or the displacement if there is one
2020 // 5. segmentreg (register) x86_registerNONE for now, but could be set
2021 // if we have segment overrides
2022
2023 MCOperand baseReg;
2024 MCOperand scaleAmount;
2025 MCOperand indexReg;
2026 MCOperand displacement;
2027 MCOperand segmentReg;
2028 uint64_t pcrel = 0;
2029
2030 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
2031 if (insn.sibBase != SIB_BASE_NONE) {
2032 switch (insn.sibBase) {
2033 default:
2034 debug("Unexpected sibBase");
2035 return true;
2036 #define ENTRY(x) \
2037 case SIB_BASE_##x: \
2038 baseReg = MCOperand::createReg(X86::x); break;
2039 ALL_SIB_BASES
2040 #undef ENTRY
2041 }
2042 } else {
2043 baseReg = MCOperand::createReg(X86::NoRegister);
2044 }
2045
2046 if (insn.sibIndex != SIB_INDEX_NONE) {
2047 switch (insn.sibIndex) {
2048 default:
2049 debug("Unexpected sibIndex");
2050 return true;
2051 #define ENTRY(x) \
2052 case SIB_INDEX_##x: \
2053 indexReg = MCOperand::createReg(X86::x); break;
2054 EA_BASES_32BIT
2055 EA_BASES_64BIT
2056 REGS_XMM
2057 REGS_YMM
2058 REGS_ZMM
2059 #undef ENTRY
2060 }
2061 } else {
2062 // Use EIZ/RIZ for a few ambiguous cases where the SIB byte is present,
2063 // but no index is used and modrm alone should have been enough.
2064 // -No base register in 32-bit mode. In 64-bit mode this is used to
2065 // avoid rip-relative addressing.
2066 // -Any base register used other than ESP/RSP/R12D/R12. Using these as a
2067 // base always requires a SIB byte.
2068 // -A scale other than 1 is used.
2069 if (!ForceSIB &&
2070 (insn.sibScale != 1 ||
2071 (insn.sibBase == SIB_BASE_NONE && insn.mode != MODE_64BIT) ||
2072 (insn.sibBase != SIB_BASE_NONE &&
2073 insn.sibBase != SIB_BASE_ESP && insn.sibBase != SIB_BASE_RSP &&
2074 insn.sibBase != SIB_BASE_R12D && insn.sibBase != SIB_BASE_R12))) {
2075 indexReg = MCOperand::createReg(insn.addressSize == 4 ? X86::EIZ :
2076 X86::RIZ);
2077 } else
2078 indexReg = MCOperand::createReg(X86::NoRegister);
2079 }
2080
2081 scaleAmount = MCOperand::createImm(insn.sibScale);
2082 } else {
2083 switch (insn.eaBase) {
2084 case EA_BASE_NONE:
2085 if (insn.eaDisplacement == EA_DISP_NONE) {
2086 debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
2087 return true;
2088 }
2089 if (insn.mode == MODE_64BIT){
2090 pcrel = insn.startLocation + insn.length;
2091 Dis->tryAddingPcLoadReferenceComment(insn.displacement + pcrel,
2092 insn.startLocation +
2093 insn.displacementOffset);
2094 // Section 2.2.1.6
2095 baseReg = MCOperand::createReg(insn.addressSize == 4 ? X86::EIP :
2096 X86::RIP);
2097 }
2098 else
2099 baseReg = MCOperand::createReg(X86::NoRegister);
2100
2101 indexReg = MCOperand::createReg(X86::NoRegister);
2102 break;
2103 case EA_BASE_BX_SI:
2104 baseReg = MCOperand::createReg(X86::BX);
2105 indexReg = MCOperand::createReg(X86::SI);
2106 break;
2107 case EA_BASE_BX_DI:
2108 baseReg = MCOperand::createReg(X86::BX);
2109 indexReg = MCOperand::createReg(X86::DI);
2110 break;
2111 case EA_BASE_BP_SI:
2112 baseReg = MCOperand::createReg(X86::BP);
2113 indexReg = MCOperand::createReg(X86::SI);
2114 break;
2115 case EA_BASE_BP_DI:
2116 baseReg = MCOperand::createReg(X86::BP);
2117 indexReg = MCOperand::createReg(X86::DI);
2118 break;
2119 default:
2120 indexReg = MCOperand::createReg(X86::NoRegister);
2121 switch (insn.eaBase) {
2122 default:
2123 debug("Unexpected eaBase");
2124 return true;
2125 // Here, we will use the fill-ins defined above. However,
2126 // BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and
2127 // sib and sib64 were handled in the top-level if, so they're only
2128 // placeholders to keep the compiler happy.
2129 #define ENTRY(x) \
2130 case EA_BASE_##x: \
2131 baseReg = MCOperand::createReg(X86::x); break;
2132 ALL_EA_BASES
2133 #undef ENTRY
2134 #define ENTRY(x) case EA_REG_##x:
2135 ALL_REGS
2136 #undef ENTRY
2137 debug("A R/M memory operand may not be a register; "
2138 "the base field must be a base.");
2139 return true;
2140 }
2141 }
2142
2143 scaleAmount = MCOperand::createImm(1);
2144 }
2145
2146 displacement = MCOperand::createImm(insn.displacement);
2147
2148 segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]);
2149
2150 mcInst.addOperand(baseReg);
2151 mcInst.addOperand(scaleAmount);
2152 mcInst.addOperand(indexReg);
2153
2154 const uint8_t dispSize =
2155 (insn.eaDisplacement == EA_DISP_NONE) ? 0 : insn.displacementSize;
2156
2157 if (!Dis->tryAddingSymbolicOperand(
2158 mcInst, insn.displacement + pcrel, insn.startLocation, false,
2159 insn.displacementOffset, dispSize, insn.length))
2160 mcInst.addOperand(displacement);
2161 mcInst.addOperand(segmentReg);
2162 return false;
2163 }
2164
2165 /// translateRM - Translates an operand stored in the R/M (and possibly SIB)
2166 /// byte of an instruction to LLVM form, and appends it to an MCInst.
2167 ///
2168 /// @param mcInst - The MCInst to append to.
2169 /// @param operand - The operand, as stored in the descriptor table.
2170 /// @param insn - The instruction to extract Mod, R/M, and SIB fields
2171 /// from.
2172 /// @return - 0 on success; nonzero otherwise
translateRM(MCInst & mcInst,const OperandSpecifier & operand,InternalInstruction & insn,const MCDisassembler * Dis)2173 static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
2174 InternalInstruction &insn, const MCDisassembler *Dis) {
2175 switch (operand.type) {
2176 default:
2177 debug("Unexpected type for a R/M operand");
2178 return true;
2179 case TYPE_R8:
2180 case TYPE_R16:
2181 case TYPE_R32:
2182 case TYPE_R64:
2183 case TYPE_Rv:
2184 case TYPE_MM64:
2185 case TYPE_XMM:
2186 case TYPE_YMM:
2187 case TYPE_ZMM:
2188 case TYPE_TMM:
2189 case TYPE_VK_PAIR:
2190 case TYPE_VK:
2191 case TYPE_DEBUGREG:
2192 case TYPE_CONTROLREG:
2193 case TYPE_BNDR:
2194 return translateRMRegister(mcInst, insn);
2195 case TYPE_M:
2196 case TYPE_MVSIBX:
2197 case TYPE_MVSIBY:
2198 case TYPE_MVSIBZ:
2199 return translateRMMemory(mcInst, insn, Dis);
2200 case TYPE_MSIB:
2201 return translateRMMemory(mcInst, insn, Dis, true);
2202 }
2203 }
2204
2205 /// translateFPRegister - Translates a stack position on the FPU stack to its
2206 /// LLVM form, and appends it to an MCInst.
2207 ///
2208 /// @param mcInst - The MCInst to append to.
2209 /// @param stackPos - The stack position to translate.
translateFPRegister(MCInst & mcInst,uint8_t stackPos)2210 static void translateFPRegister(MCInst &mcInst,
2211 uint8_t stackPos) {
2212 mcInst.addOperand(MCOperand::createReg(X86::ST0 + stackPos));
2213 }
2214
2215 /// translateMaskRegister - Translates a 3-bit mask register number to
2216 /// LLVM form, and appends it to an MCInst.
2217 ///
2218 /// @param mcInst - The MCInst to append to.
2219 /// @param maskRegNum - Number of mask register from 0 to 7.
2220 /// @return - false on success; true otherwise.
translateMaskRegister(MCInst & mcInst,uint8_t maskRegNum)2221 static bool translateMaskRegister(MCInst &mcInst,
2222 uint8_t maskRegNum) {
2223 if (maskRegNum >= 8) {
2224 debug("Invalid mask register number");
2225 return true;
2226 }
2227
2228 mcInst.addOperand(MCOperand::createReg(X86::K0 + maskRegNum));
2229 return false;
2230 }
2231
2232 /// translateOperand - Translates an operand stored in an internal instruction
2233 /// to LLVM's format and appends it to an MCInst.
2234 ///
2235 /// @param mcInst - The MCInst to append to.
2236 /// @param operand - The operand, as stored in the descriptor table.
2237 /// @param insn - The internal instruction.
2238 /// @return - false on success; true otherwise.
translateOperand(MCInst & mcInst,const OperandSpecifier & operand,InternalInstruction & insn,const MCDisassembler * Dis)2239 static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
2240 InternalInstruction &insn,
2241 const MCDisassembler *Dis) {
2242 switch (operand.encoding) {
2243 default:
2244 debug("Unhandled operand encoding during translation");
2245 return true;
2246 case ENCODING_REG:
2247 translateRegister(mcInst, insn.reg);
2248 return false;
2249 case ENCODING_WRITEMASK:
2250 return translateMaskRegister(mcInst, insn.writemask);
2251 case ENCODING_SIB:
2252 CASE_ENCODING_RM:
2253 CASE_ENCODING_VSIB:
2254 return translateRM(mcInst, operand, insn, Dis);
2255 case ENCODING_IB:
2256 case ENCODING_IW:
2257 case ENCODING_ID:
2258 case ENCODING_IO:
2259 case ENCODING_Iv:
2260 case ENCODING_Ia:
2261 translateImmediate(mcInst,
2262 insn.immediates[insn.numImmediatesTranslated++],
2263 operand,
2264 insn,
2265 Dis);
2266 return false;
2267 case ENCODING_IRC:
2268 mcInst.addOperand(MCOperand::createImm(insn.RC));
2269 return false;
2270 case ENCODING_SI:
2271 return translateSrcIndex(mcInst, insn);
2272 case ENCODING_DI:
2273 return translateDstIndex(mcInst, insn);
2274 case ENCODING_RB:
2275 case ENCODING_RW:
2276 case ENCODING_RD:
2277 case ENCODING_RO:
2278 case ENCODING_Rv:
2279 translateRegister(mcInst, insn.opcodeRegister);
2280 return false;
2281 case ENCODING_CC:
2282 mcInst.addOperand(MCOperand::createImm(insn.immediates[1]));
2283 return false;
2284 case ENCODING_FP:
2285 translateFPRegister(mcInst, insn.modRM & 7);
2286 return false;
2287 case ENCODING_VVVV:
2288 translateRegister(mcInst, insn.vvvv);
2289 return false;
2290 case ENCODING_DUP:
2291 return translateOperand(mcInst, insn.operands[operand.type - TYPE_DUP0],
2292 insn, Dis);
2293 }
2294 }
2295
2296 /// translateInstruction - Translates an internal instruction and all its
2297 /// operands to an MCInst.
2298 ///
2299 /// @param mcInst - The MCInst to populate with the instruction's data.
2300 /// @param insn - The internal instruction.
2301 /// @return - false on success; true otherwise.
translateInstruction(MCInst & mcInst,InternalInstruction & insn,const MCDisassembler * Dis)2302 static bool translateInstruction(MCInst &mcInst,
2303 InternalInstruction &insn,
2304 const MCDisassembler *Dis) {
2305 if (!insn.spec) {
2306 debug("Instruction has no specification");
2307 return true;
2308 }
2309
2310 mcInst.clear();
2311 mcInst.setOpcode(insn.instructionID);
2312 // If when reading the prefix bytes we determined the overlapping 0xf2 or 0xf3
2313 // prefix bytes should be disassembled as xrelease and xacquire then set the
2314 // opcode to those instead of the rep and repne opcodes.
2315 if (insn.xAcquireRelease) {
2316 if(mcInst.getOpcode() == X86::REP_PREFIX)
2317 mcInst.setOpcode(X86::XRELEASE_PREFIX);
2318 else if(mcInst.getOpcode() == X86::REPNE_PREFIX)
2319 mcInst.setOpcode(X86::XACQUIRE_PREFIX);
2320 }
2321
2322 insn.numImmediatesTranslated = 0;
2323
2324 for (const auto &Op : insn.operands) {
2325 if (Op.encoding != ENCODING_NONE) {
2326 if (translateOperand(mcInst, Op, insn, Dis)) {
2327 return true;
2328 }
2329 }
2330 }
2331
2332 return false;
2333 }
2334
createX86Disassembler(const Target & T,const MCSubtargetInfo & STI,MCContext & Ctx)2335 static MCDisassembler *createX86Disassembler(const Target &T,
2336 const MCSubtargetInfo &STI,
2337 MCContext &Ctx) {
2338 std::unique_ptr<const MCInstrInfo> MII(T.createMCInstrInfo());
2339 return new X86GenericDisassembler(STI, Ctx, std::move(MII));
2340 }
2341
LLVMInitializeX86Disassembler()2342 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Disassembler() {
2343 // Register the disassembler.
2344 TargetRegistry::RegisterMCDisassembler(getTheX86_32Target(),
2345 createX86Disassembler);
2346 TargetRegistry::RegisterMCDisassembler(getTheX86_64Target(),
2347 createX86Disassembler);
2348 }
2349