//===-- X86DisassemblerDecoder.cpp - Disassembler decoder -----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is part of the X86 Disassembler.
// It contains the implementation of the instruction decoder.
// Documentation for the disassembler can be found in X86Disassembler.h.
//
//===----------------------------------------------------------------------===//
1591bc56edSDimitry Andric
1639d628a0SDimitry Andric #include <cstdarg> /* for va_*() */
1739d628a0SDimitry Andric #include <cstdio> /* for vsnprintf() */
1839d628a0SDimitry Andric #include <cstdlib> /* for exit() */
1939d628a0SDimitry Andric #include <cstring> /* for memset() */
2091bc56edSDimitry Andric
2191bc56edSDimitry Andric #include "X86DisassemblerDecoder.h"
2291bc56edSDimitry Andric
2391bc56edSDimitry Andric using namespace llvm::X86Disassembler;
2491bc56edSDimitry Andric
/// Specifies whether a ModR/M byte is needed and (if so) which
/// instruction each possible value of the ModR/M byte corresponds to. Once
/// this information is known, we have narrowed down to a single instruction.
struct ModRMDecision {
  /// One of the MODRM_* kinds (MODRM_ONEENTRY, MODRM_SPLITRM, ...); decode()
  /// switches on it to choose how the ModR/M byte selects an entry.
  uint8_t modrm_type;
  /// Index of this decision's first entry in modRMTable; decode() adds a
  /// ModR/M-derived offset to it for every kind except MODRM_ONEENTRY.
  uint16_t instructionIDs;
};
3291bc56edSDimitry Andric
3391bc56edSDimitry Andric /// Specifies which set of ModR/M->instruction tables to look at
3491bc56edSDimitry Andric /// given a particular opcode.
3591bc56edSDimitry Andric struct OpcodeDecision {
3691bc56edSDimitry Andric ModRMDecision modRMDecisions[256];
3791bc56edSDimitry Andric };
3891bc56edSDimitry Andric
3991bc56edSDimitry Andric /// Specifies which opcode->instruction tables to look at given
4091bc56edSDimitry Andric /// a particular context (set of attributes). Since there are many possible
4191bc56edSDimitry Andric /// contexts, the decoder first uses CONTEXTS_SYM to determine which context
4291bc56edSDimitry Andric /// applies given a specific set of attributes. Hence there are only IC_max
4391bc56edSDimitry Andric /// entries in this table, rather than 2^(ATTR_max).
4491bc56edSDimitry Andric struct ContextDecision {
4591bc56edSDimitry Andric OpcodeDecision opcodeDecisions[IC_max];
4691bc56edSDimitry Andric };
4791bc56edSDimitry Andric
4891bc56edSDimitry Andric #include "X86GenDisassemblerTables.inc"
4991bc56edSDimitry Andric
/*
 * debug - Reports the message s through Debug(), tagged with the current
 *   source file and line.  When NDEBUG is defined the macro expands to an
 *   empty statement and its argument is discarded without being evaluated.
 */
#ifndef NDEBUG
#define debug(s) do { Debug(__FILE__, __LINE__, s); } while (0)
#else
#define debug(s) do { } while (0)
#endif
5591bc56edSDimitry Andric
5691bc56edSDimitry Andric /*
5791bc56edSDimitry Andric * contextForAttrs - Client for the instruction context table. Takes a set of
5891bc56edSDimitry Andric * attributes and returns the appropriate decode context.
5991bc56edSDimitry Andric *
6091bc56edSDimitry Andric * @param attrMask - Attributes, from the enumeration attributeBits.
6191bc56edSDimitry Andric * @return - The InstructionContext to use when looking up an
6291bc56edSDimitry Andric * an instruction with these attributes.
6391bc56edSDimitry Andric */
contextForAttrs(uint16_t attrMask)6491bc56edSDimitry Andric static InstructionContext contextForAttrs(uint16_t attrMask) {
6591bc56edSDimitry Andric return static_cast<InstructionContext>(CONTEXTS_SYM[attrMask]);
6691bc56edSDimitry Andric }
6791bc56edSDimitry Andric
6891bc56edSDimitry Andric /*
6991bc56edSDimitry Andric * modRMRequired - Reads the appropriate instruction table to determine whether
7091bc56edSDimitry Andric * the ModR/M byte is required to decode a particular instruction.
7191bc56edSDimitry Andric *
7291bc56edSDimitry Andric * @param type - The opcode type (i.e., how many bytes it has).
7391bc56edSDimitry Andric * @param insnContext - The context for the instruction, as returned by
7491bc56edSDimitry Andric * contextForAttrs.
7591bc56edSDimitry Andric * @param opcode - The last byte of the instruction's opcode, not counting
7691bc56edSDimitry Andric * ModR/M extensions and escapes.
7791bc56edSDimitry Andric * @return - true if the ModR/M byte is required, false otherwise.
7891bc56edSDimitry Andric */
modRMRequired(OpcodeType type,InstructionContext insnContext,uint16_t opcode)7991bc56edSDimitry Andric static int modRMRequired(OpcodeType type,
8091bc56edSDimitry Andric InstructionContext insnContext,
8191bc56edSDimitry Andric uint16_t opcode) {
8291bc56edSDimitry Andric const struct ContextDecision* decision = nullptr;
8391bc56edSDimitry Andric
8491bc56edSDimitry Andric switch (type) {
8591bc56edSDimitry Andric case ONEBYTE:
8691bc56edSDimitry Andric decision = &ONEBYTE_SYM;
8791bc56edSDimitry Andric break;
8891bc56edSDimitry Andric case TWOBYTE:
8991bc56edSDimitry Andric decision = &TWOBYTE_SYM;
9091bc56edSDimitry Andric break;
9191bc56edSDimitry Andric case THREEBYTE_38:
9291bc56edSDimitry Andric decision = &THREEBYTE38_SYM;
9391bc56edSDimitry Andric break;
9491bc56edSDimitry Andric case THREEBYTE_3A:
9591bc56edSDimitry Andric decision = &THREEBYTE3A_SYM;
9691bc56edSDimitry Andric break;
9791bc56edSDimitry Andric case XOP8_MAP:
9891bc56edSDimitry Andric decision = &XOP8_MAP_SYM;
9991bc56edSDimitry Andric break;
10091bc56edSDimitry Andric case XOP9_MAP:
10191bc56edSDimitry Andric decision = &XOP9_MAP_SYM;
10291bc56edSDimitry Andric break;
10391bc56edSDimitry Andric case XOPA_MAP:
10491bc56edSDimitry Andric decision = &XOPA_MAP_SYM;
10591bc56edSDimitry Andric break;
1064ba319b5SDimitry Andric case THREEDNOW_MAP:
1074ba319b5SDimitry Andric decision = &THREEDNOW_MAP_SYM;
1084ba319b5SDimitry Andric break;
10991bc56edSDimitry Andric }
11091bc56edSDimitry Andric
11191bc56edSDimitry Andric return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
11291bc56edSDimitry Andric modrm_type != MODRM_ONEENTRY;
11391bc56edSDimitry Andric }
11491bc56edSDimitry Andric
11591bc56edSDimitry Andric /*
11691bc56edSDimitry Andric * decode - Reads the appropriate instruction table to obtain the unique ID of
11791bc56edSDimitry Andric * an instruction.
11891bc56edSDimitry Andric *
11991bc56edSDimitry Andric * @param type - See modRMRequired().
12091bc56edSDimitry Andric * @param insnContext - See modRMRequired().
12191bc56edSDimitry Andric * @param opcode - See modRMRequired().
12291bc56edSDimitry Andric * @param modRM - The ModR/M byte if required, or any value if not.
12391bc56edSDimitry Andric * @return - The UID of the instruction, or 0 on failure.
12491bc56edSDimitry Andric */
decode(OpcodeType type,InstructionContext insnContext,uint8_t opcode,uint8_t modRM)12591bc56edSDimitry Andric static InstrUID decode(OpcodeType type,
12691bc56edSDimitry Andric InstructionContext insnContext,
12791bc56edSDimitry Andric uint8_t opcode,
12891bc56edSDimitry Andric uint8_t modRM) {
12991bc56edSDimitry Andric const struct ModRMDecision* dec = nullptr;
13091bc56edSDimitry Andric
13191bc56edSDimitry Andric switch (type) {
13291bc56edSDimitry Andric case ONEBYTE:
13391bc56edSDimitry Andric dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
13491bc56edSDimitry Andric break;
13591bc56edSDimitry Andric case TWOBYTE:
13691bc56edSDimitry Andric dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
13791bc56edSDimitry Andric break;
13891bc56edSDimitry Andric case THREEBYTE_38:
13991bc56edSDimitry Andric dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
14091bc56edSDimitry Andric break;
14191bc56edSDimitry Andric case THREEBYTE_3A:
14291bc56edSDimitry Andric dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
14391bc56edSDimitry Andric break;
14491bc56edSDimitry Andric case XOP8_MAP:
14591bc56edSDimitry Andric dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
14691bc56edSDimitry Andric break;
14791bc56edSDimitry Andric case XOP9_MAP:
14891bc56edSDimitry Andric dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
14991bc56edSDimitry Andric break;
15091bc56edSDimitry Andric case XOPA_MAP:
15191bc56edSDimitry Andric dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
15291bc56edSDimitry Andric break;
1534ba319b5SDimitry Andric case THREEDNOW_MAP:
1544ba319b5SDimitry Andric dec = &THREEDNOW_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
1554ba319b5SDimitry Andric break;
15691bc56edSDimitry Andric }
15791bc56edSDimitry Andric
15891bc56edSDimitry Andric switch (dec->modrm_type) {
15991bc56edSDimitry Andric default:
16091bc56edSDimitry Andric debug("Corrupt table! Unknown modrm_type");
16191bc56edSDimitry Andric return 0;
16291bc56edSDimitry Andric case MODRM_ONEENTRY:
16391bc56edSDimitry Andric return modRMTable[dec->instructionIDs];
16491bc56edSDimitry Andric case MODRM_SPLITRM:
16591bc56edSDimitry Andric if (modFromModRM(modRM) == 0x3)
16691bc56edSDimitry Andric return modRMTable[dec->instructionIDs+1];
16791bc56edSDimitry Andric return modRMTable[dec->instructionIDs];
16891bc56edSDimitry Andric case MODRM_SPLITREG:
16991bc56edSDimitry Andric if (modFromModRM(modRM) == 0x3)
17091bc56edSDimitry Andric return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8];
17191bc56edSDimitry Andric return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
17291bc56edSDimitry Andric case MODRM_SPLITMISC:
17391bc56edSDimitry Andric if (modFromModRM(modRM) == 0x3)
17491bc56edSDimitry Andric return modRMTable[dec->instructionIDs+(modRM & 0x3f)+8];
17591bc56edSDimitry Andric return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
17691bc56edSDimitry Andric case MODRM_FULL:
17791bc56edSDimitry Andric return modRMTable[dec->instructionIDs+modRM];
17891bc56edSDimitry Andric }
17991bc56edSDimitry Andric }
18091bc56edSDimitry Andric
18191bc56edSDimitry Andric /*
18291bc56edSDimitry Andric * specifierForUID - Given a UID, returns the name and operand specification for
18391bc56edSDimitry Andric * that instruction.
18491bc56edSDimitry Andric *
18591bc56edSDimitry Andric * @param uid - The unique ID for the instruction. This should be returned by
18691bc56edSDimitry Andric * decode(); specifierForUID will not check bounds.
18791bc56edSDimitry Andric * @return - A pointer to the specification for that instruction.
18891bc56edSDimitry Andric */
specifierForUID(InstrUID uid)18991bc56edSDimitry Andric static const struct InstructionSpecifier *specifierForUID(InstrUID uid) {
19091bc56edSDimitry Andric return &INSTRUCTIONS_SYM[uid];
19191bc56edSDimitry Andric }
19291bc56edSDimitry Andric
19391bc56edSDimitry Andric /*
19491bc56edSDimitry Andric * consumeByte - Uses the reader function provided by the user to consume one
19591bc56edSDimitry Andric * byte from the instruction's memory and advance the cursor.
19691bc56edSDimitry Andric *
19791bc56edSDimitry Andric * @param insn - The instruction with the reader function to use. The cursor
19891bc56edSDimitry Andric * for this instruction is advanced.
19991bc56edSDimitry Andric * @param byte - A pointer to a pre-allocated memory buffer to be populated
20091bc56edSDimitry Andric * with the data read.
20191bc56edSDimitry Andric * @return - 0 if the read was successful; nonzero otherwise.
20291bc56edSDimitry Andric */
consumeByte(struct InternalInstruction * insn,uint8_t * byte)20391bc56edSDimitry Andric static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
20491bc56edSDimitry Andric int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
20591bc56edSDimitry Andric
20691bc56edSDimitry Andric if (!ret)
20791bc56edSDimitry Andric ++(insn->readerCursor);
20891bc56edSDimitry Andric
20991bc56edSDimitry Andric return ret;
21091bc56edSDimitry Andric }
21191bc56edSDimitry Andric
21291bc56edSDimitry Andric /*
21391bc56edSDimitry Andric * lookAtByte - Like consumeByte, but does not advance the cursor.
21491bc56edSDimitry Andric *
21591bc56edSDimitry Andric * @param insn - See consumeByte().
21691bc56edSDimitry Andric * @param byte - See consumeByte().
21791bc56edSDimitry Andric * @return - See consumeByte().
21891bc56edSDimitry Andric */
lookAtByte(struct InternalInstruction * insn,uint8_t * byte)21991bc56edSDimitry Andric static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) {
22091bc56edSDimitry Andric return insn->reader(insn->readerArg, byte, insn->readerCursor);
22191bc56edSDimitry Andric }
22291bc56edSDimitry Andric
unconsumeByte(struct InternalInstruction * insn)22391bc56edSDimitry Andric static void unconsumeByte(struct InternalInstruction* insn) {
22491bc56edSDimitry Andric insn->readerCursor--;
22591bc56edSDimitry Andric }
22691bc56edSDimitry Andric
/*
 * CONSUME_FUNC - Defines a static function
 *     int name(struct InternalInstruction* insn, type* ptr)
 *   that reads sizeof(type) bytes through insn->reader, starting at the
 *   current cursor, and assembles them least-significant byte first.
 *
 *   On success the value is stored through ptr, the cursor advances by
 *   sizeof(type) and 0 is returned.  If any single read fails, the reader's
 *   nonzero result is returned at once; *ptr and the cursor are left
 *   untouched.
 */
#define CONSUME_FUNC(name, type)                                  \
  static int name(struct InternalInstruction* insn, type* ptr) {  \
    type combined = 0;                                            \
    unsigned offset;                                              \
    for (offset = 0; offset < sizeof(type); ++offset) {           \
      uint8_t byte;                                               \
      int ret = insn->reader(insn->readerArg,                     \
                             &byte,                               \
                             insn->readerCursor + offset);        \
      if (ret)                                                    \
        return ret;                                               \
      combined = combined |                                       \
                 (static_cast<uint64_t>(byte) << (offset * 8));   \
    }                                                             \
    *ptr = combined;                                              \
    insn->readerCursor += sizeof(type);                           \
    return 0;                                                     \
  }
24491bc56edSDimitry Andric
24591bc56edSDimitry Andric /*
24691bc56edSDimitry Andric * consume* - Use the reader function provided by the user to consume data
24791bc56edSDimitry Andric * values of various sizes from the instruction's memory and advance the
24891bc56edSDimitry Andric * cursor appropriately. These readers perform endian conversion.
24991bc56edSDimitry Andric *
25091bc56edSDimitry Andric * @param insn - See consumeByte().
25191bc56edSDimitry Andric * @param ptr - A pointer to a pre-allocated memory of appropriate size to
25291bc56edSDimitry Andric * be populated with the data read.
25391bc56edSDimitry Andric * @return - See consumeByte().
25491bc56edSDimitry Andric */
CONSUME_FUNC(consumeInt8,int8_t)25591bc56edSDimitry Andric CONSUME_FUNC(consumeInt8, int8_t)
25691bc56edSDimitry Andric CONSUME_FUNC(consumeInt16, int16_t)
25791bc56edSDimitry Andric CONSUME_FUNC(consumeInt32, int32_t)
25891bc56edSDimitry Andric CONSUME_FUNC(consumeUInt16, uint16_t)
25991bc56edSDimitry Andric CONSUME_FUNC(consumeUInt32, uint32_t)
26091bc56edSDimitry Andric CONSUME_FUNC(consumeUInt64, uint64_t)
26191bc56edSDimitry Andric
26291bc56edSDimitry Andric /*
26391bc56edSDimitry Andric * dbgprintf - Uses the logging function provided by the user to log a single
26491bc56edSDimitry Andric * message, typically without a carriage-return.
26591bc56edSDimitry Andric *
26691bc56edSDimitry Andric * @param insn - The instruction containing the logging function.
26791bc56edSDimitry Andric * @param format - See printf().
26891bc56edSDimitry Andric * @param ... - See printf().
26991bc56edSDimitry Andric */
27091bc56edSDimitry Andric static void dbgprintf(struct InternalInstruction* insn,
27191bc56edSDimitry Andric const char* format,
27291bc56edSDimitry Andric ...) {
27391bc56edSDimitry Andric char buffer[256];
27491bc56edSDimitry Andric va_list ap;
27591bc56edSDimitry Andric
27691bc56edSDimitry Andric if (!insn->dlog)
27791bc56edSDimitry Andric return;
27891bc56edSDimitry Andric
27991bc56edSDimitry Andric va_start(ap, format);
28091bc56edSDimitry Andric (void)vsnprintf(buffer, sizeof(buffer), format, ap);
28191bc56edSDimitry Andric va_end(ap);
28291bc56edSDimitry Andric
28391bc56edSDimitry Andric insn->dlog(insn->dlogArg, buffer);
28491bc56edSDimitry Andric }
28591bc56edSDimitry Andric
isREX(struct InternalInstruction * insn,uint8_t prefix)2862cab237bSDimitry Andric static bool isREX(struct InternalInstruction *insn, uint8_t prefix) {
2872cab237bSDimitry Andric if (insn->mode == MODE_64BIT)
2882cab237bSDimitry Andric return prefix >= 0x40 && prefix <= 0x4f;
2892cab237bSDimitry Andric return false;
29091bc56edSDimitry Andric }
29191bc56edSDimitry Andric
29291bc56edSDimitry Andric /*
2932cab237bSDimitry Andric * setPrefixPresent - Marks that a particular prefix is present as mandatory
29491bc56edSDimitry Andric *
2952cab237bSDimitry Andric * @param insn - The instruction to be marked as having the prefix.
2962cab237bSDimitry Andric * @param prefix - The prefix that is present.
29791bc56edSDimitry Andric */
setPrefixPresent(struct InternalInstruction * insn,uint8_t prefix)2982cab237bSDimitry Andric static void setPrefixPresent(struct InternalInstruction *insn, uint8_t prefix) {
2992cab237bSDimitry Andric uint8_t nextByte;
3002cab237bSDimitry Andric switch (prefix) {
3014ba319b5SDimitry Andric case 0xf0:
3024ba319b5SDimitry Andric insn->hasLockPrefix = true;
3034ba319b5SDimitry Andric break;
3042cab237bSDimitry Andric case 0xf2:
3052cab237bSDimitry Andric case 0xf3:
3062cab237bSDimitry Andric if (lookAtByte(insn, &nextByte))
3072cab237bSDimitry Andric break;
3082cab237bSDimitry Andric // TODO:
3092cab237bSDimitry Andric // 1. There could be several 0x66
3102cab237bSDimitry Andric // 2. if (nextByte == 0x66) and nextNextByte != 0x0f then
3112cab237bSDimitry Andric // it's not mandatory prefix
3122cab237bSDimitry Andric // 3. if (nextByte >= 0x40 && nextByte <= 0x4f) it's REX and we need
3132cab237bSDimitry Andric // 0x0f exactly after it to be mandatory prefix
3142cab237bSDimitry Andric if (isREX(insn, nextByte) || nextByte == 0x0f || nextByte == 0x66)
3152cab237bSDimitry Andric // The last of 0xf2 /0xf3 is mandatory prefix
3162cab237bSDimitry Andric insn->mandatoryPrefix = prefix;
3172cab237bSDimitry Andric insn->repeatPrefix = prefix;
3182cab237bSDimitry Andric break;
3192cab237bSDimitry Andric case 0x66:
3202cab237bSDimitry Andric if (lookAtByte(insn, &nextByte))
3212cab237bSDimitry Andric break;
3222cab237bSDimitry Andric // 0x66 can't overwrite existing mandatory prefix and should be ignored
3232cab237bSDimitry Andric if (!insn->mandatoryPrefix && (nextByte == 0x0f || isREX(insn, nextByte)))
3242cab237bSDimitry Andric insn->mandatoryPrefix = prefix;
3252cab237bSDimitry Andric break;
3262cab237bSDimitry Andric }
32791bc56edSDimitry Andric }
32891bc56edSDimitry Andric
32991bc56edSDimitry Andric /*
33091bc56edSDimitry Andric * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
33191bc56edSDimitry Andric * instruction as having them. Also sets the instruction's default operand,
33291bc56edSDimitry Andric * address, and other relevant data sizes to report operands correctly.
33391bc56edSDimitry Andric *
33491bc56edSDimitry Andric * @param insn - The instruction whose prefixes are to be read.
33591bc56edSDimitry Andric * @return - 0 if the instruction could be read until the end of the prefix
33691bc56edSDimitry Andric * bytes, and no prefixes conflicted; nonzero otherwise.
33791bc56edSDimitry Andric */
readPrefixes(struct InternalInstruction * insn)33891bc56edSDimitry Andric static int readPrefixes(struct InternalInstruction* insn) {
33991bc56edSDimitry Andric bool isPrefix = true;
34091bc56edSDimitry Andric uint8_t byte = 0;
34191bc56edSDimitry Andric uint8_t nextByte;
34291bc56edSDimitry Andric
34391bc56edSDimitry Andric dbgprintf(insn, "readPrefixes()");
34491bc56edSDimitry Andric
34591bc56edSDimitry Andric while (isPrefix) {
34691bc56edSDimitry Andric /* If we fail reading prefixes, just stop here and let the opcode reader deal with it */
34791bc56edSDimitry Andric if (consumeByte(insn, &byte))
34891bc56edSDimitry Andric break;
34991bc56edSDimitry Andric
35091bc56edSDimitry Andric /*
35191bc56edSDimitry Andric * If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then
35291bc56edSDimitry Andric * break and let it be disassembled as a normal "instruction".
35391bc56edSDimitry Andric */
3542cab237bSDimitry Andric if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) // LOCK
35591bc56edSDimitry Andric break;
35691bc56edSDimitry Andric
3572cab237bSDimitry Andric if ((byte == 0xf2 || byte == 0xf3) && !lookAtByte(insn, &nextByte)) {
35891bc56edSDimitry Andric /*
35991bc56edSDimitry Andric * If the byte is 0xf2 or 0xf3, and any of the following conditions are
36091bc56edSDimitry Andric * met:
36191bc56edSDimitry Andric * - it is followed by a LOCK (0xf0) prefix
36291bc56edSDimitry Andric * - it is followed by an xchg instruction
36391bc56edSDimitry Andric * then it should be disassembled as a xacquire/xrelease not repne/rep.
36491bc56edSDimitry Andric */
3652cab237bSDimitry Andric if (((nextByte == 0xf0) ||
3662cab237bSDimitry Andric ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) {
36791bc56edSDimitry Andric insn->xAcquireRelease = true;
3682cab237bSDimitry Andric if (!(byte == 0xf3 && nextByte == 0x90)) // PAUSE instruction support
3692cab237bSDimitry Andric break;
3702cab237bSDimitry Andric }
37191bc56edSDimitry Andric /*
37291bc56edSDimitry Andric * Also if the byte is 0xf3, and the following condition is met:
37391bc56edSDimitry Andric * - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or
37491bc56edSDimitry Andric * "mov mem, imm" (opcode 0xc6/0xc7) instructions.
37591bc56edSDimitry Andric * then it should be disassembled as an xrelease not rep.
37691bc56edSDimitry Andric */
3772cab237bSDimitry Andric if (byte == 0xf3 && (nextByte == 0x88 || nextByte == 0x89 ||
3782cab237bSDimitry Andric nextByte == 0xc6 || nextByte == 0xc7)) {
37991bc56edSDimitry Andric insn->xAcquireRelease = true;
3802cab237bSDimitry Andric if (nextByte != 0x90) // PAUSE instruction support
3812cab237bSDimitry Andric break;
3822cab237bSDimitry Andric }
3832cab237bSDimitry Andric if (isREX(insn, nextByte)) {
3842cab237bSDimitry Andric uint8_t nnextByte;
3852cab237bSDimitry Andric // Go to REX prefix after the current one
3862cab237bSDimitry Andric if (consumeByte(insn, &nnextByte))
38791bc56edSDimitry Andric return -1;
3882cab237bSDimitry Andric // We should be able to read next byte after REX prefix
3892cab237bSDimitry Andric if (lookAtByte(insn, &nnextByte))
39091bc56edSDimitry Andric return -1;
39191bc56edSDimitry Andric unconsumeByte(insn);
39291bc56edSDimitry Andric }
39391bc56edSDimitry Andric }
39491bc56edSDimitry Andric
39591bc56edSDimitry Andric switch (byte) {
39691bc56edSDimitry Andric case 0xf0: /* LOCK */
39791bc56edSDimitry Andric case 0xf2: /* REPNE/REPNZ */
39891bc56edSDimitry Andric case 0xf3: /* REP or REPE/REPZ */
3992cab237bSDimitry Andric setPrefixPresent(insn, byte);
40091bc56edSDimitry Andric break;
40191bc56edSDimitry Andric case 0x2e: /* CS segment override -OR- Branch not taken */
40291bc56edSDimitry Andric case 0x36: /* SS segment override -OR- Branch taken */
40391bc56edSDimitry Andric case 0x3e: /* DS segment override */
40491bc56edSDimitry Andric case 0x26: /* ES segment override */
40591bc56edSDimitry Andric case 0x64: /* FS segment override */
40691bc56edSDimitry Andric case 0x65: /* GS segment override */
40791bc56edSDimitry Andric switch (byte) {
40891bc56edSDimitry Andric case 0x2e:
40991bc56edSDimitry Andric insn->segmentOverride = SEG_OVERRIDE_CS;
41091bc56edSDimitry Andric break;
41191bc56edSDimitry Andric case 0x36:
41291bc56edSDimitry Andric insn->segmentOverride = SEG_OVERRIDE_SS;
41391bc56edSDimitry Andric break;
41491bc56edSDimitry Andric case 0x3e:
41591bc56edSDimitry Andric insn->segmentOverride = SEG_OVERRIDE_DS;
41691bc56edSDimitry Andric break;
41791bc56edSDimitry Andric case 0x26:
41891bc56edSDimitry Andric insn->segmentOverride = SEG_OVERRIDE_ES;
41991bc56edSDimitry Andric break;
42091bc56edSDimitry Andric case 0x64:
42191bc56edSDimitry Andric insn->segmentOverride = SEG_OVERRIDE_FS;
42291bc56edSDimitry Andric break;
42391bc56edSDimitry Andric case 0x65:
42491bc56edSDimitry Andric insn->segmentOverride = SEG_OVERRIDE_GS;
42591bc56edSDimitry Andric break;
42691bc56edSDimitry Andric default:
42791bc56edSDimitry Andric debug("Unhandled override");
42891bc56edSDimitry Andric return -1;
42991bc56edSDimitry Andric }
4302cab237bSDimitry Andric setPrefixPresent(insn, byte);
43191bc56edSDimitry Andric break;
43291bc56edSDimitry Andric case 0x66: /* Operand-size override */
4332cab237bSDimitry Andric insn->hasOpSize = true;
4342cab237bSDimitry Andric setPrefixPresent(insn, byte);
43591bc56edSDimitry Andric break;
43691bc56edSDimitry Andric case 0x67: /* Address-size override */
4372cab237bSDimitry Andric insn->hasAdSize = true;
4382cab237bSDimitry Andric setPrefixPresent(insn, byte);
43991bc56edSDimitry Andric break;
44091bc56edSDimitry Andric default: /* Not a prefix byte */
44191bc56edSDimitry Andric isPrefix = false;
44291bc56edSDimitry Andric break;
44391bc56edSDimitry Andric }
44491bc56edSDimitry Andric
44591bc56edSDimitry Andric if (isPrefix)
44691bc56edSDimitry Andric dbgprintf(insn, "Found prefix 0x%hhx", byte);
44791bc56edSDimitry Andric }
44891bc56edSDimitry Andric
44991bc56edSDimitry Andric insn->vectorExtensionType = TYPE_NO_VEX_XOP;
45091bc56edSDimitry Andric
45191bc56edSDimitry Andric if (byte == 0x62) {
45291bc56edSDimitry Andric uint8_t byte1, byte2;
45391bc56edSDimitry Andric
45491bc56edSDimitry Andric if (consumeByte(insn, &byte1)) {
45591bc56edSDimitry Andric dbgprintf(insn, "Couldn't read second byte of EVEX prefix");
45691bc56edSDimitry Andric return -1;
45791bc56edSDimitry Andric }
45891bc56edSDimitry Andric
45991bc56edSDimitry Andric if (lookAtByte(insn, &byte2)) {
46091bc56edSDimitry Andric dbgprintf(insn, "Couldn't read third byte of EVEX prefix");
46191bc56edSDimitry Andric return -1;
46291bc56edSDimitry Andric }
46391bc56edSDimitry Andric
46491bc56edSDimitry Andric if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) &&
46591bc56edSDimitry Andric ((~byte1 & 0xc) == 0xc) && ((byte2 & 0x4) == 0x4)) {
46691bc56edSDimitry Andric insn->vectorExtensionType = TYPE_EVEX;
46739d628a0SDimitry Andric } else {
46891bc56edSDimitry Andric unconsumeByte(insn); /* unconsume byte1 */
46991bc56edSDimitry Andric unconsumeByte(insn); /* unconsume byte */
47091bc56edSDimitry Andric }
47191bc56edSDimitry Andric
47291bc56edSDimitry Andric if (insn->vectorExtensionType == TYPE_EVEX) {
47391bc56edSDimitry Andric insn->vectorExtensionPrefix[0] = byte;
47491bc56edSDimitry Andric insn->vectorExtensionPrefix[1] = byte1;
47591bc56edSDimitry Andric if (consumeByte(insn, &insn->vectorExtensionPrefix[2])) {
47691bc56edSDimitry Andric dbgprintf(insn, "Couldn't read third byte of EVEX prefix");
47791bc56edSDimitry Andric return -1;
47891bc56edSDimitry Andric }
47991bc56edSDimitry Andric if (consumeByte(insn, &insn->vectorExtensionPrefix[3])) {
48091bc56edSDimitry Andric dbgprintf(insn, "Couldn't read fourth byte of EVEX prefix");
48191bc56edSDimitry Andric return -1;
48291bc56edSDimitry Andric }
48391bc56edSDimitry Andric
48491bc56edSDimitry Andric /* We simulate the REX prefix for simplicity's sake */
48591bc56edSDimitry Andric if (insn->mode == MODE_64BIT) {
48691bc56edSDimitry Andric insn->rexPrefix = 0x40
48791bc56edSDimitry Andric | (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3)
48891bc56edSDimitry Andric | (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2)
48991bc56edSDimitry Andric | (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1)
49091bc56edSDimitry Andric | (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0);
49191bc56edSDimitry Andric }
49291bc56edSDimitry Andric
49391bc56edSDimitry Andric dbgprintf(insn, "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",
49491bc56edSDimitry Andric insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
49591bc56edSDimitry Andric insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]);
49691bc56edSDimitry Andric }
49739d628a0SDimitry Andric } else if (byte == 0xc4) {
49891bc56edSDimitry Andric uint8_t byte1;
49991bc56edSDimitry Andric
50091bc56edSDimitry Andric if (lookAtByte(insn, &byte1)) {
50191bc56edSDimitry Andric dbgprintf(insn, "Couldn't read second byte of VEX");
50291bc56edSDimitry Andric return -1;
50391bc56edSDimitry Andric }
50491bc56edSDimitry Andric
5052cab237bSDimitry Andric if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)
50691bc56edSDimitry Andric insn->vectorExtensionType = TYPE_VEX_3B;
5072cab237bSDimitry Andric else
50891bc56edSDimitry Andric unconsumeByte(insn);
50991bc56edSDimitry Andric
51091bc56edSDimitry Andric if (insn->vectorExtensionType == TYPE_VEX_3B) {
51191bc56edSDimitry Andric insn->vectorExtensionPrefix[0] = byte;
51291bc56edSDimitry Andric consumeByte(insn, &insn->vectorExtensionPrefix[1]);
51391bc56edSDimitry Andric consumeByte(insn, &insn->vectorExtensionPrefix[2]);
51491bc56edSDimitry Andric
51591bc56edSDimitry Andric /* We simulate the REX prefix for simplicity's sake */
51691bc56edSDimitry Andric
5172cab237bSDimitry Andric if (insn->mode == MODE_64BIT)
51891bc56edSDimitry Andric insn->rexPrefix = 0x40
51991bc56edSDimitry Andric | (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3)
52091bc56edSDimitry Andric | (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2)
52191bc56edSDimitry Andric | (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1)
52291bc56edSDimitry Andric | (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0);
52391bc56edSDimitry Andric
52491bc56edSDimitry Andric dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx",
52591bc56edSDimitry Andric insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
52691bc56edSDimitry Andric insn->vectorExtensionPrefix[2]);
52791bc56edSDimitry Andric }
52839d628a0SDimitry Andric } else if (byte == 0xc5) {
52991bc56edSDimitry Andric uint8_t byte1;
53091bc56edSDimitry Andric
53191bc56edSDimitry Andric if (lookAtByte(insn, &byte1)) {
53291bc56edSDimitry Andric dbgprintf(insn, "Couldn't read second byte of VEX");
53391bc56edSDimitry Andric return -1;
53491bc56edSDimitry Andric }
53591bc56edSDimitry Andric
5362cab237bSDimitry Andric if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)
53791bc56edSDimitry Andric insn->vectorExtensionType = TYPE_VEX_2B;
5382cab237bSDimitry Andric else
53991bc56edSDimitry Andric unconsumeByte(insn);
54091bc56edSDimitry Andric
54191bc56edSDimitry Andric if (insn->vectorExtensionType == TYPE_VEX_2B) {
54291bc56edSDimitry Andric insn->vectorExtensionPrefix[0] = byte;
54391bc56edSDimitry Andric consumeByte(insn, &insn->vectorExtensionPrefix[1]);
54491bc56edSDimitry Andric
5452cab237bSDimitry Andric if (insn->mode == MODE_64BIT)
54691bc56edSDimitry Andric insn->rexPrefix = 0x40
54791bc56edSDimitry Andric | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2);
54891bc56edSDimitry Andric
54939d628a0SDimitry Andric switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
55091bc56edSDimitry Andric default:
55191bc56edSDimitry Andric break;
55291bc56edSDimitry Andric case VEX_PREFIX_66:
5532cab237bSDimitry Andric insn->hasOpSize = true;
55491bc56edSDimitry Andric break;
55591bc56edSDimitry Andric }
55691bc56edSDimitry Andric
55791bc56edSDimitry Andric dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx",
55891bc56edSDimitry Andric insn->vectorExtensionPrefix[0],
55991bc56edSDimitry Andric insn->vectorExtensionPrefix[1]);
56091bc56edSDimitry Andric }
56139d628a0SDimitry Andric } else if (byte == 0x8f) {
56291bc56edSDimitry Andric uint8_t byte1;
56391bc56edSDimitry Andric
56491bc56edSDimitry Andric if (lookAtByte(insn, &byte1)) {
56591bc56edSDimitry Andric dbgprintf(insn, "Couldn't read second byte of XOP");
56691bc56edSDimitry Andric return -1;
56791bc56edSDimitry Andric }
56891bc56edSDimitry Andric
5692cab237bSDimitry Andric if ((byte1 & 0x38) != 0x0) /* 0 in these 3 bits is a POP instruction. */
57091bc56edSDimitry Andric insn->vectorExtensionType = TYPE_XOP;
5712cab237bSDimitry Andric else
57291bc56edSDimitry Andric unconsumeByte(insn);
57391bc56edSDimitry Andric
57491bc56edSDimitry Andric if (insn->vectorExtensionType == TYPE_XOP) {
57591bc56edSDimitry Andric insn->vectorExtensionPrefix[0] = byte;
57691bc56edSDimitry Andric consumeByte(insn, &insn->vectorExtensionPrefix[1]);
57791bc56edSDimitry Andric consumeByte(insn, &insn->vectorExtensionPrefix[2]);
57891bc56edSDimitry Andric
57991bc56edSDimitry Andric /* We simulate the REX prefix for simplicity's sake */
58091bc56edSDimitry Andric
5812cab237bSDimitry Andric if (insn->mode == MODE_64BIT)
58291bc56edSDimitry Andric insn->rexPrefix = 0x40
58391bc56edSDimitry Andric | (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3)
58491bc56edSDimitry Andric | (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2)
58591bc56edSDimitry Andric | (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1)
58691bc56edSDimitry Andric | (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0);
58791bc56edSDimitry Andric
58839d628a0SDimitry Andric switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
58991bc56edSDimitry Andric default:
59091bc56edSDimitry Andric break;
59191bc56edSDimitry Andric case VEX_PREFIX_66:
5922cab237bSDimitry Andric insn->hasOpSize = true;
59391bc56edSDimitry Andric break;
59491bc56edSDimitry Andric }
59591bc56edSDimitry Andric
59691bc56edSDimitry Andric dbgprintf(insn, "Found XOP prefix 0x%hhx 0x%hhx 0x%hhx",
59791bc56edSDimitry Andric insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
59891bc56edSDimitry Andric insn->vectorExtensionPrefix[2]);
59991bc56edSDimitry Andric }
6002cab237bSDimitry Andric } else if (isREX(insn, byte)) {
6012cab237bSDimitry Andric if (lookAtByte(insn, &nextByte))
60291bc56edSDimitry Andric return -1;
60391bc56edSDimitry Andric insn->rexPrefix = byte;
60491bc56edSDimitry Andric dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
6052cab237bSDimitry Andric } else
60691bc56edSDimitry Andric unconsumeByte(insn);
60791bc56edSDimitry Andric
60891bc56edSDimitry Andric if (insn->mode == MODE_16BIT) {
6092cab237bSDimitry Andric insn->registerSize = (insn->hasOpSize ? 4 : 2);
6102cab237bSDimitry Andric insn->addressSize = (insn->hasAdSize ? 4 : 2);
6112cab237bSDimitry Andric insn->displacementSize = (insn->hasAdSize ? 4 : 2);
6122cab237bSDimitry Andric insn->immediateSize = (insn->hasOpSize ? 4 : 2);
61391bc56edSDimitry Andric } else if (insn->mode == MODE_32BIT) {
6142cab237bSDimitry Andric insn->registerSize = (insn->hasOpSize ? 2 : 4);
6152cab237bSDimitry Andric insn->addressSize = (insn->hasAdSize ? 2 : 4);
6162cab237bSDimitry Andric insn->displacementSize = (insn->hasAdSize ? 2 : 4);
6172cab237bSDimitry Andric insn->immediateSize = (insn->hasOpSize ? 2 : 4);
61891bc56edSDimitry Andric } else if (insn->mode == MODE_64BIT) {
61991bc56edSDimitry Andric if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
62091bc56edSDimitry Andric insn->registerSize = 8;
6212cab237bSDimitry Andric insn->addressSize = (insn->hasAdSize ? 4 : 8);
62291bc56edSDimitry Andric insn->displacementSize = 4;
62391bc56edSDimitry Andric insn->immediateSize = 4;
62491bc56edSDimitry Andric } else {
6252cab237bSDimitry Andric insn->registerSize = (insn->hasOpSize ? 2 : 4);
6262cab237bSDimitry Andric insn->addressSize = (insn->hasAdSize ? 4 : 8);
6272cab237bSDimitry Andric insn->displacementSize = (insn->hasOpSize ? 2 : 4);
6282cab237bSDimitry Andric insn->immediateSize = (insn->hasOpSize ? 2 : 4);
62991bc56edSDimitry Andric }
63091bc56edSDimitry Andric }
63191bc56edSDimitry Andric
63291bc56edSDimitry Andric return 0;
63391bc56edSDimitry Andric }
63491bc56edSDimitry Andric
6354ba319b5SDimitry Andric static int readModRM(struct InternalInstruction* insn);
6364ba319b5SDimitry Andric
63791bc56edSDimitry Andric /*
63891bc56edSDimitry Andric * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of
63991bc56edSDimitry Andric * extended or escape opcodes).
64091bc56edSDimitry Andric *
64191bc56edSDimitry Andric * @param insn - The instruction whose opcode is to be read.
64291bc56edSDimitry Andric * @return - 0 if the opcode could be read successfully; nonzero otherwise.
64391bc56edSDimitry Andric */
readOpcode(struct InternalInstruction * insn)64491bc56edSDimitry Andric static int readOpcode(struct InternalInstruction* insn) {
64591bc56edSDimitry Andric /* Determine the length of the primary opcode */
64691bc56edSDimitry Andric
64791bc56edSDimitry Andric uint8_t current;
64891bc56edSDimitry Andric
64991bc56edSDimitry Andric dbgprintf(insn, "readOpcode()");
65091bc56edSDimitry Andric
65191bc56edSDimitry Andric insn->opcodeType = ONEBYTE;
65291bc56edSDimitry Andric
65339d628a0SDimitry Andric if (insn->vectorExtensionType == TYPE_EVEX) {
65491bc56edSDimitry Andric switch (mmFromEVEX2of4(insn->vectorExtensionPrefix[1])) {
65591bc56edSDimitry Andric default:
65691bc56edSDimitry Andric dbgprintf(insn, "Unhandled mm field for instruction (0x%hhx)",
65791bc56edSDimitry Andric mmFromEVEX2of4(insn->vectorExtensionPrefix[1]));
65891bc56edSDimitry Andric return -1;
65991bc56edSDimitry Andric case VEX_LOB_0F:
66091bc56edSDimitry Andric insn->opcodeType = TWOBYTE;
66191bc56edSDimitry Andric return consumeByte(insn, &insn->opcode);
66291bc56edSDimitry Andric case VEX_LOB_0F38:
66391bc56edSDimitry Andric insn->opcodeType = THREEBYTE_38;
66491bc56edSDimitry Andric return consumeByte(insn, &insn->opcode);
66591bc56edSDimitry Andric case VEX_LOB_0F3A:
66691bc56edSDimitry Andric insn->opcodeType = THREEBYTE_3A;
66791bc56edSDimitry Andric return consumeByte(insn, &insn->opcode);
66891bc56edSDimitry Andric }
66939d628a0SDimitry Andric } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
67091bc56edSDimitry Andric switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) {
67191bc56edSDimitry Andric default:
67291bc56edSDimitry Andric dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
67391bc56edSDimitry Andric mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]));
67491bc56edSDimitry Andric return -1;
67591bc56edSDimitry Andric case VEX_LOB_0F:
67691bc56edSDimitry Andric insn->opcodeType = TWOBYTE;
67791bc56edSDimitry Andric return consumeByte(insn, &insn->opcode);
67891bc56edSDimitry Andric case VEX_LOB_0F38:
67991bc56edSDimitry Andric insn->opcodeType = THREEBYTE_38;
68091bc56edSDimitry Andric return consumeByte(insn, &insn->opcode);
68191bc56edSDimitry Andric case VEX_LOB_0F3A:
68291bc56edSDimitry Andric insn->opcodeType = THREEBYTE_3A;
68391bc56edSDimitry Andric return consumeByte(insn, &insn->opcode);
68491bc56edSDimitry Andric }
68539d628a0SDimitry Andric } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
68691bc56edSDimitry Andric insn->opcodeType = TWOBYTE;
68791bc56edSDimitry Andric return consumeByte(insn, &insn->opcode);
68839d628a0SDimitry Andric } else if (insn->vectorExtensionType == TYPE_XOP) {
68991bc56edSDimitry Andric switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) {
69091bc56edSDimitry Andric default:
69191bc56edSDimitry Andric dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
69291bc56edSDimitry Andric mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]));
69391bc56edSDimitry Andric return -1;
69491bc56edSDimitry Andric case XOP_MAP_SELECT_8:
69591bc56edSDimitry Andric insn->opcodeType = XOP8_MAP;
69691bc56edSDimitry Andric return consumeByte(insn, &insn->opcode);
69791bc56edSDimitry Andric case XOP_MAP_SELECT_9:
69891bc56edSDimitry Andric insn->opcodeType = XOP9_MAP;
69991bc56edSDimitry Andric return consumeByte(insn, &insn->opcode);
70091bc56edSDimitry Andric case XOP_MAP_SELECT_A:
70191bc56edSDimitry Andric insn->opcodeType = XOPA_MAP;
70291bc56edSDimitry Andric return consumeByte(insn, &insn->opcode);
70391bc56edSDimitry Andric }
70491bc56edSDimitry Andric }
70591bc56edSDimitry Andric
70691bc56edSDimitry Andric if (consumeByte(insn, ¤t))
70791bc56edSDimitry Andric return -1;
70891bc56edSDimitry Andric
70991bc56edSDimitry Andric if (current == 0x0f) {
71091bc56edSDimitry Andric dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
71191bc56edSDimitry Andric
71291bc56edSDimitry Andric if (consumeByte(insn, ¤t))
71391bc56edSDimitry Andric return -1;
71491bc56edSDimitry Andric
71591bc56edSDimitry Andric if (current == 0x38) {
71691bc56edSDimitry Andric dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
71791bc56edSDimitry Andric
71891bc56edSDimitry Andric if (consumeByte(insn, ¤t))
71991bc56edSDimitry Andric return -1;
72091bc56edSDimitry Andric
72191bc56edSDimitry Andric insn->opcodeType = THREEBYTE_38;
72291bc56edSDimitry Andric } else if (current == 0x3a) {
72391bc56edSDimitry Andric dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
72491bc56edSDimitry Andric
72591bc56edSDimitry Andric if (consumeByte(insn, ¤t))
72691bc56edSDimitry Andric return -1;
72791bc56edSDimitry Andric
72891bc56edSDimitry Andric insn->opcodeType = THREEBYTE_3A;
7294ba319b5SDimitry Andric } else if (current == 0x0f) {
7304ba319b5SDimitry Andric dbgprintf(insn, "Found a 3dnow escape prefix (0x%hhx)", current);
7314ba319b5SDimitry Andric
7324ba319b5SDimitry Andric // Consume operands before the opcode to comply with the 3DNow encoding
7334ba319b5SDimitry Andric if (readModRM(insn))
7344ba319b5SDimitry Andric return -1;
7354ba319b5SDimitry Andric
7364ba319b5SDimitry Andric if (consumeByte(insn, ¤t))
7374ba319b5SDimitry Andric return -1;
7384ba319b5SDimitry Andric
7394ba319b5SDimitry Andric insn->opcodeType = THREEDNOW_MAP;
74091bc56edSDimitry Andric } else {
74191bc56edSDimitry Andric dbgprintf(insn, "Didn't find a three-byte escape prefix");
74291bc56edSDimitry Andric
74391bc56edSDimitry Andric insn->opcodeType = TWOBYTE;
74491bc56edSDimitry Andric }
7452cab237bSDimitry Andric } else if (insn->mandatoryPrefix)
7462cab237bSDimitry Andric // The opcode with mandatory prefix must start with opcode escape.
7472cab237bSDimitry Andric // If not it's legacy repeat prefix
7482cab237bSDimitry Andric insn->mandatoryPrefix = 0;
74991bc56edSDimitry Andric
75091bc56edSDimitry Andric /*
75191bc56edSDimitry Andric * At this point we have consumed the full opcode.
75291bc56edSDimitry Andric * Anything we consume from here on must be unconsumed.
75391bc56edSDimitry Andric */
75491bc56edSDimitry Andric
75591bc56edSDimitry Andric insn->opcode = current;
75691bc56edSDimitry Andric
75791bc56edSDimitry Andric return 0;
75891bc56edSDimitry Andric }
75991bc56edSDimitry Andric
76091bc56edSDimitry Andric /*
76191bc56edSDimitry Andric * getIDWithAttrMask - Determines the ID of an instruction, consuming
76291bc56edSDimitry Andric * the ModR/M byte as appropriate for extended and escape opcodes,
76391bc56edSDimitry Andric * and using a supplied attribute mask.
76491bc56edSDimitry Andric *
76591bc56edSDimitry Andric * @param instructionID - A pointer whose target is filled in with the ID of the
76691bc56edSDimitry Andric * instruction.
76791bc56edSDimitry Andric * @param insn - The instruction whose ID is to be determined.
76891bc56edSDimitry Andric * @param attrMask - The attribute mask to search.
76991bc56edSDimitry Andric * @return - 0 if the ModR/M could be read when needed or was not
77091bc56edSDimitry Andric * needed; nonzero otherwise.
77191bc56edSDimitry Andric */
getIDWithAttrMask(uint16_t * instructionID,struct InternalInstruction * insn,uint16_t attrMask)77291bc56edSDimitry Andric static int getIDWithAttrMask(uint16_t* instructionID,
77391bc56edSDimitry Andric struct InternalInstruction* insn,
77491bc56edSDimitry Andric uint16_t attrMask) {
77591bc56edSDimitry Andric bool hasModRMExtension;
77691bc56edSDimitry Andric
77791bc56edSDimitry Andric InstructionContext instructionClass = contextForAttrs(attrMask);
77891bc56edSDimitry Andric
77991bc56edSDimitry Andric hasModRMExtension = modRMRequired(insn->opcodeType,
78091bc56edSDimitry Andric instructionClass,
78191bc56edSDimitry Andric insn->opcode);
78291bc56edSDimitry Andric
78391bc56edSDimitry Andric if (hasModRMExtension) {
78491bc56edSDimitry Andric if (readModRM(insn))
78591bc56edSDimitry Andric return -1;
78691bc56edSDimitry Andric
78791bc56edSDimitry Andric *instructionID = decode(insn->opcodeType,
78891bc56edSDimitry Andric instructionClass,
78991bc56edSDimitry Andric insn->opcode,
79091bc56edSDimitry Andric insn->modRM);
79191bc56edSDimitry Andric } else {
79291bc56edSDimitry Andric *instructionID = decode(insn->opcodeType,
79391bc56edSDimitry Andric instructionClass,
79491bc56edSDimitry Andric insn->opcode,
79591bc56edSDimitry Andric 0);
79691bc56edSDimitry Andric }
79791bc56edSDimitry Andric
79891bc56edSDimitry Andric return 0;
79991bc56edSDimitry Andric }
80091bc56edSDimitry Andric
/*
 * is16BitEquivalent - Determines whether two instruction names refer to
 *   equivalent instructions but one is 16-bit whereas the other is not.
 *
 * The names must have the same length.  At every position the characters
 * must either be identical or form one of these (orig -> equiv) pairs:
 *   'Q' or 'L' -> 'W'
 *   '6' or '3' -> '1'
 *   '4' or '2' -> '6'
 *
 * @param orig  - The instruction that is not 16-bit
 * @param equiv - The instruction that is 16-bit
 * @return      - true if the names match under the rules above.
 */
static bool is16BitEquivalent(const char *orig, const char *equiv) {
  const char *wide = orig;
  const char *narrow = equiv;

  while (*wide != '\0' && *narrow != '\0') {
    const char w = *wide++;
    const char n = *narrow++;

    if (w == n)
      continue;

    const bool sizeLetter = (w == 'Q' || w == 'L') && n == 'W';
    const bool firstDigit = (w == '6' || w == '3') && n == '1';
    const bool secondDigit = (w == '4' || w == '2') && n == '6';

    if (!(sizeLetter || firstDigit || secondDigit))
      return false;
  }

  /* Equivalent only if both names ended at the same position. */
  return *wide == '\0' && *narrow == '\0';
}
82791bc56edSDimitry Andric
/*
 * is64Bit - Determines whether this instruction is a 64-bit instruction,
 *   judged purely by whether its name contains the substring "64".
 *
 * @param name - The NUL-terminated name of the instruction to check.
 * @return     - true if "64" occurs anywhere in name; false otherwise.
 */
static bool is64Bit(const char *name) {
  return std::strstr(name, "64") != nullptr;
}
84339d628a0SDimitry Andric
84439d628a0SDimitry Andric /*
84591bc56edSDimitry Andric * getID - Determines the ID of an instruction, consuming the ModR/M byte as
84691bc56edSDimitry Andric * appropriate for extended and escape opcodes. Determines the attributes and
84791bc56edSDimitry Andric * context for the instruction before doing so.
84891bc56edSDimitry Andric *
84991bc56edSDimitry Andric * @param insn - The instruction whose ID is to be determined.
85091bc56edSDimitry Andric * @return - 0 if the ModR/M could be read when needed or was not needed;
85191bc56edSDimitry Andric * nonzero otherwise.
85291bc56edSDimitry Andric */
getID(struct InternalInstruction * insn,const void * miiArg)85391bc56edSDimitry Andric static int getID(struct InternalInstruction* insn, const void *miiArg) {
85491bc56edSDimitry Andric uint16_t attrMask;
85591bc56edSDimitry Andric uint16_t instructionID;
85691bc56edSDimitry Andric
85791bc56edSDimitry Andric dbgprintf(insn, "getID()");
85891bc56edSDimitry Andric
85991bc56edSDimitry Andric attrMask = ATTR_NONE;
86091bc56edSDimitry Andric
86191bc56edSDimitry Andric if (insn->mode == MODE_64BIT)
86291bc56edSDimitry Andric attrMask |= ATTR_64BIT;
86391bc56edSDimitry Andric
86491bc56edSDimitry Andric if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
86591bc56edSDimitry Andric attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? ATTR_EVEX : ATTR_VEX;
86691bc56edSDimitry Andric
86791bc56edSDimitry Andric if (insn->vectorExtensionType == TYPE_EVEX) {
86891bc56edSDimitry Andric switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) {
86991bc56edSDimitry Andric case VEX_PREFIX_66:
87091bc56edSDimitry Andric attrMask |= ATTR_OPSIZE;
87191bc56edSDimitry Andric break;
87291bc56edSDimitry Andric case VEX_PREFIX_F3:
87391bc56edSDimitry Andric attrMask |= ATTR_XS;
87491bc56edSDimitry Andric break;
87591bc56edSDimitry Andric case VEX_PREFIX_F2:
87691bc56edSDimitry Andric attrMask |= ATTR_XD;
87791bc56edSDimitry Andric break;
87891bc56edSDimitry Andric }
87991bc56edSDimitry Andric
88091bc56edSDimitry Andric if (zFromEVEX4of4(insn->vectorExtensionPrefix[3]))
88191bc56edSDimitry Andric attrMask |= ATTR_EVEXKZ;
88291bc56edSDimitry Andric if (bFromEVEX4of4(insn->vectorExtensionPrefix[3]))
88391bc56edSDimitry Andric attrMask |= ATTR_EVEXB;
88491bc56edSDimitry Andric if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]))
88591bc56edSDimitry Andric attrMask |= ATTR_EVEXK;
88691bc56edSDimitry Andric if (lFromEVEX4of4(insn->vectorExtensionPrefix[3]))
88791bc56edSDimitry Andric attrMask |= ATTR_EVEXL;
88891bc56edSDimitry Andric if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
88991bc56edSDimitry Andric attrMask |= ATTR_EVEXL2;
89039d628a0SDimitry Andric } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
89191bc56edSDimitry Andric switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) {
89291bc56edSDimitry Andric case VEX_PREFIX_66:
89391bc56edSDimitry Andric attrMask |= ATTR_OPSIZE;
89491bc56edSDimitry Andric break;
89591bc56edSDimitry Andric case VEX_PREFIX_F3:
89691bc56edSDimitry Andric attrMask |= ATTR_XS;
89791bc56edSDimitry Andric break;
89891bc56edSDimitry Andric case VEX_PREFIX_F2:
89991bc56edSDimitry Andric attrMask |= ATTR_XD;
90091bc56edSDimitry Andric break;
90191bc56edSDimitry Andric }
90291bc56edSDimitry Andric
90391bc56edSDimitry Andric if (lFromVEX3of3(insn->vectorExtensionPrefix[2]))
90491bc56edSDimitry Andric attrMask |= ATTR_VEXL;
90539d628a0SDimitry Andric } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
90691bc56edSDimitry Andric switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
90791bc56edSDimitry Andric case VEX_PREFIX_66:
90891bc56edSDimitry Andric attrMask |= ATTR_OPSIZE;
90991bc56edSDimitry Andric break;
91091bc56edSDimitry Andric case VEX_PREFIX_F3:
91191bc56edSDimitry Andric attrMask |= ATTR_XS;
91291bc56edSDimitry Andric break;
91391bc56edSDimitry Andric case VEX_PREFIX_F2:
91491bc56edSDimitry Andric attrMask |= ATTR_XD;
91591bc56edSDimitry Andric break;
91691bc56edSDimitry Andric }
91791bc56edSDimitry Andric
91891bc56edSDimitry Andric if (lFromVEX2of2(insn->vectorExtensionPrefix[1]))
91991bc56edSDimitry Andric attrMask |= ATTR_VEXL;
92039d628a0SDimitry Andric } else if (insn->vectorExtensionType == TYPE_XOP) {
92191bc56edSDimitry Andric switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
92291bc56edSDimitry Andric case VEX_PREFIX_66:
92391bc56edSDimitry Andric attrMask |= ATTR_OPSIZE;
92491bc56edSDimitry Andric break;
92591bc56edSDimitry Andric case VEX_PREFIX_F3:
92691bc56edSDimitry Andric attrMask |= ATTR_XS;
92791bc56edSDimitry Andric break;
92891bc56edSDimitry Andric case VEX_PREFIX_F2:
92991bc56edSDimitry Andric attrMask |= ATTR_XD;
93091bc56edSDimitry Andric break;
93191bc56edSDimitry Andric }
93291bc56edSDimitry Andric
93391bc56edSDimitry Andric if (lFromXOP3of3(insn->vectorExtensionPrefix[2]))
93491bc56edSDimitry Andric attrMask |= ATTR_VEXL;
93539d628a0SDimitry Andric } else {
93691bc56edSDimitry Andric return -1;
93791bc56edSDimitry Andric }
9382cab237bSDimitry Andric } else if (!insn->mandatoryPrefix) {
9392cab237bSDimitry Andric // If we don't have mandatory prefix we should use legacy prefixes here
9402cab237bSDimitry Andric if (insn->hasOpSize && (insn->mode != MODE_16BIT))
94191bc56edSDimitry Andric attrMask |= ATTR_OPSIZE;
9422cab237bSDimitry Andric if (insn->hasAdSize)
94391bc56edSDimitry Andric attrMask |= ATTR_ADSIZE;
9442cab237bSDimitry Andric if (insn->opcodeType == ONEBYTE) {
9452cab237bSDimitry Andric if (insn->repeatPrefix == 0xf3 && (insn->opcode == 0x90))
9462cab237bSDimitry Andric // Special support for PAUSE
94791bc56edSDimitry Andric attrMask |= ATTR_XS;
9482cab237bSDimitry Andric } else {
9492cab237bSDimitry Andric if (insn->repeatPrefix == 0xf2)
95091bc56edSDimitry Andric attrMask |= ATTR_XD;
9512cab237bSDimitry Andric else if (insn->repeatPrefix == 0xf3)
9522cab237bSDimitry Andric attrMask |= ATTR_XS;
9532cab237bSDimitry Andric }
9542cab237bSDimitry Andric } else {
9552cab237bSDimitry Andric switch (insn->mandatoryPrefix) {
9562cab237bSDimitry Andric case 0xf2:
9572cab237bSDimitry Andric attrMask |= ATTR_XD;
9582cab237bSDimitry Andric break;
9592cab237bSDimitry Andric case 0xf3:
9602cab237bSDimitry Andric attrMask |= ATTR_XS;
9612cab237bSDimitry Andric break;
9622cab237bSDimitry Andric case 0x66:
9632cab237bSDimitry Andric if (insn->mode != MODE_16BIT)
9642cab237bSDimitry Andric attrMask |= ATTR_OPSIZE;
9652cab237bSDimitry Andric break;
9662cab237bSDimitry Andric case 0x67:
9672cab237bSDimitry Andric attrMask |= ATTR_ADSIZE;
9682cab237bSDimitry Andric break;
9692cab237bSDimitry Andric }
9704ba319b5SDimitry Andric
97191bc56edSDimitry Andric }
97291bc56edSDimitry Andric
9732cab237bSDimitry Andric if (insn->rexPrefix & 0x08) {
97491bc56edSDimitry Andric attrMask |= ATTR_REXW;
9752cab237bSDimitry Andric attrMask &= ~ATTR_ADSIZE;
9762cab237bSDimitry Andric }
97791bc56edSDimitry Andric
97891bc56edSDimitry Andric /*
97991bc56edSDimitry Andric * JCXZ/JECXZ need special handling for 16-bit mode because the meaning
98091bc56edSDimitry Andric * of the AdSize prefix is inverted w.r.t. 32-bit mode.
98191bc56edSDimitry Andric */
98239d628a0SDimitry Andric if (insn->mode == MODE_16BIT && insn->opcodeType == ONEBYTE &&
98339d628a0SDimitry Andric insn->opcode == 0xE3)
98491bc56edSDimitry Andric attrMask ^= ATTR_ADSIZE;
98539d628a0SDimitry Andric
986*b5893f02SDimitry Andric // If we're in 16-bit mode and this is one of the relative jumps and opsize
987*b5893f02SDimitry Andric // prefix isn't present, we need to force the opsize attribute since the
988*b5893f02SDimitry Andric // prefix is inverted relative to 32-bit mode.
989*b5893f02SDimitry Andric if (insn->mode == MODE_16BIT && !insn->hasOpSize &&
990*b5893f02SDimitry Andric insn->opcodeType == ONEBYTE &&
991*b5893f02SDimitry Andric (insn->opcode == 0xE8 || insn->opcode == 0xE9))
992*b5893f02SDimitry Andric attrMask |= ATTR_OPSIZE;
9937d523365SDimitry Andric
994*b5893f02SDimitry Andric if (insn->mode == MODE_16BIT && !insn->hasOpSize &&
995*b5893f02SDimitry Andric insn->opcodeType == TWOBYTE &&
996*b5893f02SDimitry Andric insn->opcode >= 0x80 && insn->opcode <= 0x8F)
997*b5893f02SDimitry Andric attrMask |= ATTR_OPSIZE;
9987d523365SDimitry Andric
99991bc56edSDimitry Andric if (getIDWithAttrMask(&instructionID, insn, attrMask))
100091bc56edSDimitry Andric return -1;
100139d628a0SDimitry Andric
100239d628a0SDimitry Andric /* The following clauses compensate for limitations of the tables. */
100339d628a0SDimitry Andric
100439d628a0SDimitry Andric if (insn->mode != MODE_64BIT &&
100539d628a0SDimitry Andric insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
100639d628a0SDimitry Andric /*
100739d628a0SDimitry Andric * The tables can't distinquish between cases where the W-bit is used to
100839d628a0SDimitry Andric * select register size and cases where its a required part of the opcode.
100939d628a0SDimitry Andric */
101039d628a0SDimitry Andric if ((insn->vectorExtensionType == TYPE_EVEX &&
101139d628a0SDimitry Andric wFromEVEX3of4(insn->vectorExtensionPrefix[2])) ||
101239d628a0SDimitry Andric (insn->vectorExtensionType == TYPE_VEX_3B &&
101339d628a0SDimitry Andric wFromVEX3of3(insn->vectorExtensionPrefix[2])) ||
101439d628a0SDimitry Andric (insn->vectorExtensionType == TYPE_XOP &&
101539d628a0SDimitry Andric wFromXOP3of3(insn->vectorExtensionPrefix[2]))) {
101639d628a0SDimitry Andric
101739d628a0SDimitry Andric uint16_t instructionIDWithREXW;
101839d628a0SDimitry Andric if (getIDWithAttrMask(&instructionIDWithREXW,
101939d628a0SDimitry Andric insn, attrMask | ATTR_REXW)) {
102039d628a0SDimitry Andric insn->instructionID = instructionID;
102139d628a0SDimitry Andric insn->spec = specifierForUID(instructionID);
102239d628a0SDimitry Andric return 0;
102339d628a0SDimitry Andric }
102439d628a0SDimitry Andric
1025d88c1a5aSDimitry Andric auto SpecName = GetInstrName(instructionIDWithREXW, miiArg);
102639d628a0SDimitry Andric // If not a 64-bit instruction. Switch the opcode.
1027d88c1a5aSDimitry Andric if (!is64Bit(SpecName.data())) {
102839d628a0SDimitry Andric insn->instructionID = instructionIDWithREXW;
102939d628a0SDimitry Andric insn->spec = specifierForUID(instructionIDWithREXW);
103039d628a0SDimitry Andric return 0;
103139d628a0SDimitry Andric }
103291bc56edSDimitry Andric }
103391bc56edSDimitry Andric }
103491bc56edSDimitry Andric
103539d628a0SDimitry Andric /*
10364ba319b5SDimitry Andric * Absolute moves, umonitor, and movdir64b need special handling.
103739d628a0SDimitry Andric * -For 16-bit mode because the meaning of the AdSize and OpSize prefixes are
103839d628a0SDimitry Andric * inverted w.r.t.
103939d628a0SDimitry Andric * -For 32-bit mode we need to ensure the ADSIZE prefix is observed in
104039d628a0SDimitry Andric * any position.
104139d628a0SDimitry Andric */
10424ba319b5SDimitry Andric if ((insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) ||
10434ba319b5SDimitry Andric (insn->opcodeType == TWOBYTE && (insn->opcode == 0xAE)) ||
10444ba319b5SDimitry Andric (insn->opcodeType == THREEBYTE_38 && insn->opcode == 0xF8)) {
104539d628a0SDimitry Andric /* Make sure we observed the prefixes in any position. */
10462cab237bSDimitry Andric if (insn->hasAdSize)
104739d628a0SDimitry Andric attrMask |= ATTR_ADSIZE;
10482cab237bSDimitry Andric if (insn->hasOpSize)
104939d628a0SDimitry Andric attrMask |= ATTR_OPSIZE;
105039d628a0SDimitry Andric
105139d628a0SDimitry Andric /* In 16-bit, invert the attributes. */
10524ba319b5SDimitry Andric if (insn->mode == MODE_16BIT) {
10534ba319b5SDimitry Andric attrMask ^= ATTR_ADSIZE;
10544ba319b5SDimitry Andric
10554ba319b5SDimitry Andric /* The OpSize attribute is only valid with the absolute moves. */
10564ba319b5SDimitry Andric if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0))
10574ba319b5SDimitry Andric attrMask ^= ATTR_OPSIZE;
10584ba319b5SDimitry Andric }
105939d628a0SDimitry Andric
106039d628a0SDimitry Andric if (getIDWithAttrMask(&instructionID, insn, attrMask))
106139d628a0SDimitry Andric return -1;
106239d628a0SDimitry Andric
106339d628a0SDimitry Andric insn->instructionID = instructionID;
106439d628a0SDimitry Andric insn->spec = specifierForUID(instructionID);
106539d628a0SDimitry Andric return 0;
106639d628a0SDimitry Andric }
106791bc56edSDimitry Andric
10682cab237bSDimitry Andric if ((insn->mode == MODE_16BIT || insn->hasOpSize) &&
106991bc56edSDimitry Andric !(attrMask & ATTR_OPSIZE)) {
107091bc56edSDimitry Andric /*
107191bc56edSDimitry Andric * The instruction tables make no distinction between instructions that
107291bc56edSDimitry Andric * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
107391bc56edSDimitry Andric * particular spot (i.e., many MMX operations). In general we're
107491bc56edSDimitry Andric * conservative, but in the specific case where OpSize is present but not
107591bc56edSDimitry Andric * in the right place we check if there's a 16-bit operation.
107691bc56edSDimitry Andric */
107791bc56edSDimitry Andric
107891bc56edSDimitry Andric const struct InstructionSpecifier *spec;
107991bc56edSDimitry Andric uint16_t instructionIDWithOpsize;
1080d88c1a5aSDimitry Andric llvm::StringRef specName, specWithOpSizeName;
108191bc56edSDimitry Andric
108291bc56edSDimitry Andric spec = specifierForUID(instructionID);
108391bc56edSDimitry Andric
108491bc56edSDimitry Andric if (getIDWithAttrMask(&instructionIDWithOpsize,
108591bc56edSDimitry Andric insn,
108691bc56edSDimitry Andric attrMask | ATTR_OPSIZE)) {
108791bc56edSDimitry Andric /*
108891bc56edSDimitry Andric * ModRM required with OpSize but not present; give up and return version
108991bc56edSDimitry Andric * without OpSize set
109091bc56edSDimitry Andric */
109191bc56edSDimitry Andric
109291bc56edSDimitry Andric insn->instructionID = instructionID;
109391bc56edSDimitry Andric insn->spec = spec;
109491bc56edSDimitry Andric return 0;
109591bc56edSDimitry Andric }
109691bc56edSDimitry Andric
109791bc56edSDimitry Andric specName = GetInstrName(instructionID, miiArg);
109891bc56edSDimitry Andric specWithOpSizeName = GetInstrName(instructionIDWithOpsize, miiArg);
109991bc56edSDimitry Andric
1100d88c1a5aSDimitry Andric if (is16BitEquivalent(specName.data(), specWithOpSizeName.data()) &&
11012cab237bSDimitry Andric (insn->mode == MODE_16BIT) ^ insn->hasOpSize) {
110291bc56edSDimitry Andric insn->instructionID = instructionIDWithOpsize;
110391bc56edSDimitry Andric insn->spec = specifierForUID(instructionIDWithOpsize);
110491bc56edSDimitry Andric } else {
110591bc56edSDimitry Andric insn->instructionID = instructionID;
110691bc56edSDimitry Andric insn->spec = spec;
110791bc56edSDimitry Andric }
110891bc56edSDimitry Andric return 0;
110991bc56edSDimitry Andric }
111091bc56edSDimitry Andric
111191bc56edSDimitry Andric if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
111291bc56edSDimitry Andric insn->rexPrefix & 0x01) {
111391bc56edSDimitry Andric /*
111491bc56edSDimitry Andric * NOOP shouldn't decode as NOOP if REX.b is set. Instead
111591bc56edSDimitry Andric * it should decode as XCHG %r8, %eax.
111691bc56edSDimitry Andric */
111791bc56edSDimitry Andric
111891bc56edSDimitry Andric const struct InstructionSpecifier *spec;
111991bc56edSDimitry Andric uint16_t instructionIDWithNewOpcode;
112091bc56edSDimitry Andric const struct InstructionSpecifier *specWithNewOpcode;
112191bc56edSDimitry Andric
112291bc56edSDimitry Andric spec = specifierForUID(instructionID);
112391bc56edSDimitry Andric
112491bc56edSDimitry Andric /* Borrow opcode from one of the other XCHGar opcodes */
112591bc56edSDimitry Andric insn->opcode = 0x91;
112691bc56edSDimitry Andric
112791bc56edSDimitry Andric if (getIDWithAttrMask(&instructionIDWithNewOpcode,
112891bc56edSDimitry Andric insn,
112991bc56edSDimitry Andric attrMask)) {
113091bc56edSDimitry Andric insn->opcode = 0x90;
113191bc56edSDimitry Andric
113291bc56edSDimitry Andric insn->instructionID = instructionID;
113391bc56edSDimitry Andric insn->spec = spec;
113491bc56edSDimitry Andric return 0;
113591bc56edSDimitry Andric }
113691bc56edSDimitry Andric
113791bc56edSDimitry Andric specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode);
113891bc56edSDimitry Andric
113991bc56edSDimitry Andric /* Change back */
114091bc56edSDimitry Andric insn->opcode = 0x90;
114191bc56edSDimitry Andric
114291bc56edSDimitry Andric insn->instructionID = instructionIDWithNewOpcode;
114391bc56edSDimitry Andric insn->spec = specWithNewOpcode;
114491bc56edSDimitry Andric
114591bc56edSDimitry Andric return 0;
114691bc56edSDimitry Andric }
114791bc56edSDimitry Andric
114891bc56edSDimitry Andric insn->instructionID = instructionID;
114991bc56edSDimitry Andric insn->spec = specifierForUID(insn->instructionID);
115091bc56edSDimitry Andric
115191bc56edSDimitry Andric return 0;
115291bc56edSDimitry Andric }
115391bc56edSDimitry Andric
115491bc56edSDimitry Andric /*
115591bc56edSDimitry Andric * readSIB - Consumes the SIB byte to determine addressing information for an
115691bc56edSDimitry Andric * instruction.
115791bc56edSDimitry Andric *
115891bc56edSDimitry Andric * @param insn - The instruction whose SIB byte is to be read.
115991bc56edSDimitry Andric * @return - 0 if the SIB byte was successfully read; nonzero otherwise.
116091bc56edSDimitry Andric */
readSIB(struct InternalInstruction * insn)116191bc56edSDimitry Andric static int readSIB(struct InternalInstruction* insn) {
116291bc56edSDimitry Andric SIBBase sibBaseBase = SIB_BASE_NONE;
116391bc56edSDimitry Andric uint8_t index, base;
116491bc56edSDimitry Andric
116591bc56edSDimitry Andric dbgprintf(insn, "readSIB()");
116691bc56edSDimitry Andric
116791bc56edSDimitry Andric if (insn->consumedSIB)
116891bc56edSDimitry Andric return 0;
116991bc56edSDimitry Andric
117091bc56edSDimitry Andric insn->consumedSIB = true;
117191bc56edSDimitry Andric
117291bc56edSDimitry Andric switch (insn->addressSize) {
117391bc56edSDimitry Andric case 2:
117491bc56edSDimitry Andric dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
117591bc56edSDimitry Andric return -1;
117691bc56edSDimitry Andric case 4:
11772cab237bSDimitry Andric insn->sibIndexBase = SIB_INDEX_EAX;
117891bc56edSDimitry Andric sibBaseBase = SIB_BASE_EAX;
117991bc56edSDimitry Andric break;
118091bc56edSDimitry Andric case 8:
11812cab237bSDimitry Andric insn->sibIndexBase = SIB_INDEX_RAX;
118291bc56edSDimitry Andric sibBaseBase = SIB_BASE_RAX;
118391bc56edSDimitry Andric break;
118491bc56edSDimitry Andric }
118591bc56edSDimitry Andric
118691bc56edSDimitry Andric if (consumeByte(insn, &insn->sib))
118791bc56edSDimitry Andric return -1;
118891bc56edSDimitry Andric
118991bc56edSDimitry Andric index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
11903dac3a9bSDimitry Andric
11913dac3a9bSDimitry Andric if (index == 0x4) {
119291bc56edSDimitry Andric insn->sibIndex = SIB_INDEX_NONE;
11933dac3a9bSDimitry Andric } else {
11942cab237bSDimitry Andric insn->sibIndex = (SIBIndex)(insn->sibIndexBase + index);
119591bc56edSDimitry Andric }
119691bc56edSDimitry Andric
11973dac3a9bSDimitry Andric insn->sibScale = 1 << scaleFromSIB(insn->sib);
119891bc56edSDimitry Andric
119991bc56edSDimitry Andric base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
120091bc56edSDimitry Andric
120191bc56edSDimitry Andric switch (base) {
120291bc56edSDimitry Andric case 0x5:
120391bc56edSDimitry Andric case 0xd:
120491bc56edSDimitry Andric switch (modFromModRM(insn->modRM)) {
120591bc56edSDimitry Andric case 0x0:
120691bc56edSDimitry Andric insn->eaDisplacement = EA_DISP_32;
120791bc56edSDimitry Andric insn->sibBase = SIB_BASE_NONE;
120891bc56edSDimitry Andric break;
120991bc56edSDimitry Andric case 0x1:
121091bc56edSDimitry Andric insn->eaDisplacement = EA_DISP_8;
121191bc56edSDimitry Andric insn->sibBase = (SIBBase)(sibBaseBase + base);
121291bc56edSDimitry Andric break;
121391bc56edSDimitry Andric case 0x2:
121491bc56edSDimitry Andric insn->eaDisplacement = EA_DISP_32;
121591bc56edSDimitry Andric insn->sibBase = (SIBBase)(sibBaseBase + base);
121691bc56edSDimitry Andric break;
121791bc56edSDimitry Andric case 0x3:
121891bc56edSDimitry Andric debug("Cannot have Mod = 0b11 and a SIB byte");
121991bc56edSDimitry Andric return -1;
122091bc56edSDimitry Andric }
122191bc56edSDimitry Andric break;
122291bc56edSDimitry Andric default:
122391bc56edSDimitry Andric insn->sibBase = (SIBBase)(sibBaseBase + base);
122491bc56edSDimitry Andric break;
122591bc56edSDimitry Andric }
122691bc56edSDimitry Andric
122791bc56edSDimitry Andric return 0;
122891bc56edSDimitry Andric }
122991bc56edSDimitry Andric
123091bc56edSDimitry Andric /*
123191bc56edSDimitry Andric * readDisplacement - Consumes the displacement of an instruction.
123291bc56edSDimitry Andric *
123391bc56edSDimitry Andric * @param insn - The instruction whose displacement is to be read.
123491bc56edSDimitry Andric * @return - 0 if the displacement byte was successfully read; nonzero
123591bc56edSDimitry Andric * otherwise.
123691bc56edSDimitry Andric */
readDisplacement(struct InternalInstruction * insn)123791bc56edSDimitry Andric static int readDisplacement(struct InternalInstruction* insn) {
123891bc56edSDimitry Andric int8_t d8;
123991bc56edSDimitry Andric int16_t d16;
124091bc56edSDimitry Andric int32_t d32;
124191bc56edSDimitry Andric
124291bc56edSDimitry Andric dbgprintf(insn, "readDisplacement()");
124391bc56edSDimitry Andric
124491bc56edSDimitry Andric if (insn->consumedDisplacement)
124591bc56edSDimitry Andric return 0;
124691bc56edSDimitry Andric
124791bc56edSDimitry Andric insn->consumedDisplacement = true;
124891bc56edSDimitry Andric insn->displacementOffset = insn->readerCursor - insn->startLocation;
124991bc56edSDimitry Andric
125091bc56edSDimitry Andric switch (insn->eaDisplacement) {
125191bc56edSDimitry Andric case EA_DISP_NONE:
125291bc56edSDimitry Andric insn->consumedDisplacement = false;
125391bc56edSDimitry Andric break;
125491bc56edSDimitry Andric case EA_DISP_8:
125591bc56edSDimitry Andric if (consumeInt8(insn, &d8))
125691bc56edSDimitry Andric return -1;
125791bc56edSDimitry Andric insn->displacement = d8;
125891bc56edSDimitry Andric break;
125991bc56edSDimitry Andric case EA_DISP_16:
126091bc56edSDimitry Andric if (consumeInt16(insn, &d16))
126191bc56edSDimitry Andric return -1;
126291bc56edSDimitry Andric insn->displacement = d16;
126391bc56edSDimitry Andric break;
126491bc56edSDimitry Andric case EA_DISP_32:
126591bc56edSDimitry Andric if (consumeInt32(insn, &d32))
126691bc56edSDimitry Andric return -1;
126791bc56edSDimitry Andric insn->displacement = d32;
126891bc56edSDimitry Andric break;
126991bc56edSDimitry Andric }
127091bc56edSDimitry Andric
127191bc56edSDimitry Andric insn->consumedDisplacement = true;
127291bc56edSDimitry Andric return 0;
127391bc56edSDimitry Andric }
127491bc56edSDimitry Andric
127591bc56edSDimitry Andric /*
127691bc56edSDimitry Andric * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and
127791bc56edSDimitry Andric * displacement) for an instruction and interprets it.
127891bc56edSDimitry Andric *
127991bc56edSDimitry Andric * @param insn - The instruction whose addressing information is to be read.
128091bc56edSDimitry Andric * @return - 0 if the information was successfully read; nonzero otherwise.
128191bc56edSDimitry Andric */
readModRM(struct InternalInstruction * insn)128291bc56edSDimitry Andric static int readModRM(struct InternalInstruction* insn) {
12834ba319b5SDimitry Andric uint8_t mod, rm, reg, evexrm;
128491bc56edSDimitry Andric
128591bc56edSDimitry Andric dbgprintf(insn, "readModRM()");
128691bc56edSDimitry Andric
128791bc56edSDimitry Andric if (insn->consumedModRM)
128891bc56edSDimitry Andric return 0;
128991bc56edSDimitry Andric
129091bc56edSDimitry Andric if (consumeByte(insn, &insn->modRM))
129191bc56edSDimitry Andric return -1;
129291bc56edSDimitry Andric insn->consumedModRM = true;
129391bc56edSDimitry Andric
129491bc56edSDimitry Andric mod = modFromModRM(insn->modRM);
129591bc56edSDimitry Andric rm = rmFromModRM(insn->modRM);
129691bc56edSDimitry Andric reg = regFromModRM(insn->modRM);
129791bc56edSDimitry Andric
129891bc56edSDimitry Andric /*
129991bc56edSDimitry Andric * This goes by insn->registerSize to pick the correct register, which messes
130091bc56edSDimitry Andric * up if we're using (say) XMM or 8-bit register operands. That gets fixed in
130191bc56edSDimitry Andric * fixupReg().
130291bc56edSDimitry Andric */
130391bc56edSDimitry Andric switch (insn->registerSize) {
130491bc56edSDimitry Andric case 2:
130591bc56edSDimitry Andric insn->regBase = MODRM_REG_AX;
130691bc56edSDimitry Andric insn->eaRegBase = EA_REG_AX;
130791bc56edSDimitry Andric break;
130891bc56edSDimitry Andric case 4:
130991bc56edSDimitry Andric insn->regBase = MODRM_REG_EAX;
131091bc56edSDimitry Andric insn->eaRegBase = EA_REG_EAX;
131191bc56edSDimitry Andric break;
131291bc56edSDimitry Andric case 8:
131391bc56edSDimitry Andric insn->regBase = MODRM_REG_RAX;
131491bc56edSDimitry Andric insn->eaRegBase = EA_REG_RAX;
131591bc56edSDimitry Andric break;
131691bc56edSDimitry Andric }
131791bc56edSDimitry Andric
131891bc56edSDimitry Andric reg |= rFromREX(insn->rexPrefix) << 3;
131991bc56edSDimitry Andric rm |= bFromREX(insn->rexPrefix) << 3;
13204ba319b5SDimitry Andric
13214ba319b5SDimitry Andric evexrm = 0;
13224ba319b5SDimitry Andric if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT) {
132391bc56edSDimitry Andric reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
13244ba319b5SDimitry Andric evexrm = xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
132591bc56edSDimitry Andric }
132691bc56edSDimitry Andric
132791bc56edSDimitry Andric insn->reg = (Reg)(insn->regBase + reg);
132891bc56edSDimitry Andric
132991bc56edSDimitry Andric switch (insn->addressSize) {
13304ba319b5SDimitry Andric case 2: {
13314ba319b5SDimitry Andric EABase eaBaseBase = EA_BASE_BX_SI;
133291bc56edSDimitry Andric
133391bc56edSDimitry Andric switch (mod) {
133491bc56edSDimitry Andric case 0x0:
133591bc56edSDimitry Andric if (rm == 0x6) {
133691bc56edSDimitry Andric insn->eaBase = EA_BASE_NONE;
133791bc56edSDimitry Andric insn->eaDisplacement = EA_DISP_16;
133891bc56edSDimitry Andric if (readDisplacement(insn))
133991bc56edSDimitry Andric return -1;
134091bc56edSDimitry Andric } else {
13414ba319b5SDimitry Andric insn->eaBase = (EABase)(eaBaseBase + rm);
134291bc56edSDimitry Andric insn->eaDisplacement = EA_DISP_NONE;
134391bc56edSDimitry Andric }
134491bc56edSDimitry Andric break;
134591bc56edSDimitry Andric case 0x1:
13464ba319b5SDimitry Andric insn->eaBase = (EABase)(eaBaseBase + rm);
134791bc56edSDimitry Andric insn->eaDisplacement = EA_DISP_8;
134891bc56edSDimitry Andric insn->displacementSize = 1;
134991bc56edSDimitry Andric if (readDisplacement(insn))
135091bc56edSDimitry Andric return -1;
135191bc56edSDimitry Andric break;
135291bc56edSDimitry Andric case 0x2:
13534ba319b5SDimitry Andric insn->eaBase = (EABase)(eaBaseBase + rm);
135491bc56edSDimitry Andric insn->eaDisplacement = EA_DISP_16;
135591bc56edSDimitry Andric if (readDisplacement(insn))
135691bc56edSDimitry Andric return -1;
135791bc56edSDimitry Andric break;
135891bc56edSDimitry Andric case 0x3:
135991bc56edSDimitry Andric insn->eaBase = (EABase)(insn->eaRegBase + rm);
136091bc56edSDimitry Andric if (readDisplacement(insn))
136191bc56edSDimitry Andric return -1;
136291bc56edSDimitry Andric break;
136391bc56edSDimitry Andric }
136491bc56edSDimitry Andric break;
13654ba319b5SDimitry Andric }
136691bc56edSDimitry Andric case 4:
13674ba319b5SDimitry Andric case 8: {
13684ba319b5SDimitry Andric EABase eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
136991bc56edSDimitry Andric
137091bc56edSDimitry Andric switch (mod) {
137191bc56edSDimitry Andric case 0x0:
137291bc56edSDimitry Andric insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
1373ff0cc061SDimitry Andric // In determining whether RIP-relative mode is used (rm=5),
1374ff0cc061SDimitry Andric // or whether a SIB byte is present (rm=4),
1375ff0cc061SDimitry Andric // the extension bits (REX.b and EVEX.x) are ignored.
1376ff0cc061SDimitry Andric switch (rm & 7) {
1377ff0cc061SDimitry Andric case 0x4: // SIB byte is present
137891bc56edSDimitry Andric insn->eaBase = (insn->addressSize == 4 ?
137991bc56edSDimitry Andric EA_BASE_sib : EA_BASE_sib64);
138091bc56edSDimitry Andric if (readSIB(insn) || readDisplacement(insn))
138191bc56edSDimitry Andric return -1;
138291bc56edSDimitry Andric break;
1383ff0cc061SDimitry Andric case 0x5: // RIP-relative
138491bc56edSDimitry Andric insn->eaBase = EA_BASE_NONE;
138591bc56edSDimitry Andric insn->eaDisplacement = EA_DISP_32;
138691bc56edSDimitry Andric if (readDisplacement(insn))
138791bc56edSDimitry Andric return -1;
138891bc56edSDimitry Andric break;
138991bc56edSDimitry Andric default:
13904ba319b5SDimitry Andric insn->eaBase = (EABase)(eaBaseBase + rm);
139191bc56edSDimitry Andric break;
139291bc56edSDimitry Andric }
139391bc56edSDimitry Andric break;
139491bc56edSDimitry Andric case 0x1:
139591bc56edSDimitry Andric insn->displacementSize = 1;
1396*b5893f02SDimitry Andric LLVM_FALLTHROUGH;
139791bc56edSDimitry Andric case 0x2:
139891bc56edSDimitry Andric insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
1399ff0cc061SDimitry Andric switch (rm & 7) {
1400ff0cc061SDimitry Andric case 0x4: // SIB byte is present
140191bc56edSDimitry Andric insn->eaBase = EA_BASE_sib;
140291bc56edSDimitry Andric if (readSIB(insn) || readDisplacement(insn))
140391bc56edSDimitry Andric return -1;
140491bc56edSDimitry Andric break;
140591bc56edSDimitry Andric default:
14064ba319b5SDimitry Andric insn->eaBase = (EABase)(eaBaseBase + rm);
140791bc56edSDimitry Andric if (readDisplacement(insn))
140891bc56edSDimitry Andric return -1;
140991bc56edSDimitry Andric break;
141091bc56edSDimitry Andric }
141191bc56edSDimitry Andric break;
141291bc56edSDimitry Andric case 0x3:
141391bc56edSDimitry Andric insn->eaDisplacement = EA_DISP_NONE;
14144ba319b5SDimitry Andric insn->eaBase = (EABase)(insn->eaRegBase + rm + evexrm);
141591bc56edSDimitry Andric break;
141691bc56edSDimitry Andric }
141791bc56edSDimitry Andric break;
14184ba319b5SDimitry Andric }
141991bc56edSDimitry Andric } /* switch (insn->addressSize) */
142091bc56edSDimitry Andric
142191bc56edSDimitry Andric return 0;
142291bc56edSDimitry Andric }
142391bc56edSDimitry Andric
14244ba319b5SDimitry Andric #define GENERIC_FIXUP_FUNC(name, base, prefix, mask) \
14253ca95b02SDimitry Andric static uint16_t name(struct InternalInstruction *insn, \
142691bc56edSDimitry Andric OperandType type, \
142791bc56edSDimitry Andric uint8_t index, \
142891bc56edSDimitry Andric uint8_t *valid) { \
142991bc56edSDimitry Andric *valid = 1; \
143091bc56edSDimitry Andric switch (type) { \
143191bc56edSDimitry Andric default: \
143291bc56edSDimitry Andric debug("Unhandled register type"); \
143391bc56edSDimitry Andric *valid = 0; \
143491bc56edSDimitry Andric return 0; \
143591bc56edSDimitry Andric case TYPE_Rv: \
143691bc56edSDimitry Andric return base + index; \
143791bc56edSDimitry Andric case TYPE_R8: \
14384ba319b5SDimitry Andric index &= mask; \
14394ba319b5SDimitry Andric if (index > 0xf) \
14404ba319b5SDimitry Andric *valid = 0; \
144191bc56edSDimitry Andric if (insn->rexPrefix && \
144291bc56edSDimitry Andric index >= 4 && index <= 7) { \
144391bc56edSDimitry Andric return prefix##_SPL + (index - 4); \
144491bc56edSDimitry Andric } else { \
144591bc56edSDimitry Andric return prefix##_AL + index; \
144691bc56edSDimitry Andric } \
144791bc56edSDimitry Andric case TYPE_R16: \
14484ba319b5SDimitry Andric index &= mask; \
14494ba319b5SDimitry Andric if (index > 0xf) \
14504ba319b5SDimitry Andric *valid = 0; \
145191bc56edSDimitry Andric return prefix##_AX + index; \
145291bc56edSDimitry Andric case TYPE_R32: \
14534ba319b5SDimitry Andric index &= mask; \
14544ba319b5SDimitry Andric if (index > 0xf) \
14554ba319b5SDimitry Andric *valid = 0; \
145691bc56edSDimitry Andric return prefix##_EAX + index; \
145791bc56edSDimitry Andric case TYPE_R64: \
14584ba319b5SDimitry Andric index &= mask; \
14594ba319b5SDimitry Andric if (index > 0xf) \
14604ba319b5SDimitry Andric *valid = 0; \
146191bc56edSDimitry Andric return prefix##_RAX + index; \
14627a7e6055SDimitry Andric case TYPE_ZMM: \
146391bc56edSDimitry Andric return prefix##_ZMM0 + index; \
14647a7e6055SDimitry Andric case TYPE_YMM: \
146591bc56edSDimitry Andric return prefix##_YMM0 + index; \
14667a7e6055SDimitry Andric case TYPE_XMM: \
146791bc56edSDimitry Andric return prefix##_XMM0 + index; \
14687a7e6055SDimitry Andric case TYPE_VK: \
14694ba319b5SDimitry Andric index &= 0xf; \
1470ff0cc061SDimitry Andric if (index > 7) \
1471ff0cc061SDimitry Andric *valid = 0; \
147291bc56edSDimitry Andric return prefix##_K0 + index; \
147391bc56edSDimitry Andric case TYPE_MM64: \
147439d628a0SDimitry Andric return prefix##_MM0 + (index & 0x7); \
147591bc56edSDimitry Andric case TYPE_SEGMENTREG: \
14762cab237bSDimitry Andric if ((index & 7) > 5) \
147791bc56edSDimitry Andric *valid = 0; \
14782cab237bSDimitry Andric return prefix##_ES + (index & 7); \
147991bc56edSDimitry Andric case TYPE_DEBUGREG: \
148091bc56edSDimitry Andric return prefix##_DR0 + index; \
148191bc56edSDimitry Andric case TYPE_CONTROLREG: \
148291bc56edSDimitry Andric return prefix##_CR0 + index; \
14833ca95b02SDimitry Andric case TYPE_BNDR: \
14843ca95b02SDimitry Andric if (index > 3) \
14853ca95b02SDimitry Andric *valid = 0; \
14863ca95b02SDimitry Andric return prefix##_BND0 + index; \
14872cab237bSDimitry Andric case TYPE_MVSIBX: \
14882cab237bSDimitry Andric return prefix##_XMM0 + index; \
14892cab237bSDimitry Andric case TYPE_MVSIBY: \
14902cab237bSDimitry Andric return prefix##_YMM0 + index; \
14912cab237bSDimitry Andric case TYPE_MVSIBZ: \
14922cab237bSDimitry Andric return prefix##_ZMM0 + index; \
149391bc56edSDimitry Andric } \
149491bc56edSDimitry Andric }
149591bc56edSDimitry Andric
149691bc56edSDimitry Andric /*
149791bc56edSDimitry Andric * fixup*Value - Consults an operand type to determine the meaning of the
149891bc56edSDimitry Andric * reg or R/M field. If the operand is an XMM operand, for example, an
149991bc56edSDimitry Andric * operand would be XMM0 instead of AX, which readModRM() would otherwise
150091bc56edSDimitry Andric * misinterpret it as.
150191bc56edSDimitry Andric *
150291bc56edSDimitry Andric * @param insn - The instruction containing the operand.
150391bc56edSDimitry Andric * @param type - The operand type.
150491bc56edSDimitry Andric * @param index - The existing value of the field as reported by readModRM().
150591bc56edSDimitry Andric * @param valid - The address of a uint8_t. The target is set to 1 if the
150691bc56edSDimitry Andric * field is valid for the register class; 0 if not.
150791bc56edSDimitry Andric * @return - The proper value.
150891bc56edSDimitry Andric */
15094ba319b5SDimitry Andric GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG, 0x1f)
15104ba319b5SDimitry Andric GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG, 0xf)
151191bc56edSDimitry Andric
151291bc56edSDimitry Andric /*
151391bc56edSDimitry Andric * fixupReg - Consults an operand specifier to determine which of the
151491bc56edSDimitry Andric * fixup*Value functions to use in correcting readModRM()'ss interpretation.
151591bc56edSDimitry Andric *
151691bc56edSDimitry Andric * @param insn - See fixup*Value().
151791bc56edSDimitry Andric * @param op - The operand specifier.
151891bc56edSDimitry Andric * @return - 0 if fixup was successful; -1 if the register returned was
151991bc56edSDimitry Andric * invalid for its class.
152091bc56edSDimitry Andric */
fixupReg(struct InternalInstruction * insn,const struct OperandSpecifier * op)152191bc56edSDimitry Andric static int fixupReg(struct InternalInstruction *insn,
152291bc56edSDimitry Andric const struct OperandSpecifier *op) {
152391bc56edSDimitry Andric uint8_t valid;
152491bc56edSDimitry Andric
152591bc56edSDimitry Andric dbgprintf(insn, "fixupReg()");
152691bc56edSDimitry Andric
152791bc56edSDimitry Andric switch ((OperandEncoding)op->encoding) {
152891bc56edSDimitry Andric default:
152991bc56edSDimitry Andric debug("Expected a REG or R/M encoding in fixupReg");
153091bc56edSDimitry Andric return -1;
153191bc56edSDimitry Andric case ENCODING_VVVV:
153291bc56edSDimitry Andric insn->vvvv = (Reg)fixupRegValue(insn,
153391bc56edSDimitry Andric (OperandType)op->type,
153491bc56edSDimitry Andric insn->vvvv,
153591bc56edSDimitry Andric &valid);
153691bc56edSDimitry Andric if (!valid)
153791bc56edSDimitry Andric return -1;
153891bc56edSDimitry Andric break;
153991bc56edSDimitry Andric case ENCODING_REG:
154091bc56edSDimitry Andric insn->reg = (Reg)fixupRegValue(insn,
154191bc56edSDimitry Andric (OperandType)op->type,
154291bc56edSDimitry Andric insn->reg - insn->regBase,
154391bc56edSDimitry Andric &valid);
154491bc56edSDimitry Andric if (!valid)
154591bc56edSDimitry Andric return -1;
154691bc56edSDimitry Andric break;
154791bc56edSDimitry Andric CASE_ENCODING_RM:
154891bc56edSDimitry Andric if (insn->eaBase >= insn->eaRegBase) {
154991bc56edSDimitry Andric insn->eaBase = (EABase)fixupRMValue(insn,
155091bc56edSDimitry Andric (OperandType)op->type,
155191bc56edSDimitry Andric insn->eaBase - insn->eaRegBase,
155291bc56edSDimitry Andric &valid);
155391bc56edSDimitry Andric if (!valid)
155491bc56edSDimitry Andric return -1;
155591bc56edSDimitry Andric }
155691bc56edSDimitry Andric break;
155791bc56edSDimitry Andric }
155891bc56edSDimitry Andric
155991bc56edSDimitry Andric return 0;
156091bc56edSDimitry Andric }
156191bc56edSDimitry Andric
156291bc56edSDimitry Andric /*
156391bc56edSDimitry Andric * readOpcodeRegister - Reads an operand from the opcode field of an
156491bc56edSDimitry Andric * instruction and interprets it appropriately given the operand width.
156591bc56edSDimitry Andric * Handles AddRegFrm instructions.
156691bc56edSDimitry Andric *
156791bc56edSDimitry Andric * @param insn - the instruction whose opcode field is to be read.
156891bc56edSDimitry Andric * @param size - The width (in bytes) of the register being specified.
156991bc56edSDimitry Andric * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
157091bc56edSDimitry Andric * RAX.
157191bc56edSDimitry Andric * @return - 0 on success; nonzero otherwise.
157291bc56edSDimitry Andric */
readOpcodeRegister(struct InternalInstruction * insn,uint8_t size)157391bc56edSDimitry Andric static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {
157491bc56edSDimitry Andric dbgprintf(insn, "readOpcodeRegister()");
157591bc56edSDimitry Andric
157691bc56edSDimitry Andric if (size == 0)
157791bc56edSDimitry Andric size = insn->registerSize;
157891bc56edSDimitry Andric
157991bc56edSDimitry Andric switch (size) {
158091bc56edSDimitry Andric case 1:
158191bc56edSDimitry Andric insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
158291bc56edSDimitry Andric | (insn->opcode & 7)));
158391bc56edSDimitry Andric if (insn->rexPrefix &&
158491bc56edSDimitry Andric insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
158591bc56edSDimitry Andric insn->opcodeRegister < MODRM_REG_AL + 0x8) {
158691bc56edSDimitry Andric insn->opcodeRegister = (Reg)(MODRM_REG_SPL
158791bc56edSDimitry Andric + (insn->opcodeRegister - MODRM_REG_AL - 4));
158891bc56edSDimitry Andric }
158991bc56edSDimitry Andric
159091bc56edSDimitry Andric break;
159191bc56edSDimitry Andric case 2:
159291bc56edSDimitry Andric insn->opcodeRegister = (Reg)(MODRM_REG_AX
159391bc56edSDimitry Andric + ((bFromREX(insn->rexPrefix) << 3)
159491bc56edSDimitry Andric | (insn->opcode & 7)));
159591bc56edSDimitry Andric break;
159691bc56edSDimitry Andric case 4:
159791bc56edSDimitry Andric insn->opcodeRegister = (Reg)(MODRM_REG_EAX
159891bc56edSDimitry Andric + ((bFromREX(insn->rexPrefix) << 3)
159991bc56edSDimitry Andric | (insn->opcode & 7)));
160091bc56edSDimitry Andric break;
160191bc56edSDimitry Andric case 8:
160291bc56edSDimitry Andric insn->opcodeRegister = (Reg)(MODRM_REG_RAX
160391bc56edSDimitry Andric + ((bFromREX(insn->rexPrefix) << 3)
160491bc56edSDimitry Andric | (insn->opcode & 7)));
160591bc56edSDimitry Andric break;
160691bc56edSDimitry Andric }
160791bc56edSDimitry Andric
160891bc56edSDimitry Andric return 0;
160991bc56edSDimitry Andric }
161091bc56edSDimitry Andric
161191bc56edSDimitry Andric /*
161291bc56edSDimitry Andric * readImmediate - Consumes an immediate operand from an instruction, given the
161391bc56edSDimitry Andric * desired operand size.
161491bc56edSDimitry Andric *
161591bc56edSDimitry Andric * @param insn - The instruction whose operand is to be read.
161691bc56edSDimitry Andric * @param size - The width (in bytes) of the operand.
161791bc56edSDimitry Andric * @return - 0 if the immediate was successfully consumed; nonzero
161891bc56edSDimitry Andric * otherwise.
161991bc56edSDimitry Andric */
readImmediate(struct InternalInstruction * insn,uint8_t size)162091bc56edSDimitry Andric static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
162191bc56edSDimitry Andric uint8_t imm8;
162291bc56edSDimitry Andric uint16_t imm16;
162391bc56edSDimitry Andric uint32_t imm32;
162491bc56edSDimitry Andric uint64_t imm64;
162591bc56edSDimitry Andric
162691bc56edSDimitry Andric dbgprintf(insn, "readImmediate()");
162791bc56edSDimitry Andric
162891bc56edSDimitry Andric if (insn->numImmediatesConsumed == 2) {
162991bc56edSDimitry Andric debug("Already consumed two immediates");
163091bc56edSDimitry Andric return -1;
163191bc56edSDimitry Andric }
163291bc56edSDimitry Andric
163391bc56edSDimitry Andric if (size == 0)
163491bc56edSDimitry Andric size = insn->immediateSize;
163591bc56edSDimitry Andric else
163691bc56edSDimitry Andric insn->immediateSize = size;
163791bc56edSDimitry Andric insn->immediateOffset = insn->readerCursor - insn->startLocation;
163891bc56edSDimitry Andric
163991bc56edSDimitry Andric switch (size) {
164091bc56edSDimitry Andric case 1:
164191bc56edSDimitry Andric if (consumeByte(insn, &imm8))
164291bc56edSDimitry Andric return -1;
164391bc56edSDimitry Andric insn->immediates[insn->numImmediatesConsumed] = imm8;
164491bc56edSDimitry Andric break;
164591bc56edSDimitry Andric case 2:
164691bc56edSDimitry Andric if (consumeUInt16(insn, &imm16))
164791bc56edSDimitry Andric return -1;
164891bc56edSDimitry Andric insn->immediates[insn->numImmediatesConsumed] = imm16;
164991bc56edSDimitry Andric break;
165091bc56edSDimitry Andric case 4:
165191bc56edSDimitry Andric if (consumeUInt32(insn, &imm32))
165291bc56edSDimitry Andric return -1;
165391bc56edSDimitry Andric insn->immediates[insn->numImmediatesConsumed] = imm32;
165491bc56edSDimitry Andric break;
165591bc56edSDimitry Andric case 8:
165691bc56edSDimitry Andric if (consumeUInt64(insn, &imm64))
165791bc56edSDimitry Andric return -1;
165891bc56edSDimitry Andric insn->immediates[insn->numImmediatesConsumed] = imm64;
165991bc56edSDimitry Andric break;
166091bc56edSDimitry Andric }
166191bc56edSDimitry Andric
166291bc56edSDimitry Andric insn->numImmediatesConsumed++;
166391bc56edSDimitry Andric
166491bc56edSDimitry Andric return 0;
166591bc56edSDimitry Andric }
166691bc56edSDimitry Andric
166791bc56edSDimitry Andric /*
166891bc56edSDimitry Andric * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix.
166991bc56edSDimitry Andric *
167091bc56edSDimitry Andric * @param insn - The instruction whose operand is to be read.
167191bc56edSDimitry Andric * @return - 0 if the vvvv was successfully consumed; nonzero
167291bc56edSDimitry Andric * otherwise.
167391bc56edSDimitry Andric */
readVVVV(struct InternalInstruction * insn)167491bc56edSDimitry Andric static int readVVVV(struct InternalInstruction* insn) {
167591bc56edSDimitry Andric dbgprintf(insn, "readVVVV()");
167691bc56edSDimitry Andric
167791bc56edSDimitry Andric int vvvv;
167891bc56edSDimitry Andric if (insn->vectorExtensionType == TYPE_EVEX)
167991bc56edSDimitry Andric vvvv = (v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4 |
168091bc56edSDimitry Andric vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2]));
168191bc56edSDimitry Andric else if (insn->vectorExtensionType == TYPE_VEX_3B)
168291bc56edSDimitry Andric vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]);
168391bc56edSDimitry Andric else if (insn->vectorExtensionType == TYPE_VEX_2B)
168491bc56edSDimitry Andric vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]);
168591bc56edSDimitry Andric else if (insn->vectorExtensionType == TYPE_XOP)
168691bc56edSDimitry Andric vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]);
168791bc56edSDimitry Andric else
168891bc56edSDimitry Andric return -1;
168991bc56edSDimitry Andric
169091bc56edSDimitry Andric if (insn->mode != MODE_64BIT)
16914ba319b5SDimitry Andric vvvv &= 0xf; // Can only clear bit 4. Bit 3 must be cleared later.
169291bc56edSDimitry Andric
169391bc56edSDimitry Andric insn->vvvv = static_cast<Reg>(vvvv);
169491bc56edSDimitry Andric return 0;
169591bc56edSDimitry Andric }
169691bc56edSDimitry Andric
169791bc56edSDimitry Andric /*
169891bc56edSDimitry Andric * readMaskRegister - Reads an mask register from the opcode field of an
169991bc56edSDimitry Andric * instruction.
170091bc56edSDimitry Andric *
170191bc56edSDimitry Andric * @param insn - The instruction whose opcode field is to be read.
170291bc56edSDimitry Andric * @return - 0 on success; nonzero otherwise.
170391bc56edSDimitry Andric */
readMaskRegister(struct InternalInstruction * insn)170491bc56edSDimitry Andric static int readMaskRegister(struct InternalInstruction* insn) {
170591bc56edSDimitry Andric dbgprintf(insn, "readMaskRegister()");
170691bc56edSDimitry Andric
170791bc56edSDimitry Andric if (insn->vectorExtensionType != TYPE_EVEX)
170891bc56edSDimitry Andric return -1;
170991bc56edSDimitry Andric
171091bc56edSDimitry Andric insn->writemask =
171191bc56edSDimitry Andric static_cast<Reg>(aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]));
171291bc56edSDimitry Andric return 0;
171391bc56edSDimitry Andric }
171491bc56edSDimitry Andric
171591bc56edSDimitry Andric /*
171691bc56edSDimitry Andric * readOperands - Consults the specifier for an instruction and consumes all
171791bc56edSDimitry Andric * operands for that instruction, interpreting them as it goes.
171891bc56edSDimitry Andric *
171991bc56edSDimitry Andric * @param insn - The instruction whose operands are to be read and interpreted.
172091bc56edSDimitry Andric * @return - 0 if all operands could be read; nonzero otherwise.
172191bc56edSDimitry Andric */
readOperands(struct InternalInstruction * insn)172291bc56edSDimitry Andric static int readOperands(struct InternalInstruction* insn) {
172391bc56edSDimitry Andric int hasVVVV, needVVVV;
172491bc56edSDimitry Andric int sawRegImm = 0;
172591bc56edSDimitry Andric
172691bc56edSDimitry Andric dbgprintf(insn, "readOperands()");
172791bc56edSDimitry Andric
172891bc56edSDimitry Andric /* If non-zero vvvv specified, need to make sure one of the operands
172991bc56edSDimitry Andric uses it. */
173091bc56edSDimitry Andric hasVVVV = !readVVVV(insn);
173191bc56edSDimitry Andric needVVVV = hasVVVV && (insn->vvvv != 0);
173291bc56edSDimitry Andric
173391bc56edSDimitry Andric for (const auto &Op : x86OperandSets[insn->spec->operands]) {
173491bc56edSDimitry Andric switch (Op.encoding) {
173591bc56edSDimitry Andric case ENCODING_NONE:
173691bc56edSDimitry Andric case ENCODING_SI:
173791bc56edSDimitry Andric case ENCODING_DI:
173891bc56edSDimitry Andric break;
17397a7e6055SDimitry Andric CASE_ENCODING_VSIB:
17407a7e6055SDimitry Andric // VSIB can use the V2 bit so check only the other bits.
17417a7e6055SDimitry Andric if (needVVVV)
17427a7e6055SDimitry Andric needVVVV = hasVVVV & ((insn->vvvv & 0xf) != 0);
17437a7e6055SDimitry Andric if (readModRM(insn))
17447a7e6055SDimitry Andric return -1;
17452cab237bSDimitry Andric
17462cab237bSDimitry Andric // Reject if SIB wasn't used.
17472cab237bSDimitry Andric if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
17487a7e6055SDimitry Andric return -1;
17492cab237bSDimitry Andric
17502cab237bSDimitry Andric // If sibIndex was set to SIB_INDEX_NONE, index offset is 4.
17512cab237bSDimitry Andric if (insn->sibIndex == SIB_INDEX_NONE)
17524ba319b5SDimitry Andric insn->sibIndex = (SIBIndex)(insn->sibIndexBase + 4);
17532cab237bSDimitry Andric
17542cab237bSDimitry Andric // If EVEX.v2 is set this is one of the 16-31 registers.
17554ba319b5SDimitry Andric if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT &&
17562cab237bSDimitry Andric v2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
17572cab237bSDimitry Andric insn->sibIndex = (SIBIndex)(insn->sibIndex + 16);
17582cab237bSDimitry Andric
17592cab237bSDimitry Andric // Adjust the index register to the correct size.
17602cab237bSDimitry Andric switch ((OperandType)Op.type) {
17612cab237bSDimitry Andric default:
17622cab237bSDimitry Andric debug("Unhandled VSIB index type");
17632cab237bSDimitry Andric return -1;
17642cab237bSDimitry Andric case TYPE_MVSIBX:
17652cab237bSDimitry Andric insn->sibIndex = (SIBIndex)(SIB_INDEX_XMM0 +
17662cab237bSDimitry Andric (insn->sibIndex - insn->sibIndexBase));
17672cab237bSDimitry Andric break;
17682cab237bSDimitry Andric case TYPE_MVSIBY:
17692cab237bSDimitry Andric insn->sibIndex = (SIBIndex)(SIB_INDEX_YMM0 +
17702cab237bSDimitry Andric (insn->sibIndex - insn->sibIndexBase));
17712cab237bSDimitry Andric break;
17722cab237bSDimitry Andric case TYPE_MVSIBZ:
17732cab237bSDimitry Andric insn->sibIndex = (SIBIndex)(SIB_INDEX_ZMM0 +
17742cab237bSDimitry Andric (insn->sibIndex - insn->sibIndexBase));
17752cab237bSDimitry Andric break;
17762cab237bSDimitry Andric }
17772cab237bSDimitry Andric
17787a7e6055SDimitry Andric // Apply the AVX512 compressed displacement scaling factor.
17797a7e6055SDimitry Andric if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
17807a7e6055SDimitry Andric insn->displacement *= 1 << (Op.encoding - ENCODING_VSIB);
17817a7e6055SDimitry Andric break;
178291bc56edSDimitry Andric case ENCODING_REG:
178391bc56edSDimitry Andric CASE_ENCODING_RM:
178491bc56edSDimitry Andric if (readModRM(insn))
178591bc56edSDimitry Andric return -1;
178691bc56edSDimitry Andric if (fixupReg(insn, &Op))
178791bc56edSDimitry Andric return -1;
178891bc56edSDimitry Andric // Apply the AVX512 compressed displacement scaling factor.
178991bc56edSDimitry Andric if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
179091bc56edSDimitry Andric insn->displacement *= 1 << (Op.encoding - ENCODING_RM);
179191bc56edSDimitry Andric break;
179291bc56edSDimitry Andric case ENCODING_IB:
179391bc56edSDimitry Andric if (sawRegImm) {
179491bc56edSDimitry Andric /* Saw a register immediate so don't read again and instead split the
179591bc56edSDimitry Andric previous immediate. FIXME: This is a hack. */
179691bc56edSDimitry Andric insn->immediates[insn->numImmediatesConsumed] =
179791bc56edSDimitry Andric insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
179891bc56edSDimitry Andric ++insn->numImmediatesConsumed;
179991bc56edSDimitry Andric break;
180091bc56edSDimitry Andric }
180191bc56edSDimitry Andric if (readImmediate(insn, 1))
180291bc56edSDimitry Andric return -1;
18037a7e6055SDimitry Andric if (Op.type == TYPE_XMM || Op.type == TYPE_YMM)
180491bc56edSDimitry Andric sawRegImm = 1;
180591bc56edSDimitry Andric break;
180691bc56edSDimitry Andric case ENCODING_IW:
180791bc56edSDimitry Andric if (readImmediate(insn, 2))
180891bc56edSDimitry Andric return -1;
180991bc56edSDimitry Andric break;
181091bc56edSDimitry Andric case ENCODING_ID:
181191bc56edSDimitry Andric if (readImmediate(insn, 4))
181291bc56edSDimitry Andric return -1;
181391bc56edSDimitry Andric break;
181491bc56edSDimitry Andric case ENCODING_IO:
181591bc56edSDimitry Andric if (readImmediate(insn, 8))
181691bc56edSDimitry Andric return -1;
181791bc56edSDimitry Andric break;
181891bc56edSDimitry Andric case ENCODING_Iv:
181991bc56edSDimitry Andric if (readImmediate(insn, insn->immediateSize))
182091bc56edSDimitry Andric return -1;
182191bc56edSDimitry Andric break;
182291bc56edSDimitry Andric case ENCODING_Ia:
182391bc56edSDimitry Andric if (readImmediate(insn, insn->addressSize))
182491bc56edSDimitry Andric return -1;
182591bc56edSDimitry Andric break;
18262cab237bSDimitry Andric case ENCODING_IRC:
18272cab237bSDimitry Andric insn->RC = (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 1) |
18282cab237bSDimitry Andric lFromEVEX4of4(insn->vectorExtensionPrefix[3]);
18292cab237bSDimitry Andric break;
183091bc56edSDimitry Andric case ENCODING_RB:
183191bc56edSDimitry Andric if (readOpcodeRegister(insn, 1))
183291bc56edSDimitry Andric return -1;
183391bc56edSDimitry Andric break;
183491bc56edSDimitry Andric case ENCODING_RW:
183591bc56edSDimitry Andric if (readOpcodeRegister(insn, 2))
183691bc56edSDimitry Andric return -1;
183791bc56edSDimitry Andric break;
183891bc56edSDimitry Andric case ENCODING_RD:
183991bc56edSDimitry Andric if (readOpcodeRegister(insn, 4))
184091bc56edSDimitry Andric return -1;
184191bc56edSDimitry Andric break;
184291bc56edSDimitry Andric case ENCODING_RO:
184391bc56edSDimitry Andric if (readOpcodeRegister(insn, 8))
184491bc56edSDimitry Andric return -1;
184591bc56edSDimitry Andric break;
184691bc56edSDimitry Andric case ENCODING_Rv:
184791bc56edSDimitry Andric if (readOpcodeRegister(insn, 0))
184891bc56edSDimitry Andric return -1;
184991bc56edSDimitry Andric break;
185091bc56edSDimitry Andric case ENCODING_FP:
185191bc56edSDimitry Andric break;
185291bc56edSDimitry Andric case ENCODING_VVVV:
185391bc56edSDimitry Andric needVVVV = 0; /* Mark that we have found a VVVV operand. */
185491bc56edSDimitry Andric if (!hasVVVV)
185591bc56edSDimitry Andric return -1;
18564ba319b5SDimitry Andric if (insn->mode != MODE_64BIT)
18574ba319b5SDimitry Andric insn->vvvv = static_cast<Reg>(insn->vvvv & 0x7);
185891bc56edSDimitry Andric if (fixupReg(insn, &Op))
185991bc56edSDimitry Andric return -1;
186091bc56edSDimitry Andric break;
186191bc56edSDimitry Andric case ENCODING_WRITEMASK:
186291bc56edSDimitry Andric if (readMaskRegister(insn))
186391bc56edSDimitry Andric return -1;
186491bc56edSDimitry Andric break;
186591bc56edSDimitry Andric case ENCODING_DUP:
186691bc56edSDimitry Andric break;
186791bc56edSDimitry Andric default:
186891bc56edSDimitry Andric dbgprintf(insn, "Encountered an operand with an unknown encoding.");
186991bc56edSDimitry Andric return -1;
187091bc56edSDimitry Andric }
187191bc56edSDimitry Andric }
187291bc56edSDimitry Andric
187391bc56edSDimitry Andric /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */
187491bc56edSDimitry Andric if (needVVVV) return -1;
187591bc56edSDimitry Andric
187691bc56edSDimitry Andric return 0;
187791bc56edSDimitry Andric }
187891bc56edSDimitry Andric
187991bc56edSDimitry Andric /*
188091bc56edSDimitry Andric * decodeInstruction - Reads and interprets a full instruction provided by the
188191bc56edSDimitry Andric * user.
188291bc56edSDimitry Andric *
188391bc56edSDimitry Andric * @param insn - A pointer to the instruction to be populated. Must be
188491bc56edSDimitry Andric * pre-allocated.
188591bc56edSDimitry Andric * @param reader - The function to be used to read the instruction's bytes.
188691bc56edSDimitry Andric * @param readerArg - A generic argument to be passed to the reader to store
188791bc56edSDimitry Andric * any internal state.
188891bc56edSDimitry Andric * @param logger - If non-NULL, the function to be used to write log messages
188991bc56edSDimitry Andric * and warnings.
189091bc56edSDimitry Andric * @param loggerArg - A generic argument to be passed to the logger to store
189191bc56edSDimitry Andric * any internal state.
189291bc56edSDimitry Andric * @param startLoc - The address (in the reader's address space) of the first
189391bc56edSDimitry Andric * byte in the instruction.
189491bc56edSDimitry Andric * @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
189591bc56edSDimitry Andric * decode the instruction in.
189691bc56edSDimitry Andric * @return - 0 if the instruction's memory could be read; nonzero if
189791bc56edSDimitry Andric * not.
189891bc56edSDimitry Andric */
decodeInstruction(struct InternalInstruction * insn,byteReader_t reader,const void * readerArg,dlog_t logger,void * loggerArg,const void * miiArg,uint64_t startLoc,DisassemblerMode mode)189991bc56edSDimitry Andric int llvm::X86Disassembler::decodeInstruction(
190091bc56edSDimitry Andric struct InternalInstruction *insn, byteReader_t reader,
190191bc56edSDimitry Andric const void *readerArg, dlog_t logger, void *loggerArg, const void *miiArg,
190291bc56edSDimitry Andric uint64_t startLoc, DisassemblerMode mode) {
190391bc56edSDimitry Andric memset(insn, 0, sizeof(struct InternalInstruction));
190491bc56edSDimitry Andric
190591bc56edSDimitry Andric insn->reader = reader;
190691bc56edSDimitry Andric insn->readerArg = readerArg;
190791bc56edSDimitry Andric insn->dlog = logger;
190891bc56edSDimitry Andric insn->dlogArg = loggerArg;
190991bc56edSDimitry Andric insn->startLocation = startLoc;
191091bc56edSDimitry Andric insn->readerCursor = startLoc;
191191bc56edSDimitry Andric insn->mode = mode;
191291bc56edSDimitry Andric insn->numImmediatesConsumed = 0;
191391bc56edSDimitry Andric
191491bc56edSDimitry Andric if (readPrefixes(insn) ||
191591bc56edSDimitry Andric readOpcode(insn) ||
191691bc56edSDimitry Andric getID(insn, miiArg) ||
191791bc56edSDimitry Andric insn->instructionID == 0 ||
191891bc56edSDimitry Andric readOperands(insn))
191991bc56edSDimitry Andric return -1;
192091bc56edSDimitry Andric
192191bc56edSDimitry Andric insn->operands = x86OperandSets[insn->spec->operands];
192291bc56edSDimitry Andric
192391bc56edSDimitry Andric insn->length = insn->readerCursor - insn->startLocation;
192491bc56edSDimitry Andric
192591bc56edSDimitry Andric dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu",
192691bc56edSDimitry Andric startLoc, insn->readerCursor, insn->length);
192791bc56edSDimitry Andric
192891bc56edSDimitry Andric if (insn->length > 15)
192991bc56edSDimitry Andric dbgprintf(insn, "Instruction exceeds 15-byte limit");
193091bc56edSDimitry Andric
193191bc56edSDimitry Andric return 0;
193291bc56edSDimitry Andric }
1933