//===-- X86DisassemblerDecoder.cpp - Disassembler decoder -----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is part of the X86 Disassembler.
// It contains the implementation of the instruction decoder.
// Documentation for the disassembler can be found in X86Disassembler.h.
//
//===----------------------------------------------------------------------===//
1591bc56edSDimitry Andric 
1639d628a0SDimitry Andric #include <cstdarg> /* for va_*()       */
1739d628a0SDimitry Andric #include <cstdio>  /* for vsnprintf()  */
1839d628a0SDimitry Andric #include <cstdlib> /* for exit()       */
1939d628a0SDimitry Andric #include <cstring> /* for memset()     */
2091bc56edSDimitry Andric 
2191bc56edSDimitry Andric #include "X86DisassemblerDecoder.h"
2291bc56edSDimitry Andric 
2391bc56edSDimitry Andric using namespace llvm::X86Disassembler;
2491bc56edSDimitry Andric 
/// Records, for a single opcode, whether the ModR/M byte takes part in
/// instruction selection and where that opcode's candidates start in
/// modRMTable.  With this information the lookup has been narrowed down to a
/// single instruction.
struct ModRMDecision {
  uint8_t modrm_type;      ///< One of the MODRM_* kinds (e.g. MODRM_ONEENTRY).
  uint16_t instructionIDs; ///< Index of the first candidate in modRMTable.
};
3291bc56edSDimitry Andric 
3391bc56edSDimitry Andric /// Specifies which set of ModR/M->instruction tables to look at
3491bc56edSDimitry Andric /// given a particular opcode.
3591bc56edSDimitry Andric struct OpcodeDecision {
3691bc56edSDimitry Andric   ModRMDecision modRMDecisions[256];
3791bc56edSDimitry Andric };
3891bc56edSDimitry Andric 
3991bc56edSDimitry Andric /// Specifies which opcode->instruction tables to look at given
4091bc56edSDimitry Andric /// a particular context (set of attributes).  Since there are many possible
4191bc56edSDimitry Andric /// contexts, the decoder first uses CONTEXTS_SYM to determine which context
4291bc56edSDimitry Andric /// applies given a specific set of attributes.  Hence there are only IC_max
4391bc56edSDimitry Andric /// entries in this table, rather than 2^(ATTR_max).
4491bc56edSDimitry Andric struct ContextDecision {
4591bc56edSDimitry Andric   OpcodeDecision opcodeDecisions[IC_max];
4691bc56edSDimitry Andric };
4791bc56edSDimitry Andric 
4891bc56edSDimitry Andric #include "X86GenDisassemblerTables.inc"
4991bc56edSDimitry Andric 
/*
 * debug - Forwards the message s, together with the current file name and
 *   line number, to Debug().  Expands to an empty statement when NDEBUG is
 *   defined.
 */
#ifdef NDEBUG
#define debug(s) do { } while (0)
#else
#define debug(s) do { Debug(__FILE__, __LINE__, s); } while (0)
#endif
5591bc56edSDimitry Andric 
5691bc56edSDimitry Andric /*
5791bc56edSDimitry Andric  * contextForAttrs - Client for the instruction context table.  Takes a set of
5891bc56edSDimitry Andric  *   attributes and returns the appropriate decode context.
5991bc56edSDimitry Andric  *
6091bc56edSDimitry Andric  * @param attrMask  - Attributes, from the enumeration attributeBits.
6191bc56edSDimitry Andric  * @return          - The InstructionContext to use when looking up an
6291bc56edSDimitry Andric  *                    an instruction with these attributes.
6391bc56edSDimitry Andric  */
contextForAttrs(uint16_t attrMask)6491bc56edSDimitry Andric static InstructionContext contextForAttrs(uint16_t attrMask) {
6591bc56edSDimitry Andric   return static_cast<InstructionContext>(CONTEXTS_SYM[attrMask]);
6691bc56edSDimitry Andric }
6791bc56edSDimitry Andric 
6891bc56edSDimitry Andric /*
6991bc56edSDimitry Andric  * modRMRequired - Reads the appropriate instruction table to determine whether
7091bc56edSDimitry Andric  *   the ModR/M byte is required to decode a particular instruction.
7191bc56edSDimitry Andric  *
7291bc56edSDimitry Andric  * @param type        - The opcode type (i.e., how many bytes it has).
7391bc56edSDimitry Andric  * @param insnContext - The context for the instruction, as returned by
7491bc56edSDimitry Andric  *                      contextForAttrs.
7591bc56edSDimitry Andric  * @param opcode      - The last byte of the instruction's opcode, not counting
7691bc56edSDimitry Andric  *                      ModR/M extensions and escapes.
7791bc56edSDimitry Andric  * @return            - true if the ModR/M byte is required, false otherwise.
7891bc56edSDimitry Andric  */
modRMRequired(OpcodeType type,InstructionContext insnContext,uint16_t opcode)7991bc56edSDimitry Andric static int modRMRequired(OpcodeType type,
8091bc56edSDimitry Andric                          InstructionContext insnContext,
8191bc56edSDimitry Andric                          uint16_t opcode) {
8291bc56edSDimitry Andric   const struct ContextDecision* decision = nullptr;
8391bc56edSDimitry Andric 
8491bc56edSDimitry Andric   switch (type) {
8591bc56edSDimitry Andric   case ONEBYTE:
8691bc56edSDimitry Andric     decision = &ONEBYTE_SYM;
8791bc56edSDimitry Andric     break;
8891bc56edSDimitry Andric   case TWOBYTE:
8991bc56edSDimitry Andric     decision = &TWOBYTE_SYM;
9091bc56edSDimitry Andric     break;
9191bc56edSDimitry Andric   case THREEBYTE_38:
9291bc56edSDimitry Andric     decision = &THREEBYTE38_SYM;
9391bc56edSDimitry Andric     break;
9491bc56edSDimitry Andric   case THREEBYTE_3A:
9591bc56edSDimitry Andric     decision = &THREEBYTE3A_SYM;
9691bc56edSDimitry Andric     break;
9791bc56edSDimitry Andric   case XOP8_MAP:
9891bc56edSDimitry Andric     decision = &XOP8_MAP_SYM;
9991bc56edSDimitry Andric     break;
10091bc56edSDimitry Andric   case XOP9_MAP:
10191bc56edSDimitry Andric     decision = &XOP9_MAP_SYM;
10291bc56edSDimitry Andric     break;
10391bc56edSDimitry Andric   case XOPA_MAP:
10491bc56edSDimitry Andric     decision = &XOPA_MAP_SYM;
10591bc56edSDimitry Andric     break;
1064ba319b5SDimitry Andric   case THREEDNOW_MAP:
1074ba319b5SDimitry Andric     decision = &THREEDNOW_MAP_SYM;
1084ba319b5SDimitry Andric     break;
10991bc56edSDimitry Andric   }
11091bc56edSDimitry Andric 
11191bc56edSDimitry Andric   return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
11291bc56edSDimitry Andric     modrm_type != MODRM_ONEENTRY;
11391bc56edSDimitry Andric }
11491bc56edSDimitry Andric 
11591bc56edSDimitry Andric /*
11691bc56edSDimitry Andric  * decode - Reads the appropriate instruction table to obtain the unique ID of
11791bc56edSDimitry Andric  *   an instruction.
11891bc56edSDimitry Andric  *
11991bc56edSDimitry Andric  * @param type        - See modRMRequired().
12091bc56edSDimitry Andric  * @param insnContext - See modRMRequired().
12191bc56edSDimitry Andric  * @param opcode      - See modRMRequired().
12291bc56edSDimitry Andric  * @param modRM       - The ModR/M byte if required, or any value if not.
12391bc56edSDimitry Andric  * @return            - The UID of the instruction, or 0 on failure.
12491bc56edSDimitry Andric  */
decode(OpcodeType type,InstructionContext insnContext,uint8_t opcode,uint8_t modRM)12591bc56edSDimitry Andric static InstrUID decode(OpcodeType type,
12691bc56edSDimitry Andric                        InstructionContext insnContext,
12791bc56edSDimitry Andric                        uint8_t opcode,
12891bc56edSDimitry Andric                        uint8_t modRM) {
12991bc56edSDimitry Andric   const struct ModRMDecision* dec = nullptr;
13091bc56edSDimitry Andric 
13191bc56edSDimitry Andric   switch (type) {
13291bc56edSDimitry Andric   case ONEBYTE:
13391bc56edSDimitry Andric     dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
13491bc56edSDimitry Andric     break;
13591bc56edSDimitry Andric   case TWOBYTE:
13691bc56edSDimitry Andric     dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
13791bc56edSDimitry Andric     break;
13891bc56edSDimitry Andric   case THREEBYTE_38:
13991bc56edSDimitry Andric     dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
14091bc56edSDimitry Andric     break;
14191bc56edSDimitry Andric   case THREEBYTE_3A:
14291bc56edSDimitry Andric     dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
14391bc56edSDimitry Andric     break;
14491bc56edSDimitry Andric   case XOP8_MAP:
14591bc56edSDimitry Andric     dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
14691bc56edSDimitry Andric     break;
14791bc56edSDimitry Andric   case XOP9_MAP:
14891bc56edSDimitry Andric     dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
14991bc56edSDimitry Andric     break;
15091bc56edSDimitry Andric   case XOPA_MAP:
15191bc56edSDimitry Andric     dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
15291bc56edSDimitry Andric     break;
1534ba319b5SDimitry Andric   case THREEDNOW_MAP:
1544ba319b5SDimitry Andric     dec = &THREEDNOW_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
1554ba319b5SDimitry Andric     break;
15691bc56edSDimitry Andric   }
15791bc56edSDimitry Andric 
15891bc56edSDimitry Andric   switch (dec->modrm_type) {
15991bc56edSDimitry Andric   default:
16091bc56edSDimitry Andric     debug("Corrupt table!  Unknown modrm_type");
16191bc56edSDimitry Andric     return 0;
16291bc56edSDimitry Andric   case MODRM_ONEENTRY:
16391bc56edSDimitry Andric     return modRMTable[dec->instructionIDs];
16491bc56edSDimitry Andric   case MODRM_SPLITRM:
16591bc56edSDimitry Andric     if (modFromModRM(modRM) == 0x3)
16691bc56edSDimitry Andric       return modRMTable[dec->instructionIDs+1];
16791bc56edSDimitry Andric     return modRMTable[dec->instructionIDs];
16891bc56edSDimitry Andric   case MODRM_SPLITREG:
16991bc56edSDimitry Andric     if (modFromModRM(modRM) == 0x3)
17091bc56edSDimitry Andric       return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8];
17191bc56edSDimitry Andric     return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
17291bc56edSDimitry Andric   case MODRM_SPLITMISC:
17391bc56edSDimitry Andric     if (modFromModRM(modRM) == 0x3)
17491bc56edSDimitry Andric       return modRMTable[dec->instructionIDs+(modRM & 0x3f)+8];
17591bc56edSDimitry Andric     return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
17691bc56edSDimitry Andric   case MODRM_FULL:
17791bc56edSDimitry Andric     return modRMTable[dec->instructionIDs+modRM];
17891bc56edSDimitry Andric   }
17991bc56edSDimitry Andric }
18091bc56edSDimitry Andric 
18191bc56edSDimitry Andric /*
18291bc56edSDimitry Andric  * specifierForUID - Given a UID, returns the name and operand specification for
18391bc56edSDimitry Andric  *   that instruction.
18491bc56edSDimitry Andric  *
18591bc56edSDimitry Andric  * @param uid - The unique ID for the instruction.  This should be returned by
18691bc56edSDimitry Andric  *              decode(); specifierForUID will not check bounds.
18791bc56edSDimitry Andric  * @return    - A pointer to the specification for that instruction.
18891bc56edSDimitry Andric  */
specifierForUID(InstrUID uid)18991bc56edSDimitry Andric static const struct InstructionSpecifier *specifierForUID(InstrUID uid) {
19091bc56edSDimitry Andric   return &INSTRUCTIONS_SYM[uid];
19191bc56edSDimitry Andric }
19291bc56edSDimitry Andric 
19391bc56edSDimitry Andric /*
19491bc56edSDimitry Andric  * consumeByte - Uses the reader function provided by the user to consume one
19591bc56edSDimitry Andric  *   byte from the instruction's memory and advance the cursor.
19691bc56edSDimitry Andric  *
19791bc56edSDimitry Andric  * @param insn  - The instruction with the reader function to use.  The cursor
19891bc56edSDimitry Andric  *                for this instruction is advanced.
19991bc56edSDimitry Andric  * @param byte  - A pointer to a pre-allocated memory buffer to be populated
20091bc56edSDimitry Andric  *                with the data read.
20191bc56edSDimitry Andric  * @return      - 0 if the read was successful; nonzero otherwise.
20291bc56edSDimitry Andric  */
consumeByte(struct InternalInstruction * insn,uint8_t * byte)20391bc56edSDimitry Andric static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
20491bc56edSDimitry Andric   int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
20591bc56edSDimitry Andric 
20691bc56edSDimitry Andric   if (!ret)
20791bc56edSDimitry Andric     ++(insn->readerCursor);
20891bc56edSDimitry Andric 
20991bc56edSDimitry Andric   return ret;
21091bc56edSDimitry Andric }
21191bc56edSDimitry Andric 
21291bc56edSDimitry Andric /*
21391bc56edSDimitry Andric  * lookAtByte - Like consumeByte, but does not advance the cursor.
21491bc56edSDimitry Andric  *
21591bc56edSDimitry Andric  * @param insn  - See consumeByte().
21691bc56edSDimitry Andric  * @param byte  - See consumeByte().
21791bc56edSDimitry Andric  * @return      - See consumeByte().
21891bc56edSDimitry Andric  */
lookAtByte(struct InternalInstruction * insn,uint8_t * byte)21991bc56edSDimitry Andric static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) {
22091bc56edSDimitry Andric   return insn->reader(insn->readerArg, byte, insn->readerCursor);
22191bc56edSDimitry Andric }
22291bc56edSDimitry Andric 
unconsumeByte(struct InternalInstruction * insn)22391bc56edSDimitry Andric static void unconsumeByte(struct InternalInstruction* insn) {
22491bc56edSDimitry Andric   insn->readerCursor--;
22591bc56edSDimitry Andric }
22691bc56edSDimitry Andric 
/*
 * CONSUME_FUNC - Defines a static function `name` that reads sizeof(type)
 *   bytes starting at the instruction's cursor, one reader call per byte, and
 *   assembles them least-significant byte first into a value of `type`.
 *   If any read fails, the reader's nonzero result is returned and neither
 *   *ptr nor the cursor is modified.  On success the value is stored in *ptr,
 *   the cursor is advanced by sizeof(type), and 0 is returned.
 */
#define CONSUME_FUNC(name, type)                                        \
  static int name(struct InternalInstruction* insn, type* ptr) {        \
    type value = 0;                                                     \
    for (unsigned idx = 0; idx < sizeof(type); ++idx) {                 \
      uint8_t next;                                                     \
      const int status = insn->reader(insn->readerArg,                  \
                                      &next,                            \
                                      insn->readerCursor + idx);        \
      if (status)                                                       \
        return status;                                                  \
      value = value | (static_cast<uint64_t>(next) << (idx * 8));       \
    }                                                                   \
    *ptr = value;                                                       \
    insn->readerCursor += sizeof(type);                                 \
    return 0;                                                           \
  }
24491bc56edSDimitry Andric 
24591bc56edSDimitry Andric /*
24691bc56edSDimitry Andric  * consume* - Use the reader function provided by the user to consume data
24791bc56edSDimitry Andric  *   values of various sizes from the instruction's memory and advance the
24891bc56edSDimitry Andric  *   cursor appropriately.  These readers perform endian conversion.
24991bc56edSDimitry Andric  *
25091bc56edSDimitry Andric  * @param insn    - See consumeByte().
25191bc56edSDimitry Andric  * @param ptr     - A pointer to a pre-allocated memory of appropriate size to
25291bc56edSDimitry Andric  *                  be populated with the data read.
25391bc56edSDimitry Andric  * @return        - See consumeByte().
25491bc56edSDimitry Andric  */
CONSUME_FUNC(consumeInt8,int8_t)25591bc56edSDimitry Andric CONSUME_FUNC(consumeInt8, int8_t)
25691bc56edSDimitry Andric CONSUME_FUNC(consumeInt16, int16_t)
25791bc56edSDimitry Andric CONSUME_FUNC(consumeInt32, int32_t)
25891bc56edSDimitry Andric CONSUME_FUNC(consumeUInt16, uint16_t)
25991bc56edSDimitry Andric CONSUME_FUNC(consumeUInt32, uint32_t)
26091bc56edSDimitry Andric CONSUME_FUNC(consumeUInt64, uint64_t)
26191bc56edSDimitry Andric 
26291bc56edSDimitry Andric /*
26391bc56edSDimitry Andric  * dbgprintf - Uses the logging function provided by the user to log a single
26491bc56edSDimitry Andric  *   message, typically without a carriage-return.
26591bc56edSDimitry Andric  *
26691bc56edSDimitry Andric  * @param insn    - The instruction containing the logging function.
26791bc56edSDimitry Andric  * @param format  - See printf().
26891bc56edSDimitry Andric  * @param ...     - See printf().
26991bc56edSDimitry Andric  */
27091bc56edSDimitry Andric static void dbgprintf(struct InternalInstruction* insn,
27191bc56edSDimitry Andric                       const char* format,
27291bc56edSDimitry Andric                       ...) {
27391bc56edSDimitry Andric   char buffer[256];
27491bc56edSDimitry Andric   va_list ap;
27591bc56edSDimitry Andric 
27691bc56edSDimitry Andric   if (!insn->dlog)
27791bc56edSDimitry Andric     return;
27891bc56edSDimitry Andric 
27991bc56edSDimitry Andric   va_start(ap, format);
28091bc56edSDimitry Andric   (void)vsnprintf(buffer, sizeof(buffer), format, ap);
28191bc56edSDimitry Andric   va_end(ap);
28291bc56edSDimitry Andric 
28391bc56edSDimitry Andric   insn->dlog(insn->dlogArg, buffer);
28491bc56edSDimitry Andric }
28591bc56edSDimitry Andric 
isREX(struct InternalInstruction * insn,uint8_t prefix)2862cab237bSDimitry Andric static bool isREX(struct InternalInstruction *insn, uint8_t prefix) {
2872cab237bSDimitry Andric   if (insn->mode == MODE_64BIT)
2882cab237bSDimitry Andric     return prefix >= 0x40 && prefix <= 0x4f;
2892cab237bSDimitry Andric   return false;
29091bc56edSDimitry Andric }
29191bc56edSDimitry Andric 
29291bc56edSDimitry Andric /*
2932cab237bSDimitry Andric  * setPrefixPresent - Marks that a particular prefix is present as mandatory
29491bc56edSDimitry Andric  *
2952cab237bSDimitry Andric  * @param insn      - The instruction to be marked as having the prefix.
2962cab237bSDimitry Andric  * @param prefix    - The prefix that is present.
29791bc56edSDimitry Andric  */
setPrefixPresent(struct InternalInstruction * insn,uint8_t prefix)2982cab237bSDimitry Andric static void setPrefixPresent(struct InternalInstruction *insn, uint8_t prefix) {
2992cab237bSDimitry Andric   uint8_t nextByte;
3002cab237bSDimitry Andric   switch (prefix) {
3014ba319b5SDimitry Andric   case 0xf0:
3024ba319b5SDimitry Andric     insn->hasLockPrefix = true;
3034ba319b5SDimitry Andric     break;
3042cab237bSDimitry Andric   case 0xf2:
3052cab237bSDimitry Andric   case 0xf3:
3062cab237bSDimitry Andric     if (lookAtByte(insn, &nextByte))
3072cab237bSDimitry Andric       break;
3082cab237bSDimitry Andric     // TODO:
3092cab237bSDimitry Andric     //  1. There could be several 0x66
3102cab237bSDimitry Andric     //  2. if (nextByte == 0x66) and nextNextByte != 0x0f then
3112cab237bSDimitry Andric     //      it's not mandatory prefix
3122cab237bSDimitry Andric     //  3. if (nextByte >= 0x40 && nextByte <= 0x4f) it's REX and we need
3132cab237bSDimitry Andric     //     0x0f exactly after it to be mandatory prefix
3142cab237bSDimitry Andric     if (isREX(insn, nextByte) || nextByte == 0x0f || nextByte == 0x66)
3152cab237bSDimitry Andric       // The last of 0xf2 /0xf3 is mandatory prefix
3162cab237bSDimitry Andric       insn->mandatoryPrefix = prefix;
3172cab237bSDimitry Andric     insn->repeatPrefix = prefix;
3182cab237bSDimitry Andric     break;
3192cab237bSDimitry Andric   case 0x66:
3202cab237bSDimitry Andric     if (lookAtByte(insn, &nextByte))
3212cab237bSDimitry Andric       break;
3222cab237bSDimitry Andric     // 0x66 can't overwrite existing mandatory prefix and should be ignored
3232cab237bSDimitry Andric     if (!insn->mandatoryPrefix && (nextByte == 0x0f || isREX(insn, nextByte)))
3242cab237bSDimitry Andric       insn->mandatoryPrefix = prefix;
3252cab237bSDimitry Andric     break;
3262cab237bSDimitry Andric   }
32791bc56edSDimitry Andric }
32891bc56edSDimitry Andric 
32991bc56edSDimitry Andric /*
33091bc56edSDimitry Andric  * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
33191bc56edSDimitry Andric  *   instruction as having them.  Also sets the instruction's default operand,
33291bc56edSDimitry Andric  *   address, and other relevant data sizes to report operands correctly.
33391bc56edSDimitry Andric  *
33491bc56edSDimitry Andric  * @param insn  - The instruction whose prefixes are to be read.
33591bc56edSDimitry Andric  * @return      - 0 if the instruction could be read until the end of the prefix
33691bc56edSDimitry Andric  *                bytes, and no prefixes conflicted; nonzero otherwise.
33791bc56edSDimitry Andric  */
readPrefixes(struct InternalInstruction * insn)33891bc56edSDimitry Andric static int readPrefixes(struct InternalInstruction* insn) {
33991bc56edSDimitry Andric   bool isPrefix = true;
34091bc56edSDimitry Andric   uint8_t byte = 0;
34191bc56edSDimitry Andric   uint8_t nextByte;
34291bc56edSDimitry Andric 
34391bc56edSDimitry Andric   dbgprintf(insn, "readPrefixes()");
34491bc56edSDimitry Andric 
34591bc56edSDimitry Andric   while (isPrefix) {
34691bc56edSDimitry Andric     /* If we fail reading prefixes, just stop here and let the opcode reader deal with it */
34791bc56edSDimitry Andric     if (consumeByte(insn, &byte))
34891bc56edSDimitry Andric       break;
34991bc56edSDimitry Andric 
35091bc56edSDimitry Andric     /*
35191bc56edSDimitry Andric      * If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then
35291bc56edSDimitry Andric      * break and let it be disassembled as a normal "instruction".
35391bc56edSDimitry Andric      */
3542cab237bSDimitry Andric     if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) // LOCK
35591bc56edSDimitry Andric       break;
35691bc56edSDimitry Andric 
3572cab237bSDimitry Andric     if ((byte == 0xf2 || byte == 0xf3) && !lookAtByte(insn, &nextByte)) {
35891bc56edSDimitry Andric       /*
35991bc56edSDimitry Andric        * If the byte is 0xf2 or 0xf3, and any of the following conditions are
36091bc56edSDimitry Andric        * met:
36191bc56edSDimitry Andric        * - it is followed by a LOCK (0xf0) prefix
36291bc56edSDimitry Andric        * - it is followed by an xchg instruction
36391bc56edSDimitry Andric        * then it should be disassembled as a xacquire/xrelease not repne/rep.
36491bc56edSDimitry Andric        */
3652cab237bSDimitry Andric       if (((nextByte == 0xf0) ||
3662cab237bSDimitry Andric            ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) {
36791bc56edSDimitry Andric         insn->xAcquireRelease = true;
3682cab237bSDimitry Andric         if (!(byte == 0xf3 && nextByte == 0x90)) // PAUSE instruction support
3692cab237bSDimitry Andric           break;
3702cab237bSDimitry Andric       }
37191bc56edSDimitry Andric       /*
37291bc56edSDimitry Andric        * Also if the byte is 0xf3, and the following condition is met:
37391bc56edSDimitry Andric        * - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or
37491bc56edSDimitry Andric        *                       "mov mem, imm" (opcode 0xc6/0xc7) instructions.
37591bc56edSDimitry Andric        * then it should be disassembled as an xrelease not rep.
37691bc56edSDimitry Andric        */
3772cab237bSDimitry Andric       if (byte == 0xf3 && (nextByte == 0x88 || nextByte == 0x89 ||
3782cab237bSDimitry Andric                            nextByte == 0xc6 || nextByte == 0xc7)) {
37991bc56edSDimitry Andric         insn->xAcquireRelease = true;
3802cab237bSDimitry Andric         if (nextByte != 0x90) // PAUSE instruction support
3812cab237bSDimitry Andric           break;
3822cab237bSDimitry Andric       }
3832cab237bSDimitry Andric       if (isREX(insn, nextByte)) {
3842cab237bSDimitry Andric         uint8_t nnextByte;
3852cab237bSDimitry Andric         // Go to REX prefix after the current one
3862cab237bSDimitry Andric         if (consumeByte(insn, &nnextByte))
38791bc56edSDimitry Andric           return -1;
3882cab237bSDimitry Andric         // We should be able to read next byte after REX prefix
3892cab237bSDimitry Andric         if (lookAtByte(insn, &nnextByte))
39091bc56edSDimitry Andric           return -1;
39191bc56edSDimitry Andric         unconsumeByte(insn);
39291bc56edSDimitry Andric       }
39391bc56edSDimitry Andric     }
39491bc56edSDimitry Andric 
39591bc56edSDimitry Andric     switch (byte) {
39691bc56edSDimitry Andric     case 0xf0:  /* LOCK */
39791bc56edSDimitry Andric     case 0xf2:  /* REPNE/REPNZ */
39891bc56edSDimitry Andric     case 0xf3:  /* REP or REPE/REPZ */
3992cab237bSDimitry Andric       setPrefixPresent(insn, byte);
40091bc56edSDimitry Andric       break;
40191bc56edSDimitry Andric     case 0x2e:  /* CS segment override -OR- Branch not taken */
40291bc56edSDimitry Andric     case 0x36:  /* SS segment override -OR- Branch taken */
40391bc56edSDimitry Andric     case 0x3e:  /* DS segment override */
40491bc56edSDimitry Andric     case 0x26:  /* ES segment override */
40591bc56edSDimitry Andric     case 0x64:  /* FS segment override */
40691bc56edSDimitry Andric     case 0x65:  /* GS segment override */
40791bc56edSDimitry Andric       switch (byte) {
40891bc56edSDimitry Andric       case 0x2e:
40991bc56edSDimitry Andric         insn->segmentOverride = SEG_OVERRIDE_CS;
41091bc56edSDimitry Andric         break;
41191bc56edSDimitry Andric       case 0x36:
41291bc56edSDimitry Andric         insn->segmentOverride = SEG_OVERRIDE_SS;
41391bc56edSDimitry Andric         break;
41491bc56edSDimitry Andric       case 0x3e:
41591bc56edSDimitry Andric         insn->segmentOverride = SEG_OVERRIDE_DS;
41691bc56edSDimitry Andric         break;
41791bc56edSDimitry Andric       case 0x26:
41891bc56edSDimitry Andric         insn->segmentOverride = SEG_OVERRIDE_ES;
41991bc56edSDimitry Andric         break;
42091bc56edSDimitry Andric       case 0x64:
42191bc56edSDimitry Andric         insn->segmentOverride = SEG_OVERRIDE_FS;
42291bc56edSDimitry Andric         break;
42391bc56edSDimitry Andric       case 0x65:
42491bc56edSDimitry Andric         insn->segmentOverride = SEG_OVERRIDE_GS;
42591bc56edSDimitry Andric         break;
42691bc56edSDimitry Andric       default:
42791bc56edSDimitry Andric         debug("Unhandled override");
42891bc56edSDimitry Andric         return -1;
42991bc56edSDimitry Andric       }
4302cab237bSDimitry Andric       setPrefixPresent(insn, byte);
43191bc56edSDimitry Andric       break;
43291bc56edSDimitry Andric     case 0x66:  /* Operand-size override */
4332cab237bSDimitry Andric       insn->hasOpSize = true;
4342cab237bSDimitry Andric       setPrefixPresent(insn, byte);
43591bc56edSDimitry Andric       break;
43691bc56edSDimitry Andric     case 0x67:  /* Address-size override */
4372cab237bSDimitry Andric       insn->hasAdSize = true;
4382cab237bSDimitry Andric       setPrefixPresent(insn, byte);
43991bc56edSDimitry Andric       break;
44091bc56edSDimitry Andric     default:    /* Not a prefix byte */
44191bc56edSDimitry Andric       isPrefix = false;
44291bc56edSDimitry Andric       break;
44391bc56edSDimitry Andric     }
44491bc56edSDimitry Andric 
44591bc56edSDimitry Andric     if (isPrefix)
44691bc56edSDimitry Andric       dbgprintf(insn, "Found prefix 0x%hhx", byte);
44791bc56edSDimitry Andric   }
44891bc56edSDimitry Andric 
44991bc56edSDimitry Andric   insn->vectorExtensionType = TYPE_NO_VEX_XOP;
45091bc56edSDimitry Andric 
45191bc56edSDimitry Andric   if (byte == 0x62) {
45291bc56edSDimitry Andric     uint8_t byte1, byte2;
45391bc56edSDimitry Andric 
45491bc56edSDimitry Andric     if (consumeByte(insn, &byte1)) {
45591bc56edSDimitry Andric       dbgprintf(insn, "Couldn't read second byte of EVEX prefix");
45691bc56edSDimitry Andric       return -1;
45791bc56edSDimitry Andric     }
45891bc56edSDimitry Andric 
45991bc56edSDimitry Andric     if (lookAtByte(insn, &byte2)) {
46091bc56edSDimitry Andric       dbgprintf(insn, "Couldn't read third byte of EVEX prefix");
46191bc56edSDimitry Andric       return -1;
46291bc56edSDimitry Andric     }
46391bc56edSDimitry Andric 
46491bc56edSDimitry Andric     if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) &&
46591bc56edSDimitry Andric        ((~byte1 & 0xc) == 0xc) && ((byte2 & 0x4) == 0x4)) {
46691bc56edSDimitry Andric       insn->vectorExtensionType = TYPE_EVEX;
46739d628a0SDimitry Andric     } else {
46891bc56edSDimitry Andric       unconsumeByte(insn); /* unconsume byte1 */
46991bc56edSDimitry Andric       unconsumeByte(insn); /* unconsume byte  */
47091bc56edSDimitry Andric     }
47191bc56edSDimitry Andric 
47291bc56edSDimitry Andric     if (insn->vectorExtensionType == TYPE_EVEX) {
47391bc56edSDimitry Andric       insn->vectorExtensionPrefix[0] = byte;
47491bc56edSDimitry Andric       insn->vectorExtensionPrefix[1] = byte1;
47591bc56edSDimitry Andric       if (consumeByte(insn, &insn->vectorExtensionPrefix[2])) {
47691bc56edSDimitry Andric         dbgprintf(insn, "Couldn't read third byte of EVEX prefix");
47791bc56edSDimitry Andric         return -1;
47891bc56edSDimitry Andric       }
47991bc56edSDimitry Andric       if (consumeByte(insn, &insn->vectorExtensionPrefix[3])) {
48091bc56edSDimitry Andric         dbgprintf(insn, "Couldn't read fourth byte of EVEX prefix");
48191bc56edSDimitry Andric         return -1;
48291bc56edSDimitry Andric       }
48391bc56edSDimitry Andric 
48491bc56edSDimitry Andric       /* We simulate the REX prefix for simplicity's sake */
48591bc56edSDimitry Andric       if (insn->mode == MODE_64BIT) {
48691bc56edSDimitry Andric         insn->rexPrefix = 0x40
48791bc56edSDimitry Andric                         | (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3)
48891bc56edSDimitry Andric                         | (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2)
48991bc56edSDimitry Andric                         | (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1)
49091bc56edSDimitry Andric                         | (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0);
49191bc56edSDimitry Andric       }
49291bc56edSDimitry Andric 
49391bc56edSDimitry Andric       dbgprintf(insn, "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",
49491bc56edSDimitry Andric               insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
49591bc56edSDimitry Andric               insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]);
49691bc56edSDimitry Andric     }
49739d628a0SDimitry Andric   } else if (byte == 0xc4) {
49891bc56edSDimitry Andric     uint8_t byte1;
49991bc56edSDimitry Andric 
50091bc56edSDimitry Andric     if (lookAtByte(insn, &byte1)) {
50191bc56edSDimitry Andric       dbgprintf(insn, "Couldn't read second byte of VEX");
50291bc56edSDimitry Andric       return -1;
50391bc56edSDimitry Andric     }
50491bc56edSDimitry Andric 
5052cab237bSDimitry Andric     if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)
50691bc56edSDimitry Andric       insn->vectorExtensionType = TYPE_VEX_3B;
5072cab237bSDimitry Andric     else
50891bc56edSDimitry Andric       unconsumeByte(insn);
50991bc56edSDimitry Andric 
51091bc56edSDimitry Andric     if (insn->vectorExtensionType == TYPE_VEX_3B) {
51191bc56edSDimitry Andric       insn->vectorExtensionPrefix[0] = byte;
51291bc56edSDimitry Andric       consumeByte(insn, &insn->vectorExtensionPrefix[1]);
51391bc56edSDimitry Andric       consumeByte(insn, &insn->vectorExtensionPrefix[2]);
51491bc56edSDimitry Andric 
51591bc56edSDimitry Andric       /* We simulate the REX prefix for simplicity's sake */
51691bc56edSDimitry Andric 
5172cab237bSDimitry Andric       if (insn->mode == MODE_64BIT)
51891bc56edSDimitry Andric         insn->rexPrefix = 0x40
51991bc56edSDimitry Andric                         | (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3)
52091bc56edSDimitry Andric                         | (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2)
52191bc56edSDimitry Andric                         | (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1)
52291bc56edSDimitry Andric                         | (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0);
52391bc56edSDimitry Andric 
52491bc56edSDimitry Andric       dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx",
52591bc56edSDimitry Andric                 insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
52691bc56edSDimitry Andric                 insn->vectorExtensionPrefix[2]);
52791bc56edSDimitry Andric     }
52839d628a0SDimitry Andric   } else if (byte == 0xc5) {
52991bc56edSDimitry Andric     uint8_t byte1;
53091bc56edSDimitry Andric 
53191bc56edSDimitry Andric     if (lookAtByte(insn, &byte1)) {
53291bc56edSDimitry Andric       dbgprintf(insn, "Couldn't read second byte of VEX");
53391bc56edSDimitry Andric       return -1;
53491bc56edSDimitry Andric     }
53591bc56edSDimitry Andric 
5362cab237bSDimitry Andric     if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)
53791bc56edSDimitry Andric       insn->vectorExtensionType = TYPE_VEX_2B;
5382cab237bSDimitry Andric     else
53991bc56edSDimitry Andric       unconsumeByte(insn);
54091bc56edSDimitry Andric 
54191bc56edSDimitry Andric     if (insn->vectorExtensionType == TYPE_VEX_2B) {
54291bc56edSDimitry Andric       insn->vectorExtensionPrefix[0] = byte;
54391bc56edSDimitry Andric       consumeByte(insn, &insn->vectorExtensionPrefix[1]);
54491bc56edSDimitry Andric 
5452cab237bSDimitry Andric       if (insn->mode == MODE_64BIT)
54691bc56edSDimitry Andric         insn->rexPrefix = 0x40
54791bc56edSDimitry Andric                         | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2);
54891bc56edSDimitry Andric 
54939d628a0SDimitry Andric       switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
55091bc56edSDimitry Andric       default:
55191bc56edSDimitry Andric         break;
55291bc56edSDimitry Andric       case VEX_PREFIX_66:
5532cab237bSDimitry Andric         insn->hasOpSize = true;
55491bc56edSDimitry Andric         break;
55591bc56edSDimitry Andric       }
55691bc56edSDimitry Andric 
55791bc56edSDimitry Andric       dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx",
55891bc56edSDimitry Andric                 insn->vectorExtensionPrefix[0],
55991bc56edSDimitry Andric                 insn->vectorExtensionPrefix[1]);
56091bc56edSDimitry Andric     }
56139d628a0SDimitry Andric   } else if (byte == 0x8f) {
56291bc56edSDimitry Andric     uint8_t byte1;
56391bc56edSDimitry Andric 
56491bc56edSDimitry Andric     if (lookAtByte(insn, &byte1)) {
56591bc56edSDimitry Andric       dbgprintf(insn, "Couldn't read second byte of XOP");
56691bc56edSDimitry Andric       return -1;
56791bc56edSDimitry Andric     }
56891bc56edSDimitry Andric 
5692cab237bSDimitry Andric     if ((byte1 & 0x38) != 0x0) /* 0 in these 3 bits is a POP instruction. */
57091bc56edSDimitry Andric       insn->vectorExtensionType = TYPE_XOP;
5712cab237bSDimitry Andric     else
57291bc56edSDimitry Andric       unconsumeByte(insn);
57391bc56edSDimitry Andric 
57491bc56edSDimitry Andric     if (insn->vectorExtensionType == TYPE_XOP) {
57591bc56edSDimitry Andric       insn->vectorExtensionPrefix[0] = byte;
57691bc56edSDimitry Andric       consumeByte(insn, &insn->vectorExtensionPrefix[1]);
57791bc56edSDimitry Andric       consumeByte(insn, &insn->vectorExtensionPrefix[2]);
57891bc56edSDimitry Andric 
57991bc56edSDimitry Andric       /* We simulate the REX prefix for simplicity's sake */
58091bc56edSDimitry Andric 
5812cab237bSDimitry Andric       if (insn->mode == MODE_64BIT)
58291bc56edSDimitry Andric         insn->rexPrefix = 0x40
58391bc56edSDimitry Andric                         | (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3)
58491bc56edSDimitry Andric                         | (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2)
58591bc56edSDimitry Andric                         | (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1)
58691bc56edSDimitry Andric                         | (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0);
58791bc56edSDimitry Andric 
58839d628a0SDimitry Andric       switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
58991bc56edSDimitry Andric       default:
59091bc56edSDimitry Andric         break;
59191bc56edSDimitry Andric       case VEX_PREFIX_66:
5922cab237bSDimitry Andric         insn->hasOpSize = true;
59391bc56edSDimitry Andric         break;
59491bc56edSDimitry Andric       }
59591bc56edSDimitry Andric 
59691bc56edSDimitry Andric       dbgprintf(insn, "Found XOP prefix 0x%hhx 0x%hhx 0x%hhx",
59791bc56edSDimitry Andric                 insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
59891bc56edSDimitry Andric                 insn->vectorExtensionPrefix[2]);
59991bc56edSDimitry Andric     }
6002cab237bSDimitry Andric   } else if (isREX(insn, byte)) {
6012cab237bSDimitry Andric     if (lookAtByte(insn, &nextByte))
60291bc56edSDimitry Andric       return -1;
60391bc56edSDimitry Andric     insn->rexPrefix = byte;
60491bc56edSDimitry Andric     dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
6052cab237bSDimitry Andric   } else
60691bc56edSDimitry Andric     unconsumeByte(insn);
60791bc56edSDimitry Andric 
60891bc56edSDimitry Andric   if (insn->mode == MODE_16BIT) {
6092cab237bSDimitry Andric     insn->registerSize = (insn->hasOpSize ? 4 : 2);
6102cab237bSDimitry Andric     insn->addressSize = (insn->hasAdSize ? 4 : 2);
6112cab237bSDimitry Andric     insn->displacementSize = (insn->hasAdSize ? 4 : 2);
6122cab237bSDimitry Andric     insn->immediateSize = (insn->hasOpSize ? 4 : 2);
61391bc56edSDimitry Andric   } else if (insn->mode == MODE_32BIT) {
6142cab237bSDimitry Andric     insn->registerSize = (insn->hasOpSize ? 2 : 4);
6152cab237bSDimitry Andric     insn->addressSize = (insn->hasAdSize ? 2 : 4);
6162cab237bSDimitry Andric     insn->displacementSize = (insn->hasAdSize ? 2 : 4);
6172cab237bSDimitry Andric     insn->immediateSize = (insn->hasOpSize ? 2 : 4);
61891bc56edSDimitry Andric   } else if (insn->mode == MODE_64BIT) {
61991bc56edSDimitry Andric     if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
62091bc56edSDimitry Andric       insn->registerSize       = 8;
6212cab237bSDimitry Andric       insn->addressSize = (insn->hasAdSize ? 4 : 8);
62291bc56edSDimitry Andric       insn->displacementSize   = 4;
62391bc56edSDimitry Andric       insn->immediateSize      = 4;
62491bc56edSDimitry Andric     } else {
6252cab237bSDimitry Andric       insn->registerSize = (insn->hasOpSize ? 2 : 4);
6262cab237bSDimitry Andric       insn->addressSize = (insn->hasAdSize ? 4 : 8);
6272cab237bSDimitry Andric       insn->displacementSize = (insn->hasOpSize ? 2 : 4);
6282cab237bSDimitry Andric       insn->immediateSize = (insn->hasOpSize ? 2 : 4);
62991bc56edSDimitry Andric     }
63091bc56edSDimitry Andric   }
63191bc56edSDimitry Andric 
63291bc56edSDimitry Andric   return 0;
63391bc56edSDimitry Andric }
63491bc56edSDimitry Andric 
6354ba319b5SDimitry Andric static int readModRM(struct InternalInstruction* insn);
6364ba319b5SDimitry Andric 
63791bc56edSDimitry Andric /*
63891bc56edSDimitry Andric  * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of
63991bc56edSDimitry Andric  *   extended or escape opcodes).
64091bc56edSDimitry Andric  *
64191bc56edSDimitry Andric  * @param insn  - The instruction whose opcode is to be read.
64291bc56edSDimitry Andric  * @return      - 0 if the opcode could be read successfully; nonzero otherwise.
64391bc56edSDimitry Andric  */
readOpcode(struct InternalInstruction * insn)64491bc56edSDimitry Andric static int readOpcode(struct InternalInstruction* insn) {
64591bc56edSDimitry Andric   /* Determine the length of the primary opcode */
64691bc56edSDimitry Andric 
64791bc56edSDimitry Andric   uint8_t current;
64891bc56edSDimitry Andric 
64991bc56edSDimitry Andric   dbgprintf(insn, "readOpcode()");
65091bc56edSDimitry Andric 
65191bc56edSDimitry Andric   insn->opcodeType = ONEBYTE;
65291bc56edSDimitry Andric 
65339d628a0SDimitry Andric   if (insn->vectorExtensionType == TYPE_EVEX) {
65491bc56edSDimitry Andric     switch (mmFromEVEX2of4(insn->vectorExtensionPrefix[1])) {
65591bc56edSDimitry Andric     default:
65691bc56edSDimitry Andric       dbgprintf(insn, "Unhandled mm field for instruction (0x%hhx)",
65791bc56edSDimitry Andric                 mmFromEVEX2of4(insn->vectorExtensionPrefix[1]));
65891bc56edSDimitry Andric       return -1;
65991bc56edSDimitry Andric     case VEX_LOB_0F:
66091bc56edSDimitry Andric       insn->opcodeType = TWOBYTE;
66191bc56edSDimitry Andric       return consumeByte(insn, &insn->opcode);
66291bc56edSDimitry Andric     case VEX_LOB_0F38:
66391bc56edSDimitry Andric       insn->opcodeType = THREEBYTE_38;
66491bc56edSDimitry Andric       return consumeByte(insn, &insn->opcode);
66591bc56edSDimitry Andric     case VEX_LOB_0F3A:
66691bc56edSDimitry Andric       insn->opcodeType = THREEBYTE_3A;
66791bc56edSDimitry Andric       return consumeByte(insn, &insn->opcode);
66891bc56edSDimitry Andric     }
66939d628a0SDimitry Andric   } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
67091bc56edSDimitry Andric     switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) {
67191bc56edSDimitry Andric     default:
67291bc56edSDimitry Andric       dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
67391bc56edSDimitry Andric                 mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]));
67491bc56edSDimitry Andric       return -1;
67591bc56edSDimitry Andric     case VEX_LOB_0F:
67691bc56edSDimitry Andric       insn->opcodeType = TWOBYTE;
67791bc56edSDimitry Andric       return consumeByte(insn, &insn->opcode);
67891bc56edSDimitry Andric     case VEX_LOB_0F38:
67991bc56edSDimitry Andric       insn->opcodeType = THREEBYTE_38;
68091bc56edSDimitry Andric       return consumeByte(insn, &insn->opcode);
68191bc56edSDimitry Andric     case VEX_LOB_0F3A:
68291bc56edSDimitry Andric       insn->opcodeType = THREEBYTE_3A;
68391bc56edSDimitry Andric       return consumeByte(insn, &insn->opcode);
68491bc56edSDimitry Andric     }
68539d628a0SDimitry Andric   } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
68691bc56edSDimitry Andric     insn->opcodeType = TWOBYTE;
68791bc56edSDimitry Andric     return consumeByte(insn, &insn->opcode);
68839d628a0SDimitry Andric   } else if (insn->vectorExtensionType == TYPE_XOP) {
68991bc56edSDimitry Andric     switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) {
69091bc56edSDimitry Andric     default:
69191bc56edSDimitry Andric       dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
69291bc56edSDimitry Andric                 mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]));
69391bc56edSDimitry Andric       return -1;
69491bc56edSDimitry Andric     case XOP_MAP_SELECT_8:
69591bc56edSDimitry Andric       insn->opcodeType = XOP8_MAP;
69691bc56edSDimitry Andric       return consumeByte(insn, &insn->opcode);
69791bc56edSDimitry Andric     case XOP_MAP_SELECT_9:
69891bc56edSDimitry Andric       insn->opcodeType = XOP9_MAP;
69991bc56edSDimitry Andric       return consumeByte(insn, &insn->opcode);
70091bc56edSDimitry Andric     case XOP_MAP_SELECT_A:
70191bc56edSDimitry Andric       insn->opcodeType = XOPA_MAP;
70291bc56edSDimitry Andric       return consumeByte(insn, &insn->opcode);
70391bc56edSDimitry Andric     }
70491bc56edSDimitry Andric   }
70591bc56edSDimitry Andric 
70691bc56edSDimitry Andric   if (consumeByte(insn, &current))
70791bc56edSDimitry Andric     return -1;
70891bc56edSDimitry Andric 
70991bc56edSDimitry Andric   if (current == 0x0f) {
71091bc56edSDimitry Andric     dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
71191bc56edSDimitry Andric 
71291bc56edSDimitry Andric     if (consumeByte(insn, &current))
71391bc56edSDimitry Andric       return -1;
71491bc56edSDimitry Andric 
71591bc56edSDimitry Andric     if (current == 0x38) {
71691bc56edSDimitry Andric       dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
71791bc56edSDimitry Andric 
71891bc56edSDimitry Andric       if (consumeByte(insn, &current))
71991bc56edSDimitry Andric         return -1;
72091bc56edSDimitry Andric 
72191bc56edSDimitry Andric       insn->opcodeType = THREEBYTE_38;
72291bc56edSDimitry Andric     } else if (current == 0x3a) {
72391bc56edSDimitry Andric       dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
72491bc56edSDimitry Andric 
72591bc56edSDimitry Andric       if (consumeByte(insn, &current))
72691bc56edSDimitry Andric         return -1;
72791bc56edSDimitry Andric 
72891bc56edSDimitry Andric       insn->opcodeType = THREEBYTE_3A;
7294ba319b5SDimitry Andric     } else if (current == 0x0f) {
7304ba319b5SDimitry Andric       dbgprintf(insn, "Found a 3dnow escape prefix (0x%hhx)", current);
7314ba319b5SDimitry Andric 
7324ba319b5SDimitry Andric       // Consume operands before the opcode to comply with the 3DNow encoding
7334ba319b5SDimitry Andric       if (readModRM(insn))
7344ba319b5SDimitry Andric         return -1;
7354ba319b5SDimitry Andric 
7364ba319b5SDimitry Andric       if (consumeByte(insn, &current))
7374ba319b5SDimitry Andric         return -1;
7384ba319b5SDimitry Andric 
7394ba319b5SDimitry Andric       insn->opcodeType = THREEDNOW_MAP;
74091bc56edSDimitry Andric     } else {
74191bc56edSDimitry Andric       dbgprintf(insn, "Didn't find a three-byte escape prefix");
74291bc56edSDimitry Andric 
74391bc56edSDimitry Andric       insn->opcodeType = TWOBYTE;
74491bc56edSDimitry Andric     }
7452cab237bSDimitry Andric   } else if (insn->mandatoryPrefix)
7462cab237bSDimitry Andric     // The opcode with mandatory prefix must start with opcode escape.
7472cab237bSDimitry Andric     // If not it's legacy repeat prefix
7482cab237bSDimitry Andric     insn->mandatoryPrefix = 0;
74991bc56edSDimitry Andric 
75091bc56edSDimitry Andric   /*
75191bc56edSDimitry Andric    * At this point we have consumed the full opcode.
75291bc56edSDimitry Andric    * Anything we consume from here on must be unconsumed.
75391bc56edSDimitry Andric    */
75491bc56edSDimitry Andric 
75591bc56edSDimitry Andric   insn->opcode = current;
75691bc56edSDimitry Andric 
75791bc56edSDimitry Andric   return 0;
75891bc56edSDimitry Andric }
75991bc56edSDimitry Andric 
76091bc56edSDimitry Andric /*
76191bc56edSDimitry Andric  * getIDWithAttrMask - Determines the ID of an instruction, consuming
76291bc56edSDimitry Andric  *   the ModR/M byte as appropriate for extended and escape opcodes,
76391bc56edSDimitry Andric  *   and using a supplied attribute mask.
76491bc56edSDimitry Andric  *
76591bc56edSDimitry Andric  * @param instructionID - A pointer whose target is filled in with the ID of the
76691bc56edSDimitry Andric  *                        instruction.
76791bc56edSDimitry Andric  * @param insn          - The instruction whose ID is to be determined.
76891bc56edSDimitry Andric  * @param attrMask      - The attribute mask to search.
76991bc56edSDimitry Andric  * @return              - 0 if the ModR/M could be read when needed or was not
77091bc56edSDimitry Andric  *                        needed; nonzero otherwise.
77191bc56edSDimitry Andric  */
getIDWithAttrMask(uint16_t * instructionID,struct InternalInstruction * insn,uint16_t attrMask)77291bc56edSDimitry Andric static int getIDWithAttrMask(uint16_t* instructionID,
77391bc56edSDimitry Andric                              struct InternalInstruction* insn,
77491bc56edSDimitry Andric                              uint16_t attrMask) {
77591bc56edSDimitry Andric   bool hasModRMExtension;
77691bc56edSDimitry Andric 
77791bc56edSDimitry Andric   InstructionContext instructionClass = contextForAttrs(attrMask);
77891bc56edSDimitry Andric 
77991bc56edSDimitry Andric   hasModRMExtension = modRMRequired(insn->opcodeType,
78091bc56edSDimitry Andric                                     instructionClass,
78191bc56edSDimitry Andric                                     insn->opcode);
78291bc56edSDimitry Andric 
78391bc56edSDimitry Andric   if (hasModRMExtension) {
78491bc56edSDimitry Andric     if (readModRM(insn))
78591bc56edSDimitry Andric       return -1;
78691bc56edSDimitry Andric 
78791bc56edSDimitry Andric     *instructionID = decode(insn->opcodeType,
78891bc56edSDimitry Andric                             instructionClass,
78991bc56edSDimitry Andric                             insn->opcode,
79091bc56edSDimitry Andric                             insn->modRM);
79191bc56edSDimitry Andric   } else {
79291bc56edSDimitry Andric     *instructionID = decode(insn->opcodeType,
79391bc56edSDimitry Andric                             instructionClass,
79491bc56edSDimitry Andric                             insn->opcode,
79591bc56edSDimitry Andric                             0);
79691bc56edSDimitry Andric   }
79791bc56edSDimitry Andric 
79891bc56edSDimitry Andric   return 0;
79991bc56edSDimitry Andric }
80091bc56edSDimitry Andric 
/*
 * is16BitEquivalent - Determines whether two instruction names refer to
 * equivalent instructions but one is 16-bit whereas the other is not.
 *
 * The names are compared character by character.  Where they differ, the
 * difference is tolerated only if it is one of these substitutions:
 *   'Q' or 'L' in orig  vs  'W' in equiv   (operand-size letter)
 *   '6' or '3' in orig  vs  '1' in equiv   (first digit of 64/32 vs 16)
 *   '4' or '2' in orig  vs  '6' in equiv   (second digit of 64/32 vs 16)
 * Each position is checked independently; the digit substitutions are not
 * verified to occur as a pair.  Identical names also compare as equivalent.
 *
 * @param orig  - The instruction that is not 16-bit
 * @param equiv - The instruction that is 16-bit
 * @return      - true if the names have the same length and every differing
 *                position is one of the substitutions above; false otherwise.
 */
static bool is16BitEquivalent(const char *orig, const char *equiv) {
  for (;; ++orig, ++equiv) {
    const char o = *orig;
    const char e = *equiv;

    /* Reaching a terminator: equivalent only if both names end together. */
    if (o == '\0' || e == '\0')
      return o == e;

    if (o == e)
      continue;

    const bool sizeLetter  = (o == 'Q' || o == 'L') && e == 'W';
    const bool firstDigit  = (o == '6' || o == '3') && e == '1';
    const bool secondDigit = (o == '4' || o == '2') && e == '6';

    if (!(sizeLetter || firstDigit || secondDigit))
      return false;
  }
}
82791bc56edSDimitry Andric 
/*
 * is64Bit - Determines whether this instruction is a 64-bit instruction,
 *   judged purely by whether its name contains the substring "64".
 *
 * @param name - The NUL-terminated name of the instruction to inspect.
 * @return     - true if "64" occurs anywhere in name; false otherwise.
 */
static bool is64Bit(const char *name) {
  return std::strstr(name, "64") != nullptr;
}
84339d628a0SDimitry Andric 
84439d628a0SDimitry Andric /*
84591bc56edSDimitry Andric  * getID - Determines the ID of an instruction, consuming the ModR/M byte as
84691bc56edSDimitry Andric  *   appropriate for extended and escape opcodes.  Determines the attributes and
84791bc56edSDimitry Andric  *   context for the instruction before doing so.
84891bc56edSDimitry Andric  *
84991bc56edSDimitry Andric  * @param insn  - The instruction whose ID is to be determined.
85091bc56edSDimitry Andric  * @return      - 0 if the ModR/M could be read when needed or was not needed;
85191bc56edSDimitry Andric  *                nonzero otherwise.
85291bc56edSDimitry Andric  */
getID(struct InternalInstruction * insn,const void * miiArg)85391bc56edSDimitry Andric static int getID(struct InternalInstruction* insn, const void *miiArg) {
85491bc56edSDimitry Andric   uint16_t attrMask;
85591bc56edSDimitry Andric   uint16_t instructionID;
85691bc56edSDimitry Andric 
85791bc56edSDimitry Andric   dbgprintf(insn, "getID()");
85891bc56edSDimitry Andric 
85991bc56edSDimitry Andric   attrMask = ATTR_NONE;
86091bc56edSDimitry Andric 
86191bc56edSDimitry Andric   if (insn->mode == MODE_64BIT)
86291bc56edSDimitry Andric     attrMask |= ATTR_64BIT;
86391bc56edSDimitry Andric 
86491bc56edSDimitry Andric   if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
86591bc56edSDimitry Andric     attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? ATTR_EVEX : ATTR_VEX;
86691bc56edSDimitry Andric 
86791bc56edSDimitry Andric     if (insn->vectorExtensionType == TYPE_EVEX) {
86891bc56edSDimitry Andric       switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) {
86991bc56edSDimitry Andric       case VEX_PREFIX_66:
87091bc56edSDimitry Andric         attrMask |= ATTR_OPSIZE;
87191bc56edSDimitry Andric         break;
87291bc56edSDimitry Andric       case VEX_PREFIX_F3:
87391bc56edSDimitry Andric         attrMask |= ATTR_XS;
87491bc56edSDimitry Andric         break;
87591bc56edSDimitry Andric       case VEX_PREFIX_F2:
87691bc56edSDimitry Andric         attrMask |= ATTR_XD;
87791bc56edSDimitry Andric         break;
87891bc56edSDimitry Andric       }
87991bc56edSDimitry Andric 
88091bc56edSDimitry Andric       if (zFromEVEX4of4(insn->vectorExtensionPrefix[3]))
88191bc56edSDimitry Andric         attrMask |= ATTR_EVEXKZ;
88291bc56edSDimitry Andric       if (bFromEVEX4of4(insn->vectorExtensionPrefix[3]))
88391bc56edSDimitry Andric         attrMask |= ATTR_EVEXB;
88491bc56edSDimitry Andric       if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]))
88591bc56edSDimitry Andric         attrMask |= ATTR_EVEXK;
88691bc56edSDimitry Andric       if (lFromEVEX4of4(insn->vectorExtensionPrefix[3]))
88791bc56edSDimitry Andric         attrMask |= ATTR_EVEXL;
88891bc56edSDimitry Andric       if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
88991bc56edSDimitry Andric         attrMask |= ATTR_EVEXL2;
89039d628a0SDimitry Andric     } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
89191bc56edSDimitry Andric       switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) {
89291bc56edSDimitry Andric       case VEX_PREFIX_66:
89391bc56edSDimitry Andric         attrMask |= ATTR_OPSIZE;
89491bc56edSDimitry Andric         break;
89591bc56edSDimitry Andric       case VEX_PREFIX_F3:
89691bc56edSDimitry Andric         attrMask |= ATTR_XS;
89791bc56edSDimitry Andric         break;
89891bc56edSDimitry Andric       case VEX_PREFIX_F2:
89991bc56edSDimitry Andric         attrMask |= ATTR_XD;
90091bc56edSDimitry Andric         break;
90191bc56edSDimitry Andric       }
90291bc56edSDimitry Andric 
90391bc56edSDimitry Andric       if (lFromVEX3of3(insn->vectorExtensionPrefix[2]))
90491bc56edSDimitry Andric         attrMask |= ATTR_VEXL;
90539d628a0SDimitry Andric     } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
90691bc56edSDimitry Andric       switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
90791bc56edSDimitry Andric       case VEX_PREFIX_66:
90891bc56edSDimitry Andric         attrMask |= ATTR_OPSIZE;
90991bc56edSDimitry Andric         break;
91091bc56edSDimitry Andric       case VEX_PREFIX_F3:
91191bc56edSDimitry Andric         attrMask |= ATTR_XS;
91291bc56edSDimitry Andric         break;
91391bc56edSDimitry Andric       case VEX_PREFIX_F2:
91491bc56edSDimitry Andric         attrMask |= ATTR_XD;
91591bc56edSDimitry Andric         break;
91691bc56edSDimitry Andric       }
91791bc56edSDimitry Andric 
91891bc56edSDimitry Andric       if (lFromVEX2of2(insn->vectorExtensionPrefix[1]))
91991bc56edSDimitry Andric         attrMask |= ATTR_VEXL;
92039d628a0SDimitry Andric     } else if (insn->vectorExtensionType == TYPE_XOP) {
92191bc56edSDimitry Andric       switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
92291bc56edSDimitry Andric       case VEX_PREFIX_66:
92391bc56edSDimitry Andric         attrMask |= ATTR_OPSIZE;
92491bc56edSDimitry Andric         break;
92591bc56edSDimitry Andric       case VEX_PREFIX_F3:
92691bc56edSDimitry Andric         attrMask |= ATTR_XS;
92791bc56edSDimitry Andric         break;
92891bc56edSDimitry Andric       case VEX_PREFIX_F2:
92991bc56edSDimitry Andric         attrMask |= ATTR_XD;
93091bc56edSDimitry Andric         break;
93191bc56edSDimitry Andric       }
93291bc56edSDimitry Andric 
93391bc56edSDimitry Andric       if (lFromXOP3of3(insn->vectorExtensionPrefix[2]))
93491bc56edSDimitry Andric         attrMask |= ATTR_VEXL;
93539d628a0SDimitry Andric     } else {
93691bc56edSDimitry Andric       return -1;
93791bc56edSDimitry Andric     }
9382cab237bSDimitry Andric   } else if (!insn->mandatoryPrefix) {
9392cab237bSDimitry Andric     // If we don't have mandatory prefix we should use legacy prefixes here
9402cab237bSDimitry Andric     if (insn->hasOpSize && (insn->mode != MODE_16BIT))
94191bc56edSDimitry Andric       attrMask |= ATTR_OPSIZE;
9422cab237bSDimitry Andric     if (insn->hasAdSize)
94391bc56edSDimitry Andric       attrMask |= ATTR_ADSIZE;
9442cab237bSDimitry Andric     if (insn->opcodeType == ONEBYTE) {
9452cab237bSDimitry Andric       if (insn->repeatPrefix == 0xf3 && (insn->opcode == 0x90))
9462cab237bSDimitry Andric         // Special support for PAUSE
94791bc56edSDimitry Andric         attrMask |= ATTR_XS;
9482cab237bSDimitry Andric     } else {
9492cab237bSDimitry Andric       if (insn->repeatPrefix == 0xf2)
95091bc56edSDimitry Andric         attrMask |= ATTR_XD;
9512cab237bSDimitry Andric       else if (insn->repeatPrefix == 0xf3)
9522cab237bSDimitry Andric         attrMask |= ATTR_XS;
9532cab237bSDimitry Andric     }
9542cab237bSDimitry Andric   } else {
9552cab237bSDimitry Andric     switch (insn->mandatoryPrefix) {
9562cab237bSDimitry Andric     case 0xf2:
9572cab237bSDimitry Andric       attrMask |= ATTR_XD;
9582cab237bSDimitry Andric       break;
9592cab237bSDimitry Andric     case 0xf3:
9602cab237bSDimitry Andric       attrMask |= ATTR_XS;
9612cab237bSDimitry Andric       break;
9622cab237bSDimitry Andric     case 0x66:
9632cab237bSDimitry Andric       if (insn->mode != MODE_16BIT)
9642cab237bSDimitry Andric         attrMask |= ATTR_OPSIZE;
9652cab237bSDimitry Andric       break;
9662cab237bSDimitry Andric     case 0x67:
9672cab237bSDimitry Andric       attrMask |= ATTR_ADSIZE;
9682cab237bSDimitry Andric       break;
9692cab237bSDimitry Andric     }
9704ba319b5SDimitry Andric 
97191bc56edSDimitry Andric   }
97291bc56edSDimitry Andric 
9732cab237bSDimitry Andric   if (insn->rexPrefix & 0x08) {
97491bc56edSDimitry Andric     attrMask |= ATTR_REXW;
9752cab237bSDimitry Andric     attrMask &= ~ATTR_ADSIZE;
9762cab237bSDimitry Andric   }
97791bc56edSDimitry Andric 
97891bc56edSDimitry Andric   /*
97991bc56edSDimitry Andric    * JCXZ/JECXZ need special handling for 16-bit mode because the meaning
98091bc56edSDimitry Andric    * of the AdSize prefix is inverted w.r.t. 32-bit mode.
98191bc56edSDimitry Andric    */
98239d628a0SDimitry Andric   if (insn->mode == MODE_16BIT && insn->opcodeType == ONEBYTE &&
98339d628a0SDimitry Andric       insn->opcode == 0xE3)
98491bc56edSDimitry Andric     attrMask ^= ATTR_ADSIZE;
98539d628a0SDimitry Andric 
986*b5893f02SDimitry Andric   // If we're in 16-bit mode and this is one of the relative jumps and opsize
987*b5893f02SDimitry Andric   // prefix isn't present, we need to force the opsize attribute since the
988*b5893f02SDimitry Andric   // prefix is inverted relative to 32-bit mode.
989*b5893f02SDimitry Andric   if (insn->mode == MODE_16BIT && !insn->hasOpSize &&
990*b5893f02SDimitry Andric       insn->opcodeType == ONEBYTE &&
991*b5893f02SDimitry Andric       (insn->opcode == 0xE8 || insn->opcode == 0xE9))
992*b5893f02SDimitry Andric     attrMask |= ATTR_OPSIZE;
9937d523365SDimitry Andric 
994*b5893f02SDimitry Andric   if (insn->mode == MODE_16BIT && !insn->hasOpSize &&
995*b5893f02SDimitry Andric       insn->opcodeType == TWOBYTE &&
996*b5893f02SDimitry Andric       insn->opcode >= 0x80 && insn->opcode <= 0x8F)
997*b5893f02SDimitry Andric     attrMask |= ATTR_OPSIZE;
9987d523365SDimitry Andric 
99991bc56edSDimitry Andric   if (getIDWithAttrMask(&instructionID, insn, attrMask))
100091bc56edSDimitry Andric     return -1;
100139d628a0SDimitry Andric 
100239d628a0SDimitry Andric   /* The following clauses compensate for limitations of the tables. */
100339d628a0SDimitry Andric 
100439d628a0SDimitry Andric   if (insn->mode != MODE_64BIT &&
100539d628a0SDimitry Andric       insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
100639d628a0SDimitry Andric     /*
100739d628a0SDimitry Andric      * The tables can't distinguish between cases where the W-bit is used to
100839d628a0SDimitry Andric      * select register size and cases where it's a required part of the opcode.
100939d628a0SDimitry Andric      */
101039d628a0SDimitry Andric     if ((insn->vectorExtensionType == TYPE_EVEX &&
101139d628a0SDimitry Andric          wFromEVEX3of4(insn->vectorExtensionPrefix[2])) ||
101239d628a0SDimitry Andric         (insn->vectorExtensionType == TYPE_VEX_3B &&
101339d628a0SDimitry Andric          wFromVEX3of3(insn->vectorExtensionPrefix[2])) ||
101439d628a0SDimitry Andric         (insn->vectorExtensionType == TYPE_XOP &&
101539d628a0SDimitry Andric          wFromXOP3of3(insn->vectorExtensionPrefix[2]))) {
101639d628a0SDimitry Andric 
101739d628a0SDimitry Andric       uint16_t instructionIDWithREXW;
101839d628a0SDimitry Andric       if (getIDWithAttrMask(&instructionIDWithREXW,
101939d628a0SDimitry Andric                             insn, attrMask | ATTR_REXW)) {
102039d628a0SDimitry Andric         insn->instructionID = instructionID;
102139d628a0SDimitry Andric         insn->spec = specifierForUID(instructionID);
102239d628a0SDimitry Andric         return 0;
102339d628a0SDimitry Andric       }
102439d628a0SDimitry Andric 
1025d88c1a5aSDimitry Andric       auto SpecName = GetInstrName(instructionIDWithREXW, miiArg);
102639d628a0SDimitry Andric       // If not a 64-bit instruction. Switch the opcode.
1027d88c1a5aSDimitry Andric       if (!is64Bit(SpecName.data())) {
102839d628a0SDimitry Andric         insn->instructionID = instructionIDWithREXW;
102939d628a0SDimitry Andric         insn->spec = specifierForUID(instructionIDWithREXW);
103039d628a0SDimitry Andric         return 0;
103139d628a0SDimitry Andric       }
103291bc56edSDimitry Andric     }
103391bc56edSDimitry Andric   }
103491bc56edSDimitry Andric 
103539d628a0SDimitry Andric   /*
10364ba319b5SDimitry Andric    * Absolute moves, umonitor, and movdir64b need special handling.
103739d628a0SDimitry Andric    * -For 16-bit mode because the meaning of the AdSize and OpSize prefixes are
103839d628a0SDimitry Andric    *  inverted w.r.t. 32-bit mode.
103939d628a0SDimitry Andric    * -For 32-bit mode we need to ensure the ADSIZE prefix is observed in
104039d628a0SDimitry Andric    *  any position.
104139d628a0SDimitry Andric    */
10424ba319b5SDimitry Andric   if ((insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) ||
10434ba319b5SDimitry Andric       (insn->opcodeType == TWOBYTE && (insn->opcode == 0xAE)) ||
10444ba319b5SDimitry Andric       (insn->opcodeType == THREEBYTE_38 && insn->opcode == 0xF8)) {
104539d628a0SDimitry Andric     /* Make sure we observed the prefixes in any position. */
10462cab237bSDimitry Andric     if (insn->hasAdSize)
104739d628a0SDimitry Andric       attrMask |= ATTR_ADSIZE;
10482cab237bSDimitry Andric     if (insn->hasOpSize)
104939d628a0SDimitry Andric       attrMask |= ATTR_OPSIZE;
105039d628a0SDimitry Andric 
105139d628a0SDimitry Andric     /* In 16-bit, invert the attributes. */
10524ba319b5SDimitry Andric     if (insn->mode == MODE_16BIT) {
10534ba319b5SDimitry Andric       attrMask ^= ATTR_ADSIZE;
10544ba319b5SDimitry Andric 
10554ba319b5SDimitry Andric       /* The OpSize attribute is only valid with the absolute moves. */
10564ba319b5SDimitry Andric       if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0))
10574ba319b5SDimitry Andric         attrMask ^= ATTR_OPSIZE;
10584ba319b5SDimitry Andric     }
105939d628a0SDimitry Andric 
106039d628a0SDimitry Andric     if (getIDWithAttrMask(&instructionID, insn, attrMask))
106139d628a0SDimitry Andric       return -1;
106239d628a0SDimitry Andric 
106339d628a0SDimitry Andric     insn->instructionID = instructionID;
106439d628a0SDimitry Andric     insn->spec = specifierForUID(instructionID);
106539d628a0SDimitry Andric     return 0;
106639d628a0SDimitry Andric   }
106791bc56edSDimitry Andric 
10682cab237bSDimitry Andric   if ((insn->mode == MODE_16BIT || insn->hasOpSize) &&
106991bc56edSDimitry Andric       !(attrMask & ATTR_OPSIZE)) {
107091bc56edSDimitry Andric     /*
107191bc56edSDimitry Andric      * The instruction tables make no distinction between instructions that
107291bc56edSDimitry Andric      * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
107391bc56edSDimitry Andric      * particular spot (i.e., many MMX operations).  In general we're
107491bc56edSDimitry Andric      * conservative, but in the specific case where OpSize is present but not
107591bc56edSDimitry Andric      * in the right place we check if there's a 16-bit operation.
107691bc56edSDimitry Andric      */
107791bc56edSDimitry Andric 
107891bc56edSDimitry Andric     const struct InstructionSpecifier *spec;
107991bc56edSDimitry Andric     uint16_t instructionIDWithOpsize;
1080d88c1a5aSDimitry Andric     llvm::StringRef specName, specWithOpSizeName;
108191bc56edSDimitry Andric 
108291bc56edSDimitry Andric     spec = specifierForUID(instructionID);
108391bc56edSDimitry Andric 
108491bc56edSDimitry Andric     if (getIDWithAttrMask(&instructionIDWithOpsize,
108591bc56edSDimitry Andric                           insn,
108691bc56edSDimitry Andric                           attrMask | ATTR_OPSIZE)) {
108791bc56edSDimitry Andric       /*
108891bc56edSDimitry Andric        * ModRM required with OpSize but not present; give up and return version
108991bc56edSDimitry Andric        * without OpSize set
109091bc56edSDimitry Andric        */
109191bc56edSDimitry Andric 
109291bc56edSDimitry Andric       insn->instructionID = instructionID;
109391bc56edSDimitry Andric       insn->spec = spec;
109491bc56edSDimitry Andric       return 0;
109591bc56edSDimitry Andric     }
109691bc56edSDimitry Andric 
109791bc56edSDimitry Andric     specName = GetInstrName(instructionID, miiArg);
109891bc56edSDimitry Andric     specWithOpSizeName = GetInstrName(instructionIDWithOpsize, miiArg);
109991bc56edSDimitry Andric 
1100d88c1a5aSDimitry Andric     if (is16BitEquivalent(specName.data(), specWithOpSizeName.data()) &&
11012cab237bSDimitry Andric         (insn->mode == MODE_16BIT) ^ insn->hasOpSize) {
110291bc56edSDimitry Andric       insn->instructionID = instructionIDWithOpsize;
110391bc56edSDimitry Andric       insn->spec = specifierForUID(instructionIDWithOpsize);
110491bc56edSDimitry Andric     } else {
110591bc56edSDimitry Andric       insn->instructionID = instructionID;
110691bc56edSDimitry Andric       insn->spec = spec;
110791bc56edSDimitry Andric     }
110891bc56edSDimitry Andric     return 0;
110991bc56edSDimitry Andric   }
111091bc56edSDimitry Andric 
111191bc56edSDimitry Andric   if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
111291bc56edSDimitry Andric       insn->rexPrefix & 0x01) {
111391bc56edSDimitry Andric     /*
111491bc56edSDimitry Andric      * NOOP shouldn't decode as NOOP if REX.b is set. Instead
111591bc56edSDimitry Andric      * it should decode as XCHG %r8, %eax.
111691bc56edSDimitry Andric      */
111791bc56edSDimitry Andric 
111891bc56edSDimitry Andric     const struct InstructionSpecifier *spec;
111991bc56edSDimitry Andric     uint16_t instructionIDWithNewOpcode;
112091bc56edSDimitry Andric     const struct InstructionSpecifier *specWithNewOpcode;
112191bc56edSDimitry Andric 
112291bc56edSDimitry Andric     spec = specifierForUID(instructionID);
112391bc56edSDimitry Andric 
112491bc56edSDimitry Andric     /* Borrow opcode from one of the other XCHGar opcodes */
112591bc56edSDimitry Andric     insn->opcode = 0x91;
112691bc56edSDimitry Andric 
112791bc56edSDimitry Andric     if (getIDWithAttrMask(&instructionIDWithNewOpcode,
112891bc56edSDimitry Andric                           insn,
112991bc56edSDimitry Andric                           attrMask)) {
113091bc56edSDimitry Andric       insn->opcode = 0x90;
113191bc56edSDimitry Andric 
113291bc56edSDimitry Andric       insn->instructionID = instructionID;
113391bc56edSDimitry Andric       insn->spec = spec;
113491bc56edSDimitry Andric       return 0;
113591bc56edSDimitry Andric     }
113691bc56edSDimitry Andric 
113791bc56edSDimitry Andric     specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode);
113891bc56edSDimitry Andric 
113991bc56edSDimitry Andric     /* Change back */
114091bc56edSDimitry Andric     insn->opcode = 0x90;
114191bc56edSDimitry Andric 
114291bc56edSDimitry Andric     insn->instructionID = instructionIDWithNewOpcode;
114391bc56edSDimitry Andric     insn->spec = specWithNewOpcode;
114491bc56edSDimitry Andric 
114591bc56edSDimitry Andric     return 0;
114691bc56edSDimitry Andric   }
114791bc56edSDimitry Andric 
114891bc56edSDimitry Andric   insn->instructionID = instructionID;
114991bc56edSDimitry Andric   insn->spec = specifierForUID(insn->instructionID);
115091bc56edSDimitry Andric 
115191bc56edSDimitry Andric   return 0;
115291bc56edSDimitry Andric }
115391bc56edSDimitry Andric 
115491bc56edSDimitry Andric /*
115591bc56edSDimitry Andric  * readSIB - Consumes the SIB byte to determine addressing information for an
115691bc56edSDimitry Andric  *   instruction.
115791bc56edSDimitry Andric  *
115891bc56edSDimitry Andric  * @param insn  - The instruction whose SIB byte is to be read.
115991bc56edSDimitry Andric  * @return      - 0 if the SIB byte was successfully read; nonzero otherwise.
116091bc56edSDimitry Andric  */
readSIB(struct InternalInstruction * insn)116191bc56edSDimitry Andric static int readSIB(struct InternalInstruction* insn) {
116291bc56edSDimitry Andric   SIBBase sibBaseBase = SIB_BASE_NONE;
116391bc56edSDimitry Andric   uint8_t index, base;
116491bc56edSDimitry Andric 
116591bc56edSDimitry Andric   dbgprintf(insn, "readSIB()");
116691bc56edSDimitry Andric 
116791bc56edSDimitry Andric   if (insn->consumedSIB)
116891bc56edSDimitry Andric     return 0;
116991bc56edSDimitry Andric 
117091bc56edSDimitry Andric   insn->consumedSIB = true;
117191bc56edSDimitry Andric 
117291bc56edSDimitry Andric   switch (insn->addressSize) {
117391bc56edSDimitry Andric   case 2:
117491bc56edSDimitry Andric     dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
117591bc56edSDimitry Andric     return -1;
117691bc56edSDimitry Andric   case 4:
11772cab237bSDimitry Andric     insn->sibIndexBase = SIB_INDEX_EAX;
117891bc56edSDimitry Andric     sibBaseBase = SIB_BASE_EAX;
117991bc56edSDimitry Andric     break;
118091bc56edSDimitry Andric   case 8:
11812cab237bSDimitry Andric     insn->sibIndexBase = SIB_INDEX_RAX;
118291bc56edSDimitry Andric     sibBaseBase = SIB_BASE_RAX;
118391bc56edSDimitry Andric     break;
118491bc56edSDimitry Andric   }
118591bc56edSDimitry Andric 
118691bc56edSDimitry Andric   if (consumeByte(insn, &insn->sib))
118791bc56edSDimitry Andric     return -1;
118891bc56edSDimitry Andric 
118991bc56edSDimitry Andric   index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
11903dac3a9bSDimitry Andric 
11913dac3a9bSDimitry Andric   if (index == 0x4) {
119291bc56edSDimitry Andric     insn->sibIndex = SIB_INDEX_NONE;
11933dac3a9bSDimitry Andric   } else {
11942cab237bSDimitry Andric     insn->sibIndex = (SIBIndex)(insn->sibIndexBase + index);
119591bc56edSDimitry Andric   }
119691bc56edSDimitry Andric 
11973dac3a9bSDimitry Andric   insn->sibScale = 1 << scaleFromSIB(insn->sib);
119891bc56edSDimitry Andric 
119991bc56edSDimitry Andric   base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
120091bc56edSDimitry Andric 
120191bc56edSDimitry Andric   switch (base) {
120291bc56edSDimitry Andric   case 0x5:
120391bc56edSDimitry Andric   case 0xd:
120491bc56edSDimitry Andric     switch (modFromModRM(insn->modRM)) {
120591bc56edSDimitry Andric     case 0x0:
120691bc56edSDimitry Andric       insn->eaDisplacement = EA_DISP_32;
120791bc56edSDimitry Andric       insn->sibBase = SIB_BASE_NONE;
120891bc56edSDimitry Andric       break;
120991bc56edSDimitry Andric     case 0x1:
121091bc56edSDimitry Andric       insn->eaDisplacement = EA_DISP_8;
121191bc56edSDimitry Andric       insn->sibBase = (SIBBase)(sibBaseBase + base);
121291bc56edSDimitry Andric       break;
121391bc56edSDimitry Andric     case 0x2:
121491bc56edSDimitry Andric       insn->eaDisplacement = EA_DISP_32;
121591bc56edSDimitry Andric       insn->sibBase = (SIBBase)(sibBaseBase + base);
121691bc56edSDimitry Andric       break;
121791bc56edSDimitry Andric     case 0x3:
121891bc56edSDimitry Andric       debug("Cannot have Mod = 0b11 and a SIB byte");
121991bc56edSDimitry Andric       return -1;
122091bc56edSDimitry Andric     }
122191bc56edSDimitry Andric     break;
122291bc56edSDimitry Andric   default:
122391bc56edSDimitry Andric     insn->sibBase = (SIBBase)(sibBaseBase + base);
122491bc56edSDimitry Andric     break;
122591bc56edSDimitry Andric   }
122691bc56edSDimitry Andric 
122791bc56edSDimitry Andric   return 0;
122891bc56edSDimitry Andric }
122991bc56edSDimitry Andric 
123091bc56edSDimitry Andric /*
123191bc56edSDimitry Andric  * readDisplacement - Consumes the displacement of an instruction.
123291bc56edSDimitry Andric  *
123391bc56edSDimitry Andric  * @param insn  - The instruction whose displacement is to be read.
123491bc56edSDimitry Andric  * @return      - 0 if the displacement byte was successfully read; nonzero
123591bc56edSDimitry Andric  *                otherwise.
123691bc56edSDimitry Andric  */
readDisplacement(struct InternalInstruction * insn)123791bc56edSDimitry Andric static int readDisplacement(struct InternalInstruction* insn) {
123891bc56edSDimitry Andric   int8_t d8;
123991bc56edSDimitry Andric   int16_t d16;
124091bc56edSDimitry Andric   int32_t d32;
124191bc56edSDimitry Andric 
124291bc56edSDimitry Andric   dbgprintf(insn, "readDisplacement()");
124391bc56edSDimitry Andric 
124491bc56edSDimitry Andric   if (insn->consumedDisplacement)
124591bc56edSDimitry Andric     return 0;
124691bc56edSDimitry Andric 
124791bc56edSDimitry Andric   insn->consumedDisplacement = true;
124891bc56edSDimitry Andric   insn->displacementOffset = insn->readerCursor - insn->startLocation;
124991bc56edSDimitry Andric 
125091bc56edSDimitry Andric   switch (insn->eaDisplacement) {
125191bc56edSDimitry Andric   case EA_DISP_NONE:
125291bc56edSDimitry Andric     insn->consumedDisplacement = false;
125391bc56edSDimitry Andric     break;
125491bc56edSDimitry Andric   case EA_DISP_8:
125591bc56edSDimitry Andric     if (consumeInt8(insn, &d8))
125691bc56edSDimitry Andric       return -1;
125791bc56edSDimitry Andric     insn->displacement = d8;
125891bc56edSDimitry Andric     break;
125991bc56edSDimitry Andric   case EA_DISP_16:
126091bc56edSDimitry Andric     if (consumeInt16(insn, &d16))
126191bc56edSDimitry Andric       return -1;
126291bc56edSDimitry Andric     insn->displacement = d16;
126391bc56edSDimitry Andric     break;
126491bc56edSDimitry Andric   case EA_DISP_32:
126591bc56edSDimitry Andric     if (consumeInt32(insn, &d32))
126691bc56edSDimitry Andric       return -1;
126791bc56edSDimitry Andric     insn->displacement = d32;
126891bc56edSDimitry Andric     break;
126991bc56edSDimitry Andric   }
127091bc56edSDimitry Andric 
127191bc56edSDimitry Andric   insn->consumedDisplacement = true;
127291bc56edSDimitry Andric   return 0;
127391bc56edSDimitry Andric }
127491bc56edSDimitry Andric 
127591bc56edSDimitry Andric /*
127691bc56edSDimitry Andric  * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and
127791bc56edSDimitry Andric  *   displacement) for an instruction and interprets it.
127891bc56edSDimitry Andric  *
127991bc56edSDimitry Andric  * @param insn  - The instruction whose addressing information is to be read.
128091bc56edSDimitry Andric  * @return      - 0 if the information was successfully read; nonzero otherwise.
128191bc56edSDimitry Andric  */
readModRM(struct InternalInstruction * insn)128291bc56edSDimitry Andric static int readModRM(struct InternalInstruction* insn) {
12834ba319b5SDimitry Andric   uint8_t mod, rm, reg, evexrm;
128491bc56edSDimitry Andric 
128591bc56edSDimitry Andric   dbgprintf(insn, "readModRM()");
128691bc56edSDimitry Andric 
128791bc56edSDimitry Andric   if (insn->consumedModRM)
128891bc56edSDimitry Andric     return 0;
128991bc56edSDimitry Andric 
129091bc56edSDimitry Andric   if (consumeByte(insn, &insn->modRM))
129191bc56edSDimitry Andric     return -1;
129291bc56edSDimitry Andric   insn->consumedModRM = true;
129391bc56edSDimitry Andric 
129491bc56edSDimitry Andric   mod     = modFromModRM(insn->modRM);
129591bc56edSDimitry Andric   rm      = rmFromModRM(insn->modRM);
129691bc56edSDimitry Andric   reg     = regFromModRM(insn->modRM);
129791bc56edSDimitry Andric 
129891bc56edSDimitry Andric   /*
129991bc56edSDimitry Andric    * This goes by insn->registerSize to pick the correct register, which messes
130091bc56edSDimitry Andric    * up if we're using (say) XMM or 8-bit register operands.  That gets fixed in
130191bc56edSDimitry Andric    * fixupReg().
130291bc56edSDimitry Andric    */
130391bc56edSDimitry Andric   switch (insn->registerSize) {
130491bc56edSDimitry Andric   case 2:
130591bc56edSDimitry Andric     insn->regBase = MODRM_REG_AX;
130691bc56edSDimitry Andric     insn->eaRegBase = EA_REG_AX;
130791bc56edSDimitry Andric     break;
130891bc56edSDimitry Andric   case 4:
130991bc56edSDimitry Andric     insn->regBase = MODRM_REG_EAX;
131091bc56edSDimitry Andric     insn->eaRegBase = EA_REG_EAX;
131191bc56edSDimitry Andric     break;
131291bc56edSDimitry Andric   case 8:
131391bc56edSDimitry Andric     insn->regBase = MODRM_REG_RAX;
131491bc56edSDimitry Andric     insn->eaRegBase = EA_REG_RAX;
131591bc56edSDimitry Andric     break;
131691bc56edSDimitry Andric   }
131791bc56edSDimitry Andric 
131891bc56edSDimitry Andric   reg |= rFromREX(insn->rexPrefix) << 3;
131991bc56edSDimitry Andric   rm  |= bFromREX(insn->rexPrefix) << 3;
13204ba319b5SDimitry Andric 
13214ba319b5SDimitry Andric   evexrm = 0;
13224ba319b5SDimitry Andric   if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT) {
132391bc56edSDimitry Andric     reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
13244ba319b5SDimitry Andric     evexrm = xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
132591bc56edSDimitry Andric   }
132691bc56edSDimitry Andric 
132791bc56edSDimitry Andric   insn->reg = (Reg)(insn->regBase + reg);
132891bc56edSDimitry Andric 
132991bc56edSDimitry Andric   switch (insn->addressSize) {
13304ba319b5SDimitry Andric   case 2: {
13314ba319b5SDimitry Andric     EABase eaBaseBase = EA_BASE_BX_SI;
133291bc56edSDimitry Andric 
133391bc56edSDimitry Andric     switch (mod) {
133491bc56edSDimitry Andric     case 0x0:
133591bc56edSDimitry Andric       if (rm == 0x6) {
133691bc56edSDimitry Andric         insn->eaBase = EA_BASE_NONE;
133791bc56edSDimitry Andric         insn->eaDisplacement = EA_DISP_16;
133891bc56edSDimitry Andric         if (readDisplacement(insn))
133991bc56edSDimitry Andric           return -1;
134091bc56edSDimitry Andric       } else {
13414ba319b5SDimitry Andric         insn->eaBase = (EABase)(eaBaseBase + rm);
134291bc56edSDimitry Andric         insn->eaDisplacement = EA_DISP_NONE;
134391bc56edSDimitry Andric       }
134491bc56edSDimitry Andric       break;
134591bc56edSDimitry Andric     case 0x1:
13464ba319b5SDimitry Andric       insn->eaBase = (EABase)(eaBaseBase + rm);
134791bc56edSDimitry Andric       insn->eaDisplacement = EA_DISP_8;
134891bc56edSDimitry Andric       insn->displacementSize = 1;
134991bc56edSDimitry Andric       if (readDisplacement(insn))
135091bc56edSDimitry Andric         return -1;
135191bc56edSDimitry Andric       break;
135291bc56edSDimitry Andric     case 0x2:
13534ba319b5SDimitry Andric       insn->eaBase = (EABase)(eaBaseBase + rm);
135491bc56edSDimitry Andric       insn->eaDisplacement = EA_DISP_16;
135591bc56edSDimitry Andric       if (readDisplacement(insn))
135691bc56edSDimitry Andric         return -1;
135791bc56edSDimitry Andric       break;
135891bc56edSDimitry Andric     case 0x3:
135991bc56edSDimitry Andric       insn->eaBase = (EABase)(insn->eaRegBase + rm);
136091bc56edSDimitry Andric       if (readDisplacement(insn))
136191bc56edSDimitry Andric         return -1;
136291bc56edSDimitry Andric       break;
136391bc56edSDimitry Andric     }
136491bc56edSDimitry Andric     break;
13654ba319b5SDimitry Andric   }
136691bc56edSDimitry Andric   case 4:
13674ba319b5SDimitry Andric   case 8: {
13684ba319b5SDimitry Andric     EABase eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
136991bc56edSDimitry Andric 
137091bc56edSDimitry Andric     switch (mod) {
137191bc56edSDimitry Andric     case 0x0:
137291bc56edSDimitry Andric       insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
1373ff0cc061SDimitry Andric       // In determining whether RIP-relative mode is used (rm=5),
1374ff0cc061SDimitry Andric       // or whether a SIB byte is present (rm=4),
1375ff0cc061SDimitry Andric       // the extension bits (REX.b and EVEX.x) are ignored.
1376ff0cc061SDimitry Andric       switch (rm & 7) {
1377ff0cc061SDimitry Andric       case 0x4: // SIB byte is present
137891bc56edSDimitry Andric         insn->eaBase = (insn->addressSize == 4 ?
137991bc56edSDimitry Andric                         EA_BASE_sib : EA_BASE_sib64);
138091bc56edSDimitry Andric         if (readSIB(insn) || readDisplacement(insn))
138191bc56edSDimitry Andric           return -1;
138291bc56edSDimitry Andric         break;
1383ff0cc061SDimitry Andric       case 0x5: // RIP-relative
138491bc56edSDimitry Andric         insn->eaBase = EA_BASE_NONE;
138591bc56edSDimitry Andric         insn->eaDisplacement = EA_DISP_32;
138691bc56edSDimitry Andric         if (readDisplacement(insn))
138791bc56edSDimitry Andric           return -1;
138891bc56edSDimitry Andric         break;
138991bc56edSDimitry Andric       default:
13904ba319b5SDimitry Andric         insn->eaBase = (EABase)(eaBaseBase + rm);
139191bc56edSDimitry Andric         break;
139291bc56edSDimitry Andric       }
139391bc56edSDimitry Andric       break;
139491bc56edSDimitry Andric     case 0x1:
139591bc56edSDimitry Andric       insn->displacementSize = 1;
1396*b5893f02SDimitry Andric       LLVM_FALLTHROUGH;
139791bc56edSDimitry Andric     case 0x2:
139891bc56edSDimitry Andric       insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
1399ff0cc061SDimitry Andric       switch (rm & 7) {
1400ff0cc061SDimitry Andric       case 0x4: // SIB byte is present
140191bc56edSDimitry Andric         insn->eaBase = EA_BASE_sib;
140291bc56edSDimitry Andric         if (readSIB(insn) || readDisplacement(insn))
140391bc56edSDimitry Andric           return -1;
140491bc56edSDimitry Andric         break;
140591bc56edSDimitry Andric       default:
14064ba319b5SDimitry Andric         insn->eaBase = (EABase)(eaBaseBase + rm);
140791bc56edSDimitry Andric         if (readDisplacement(insn))
140891bc56edSDimitry Andric           return -1;
140991bc56edSDimitry Andric         break;
141091bc56edSDimitry Andric       }
141191bc56edSDimitry Andric       break;
141291bc56edSDimitry Andric     case 0x3:
141391bc56edSDimitry Andric       insn->eaDisplacement = EA_DISP_NONE;
14144ba319b5SDimitry Andric       insn->eaBase = (EABase)(insn->eaRegBase + rm + evexrm);
141591bc56edSDimitry Andric       break;
141691bc56edSDimitry Andric     }
141791bc56edSDimitry Andric     break;
14184ba319b5SDimitry Andric   }
141991bc56edSDimitry Andric   } /* switch (insn->addressSize) */
142091bc56edSDimitry Andric 
142191bc56edSDimitry Andric   return 0;
142291bc56edSDimitry Andric }
142391bc56edSDimitry Andric 
14244ba319b5SDimitry Andric #define GENERIC_FIXUP_FUNC(name, base, prefix, mask)      \
14253ca95b02SDimitry Andric   static uint16_t name(struct InternalInstruction *insn,  \
142691bc56edSDimitry Andric                        OperandType type,                  \
142791bc56edSDimitry Andric                        uint8_t index,                     \
142891bc56edSDimitry Andric                        uint8_t *valid) {                  \
142991bc56edSDimitry Andric     *valid = 1;                                           \
143091bc56edSDimitry Andric     switch (type) {                                       \
143191bc56edSDimitry Andric     default:                                              \
143291bc56edSDimitry Andric       debug("Unhandled register type");                   \
143391bc56edSDimitry Andric       *valid = 0;                                         \
143491bc56edSDimitry Andric       return 0;                                           \
143591bc56edSDimitry Andric     case TYPE_Rv:                                         \
143691bc56edSDimitry Andric       return base + index;                                \
143791bc56edSDimitry Andric     case TYPE_R8:                                         \
14384ba319b5SDimitry Andric       index &= mask;                                      \
14394ba319b5SDimitry Andric       if (index > 0xf)                                    \
14404ba319b5SDimitry Andric         *valid = 0;                                       \
144191bc56edSDimitry Andric       if (insn->rexPrefix &&                              \
144291bc56edSDimitry Andric          index >= 4 && index <= 7) {                      \
144391bc56edSDimitry Andric         return prefix##_SPL + (index - 4);                \
144491bc56edSDimitry Andric       } else {                                            \
144591bc56edSDimitry Andric         return prefix##_AL + index;                       \
144691bc56edSDimitry Andric       }                                                   \
144791bc56edSDimitry Andric     case TYPE_R16:                                        \
14484ba319b5SDimitry Andric       index &= mask;                                      \
14494ba319b5SDimitry Andric       if (index > 0xf)                                    \
14504ba319b5SDimitry Andric         *valid = 0;                                       \
145191bc56edSDimitry Andric       return prefix##_AX + index;                         \
145291bc56edSDimitry Andric     case TYPE_R32:                                        \
14534ba319b5SDimitry Andric       index &= mask;                                      \
14544ba319b5SDimitry Andric       if (index > 0xf)                                    \
14554ba319b5SDimitry Andric         *valid = 0;                                       \
145691bc56edSDimitry Andric       return prefix##_EAX + index;                        \
145791bc56edSDimitry Andric     case TYPE_R64:                                        \
14584ba319b5SDimitry Andric       index &= mask;                                      \
14594ba319b5SDimitry Andric       if (index > 0xf)                                    \
14604ba319b5SDimitry Andric         *valid = 0;                                       \
146191bc56edSDimitry Andric       return prefix##_RAX + index;                        \
14627a7e6055SDimitry Andric     case TYPE_ZMM:                                        \
146391bc56edSDimitry Andric       return prefix##_ZMM0 + index;                       \
14647a7e6055SDimitry Andric     case TYPE_YMM:                                        \
146591bc56edSDimitry Andric       return prefix##_YMM0 + index;                       \
14667a7e6055SDimitry Andric     case TYPE_XMM:                                        \
146791bc56edSDimitry Andric       return prefix##_XMM0 + index;                       \
14687a7e6055SDimitry Andric     case TYPE_VK:                                         \
14694ba319b5SDimitry Andric       index &= 0xf;                                       \
1470ff0cc061SDimitry Andric       if (index > 7)                                      \
1471ff0cc061SDimitry Andric         *valid = 0;                                       \
147291bc56edSDimitry Andric       return prefix##_K0 + index;                         \
147391bc56edSDimitry Andric     case TYPE_MM64:                                       \
147439d628a0SDimitry Andric       return prefix##_MM0 + (index & 0x7);                \
147591bc56edSDimitry Andric     case TYPE_SEGMENTREG:                                 \
14762cab237bSDimitry Andric       if ((index & 7) > 5)                                \
147791bc56edSDimitry Andric         *valid = 0;                                       \
14782cab237bSDimitry Andric       return prefix##_ES + (index & 7);                   \
147991bc56edSDimitry Andric     case TYPE_DEBUGREG:                                   \
148091bc56edSDimitry Andric       return prefix##_DR0 + index;                        \
148191bc56edSDimitry Andric     case TYPE_CONTROLREG:                                 \
148291bc56edSDimitry Andric       return prefix##_CR0 + index;                        \
14833ca95b02SDimitry Andric     case TYPE_BNDR:                                       \
14843ca95b02SDimitry Andric       if (index > 3)                                      \
14853ca95b02SDimitry Andric         *valid = 0;                                       \
14863ca95b02SDimitry Andric       return prefix##_BND0 + index;                       \
14872cab237bSDimitry Andric     case TYPE_MVSIBX:                                     \
14882cab237bSDimitry Andric       return prefix##_XMM0 + index;                       \
14892cab237bSDimitry Andric     case TYPE_MVSIBY:                                     \
14902cab237bSDimitry Andric       return prefix##_YMM0 + index;                       \
14912cab237bSDimitry Andric     case TYPE_MVSIBZ:                                     \
14922cab237bSDimitry Andric       return prefix##_ZMM0 + index;                       \
149391bc56edSDimitry Andric     }                                                     \
149491bc56edSDimitry Andric   }
149591bc56edSDimitry Andric 
149691bc56edSDimitry Andric /*
149791bc56edSDimitry Andric  * fixup*Value - Consults an operand type to determine the meaning of the
149891bc56edSDimitry Andric  *   reg or R/M field.  If the operand is an XMM operand, for example, an
149991bc56edSDimitry Andric  *   operand would be XMM0 instead of AX, which readModRM() would otherwise
150091bc56edSDimitry Andric  *   misinterpret it as.
150191bc56edSDimitry Andric  *
150291bc56edSDimitry Andric  * @param insn  - The instruction containing the operand.
150391bc56edSDimitry Andric  * @param type  - The operand type.
150491bc56edSDimitry Andric  * @param index - The existing value of the field as reported by readModRM().
150591bc56edSDimitry Andric  * @param valid - The address of a uint8_t.  The target is set to 1 if the
150691bc56edSDimitry Andric  *                field is valid for the register class; 0 if not.
150791bc56edSDimitry Andric  * @return      - The proper value.
150891bc56edSDimitry Andric  */
15094ba319b5SDimitry Andric GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase,    MODRM_REG, 0x1f)
15104ba319b5SDimitry Andric GENERIC_FIXUP_FUNC(fixupRMValue,  insn->eaRegBase,  EA_REG,    0xf)
151191bc56edSDimitry Andric 
151291bc56edSDimitry Andric /*
151391bc56edSDimitry Andric  * fixupReg - Consults an operand specifier to determine which of the
151491bc56edSDimitry Andric  *   fixup*Value functions to use in correcting readModRM()'ss interpretation.
151591bc56edSDimitry Andric  *
151691bc56edSDimitry Andric  * @param insn  - See fixup*Value().
151791bc56edSDimitry Andric  * @param op    - The operand specifier.
151891bc56edSDimitry Andric  * @return      - 0 if fixup was successful; -1 if the register returned was
151991bc56edSDimitry Andric  *                invalid for its class.
152091bc56edSDimitry Andric  */
fixupReg(struct InternalInstruction * insn,const struct OperandSpecifier * op)152191bc56edSDimitry Andric static int fixupReg(struct InternalInstruction *insn,
152291bc56edSDimitry Andric                     const struct OperandSpecifier *op) {
152391bc56edSDimitry Andric   uint8_t valid;
152491bc56edSDimitry Andric 
152591bc56edSDimitry Andric   dbgprintf(insn, "fixupReg()");
152691bc56edSDimitry Andric 
152791bc56edSDimitry Andric   switch ((OperandEncoding)op->encoding) {
152891bc56edSDimitry Andric   default:
152991bc56edSDimitry Andric     debug("Expected a REG or R/M encoding in fixupReg");
153091bc56edSDimitry Andric     return -1;
153191bc56edSDimitry Andric   case ENCODING_VVVV:
153291bc56edSDimitry Andric     insn->vvvv = (Reg)fixupRegValue(insn,
153391bc56edSDimitry Andric                                     (OperandType)op->type,
153491bc56edSDimitry Andric                                     insn->vvvv,
153591bc56edSDimitry Andric                                     &valid);
153691bc56edSDimitry Andric     if (!valid)
153791bc56edSDimitry Andric       return -1;
153891bc56edSDimitry Andric     break;
153991bc56edSDimitry Andric   case ENCODING_REG:
154091bc56edSDimitry Andric     insn->reg = (Reg)fixupRegValue(insn,
154191bc56edSDimitry Andric                                    (OperandType)op->type,
154291bc56edSDimitry Andric                                    insn->reg - insn->regBase,
154391bc56edSDimitry Andric                                    &valid);
154491bc56edSDimitry Andric     if (!valid)
154591bc56edSDimitry Andric       return -1;
154691bc56edSDimitry Andric     break;
154791bc56edSDimitry Andric   CASE_ENCODING_RM:
154891bc56edSDimitry Andric     if (insn->eaBase >= insn->eaRegBase) {
154991bc56edSDimitry Andric       insn->eaBase = (EABase)fixupRMValue(insn,
155091bc56edSDimitry Andric                                           (OperandType)op->type,
155191bc56edSDimitry Andric                                           insn->eaBase - insn->eaRegBase,
155291bc56edSDimitry Andric                                           &valid);
155391bc56edSDimitry Andric       if (!valid)
155491bc56edSDimitry Andric         return -1;
155591bc56edSDimitry Andric     }
155691bc56edSDimitry Andric     break;
155791bc56edSDimitry Andric   }
155891bc56edSDimitry Andric 
155991bc56edSDimitry Andric   return 0;
156091bc56edSDimitry Andric }
156191bc56edSDimitry Andric 
156291bc56edSDimitry Andric /*
156391bc56edSDimitry Andric  * readOpcodeRegister - Reads an operand from the opcode field of an
156491bc56edSDimitry Andric  *   instruction and interprets it appropriately given the operand width.
156591bc56edSDimitry Andric  *   Handles AddRegFrm instructions.
156691bc56edSDimitry Andric  *
156791bc56edSDimitry Andric  * @param insn  - the instruction whose opcode field is to be read.
156891bc56edSDimitry Andric  * @param size  - The width (in bytes) of the register being specified.
156991bc56edSDimitry Andric  *                1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
157091bc56edSDimitry Andric  *                RAX.
157191bc56edSDimitry Andric  * @return      - 0 on success; nonzero otherwise.
157291bc56edSDimitry Andric  */
readOpcodeRegister(struct InternalInstruction * insn,uint8_t size)157391bc56edSDimitry Andric static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {
157491bc56edSDimitry Andric   dbgprintf(insn, "readOpcodeRegister()");
157591bc56edSDimitry Andric 
157691bc56edSDimitry Andric   if (size == 0)
157791bc56edSDimitry Andric     size = insn->registerSize;
157891bc56edSDimitry Andric 
157991bc56edSDimitry Andric   switch (size) {
158091bc56edSDimitry Andric   case 1:
158191bc56edSDimitry Andric     insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
158291bc56edSDimitry Andric                                                   | (insn->opcode & 7)));
158391bc56edSDimitry Andric     if (insn->rexPrefix &&
158491bc56edSDimitry Andric         insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
158591bc56edSDimitry Andric         insn->opcodeRegister < MODRM_REG_AL + 0x8) {
158691bc56edSDimitry Andric       insn->opcodeRegister = (Reg)(MODRM_REG_SPL
158791bc56edSDimitry Andric                                    + (insn->opcodeRegister - MODRM_REG_AL - 4));
158891bc56edSDimitry Andric     }
158991bc56edSDimitry Andric 
159091bc56edSDimitry Andric     break;
159191bc56edSDimitry Andric   case 2:
159291bc56edSDimitry Andric     insn->opcodeRegister = (Reg)(MODRM_REG_AX
159391bc56edSDimitry Andric                                  + ((bFromREX(insn->rexPrefix) << 3)
159491bc56edSDimitry Andric                                     | (insn->opcode & 7)));
159591bc56edSDimitry Andric     break;
159691bc56edSDimitry Andric   case 4:
159791bc56edSDimitry Andric     insn->opcodeRegister = (Reg)(MODRM_REG_EAX
159891bc56edSDimitry Andric                                  + ((bFromREX(insn->rexPrefix) << 3)
159991bc56edSDimitry Andric                                     | (insn->opcode & 7)));
160091bc56edSDimitry Andric     break;
160191bc56edSDimitry Andric   case 8:
160291bc56edSDimitry Andric     insn->opcodeRegister = (Reg)(MODRM_REG_RAX
160391bc56edSDimitry Andric                                  + ((bFromREX(insn->rexPrefix) << 3)
160491bc56edSDimitry Andric                                     | (insn->opcode & 7)));
160591bc56edSDimitry Andric     break;
160691bc56edSDimitry Andric   }
160791bc56edSDimitry Andric 
160891bc56edSDimitry Andric   return 0;
160991bc56edSDimitry Andric }
161091bc56edSDimitry Andric 
161191bc56edSDimitry Andric /*
161291bc56edSDimitry Andric  * readImmediate - Consumes an immediate operand from an instruction, given the
161391bc56edSDimitry Andric  *   desired operand size.
161491bc56edSDimitry Andric  *
161591bc56edSDimitry Andric  * @param insn  - The instruction whose operand is to be read.
161691bc56edSDimitry Andric  * @param size  - The width (in bytes) of the operand.
161791bc56edSDimitry Andric  * @return      - 0 if the immediate was successfully consumed; nonzero
161891bc56edSDimitry Andric  *                otherwise.
161991bc56edSDimitry Andric  */
readImmediate(struct InternalInstruction * insn,uint8_t size)162091bc56edSDimitry Andric static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
162191bc56edSDimitry Andric   uint8_t imm8;
162291bc56edSDimitry Andric   uint16_t imm16;
162391bc56edSDimitry Andric   uint32_t imm32;
162491bc56edSDimitry Andric   uint64_t imm64;
162591bc56edSDimitry Andric 
162691bc56edSDimitry Andric   dbgprintf(insn, "readImmediate()");
162791bc56edSDimitry Andric 
162891bc56edSDimitry Andric   if (insn->numImmediatesConsumed == 2) {
162991bc56edSDimitry Andric     debug("Already consumed two immediates");
163091bc56edSDimitry Andric     return -1;
163191bc56edSDimitry Andric   }
163291bc56edSDimitry Andric 
163391bc56edSDimitry Andric   if (size == 0)
163491bc56edSDimitry Andric     size = insn->immediateSize;
163591bc56edSDimitry Andric   else
163691bc56edSDimitry Andric     insn->immediateSize = size;
163791bc56edSDimitry Andric   insn->immediateOffset = insn->readerCursor - insn->startLocation;
163891bc56edSDimitry Andric 
163991bc56edSDimitry Andric   switch (size) {
164091bc56edSDimitry Andric   case 1:
164191bc56edSDimitry Andric     if (consumeByte(insn, &imm8))
164291bc56edSDimitry Andric       return -1;
164391bc56edSDimitry Andric     insn->immediates[insn->numImmediatesConsumed] = imm8;
164491bc56edSDimitry Andric     break;
164591bc56edSDimitry Andric   case 2:
164691bc56edSDimitry Andric     if (consumeUInt16(insn, &imm16))
164791bc56edSDimitry Andric       return -1;
164891bc56edSDimitry Andric     insn->immediates[insn->numImmediatesConsumed] = imm16;
164991bc56edSDimitry Andric     break;
165091bc56edSDimitry Andric   case 4:
165191bc56edSDimitry Andric     if (consumeUInt32(insn, &imm32))
165291bc56edSDimitry Andric       return -1;
165391bc56edSDimitry Andric     insn->immediates[insn->numImmediatesConsumed] = imm32;
165491bc56edSDimitry Andric     break;
165591bc56edSDimitry Andric   case 8:
165691bc56edSDimitry Andric     if (consumeUInt64(insn, &imm64))
165791bc56edSDimitry Andric       return -1;
165891bc56edSDimitry Andric     insn->immediates[insn->numImmediatesConsumed] = imm64;
165991bc56edSDimitry Andric     break;
166091bc56edSDimitry Andric   }
166191bc56edSDimitry Andric 
166291bc56edSDimitry Andric   insn->numImmediatesConsumed++;
166391bc56edSDimitry Andric 
166491bc56edSDimitry Andric   return 0;
166591bc56edSDimitry Andric }
166691bc56edSDimitry Andric 
166791bc56edSDimitry Andric /*
166891bc56edSDimitry Andric  * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix.
166991bc56edSDimitry Andric  *
167091bc56edSDimitry Andric  * @param insn  - The instruction whose operand is to be read.
167191bc56edSDimitry Andric  * @return      - 0 if the vvvv was successfully consumed; nonzero
167291bc56edSDimitry Andric  *                otherwise.
167391bc56edSDimitry Andric  */
readVVVV(struct InternalInstruction * insn)167491bc56edSDimitry Andric static int readVVVV(struct InternalInstruction* insn) {
167591bc56edSDimitry Andric   dbgprintf(insn, "readVVVV()");
167691bc56edSDimitry Andric 
167791bc56edSDimitry Andric   int vvvv;
167891bc56edSDimitry Andric   if (insn->vectorExtensionType == TYPE_EVEX)
167991bc56edSDimitry Andric     vvvv = (v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4 |
168091bc56edSDimitry Andric             vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2]));
168191bc56edSDimitry Andric   else if (insn->vectorExtensionType == TYPE_VEX_3B)
168291bc56edSDimitry Andric     vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]);
168391bc56edSDimitry Andric   else if (insn->vectorExtensionType == TYPE_VEX_2B)
168491bc56edSDimitry Andric     vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]);
168591bc56edSDimitry Andric   else if (insn->vectorExtensionType == TYPE_XOP)
168691bc56edSDimitry Andric     vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]);
168791bc56edSDimitry Andric   else
168891bc56edSDimitry Andric     return -1;
168991bc56edSDimitry Andric 
169091bc56edSDimitry Andric   if (insn->mode != MODE_64BIT)
16914ba319b5SDimitry Andric     vvvv &= 0xf; // Can only clear bit 4. Bit 3 must be cleared later.
169291bc56edSDimitry Andric 
169391bc56edSDimitry Andric   insn->vvvv = static_cast<Reg>(vvvv);
169491bc56edSDimitry Andric   return 0;
169591bc56edSDimitry Andric }
169691bc56edSDimitry Andric 
169791bc56edSDimitry Andric /*
169891bc56edSDimitry Andric  * readMaskRegister - Reads an mask register from the opcode field of an
169991bc56edSDimitry Andric  *   instruction.
170091bc56edSDimitry Andric  *
170191bc56edSDimitry Andric  * @param insn    - The instruction whose opcode field is to be read.
170291bc56edSDimitry Andric  * @return        - 0 on success; nonzero otherwise.
170391bc56edSDimitry Andric  */
readMaskRegister(struct InternalInstruction * insn)170491bc56edSDimitry Andric static int readMaskRegister(struct InternalInstruction* insn) {
170591bc56edSDimitry Andric   dbgprintf(insn, "readMaskRegister()");
170691bc56edSDimitry Andric 
170791bc56edSDimitry Andric   if (insn->vectorExtensionType != TYPE_EVEX)
170891bc56edSDimitry Andric     return -1;
170991bc56edSDimitry Andric 
171091bc56edSDimitry Andric   insn->writemask =
171191bc56edSDimitry Andric       static_cast<Reg>(aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]));
171291bc56edSDimitry Andric   return 0;
171391bc56edSDimitry Andric }
171491bc56edSDimitry Andric 
171591bc56edSDimitry Andric /*
171691bc56edSDimitry Andric  * readOperands - Consults the specifier for an instruction and consumes all
171791bc56edSDimitry Andric  *   operands for that instruction, interpreting them as it goes.
171891bc56edSDimitry Andric  *
171991bc56edSDimitry Andric  * @param insn  - The instruction whose operands are to be read and interpreted.
172091bc56edSDimitry Andric  * @return      - 0 if all operands could be read; nonzero otherwise.
172191bc56edSDimitry Andric  */
readOperands(struct InternalInstruction * insn)172291bc56edSDimitry Andric static int readOperands(struct InternalInstruction* insn) {
172391bc56edSDimitry Andric   int hasVVVV, needVVVV;
172491bc56edSDimitry Andric   int sawRegImm = 0;
172591bc56edSDimitry Andric 
172691bc56edSDimitry Andric   dbgprintf(insn, "readOperands()");
172791bc56edSDimitry Andric 
172891bc56edSDimitry Andric   /* If non-zero vvvv specified, need to make sure one of the operands
172991bc56edSDimitry Andric      uses it. */
173091bc56edSDimitry Andric   hasVVVV = !readVVVV(insn);
173191bc56edSDimitry Andric   needVVVV = hasVVVV && (insn->vvvv != 0);
173291bc56edSDimitry Andric 
173391bc56edSDimitry Andric   for (const auto &Op : x86OperandSets[insn->spec->operands]) {
173491bc56edSDimitry Andric     switch (Op.encoding) {
173591bc56edSDimitry Andric     case ENCODING_NONE:
173691bc56edSDimitry Andric     case ENCODING_SI:
173791bc56edSDimitry Andric     case ENCODING_DI:
173891bc56edSDimitry Andric       break;
17397a7e6055SDimitry Andric     CASE_ENCODING_VSIB:
17407a7e6055SDimitry Andric       // VSIB can use the V2 bit so check only the other bits.
17417a7e6055SDimitry Andric       if (needVVVV)
17427a7e6055SDimitry Andric         needVVVV = hasVVVV & ((insn->vvvv & 0xf) != 0);
17437a7e6055SDimitry Andric       if (readModRM(insn))
17447a7e6055SDimitry Andric         return -1;
17452cab237bSDimitry Andric 
17462cab237bSDimitry Andric       // Reject if SIB wasn't used.
17472cab237bSDimitry Andric       if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
17487a7e6055SDimitry Andric         return -1;
17492cab237bSDimitry Andric 
17502cab237bSDimitry Andric       // If sibIndex was set to SIB_INDEX_NONE, index offset is 4.
17512cab237bSDimitry Andric       if (insn->sibIndex == SIB_INDEX_NONE)
17524ba319b5SDimitry Andric         insn->sibIndex = (SIBIndex)(insn->sibIndexBase + 4);
17532cab237bSDimitry Andric 
17542cab237bSDimitry Andric       // If EVEX.v2 is set this is one of the 16-31 registers.
17554ba319b5SDimitry Andric       if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT &&
17562cab237bSDimitry Andric           v2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
17572cab237bSDimitry Andric         insn->sibIndex = (SIBIndex)(insn->sibIndex + 16);
17582cab237bSDimitry Andric 
17592cab237bSDimitry Andric       // Adjust the index register to the correct size.
17602cab237bSDimitry Andric       switch ((OperandType)Op.type) {
17612cab237bSDimitry Andric       default:
17622cab237bSDimitry Andric         debug("Unhandled VSIB index type");
17632cab237bSDimitry Andric         return -1;
17642cab237bSDimitry Andric       case TYPE_MVSIBX:
17652cab237bSDimitry Andric         insn->sibIndex = (SIBIndex)(SIB_INDEX_XMM0 +
17662cab237bSDimitry Andric                                     (insn->sibIndex - insn->sibIndexBase));
17672cab237bSDimitry Andric         break;
17682cab237bSDimitry Andric       case TYPE_MVSIBY:
17692cab237bSDimitry Andric         insn->sibIndex = (SIBIndex)(SIB_INDEX_YMM0 +
17702cab237bSDimitry Andric                                     (insn->sibIndex - insn->sibIndexBase));
17712cab237bSDimitry Andric         break;
17722cab237bSDimitry Andric       case TYPE_MVSIBZ:
17732cab237bSDimitry Andric         insn->sibIndex = (SIBIndex)(SIB_INDEX_ZMM0 +
17742cab237bSDimitry Andric                                     (insn->sibIndex - insn->sibIndexBase));
17752cab237bSDimitry Andric         break;
17762cab237bSDimitry Andric       }
17772cab237bSDimitry Andric 
17787a7e6055SDimitry Andric       // Apply the AVX512 compressed displacement scaling factor.
17797a7e6055SDimitry Andric       if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
17807a7e6055SDimitry Andric         insn->displacement *= 1 << (Op.encoding - ENCODING_VSIB);
17817a7e6055SDimitry Andric       break;
178291bc56edSDimitry Andric     case ENCODING_REG:
178391bc56edSDimitry Andric     CASE_ENCODING_RM:
178491bc56edSDimitry Andric       if (readModRM(insn))
178591bc56edSDimitry Andric         return -1;
178691bc56edSDimitry Andric       if (fixupReg(insn, &Op))
178791bc56edSDimitry Andric         return -1;
178891bc56edSDimitry Andric       // Apply the AVX512 compressed displacement scaling factor.
178991bc56edSDimitry Andric       if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
179091bc56edSDimitry Andric         insn->displacement *= 1 << (Op.encoding - ENCODING_RM);
179191bc56edSDimitry Andric       break;
179291bc56edSDimitry Andric     case ENCODING_IB:
179391bc56edSDimitry Andric       if (sawRegImm) {
179491bc56edSDimitry Andric         /* Saw a register immediate so don't read again and instead split the
179591bc56edSDimitry Andric            previous immediate.  FIXME: This is a hack. */
179691bc56edSDimitry Andric         insn->immediates[insn->numImmediatesConsumed] =
179791bc56edSDimitry Andric           insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
179891bc56edSDimitry Andric         ++insn->numImmediatesConsumed;
179991bc56edSDimitry Andric         break;
180091bc56edSDimitry Andric       }
180191bc56edSDimitry Andric       if (readImmediate(insn, 1))
180291bc56edSDimitry Andric         return -1;
18037a7e6055SDimitry Andric       if (Op.type == TYPE_XMM || Op.type == TYPE_YMM)
180491bc56edSDimitry Andric         sawRegImm = 1;
180591bc56edSDimitry Andric       break;
180691bc56edSDimitry Andric     case ENCODING_IW:
180791bc56edSDimitry Andric       if (readImmediate(insn, 2))
180891bc56edSDimitry Andric         return -1;
180991bc56edSDimitry Andric       break;
181091bc56edSDimitry Andric     case ENCODING_ID:
181191bc56edSDimitry Andric       if (readImmediate(insn, 4))
181291bc56edSDimitry Andric         return -1;
181391bc56edSDimitry Andric       break;
181491bc56edSDimitry Andric     case ENCODING_IO:
181591bc56edSDimitry Andric       if (readImmediate(insn, 8))
181691bc56edSDimitry Andric         return -1;
181791bc56edSDimitry Andric       break;
181891bc56edSDimitry Andric     case ENCODING_Iv:
181991bc56edSDimitry Andric       if (readImmediate(insn, insn->immediateSize))
182091bc56edSDimitry Andric         return -1;
182191bc56edSDimitry Andric       break;
182291bc56edSDimitry Andric     case ENCODING_Ia:
182391bc56edSDimitry Andric       if (readImmediate(insn, insn->addressSize))
182491bc56edSDimitry Andric         return -1;
182591bc56edSDimitry Andric       break;
18262cab237bSDimitry Andric     case ENCODING_IRC:
18272cab237bSDimitry Andric       insn->RC = (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 1) |
18282cab237bSDimitry Andric                  lFromEVEX4of4(insn->vectorExtensionPrefix[3]);
18292cab237bSDimitry Andric       break;
183091bc56edSDimitry Andric     case ENCODING_RB:
183191bc56edSDimitry Andric       if (readOpcodeRegister(insn, 1))
183291bc56edSDimitry Andric         return -1;
183391bc56edSDimitry Andric       break;
183491bc56edSDimitry Andric     case ENCODING_RW:
183591bc56edSDimitry Andric       if (readOpcodeRegister(insn, 2))
183691bc56edSDimitry Andric         return -1;
183791bc56edSDimitry Andric       break;
183891bc56edSDimitry Andric     case ENCODING_RD:
183991bc56edSDimitry Andric       if (readOpcodeRegister(insn, 4))
184091bc56edSDimitry Andric         return -1;
184191bc56edSDimitry Andric       break;
184291bc56edSDimitry Andric     case ENCODING_RO:
184391bc56edSDimitry Andric       if (readOpcodeRegister(insn, 8))
184491bc56edSDimitry Andric         return -1;
184591bc56edSDimitry Andric       break;
184691bc56edSDimitry Andric     case ENCODING_Rv:
184791bc56edSDimitry Andric       if (readOpcodeRegister(insn, 0))
184891bc56edSDimitry Andric         return -1;
184991bc56edSDimitry Andric       break;
185091bc56edSDimitry Andric     case ENCODING_FP:
185191bc56edSDimitry Andric       break;
185291bc56edSDimitry Andric     case ENCODING_VVVV:
185391bc56edSDimitry Andric       needVVVV = 0; /* Mark that we have found a VVVV operand. */
185491bc56edSDimitry Andric       if (!hasVVVV)
185591bc56edSDimitry Andric         return -1;
18564ba319b5SDimitry Andric       if (insn->mode != MODE_64BIT)
18574ba319b5SDimitry Andric         insn->vvvv = static_cast<Reg>(insn->vvvv & 0x7);
185891bc56edSDimitry Andric       if (fixupReg(insn, &Op))
185991bc56edSDimitry Andric         return -1;
186091bc56edSDimitry Andric       break;
186191bc56edSDimitry Andric     case ENCODING_WRITEMASK:
186291bc56edSDimitry Andric       if (readMaskRegister(insn))
186391bc56edSDimitry Andric         return -1;
186491bc56edSDimitry Andric       break;
186591bc56edSDimitry Andric     case ENCODING_DUP:
186691bc56edSDimitry Andric       break;
186791bc56edSDimitry Andric     default:
186891bc56edSDimitry Andric       dbgprintf(insn, "Encountered an operand with an unknown encoding.");
186991bc56edSDimitry Andric       return -1;
187091bc56edSDimitry Andric     }
187191bc56edSDimitry Andric   }
187291bc56edSDimitry Andric 
187391bc56edSDimitry Andric   /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */
187491bc56edSDimitry Andric   if (needVVVV) return -1;
187591bc56edSDimitry Andric 
187691bc56edSDimitry Andric   return 0;
187791bc56edSDimitry Andric }
187891bc56edSDimitry Andric 
187991bc56edSDimitry Andric /*
188091bc56edSDimitry Andric  * decodeInstruction - Reads and interprets a full instruction provided by the
188191bc56edSDimitry Andric  *   user.
188291bc56edSDimitry Andric  *
188391bc56edSDimitry Andric  * @param insn      - A pointer to the instruction to be populated.  Must be
188491bc56edSDimitry Andric  *                    pre-allocated.
188591bc56edSDimitry Andric  * @param reader    - The function to be used to read the instruction's bytes.
188691bc56edSDimitry Andric  * @param readerArg - A generic argument to be passed to the reader to store
188791bc56edSDimitry Andric  *                    any internal state.
188891bc56edSDimitry Andric  * @param logger    - If non-NULL, the function to be used to write log messages
188991bc56edSDimitry Andric  *                    and warnings.
189091bc56edSDimitry Andric  * @param loggerArg - A generic argument to be passed to the logger to store
189191bc56edSDimitry Andric  *                    any internal state.
189291bc56edSDimitry Andric  * @param startLoc  - The address (in the reader's address space) of the first
189391bc56edSDimitry Andric  *                    byte in the instruction.
189491bc56edSDimitry Andric  * @param mode      - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
189591bc56edSDimitry Andric  *                    decode the instruction in.
189691bc56edSDimitry Andric  * @return          - 0 if the instruction's memory could be read; nonzero if
189791bc56edSDimitry Andric  *                    not.
189891bc56edSDimitry Andric  */
decodeInstruction(struct InternalInstruction * insn,byteReader_t reader,const void * readerArg,dlog_t logger,void * loggerArg,const void * miiArg,uint64_t startLoc,DisassemblerMode mode)189991bc56edSDimitry Andric int llvm::X86Disassembler::decodeInstruction(
190091bc56edSDimitry Andric     struct InternalInstruction *insn, byteReader_t reader,
190191bc56edSDimitry Andric     const void *readerArg, dlog_t logger, void *loggerArg, const void *miiArg,
190291bc56edSDimitry Andric     uint64_t startLoc, DisassemblerMode mode) {
190391bc56edSDimitry Andric   memset(insn, 0, sizeof(struct InternalInstruction));
190491bc56edSDimitry Andric 
190591bc56edSDimitry Andric   insn->reader = reader;
190691bc56edSDimitry Andric   insn->readerArg = readerArg;
190791bc56edSDimitry Andric   insn->dlog = logger;
190891bc56edSDimitry Andric   insn->dlogArg = loggerArg;
190991bc56edSDimitry Andric   insn->startLocation = startLoc;
191091bc56edSDimitry Andric   insn->readerCursor = startLoc;
191191bc56edSDimitry Andric   insn->mode = mode;
191291bc56edSDimitry Andric   insn->numImmediatesConsumed = 0;
191391bc56edSDimitry Andric 
191491bc56edSDimitry Andric   if (readPrefixes(insn)       ||
191591bc56edSDimitry Andric       readOpcode(insn)         ||
191691bc56edSDimitry Andric       getID(insn, miiArg)      ||
191791bc56edSDimitry Andric       insn->instructionID == 0 ||
191891bc56edSDimitry Andric       readOperands(insn))
191991bc56edSDimitry Andric     return -1;
192091bc56edSDimitry Andric 
192191bc56edSDimitry Andric   insn->operands = x86OperandSets[insn->spec->operands];
192291bc56edSDimitry Andric 
192391bc56edSDimitry Andric   insn->length = insn->readerCursor - insn->startLocation;
192491bc56edSDimitry Andric 
192591bc56edSDimitry Andric   dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu",
192691bc56edSDimitry Andric             startLoc, insn->readerCursor, insn->length);
192791bc56edSDimitry Andric 
192891bc56edSDimitry Andric   if (insn->length > 15)
192991bc56edSDimitry Andric     dbgprintf(insn, "Instruction exceeds 15-byte limit");
193091bc56edSDimitry Andric 
193191bc56edSDimitry Andric   return 0;
193291bc56edSDimitry Andric }
1933