1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "MCTargetDesc/X86BaseInfo.h"
10 #include "MCTargetDesc/X86IntelInstPrinter.h"
11 #include "MCTargetDesc/X86MCExpr.h"
12 #include "MCTargetDesc/X86TargetStreamer.h"
13 #include "TargetInfo/X86TargetInfo.h"
14 #include "X86AsmParserCommon.h"
15 #include "X86Operand.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/SmallString.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/ADT/Twine.h"
21 #include "llvm/MC/MCContext.h"
22 #include "llvm/MC/MCExpr.h"
23 #include "llvm/MC/MCInst.h"
24 #include "llvm/MC/MCInstrInfo.h"
25 #include "llvm/MC/MCParser/MCAsmLexer.h"
26 #include "llvm/MC/MCParser/MCAsmParser.h"
27 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
28 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
29 #include "llvm/MC/MCRegisterInfo.h"
30 #include "llvm/MC/MCSection.h"
31 #include "llvm/MC/MCStreamer.h"
32 #include "llvm/MC/MCSubtargetInfo.h"
33 #include "llvm/MC/MCSymbol.h"
34 #include "llvm/Support/SourceMgr.h"
35 #include "llvm/Support/TargetRegistry.h"
36 #include "llvm/Support/raw_ostream.h"
37 #include <algorithm>
38 #include <memory>
39 
40 using namespace llvm;
41 
42 static bool checkScale(unsigned Scale, StringRef &ErrMsg) {
43   if (Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
44     ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
45     return true;
46   }
47   return false;
48 }
49 
50 namespace {
51 
/// Precedence of each infix-calculator token. The table is indexed directly by
/// the token's numeric value (InfixCalculator::pushOperator performs
/// OpPrecedence[Op] lookups), so the entries must stay in the same order as
/// the InfixCalculatorTok enumerators named beside them. When two operators
/// are compared, the one with the larger value is pushed on top of the other
/// instead of flushing it to the postfix stack.
static const char OpPrecedence[] = {
    /* IC_OR       */ 0,
    /* IC_XOR      */ 1,
    /* IC_AND      */ 2,
    /* IC_LSHIFT   */ 3,
    /* IC_RSHIFT   */ 3,
    /* IC_PLUS     */ 4,
    /* IC_MINUS    */ 4,
    /* IC_MULTIPLY */ 5,
    /* IC_DIVIDE   */ 5,
    /* IC_MOD      */ 5,
    /* IC_NOT      */ 6,
    /* IC_NEG      */ 7,
    /* IC_RPAREN   */ 8,
    /* IC_LPAREN   */ 9,
    /* IC_IMM      */ 0,
    /* IC_REGISTER */ 0
};
70 
71 class X86AsmParser : public MCTargetAsmParser {
72   ParseInstructionInfo *InstInfo;
73   bool Code16GCC;
74 
  /// Encoding selectors tracked by the parser. VEXEncoding_Default is the
  /// initial value of ForcedVEXEncoding below.
  enum VEXEncoding {
    VEXEncoding_Default,
    VEXEncoding_VEX2,
    VEXEncoding_VEX3,
    VEXEncoding_EVEX,
  };

  /// Currently selected encoding; starts as VEXEncoding_Default.
  /// NOTE(review): presumably an explicit per-instruction encoding override;
  /// the code that sets and consumes it is not visible here -- confirm.
  VEXEncoding ForcedVEXEncoding = VEXEncoding_Default;
83 
84 private:
85   SMLoc consumeToken() {
86     MCAsmParser &Parser = getParser();
87     SMLoc Result = Parser.getTok().getLoc();
88     Parser.Lex();
89     return Result;
90   }
91 
92   X86TargetStreamer &getTargetStreamer() {
93     assert(getParser().getStreamer().getTargetStreamer() &&
94            "do not have a target streamer");
95     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
96     return static_cast<X86TargetStreamer &>(TS);
97   }
98 
99   unsigned MatchInstruction(const OperandVector &Operands, MCInst &Inst,
100                             uint64_t &ErrorInfo, FeatureBitset &MissingFeatures,
101                             bool matchingInlineAsm, unsigned VariantID = 0) {
102     // In Code16GCC mode, match as 32-bit.
103     if (Code16GCC)
104       SwitchMode(X86::Mode32Bit);
105     unsigned rv = MatchInstructionImpl(Operands, Inst, ErrorInfo,
106                                        MissingFeatures, matchingInlineAsm,
107                                        VariantID);
108     if (Code16GCC)
109       SwitchMode(X86::Mode16Bit);
110     return rv;
111   }
112 
  /// Tokens handled by InfixCalculator: operators, parentheses and the two
  /// operand kinds (IC_IMM, IC_REGISTER).
  ///
  /// The numeric value of each enumerator is used as an index into the
  /// OpPrecedence table, so the enumerator order must match that table.
  enum InfixCalculatorTok {
    IC_OR = 0,
    IC_XOR,
    IC_AND,
    IC_LSHIFT,
    IC_RSHIFT,
    IC_PLUS,
    IC_MINUS,
    IC_MULTIPLY,
    IC_DIVIDE,
    IC_MOD,
    IC_NOT,
    IC_NEG,
    IC_RPAREN,
    IC_LPAREN,
    IC_IMM,
    IC_REGISTER
  };
131 
  /// Kinds of Intel-syntax operators; IOK_INVALID (0) is the "no operator"
  /// value.
  /// NOTE(review): presumably the values produced by
  /// IdentifyIntelInlineAsmOperator -- its definition is not visible here.
  enum IntelOperatorKind {
    IOK_INVALID = 0,
    IOK_LENGTH,
    IOK_SIZE,
    IOK_TYPE,
    IOK_OFFSET
  };
139 
140   class InfixCalculator {
141     typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
142     SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
143     SmallVector<ICToken, 4> PostfixStack;
144 
145     bool isUnaryOperator(const InfixCalculatorTok Op) {
146       return Op == IC_NEG || Op == IC_NOT;
147     }
148 
149   public:
150     int64_t popOperand() {
151       assert (!PostfixStack.empty() && "Poped an empty stack!");
152       ICToken Op = PostfixStack.pop_back_val();
153       if (!(Op.first == IC_IMM || Op.first == IC_REGISTER))
154         return -1; // The invalid Scale value will be caught later by checkScale
155       return Op.second;
156     }
157     void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
158       assert ((Op == IC_IMM || Op == IC_REGISTER) &&
159               "Unexpected operand!");
160       PostfixStack.push_back(std::make_pair(Op, Val));
161     }
162 
163     void popOperator() { InfixOperatorStack.pop_back(); }
164     void pushOperator(InfixCalculatorTok Op) {
165       // Push the new operator if the stack is empty.
166       if (InfixOperatorStack.empty()) {
167         InfixOperatorStack.push_back(Op);
168         return;
169       }
170 
171       // Push the new operator if it has a higher precedence than the operator
172       // on the top of the stack or the operator on the top of the stack is a
173       // left parentheses.
174       unsigned Idx = InfixOperatorStack.size() - 1;
175       InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
176       if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
177         InfixOperatorStack.push_back(Op);
178         return;
179       }
180 
181       // The operator on the top of the stack has higher precedence than the
182       // new operator.
183       unsigned ParenCount = 0;
184       while (1) {
185         // Nothing to process.
186         if (InfixOperatorStack.empty())
187           break;
188 
189         Idx = InfixOperatorStack.size() - 1;
190         StackOp = InfixOperatorStack[Idx];
191         if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
192           break;
193 
194         // If we have an even parentheses count and we see a left parentheses,
195         // then stop processing.
196         if (!ParenCount && StackOp == IC_LPAREN)
197           break;
198 
199         if (StackOp == IC_RPAREN) {
200           ++ParenCount;
201           InfixOperatorStack.pop_back();
202         } else if (StackOp == IC_LPAREN) {
203           --ParenCount;
204           InfixOperatorStack.pop_back();
205         } else {
206           InfixOperatorStack.pop_back();
207           PostfixStack.push_back(std::make_pair(StackOp, 0));
208         }
209       }
210       // Push the new operator.
211       InfixOperatorStack.push_back(Op);
212     }
213 
214     int64_t execute() {
215       // Push any remaining operators onto the postfix stack.
216       while (!InfixOperatorStack.empty()) {
217         InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
218         if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
219           PostfixStack.push_back(std::make_pair(StackOp, 0));
220       }
221 
222       if (PostfixStack.empty())
223         return 0;
224 
225       SmallVector<ICToken, 16> OperandStack;
226       for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
227         ICToken Op = PostfixStack[i];
228         if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
229           OperandStack.push_back(Op);
230         } else if (isUnaryOperator(Op.first)) {
231           assert (OperandStack.size() > 0 && "Too few operands.");
232           ICToken Operand = OperandStack.pop_back_val();
233           assert (Operand.first == IC_IMM &&
234                   "Unary operation with a register!");
235           switch (Op.first) {
236           default:
237             report_fatal_error("Unexpected operator!");
238             break;
239           case IC_NEG:
240             OperandStack.push_back(std::make_pair(IC_IMM, -Operand.second));
241             break;
242           case IC_NOT:
243             OperandStack.push_back(std::make_pair(IC_IMM, ~Operand.second));
244             break;
245           }
246         } else {
247           assert (OperandStack.size() > 1 && "Too few operands.");
248           int64_t Val;
249           ICToken Op2 = OperandStack.pop_back_val();
250           ICToken Op1 = OperandStack.pop_back_val();
251           switch (Op.first) {
252           default:
253             report_fatal_error("Unexpected operator!");
254             break;
255           case IC_PLUS:
256             Val = Op1.second + Op2.second;
257             OperandStack.push_back(std::make_pair(IC_IMM, Val));
258             break;
259           case IC_MINUS:
260             Val = Op1.second - Op2.second;
261             OperandStack.push_back(std::make_pair(IC_IMM, Val));
262             break;
263           case IC_MULTIPLY:
264             assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
265                     "Multiply operation with an immediate and a register!");
266             Val = Op1.second * Op2.second;
267             OperandStack.push_back(std::make_pair(IC_IMM, Val));
268             break;
269           case IC_DIVIDE:
270             assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
271                     "Divide operation with an immediate and a register!");
272             assert (Op2.second != 0 && "Division by zero!");
273             Val = Op1.second / Op2.second;
274             OperandStack.push_back(std::make_pair(IC_IMM, Val));
275             break;
276           case IC_MOD:
277             assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
278                     "Modulo operation with an immediate and a register!");
279             Val = Op1.second % Op2.second;
280             OperandStack.push_back(std::make_pair(IC_IMM, Val));
281             break;
282           case IC_OR:
283             assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
284                     "Or operation with an immediate and a register!");
285             Val = Op1.second | Op2.second;
286             OperandStack.push_back(std::make_pair(IC_IMM, Val));
287             break;
288           case IC_XOR:
289             assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
290               "Xor operation with an immediate and a register!");
291             Val = Op1.second ^ Op2.second;
292             OperandStack.push_back(std::make_pair(IC_IMM, Val));
293             break;
294           case IC_AND:
295             assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
296                     "And operation with an immediate and a register!");
297             Val = Op1.second & Op2.second;
298             OperandStack.push_back(std::make_pair(IC_IMM, Val));
299             break;
300           case IC_LSHIFT:
301             assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
302                     "Left shift operation with an immediate and a register!");
303             Val = Op1.second << Op2.second;
304             OperandStack.push_back(std::make_pair(IC_IMM, Val));
305             break;
306           case IC_RSHIFT:
307             assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
308                     "Right shift operation with an immediate and a register!");
309             Val = Op1.second >> Op2.second;
310             OperandStack.push_back(std::make_pair(IC_IMM, Val));
311             break;
312           }
313         }
314       }
315       assert (OperandStack.size() == 1 && "Expected a single result.");
316       return OperandStack.pop_back_val().second;
317     }
318   };
319 
  /// States of IntelExprStateMachine. Each value names the kind of token the
  /// machine accepted last; IES_INIT is the state before any token and
  /// IES_ERROR is entered when a token is not acceptable in the current state.
  enum IntelExprState {
    IES_INIT,
    IES_OR,
    IES_XOR,
    IES_AND,
    IES_LSHIFT,
    IES_RSHIFT,
    IES_PLUS,
    IES_MINUS,
    IES_NOT,
    IES_MULTIPLY,
    IES_DIVIDE,
    IES_MOD,
    IES_LBRAC,
    IES_RBRAC,
    IES_LPAREN,
    IES_RPAREN,
    IES_REGISTER,
    IES_INTEGER,
    IES_IDENTIFIER,
    IES_ERROR
  };
342 
343   class IntelExprStateMachine {
344     IntelExprState State, PrevState;
345     unsigned BaseReg, IndexReg, TmpReg, Scale;
346     int64_t Imm;
347     const MCExpr *Sym;
348     StringRef SymName;
349     InfixCalculator IC;
350     InlineAsmIdentifierInfo Info;
351     short BracCount;
352     bool MemExpr;
353 
354   public:
355     IntelExprStateMachine()
356         : State(IES_INIT), PrevState(IES_ERROR), BaseReg(0), IndexReg(0),
357           TmpReg(0), Scale(0), Imm(0), Sym(nullptr), BracCount(0),
358           MemExpr(false) {}
359 
360     void addImm(int64_t imm) { Imm += imm; }
361     short getBracCount() { return BracCount; }
362     bool isMemExpr() { return MemExpr; }
363     unsigned getBaseReg() { return BaseReg; }
364     unsigned getIndexReg() { return IndexReg; }
365     unsigned getScale() { return Scale; }
366     const MCExpr *getSym() { return Sym; }
367     StringRef getSymName() { return SymName; }
368     int64_t getImm() { return Imm + IC.execute(); }
369     bool isValidEndState() {
370       return State == IES_RBRAC || State == IES_INTEGER;
371     }
372     bool hadError() { return State == IES_ERROR; }
373     InlineAsmIdentifierInfo &getIdentifierInfo() { return Info; }
374 
375     void onOr() {
376       IntelExprState CurrState = State;
377       switch (State) {
378       default:
379         State = IES_ERROR;
380         break;
381       case IES_INTEGER:
382       case IES_RPAREN:
383       case IES_REGISTER:
384         State = IES_OR;
385         IC.pushOperator(IC_OR);
386         break;
387       }
388       PrevState = CurrState;
389     }
390     void onXor() {
391       IntelExprState CurrState = State;
392       switch (State) {
393       default:
394         State = IES_ERROR;
395         break;
396       case IES_INTEGER:
397       case IES_RPAREN:
398       case IES_REGISTER:
399         State = IES_XOR;
400         IC.pushOperator(IC_XOR);
401         break;
402       }
403       PrevState = CurrState;
404     }
405     void onAnd() {
406       IntelExprState CurrState = State;
407       switch (State) {
408       default:
409         State = IES_ERROR;
410         break;
411       case IES_INTEGER:
412       case IES_RPAREN:
413       case IES_REGISTER:
414         State = IES_AND;
415         IC.pushOperator(IC_AND);
416         break;
417       }
418       PrevState = CurrState;
419     }
420     void onLShift() {
421       IntelExprState CurrState = State;
422       switch (State) {
423       default:
424         State = IES_ERROR;
425         break;
426       case IES_INTEGER:
427       case IES_RPAREN:
428       case IES_REGISTER:
429         State = IES_LSHIFT;
430         IC.pushOperator(IC_LSHIFT);
431         break;
432       }
433       PrevState = CurrState;
434     }
435     void onRShift() {
436       IntelExprState CurrState = State;
437       switch (State) {
438       default:
439         State = IES_ERROR;
440         break;
441       case IES_INTEGER:
442       case IES_RPAREN:
443       case IES_REGISTER:
444         State = IES_RSHIFT;
445         IC.pushOperator(IC_RSHIFT);
446         break;
447       }
448       PrevState = CurrState;
449     }
450     bool onPlus(StringRef &ErrMsg) {
451       IntelExprState CurrState = State;
452       switch (State) {
453       default:
454         State = IES_ERROR;
455         break;
456       case IES_INTEGER:
457       case IES_RPAREN:
458       case IES_REGISTER:
459         State = IES_PLUS;
460         IC.pushOperator(IC_PLUS);
461         if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
462           // If we already have a BaseReg, then assume this is the IndexReg with
463           // no explicit scale.
464           if (!BaseReg) {
465             BaseReg = TmpReg;
466           } else {
467             if (IndexReg) {
468               ErrMsg = "BaseReg/IndexReg already set!";
469               return true;
470             }
471             IndexReg = TmpReg;
472             Scale = 0;
473           }
474         }
475         break;
476       }
477       PrevState = CurrState;
478       return false;
479     }
480     bool onMinus(StringRef &ErrMsg) {
481       IntelExprState CurrState = State;
482       switch (State) {
483       default:
484         State = IES_ERROR;
485         break;
486       case IES_OR:
487       case IES_XOR:
488       case IES_AND:
489       case IES_LSHIFT:
490       case IES_RSHIFT:
491       case IES_PLUS:
492       case IES_NOT:
493       case IES_MULTIPLY:
494       case IES_DIVIDE:
495       case IES_MOD:
496       case IES_LPAREN:
497       case IES_RPAREN:
498       case IES_LBRAC:
499       case IES_RBRAC:
500       case IES_INTEGER:
501       case IES_REGISTER:
502       case IES_INIT:
503         State = IES_MINUS;
504         // push minus operator if it is not a negate operator
505         if (CurrState == IES_REGISTER || CurrState == IES_RPAREN ||
506             CurrState == IES_INTEGER  || CurrState == IES_RBRAC)
507           IC.pushOperator(IC_MINUS);
508         else if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
509           // We have negate operator for Scale: it's illegal
510           ErrMsg = "Scale can't be negative";
511           return true;
512         } else
513           IC.pushOperator(IC_NEG);
514         if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
515           // If we already have a BaseReg, then assume this is the IndexReg with
516           // no explicit scale.
517           if (!BaseReg) {
518             BaseReg = TmpReg;
519           } else {
520             if (IndexReg) {
521               ErrMsg = "BaseReg/IndexReg already set!";
522               return true;
523             }
524             IndexReg = TmpReg;
525             Scale = 0;
526           }
527         }
528         break;
529       }
530       PrevState = CurrState;
531       return false;
532     }
533     void onNot() {
534       IntelExprState CurrState = State;
535       switch (State) {
536       default:
537         State = IES_ERROR;
538         break;
539       case IES_OR:
540       case IES_XOR:
541       case IES_AND:
542       case IES_LSHIFT:
543       case IES_RSHIFT:
544       case IES_PLUS:
545       case IES_MINUS:
546       case IES_NOT:
547       case IES_MULTIPLY:
548       case IES_DIVIDE:
549       case IES_MOD:
550       case IES_LPAREN:
551       case IES_LBRAC:
552       case IES_INIT:
553         State = IES_NOT;
554         IC.pushOperator(IC_NOT);
555         break;
556       }
557       PrevState = CurrState;
558     }
559 
560     bool onRegister(unsigned Reg, StringRef &ErrMsg) {
561       IntelExprState CurrState = State;
562       switch (State) {
563       default:
564         State = IES_ERROR;
565         break;
566       case IES_PLUS:
567       case IES_LPAREN:
568       case IES_LBRAC:
569         State = IES_REGISTER;
570         TmpReg = Reg;
571         IC.pushOperand(IC_REGISTER);
572         break;
573       case IES_MULTIPLY:
574         // Index Register - Scale * Register
575         if (PrevState == IES_INTEGER) {
576           if (IndexReg) {
577             ErrMsg = "BaseReg/IndexReg already set!";
578             return true;
579           }
580           State = IES_REGISTER;
581           IndexReg = Reg;
582           // Get the scale and replace the 'Scale * Register' with '0'.
583           Scale = IC.popOperand();
584           if (checkScale(Scale, ErrMsg))
585             return true;
586           IC.pushOperand(IC_IMM);
587           IC.popOperator();
588         } else {
589           State = IES_ERROR;
590         }
591         break;
592       }
593       PrevState = CurrState;
594       return false;
595     }
596     bool onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName,
597                           const InlineAsmIdentifierInfo &IDInfo,
598                           bool ParsingInlineAsm, StringRef &ErrMsg) {
599       // InlineAsm: Treat an enum value as an integer
600       if (ParsingInlineAsm)
601         if (IDInfo.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
602           return onInteger(IDInfo.Enum.EnumVal, ErrMsg);
603       // Treat a symbolic constant like an integer
604       if (auto *CE = dyn_cast<MCConstantExpr>(SymRef))
605         return onInteger(CE->getValue(), ErrMsg);
606       PrevState = State;
607       bool HasSymbol = Sym != nullptr;
608       switch (State) {
609       default:
610         State = IES_ERROR;
611         break;
612       case IES_PLUS:
613       case IES_MINUS:
614       case IES_NOT:
615       case IES_INIT:
616       case IES_LBRAC:
617         MemExpr = true;
618         State = IES_INTEGER;
619         Sym = SymRef;
620         SymName = SymRefName;
621         IC.pushOperand(IC_IMM);
622         if (ParsingInlineAsm)
623           Info = IDInfo;
624         break;
625       }
626       if (HasSymbol)
627         ErrMsg = "cannot use more than one symbol in memory operand";
628       return HasSymbol;
629     }
630     bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
631       IntelExprState CurrState = State;
632       switch (State) {
633       default:
634         State = IES_ERROR;
635         break;
636       case IES_PLUS:
637       case IES_MINUS:
638       case IES_NOT:
639       case IES_OR:
640       case IES_XOR:
641       case IES_AND:
642       case IES_LSHIFT:
643       case IES_RSHIFT:
644       case IES_DIVIDE:
645       case IES_MOD:
646       case IES_MULTIPLY:
647       case IES_LPAREN:
648       case IES_INIT:
649       case IES_LBRAC:
650         State = IES_INTEGER;
651         if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
652           // Index Register - Register * Scale
653           if (IndexReg) {
654             ErrMsg = "BaseReg/IndexReg already set!";
655             return true;
656           }
657           IndexReg = TmpReg;
658           Scale = TmpInt;
659           if (checkScale(Scale, ErrMsg))
660             return true;
661           // Get the scale and replace the 'Register * Scale' with '0'.
662           IC.popOperator();
663         } else {
664           IC.pushOperand(IC_IMM, TmpInt);
665         }
666         break;
667       }
668       PrevState = CurrState;
669       return false;
670     }
671     void onStar() {
672       PrevState = State;
673       switch (State) {
674       default:
675         State = IES_ERROR;
676         break;
677       case IES_INTEGER:
678       case IES_REGISTER:
679       case IES_RPAREN:
680         State = IES_MULTIPLY;
681         IC.pushOperator(IC_MULTIPLY);
682         break;
683       }
684     }
685     void onDivide() {
686       PrevState = State;
687       switch (State) {
688       default:
689         State = IES_ERROR;
690         break;
691       case IES_INTEGER:
692       case IES_RPAREN:
693         State = IES_DIVIDE;
694         IC.pushOperator(IC_DIVIDE);
695         break;
696       }
697     }
698     void onMod() {
699       PrevState = State;
700       switch (State) {
701       default:
702         State = IES_ERROR;
703         break;
704       case IES_INTEGER:
705       case IES_RPAREN:
706         State = IES_MOD;
707         IC.pushOperator(IC_MOD);
708         break;
709       }
710     }
711     bool onLBrac() {
712       if (BracCount)
713         return true;
714       PrevState = State;
715       switch (State) {
716       default:
717         State = IES_ERROR;
718         break;
719       case IES_RBRAC:
720       case IES_INTEGER:
721       case IES_RPAREN:
722         State = IES_PLUS;
723         IC.pushOperator(IC_PLUS);
724         break;
725       case IES_INIT:
726         assert(!BracCount && "BracCount should be zero on parsing's start");
727         State = IES_LBRAC;
728         break;
729       }
730       MemExpr = true;
731       BracCount++;
732       return false;
733     }
734     bool onRBrac() {
735       IntelExprState CurrState = State;
736       switch (State) {
737       default:
738         State = IES_ERROR;
739         break;
740       case IES_INTEGER:
741       case IES_REGISTER:
742       case IES_RPAREN:
743         if (BracCount-- != 1)
744           return true;
745         State = IES_RBRAC;
746         if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
747           // If we already have a BaseReg, then assume this is the IndexReg with
748           // no explicit scale.
749           if (!BaseReg) {
750             BaseReg = TmpReg;
751           } else {
752             assert (!IndexReg && "BaseReg/IndexReg already set!");
753             IndexReg = TmpReg;
754             Scale = 0;
755           }
756         }
757         break;
758       }
759       PrevState = CurrState;
760       return false;
761     }
762     void onLParen() {
763       IntelExprState CurrState = State;
764       switch (State) {
765       default:
766         State = IES_ERROR;
767         break;
768       case IES_PLUS:
769       case IES_MINUS:
770       case IES_NOT:
771       case IES_OR:
772       case IES_XOR:
773       case IES_AND:
774       case IES_LSHIFT:
775       case IES_RSHIFT:
776       case IES_MULTIPLY:
777       case IES_DIVIDE:
778       case IES_MOD:
779       case IES_LPAREN:
780       case IES_INIT:
781       case IES_LBRAC:
782         State = IES_LPAREN;
783         IC.pushOperator(IC_LPAREN);
784         break;
785       }
786       PrevState = CurrState;
787     }
788     void onRParen() {
789       PrevState = State;
790       switch (State) {
791       default:
792         State = IES_ERROR;
793         break;
794       case IES_INTEGER:
795       case IES_REGISTER:
796       case IES_RPAREN:
797         State = IES_RPAREN;
798         IC.pushOperator(IC_RPAREN);
799         break;
800       }
801     }
802   };
803 
804   bool Error(SMLoc L, const Twine &Msg, SMRange Range = None,
805              bool MatchingInlineAsm = false) {
806     MCAsmParser &Parser = getParser();
807     if (MatchingInlineAsm) {
808       if (!getLexer().isAtStartOfStatement())
809         Parser.eatToEndOfStatement();
810       return false;
811     }
812     return Parser.Error(L, Msg, Range);
813   }
814 
815   std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg, SMRange R = SMRange()) {
816     Error(Loc, Msg, R);
817     return nullptr;
818   }
819 
820   std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
821   std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
822   bool IsSIReg(unsigned Reg);
823   unsigned GetSIDIForRegClass(unsigned RegClassID, unsigned Reg, bool IsSIReg);
824   void
825   AddDefaultSrcDestOperands(OperandVector &Operands,
826                             std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
827                             std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
828   bool VerifyAndAdjustOperands(OperandVector &OrigOperands,
829                                OperandVector &FinalOperands);
830   std::unique_ptr<X86Operand> ParseOperand();
831   std::unique_ptr<X86Operand> ParseATTOperand();
832   std::unique_ptr<X86Operand> ParseIntelOperand();
833   std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator();
834   bool ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End);
835   unsigned IdentifyIntelInlineAsmOperator(StringRef Name);
836   unsigned ParseIntelInlineAsmOperator(unsigned OpKind);
837   std::unique_ptr<X86Operand> ParseRoundingModeOp(SMLoc Start);
838   bool ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM);
839   void RewriteIntelExpression(IntelExprStateMachine &SM, SMLoc Start,
840                               SMLoc End);
841   bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
842   bool ParseIntelInlineAsmIdentifier(const MCExpr *&Val, StringRef &Identifier,
843                                      InlineAsmIdentifierInfo &Info,
844                                      bool IsUnevaluatedOperand, SMLoc &End);
845 
846   std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg,
847                                               const MCExpr *&Disp,
848                                               const SMLoc &StartLoc,
849                                               SMLoc &EndLoc);
850 
851   X86::CondCode ParseConditionCode(StringRef CCode);
852 
853   bool ParseIntelMemoryOperandSize(unsigned &Size);
854   std::unique_ptr<X86Operand>
855   CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
856                         unsigned IndexReg, unsigned Scale, SMLoc Start,
857                         SMLoc End, unsigned Size, StringRef Identifier,
858                         const InlineAsmIdentifierInfo &Info);
859 
860   bool parseDirectiveEven(SMLoc L);
861   bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
862 
863   /// CodeView FPO data directives.
864   bool parseDirectiveFPOProc(SMLoc L);
865   bool parseDirectiveFPOSetFrame(SMLoc L);
866   bool parseDirectiveFPOPushReg(SMLoc L);
867   bool parseDirectiveFPOStackAlloc(SMLoc L);
868   bool parseDirectiveFPOStackAlign(SMLoc L);
869   bool parseDirectiveFPOEndPrologue(SMLoc L);
870   bool parseDirectiveFPOEndProc(SMLoc L);
871   bool parseDirectiveFPOData(SMLoc L);
872 
873   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
874 
875   bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
876   bool processInstruction(MCInst &Inst, const OperandVector &Ops);
877 
878   /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
879   /// instrumentation around Inst.
880   void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
881 
882   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
883                                OperandVector &Operands, MCStreamer &Out,
884                                uint64_t &ErrorInfo,
885                                bool MatchingInlineAsm) override;
886 
887   void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
888                          MCStreamer &Out, bool MatchingInlineAsm);
889 
890   bool ErrorMissingFeature(SMLoc IDLoc, const FeatureBitset &MissingFeatures,
891                            bool MatchingInlineAsm);
892 
893   bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
894                                   OperandVector &Operands, MCStreamer &Out,
895                                   uint64_t &ErrorInfo,
896                                   bool MatchingInlineAsm);
897 
898   bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
899                                     OperandVector &Operands, MCStreamer &Out,
900                                     uint64_t &ErrorInfo,
901                                     bool MatchingInlineAsm);
902 
903   bool OmitRegisterFromClobberLists(unsigned RegNo) override;
904 
  /// Parses AVX512-specific operand primitives: masked registers ({%k<NUM>},
  /// {z}) and memory broadcasting ({1to<NUM>}), updating the Operands vector
  /// if required.
  /// Returns false if no parsing errors occurred, true otherwise.
908   bool HandleAVX512Operand(OperandVector &Operands,
909                            const MCParsedAsmOperand &Op);
910 
911   bool ParseZ(std::unique_ptr<X86Operand> &Z, const SMLoc &StartLoc);
912 
913   bool is64BitMode() const {
914     // FIXME: Can tablegen auto-generate this?
915     return getSTI().getFeatureBits()[X86::Mode64Bit];
916   }
917   bool is32BitMode() const {
918     // FIXME: Can tablegen auto-generate this?
919     return getSTI().getFeatureBits()[X86::Mode32Bit];
920   }
921   bool is16BitMode() const {
922     // FIXME: Can tablegen auto-generate this?
923     return getSTI().getFeatureBits()[X86::Mode16Bit];
924   }
925   void SwitchMode(unsigned mode) {
926     MCSubtargetInfo &STI = copySTI();
927     FeatureBitset AllModes({X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit});
928     FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
929     FeatureBitset FB = ComputeAvailableFeatures(
930       STI.ToggleFeature(OldMode.flip(mode)));
931     setAvailableFeatures(FB);
932 
933     assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes));
934   }
935 
936   unsigned getPointerWidth() {
937     if (is16BitMode()) return 16;
938     if (is32BitMode()) return 32;
939     if (is64BitMode()) return 64;
940     llvm_unreachable("invalid mode");
941   }
942 
943   bool isParsingIntelSyntax() {
944     return getParser().getAssemblerDialect();
945   }
946 
947   /// @name Auto-generated Matcher Functions
948   /// {
949 
950 #define GET_ASSEMBLER_HEADER
951 #include "X86GenAsmMatcher.inc"
952 
953   /// }
954 
955 public:
  /// X86-specific match result codes. The first enumerator is pinned to
  /// FIRST_TARGET_MATCH_RESULT_TY.
  enum X86MatchResultTy {
    Match_Unsupported = FIRST_TARGET_MATCH_RESULT_TY,
    // The remaining enumerators, if any, come from the generated matcher
    // include below, selected by GET_OPERAND_DIAGNOSTIC_TYPES.
#define GET_OPERAND_DIAGNOSTIC_TYPES
#include "X86GenAsmMatcher.inc"
  };
961 
962   X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
963                const MCInstrInfo &mii, const MCTargetOptions &Options)
964       : MCTargetAsmParser(Options, sti, mii),  InstInfo(nullptr),
965         Code16GCC(false) {
966 
967     Parser.addAliasForDirective(".word", ".2byte");
968 
969     // Initialize the set of available features.
970     setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
971   }
972 
973   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
974 
975   bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
976 
977   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
978                         SMLoc NameLoc, OperandVector &Operands) override;
979 
980   bool ParseDirective(AsmToken DirectiveID) override;
981 };
982 } // end anonymous namespace
983 
984 /// @name Auto-generated Match Functions
985 /// {
986 
987 static unsigned MatchRegisterName(StringRef Name);
988 
989 /// }
990 
991 static bool CheckBaseRegAndIndexRegAndScale(unsigned BaseReg, unsigned IndexReg,
992                                             unsigned Scale, bool Is64BitMode,
993                                             StringRef &ErrMsg) {
994   // If we have both a base register and an index register make sure they are
995   // both 64-bit or 32-bit registers.
996   // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
997 
998   if (BaseReg != 0 &&
999       !(BaseReg == X86::RIP || BaseReg == X86::EIP ||
1000         X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) ||
1001         X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) ||
1002         X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg))) {
1003     ErrMsg = "invalid base+index expression";
1004     return true;
1005   }
1006 
1007   if (IndexReg != 0 &&
1008       !(IndexReg == X86::EIZ || IndexReg == X86::RIZ ||
1009         X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1010         X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
1011         X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
1012         X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
1013         X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
1014         X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg))) {
1015     ErrMsg = "invalid base+index expression";
1016     return true;
1017   }
1018 
1019   if (((BaseReg == X86::RIP || BaseReg == X86::EIP) && IndexReg != 0) ||
1020       IndexReg == X86::EIP || IndexReg == X86::RIP ||
1021       IndexReg == X86::ESP || IndexReg == X86::RSP) {
1022     ErrMsg = "invalid base+index expression";
1023     return true;
1024   }
1025 
1026   // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
1027   // and then only in non-64-bit modes.
1028   if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1029       (Is64BitMode || (BaseReg != X86::BX && BaseReg != X86::BP &&
1030                        BaseReg != X86::SI && BaseReg != X86::DI))) {
1031     ErrMsg = "invalid 16-bit base register";
1032     return true;
1033   }
1034 
1035   if (BaseReg == 0 &&
1036       X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
1037     ErrMsg = "16-bit memory operand may not include only index register";
1038     return true;
1039   }
1040 
1041   if (BaseReg != 0 && IndexReg != 0) {
1042     if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
1043         (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1044          X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
1045          IndexReg == X86::EIZ)) {
1046       ErrMsg = "base register is 64-bit, but index register is not";
1047       return true;
1048     }
1049     if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
1050         (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1051          X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
1052          IndexReg == X86::RIZ)) {
1053       ErrMsg = "base register is 32-bit, but index register is not";
1054       return true;
1055     }
1056     if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
1057       if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
1058           X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
1059         ErrMsg = "base register is 16-bit, but index register is not";
1060         return true;
1061       }
1062       if ((BaseReg != X86::BX && BaseReg != X86::BP) ||
1063           (IndexReg != X86::SI && IndexReg != X86::DI)) {
1064         ErrMsg = "invalid 16-bit base/index register combination";
1065         return true;
1066       }
1067     }
1068   }
1069 
1070   // RIP/EIP-relative addressing is only supported in 64-bit mode.
1071   if (!Is64BitMode && BaseReg != 0 &&
1072       (BaseReg == X86::RIP || BaseReg == X86::EIP)) {
1073     ErrMsg = "IP-relative addressing requires 64-bit mode";
1074     return true;
1075   }
1076 
1077   return checkScale(Scale, ErrMsg);
1078 }
1079 
1080 bool X86AsmParser::ParseRegister(unsigned &RegNo,
1081                                  SMLoc &StartLoc, SMLoc &EndLoc) {
1082   MCAsmParser &Parser = getParser();
1083   RegNo = 0;
1084   const AsmToken &PercentTok = Parser.getTok();
1085   StartLoc = PercentTok.getLoc();
1086 
1087   // If we encounter a %, ignore it. This code handles registers with and
1088   // without the prefix, unprefixed registers can occur in cfi directives.
1089   if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
1090     Parser.Lex(); // Eat percent token.
1091 
1092   const AsmToken &Tok = Parser.getTok();
1093   EndLoc = Tok.getEndLoc();
1094 
1095   if (Tok.isNot(AsmToken::Identifier)) {
1096     if (isParsingIntelSyntax()) return true;
1097     return Error(StartLoc, "invalid register name",
1098                  SMRange(StartLoc, EndLoc));
1099   }
1100 
1101   RegNo = MatchRegisterName(Tok.getString());
1102 
1103   // If the match failed, try the register name as lowercase.
1104   if (RegNo == 0)
1105     RegNo = MatchRegisterName(Tok.getString().lower());
1106 
1107   // The "flags" register cannot be referenced directly.
1108   // Treat it as an identifier instead.
1109   if (isParsingInlineAsm() && isParsingIntelSyntax() && RegNo == X86::EFLAGS)
1110     RegNo = 0;
1111 
1112   if (!is64BitMode()) {
1113     // FIXME: This should be done using Requires<Not64BitMode> and
1114     // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
1115     // checked.
1116     // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
1117     // REX prefix.
1118     if (RegNo == X86::RIZ || RegNo == X86::RIP ||
1119         X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
1120         X86II::isX86_64NonExtLowByteReg(RegNo) ||
1121         X86II::isX86_64ExtendedReg(RegNo)) {
1122       StringRef RegName = Tok.getString();
1123       Parser.Lex(); // Eat register name.
1124       return Error(StartLoc,
1125                    "register %" + RegName + " is only available in 64-bit mode",
1126                    SMRange(StartLoc, EndLoc));
1127     }
1128   }
1129 
1130   // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
1131   if (RegNo == X86::ST0) {
1132     Parser.Lex(); // Eat 'st'
1133 
1134     // Check to see if we have '(4)' after %st.
1135     if (getLexer().isNot(AsmToken::LParen))
1136       return false;
1137     // Lex the paren.
1138     getParser().Lex();
1139 
1140     const AsmToken &IntTok = Parser.getTok();
1141     if (IntTok.isNot(AsmToken::Integer))
1142       return Error(IntTok.getLoc(), "expected stack index");
1143     switch (IntTok.getIntVal()) {
1144     case 0: RegNo = X86::ST0; break;
1145     case 1: RegNo = X86::ST1; break;
1146     case 2: RegNo = X86::ST2; break;
1147     case 3: RegNo = X86::ST3; break;
1148     case 4: RegNo = X86::ST4; break;
1149     case 5: RegNo = X86::ST5; break;
1150     case 6: RegNo = X86::ST6; break;
1151     case 7: RegNo = X86::ST7; break;
1152     default: return Error(IntTok.getLoc(), "invalid stack index");
1153     }
1154 
1155     if (getParser().Lex().isNot(AsmToken::RParen))
1156       return Error(Parser.getTok().getLoc(), "expected ')'");
1157 
1158     EndLoc = Parser.getTok().getEndLoc();
1159     Parser.Lex(); // Eat ')'
1160     return false;
1161   }
1162 
1163   EndLoc = Parser.getTok().getEndLoc();
1164 
1165   // If this is "db[0-15]", match it as an alias
1166   // for dr[0-15].
1167   if (RegNo == 0 && Tok.getString().startswith("db")) {
1168     if (Tok.getString().size() == 3) {
1169       switch (Tok.getString()[2]) {
1170       case '0': RegNo = X86::DR0; break;
1171       case '1': RegNo = X86::DR1; break;
1172       case '2': RegNo = X86::DR2; break;
1173       case '3': RegNo = X86::DR3; break;
1174       case '4': RegNo = X86::DR4; break;
1175       case '5': RegNo = X86::DR5; break;
1176       case '6': RegNo = X86::DR6; break;
1177       case '7': RegNo = X86::DR7; break;
1178       case '8': RegNo = X86::DR8; break;
1179       case '9': RegNo = X86::DR9; break;
1180       }
1181     } else if (Tok.getString().size() == 4 && Tok.getString()[2] == '1') {
1182       switch (Tok.getString()[3]) {
1183       case '0': RegNo = X86::DR10; break;
1184       case '1': RegNo = X86::DR11; break;
1185       case '2': RegNo = X86::DR12; break;
1186       case '3': RegNo = X86::DR13; break;
1187       case '4': RegNo = X86::DR14; break;
1188       case '5': RegNo = X86::DR15; break;
1189       }
1190     }
1191 
1192     if (RegNo != 0) {
1193       EndLoc = Parser.getTok().getEndLoc();
1194       Parser.Lex(); // Eat it.
1195       return false;
1196     }
1197   }
1198 
1199   if (RegNo == 0) {
1200     if (isParsingIntelSyntax()) return true;
1201     return Error(StartLoc, "invalid register name",
1202                  SMRange(StartLoc, EndLoc));
1203   }
1204 
1205   Parser.Lex(); // Eat identifier token.
1206   return false;
1207 }
1208 
1209 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
1210   bool Parse32 = is32BitMode() || Code16GCC;
1211   unsigned Basereg = is64BitMode() ? X86::RSI : (Parse32 ? X86::ESI : X86::SI);
1212   const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1213   return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1214                                /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
1215                                Loc, Loc, 0);
1216 }
1217 
1218 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
1219   bool Parse32 = is32BitMode() || Code16GCC;
1220   unsigned Basereg = is64BitMode() ? X86::RDI : (Parse32 ? X86::EDI : X86::DI);
1221   const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1222   return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1223                                /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
1224                                Loc, Loc, 0);
1225 }
1226 
1227 bool X86AsmParser::IsSIReg(unsigned Reg) {
1228   switch (Reg) {
1229   default: llvm_unreachable("Only (R|E)SI and (R|E)DI are expected!");
1230   case X86::RSI:
1231   case X86::ESI:
1232   case X86::SI:
1233     return true;
1234   case X86::RDI:
1235   case X86::EDI:
1236   case X86::DI:
1237     return false;
1238   }
1239 }
1240 
1241 unsigned X86AsmParser::GetSIDIForRegClass(unsigned RegClassID, unsigned Reg,
1242                                           bool IsSIReg) {
1243   switch (RegClassID) {
1244   default: llvm_unreachable("Unexpected register class");
1245   case X86::GR64RegClassID:
1246     return IsSIReg ? X86::RSI : X86::RDI;
1247   case X86::GR32RegClassID:
1248     return IsSIReg ? X86::ESI : X86::EDI;
1249   case X86::GR16RegClassID:
1250     return IsSIReg ? X86::SI : X86::DI;
1251   }
1252 }
1253 
1254 void X86AsmParser::AddDefaultSrcDestOperands(
1255     OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
1256     std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst) {
1257   if (isParsingIntelSyntax()) {
1258     Operands.push_back(std::move(Dst));
1259     Operands.push_back(std::move(Src));
1260   }
1261   else {
1262     Operands.push_back(std::move(Src));
1263     Operands.push_back(std::move(Dst));
1264   }
1265 }
1266 
1267 bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands,
1268                                            OperandVector &FinalOperands) {
1269 
1270   if (OrigOperands.size() > 1) {
1271     // Check if sizes match, OrigOperands also contains the instruction name
1272     assert(OrigOperands.size() == FinalOperands.size() + 1 &&
1273            "Operand size mismatch");
1274 
1275     SmallVector<std::pair<SMLoc, std::string>, 2> Warnings;
1276     // Verify types match
1277     int RegClassID = -1;
1278     for (unsigned int i = 0; i < FinalOperands.size(); ++i) {
1279       X86Operand &OrigOp = static_cast<X86Operand &>(*OrigOperands[i + 1]);
1280       X86Operand &FinalOp = static_cast<X86Operand &>(*FinalOperands[i]);
1281 
1282       if (FinalOp.isReg() &&
1283           (!OrigOp.isReg() || FinalOp.getReg() != OrigOp.getReg()))
1284         // Return false and let a normal complaint about bogus operands happen
1285         return false;
1286 
1287       if (FinalOp.isMem()) {
1288 
1289         if (!OrigOp.isMem())
1290           // Return false and let a normal complaint about bogus operands happen
1291           return false;
1292 
1293         unsigned OrigReg = OrigOp.Mem.BaseReg;
1294         unsigned FinalReg = FinalOp.Mem.BaseReg;
1295 
1296         // If we've already encounterd a register class, make sure all register
1297         // bases are of the same register class
1298         if (RegClassID != -1 &&
1299             !X86MCRegisterClasses[RegClassID].contains(OrigReg)) {
1300           return Error(OrigOp.getStartLoc(),
1301                        "mismatching source and destination index registers");
1302         }
1303 
1304         if (X86MCRegisterClasses[X86::GR64RegClassID].contains(OrigReg))
1305           RegClassID = X86::GR64RegClassID;
1306         else if (X86MCRegisterClasses[X86::GR32RegClassID].contains(OrigReg))
1307           RegClassID = X86::GR32RegClassID;
1308         else if (X86MCRegisterClasses[X86::GR16RegClassID].contains(OrigReg))
1309           RegClassID = X86::GR16RegClassID;
1310         else
1311           // Unexpected register class type
1312           // Return false and let a normal complaint about bogus operands happen
1313           return false;
1314 
1315         bool IsSI = IsSIReg(FinalReg);
1316         FinalReg = GetSIDIForRegClass(RegClassID, FinalReg, IsSI);
1317 
1318         if (FinalReg != OrigReg) {
1319           std::string RegName = IsSI ? "ES:(R|E)SI" : "ES:(R|E)DI";
1320           Warnings.push_back(std::make_pair(
1321               OrigOp.getStartLoc(),
1322               "memory operand is only for determining the size, " + RegName +
1323                   " will be used for the location"));
1324         }
1325 
1326         FinalOp.Mem.Size = OrigOp.Mem.Size;
1327         FinalOp.Mem.SegReg = OrigOp.Mem.SegReg;
1328         FinalOp.Mem.BaseReg = FinalReg;
1329       }
1330     }
1331 
1332     // Produce warnings only if all the operands passed the adjustment - prevent
1333     // legal cases like "movsd (%rax), %xmm0" mistakenly produce warnings
1334     for (auto &WarningMsg : Warnings) {
1335       Warning(WarningMsg.first, WarningMsg.second);
1336     }
1337 
1338     // Remove old operands
1339     for (unsigned int i = 0; i < FinalOperands.size(); ++i)
1340       OrigOperands.pop_back();
1341   }
1342   // OrigOperands.append(FinalOperands.begin(), FinalOperands.end());
1343   for (unsigned int i = 0; i < FinalOperands.size(); ++i)
1344     OrigOperands.push_back(std::move(FinalOperands[i]));
1345 
1346   return false;
1347 }
1348 
1349 std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
1350   if (isParsingIntelSyntax())
1351     return ParseIntelOperand();
1352   return ParseATTOperand();
1353 }
1354 
1355 std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
1356     unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
1357     unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
1358     const InlineAsmIdentifierInfo &Info) {
1359   // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
1360   // some other label reference.
1361   if (Info.isKind(InlineAsmIdentifierInfo::IK_Label)) {
1362     // Insert an explicit size if the user didn't have one.
1363     if (!Size) {
1364       Size = getPointerWidth();
1365       InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start,
1366                                           /*Len=*/0, Size);
1367     }
1368     // Create an absolute memory reference in order to match against
1369     // instructions taking a PC relative operand.
1370     return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size,
1371                                  Identifier, Info.Label.Decl);
1372   }
1373   // We either have a direct symbol reference, or an offset from a symbol.  The
1374   // parser always puts the symbol on the LHS, so look there for size
1375   // calculation purposes.
1376   unsigned FrontendSize = 0;
1377   void *Decl = nullptr;
1378   bool IsGlobalLV = false;
1379   if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
1380     // Size is in terms of bits in this context.
1381     FrontendSize = Info.Var.Type * 8;
1382     Decl = Info.Var.Decl;
1383     IsGlobalLV = Info.Var.IsGlobalLV;
1384   }
1385   // It is widely common for MS InlineAsm to use a global variable and one/two
1386   // registers in a mmory expression, and though unaccessible via rip/eip.
1387   if (IsGlobalLV && (BaseReg || IndexReg)) {
1388     return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End);
1389   // Otherwise, we set the base register to a non-zero value
1390   // if we don't know the actual value at this time.  This is necessary to
1391   // get the matching correct in some cases.
1392   } else {
1393     BaseReg = BaseReg ? BaseReg : 1;
1394     return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1395                                  IndexReg, Scale, Start, End, Size, Identifier,
1396                                  Decl, FrontendSize);
1397   }
1398 }
1399 
1400 // Some binary bitwise operators have a named synonymous
1401 // Query a candidate string for being such a named operator
1402 // and if so - invoke the appropriate handler
1403 bool X86AsmParser::ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM) {
1404   // A named operator should be either lower or upper case, but not a mix
1405   if (Name.compare(Name.lower()) && Name.compare(Name.upper()))
1406     return false;
1407   if (Name.equals_lower("not"))
1408     SM.onNot();
1409   else if (Name.equals_lower("or"))
1410     SM.onOr();
1411   else if (Name.equals_lower("shl"))
1412     SM.onLShift();
1413   else if (Name.equals_lower("shr"))
1414     SM.onRShift();
1415   else if (Name.equals_lower("xor"))
1416     SM.onXor();
1417   else if (Name.equals_lower("and"))
1418     SM.onAnd();
1419   else if (Name.equals_lower("mod"))
1420     SM.onMod();
1421   else
1422     return false;
1423   return true;
1424 }
1425 
1426 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1427   MCAsmParser &Parser = getParser();
1428   const AsmToken &Tok = Parser.getTok();
1429   StringRef ErrMsg;
1430 
1431   AsmToken::TokenKind PrevTK = AsmToken::Error;
1432   bool Done = false;
1433   while (!Done) {
1434     bool UpdateLocLex = true;
1435     AsmToken::TokenKind TK = getLexer().getKind();
1436 
1437     switch (TK) {
1438     default:
1439       if ((Done = SM.isValidEndState()))
1440         break;
1441       return Error(Tok.getLoc(), "unknown token in expression");
1442     case AsmToken::EndOfStatement:
1443       Done = true;
1444       break;
1445     case AsmToken::Real:
1446       // DotOperator: [ebx].0
1447       UpdateLocLex = false;
1448       if (ParseIntelDotOperator(SM, End))
1449         return true;
1450       break;
1451     case AsmToken::At:
1452     case AsmToken::String:
1453     case AsmToken::Identifier: {
1454       SMLoc IdentLoc = Tok.getLoc();
1455       StringRef Identifier = Tok.getString();
1456       UpdateLocLex = false;
1457       // Register
1458       unsigned Reg;
1459       if (Tok.is(AsmToken::Identifier) && !ParseRegister(Reg, IdentLoc, End)) {
1460         if (SM.onRegister(Reg, ErrMsg))
1461           return Error(Tok.getLoc(), ErrMsg);
1462         break;
1463       }
1464       // Operator synonymous ("not", "or" etc.)
1465       if ((UpdateLocLex = ParseIntelNamedOperator(Identifier, SM)))
1466         break;
1467       // Symbol reference, when parsing assembly content
1468       InlineAsmIdentifierInfo Info;
1469       const MCExpr *Val;
1470       if (!isParsingInlineAsm()) {
1471         if (getParser().parsePrimaryExpr(Val, End)) {
1472           return Error(Tok.getLoc(), "Unexpected identifier!");
1473         } else if (SM.onIdentifierExpr(Val, Identifier, Info, false, ErrMsg)) {
1474           return Error(IdentLoc, ErrMsg);
1475         } else
1476           break;
1477       }
1478       // MS InlineAsm operators (TYPE/LENGTH/SIZE)
1479       if (unsigned OpKind = IdentifyIntelInlineAsmOperator(Identifier)) {
1480         if (OpKind == IOK_OFFSET)
1481           return Error(IdentLoc, "Dealing OFFSET operator as part of"
1482             "a compound immediate expression is yet to be supported");
1483         if (int64_t Val = ParseIntelInlineAsmOperator(OpKind)) {
1484           if (SM.onInteger(Val, ErrMsg))
1485             return Error(IdentLoc, ErrMsg);
1486         } else
1487           return true;
1488         break;
1489       }
1490       // MS Dot Operator expression
1491       if (Identifier.count('.') && PrevTK == AsmToken::RBrac) {
1492         if (ParseIntelDotOperator(SM, End))
1493           return true;
1494         break;
1495       }
1496       // MS InlineAsm identifier
1497       // Call parseIdentifier() to combine @ with the identifier behind it.
1498       if (TK == AsmToken::At && Parser.parseIdentifier(Identifier))
1499         return Error(IdentLoc, "expected identifier");
1500       if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, false, End))
1501         return true;
1502       else if (SM.onIdentifierExpr(Val, Identifier, Info, true, ErrMsg))
1503         return Error(IdentLoc, ErrMsg);
1504       break;
1505     }
1506     case AsmToken::Integer: {
1507       // Look for 'b' or 'f' following an Integer as a directional label
1508       SMLoc Loc = getTok().getLoc();
1509       int64_t IntVal = getTok().getIntVal();
1510       End = consumeToken();
1511       UpdateLocLex = false;
1512       if (getLexer().getKind() == AsmToken::Identifier) {
1513         StringRef IDVal = getTok().getString();
1514         if (IDVal == "f" || IDVal == "b") {
1515           MCSymbol *Sym =
1516               getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b");
1517           MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1518           const MCExpr *Val =
1519               MCSymbolRefExpr::create(Sym, Variant, getContext());
1520           if (IDVal == "b" && Sym->isUndefined())
1521             return Error(Loc, "invalid reference to undefined symbol");
1522           StringRef Identifier = Sym->getName();
1523           InlineAsmIdentifierInfo Info;
1524           if (SM.onIdentifierExpr(Val, Identifier, Info,
1525               isParsingInlineAsm(), ErrMsg))
1526             return Error(Loc, ErrMsg);
1527           End = consumeToken();
1528         } else {
1529           if (SM.onInteger(IntVal, ErrMsg))
1530             return Error(Loc, ErrMsg);
1531         }
1532       } else {
1533         if (SM.onInteger(IntVal, ErrMsg))
1534           return Error(Loc, ErrMsg);
1535       }
1536       break;
1537     }
1538     case AsmToken::Plus:
1539       if (SM.onPlus(ErrMsg))
1540         return Error(getTok().getLoc(), ErrMsg);
1541       break;
1542     case AsmToken::Minus:
1543       if (SM.onMinus(ErrMsg))
1544         return Error(getTok().getLoc(), ErrMsg);
1545       break;
1546     case AsmToken::Tilde:   SM.onNot(); break;
1547     case AsmToken::Star:    SM.onStar(); break;
1548     case AsmToken::Slash:   SM.onDivide(); break;
1549     case AsmToken::Percent: SM.onMod(); break;
1550     case AsmToken::Pipe:    SM.onOr(); break;
1551     case AsmToken::Caret:   SM.onXor(); break;
1552     case AsmToken::Amp:     SM.onAnd(); break;
1553     case AsmToken::LessLess:
1554                             SM.onLShift(); break;
1555     case AsmToken::GreaterGreater:
1556                             SM.onRShift(); break;
1557     case AsmToken::LBrac:
1558       if (SM.onLBrac())
1559         return Error(Tok.getLoc(), "unexpected bracket encountered");
1560       break;
1561     case AsmToken::RBrac:
1562       if (SM.onRBrac())
1563         return Error(Tok.getLoc(), "unexpected bracket encountered");
1564       break;
1565     case AsmToken::LParen:  SM.onLParen(); break;
1566     case AsmToken::RParen:  SM.onRParen(); break;
1567     }
1568     if (SM.hadError())
1569       return Error(Tok.getLoc(), "unknown token in expression");
1570 
1571     if (!Done && UpdateLocLex)
1572       End = consumeToken();
1573 
1574     PrevTK = TK;
1575   }
1576   return false;
1577 }
1578 
1579 void X86AsmParser::RewriteIntelExpression(IntelExprStateMachine &SM,
1580                                           SMLoc Start, SMLoc End) {
1581   SMLoc Loc = Start;
1582   unsigned ExprLen = End.getPointer() - Start.getPointer();
1583   // Skip everything before a symbol displacement (if we have one)
1584   if (SM.getSym()) {
1585     StringRef SymName = SM.getSymName();
1586     if (unsigned Len =  SymName.data() - Start.getPointer())
1587       InstInfo->AsmRewrites->emplace_back(AOK_Skip, Start, Len);
1588     Loc = SMLoc::getFromPointer(SymName.data() + SymName.size());
1589     ExprLen = End.getPointer() - (SymName.data() + SymName.size());
1590     // If we have only a symbol than there's no need for complex rewrite,
1591     // simply skip everything after it
1592     if (!(SM.getBaseReg() || SM.getIndexReg() || SM.getImm())) {
1593       if (ExprLen)
1594         InstInfo->AsmRewrites->emplace_back(AOK_Skip, Loc, ExprLen);
1595       return;
1596     }
1597   }
1598   // Build an Intel Expression rewrite
1599   StringRef BaseRegStr;
1600   StringRef IndexRegStr;
1601   if (SM.getBaseReg())
1602     BaseRegStr = X86IntelInstPrinter::getRegisterName(SM.getBaseReg());
1603   if (SM.getIndexReg())
1604     IndexRegStr = X86IntelInstPrinter::getRegisterName(SM.getIndexReg());
1605   // Emit it
1606   IntelExpr Expr(BaseRegStr, IndexRegStr, SM.getScale(), SM.getImm(), SM.isMemExpr());
1607   InstInfo->AsmRewrites->emplace_back(Loc, ExprLen, Expr);
1608 }
1609 
1610 // Inline assembly may use variable names with namespace alias qualifiers.
1611 bool X86AsmParser::ParseIntelInlineAsmIdentifier(const MCExpr *&Val,
1612                                                  StringRef &Identifier,
1613                                                  InlineAsmIdentifierInfo &Info,
1614                                                  bool IsUnevaluatedOperand,
1615                                                  SMLoc &End) {
1616   MCAsmParser &Parser = getParser();
1617   assert(isParsingInlineAsm() && "Expected to be parsing inline assembly.");
1618   Val = nullptr;
1619 
1620   StringRef LineBuf(Identifier.data());
1621   SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
1622 
1623   const AsmToken &Tok = Parser.getTok();
1624   SMLoc Loc = Tok.getLoc();
1625 
1626   // Advance the token stream until the end of the current token is
1627   // after the end of what the frontend claimed.
1628   const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
1629   do {
1630     End = Tok.getEndLoc();
1631     getLexer().Lex();
1632   } while (End.getPointer() < EndPtr);
1633   Identifier = LineBuf;
1634 
1635   // The frontend should end parsing on an assembler token boundary, unless it
1636   // failed parsing.
1637   assert((End.getPointer() == EndPtr ||
1638           Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) &&
1639           "frontend claimed part of a token?");
1640 
1641   // If the identifier lookup was unsuccessful, assume that we are dealing with
1642   // a label.
1643   if (Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) {
1644     StringRef InternalName =
1645       SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
1646                                          Loc, false);
1647     assert(InternalName.size() && "We should have an internal name here.");
1648     // Push a rewrite for replacing the identifier name with the internal name.
1649     InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(),
1650                                         InternalName);
1651   } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
1652     return false;
1653   // Create the symbol reference.
1654   MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
1655   MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1656   Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext());
1657   return false;
1658 }
1659 
1660 //ParseRoundingModeOp - Parse AVX-512 rounding mode operand
1661 std::unique_ptr<X86Operand>
1662 X86AsmParser::ParseRoundingModeOp(SMLoc Start) {
1663   MCAsmParser &Parser = getParser();
1664   const AsmToken &Tok = Parser.getTok();
1665   // Eat "{" and mark the current place.
1666   const SMLoc consumedToken = consumeToken();
1667   if (Tok.isNot(AsmToken::Identifier))
1668     return ErrorOperand(Tok.getLoc(), "Expected an identifier after {");
1669   if (Tok.getIdentifier().startswith("r")){
1670     int rndMode = StringSwitch<int>(Tok.getIdentifier())
1671       .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
1672       .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF)
1673       .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF)
1674       .Case("rz", X86::STATIC_ROUNDING::TO_ZERO)
1675       .Default(-1);
1676     if (-1 == rndMode)
1677       return ErrorOperand(Tok.getLoc(), "Invalid rounding mode.");
1678      Parser.Lex();  // Eat "r*" of r*-sae
1679     if (!getLexer().is(AsmToken::Minus))
1680       return ErrorOperand(Tok.getLoc(), "Expected - at this point");
1681     Parser.Lex();  // Eat "-"
1682     Parser.Lex();  // Eat the sae
1683     if (!getLexer().is(AsmToken::RCurly))
1684       return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1685     SMLoc End = Tok.getEndLoc();
1686     Parser.Lex();  // Eat "}"
1687     const MCExpr *RndModeOp =
1688       MCConstantExpr::create(rndMode, Parser.getContext());
1689     return X86Operand::CreateImm(RndModeOp, Start, End);
1690   }
1691   if(Tok.getIdentifier().equals("sae")){
1692     Parser.Lex();  // Eat the sae
1693     if (!getLexer().is(AsmToken::RCurly))
1694       return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1695     Parser.Lex();  // Eat "}"
1696     return X86Operand::CreateToken("{sae}", consumedToken);
1697   }
1698   return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1699 }
1700 
1701 /// Parse the '.' operator.
1702 bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End) {
1703   const AsmToken &Tok = getTok();
1704   unsigned Offset;
1705 
1706   // Drop the optional '.'.
1707   StringRef DotDispStr = Tok.getString();
1708   if (DotDispStr.startswith("."))
1709     DotDispStr = DotDispStr.drop_front(1);
1710 
1711   // .Imm gets lexed as a real.
1712   if (Tok.is(AsmToken::Real)) {
1713     APInt DotDisp;
1714     DotDispStr.getAsInteger(10, DotDisp);
1715     Offset = DotDisp.getZExtValue();
1716   } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1717     std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1718     if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1719                                            Offset))
1720       return Error(Tok.getLoc(), "Unable to lookup field reference!");
1721   } else
1722     return Error(Tok.getLoc(), "Unexpected token type!");
1723 
1724   // Eat the DotExpression and update End
1725   End = SMLoc::getFromPointer(DotDispStr.data());
1726   const char *DotExprEndLoc = DotDispStr.data() + DotDispStr.size();
1727   while (Tok.getLoc().getPointer() < DotExprEndLoc)
1728     Lex();
1729   SM.addImm(Offset);
1730   return false;
1731 }
1732 
1733 /// Parse the 'offset' operator.  This operator is used to specify the
1734 /// location rather then the content of a variable.
1735 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
1736   MCAsmParser &Parser = getParser();
1737   const AsmToken &Tok = Parser.getTok();
1738   SMLoc OffsetOfLoc = Tok.getLoc();
1739   Parser.Lex(); // Eat offset.
1740 
1741   const MCExpr *Val;
1742   InlineAsmIdentifierInfo Info;
1743   SMLoc Start = Tok.getLoc(), End;
1744   StringRef Identifier = Tok.getString();
1745   if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info,
1746                                     /*Unevaluated=*/false, End))
1747     return nullptr;
1748 
1749   void *Decl = nullptr;
1750   // FIXME: MS evaluates "offset <Constant>" to the underlying integral
1751   if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
1752     return ErrorOperand(Start, "offset operator cannot yet handle constants");
1753   else if (Info.isKind(InlineAsmIdentifierInfo::IK_Var))
1754     Decl = Info.Var.Decl;
1755   // Don't emit the offset operator.
1756   InstInfo->AsmRewrites->emplace_back(AOK_Skip, OffsetOfLoc, 7);
1757 
1758   // The offset operator will have an 'r' constraint, thus we need to create
1759   // register operand to ensure proper matching.  Just pick a GPR based on
1760   // the size of a pointer.
1761   bool Parse32 = is32BitMode() || Code16GCC;
1762   unsigned RegNo = is64BitMode() ? X86::RBX : (Parse32 ? X86::EBX : X86::BX);
1763 
1764   return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1765                                OffsetOfLoc, Identifier, Decl);
1766 }
1767 
1768 // Query a candidate string for being an Intel assembly operator
1769 // Report back its kind, or IOK_INVALID if does not evaluated as a known one
1770 unsigned X86AsmParser::IdentifyIntelInlineAsmOperator(StringRef Name) {
1771   return StringSwitch<unsigned>(Name)
1772     .Cases("TYPE","type",IOK_TYPE)
1773     .Cases("SIZE","size",IOK_SIZE)
1774     .Cases("LENGTH","length",IOK_LENGTH)
1775     .Cases("OFFSET","offset",IOK_OFFSET)
1776     .Default(IOK_INVALID);
1777 }
1778 
1779 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators.  The LENGTH operator
1780 /// returns the number of elements in an array.  It returns the value 1 for
1781 /// non-array variables.  The SIZE operator returns the size of a C or C++
1782 /// variable.  A variable's size is the product of its LENGTH and TYPE.  The
1783 /// TYPE operator returns the size of a C or C++ type or variable. If the
1784 /// variable is an array, TYPE returns the size of a single element.
1785 unsigned X86AsmParser::ParseIntelInlineAsmOperator(unsigned OpKind) {
1786   MCAsmParser &Parser = getParser();
1787   const AsmToken &Tok = Parser.getTok();
1788   Parser.Lex(); // Eat operator.
1789 
1790   const MCExpr *Val = nullptr;
1791   InlineAsmIdentifierInfo Info;
1792   SMLoc Start = Tok.getLoc(), End;
1793   StringRef Identifier = Tok.getString();
1794   if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info,
1795                                     /*Unevaluated=*/true, End))
1796     return 0;
1797 
1798   if (!Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
1799     Error(Start, "unable to lookup expression");
1800     return 0;
1801   }
1802 
1803   unsigned CVal = 0;
1804   switch(OpKind) {
1805   default: llvm_unreachable("Unexpected operand kind!");
1806   case IOK_LENGTH: CVal = Info.Var.Length; break;
1807   case IOK_SIZE: CVal = Info.Var.Size; break;
1808   case IOK_TYPE: CVal = Info.Var.Type; break;
1809   }
1810 
1811   return CVal;
1812 }
1813 
1814 bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) {
1815   Size = StringSwitch<unsigned>(getTok().getString())
1816     .Cases("BYTE", "byte", 8)
1817     .Cases("WORD", "word", 16)
1818     .Cases("DWORD", "dword", 32)
1819     .Cases("FLOAT", "float", 32)
1820     .Cases("LONG", "long", 32)
1821     .Cases("FWORD", "fword", 48)
1822     .Cases("DOUBLE", "double", 64)
1823     .Cases("QWORD", "qword", 64)
1824     .Cases("MMWORD","mmword", 64)
1825     .Cases("XWORD", "xword", 80)
1826     .Cases("TBYTE", "tbyte", 80)
1827     .Cases("XMMWORD", "xmmword", 128)
1828     .Cases("YMMWORD", "ymmword", 256)
1829     .Cases("ZMMWORD", "zmmword", 512)
1830     .Default(0);
1831   if (Size) {
1832     const AsmToken &Tok = Lex(); // Eat operand size (e.g., byte, word).
1833     if (!(Tok.getString().equals("PTR") || Tok.getString().equals("ptr")))
1834       return Error(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
1835     Lex(); // Eat ptr.
1836   }
1837   return false;
1838 }
1839 
1840 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
1841   MCAsmParser &Parser = getParser();
1842   const AsmToken &Tok = Parser.getTok();
1843   SMLoc Start, End;
1844 
1845   // FIXME: Offset operator
1846   // Should be handled as part of immediate expression, as other operators
1847   // Currently, only supported as a stand-alone operand
1848   if (isParsingInlineAsm())
1849     if (IdentifyIntelInlineAsmOperator(Tok.getString()) == IOK_OFFSET)
1850       return ParseIntelOffsetOfOperator();
1851 
1852   // Parse optional Size directive.
1853   unsigned Size;
1854   if (ParseIntelMemoryOperandSize(Size))
1855     return nullptr;
1856   bool PtrInOperand = bool(Size);
1857 
1858   Start = Tok.getLoc();
1859 
1860   // Rounding mode operand.
1861   if (getLexer().is(AsmToken::LCurly))
1862     return ParseRoundingModeOp(Start);
1863 
1864   // Register operand.
1865   unsigned RegNo = 0;
1866   if (Tok.is(AsmToken::Identifier) && !ParseRegister(RegNo, Start, End)) {
1867     if (RegNo == X86::RIP)
1868       return ErrorOperand(Start, "rip can only be used as a base register");
1869     // A Register followed by ':' is considered a segment override
1870     if (Tok.isNot(AsmToken::Colon))
1871       return !PtrInOperand ? X86Operand::CreateReg(RegNo, Start, End) :
1872         ErrorOperand(Start, "expected memory operand after 'ptr', "
1873                             "found register operand instead");
1874     // An alleged segment override. check if we have a valid segment register
1875     if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
1876       return ErrorOperand(Start, "invalid segment register");
1877     // Eat ':' and update Start location
1878     Start = Lex().getLoc();
1879   }
1880 
1881   // Immediates and Memory
1882   IntelExprStateMachine SM;
1883   if (ParseIntelExpression(SM, End))
1884     return nullptr;
1885 
1886   if (isParsingInlineAsm())
1887     RewriteIntelExpression(SM, Start, Tok.getLoc());
1888 
1889   int64_t Imm = SM.getImm();
1890   const MCExpr *Disp = SM.getSym();
1891   const MCExpr *ImmDisp = MCConstantExpr::create(Imm, getContext());
1892   if (Disp && Imm)
1893     Disp = MCBinaryExpr::createAdd(Disp, ImmDisp, getContext());
1894   if (!Disp)
1895     Disp = ImmDisp;
1896 
1897   // RegNo != 0 specifies a valid segment register,
1898   // and we are parsing a segment override
1899   if (!SM.isMemExpr() && !RegNo)
1900     return X86Operand::CreateImm(Disp, Start, End);
1901 
1902   StringRef ErrMsg;
1903   unsigned BaseReg = SM.getBaseReg();
1904   unsigned IndexReg = SM.getIndexReg();
1905   unsigned Scale = SM.getScale();
1906 
1907   if (Scale == 0 && BaseReg != X86::ESP && BaseReg != X86::RSP &&
1908       (IndexReg == X86::ESP || IndexReg == X86::RSP))
1909     std::swap(BaseReg, IndexReg);
1910 
1911   // If BaseReg is a vector register and IndexReg is not, swap them unless
1912   // Scale was specified in which case it would be an error.
1913   if (Scale == 0 &&
1914       !(X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
1915         X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
1916         X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg)) &&
1917       (X86MCRegisterClasses[X86::VR128XRegClassID].contains(BaseReg) ||
1918        X86MCRegisterClasses[X86::VR256XRegClassID].contains(BaseReg) ||
1919        X86MCRegisterClasses[X86::VR512RegClassID].contains(BaseReg)))
1920     std::swap(BaseReg, IndexReg);
1921 
1922   if (Scale != 0 &&
1923       X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg))
1924     return ErrorOperand(Start, "16-bit addresses cannot have a scale");
1925 
1926   // If there was no explicit scale specified, change it to 1.
1927   if (Scale == 0)
1928     Scale = 1;
1929 
1930   // If this is a 16-bit addressing mode with the base and index in the wrong
1931   // order, swap them so CheckBaseRegAndIndexRegAndScale doesn't fail. It is
1932   // shared with att syntax where order matters.
1933   if ((BaseReg == X86::SI || BaseReg == X86::DI) &&
1934       (IndexReg == X86::BX || IndexReg == X86::BP))
1935     std::swap(BaseReg, IndexReg);
1936 
1937   if ((BaseReg || IndexReg) &&
1938       CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
1939                                       ErrMsg))
1940     return ErrorOperand(Start, ErrMsg);
1941   if (isParsingInlineAsm())
1942     return CreateMemForInlineAsm(RegNo, Disp, BaseReg, IndexReg,
1943                                  Scale, Start, End, Size, SM.getSymName(),
1944                                  SM.getIdentifierInfo());
1945   if (!(BaseReg || IndexReg || RegNo))
1946     return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size);
1947   return X86Operand::CreateMem(getPointerWidth(), RegNo, Disp,
1948                                BaseReg, IndexReg, Scale, Start, End, Size);
1949 }
1950 
1951 std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
1952   MCAsmParser &Parser = getParser();
1953   switch (getLexer().getKind()) {
1954   case AsmToken::Dollar: {
1955     // $42 or $ID -> immediate.
1956     SMLoc Start = Parser.getTok().getLoc(), End;
1957     Parser.Lex();
1958     const MCExpr *Val;
1959     // This is an immediate, so we should not parse a register. Do a precheck
1960     // for '%' to supercede intra-register parse errors.
1961     SMLoc L = Parser.getTok().getLoc();
1962     if (check(getLexer().is(AsmToken::Percent), L,
1963               "expected immediate expression") ||
1964         getParser().parseExpression(Val, End) ||
1965         check(isa<X86MCExpr>(Val), L, "expected immediate expression"))
1966       return nullptr;
1967     return X86Operand::CreateImm(Val, Start, End);
1968   }
1969   case AsmToken::LCurly: {
1970     SMLoc Start = Parser.getTok().getLoc();
1971     return ParseRoundingModeOp(Start);
1972   }
1973   default: {
1974     // This a memory operand or a register. We have some parsing complications
1975     // as a '(' may be part of an immediate expression or the addressing mode
1976     // block. This is complicated by the fact that an assembler-level variable
1977     // may refer either to a register or an immediate expression.
1978 
1979     SMLoc Loc = Parser.getTok().getLoc(), EndLoc;
1980     const MCExpr *Expr = nullptr;
1981     unsigned Reg = 0;
1982     if (getLexer().isNot(AsmToken::LParen)) {
1983       // No '(' so this is either a displacement expression or a register.
1984       if (Parser.parseExpression(Expr, EndLoc))
1985         return nullptr;
1986       if (auto *RE = dyn_cast<X86MCExpr>(Expr)) {
1987         // Segment Register. Reset Expr and copy value to register.
1988         Expr = nullptr;
1989         Reg = RE->getRegNo();
1990 
1991         // Sanity check register.
1992         if (Reg == X86::EIZ || Reg == X86::RIZ)
1993           return ErrorOperand(
1994               Loc, "%eiz and %riz can only be used as index registers",
1995               SMRange(Loc, EndLoc));
1996         if (Reg == X86::RIP)
1997           return ErrorOperand(Loc, "%rip can only be used as a base register",
1998                               SMRange(Loc, EndLoc));
1999         // Return register that are not segment prefixes immediately.
2000         if (!Parser.parseOptionalToken(AsmToken::Colon))
2001           return X86Operand::CreateReg(Reg, Loc, EndLoc);
2002         if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(Reg))
2003           return ErrorOperand(Loc, "invalid segment register");
2004       }
2005     }
2006     // This is a Memory operand.
2007     return ParseMemOperand(Reg, Expr, Loc, EndLoc);
2008   }
2009   }
2010 }
2011 
2012 // X86::COND_INVALID if not a recognized condition code or alternate mnemonic,
2013 // otherwise the EFLAGS Condition Code enumerator.
2014 X86::CondCode X86AsmParser::ParseConditionCode(StringRef CC) {
2015   return StringSwitch<X86::CondCode>(CC)
2016       .Case("o", X86::COND_O)          // Overflow
2017       .Case("no", X86::COND_NO)        // No Overflow
2018       .Cases("b", "nae", X86::COND_B)  // Below/Neither Above nor Equal
2019       .Cases("ae", "nb", X86::COND_AE) // Above or Equal/Not Below
2020       .Cases("e", "z", X86::COND_E)    // Equal/Zero
2021       .Cases("ne", "nz", X86::COND_NE) // Not Equal/Not Zero
2022       .Cases("be", "na", X86::COND_BE) // Below or Equal/Not Above
2023       .Cases("a", "nbe", X86::COND_A)  // Above/Neither Below nor Equal
2024       .Case("s", X86::COND_S)          // Sign
2025       .Case("ns", X86::COND_NS)        // No Sign
2026       .Cases("p", "pe", X86::COND_P)   // Parity/Parity Even
2027       .Cases("np", "po", X86::COND_NP) // No Parity/Parity Odd
2028       .Cases("l", "nge", X86::COND_L)  // Less/Neither Greater nor Equal
2029       .Cases("ge", "nl", X86::COND_GE) // Greater or Equal/Not Less
2030       .Cases("le", "ng", X86::COND_LE) // Less or Equal/Not Greater
2031       .Cases("g", "nle", X86::COND_G)  // Greater/Neither Less nor Equal
2032       .Default(X86::COND_INVALID);
2033 }
2034 
2035 // true on failure, false otherwise
2036 // If no {z} mark was found - Parser doesn't advance
2037 bool X86AsmParser::ParseZ(std::unique_ptr<X86Operand> &Z,
2038                           const SMLoc &StartLoc) {
2039   MCAsmParser &Parser = getParser();
2040   // Assuming we are just pass the '{' mark, quering the next token
2041   // Searched for {z}, but none was found. Return false, as no parsing error was
2042   // encountered
2043   if (!(getLexer().is(AsmToken::Identifier) &&
2044         (getLexer().getTok().getIdentifier() == "z")))
2045     return false;
2046   Parser.Lex(); // Eat z
2047   // Query and eat the '}' mark
2048   if (!getLexer().is(AsmToken::RCurly))
2049     return Error(getLexer().getLoc(), "Expected } at this point");
2050   Parser.Lex(); // Eat '}'
2051   // Assign Z with the {z} mark opernad
2052   Z = X86Operand::CreateToken("{z}", StartLoc);
2053   return false;
2054 }
2055 
2056 // true on failure, false otherwise
2057 bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
2058                                        const MCParsedAsmOperand &Op) {
2059   MCAsmParser &Parser = getParser();
2060   if (getLexer().is(AsmToken::LCurly)) {
2061     // Eat "{" and mark the current place.
2062     const SMLoc consumedToken = consumeToken();
2063     // Distinguish {1to<NUM>} from {%k<NUM>}.
2064     if(getLexer().is(AsmToken::Integer)) {
2065       // Parse memory broadcasting ({1to<NUM>}).
2066       if (getLexer().getTok().getIntVal() != 1)
2067         return TokError("Expected 1to<NUM> at this point");
2068       Parser.Lex();  // Eat "1" of 1to8
2069       if (!getLexer().is(AsmToken::Identifier) ||
2070           !getLexer().getTok().getIdentifier().startswith("to"))
2071         return TokError("Expected 1to<NUM> at this point");
2072       // Recognize only reasonable suffixes.
2073       const char *BroadcastPrimitive =
2074         StringSwitch<const char*>(getLexer().getTok().getIdentifier())
2075           .Case("to2",  "{1to2}")
2076           .Case("to4",  "{1to4}")
2077           .Case("to8",  "{1to8}")
2078           .Case("to16", "{1to16}")
2079           .Default(nullptr);
2080       if (!BroadcastPrimitive)
2081         return TokError("Invalid memory broadcast primitive.");
2082       Parser.Lex();  // Eat "toN" of 1toN
2083       if (!getLexer().is(AsmToken::RCurly))
2084         return TokError("Expected } at this point");
2085       Parser.Lex();  // Eat "}"
2086       Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
2087                                                  consumedToken));
2088       // No AVX512 specific primitives can pass
2089       // after memory broadcasting, so return.
2090       return false;
2091     } else {
2092       // Parse either {k}{z}, {z}{k}, {k} or {z}
2093       // last one have no meaning, but GCC accepts it
2094       // Currently, we're just pass a '{' mark
2095       std::unique_ptr<X86Operand> Z;
2096       if (ParseZ(Z, consumedToken))
2097         return true;
2098       // Reaching here means that parsing of the allegadly '{z}' mark yielded
2099       // no errors.
2100       // Query for the need of further parsing for a {%k<NUM>} mark
2101       if (!Z || getLexer().is(AsmToken::LCurly)) {
2102         SMLoc StartLoc = Z ? consumeToken() : consumedToken;
2103         // Parse an op-mask register mark ({%k<NUM>}), which is now to be
2104         // expected
2105         unsigned RegNo;
2106         SMLoc RegLoc;
2107         if (!ParseRegister(RegNo, RegLoc, StartLoc) &&
2108             X86MCRegisterClasses[X86::VK1RegClassID].contains(RegNo)) {
2109           if (RegNo == X86::K0)
2110             return Error(RegLoc, "Register k0 can't be used as write mask");
2111           if (!getLexer().is(AsmToken::RCurly))
2112             return Error(getLexer().getLoc(), "Expected } at this point");
2113           Operands.push_back(X86Operand::CreateToken("{", StartLoc));
2114           Operands.push_back(
2115               X86Operand::CreateReg(RegNo, StartLoc, StartLoc));
2116           Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
2117         } else
2118           return Error(getLexer().getLoc(),
2119                         "Expected an op-mask register at this point");
2120         // {%k<NUM>} mark is found, inquire for {z}
2121         if (getLexer().is(AsmToken::LCurly) && !Z) {
2122           // Have we've found a parsing error, or found no (expected) {z} mark
2123           // - report an error
2124           if (ParseZ(Z, consumeToken()) || !Z)
2125             return Error(getLexer().getLoc(),
2126                          "Expected a {z} mark at this point");
2127 
2128         }
2129         // '{z}' on its own is meaningless, hence should be ignored.
2130         // on the contrary - have it been accompanied by a K register,
2131         // allow it.
2132         if (Z)
2133           Operands.push_back(std::move(Z));
2134       }
2135     }
2136   }
2137   return false;
2138 }
2139 
2140 /// ParseMemOperand: 'seg : disp(basereg, indexreg, scale)'.  The '%ds:' prefix
2141 /// has already been parsed if present. disp may be provided as well.
2142 std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
2143                                                           const MCExpr *&Disp,
2144                                                           const SMLoc &StartLoc,
2145                                                           SMLoc &EndLoc) {
2146   MCAsmParser &Parser = getParser();
2147   SMLoc Loc;
2148   // Based on the initial passed values, we may be in any of these cases, we are
2149   // in one of these cases (with current position (*)):
2150 
2151   //   1. seg : * disp  (base-index-scale-expr)
2152   //   2. seg : *(disp) (base-index-scale-expr)
2153   //   3. seg :       *(base-index-scale-expr)
2154   //   4.        disp  *(base-index-scale-expr)
2155   //   5.      *(disp)  (base-index-scale-expr)
2156   //   6.             *(base-index-scale-expr)
2157   //   7.  disp *
2158   //   8. *(disp)
2159 
2160   // If we do not have an displacement yet, check if we're in cases 4 or 6 by
2161   // checking if the first object after the parenthesis is a register (or an
2162   // identifier referring to a register) and parse the displacement or default
2163   // to 0 as appropriate.
2164   auto isAtMemOperand = [this]() {
2165     if (this->getLexer().isNot(AsmToken::LParen))
2166       return false;
2167     AsmToken Buf[2];
2168     StringRef Id;
2169     auto TokCount = this->getLexer().peekTokens(Buf, true);
2170     if (TokCount == 0)
2171       return false;
2172     switch (Buf[0].getKind()) {
2173     case AsmToken::Percent:
2174     case AsmToken::Comma:
2175       return true;
2176     // These lower cases are doing a peekIdentifier.
2177     case AsmToken::At:
2178     case AsmToken::Dollar:
2179       if ((TokCount > 1) &&
2180           (Buf[1].is(AsmToken::Identifier) || Buf[1].is(AsmToken::String)) &&
2181           (Buf[0].getLoc().getPointer() + 1 == Buf[1].getLoc().getPointer()))
2182         Id = StringRef(Buf[0].getLoc().getPointer(),
2183                        Buf[1].getIdentifier().size() + 1);
2184       break;
2185     case AsmToken::Identifier:
2186     case AsmToken::String:
2187       Id = Buf[0].getIdentifier();
2188       break;
2189     default:
2190       return false;
2191     }
2192     // We have an ID. Check if it is bound to a register.
2193     if (!Id.empty()) {
2194       MCSymbol *Sym = this->getContext().getOrCreateSymbol(Id);
2195       if (Sym->isVariable()) {
2196         auto V = Sym->getVariableValue(/*SetUsed*/ false);
2197         return isa<X86MCExpr>(V);
2198       }
2199     }
2200     return false;
2201   };
2202 
2203   if (!Disp) {
2204     // Parse immediate if we're not at a mem operand yet.
2205     if (!isAtMemOperand()) {
2206       if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(Disp, EndLoc))
2207         return nullptr;
2208       assert(!isa<X86MCExpr>(Disp) && "Expected non-register here.");
2209     } else {
2210       // Disp is implicitly zero if we haven't parsed it yet.
2211       Disp = MCConstantExpr::create(0, Parser.getContext());
2212     }
2213   }
2214 
2215   // We are now either at the end of the operand or at the '(' at the start of a
2216   // base-index-scale-expr.
2217 
2218   if (!parseOptionalToken(AsmToken::LParen)) {
2219     if (SegReg == 0)
2220       return X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc);
2221     return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
2222                                  StartLoc, EndLoc);
2223   }
2224 
2225   // If we reached here, then eat the '(' and Process
2226   // the rest of the memory operand.
2227   unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
2228   SMLoc BaseLoc = getLexer().getLoc();
2229   const MCExpr *E;
2230   StringRef ErrMsg;
2231 
2232   // Parse BaseReg if one is provided.
2233   if (getLexer().isNot(AsmToken::Comma) && getLexer().isNot(AsmToken::RParen)) {
2234     if (Parser.parseExpression(E, EndLoc) ||
2235         check(!isa<X86MCExpr>(E), BaseLoc, "expected register here"))
2236       return nullptr;
2237 
2238     // Sanity check register.
2239     BaseReg = cast<X86MCExpr>(E)->getRegNo();
2240     if (BaseReg == X86::EIZ || BaseReg == X86::RIZ)
2241       return ErrorOperand(BaseLoc,
2242                           "eiz and riz can only be used as index registers",
2243                           SMRange(BaseLoc, EndLoc));
2244   }
2245 
2246   if (parseOptionalToken(AsmToken::Comma)) {
2247     // Following the comma we should have either an index register, or a scale
2248     // value. We don't support the later form, but we want to parse it
2249     // correctly.
2250     //
2251     // Even though it would be completely consistent to support syntax like
2252     // "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
2253     if (getLexer().isNot(AsmToken::RParen)) {
2254       if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(E, EndLoc))
2255         return nullptr;
2256 
2257       if (!isa<X86MCExpr>(E)) {
2258         // We've parsed an unexpected Scale Value instead of an index
2259         // register. Interpret it as an absolute.
2260         int64_t ScaleVal;
2261         if (!E->evaluateAsAbsolute(ScaleVal, getStreamer().getAssemblerPtr()))
2262           return ErrorOperand(Loc, "expected absolute expression");
2263         if (ScaleVal != 1)
2264           Warning(Loc, "scale factor without index register is ignored");
2265         Scale = 1;
2266       } else { // IndexReg Found.
2267         IndexReg = cast<X86MCExpr>(E)->getRegNo();
2268 
2269         if (BaseReg == X86::RIP)
2270           return ErrorOperand(
2271               Loc, "%rip as base register can not have an index register");
2272         if (IndexReg == X86::RIP)
2273           return ErrorOperand(Loc, "%rip is not allowed as an index register");
2274 
2275         if (parseOptionalToken(AsmToken::Comma)) {
2276           // Parse the scale amount:
2277           //  ::= ',' [scale-expression]
2278 
2279           // A scale amount without an index is ignored.
2280           if (getLexer().isNot(AsmToken::RParen)) {
2281             int64_t ScaleVal;
2282             if (Parser.parseTokenLoc(Loc) ||
2283                 Parser.parseAbsoluteExpression(ScaleVal))
2284               return ErrorOperand(Loc, "expected scale expression");
2285             Scale = (unsigned)ScaleVal;
2286             // Validate the scale amount.
2287             if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
2288                 Scale != 1)
2289               return ErrorOperand(Loc,
2290                                   "scale factor in 16-bit address must be 1");
2291             if (checkScale(Scale, ErrMsg))
2292               return ErrorOperand(Loc, ErrMsg);
2293           }
2294         }
2295       }
2296     }
2297   }
2298 
2299   // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
2300   if (parseToken(AsmToken::RParen, "unexpected token in memory operand"))
2301     return nullptr;
2302 
2303   // This is to support otherwise illegal operand (%dx) found in various
2304   // unofficial manuals examples (e.g. "out[s]?[bwl]? %al, (%dx)") and must now
2305   // be supported. Mark such DX variants separately fix only in special cases.
2306   if (BaseReg == X86::DX && IndexReg == 0 && Scale == 1 && SegReg == 0 &&
2307       isa<MCConstantExpr>(Disp) && cast<MCConstantExpr>(Disp)->getValue() == 0)
2308     return X86Operand::CreateDXReg(BaseLoc, BaseLoc);
2309 
2310   if (CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
2311                                       ErrMsg))
2312     return ErrorOperand(BaseLoc, ErrMsg);
2313 
2314   if (SegReg || BaseReg || IndexReg)
2315     return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
2316                                  IndexReg, Scale, StartLoc, EndLoc);
2317   return X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc);
2318 }
2319 
2320 // Parse either a standard primary expression or a register.
2321 bool X86AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
2322   MCAsmParser &Parser = getParser();
2323   // See if this is a register first.
2324   if (getTok().is(AsmToken::Percent) ||
2325       (isParsingIntelSyntax() && getTok().is(AsmToken::Identifier) &&
2326        MatchRegisterName(Parser.getTok().getString()))) {
2327     SMLoc StartLoc = Parser.getTok().getLoc();
2328     unsigned RegNo;
2329     if (ParseRegister(RegNo, StartLoc, EndLoc))
2330       return true;
2331     Res = X86MCExpr::create(RegNo, Parser.getContext());
2332     return false;
2333   }
2334   return Parser.parsePrimaryExpr(Res, EndLoc);
2335 }
2336 
2337 bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
2338                                     SMLoc NameLoc, OperandVector &Operands) {
2339   MCAsmParser &Parser = getParser();
2340   InstInfo = &Info;
2341 
2342   // Reset the forced VEX encoding.
2343   ForcedVEXEncoding = VEXEncoding_Default;
2344 
2345   // Parse pseudo prefixes.
2346   while (1) {
2347     if (Name == "{") {
2348       if (getLexer().isNot(AsmToken::Identifier))
2349         return Error(Parser.getTok().getLoc(), "Unexpected token after '{'");
2350       std::string Prefix = Parser.getTok().getString().lower();
2351       Parser.Lex(); // Eat identifier.
2352       if (getLexer().isNot(AsmToken::RCurly))
2353         return Error(Parser.getTok().getLoc(), "Expected '}'");
2354       Parser.Lex(); // Eat curly.
2355 
2356       if (Prefix == "vex2")
2357         ForcedVEXEncoding = VEXEncoding_VEX2;
2358       else if (Prefix == "vex3")
2359         ForcedVEXEncoding = VEXEncoding_VEX3;
2360       else if (Prefix == "evex")
2361         ForcedVEXEncoding = VEXEncoding_EVEX;
2362       else
2363         return Error(NameLoc, "unknown prefix");
2364 
2365       NameLoc = Parser.getTok().getLoc();
2366       if (getLexer().is(AsmToken::LCurly)) {
2367         Parser.Lex();
2368         Name = "{";
2369       } else {
2370         if (getLexer().isNot(AsmToken::Identifier))
2371           return Error(Parser.getTok().getLoc(), "Expected identifier");
2372         // FIXME: The mnemonic won't match correctly if its not in lower case.
2373         Name = Parser.getTok().getString();
2374         Parser.Lex();
2375       }
2376       continue;
2377     }
2378 
2379     break;
2380   }
2381 
2382   StringRef PatchedName = Name;
2383 
2384   // Hack to skip "short" following Jcc.
2385   if (isParsingIntelSyntax() &&
2386       (PatchedName == "jmp" || PatchedName == "jc" || PatchedName == "jnc" ||
2387        PatchedName == "jcxz" || PatchedName == "jexcz" ||
2388        (PatchedName.startswith("j") &&
2389         ParseConditionCode(PatchedName.substr(1)) != X86::COND_INVALID))) {
2390     StringRef NextTok = Parser.getTok().getString();
2391     if (NextTok == "short") {
2392       SMLoc NameEndLoc =
2393           NameLoc.getFromPointer(NameLoc.getPointer() + Name.size());
2394       // Eat the short keyword.
2395       Parser.Lex();
2396       // MS and GAS ignore the short keyword; they both determine the jmp type
2397       // based on the distance of the label. (NASM does emit different code with
2398       // and without "short," though.)
2399       InstInfo->AsmRewrites->emplace_back(AOK_Skip, NameEndLoc,
2400                                           NextTok.size() + 1);
2401     }
2402   }
2403 
2404   // FIXME: Hack to recognize setneb as setne.
2405   if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
2406       PatchedName != "setb" && PatchedName != "setnb")
2407     PatchedName = PatchedName.substr(0, Name.size()-1);
2408 
2409   unsigned ComparisonPredicate = ~0U;
2410 
2411   // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
2412   if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
2413       (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
2414        PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
2415     bool IsVCMP = PatchedName[0] == 'v';
2416     unsigned CCIdx = IsVCMP ? 4 : 3;
2417     unsigned CC = StringSwitch<unsigned>(
2418       PatchedName.slice(CCIdx, PatchedName.size() - 2))
2419       .Case("eq",       0x00)
2420       .Case("eq_oq",    0x00)
2421       .Case("lt",       0x01)
2422       .Case("lt_os",    0x01)
2423       .Case("le",       0x02)
2424       .Case("le_os",    0x02)
2425       .Case("unord",    0x03)
2426       .Case("unord_q",  0x03)
2427       .Case("neq",      0x04)
2428       .Case("neq_uq",   0x04)
2429       .Case("nlt",      0x05)
2430       .Case("nlt_us",   0x05)
2431       .Case("nle",      0x06)
2432       .Case("nle_us",   0x06)
2433       .Case("ord",      0x07)
2434       .Case("ord_q",    0x07)
2435       /* AVX only from here */
2436       .Case("eq_uq",    0x08)
2437       .Case("nge",      0x09)
2438       .Case("nge_us",   0x09)
2439       .Case("ngt",      0x0A)
2440       .Case("ngt_us",   0x0A)
2441       .Case("false",    0x0B)
2442       .Case("false_oq", 0x0B)
2443       .Case("neq_oq",   0x0C)
2444       .Case("ge",       0x0D)
2445       .Case("ge_os",    0x0D)
2446       .Case("gt",       0x0E)
2447       .Case("gt_os",    0x0E)
2448       .Case("true",     0x0F)
2449       .Case("true_uq",  0x0F)
2450       .Case("eq_os",    0x10)
2451       .Case("lt_oq",    0x11)
2452       .Case("le_oq",    0x12)
2453       .Case("unord_s",  0x13)
2454       .Case("neq_us",   0x14)
2455       .Case("nlt_uq",   0x15)
2456       .Case("nle_uq",   0x16)
2457       .Case("ord_s",    0x17)
2458       .Case("eq_us",    0x18)
2459       .Case("nge_uq",   0x19)
2460       .Case("ngt_uq",   0x1A)
2461       .Case("false_os", 0x1B)
2462       .Case("neq_os",   0x1C)
2463       .Case("ge_oq",    0x1D)
2464       .Case("gt_oq",    0x1E)
2465       .Case("true_us",  0x1F)
2466       .Default(~0U);
2467     if (CC != ~0U && (IsVCMP || CC < 8)) {
2468       if (PatchedName.endswith("ss"))
2469         PatchedName = IsVCMP ? "vcmpss" : "cmpss";
2470       else if (PatchedName.endswith("sd"))
2471         PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
2472       else if (PatchedName.endswith("ps"))
2473         PatchedName = IsVCMP ? "vcmpps" : "cmpps";
2474       else if (PatchedName.endswith("pd"))
2475         PatchedName = IsVCMP ? "vcmppd" : "cmppd";
2476       else
2477         llvm_unreachable("Unexpected suffix!");
2478 
2479       ComparisonPredicate = CC;
2480     }
2481   }
2482 
2483   // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2484   if (PatchedName.startswith("vpcmp") &&
2485       (PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
2486        PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
2487     unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2488     unsigned CC = StringSwitch<unsigned>(
2489       PatchedName.slice(5, PatchedName.size() - SuffixSize))
2490       .Case("eq",    0x0) // Only allowed on unsigned. Checked below.
2491       .Case("lt",    0x1)
2492       .Case("le",    0x2)
2493       //.Case("false", 0x3) // Not a documented alias.
2494       .Case("neq",   0x4)
2495       .Case("nlt",   0x5)
2496       .Case("nle",   0x6)
2497       //.Case("true",  0x7) // Not a documented alias.
2498       .Default(~0U);
2499     if (CC != ~0U && (CC != 0 || SuffixSize == 2)) {
2500       switch (PatchedName.back()) {
2501       default: llvm_unreachable("Unexpected character!");
2502       case 'b': PatchedName = SuffixSize == 2 ? "vpcmpub" : "vpcmpb"; break;
2503       case 'w': PatchedName = SuffixSize == 2 ? "vpcmpuw" : "vpcmpw"; break;
2504       case 'd': PatchedName = SuffixSize == 2 ? "vpcmpud" : "vpcmpd"; break;
2505       case 'q': PatchedName = SuffixSize == 2 ? "vpcmpuq" : "vpcmpq"; break;
2506       }
2507       // Set up the immediate to push into the operands later.
2508       ComparisonPredicate = CC;
2509     }
2510   }
2511 
2512   // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2513   if (PatchedName.startswith("vpcom") &&
2514       (PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
2515        PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
2516     unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2517     unsigned CC = StringSwitch<unsigned>(
2518       PatchedName.slice(5, PatchedName.size() - SuffixSize))
2519       .Case("lt",    0x0)
2520       .Case("le",    0x1)
2521       .Case("gt",    0x2)
2522       .Case("ge",    0x3)
2523       .Case("eq",    0x4)
2524       .Case("neq",   0x5)
2525       .Case("false", 0x6)
2526       .Case("true",  0x7)
2527       .Default(~0U);
2528     if (CC != ~0U) {
2529       switch (PatchedName.back()) {
2530       default: llvm_unreachable("Unexpected character!");
2531       case 'b': PatchedName = SuffixSize == 2 ? "vpcomub" : "vpcomb"; break;
2532       case 'w': PatchedName = SuffixSize == 2 ? "vpcomuw" : "vpcomw"; break;
2533       case 'd': PatchedName = SuffixSize == 2 ? "vpcomud" : "vpcomd"; break;
2534       case 'q': PatchedName = SuffixSize == 2 ? "vpcomuq" : "vpcomq"; break;
2535       }
2536       // Set up the immediate to push into the operands later.
2537       ComparisonPredicate = CC;
2538     }
2539   }
2540 
2541 
2542   // Determine whether this is an instruction prefix.
2543   // FIXME:
2544   // Enhance prefixes integrity robustness. for example, following forms
2545   // are currently tolerated:
2546   // repz repnz <insn>    ; GAS errors for the use of two similar prefixes
2547   // lock addq %rax, %rbx ; Destination operand must be of memory type
2548   // xacquire <insn>      ; xacquire must be accompanied by 'lock'
2549   bool isPrefix = StringSwitch<bool>(Name)
2550                       .Cases("rex64", "data32", "data16", true)
2551                       .Cases("xacquire", "xrelease", true)
2552                       .Cases("acquire", "release", isParsingIntelSyntax())
2553                       .Default(false);
2554 
2555   auto isLockRepeatNtPrefix = [](StringRef N) {
2556     return StringSwitch<bool>(N)
2557         .Cases("lock", "rep", "repe", "repz", "repne", "repnz", "notrack", true)
2558         .Default(false);
2559   };
2560 
2561   bool CurlyAsEndOfStatement = false;
2562 
2563   unsigned Flags = X86::IP_NO_PREFIX;
2564   while (isLockRepeatNtPrefix(Name.lower())) {
2565     unsigned Prefix =
2566         StringSwitch<unsigned>(Name)
2567             .Cases("lock", "lock", X86::IP_HAS_LOCK)
2568             .Cases("rep", "repe", "repz", X86::IP_HAS_REPEAT)
2569             .Cases("repne", "repnz", X86::IP_HAS_REPEAT_NE)
2570             .Cases("notrack", "notrack", X86::IP_HAS_NOTRACK)
2571             .Default(X86::IP_NO_PREFIX); // Invalid prefix (impossible)
2572     Flags |= Prefix;
2573     if (getLexer().is(AsmToken::EndOfStatement)) {
2574       // We don't have real instr with the given prefix
2575       //  let's use the prefix as the instr.
2576       // TODO: there could be several prefixes one after another
2577       Flags = X86::IP_NO_PREFIX;
2578       break;
2579     }
2580     // FIXME: The mnemonic won't match correctly if its not in lower case.
2581     Name = Parser.getTok().getString();
2582     Parser.Lex(); // eat the prefix
2583     // Hack: we could have something like "rep # some comment" or
2584     //    "lock; cmpxchg16b $1" or "lock\0A\09incl" or "lock/incl"
2585     while (Name.startswith(";") || Name.startswith("\n") ||
2586            Name.startswith("#") || Name.startswith("\t") ||
2587            Name.startswith("/")) {
2588       // FIXME: The mnemonic won't match correctly if its not in lower case.
2589       Name = Parser.getTok().getString();
2590       Parser.Lex(); // go to next prefix or instr
2591     }
2592   }
2593 
2594   if (Flags)
2595     PatchedName = Name;
2596 
2597   // Hacks to handle 'data16' and 'data32'
2598   if (PatchedName == "data16" && is16BitMode()) {
2599     return Error(NameLoc, "redundant data16 prefix");
2600   }
2601   if (PatchedName == "data32") {
2602     if (is32BitMode())
2603       return Error(NameLoc, "redundant data32 prefix");
2604     if (is64BitMode())
2605       return Error(NameLoc, "'data32' is not supported in 64-bit mode");
2606     // Hack to 'data16' for the table lookup.
2607     PatchedName = "data16";
2608   }
2609 
2610   Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
2611 
2612   // Push the immediate if we extracted one from the mnemonic.
2613   if (ComparisonPredicate != ~0U && !isParsingIntelSyntax()) {
2614     const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
2615                                                  getParser().getContext());
2616     Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2617   }
2618 
2619   // This does the actual operand parsing.  Don't parse any more if we have a
2620   // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
2621   // just want to parse the "lock" as the first instruction and the "incl" as
2622   // the next one.
2623   if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
2624     // Parse '*' modifier.
2625     if (getLexer().is(AsmToken::Star))
2626       Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
2627 
2628     // Read the operands.
2629     while(1) {
2630       if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
2631         Operands.push_back(std::move(Op));
2632         if (HandleAVX512Operand(Operands, *Operands.back()))
2633           return true;
2634       } else {
2635          return true;
2636       }
2637       // check for comma and eat it
2638       if (getLexer().is(AsmToken::Comma))
2639         Parser.Lex();
2640       else
2641         break;
2642      }
2643 
2644     // In MS inline asm curly braces mark the beginning/end of a block,
2645     // therefore they should be interepreted as end of statement
2646     CurlyAsEndOfStatement =
2647         isParsingIntelSyntax() && isParsingInlineAsm() &&
2648         (getLexer().is(AsmToken::LCurly) || getLexer().is(AsmToken::RCurly));
2649     if (getLexer().isNot(AsmToken::EndOfStatement) && !CurlyAsEndOfStatement)
2650       return TokError("unexpected token in argument list");
2651   }
2652 
2653   // Push the immediate if we extracted one from the mnemonic.
2654   if (ComparisonPredicate != ~0U && isParsingIntelSyntax()) {
2655     const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
2656                                                  getParser().getContext());
2657     Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2658   }
2659 
2660   // Consume the EndOfStatement or the prefix separator Slash
2661   if (getLexer().is(AsmToken::EndOfStatement) ||
2662       (isPrefix && getLexer().is(AsmToken::Slash)))
2663     Parser.Lex();
2664   else if (CurlyAsEndOfStatement)
2665     // Add an actual EndOfStatement before the curly brace
2666     Info.AsmRewrites->emplace_back(AOK_EndOfStatement,
2667                                    getLexer().getTok().getLoc(), 0);
2668 
2669   // This is for gas compatibility and cannot be done in td.
2670   // Adding "p" for some floating point with no argument.
2671   // For example: fsub --> fsubp
2672   bool IsFp =
2673     Name == "fsub" || Name == "fdiv" || Name == "fsubr" || Name == "fdivr";
2674   if (IsFp && Operands.size() == 1) {
2675     const char *Repl = StringSwitch<const char *>(Name)
2676       .Case("fsub", "fsubp")
2677       .Case("fdiv", "fdivp")
2678       .Case("fsubr", "fsubrp")
2679       .Case("fdivr", "fdivrp");
2680     static_cast<X86Operand &>(*Operands[0]).setTokenValue(Repl);
2681   }
2682 
2683   if ((Name == "mov" || Name == "movw" || Name == "movl") &&
2684       (Operands.size() == 3)) {
2685     X86Operand &Op1 = (X86Operand &)*Operands[1];
2686     X86Operand &Op2 = (X86Operand &)*Operands[2];
2687     SMLoc Loc = Op1.getEndLoc();
2688     // Moving a 32 or 16 bit value into a segment register has the same
2689     // behavior. Modify such instructions to always take shorter form.
2690     if (Op1.isReg() && Op2.isReg() &&
2691         X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(
2692             Op2.getReg()) &&
2693         (X86MCRegisterClasses[X86::GR16RegClassID].contains(Op1.getReg()) ||
2694          X86MCRegisterClasses[X86::GR32RegClassID].contains(Op1.getReg()))) {
2695       // Change instruction name to match new instruction.
2696       if (Name != "mov" && Name[3] == (is16BitMode() ? 'l' : 'w')) {
2697         Name = is16BitMode() ? "movw" : "movl";
2698         Operands[0] = X86Operand::CreateToken(Name, NameLoc);
2699       }
2700       // Select the correct equivalent 16-/32-bit source register.
2701       unsigned Reg =
2702           getX86SubSuperRegisterOrZero(Op1.getReg(), is16BitMode() ? 16 : 32);
2703       Operands[1] = X86Operand::CreateReg(Reg, Loc, Loc);
2704     }
2705   }
2706 
2707   // This is a terrible hack to handle "out[s]?[bwl]? %al, (%dx)" ->
2708   // "outb %al, %dx".  Out doesn't take a memory form, but this is a widely
2709   // documented form in various unofficial manuals, so a lot of code uses it.
2710   if ((Name == "outb" || Name == "outsb" || Name == "outw" || Name == "outsw" ||
2711        Name == "outl" || Name == "outsl" || Name == "out" || Name == "outs") &&
2712       Operands.size() == 3) {
2713     X86Operand &Op = (X86Operand &)*Operands.back();
2714     if (Op.isDXReg())
2715       Operands.back() = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
2716                                               Op.getEndLoc());
2717   }
2718   // Same hack for "in[s]?[bwl]? (%dx), %al" -> "inb %dx, %al".
2719   if ((Name == "inb" || Name == "insb" || Name == "inw" || Name == "insw" ||
2720        Name == "inl" || Name == "insl" || Name == "in" || Name == "ins") &&
2721       Operands.size() == 3) {
2722     X86Operand &Op = (X86Operand &)*Operands[1];
2723     if (Op.isDXReg())
2724       Operands[1] = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
2725                                           Op.getEndLoc());
2726   }
2727 
2728   SmallVector<std::unique_ptr<MCParsedAsmOperand>, 2> TmpOperands;
2729   bool HadVerifyError = false;
2730 
2731   // Append default arguments to "ins[bwld]"
2732   if (Name.startswith("ins") &&
2733       (Operands.size() == 1 || Operands.size() == 3) &&
2734       (Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd" ||
2735        Name == "ins")) {
2736 
2737     AddDefaultSrcDestOperands(TmpOperands,
2738                               X86Operand::CreateReg(X86::DX, NameLoc, NameLoc),
2739                               DefaultMemDIOperand(NameLoc));
2740     HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2741   }
2742 
2743   // Append default arguments to "outs[bwld]"
2744   if (Name.startswith("outs") &&
2745       (Operands.size() == 1 || Operands.size() == 3) &&
2746       (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2747        Name == "outsd" || Name == "outs")) {
2748     AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
2749                               X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2750     HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2751   }
2752 
2753   // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2754   // values of $SIREG according to the mode. It would be nice if this
2755   // could be achieved with InstAlias in the tables.
2756   if (Name.startswith("lods") &&
2757       (Operands.size() == 1 || Operands.size() == 2) &&
2758       (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2759        Name == "lodsl" || Name == "lodsd" || Name == "lodsq")) {
2760     TmpOperands.push_back(DefaultMemSIOperand(NameLoc));
2761     HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2762   }
2763 
2764   // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2765   // values of $DIREG according to the mode. It would be nice if this
2766   // could be achieved with InstAlias in the tables.
2767   if (Name.startswith("stos") &&
2768       (Operands.size() == 1 || Operands.size() == 2) &&
2769       (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2770        Name == "stosl" || Name == "stosd" || Name == "stosq")) {
2771     TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
2772     HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2773   }
2774 
2775   // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2776   // values of $DIREG according to the mode. It would be nice if this
2777   // could be achieved with InstAlias in the tables.
2778   if (Name.startswith("scas") &&
2779       (Operands.size() == 1 || Operands.size() == 2) &&
2780       (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2781        Name == "scasl" || Name == "scasd" || Name == "scasq")) {
2782     TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
2783     HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2784   }
2785 
2786   // Add default SI and DI operands to "cmps[bwlq]".
2787   if (Name.startswith("cmps") &&
2788       (Operands.size() == 1 || Operands.size() == 3) &&
2789       (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2790        Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
2791     AddDefaultSrcDestOperands(TmpOperands, DefaultMemDIOperand(NameLoc),
2792                               DefaultMemSIOperand(NameLoc));
2793     HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2794   }
2795 
2796   // Add default SI and DI operands to "movs[bwlq]".
2797   if (((Name.startswith("movs") &&
2798         (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2799          Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2800        (Name.startswith("smov") &&
2801         (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2802          Name == "smovl" || Name == "smovd" || Name == "smovq"))) &&
2803       (Operands.size() == 1 || Operands.size() == 3)) {
2804     if (Name == "movsd" && Operands.size() == 1 && !isParsingIntelSyntax())
2805       Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
2806     AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
2807                               DefaultMemDIOperand(NameLoc));
2808     HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2809   }
2810 
2811   // Check if we encountered an error for one the string insturctions
2812   if (HadVerifyError) {
2813     return HadVerifyError;
2814   }
2815 
2816   // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>.  Canonicalize to
2817   // "shift <op>".
2818   if ((Name.startswith("shr") || Name.startswith("sar") ||
2819        Name.startswith("shl") || Name.startswith("sal") ||
2820        Name.startswith("rcl") || Name.startswith("rcr") ||
2821        Name.startswith("rol") || Name.startswith("ror")) &&
2822       Operands.size() == 3) {
2823     if (isParsingIntelSyntax()) {
2824       // Intel syntax
2825       X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]);
2826       if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2827           cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2828         Operands.pop_back();
2829     } else {
2830       X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2831       if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2832           cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2833         Operands.erase(Operands.begin() + 1);
2834     }
2835   }
2836 
2837   // Transforms "int $3" into "int3" as a size optimization.  We can't write an
2838   // instalias with an immediate operand yet.
2839   if (Name == "int" && Operands.size() == 2) {
2840     X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2841     if (Op1.isImm())
2842       if (auto *CE = dyn_cast<MCConstantExpr>(Op1.getImm()))
2843         if (CE->getValue() == 3) {
2844           Operands.erase(Operands.begin() + 1);
2845           static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
2846         }
2847   }
2848 
2849   // Transforms "xlat mem8" into "xlatb"
2850   if ((Name == "xlat" || Name == "xlatb") && Operands.size() == 2) {
2851     X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2852     if (Op1.isMem8()) {
2853       Warning(Op1.getStartLoc(), "memory operand is only for determining the "
2854                                  "size, (R|E)BX will be used for the location");
2855       Operands.pop_back();
2856       static_cast<X86Operand &>(*Operands[0]).setTokenValue("xlatb");
2857     }
2858   }
2859 
2860   if (Flags)
2861     Operands.push_back(X86Operand::CreatePrefix(Flags, NameLoc, NameLoc));
2862   return false;
2863 }
2864 
2865 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
2866   const MCRegisterInfo *MRI = getContext().getRegisterInfo();
2867 
2868   switch (Inst.getOpcode()) {
2869   default: return false;
2870   case X86::VMOVZPQILo2PQIrr:
2871   case X86::VMOVAPDrr:
2872   case X86::VMOVAPDYrr:
2873   case X86::VMOVAPSrr:
2874   case X86::VMOVAPSYrr:
2875   case X86::VMOVDQArr:
2876   case X86::VMOVDQAYrr:
2877   case X86::VMOVDQUrr:
2878   case X86::VMOVDQUYrr:
2879   case X86::VMOVUPDrr:
2880   case X86::VMOVUPDYrr:
2881   case X86::VMOVUPSrr:
2882   case X86::VMOVUPSYrr: {
2883     // We can get a smaller encoding by using VEX.R instead of VEX.B if one of
2884     // the registers is extended, but other isn't.
2885     if (ForcedVEXEncoding == VEXEncoding_VEX3 ||
2886         MRI->getEncodingValue(Inst.getOperand(0).getReg()) >= 8 ||
2887         MRI->getEncodingValue(Inst.getOperand(1).getReg()) < 8)
2888       return false;
2889 
2890     unsigned NewOpc;
2891     switch (Inst.getOpcode()) {
2892     default: llvm_unreachable("Invalid opcode");
2893     case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr;   break;
2894     case X86::VMOVAPDrr:        NewOpc = X86::VMOVAPDrr_REV;  break;
2895     case X86::VMOVAPDYrr:       NewOpc = X86::VMOVAPDYrr_REV; break;
2896     case X86::VMOVAPSrr:        NewOpc = X86::VMOVAPSrr_REV;  break;
2897     case X86::VMOVAPSYrr:       NewOpc = X86::VMOVAPSYrr_REV; break;
2898     case X86::VMOVDQArr:        NewOpc = X86::VMOVDQArr_REV;  break;
2899     case X86::VMOVDQAYrr:       NewOpc = X86::VMOVDQAYrr_REV; break;
2900     case X86::VMOVDQUrr:        NewOpc = X86::VMOVDQUrr_REV;  break;
2901     case X86::VMOVDQUYrr:       NewOpc = X86::VMOVDQUYrr_REV; break;
2902     case X86::VMOVUPDrr:        NewOpc = X86::VMOVUPDrr_REV;  break;
2903     case X86::VMOVUPDYrr:       NewOpc = X86::VMOVUPDYrr_REV; break;
2904     case X86::VMOVUPSrr:        NewOpc = X86::VMOVUPSrr_REV;  break;
2905     case X86::VMOVUPSYrr:       NewOpc = X86::VMOVUPSYrr_REV; break;
2906     }
2907     Inst.setOpcode(NewOpc);
2908     return true;
2909   }
2910   case X86::VMOVSDrr:
2911   case X86::VMOVSSrr: {
2912     // We can get a smaller encoding by using VEX.R instead of VEX.B if one of
2913     // the registers is extended, but other isn't.
2914     if (ForcedVEXEncoding == VEXEncoding_VEX3 ||
2915         MRI->getEncodingValue(Inst.getOperand(0).getReg()) >= 8 ||
2916         MRI->getEncodingValue(Inst.getOperand(2).getReg()) < 8)
2917       return false;
2918 
2919     unsigned NewOpc;
2920     switch (Inst.getOpcode()) {
2921     default: llvm_unreachable("Invalid opcode");
2922     case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
2923     case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
2924     }
2925     Inst.setOpcode(NewOpc);
2926     return true;
2927   }
2928   }
2929 }
2930 
2931 bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
2932   const MCRegisterInfo *MRI = getContext().getRegisterInfo();
2933 
2934   switch (Inst.getOpcode()) {
2935   case X86::VGATHERDPDYrm:
2936   case X86::VGATHERDPDrm:
2937   case X86::VGATHERDPSYrm:
2938   case X86::VGATHERDPSrm:
2939   case X86::VGATHERQPDYrm:
2940   case X86::VGATHERQPDrm:
2941   case X86::VGATHERQPSYrm:
2942   case X86::VGATHERQPSrm:
2943   case X86::VPGATHERDDYrm:
2944   case X86::VPGATHERDDrm:
2945   case X86::VPGATHERDQYrm:
2946   case X86::VPGATHERDQrm:
2947   case X86::VPGATHERQDYrm:
2948   case X86::VPGATHERQDrm:
2949   case X86::VPGATHERQQYrm:
2950   case X86::VPGATHERQQrm: {
2951     unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
2952     unsigned Mask = MRI->getEncodingValue(Inst.getOperand(1).getReg());
2953     unsigned Index =
2954       MRI->getEncodingValue(Inst.getOperand(3 + X86::AddrIndexReg).getReg());
2955     if (Dest == Mask || Dest == Index || Mask == Index)
2956       return Warning(Ops[0]->getStartLoc(), "mask, index, and destination "
2957                                             "registers should be distinct");
2958     break;
2959   }
2960   case X86::VGATHERDPDZ128rm:
2961   case X86::VGATHERDPDZ256rm:
2962   case X86::VGATHERDPDZrm:
2963   case X86::VGATHERDPSZ128rm:
2964   case X86::VGATHERDPSZ256rm:
2965   case X86::VGATHERDPSZrm:
2966   case X86::VGATHERQPDZ128rm:
2967   case X86::VGATHERQPDZ256rm:
2968   case X86::VGATHERQPDZrm:
2969   case X86::VGATHERQPSZ128rm:
2970   case X86::VGATHERQPSZ256rm:
2971   case X86::VGATHERQPSZrm:
2972   case X86::VPGATHERDDZ128rm:
2973   case X86::VPGATHERDDZ256rm:
2974   case X86::VPGATHERDDZrm:
2975   case X86::VPGATHERDQZ128rm:
2976   case X86::VPGATHERDQZ256rm:
2977   case X86::VPGATHERDQZrm:
2978   case X86::VPGATHERQDZ128rm:
2979   case X86::VPGATHERQDZ256rm:
2980   case X86::VPGATHERQDZrm:
2981   case X86::VPGATHERQQZ128rm:
2982   case X86::VPGATHERQQZ256rm:
2983   case X86::VPGATHERQQZrm: {
2984     unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
2985     unsigned Index =
2986       MRI->getEncodingValue(Inst.getOperand(4 + X86::AddrIndexReg).getReg());
2987     if (Dest == Index)
2988       return Warning(Ops[0]->getStartLoc(), "index and destination registers "
2989                                             "should be distinct");
2990     break;
2991   }
2992   case X86::V4FMADDPSrm:
2993   case X86::V4FMADDPSrmk:
2994   case X86::V4FMADDPSrmkz:
2995   case X86::V4FMADDSSrm:
2996   case X86::V4FMADDSSrmk:
2997   case X86::V4FMADDSSrmkz:
2998   case X86::V4FNMADDPSrm:
2999   case X86::V4FNMADDPSrmk:
3000   case X86::V4FNMADDPSrmkz:
3001   case X86::V4FNMADDSSrm:
3002   case X86::V4FNMADDSSrmk:
3003   case X86::V4FNMADDSSrmkz:
3004   case X86::VP4DPWSSDSrm:
3005   case X86::VP4DPWSSDSrmk:
3006   case X86::VP4DPWSSDSrmkz:
3007   case X86::VP4DPWSSDrm:
3008   case X86::VP4DPWSSDrmk:
3009   case X86::VP4DPWSSDrmkz: {
3010     unsigned Src2 = Inst.getOperand(Inst.getNumOperands() -
3011                                     X86::AddrNumOperands - 1).getReg();
3012     unsigned Src2Enc = MRI->getEncodingValue(Src2);
3013     if (Src2Enc % 4 != 0) {
3014       StringRef RegName = X86IntelInstPrinter::getRegisterName(Src2);
3015       unsigned GroupStart = (Src2Enc / 4) * 4;
3016       unsigned GroupEnd = GroupStart + 3;
3017       return Warning(Ops[0]->getStartLoc(),
3018                      "source register '" + RegName + "' implicitly denotes '" +
3019                      RegName.take_front(3) + Twine(GroupStart) + "' to '" +
3020                      RegName.take_front(3) + Twine(GroupEnd) +
3021                      "' source group");
3022     }
3023     break;
3024   }
3025   }
3026 
3027   return false;
3028 }
3029 
3030 static const char *getSubtargetFeatureName(uint64_t Val);
3031 
3032 void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
3033                                    MCStreamer &Out) {
3034   Out.EmitInstruction(Inst, getSTI());
3035 }
3036 
3037 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3038                                            OperandVector &Operands,
3039                                            MCStreamer &Out, uint64_t &ErrorInfo,
3040                                            bool MatchingInlineAsm) {
3041   if (isParsingIntelSyntax())
3042     return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
3043                                         MatchingInlineAsm);
3044   return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
3045                                     MatchingInlineAsm);
3046 }
3047 
3048 void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
3049                                      OperandVector &Operands, MCStreamer &Out,
3050                                      bool MatchingInlineAsm) {
3051   // FIXME: This should be replaced with a real .td file alias mechanism.
3052   // Also, MatchInstructionImpl should actually *do* the EmitInstruction
3053   // call.
3054   const char *Repl = StringSwitch<const char *>(Op.getToken())
3055                          .Case("finit", "fninit")
3056                          .Case("fsave", "fnsave")
3057                          .Case("fstcw", "fnstcw")
3058                          .Case("fstcww", "fnstcw")
3059                          .Case("fstenv", "fnstenv")
3060                          .Case("fstsw", "fnstsw")
3061                          .Case("fstsww", "fnstsw")
3062                          .Case("fclex", "fnclex")
3063                          .Default(nullptr);
3064   if (Repl) {
3065     MCInst Inst;
3066     Inst.setOpcode(X86::WAIT);
3067     Inst.setLoc(IDLoc);
3068     if (!MatchingInlineAsm)
3069       EmitInstruction(Inst, Operands, Out);
3070     Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
3071   }
3072 }
3073 
3074 bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc,
3075                                        const FeatureBitset &MissingFeatures,
3076                                        bool MatchingInlineAsm) {
3077   assert(MissingFeatures.any() && "Unknown missing feature!");
3078   SmallString<126> Msg;
3079   raw_svector_ostream OS(Msg);
3080   OS << "instruction requires:";
3081   for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i) {
3082     if (MissingFeatures[i])
3083       OS << ' ' << getSubtargetFeatureName(i);
3084   }
3085   return Error(IDLoc, OS.str(), SMRange(), MatchingInlineAsm);
3086 }
3087 
3088 static unsigned getPrefixes(OperandVector &Operands) {
3089   unsigned Result = 0;
3090   X86Operand &Prefix = static_cast<X86Operand &>(*Operands.back());
3091   if (Prefix.isPrefix()) {
3092     Result = Prefix.getPrefix();
3093     Operands.pop_back();
3094   }
3095   return Result;
3096 }
3097 
3098 unsigned X86AsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3099   unsigned Opc = Inst.getOpcode();
3100   const MCInstrDesc &MCID = MII.get(Opc);
3101 
3102   if (ForcedVEXEncoding == VEXEncoding_EVEX &&
3103       (MCID.TSFlags & X86II::EncodingMask) != X86II::EVEX)
3104     return Match_Unsupported;
3105 
3106   if ((ForcedVEXEncoding == VEXEncoding_VEX2 ||
3107        ForcedVEXEncoding == VEXEncoding_VEX3) &&
3108       (MCID.TSFlags & X86II::EncodingMask) != X86II::VEX)
3109     return Match_Unsupported;
3110 
3111   // These instructions match ambiguously with their VEX encoded counterparts
3112   // and appear first in the matching table. Reject them unless we're forcing
3113   // EVEX encoding.
3114   // FIXME: We really need a way to break the ambiguity.
3115   switch (Opc) {
3116   case X86::VCVTSD2SIZrm_Int:
3117   case X86::VCVTSD2SI64Zrm_Int:
3118   case X86::VCVTSS2SIZrm_Int:
3119   case X86::VCVTSS2SI64Zrm_Int:
3120   case X86::VCVTTSD2SIZrm:   case X86::VCVTTSD2SIZrm_Int:
3121   case X86::VCVTTSD2SI64Zrm: case X86::VCVTTSD2SI64Zrm_Int:
3122   case X86::VCVTTSS2SIZrm:   case X86::VCVTTSS2SIZrm_Int:
3123   case X86::VCVTTSS2SI64Zrm: case X86::VCVTTSS2SI64Zrm_Int:
3124     if (ForcedVEXEncoding != VEXEncoding_EVEX)
3125       return Match_Unsupported;
3126   }
3127 
3128   return Match_Success;
3129 }
3130 
3131 bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
3132                                               OperandVector &Operands,
3133                                               MCStreamer &Out,
3134                                               uint64_t &ErrorInfo,
3135                                               bool MatchingInlineAsm) {
3136   assert(!Operands.empty() && "Unexpect empty operand list!");
3137   assert((*Operands[0]).isToken() && "Leading operand should always be a mnemonic!");
3138   SMRange EmptyRange = None;
3139 
3140   // First, handle aliases that expand to multiple instructions.
3141   MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands,
3142                     Out, MatchingInlineAsm);
3143   X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
3144   unsigned Prefixes = getPrefixes(Operands);
3145 
3146   MCInst Inst;
3147 
3148   // If VEX3 encoding is forced, we need to pass the USE_VEX3 flag to the
3149   // encoder.
3150   if (ForcedVEXEncoding == VEXEncoding_VEX3)
3151     Prefixes |= X86::IP_USE_VEX3;
3152 
3153   if (Prefixes)
3154     Inst.setFlags(Prefixes);
3155 
3156   // First, try a direct match.
3157   FeatureBitset MissingFeatures;
3158   unsigned OriginalError = MatchInstruction(Operands, Inst, ErrorInfo,
3159                                             MissingFeatures, MatchingInlineAsm,
3160                                             isParsingIntelSyntax());
3161   switch (OriginalError) {
3162   default: llvm_unreachable("Unexpected match result!");
3163   case Match_Success:
3164     if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
3165       return true;
3166     // Some instructions need post-processing to, for example, tweak which
3167     // encoding is selected. Loop on it while changes happen so the
3168     // individual transformations can chain off each other.
3169     if (!MatchingInlineAsm)
3170       while (processInstruction(Inst, Operands))
3171         ;
3172 
3173     Inst.setLoc(IDLoc);
3174     if (!MatchingInlineAsm)
3175       EmitInstruction(Inst, Operands, Out);
3176     Opcode = Inst.getOpcode();
3177     return false;
3178   case Match_InvalidImmUnsignedi4: {
3179     SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc();
3180     if (ErrorLoc == SMLoc())
3181       ErrorLoc = IDLoc;
3182     return Error(ErrorLoc, "immediate must be an integer in range [0, 15]",
3183                  EmptyRange, MatchingInlineAsm);
3184   }
3185   case Match_MissingFeature:
3186     return ErrorMissingFeature(IDLoc, MissingFeatures, MatchingInlineAsm);
3187   case Match_InvalidOperand:
3188   case Match_MnemonicFail:
3189   case Match_Unsupported:
3190     break;
3191   }
3192   if (Op.getToken().empty()) {
3193     Error(IDLoc, "instruction must have size higher than 0", EmptyRange,
3194           MatchingInlineAsm);
3195     return true;
3196   }
3197 
3198   // FIXME: Ideally, we would only attempt suffix matches for things which are
3199   // valid prefixes, and we could just infer the right unambiguous
3200   // type. However, that requires substantially more matcher support than the
3201   // following hack.
3202 
3203   // Change the operand to point to a temporary token.
3204   StringRef Base = Op.getToken();
3205   SmallString<16> Tmp;
3206   Tmp += Base;
3207   Tmp += ' ';
3208   Op.setTokenValue(Tmp);
3209 
3210   // If this instruction starts with an 'f', then it is a floating point stack
3211   // instruction.  These come in up to three forms for 32-bit, 64-bit, and
3212   // 80-bit floating point, which use the suffixes s,l,t respectively.
3213   //
3214   // Otherwise, we assume that this may be an integer instruction, which comes
3215   // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
3216   const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
3217 
3218   // Check for the various suffix matches.
3219   uint64_t ErrorInfoIgnore;
3220   FeatureBitset ErrorInfoMissingFeatures; // Init suppresses compiler warnings.
3221   unsigned Match[4];
3222 
3223   for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
3224     Tmp.back() = Suffixes[I];
3225     Match[I] = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
3226                                 MissingFeatures, MatchingInlineAsm,
3227                                 isParsingIntelSyntax());
3228     // If this returned as a missing feature failure, remember that.
3229     if (Match[I] == Match_MissingFeature)
3230       ErrorInfoMissingFeatures = MissingFeatures;
3231   }
3232 
3233   // Restore the old token.
3234   Op.setTokenValue(Base);
3235 
3236   // If exactly one matched, then we treat that as a successful match (and the
3237   // instruction will already have been filled in correctly, since the failing
3238   // matches won't have modified it).
3239   unsigned NumSuccessfulMatches =
3240       std::count(std::begin(Match), std::end(Match), Match_Success);
3241   if (NumSuccessfulMatches == 1) {
3242     Inst.setLoc(IDLoc);
3243     if (!MatchingInlineAsm)
3244       EmitInstruction(Inst, Operands, Out);
3245     Opcode = Inst.getOpcode();
3246     return false;
3247   }
3248 
3249   // Otherwise, the match failed, try to produce a decent error message.
3250 
3251   // If we had multiple suffix matches, then identify this as an ambiguous
3252   // match.
3253   if (NumSuccessfulMatches > 1) {
3254     char MatchChars[4];
3255     unsigned NumMatches = 0;
3256     for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I)
3257       if (Match[I] == Match_Success)
3258         MatchChars[NumMatches++] = Suffixes[I];
3259 
3260     SmallString<126> Msg;
3261     raw_svector_ostream OS(Msg);
3262     OS << "ambiguous instructions require an explicit suffix (could be ";
3263     for (unsigned i = 0; i != NumMatches; ++i) {
3264       if (i != 0)
3265         OS << ", ";
3266       if (i + 1 == NumMatches)
3267         OS << "or ";
3268       OS << "'" << Base << MatchChars[i] << "'";
3269     }
3270     OS << ")";
3271     Error(IDLoc, OS.str(), EmptyRange, MatchingInlineAsm);
3272     return true;
3273   }
3274 
3275   // Okay, we know that none of the variants matched successfully.
3276 
3277   // If all of the instructions reported an invalid mnemonic, then the original
3278   // mnemonic was invalid.
3279   if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) {
3280     if (OriginalError == Match_MnemonicFail)
3281       return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
3282                    Op.getLocRange(), MatchingInlineAsm);
3283 
3284     if (OriginalError == Match_Unsupported)
3285       return Error(IDLoc, "unsupported instruction", EmptyRange,
3286                    MatchingInlineAsm);
3287 
3288     assert(OriginalError == Match_InvalidOperand && "Unexpected error");
3289     // Recover location info for the operand if we know which was the problem.
3290     if (ErrorInfo != ~0ULL) {
3291       if (ErrorInfo >= Operands.size())
3292         return Error(IDLoc, "too few operands for instruction", EmptyRange,
3293                      MatchingInlineAsm);
3294 
3295       X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
3296       if (Operand.getStartLoc().isValid()) {
3297         SMRange OperandRange = Operand.getLocRange();
3298         return Error(Operand.getStartLoc(), "invalid operand for instruction",
3299                      OperandRange, MatchingInlineAsm);
3300       }
3301     }
3302 
3303     return Error(IDLoc, "invalid operand for instruction", EmptyRange,
3304                  MatchingInlineAsm);
3305   }
3306 
3307   // If one instruction matched as unsupported, report this as unsupported.
3308   if (std::count(std::begin(Match), std::end(Match),
3309                  Match_Unsupported) == 1) {
3310     return Error(IDLoc, "unsupported instruction", EmptyRange,
3311                  MatchingInlineAsm);
3312   }
3313 
3314   // If one instruction matched with a missing feature, report this as a
3315   // missing feature.
3316   if (std::count(std::begin(Match), std::end(Match),
3317                  Match_MissingFeature) == 1) {
3318     ErrorInfo = Match_MissingFeature;
3319     return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
3320                                MatchingInlineAsm);
3321   }
3322 
3323   // If one instruction matched with an invalid operand, report this as an
3324   // operand failure.
3325   if (std::count(std::begin(Match), std::end(Match),
3326                  Match_InvalidOperand) == 1) {
3327     return Error(IDLoc, "invalid operand for instruction", EmptyRange,
3328                  MatchingInlineAsm);
3329   }
3330 
3331   // If all of these were an outright failure, report it in a useless way.
3332   Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
3333         EmptyRange, MatchingInlineAsm);
3334   return true;
3335 }
3336 
3337 bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
3338                                                 OperandVector &Operands,
3339                                                 MCStreamer &Out,
3340                                                 uint64_t &ErrorInfo,
3341                                                 bool MatchingInlineAsm) {
3342   assert(!Operands.empty() && "Unexpect empty operand list!");
3343   assert((*Operands[0]).isToken() && "Leading operand should always be a mnemonic!");
3344   StringRef Mnemonic = (static_cast<X86Operand &>(*Operands[0])).getToken();
3345   SMRange EmptyRange = None;
3346   StringRef Base = (static_cast<X86Operand &>(*Operands[0])).getToken();
3347   unsigned Prefixes = getPrefixes(Operands);
3348 
3349   // First, handle aliases that expand to multiple instructions.
3350   MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands, Out, MatchingInlineAsm);
3351   X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
3352 
3353   MCInst Inst;
3354 
3355   // If VEX3 encoding is forced, we need to pass the USE_VEX3 flag to the
3356   // encoder.
3357   if (ForcedVEXEncoding == VEXEncoding_VEX3)
3358     Prefixes |= X86::IP_USE_VEX3;
3359 
3360   if (Prefixes)
3361     Inst.setFlags(Prefixes);
3362 
3363   // Find one unsized memory operand, if present.
3364   X86Operand *UnsizedMemOp = nullptr;
3365   for (const auto &Op : Operands) {
3366     X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
3367     if (X86Op->isMemUnsized()) {
3368       UnsizedMemOp = X86Op;
3369       // Have we found an unqualified memory operand,
3370       // break. IA allows only one memory operand.
3371       break;
3372     }
3373   }
3374 
3375   // Allow some instructions to have implicitly pointer-sized operands.  This is
3376   // compatible with gas.
3377   if (UnsizedMemOp) {
3378     static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
3379     for (const char *Instr : PtrSizedInstrs) {
3380       if (Mnemonic == Instr) {
3381         UnsizedMemOp->Mem.Size = getPointerWidth();
3382         break;
3383       }
3384     }
3385   }
3386 
3387   SmallVector<unsigned, 8> Match;
3388   FeatureBitset ErrorInfoMissingFeatures;
3389   FeatureBitset MissingFeatures;
3390 
3391   // If unsized push has immediate operand we should default the default pointer
3392   // size for the size.
3393   if (Mnemonic == "push" && Operands.size() == 2) {
3394     auto *X86Op = static_cast<X86Operand *>(Operands[1].get());
3395     if (X86Op->isImm()) {
3396       // If it's not a constant fall through and let remainder take care of it.
3397       const auto *CE = dyn_cast<MCConstantExpr>(X86Op->getImm());
3398       unsigned Size = getPointerWidth();
3399       if (CE &&
3400           (isIntN(Size, CE->getValue()) || isUIntN(Size, CE->getValue()))) {
3401         SmallString<16> Tmp;
3402         Tmp += Base;
3403         Tmp += (is64BitMode())
3404                    ? "q"
3405                    : (is32BitMode()) ? "l" : (is16BitMode()) ? "w" : " ";
3406         Op.setTokenValue(Tmp);
3407         // Do match in ATT mode to allow explicit suffix usage.
3408         Match.push_back(MatchInstruction(Operands, Inst, ErrorInfo,
3409                                          MissingFeatures, MatchingInlineAsm,
3410                                          false /*isParsingIntelSyntax()*/));
3411         Op.setTokenValue(Base);
3412       }
3413     }
3414   }
3415 
3416   // If an unsized memory operand is present, try to match with each memory
3417   // operand size.  In Intel assembly, the size is not part of the instruction
3418   // mnemonic.
3419   if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
3420     static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
3421     for (unsigned Size : MopSizes) {
3422       UnsizedMemOp->Mem.Size = Size;
3423       uint64_t ErrorInfoIgnore;
3424       unsigned LastOpcode = Inst.getOpcode();
3425       unsigned M = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
3426                                     MissingFeatures, MatchingInlineAsm,
3427                                     isParsingIntelSyntax());
3428       if (Match.empty() || LastOpcode != Inst.getOpcode())
3429         Match.push_back(M);
3430 
3431       // If this returned as a missing feature failure, remember that.
3432       if (Match.back() == Match_MissingFeature)
3433         ErrorInfoMissingFeatures = MissingFeatures;
3434     }
3435 
3436     // Restore the size of the unsized memory operand if we modified it.
3437     UnsizedMemOp->Mem.Size = 0;
3438   }
3439 
3440   // If we haven't matched anything yet, this is not a basic integer or FPU
3441   // operation.  There shouldn't be any ambiguity in our mnemonic table, so try
3442   // matching with the unsized operand.
3443   if (Match.empty()) {
3444     Match.push_back(MatchInstruction(
3445         Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
3446         isParsingIntelSyntax()));
3447     // If this returned as a missing feature failure, remember that.
3448     if (Match.back() == Match_MissingFeature)
3449       ErrorInfoMissingFeatures = MissingFeatures;
3450   }
3451 
3452   // Restore the size of the unsized memory operand if we modified it.
3453   if (UnsizedMemOp)
3454     UnsizedMemOp->Mem.Size = 0;
3455 
3456   // If it's a bad mnemonic, all results will be the same.
3457   if (Match.back() == Match_MnemonicFail) {
3458     return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
3459                  Op.getLocRange(), MatchingInlineAsm);
3460   }
3461 
3462   unsigned NumSuccessfulMatches =
3463       std::count(std::begin(Match), std::end(Match), Match_Success);
3464 
3465   // If matching was ambiguous and we had size information from the frontend,
3466   // try again with that. This handles cases like "movxz eax, m8/m16".
3467   if (UnsizedMemOp && NumSuccessfulMatches > 1 &&
3468       UnsizedMemOp->getMemFrontendSize()) {
3469     UnsizedMemOp->Mem.Size = UnsizedMemOp->getMemFrontendSize();
3470     unsigned M = MatchInstruction(
3471         Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
3472         isParsingIntelSyntax());
3473     if (M == Match_Success)
3474       NumSuccessfulMatches = 1;
3475 
3476     // Add a rewrite that encodes the size information we used from the
3477     // frontend.
3478     InstInfo->AsmRewrites->emplace_back(
3479         AOK_SizeDirective, UnsizedMemOp->getStartLoc(),
3480         /*Len=*/0, UnsizedMemOp->getMemFrontendSize());
3481   }
3482 
3483   // If exactly one matched, then we treat that as a successful match (and the
3484   // instruction will already have been filled in correctly, since the failing
3485   // matches won't have modified it).
3486   if (NumSuccessfulMatches == 1) {
3487     if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
3488       return true;
3489     // Some instructions need post-processing to, for example, tweak which
3490     // encoding is selected. Loop on it while changes happen so the individual
3491     // transformations can chain off each other.
3492     if (!MatchingInlineAsm)
3493       while (processInstruction(Inst, Operands))
3494         ;
3495     Inst.setLoc(IDLoc);
3496     if (!MatchingInlineAsm)
3497       EmitInstruction(Inst, Operands, Out);
3498     Opcode = Inst.getOpcode();
3499     return false;
3500   } else if (NumSuccessfulMatches > 1) {
3501     assert(UnsizedMemOp &&
3502            "multiple matches only possible with unsized memory operands");
3503     return Error(UnsizedMemOp->getStartLoc(),
3504                  "ambiguous operand size for instruction '" + Mnemonic + "\'",
3505                  UnsizedMemOp->getLocRange());
3506   }
3507 
3508   // If one instruction matched as unsupported, report this as unsupported.
3509   if (std::count(std::begin(Match), std::end(Match),
3510                  Match_Unsupported) == 1) {
3511     return Error(IDLoc, "unsupported instruction", EmptyRange,
3512                  MatchingInlineAsm);
3513   }
3514 
3515   // If one instruction matched with a missing feature, report this as a
3516   // missing feature.
3517   if (std::count(std::begin(Match), std::end(Match),
3518                  Match_MissingFeature) == 1) {
3519     ErrorInfo = Match_MissingFeature;
3520     return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
3521                                MatchingInlineAsm);
3522   }
3523 
3524   // If one instruction matched with an invalid operand, report this as an
3525   // operand failure.
3526   if (std::count(std::begin(Match), std::end(Match),
3527                  Match_InvalidOperand) == 1) {
3528     return Error(IDLoc, "invalid operand for instruction", EmptyRange,
3529                  MatchingInlineAsm);
3530   }
3531 
3532   if (std::count(std::begin(Match), std::end(Match),
3533                  Match_InvalidImmUnsignedi4) == 1) {
3534     SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc();
3535     if (ErrorLoc == SMLoc())
3536       ErrorLoc = IDLoc;
3537     return Error(ErrorLoc, "immediate must be an integer in range [0, 15]",
3538                  EmptyRange, MatchingInlineAsm);
3539   }
3540 
3541   // If all of these were an outright failure, report it in a useless way.
3542   return Error(IDLoc, "unknown instruction mnemonic", EmptyRange,
3543                MatchingInlineAsm);
3544 }
3545 
3546 bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
3547   return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
3548 }
3549 
3550 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
3551   MCAsmParser &Parser = getParser();
3552   StringRef IDVal = DirectiveID.getIdentifier();
3553   if (IDVal.startswith(".code"))
3554     return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
3555   else if (IDVal.startswith(".att_syntax")) {
3556     if (getLexer().isNot(AsmToken::EndOfStatement)) {
3557       if (Parser.getTok().getString() == "prefix")
3558         Parser.Lex();
3559       else if (Parser.getTok().getString() == "noprefix")
3560         return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
3561                                            "supported: registers must have a "
3562                                            "'%' prefix in .att_syntax");
3563     }
3564     getParser().setAssemblerDialect(0);
3565     return false;
3566   } else if (IDVal.startswith(".intel_syntax")) {
3567     getParser().setAssemblerDialect(1);
3568     if (getLexer().isNot(AsmToken::EndOfStatement)) {
3569       if (Parser.getTok().getString() == "noprefix")
3570         Parser.Lex();
3571       else if (Parser.getTok().getString() == "prefix")
3572         return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
3573                                            "supported: registers must not have "
3574                                            "a '%' prefix in .intel_syntax");
3575     }
3576     return false;
3577   } else if (IDVal == ".even")
3578     return parseDirectiveEven(DirectiveID.getLoc());
3579   else if (IDVal == ".cv_fpo_proc")
3580     return parseDirectiveFPOProc(DirectiveID.getLoc());
3581   else if (IDVal == ".cv_fpo_setframe")
3582     return parseDirectiveFPOSetFrame(DirectiveID.getLoc());
3583   else if (IDVal == ".cv_fpo_pushreg")
3584     return parseDirectiveFPOPushReg(DirectiveID.getLoc());
3585   else if (IDVal == ".cv_fpo_stackalloc")
3586     return parseDirectiveFPOStackAlloc(DirectiveID.getLoc());
3587   else if (IDVal == ".cv_fpo_stackalign")
3588     return parseDirectiveFPOStackAlign(DirectiveID.getLoc());
3589   else if (IDVal == ".cv_fpo_endprologue")
3590     return parseDirectiveFPOEndPrologue(DirectiveID.getLoc());
3591   else if (IDVal == ".cv_fpo_endproc")
3592     return parseDirectiveFPOEndProc(DirectiveID.getLoc());
3593 
3594   return true;
3595 }
3596 
3597 /// parseDirectiveEven
3598 ///  ::= .even
3599 bool X86AsmParser::parseDirectiveEven(SMLoc L) {
3600   if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
3601     return false;
3602 
3603   const MCSection *Section = getStreamer().getCurrentSectionOnly();
3604   if (!Section) {
3605     getStreamer().InitSections(false);
3606     Section = getStreamer().getCurrentSectionOnly();
3607   }
3608   if (Section->UseCodeAlign())
3609     getStreamer().EmitCodeAlignment(2, 0);
3610   else
3611     getStreamer().EmitValueToAlignment(2, 0, 1, 0);
3612   return false;
3613 }
3614 
3615 /// ParseDirectiveCode
3616 ///  ::= .code16 | .code32 | .code64
3617 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
3618   MCAsmParser &Parser = getParser();
3619   Code16GCC = false;
3620   if (IDVal == ".code16") {
3621     Parser.Lex();
3622     if (!is16BitMode()) {
3623       SwitchMode(X86::Mode16Bit);
3624       getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
3625     }
3626   } else if (IDVal == ".code16gcc") {
3627     // .code16gcc parses as if in 32-bit mode, but emits code in 16-bit mode.
3628     Parser.Lex();
3629     Code16GCC = true;
3630     if (!is16BitMode()) {
3631       SwitchMode(X86::Mode16Bit);
3632       getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
3633     }
3634   } else if (IDVal == ".code32") {
3635     Parser.Lex();
3636     if (!is32BitMode()) {
3637       SwitchMode(X86::Mode32Bit);
3638       getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
3639     }
3640   } else if (IDVal == ".code64") {
3641     Parser.Lex();
3642     if (!is64BitMode()) {
3643       SwitchMode(X86::Mode64Bit);
3644       getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
3645     }
3646   } else {
3647     Error(L, "unknown directive " + IDVal);
3648     return false;
3649   }
3650 
3651   return false;
3652 }
3653 
3654 // .cv_fpo_proc foo
3655 bool X86AsmParser::parseDirectiveFPOProc(SMLoc L) {
3656   MCAsmParser &Parser = getParser();
3657   StringRef ProcName;
3658   int64_t ParamsSize;
3659   if (Parser.parseIdentifier(ProcName))
3660     return Parser.TokError("expected symbol name");
3661   if (Parser.parseIntToken(ParamsSize, "expected parameter byte count"))
3662     return true;
3663   if (!isUIntN(32, ParamsSize))
3664     return Parser.TokError("parameters size out of range");
3665   if (Parser.parseEOL("unexpected tokens"))
3666     return addErrorSuffix(" in '.cv_fpo_proc' directive");
3667   MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName);
3668   return getTargetStreamer().emitFPOProc(ProcSym, ParamsSize, L);
3669 }
3670 
3671 // .cv_fpo_setframe ebp
3672 bool X86AsmParser::parseDirectiveFPOSetFrame(SMLoc L) {
3673   MCAsmParser &Parser = getParser();
3674   unsigned Reg;
3675   SMLoc DummyLoc;
3676   if (ParseRegister(Reg, DummyLoc, DummyLoc) ||
3677       Parser.parseEOL("unexpected tokens"))
3678     return addErrorSuffix(" in '.cv_fpo_setframe' directive");
3679   return getTargetStreamer().emitFPOSetFrame(Reg, L);
3680 }
3681 
3682 // .cv_fpo_pushreg ebx
3683 bool X86AsmParser::parseDirectiveFPOPushReg(SMLoc L) {
3684   MCAsmParser &Parser = getParser();
3685   unsigned Reg;
3686   SMLoc DummyLoc;
3687   if (ParseRegister(Reg, DummyLoc, DummyLoc) ||
3688       Parser.parseEOL("unexpected tokens"))
3689     return addErrorSuffix(" in '.cv_fpo_pushreg' directive");
3690   return getTargetStreamer().emitFPOPushReg(Reg, L);
3691 }
3692 
3693 // .cv_fpo_stackalloc 20
3694 bool X86AsmParser::parseDirectiveFPOStackAlloc(SMLoc L) {
3695   MCAsmParser &Parser = getParser();
3696   int64_t Offset;
3697   if (Parser.parseIntToken(Offset, "expected offset") ||
3698       Parser.parseEOL("unexpected tokens"))
3699     return addErrorSuffix(" in '.cv_fpo_stackalloc' directive");
3700   return getTargetStreamer().emitFPOStackAlloc(Offset, L);
3701 }
3702 
3703 // .cv_fpo_stackalign 8
3704 bool X86AsmParser::parseDirectiveFPOStackAlign(SMLoc L) {
3705   MCAsmParser &Parser = getParser();
3706   int64_t Offset;
3707   if (Parser.parseIntToken(Offset, "expected offset") ||
3708       Parser.parseEOL("unexpected tokens"))
3709     return addErrorSuffix(" in '.cv_fpo_stackalign' directive");
3710   return getTargetStreamer().emitFPOStackAlign(Offset, L);
3711 }
3712 
3713 // .cv_fpo_endprologue
3714 bool X86AsmParser::parseDirectiveFPOEndPrologue(SMLoc L) {
3715   MCAsmParser &Parser = getParser();
3716   if (Parser.parseEOL("unexpected tokens"))
3717     return addErrorSuffix(" in '.cv_fpo_endprologue' directive");
3718   return getTargetStreamer().emitFPOEndPrologue(L);
3719 }
3720 
3721 // .cv_fpo_endproc
3722 bool X86AsmParser::parseDirectiveFPOEndProc(SMLoc L) {
3723   MCAsmParser &Parser = getParser();
3724   if (Parser.parseEOL("unexpected tokens"))
3725     return addErrorSuffix(" in '.cv_fpo_endproc' directive");
3726   return getTargetStreamer().emitFPOEndProc(L);
3727 }
3728 
3729 // Force static initialization.
3730 extern "C" void LLVMInitializeX86AsmParser() {
3731   RegisterMCAsmParser<X86AsmParser> X(getTheX86_32Target());
3732   RegisterMCAsmParser<X86AsmParser> Y(getTheX86_64Target());
3733 }
3734 
3735 #define GET_REGISTER_MATCHER
3736 #define GET_MATCHER_IMPLEMENTATION
3737 #define GET_SUBTARGET_FEATURE_NAME
3738 #include "X86GenAsmMatcher.inc"
3739