1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "AMDGPU.h"
11 #include "AMDKernelCodeT.h"
12 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
13 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
14 #include "SIDefines.h"
15 #include "Utils/AMDGPUAsmUtils.h"
16 #include "Utils/AMDGPUBaseInfo.h"
17 #include "Utils/AMDKernelCodeTUtils.h"
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SmallBitVector.h"
23 #include "llvm/ADT/SmallString.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/ADT/StringSwitch.h"
26 #include "llvm/ADT/Twine.h"
27 #include "llvm/BinaryFormat/ELF.h"
28 #include "llvm/CodeGen/MachineValueType.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/Casting.h"
46 #include "llvm/Support/Compiler.h"
47 #include "llvm/Support/ErrorHandling.h"
48 #include "llvm/Support/MathExtras.h"
49 #include "llvm/Support/SMLoc.h"
50 #include "llvm/Support/TargetRegistry.h"
51 #include "llvm/Support/raw_ostream.h"
52 #include <algorithm>
53 #include <cassert>
54 #include <cstdint>
55 #include <cstring>
56 #include <iterator>
57 #include <map>
58 #include <memory>
59 #include <string>
60 
61 using namespace llvm;
62 using namespace llvm::AMDGPU;
63 
namespace {

class AMDGPUAsmParser;

// Classification of a parsed register operand: vector GPR, scalar GPR,
// trap-handler temporary (TTMP), or a special register (e.g. VCC, EXEC).
enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };
69 
70 //===----------------------------------------------------------------------===//
71 // Operand
72 //===----------------------------------------------------------------------===//
73 
74 class AMDGPUOperand : public MCParsedAsmOperand {
75   enum KindTy {
76     Token,
77     Immediate,
78     Register,
79     Expression
80   } Kind;
81 
82   SMLoc StartLoc, EndLoc;
83   const AMDGPUAsmParser *AsmParser;
84 
85 public:
86   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
87     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
88 
89   using Ptr = std::unique_ptr<AMDGPUOperand>;
90 
91   struct Modifiers {
92     bool Abs = false;
93     bool Neg = false;
94     bool Sext = false;
95 
96     bool hasFPModifiers() const { return Abs || Neg; }
97     bool hasIntModifiers() const { return Sext; }
98     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
99 
100     int64_t getFPModifiersOperand() const {
101       int64_t Operand = 0;
102       Operand |= Abs ? SISrcMods::ABS : 0;
103       Operand |= Neg ? SISrcMods::NEG : 0;
104       return Operand;
105     }
106 
107     int64_t getIntModifiersOperand() const {
108       int64_t Operand = 0;
109       Operand |= Sext ? SISrcMods::SEXT : 0;
110       return Operand;
111     }
112 
113     int64_t getModifiersOperand() const {
114       assert(!(hasFPModifiers() && hasIntModifiers())
115            && "fp and int modifiers should not be used simultaneously");
116       if (hasFPModifiers()) {
117         return getFPModifiersOperand();
118       } else if (hasIntModifiers()) {
119         return getIntModifiersOperand();
120       } else {
121         return 0;
122       }
123     }
124 
125     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
126   };
127 
128   enum ImmTy {
129     ImmTyNone,
130     ImmTyGDS,
131     ImmTyOffen,
132     ImmTyIdxen,
133     ImmTyAddr64,
134     ImmTyOffset,
135     ImmTyInstOffset,
136     ImmTyOffset0,
137     ImmTyOffset1,
138     ImmTyGLC,
139     ImmTySLC,
140     ImmTyTFE,
141     ImmTyClampSI,
142     ImmTyOModSI,
143     ImmTyDppCtrl,
144     ImmTyDppRowMask,
145     ImmTyDppBankMask,
146     ImmTyDppBoundCtrl,
147     ImmTySdwaDstSel,
148     ImmTySdwaSrc0Sel,
149     ImmTySdwaSrc1Sel,
150     ImmTySdwaDstUnused,
151     ImmTyDMask,
152     ImmTyUNorm,
153     ImmTyDA,
154     ImmTyR128,
155     ImmTyLWE,
156     ImmTyExpTgt,
157     ImmTyExpCompr,
158     ImmTyExpVM,
159     ImmTyDFMT,
160     ImmTyNFMT,
161     ImmTyHwreg,
162     ImmTyOff,
163     ImmTySendMsg,
164     ImmTyInterpSlot,
165     ImmTyInterpAttr,
166     ImmTyAttrChan,
167     ImmTyOpSel,
168     ImmTyOpSelHi,
169     ImmTyNegLo,
170     ImmTyNegHi,
171     ImmTySwizzle,
172     ImmTyHigh
173   };
174 
175   struct TokOp {
176     const char *Data;
177     unsigned Length;
178   };
179 
180   struct ImmOp {
181     int64_t Val;
182     ImmTy Type;
183     bool IsFPImm;
184     Modifiers Mods;
185   };
186 
187   struct RegOp {
188     unsigned RegNo;
189     bool IsForcedVOP3;
190     Modifiers Mods;
191   };
192 
193   union {
194     TokOp Tok;
195     ImmOp Imm;
196     RegOp Reg;
197     const MCExpr *Expr;
198   };
199 
200   bool isToken() const override {
201     if (Kind == Token)
202       return true;
203 
204     if (Kind != Expression || !Expr)
205       return false;
206 
207     // When parsing operands, we can't always tell if something was meant to be
208     // a token, like 'gds', or an expression that references a global variable.
209     // In this case, we assume the string is an expression, and if we need to
210     // interpret is a token, then we treat the symbol name as the token.
211     return isa<MCSymbolRefExpr>(Expr);
212   }
213 
214   bool isImm() const override {
215     return Kind == Immediate;
216   }
217 
218   bool isInlinableImm(MVT type) const;
219   bool isLiteralImm(MVT type) const;
220 
221   bool isRegKind() const {
222     return Kind == Register;
223   }
224 
225   bool isReg() const override {
226     return isRegKind() && !hasModifiers();
227   }
228 
229   bool isRegOrImmWithInputMods(MVT type) const {
230     return isRegKind() || isInlinableImm(type);
231   }
232 
233   bool isRegOrImmWithInt16InputMods() const {
234     return isRegOrImmWithInputMods(MVT::i16);
235   }
236 
237   bool isRegOrImmWithInt32InputMods() const {
238     return isRegOrImmWithInputMods(MVT::i32);
239   }
240 
241   bool isRegOrImmWithInt64InputMods() const {
242     return isRegOrImmWithInputMods(MVT::i64);
243   }
244 
245   bool isRegOrImmWithFP16InputMods() const {
246     return isRegOrImmWithInputMods(MVT::f16);
247   }
248 
249   bool isRegOrImmWithFP32InputMods() const {
250     return isRegOrImmWithInputMods(MVT::f32);
251   }
252 
253   bool isRegOrImmWithFP64InputMods() const {
254     return isRegOrImmWithInputMods(MVT::f64);
255   }
256 
257   bool isVReg() const {
258     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
259            isRegClass(AMDGPU::VReg_64RegClassID) ||
260            isRegClass(AMDGPU::VReg_96RegClassID) ||
261            isRegClass(AMDGPU::VReg_128RegClassID) ||
262            isRegClass(AMDGPU::VReg_256RegClassID) ||
263            isRegClass(AMDGPU::VReg_512RegClassID);
264   }
265 
266   bool isVReg32OrOff() const {
267     return isOff() || isRegClass(AMDGPU::VGPR_32RegClassID);
268   }
269 
270   bool isSDWARegKind() const;
271 
272   bool isImmTy(ImmTy ImmT) const {
273     return isImm() && Imm.Type == ImmT;
274   }
275 
276   bool isImmModifier() const {
277     return isImm() && Imm.Type != ImmTyNone;
278   }
279 
280   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
281   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
282   bool isDMask() const { return isImmTy(ImmTyDMask); }
283   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
284   bool isDA() const { return isImmTy(ImmTyDA); }
285   bool isR128() const { return isImmTy(ImmTyUNorm); }
286   bool isLWE() const { return isImmTy(ImmTyLWE); }
287   bool isOff() const { return isImmTy(ImmTyOff); }
288   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
289   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
290   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
291   bool isOffen() const { return isImmTy(ImmTyOffen); }
292   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
293   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
294   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
295   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<16>(getImm()); }
296   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
297 
298   bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); }
299   bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
300   bool isGDS() const { return isImmTy(ImmTyGDS); }
301   bool isGLC() const { return isImmTy(ImmTyGLC); }
302   bool isSLC() const { return isImmTy(ImmTySLC); }
303   bool isTFE() const { return isImmTy(ImmTyTFE); }
304   bool isDFMT() const { return isImmTy(ImmTyDFMT) && isUInt<8>(getImm()); }
305   bool isNFMT() const { return isImmTy(ImmTyNFMT) && isUInt<8>(getImm()); }
306   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
307   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
308   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
309   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
310   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
311   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
312   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
313   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
314   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
315   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
316   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
317   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
318   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
319   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
320   bool isHigh() const { return isImmTy(ImmTyHigh); }
321 
322   bool isMod() const {
323     return isClampSI() || isOModSI();
324   }
325 
326   bool isRegOrImm() const {
327     return isReg() || isImm();
328   }
329 
330   bool isRegClass(unsigned RCID) const;
331 
332   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
333     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
334   }
335 
336   bool isSCSrcB16() const {
337     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
338   }
339 
340   bool isSCSrcV2B16() const {
341     return isSCSrcB16();
342   }
343 
344   bool isSCSrcB32() const {
345     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
346   }
347 
348   bool isSCSrcB64() const {
349     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
350   }
351 
352   bool isSCSrcF16() const {
353     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
354   }
355 
356   bool isSCSrcV2F16() const {
357     return isSCSrcF16();
358   }
359 
360   bool isSCSrcF32() const {
361     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
362   }
363 
364   bool isSCSrcF64() const {
365     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
366   }
367 
368   bool isSSrcB32() const {
369     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
370   }
371 
372   bool isSSrcB16() const {
373     return isSCSrcB16() || isLiteralImm(MVT::i16);
374   }
375 
376   bool isSSrcV2B16() const {
377     llvm_unreachable("cannot happen");
378     return isSSrcB16();
379   }
380 
381   bool isSSrcB64() const {
382     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
383     // See isVSrc64().
384     return isSCSrcB64() || isLiteralImm(MVT::i64);
385   }
386 
387   bool isSSrcF32() const {
388     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
389   }
390 
391   bool isSSrcF64() const {
392     return isSCSrcB64() || isLiteralImm(MVT::f64);
393   }
394 
395   bool isSSrcF16() const {
396     return isSCSrcB16() || isLiteralImm(MVT::f16);
397   }
398 
399   bool isSSrcV2F16() const {
400     llvm_unreachable("cannot happen");
401     return isSSrcF16();
402   }
403 
404   bool isVCSrcB32() const {
405     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
406   }
407 
408   bool isVCSrcB64() const {
409     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
410   }
411 
412   bool isVCSrcB16() const {
413     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
414   }
415 
416   bool isVCSrcV2B16() const {
417     return isVCSrcB16();
418   }
419 
420   bool isVCSrcF32() const {
421     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
422   }
423 
424   bool isVCSrcF64() const {
425     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
426   }
427 
428   bool isVCSrcF16() const {
429     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
430   }
431 
432   bool isVCSrcV2F16() const {
433     return isVCSrcF16();
434   }
435 
436   bool isVSrcB32() const {
437     return isVCSrcF32() || isLiteralImm(MVT::i32);
438   }
439 
440   bool isVSrcB64() const {
441     return isVCSrcF64() || isLiteralImm(MVT::i64);
442   }
443 
444   bool isVSrcB16() const {
445     return isVCSrcF16() || isLiteralImm(MVT::i16);
446   }
447 
448   bool isVSrcV2B16() const {
449     llvm_unreachable("cannot happen");
450     return isVSrcB16();
451   }
452 
453   bool isVSrcF32() const {
454     return isVCSrcF32() || isLiteralImm(MVT::f32);
455   }
456 
457   bool isVSrcF64() const {
458     return isVCSrcF64() || isLiteralImm(MVT::f64);
459   }
460 
461   bool isVSrcF16() const {
462     return isVCSrcF16() || isLiteralImm(MVT::f16);
463   }
464 
465   bool isVSrcV2F16() const {
466     llvm_unreachable("cannot happen");
467     return isVSrcF16();
468   }
469 
470   bool isKImmFP32() const {
471     return isLiteralImm(MVT::f32);
472   }
473 
474   bool isKImmFP16() const {
475     return isLiteralImm(MVT::f16);
476   }
477 
478   bool isMem() const override {
479     return false;
480   }
481 
482   bool isExpr() const {
483     return Kind == Expression;
484   }
485 
486   bool isSoppBrTarget() const {
487     return isExpr() || isImm();
488   }
489 
490   bool isSWaitCnt() const;
491   bool isHwreg() const;
492   bool isSendMsg() const;
493   bool isSwizzle() const;
494   bool isSMRDOffset8() const;
495   bool isSMRDOffset20() const;
496   bool isSMRDLiteralOffset() const;
497   bool isDPPCtrl() const;
498   bool isGPRIdxMode() const;
499   bool isS16Imm() const;
500   bool isU16Imm() const;
501 
502   StringRef getExpressionAsToken() const {
503     assert(isExpr());
504     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
505     return S->getSymbol().getName();
506   }
507 
508   StringRef getToken() const {
509     assert(isToken());
510 
511     if (Kind == Expression)
512       return getExpressionAsToken();
513 
514     return StringRef(Tok.Data, Tok.Length);
515   }
516 
517   int64_t getImm() const {
518     assert(isImm());
519     return Imm.Val;
520   }
521 
522   ImmTy getImmTy() const {
523     assert(isImm());
524     return Imm.Type;
525   }
526 
527   unsigned getReg() const override {
528     return Reg.RegNo;
529   }
530 
531   SMLoc getStartLoc() const override {
532     return StartLoc;
533   }
534 
535   SMLoc getEndLoc() const override {
536     return EndLoc;
537   }
538 
539   SMRange getLocRange() const {
540     return SMRange(StartLoc, EndLoc);
541   }
542 
543   Modifiers getModifiers() const {
544     assert(isRegKind() || isImmTy(ImmTyNone));
545     return isRegKind() ? Reg.Mods : Imm.Mods;
546   }
547 
548   void setModifiers(Modifiers Mods) {
549     assert(isRegKind() || isImmTy(ImmTyNone));
550     if (isRegKind())
551       Reg.Mods = Mods;
552     else
553       Imm.Mods = Mods;
554   }
555 
556   bool hasModifiers() const {
557     return getModifiers().hasModifiers();
558   }
559 
560   bool hasFPModifiers() const {
561     return getModifiers().hasFPModifiers();
562   }
563 
564   bool hasIntModifiers() const {
565     return getModifiers().hasIntModifiers();
566   }
567 
568   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
569 
570   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
571 
572   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
573 
574   template <unsigned Bitwidth>
575   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
576 
577   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
578     addKImmFPOperands<16>(Inst, N);
579   }
580 
581   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
582     addKImmFPOperands<32>(Inst, N);
583   }
584 
585   void addRegOperands(MCInst &Inst, unsigned N) const;
586 
587   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
588     if (isRegKind())
589       addRegOperands(Inst, N);
590     else if (isExpr())
591       Inst.addOperand(MCOperand::createExpr(Expr));
592     else
593       addImmOperands(Inst, N);
594   }
595 
596   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
597     Modifiers Mods = getModifiers();
598     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
599     if (isRegKind()) {
600       addRegOperands(Inst, N);
601     } else {
602       addImmOperands(Inst, N, false);
603     }
604   }
605 
606   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
607     assert(!hasIntModifiers());
608     addRegOrImmWithInputModsOperands(Inst, N);
609   }
610 
611   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
612     assert(!hasFPModifiers());
613     addRegOrImmWithInputModsOperands(Inst, N);
614   }
615 
616   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
617     Modifiers Mods = getModifiers();
618     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
619     assert(isRegKind());
620     addRegOperands(Inst, N);
621   }
622 
623   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
624     assert(!hasIntModifiers());
625     addRegWithInputModsOperands(Inst, N);
626   }
627 
628   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
629     assert(!hasFPModifiers());
630     addRegWithInputModsOperands(Inst, N);
631   }
632 
633   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
634     if (isImm())
635       addImmOperands(Inst, N);
636     else {
637       assert(isExpr());
638       Inst.addOperand(MCOperand::createExpr(Expr));
639     }
640   }
641 
642   static void printImmTy(raw_ostream& OS, ImmTy Type) {
643     switch (Type) {
644     case ImmTyNone: OS << "None"; break;
645     case ImmTyGDS: OS << "GDS"; break;
646     case ImmTyOffen: OS << "Offen"; break;
647     case ImmTyIdxen: OS << "Idxen"; break;
648     case ImmTyAddr64: OS << "Addr64"; break;
649     case ImmTyOffset: OS << "Offset"; break;
650     case ImmTyInstOffset: OS << "InstOffset"; break;
651     case ImmTyOffset0: OS << "Offset0"; break;
652     case ImmTyOffset1: OS << "Offset1"; break;
653     case ImmTyGLC: OS << "GLC"; break;
654     case ImmTySLC: OS << "SLC"; break;
655     case ImmTyTFE: OS << "TFE"; break;
656     case ImmTyDFMT: OS << "DFMT"; break;
657     case ImmTyNFMT: OS << "NFMT"; break;
658     case ImmTyClampSI: OS << "ClampSI"; break;
659     case ImmTyOModSI: OS << "OModSI"; break;
660     case ImmTyDppCtrl: OS << "DppCtrl"; break;
661     case ImmTyDppRowMask: OS << "DppRowMask"; break;
662     case ImmTyDppBankMask: OS << "DppBankMask"; break;
663     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
664     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
665     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
666     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
667     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
668     case ImmTyDMask: OS << "DMask"; break;
669     case ImmTyUNorm: OS << "UNorm"; break;
670     case ImmTyDA: OS << "DA"; break;
671     case ImmTyR128: OS << "R128"; break;
672     case ImmTyLWE: OS << "LWE"; break;
673     case ImmTyOff: OS << "Off"; break;
674     case ImmTyExpTgt: OS << "ExpTgt"; break;
675     case ImmTyExpCompr: OS << "ExpCompr"; break;
676     case ImmTyExpVM: OS << "ExpVM"; break;
677     case ImmTyHwreg: OS << "Hwreg"; break;
678     case ImmTySendMsg: OS << "SendMsg"; break;
679     case ImmTyInterpSlot: OS << "InterpSlot"; break;
680     case ImmTyInterpAttr: OS << "InterpAttr"; break;
681     case ImmTyAttrChan: OS << "AttrChan"; break;
682     case ImmTyOpSel: OS << "OpSel"; break;
683     case ImmTyOpSelHi: OS << "OpSelHi"; break;
684     case ImmTyNegLo: OS << "NegLo"; break;
685     case ImmTyNegHi: OS << "NegHi"; break;
686     case ImmTySwizzle: OS << "Swizzle"; break;
687     case ImmTyHigh: OS << "High"; break;
688     }
689   }
690 
691   void print(raw_ostream &OS) const override {
692     switch (Kind) {
693     case Register:
694       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
695       break;
696     case Immediate:
697       OS << '<' << getImm();
698       if (getImmTy() != ImmTyNone) {
699         OS << " type: "; printImmTy(OS, getImmTy());
700       }
701       OS << " mods: " << Imm.Mods << '>';
702       break;
703     case Token:
704       OS << '\'' << getToken() << '\'';
705       break;
706     case Expression:
707       OS << "<expr " << *Expr << '>';
708       break;
709     }
710   }
711 
712   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
713                                       int64_t Val, SMLoc Loc,
714                                       ImmTy Type = ImmTyNone,
715                                       bool IsFPImm = false) {
716     auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
717     Op->Imm.Val = Val;
718     Op->Imm.IsFPImm = IsFPImm;
719     Op->Imm.Type = Type;
720     Op->Imm.Mods = Modifiers();
721     Op->StartLoc = Loc;
722     Op->EndLoc = Loc;
723     return Op;
724   }
725 
726   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
727                                         StringRef Str, SMLoc Loc,
728                                         bool HasExplicitEncodingSize = true) {
729     auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
730     Res->Tok.Data = Str.data();
731     Res->Tok.Length = Str.size();
732     Res->StartLoc = Loc;
733     Res->EndLoc = Loc;
734     return Res;
735   }
736 
737   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
738                                       unsigned RegNo, SMLoc S,
739                                       SMLoc E,
740                                       bool ForceVOP3) {
741     auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
742     Op->Reg.RegNo = RegNo;
743     Op->Reg.Mods = Modifiers();
744     Op->Reg.IsForcedVOP3 = ForceVOP3;
745     Op->StartLoc = S;
746     Op->EndLoc = E;
747     return Op;
748   }
749 
750   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
751                                        const class MCExpr *Expr, SMLoc S) {
752     auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
753     Op->Expr = Expr;
754     Op->StartLoc = S;
755     Op->EndLoc = S;
756     return Op;
757   }
758 };
759 
760 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
761   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
762   return OS;
763 }
764 
765 //===----------------------------------------------------------------------===//
766 // AsmParser
767 //===----------------------------------------------------------------------===//
768 
769 // Holds info related to the current kernel, e.g. count of SGPRs used.
770 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
771 // .amdgpu_hsa_kernel or at EOF.
772 class KernelScopeInfo {
773   int SgprIndexUnusedMin = -1;
774   int VgprIndexUnusedMin = -1;
775   MCContext *Ctx = nullptr;
776 
777   void usesSgprAt(int i) {
778     if (i >= SgprIndexUnusedMin) {
779       SgprIndexUnusedMin = ++i;
780       if (Ctx) {
781         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
782         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
783       }
784     }
785   }
786 
787   void usesVgprAt(int i) {
788     if (i >= VgprIndexUnusedMin) {
789       VgprIndexUnusedMin = ++i;
790       if (Ctx) {
791         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
792         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
793       }
794     }
795   }
796 
797 public:
798   KernelScopeInfo() = default;
799 
800   void initialize(MCContext &Context) {
801     Ctx = &Context;
802     usesSgprAt(SgprIndexUnusedMin = -1);
803     usesVgprAt(VgprIndexUnusedMin = -1);
804   }
805 
806   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
807     switch (RegKind) {
808       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
809       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
810       default: break;
811     }
812   }
813 };
814 
815 class AMDGPUAsmParser : public MCTargetAsmParser {
816   MCAsmParser &Parser;
817 
818   // Number of extra operands parsed after the first optional operand.
819   // This may be necessary to skip hardcoded mandatory operands.
820   static const unsigned MAX_OPR_LOOKAHEAD = 1;
821 
822   unsigned ForcedEncodingSize = 0;
823   bool ForcedDPP = false;
824   bool ForcedSDWA = false;
825   KernelScopeInfo KernelScope;
826 
827   /// @name Auto-generated Match Functions
828   /// {
829 
830 #define GET_ASSEMBLER_HEADER
831 #include "AMDGPUGenAsmMatcher.inc"
832 
833   /// }
834 
835 private:
836   bool ParseAsAbsoluteExpression(uint32_t &Ret);
837   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
838   bool ParseDirectiveHSACodeObjectVersion();
839   bool ParseDirectiveHSACodeObjectISA();
840   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
841   bool ParseDirectiveAMDKernelCodeT();
842   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
843   bool ParseDirectiveAMDGPUHsaKernel();
844 
845   bool ParseDirectiveISAVersion();
846   bool ParseDirectiveHSAMetadata();
847   bool ParseDirectivePALMetadata();
848 
849   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
850                              RegisterKind RegKind, unsigned Reg1,
851                              unsigned RegNum);
852   bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
853                            unsigned& RegNum, unsigned& RegWidth,
854                            unsigned *DwordRegIndex);
855   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
856                     bool IsAtomic, bool IsAtomicReturn);
857   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
858                  bool IsGdsHardcoded);
859 
860 public:
861   enum AMDGPUMatchResultTy {
862     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
863   };
864 
865   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
866 
867   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
868                const MCInstrInfo &MII,
869                const MCTargetOptions &Options)
870       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
871     MCAsmParserExtension::Initialize(Parser);
872 
873     if (getFeatureBits().none()) {
874       // Set default features.
875       copySTI().ToggleFeature("SOUTHERN_ISLANDS");
876     }
877 
878     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
879 
880     {
881       // TODO: make those pre-defined variables read-only.
882       // Currently there is none suitable machinery in the core llvm-mc for this.
883       // MCSymbol::isRedefinable is intended for another purpose, and
884       // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
885       AMDGPU::IsaInfo::IsaVersion ISA =
886           AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
887       MCContext &Ctx = getContext();
888       MCSymbol *Sym =
889           Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
890       Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
891       Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
892       Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
893       Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
894       Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
895     }
896     KernelScope.initialize(getContext());
897   }
898 
  // True if the current subtarget supports the XNACK feature.
  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }
902 
  // True if targeting Southern Islands (GFX6).
  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }
906 
  // True if targeting Sea Islands (GFX7).
  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }
910 
  // True if targeting Volcanic Islands (GFX8).
  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }
914 
  // True if targeting GFX9.
  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }
918 
  // True if the subtarget accepts 1/(2*pi) as an inline immediate.
  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }
922 
  // True if FLAT instructions support immediate offsets on this subtarget.
  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }
926 
  // SGPR102/SGPR103 are usable on every supported subtarget except VI.
  bool hasSGPR102_SGPR103() const {
    return !isVI();
  }
930 
  // True if the subtarget supports integer clamping (FeatureIntClamp).
  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }
934 
  // Returns the streamer's target-specific side, downcast to the AMDGPU one.
  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }
939 
  // Register info accessor usable from const members.
  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }
945 
  // Instruction info accessor (MII is provided by MCTargetAsmParser).
  const MCInstrInfo *getMII() const {
    return &MII;
  }
949 
  // Convenience accessor for the subtarget feature bits.
  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }
953 
  // Setters for encoding constraints derived from mnemonic suffixes
  // (e.g. "_e64", "_dpp", "_sdwa").
  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
957 
  // Query the forced-encoding state; a forced size of 64 means VOP3.
  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
962   ArrayRef<unsigned> getMatchedVariants() const;
963 
964   std::unique_ptr<AMDGPUOperand> parseRegister();
965   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
966   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
967   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
968                                       unsigned Kind) override;
969   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
970                                OperandVector &Operands, MCStreamer &Out,
971                                uint64_t &ErrorInfo,
972                                bool MatchingInlineAsm) override;
973   bool ParseDirective(AsmToken DirectiveID) override;
974   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
975   StringRef parseMnemonicSuffix(StringRef Name);
976   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
977                         SMLoc NameLoc, OperandVector &Operands) override;
978   //bool ProcessInstruction(MCInst &Inst);
979 
980   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
981 
982   OperandMatchResultTy
983   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
984                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
985                      bool (*ConvertResult)(int64_t &) = nullptr);
986 
987   OperandMatchResultTy parseOperandArrayWithPrefix(
988     const char *Prefix,
989     OperandVector &Operands,
990     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
991     bool (*ConvertResult)(int64_t&) = nullptr);
992 
993   OperandMatchResultTy
994   parseNamedBit(const char *Name, OperandVector &Operands,
995                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
996   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
997                                              StringRef &Value);
998 
999   bool parseAbsoluteExpr(int64_t &Val, bool AbsMod = false);
1000   OperandMatchResultTy parseImm(OperandVector &Operands, bool AbsMod = false);
1001   OperandMatchResultTy parseReg(OperandVector &Operands);
1002   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool AbsMod = false);
1003   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1004   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1005   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1006   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1007   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1008 
1009   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1010   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1011   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1012   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1013 
1014   bool parseCnt(int64_t &IntVal);
1015   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1016   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1017 
private:
  // Helper for symbolic operands (hwreg/sendmsg): the numeric id plus
  // whether it was spelled symbolically (affects diagnostics).
  struct OperandInfoTy {
    int64_t Id;
    bool IsSymbolic = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
  bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);

  void errorExpTgt();
  OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);

  // Post-match validation of the assembled instruction (constant-bus usage,
  // earlyclobber constraints, integer clamp support).
  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
  bool validateConstantBusLimitations(const MCInst &Inst);
  bool validateEarlyClobberLimitations(const MCInst &Inst);
  bool validateIntClampSupported(const MCInst &Inst);
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  // Low-level lexer helpers: "try" variants consume on match and report
  // success; skipToken/parseString emit ErrMsg on failure.
  bool trySkipId(const StringRef Id);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseExpr(int64_t &Imm);
1045 
public:
  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);

  // Swizzle operand parsing (swizzle offsets and the named macros below).
  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  // MUBUF/MTBUF conversions; the atomic variants forward to cvtMubufImpl
  // with (IsAtomic, IsAtomicReturn) flags.
  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  // Factories for optional operands that were omitted in the source text.
  AMDGPUOperand::Ptr defaultGLC() const;
  AMDGPUOperand::Ptr defaultSLC() const;
  AMDGPUOperand::Ptr defaultTFE() const;

  AMDGPUOperand::Ptr defaultDMask() const;
  AMDGPUOperand::Ptr defaultUNorm() const;
  AMDGPUOperand::Ptr defaultDA() const;
  AMDGPUOperand::Ptr defaultR128() const;
  AMDGPUOperand::Ptr defaultLWE() const;
  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMRDOffset20() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultOffsetU12() const;
  AMDGPUOperand::Ptr defaultOffsetS13() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);

  // DPP control operand parsing, defaults and conversion.
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands);

  // SDWA operand selection parsing and per-class conversions.
  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                uint64_t BasicInstType, bool skipVcc = false);
};
1119 
// Describes one optional named operand: its textual prefix, the immediate
// type it produces, whether it is a bare single-bit flag, and an optional
// callback that post-processes the parsed value.
struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};
1126 
1127 } // end anonymous namespace
1128 
1129 // May be called with integer type with equivalent bitwidth.
1130 static const fltSemantics *getFltSemantics(unsigned Size) {
1131   switch (Size) {
1132   case 4:
1133     return &APFloat::IEEEsingle();
1134   case 8:
1135     return &APFloat::IEEEdouble();
1136   case 2:
1137     return &APFloat::IEEEhalf();
1138   default:
1139     llvm_unreachable("unsupported fp type");
1140   }
1141 }
1142 
// Overload: derive the FP semantics from an MVT via its size in bytes.
static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}
1146 
// Map an operand-type enum (SIDefines.h) to the FP semantics of the
// immediate it encodes: 32-bit, 64-bit, or 16-bit (incl. packed v2 forms).
static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}
1170 
1171 //===----------------------------------------------------------------------===//
1172 // Operand
1173 //===----------------------------------------------------------------------===//
1174 
// Check whether FPLiteral fits into the FP format of VT. Precision loss is
// tolerated, but overflow/underflow is not. Note that FPLiteral is converted
// in place as a side effect.
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to the semantics of the target type VT.
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision lost but not overflow or underflow
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}
1192 
// Check whether this immediate can be encoded as an inline constant for an
// operand of the given type (i.e. needs no separate 32-bit literal dword).
bool AMDGPUOperand::isInlinableImm(MVT type) const {
  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    // For narrower operands, first check the double converts losslessly.
    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return AMDGPU::isInlinableLiteral16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (type.getScalarSizeInBits() == 16) {
    return AMDGPU::isInlinableLiteral16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}
1242 
// Check whether this immediate can be encoded as a (non-inline) literal
// operand of the given type.
bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isUIntN(Size, Imm.Val) || isIntN(Size, Imm.Val);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set low 64-bits of literal to zeroes but we accept this literals
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // Narrower FP operand: accept iff the double converts without
  // overflow/underflow (precision loss alone is fine).
  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, type);
}
1283 
1284 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1285   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1286 }
1287 
1288 bool AMDGPUOperand::isSDWARegKind() const {
1289   if (AsmParser->isVI())
1290     return isVReg();
1291   else if (AsmParser->isGFX9())
1292     return isRegKind();
1293   else
1294     return false;
1295 }
1296 
1297 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1298 {
1299   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1300   assert(Size == 2 || Size == 4 || Size == 8);
1301 
1302   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1303 
1304   if (Imm.Mods.Abs) {
1305     Val &= ~FpSignMask;
1306   }
1307   if (Imm.Mods.Neg) {
1308     Val ^= FpSignMask;
1309   }
1310 
1311   return Val;
1312 }
1313 
1314 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1315   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1316                              Inst.getNumOperands())) {
1317     addLiteralImmOperand(Inst, Imm.Val,
1318                          ApplyModifiers &
1319                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1320   } else {
1321     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1322     Inst.addOperand(MCOperand::createImm(Imm.Val));
1323   }
1324 }
1325 
// Append Val to Inst as a source operand that accepts literals. Inline
// constants are emitted unchanged; other values are truncated/converted to
// the operand's literal width. If ApplyModifiers is set, the operand's FP
// abs/neg modifiers are folded into Val first (FP operands only).
void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    // FP tokens are stored as double bit patterns regardless of operand size.
    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;

  if (Imm.IsFPImm) { // We got fp literal token
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        return;
      }

      // Non-inlineable
      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check if low 32 bits are zeros
        if (Literal.getLoBits(32) != 0) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
          "Can't encode literal as exact 64-bit floating-point operand. "
          "Low 32-bits will be set to zero");
        }

        // Only the high 32 bits of the literal are encoded.
        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
        return;
      }

      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them. This case should be checked earlier
      // in predicate methods (isLiteralImm())
      llvm_unreachable("fp literal in 64-bit integer instruction.");

    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert literal to the FP format of this operand type.
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision lost but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()

      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      if (OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
          OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
        // Packed operands: replicate the 16-bit value into both halves.
        ImmVal |= (ImmVal << 16);
      }

      Inst.addOperand(MCOperand::createImm(ImmVal));
      return;
    }
    default:
      llvm_unreachable("invalid operand size");
    }

    return;
  }

  // We got int literal token.
  // Only sign extend inline immediates.
  // FIXME: No errors on truncation
  switch (OpTy) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    if (isInt<32>(Val) &&
        AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
    return;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
    return;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    if (isInt<16>(Val) &&
        AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffff));
    return;

  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
    // Packed operands: the 16-bit value must be inlinable; it is replicated
    // into both halves of the 32-bit encoding.
    auto LiteralVal = static_cast<uint16_t>(Literal.getLoBits(16).getZExtValue());
    assert(AMDGPU::isInlinableLiteral16(LiteralVal,
                                        AsmParser->hasInv2PiInlineImm()));

    uint32_t ImmVal = static_cast<uint32_t>(LiteralVal) << 16 |
                      static_cast<uint32_t>(LiteralVal);
    Inst.addOperand(MCOperand::createImm(ImmVal));
    return;
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}
1464 
// Append a KImm FP operand of the given bit width. Integer tokens are
// truncated to Bitwidth; FP tokens (stored as a double bit pattern) are
// converted to the Bitwidth-bit FP format. N is unused (operand is appended).
template <unsigned Bitwidth>
void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
  APInt Literal(64, Imm.Val);

  if (!Imm.IsFPImm) {
    // We got int literal token.
    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
    return;
  }

  bool Lost;
  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
  FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
                    APFloat::rmNearestTiesToEven, &Lost);
  Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
}
1481 
// Append this operand's register, translated to the subtarget-specific MC
// register number. N is unused (the operand is appended).
void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
}
1485 
1486 //===----------------------------------------------------------------------===//
1487 // AsmParser
1488 //===----------------------------------------------------------------------===//
1489 
1490 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1491   if (Is == IS_VGPR) {
1492     switch (RegWidth) {
1493       default: return -1;
1494       case 1: return AMDGPU::VGPR_32RegClassID;
1495       case 2: return AMDGPU::VReg_64RegClassID;
1496       case 3: return AMDGPU::VReg_96RegClassID;
1497       case 4: return AMDGPU::VReg_128RegClassID;
1498       case 8: return AMDGPU::VReg_256RegClassID;
1499       case 16: return AMDGPU::VReg_512RegClassID;
1500     }
1501   } else if (Is == IS_TTMP) {
1502     switch (RegWidth) {
1503       default: return -1;
1504       case 1: return AMDGPU::TTMP_32RegClassID;
1505       case 2: return AMDGPU::TTMP_64RegClassID;
1506       case 4: return AMDGPU::TTMP_128RegClassID;
1507       case 8: return AMDGPU::TTMP_256RegClassID;
1508       case 16: return AMDGPU::TTMP_512RegClassID;
1509     }
1510   } else if (Is == IS_SGPR) {
1511     switch (RegWidth) {
1512       default: return -1;
1513       case 1: return AMDGPU::SGPR_32RegClassID;
1514       case 2: return AMDGPU::SGPR_64RegClassID;
1515       case 4: return AMDGPU::SGPR_128RegClassID;
1516       case 8: return AMDGPU::SGPR_256RegClassID;
1517       case 16: return AMDGPU::SGPR_512RegClassID;
1518     }
1519   }
1520   return -1;
1521 }
1522 
// Map a special-register name (exec, vcc, m0, ...) to its MC register
// number; returns 0 (no register) when the name is not recognized.
static unsigned getSpecialRegForName(StringRef RegName) {
  return StringSwitch<unsigned>(RegName)
    .Case("exec", AMDGPU::EXEC)
    .Case("vcc", AMDGPU::VCC)
    .Case("flat_scratch", AMDGPU::FLAT_SCR)
    .Case("xnack_mask", AMDGPU::XNACK_MASK)
    .Case("m0", AMDGPU::M0)
    .Case("scc", AMDGPU::SCC)
    .Case("tba", AMDGPU::TBA)
    .Case("tma", AMDGPU::TMA)
    .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
    .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
    .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
    .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
    .Case("vcc_lo", AMDGPU::VCC_LO)
    .Case("vcc_hi", AMDGPU::VCC_HI)
    .Case("exec_lo", AMDGPU::EXEC_LO)
    .Case("exec_hi", AMDGPU::EXEC_HI)
    .Case("tma_lo", AMDGPU::TMA_LO)
    .Case("tma_hi", AMDGPU::TMA_HI)
    .Case("tba_lo", AMDGPU::TBA_LO)
    .Case("tba_hi", AMDGPU::TBA_HI)
    .Default(0);
}
1547 
1548 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1549                                     SMLoc &EndLoc) {
1550   auto R = parseRegister();
1551   if (!R) return true;
1552   assert(R->isReg());
1553   RegNo = R->getReg();
1554   StartLoc = R->getStartLoc();
1555   EndLoc = R->getEndLoc();
1556   return false;
1557 }
1558 
1559 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1560                                             RegisterKind RegKind, unsigned Reg1,
1561                                             unsigned RegNum) {
1562   switch (RegKind) {
1563   case IS_SPECIAL:
1564     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1565       Reg = AMDGPU::EXEC;
1566       RegWidth = 2;
1567       return true;
1568     }
1569     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1570       Reg = AMDGPU::FLAT_SCR;
1571       RegWidth = 2;
1572       return true;
1573     }
1574     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
1575       Reg = AMDGPU::XNACK_MASK;
1576       RegWidth = 2;
1577       return true;
1578     }
1579     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1580       Reg = AMDGPU::VCC;
1581       RegWidth = 2;
1582       return true;
1583     }
1584     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1585       Reg = AMDGPU::TBA;
1586       RegWidth = 2;
1587       return true;
1588     }
1589     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
1590       Reg = AMDGPU::TMA;
1591       RegWidth = 2;
1592       return true;
1593     }
1594     return false;
1595   case IS_VGPR:
1596   case IS_SGPR:
1597   case IS_TTMP:
1598     if (Reg1 != Reg + RegWidth) {
1599       return false;
1600     }
1601     RegWidth++;
1602     return true;
1603   default:
1604     llvm_unreachable("unexpected register kind");
1605   }
1606 }
1607 
// Parse one register reference into (RegKind, Reg, RegNum, RegWidth).
// Accepts special registers (vcc, exec, ...), single registers (v0, s5,
// ttmp2), ranges (v[0:3], s[2]) and bracketed lists of consecutive
// registers ([s0,s1,s2,s3]). If DwordRegIndex is non-null it receives the
// dword index of the first register. Returns false on any syntax or
// validity error.
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          unsigned *DwordRegIndex) {
  if (DwordRegIndex) { *DwordRegIndex = 0; }
  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (getLexer().is(AsmToken::Identifier)) {
    StringRef RegName = Parser.getTok().getString();
    if ((Reg = getSpecialRegForName(RegName))) {
      Parser.Lex();
      RegKind = IS_SPECIAL;
    } else {
      // Classify by prefix: v..., s..., or ttmp...
      unsigned RegNumIndex = 0;
      if (RegName[0] == 'v') {
        RegNumIndex = 1;
        RegKind = IS_VGPR;
      } else if (RegName[0] == 's') {
        RegNumIndex = 1;
        RegKind = IS_SGPR;
      } else if (RegName.startswith("ttmp")) {
        RegNumIndex = strlen("ttmp");
        RegKind = IS_TTMP;
      } else {
        return false;
      }
      if (RegName.size() > RegNumIndex) {
        // Single 32-bit register: vXX.
        if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
          return false;
        Parser.Lex();
        RegWidth = 1;
      } else {
        // Range of registers: v[XX:YY]. ":YY" is optional.
        Parser.Lex();
        int64_t RegLo, RegHi;
        if (getLexer().isNot(AsmToken::LBrac))
          return false;
        Parser.Lex();

        if (getParser().parseAbsoluteExpression(RegLo))
          return false;

        // Either "]" (single register) or ":" (explicit range) follows.
        const bool isRBrace = getLexer().is(AsmToken::RBrac);
        if (!isRBrace && getLexer().isNot(AsmToken::Colon))
          return false;
        Parser.Lex();

        if (isRBrace) {
          RegHi = RegLo;
        } else {
          if (getParser().parseAbsoluteExpression(RegHi))
            return false;

          if (getLexer().isNot(AsmToken::RBrac))
            return false;
          Parser.Lex();
        }
        RegNum = (unsigned) RegLo;
        RegWidth = (RegHi - RegLo) + 1;
      }
    }
  } else if (getLexer().is(AsmToken::LBrac)) {
    // List of consecutive registers: [s0,s1,s2,s3]
    Parser.Lex();
    if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
      return false;
    if (RegWidth != 1)
      return false;
    RegisterKind RegKind1;
    unsigned Reg1, RegNum1, RegWidth1;
    do {
      if (getLexer().is(AsmToken::Comma)) {
        Parser.Lex();
      } else if (getLexer().is(AsmToken::RBrac)) {
        Parser.Lex();
        break;
      } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
        // Each element must be a single register of the same kind,
        // consecutive with the run accumulated so far.
        if (RegWidth1 != 1) {
          return false;
        }
        if (RegKind1 != RegKind) {
          return false;
        }
        if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
          return false;
        }
      } else {
        return false;
      }
    } while (true);
  } else {
    return false;
  }
  // Translate (RegKind, RegNum, RegWidth) into an actual MC register.
  switch (RegKind) {
  case IS_SPECIAL:
    RegNum = 0;
    RegWidth = 1;
    break;
  case IS_VGPR:
  case IS_SGPR:
  case IS_TTMP:
  {
    unsigned Size = 1;
    if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
      // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
      Size = std::min(RegWidth, 4u);
    }
    if (RegNum % Size != 0)
      return false;
    if (DwordRegIndex) { *DwordRegIndex = RegNum; }
    // Registers of width > 1 dword are indexed by aligned groups.
    RegNum = RegNum / Size;
    int RCID = getRegClass(RegKind, RegWidth);
    if (RCID == -1)
      return false;
    const MCRegisterClass RC = TRI->getRegClass(RCID);
    if (RegNum >= RC.getNumRegs())
      return false;
    Reg = RC.getRegister(RegNum);
    break;
  }

  default:
    llvm_unreachable("unexpected register kind");
  }

  // Finally reject registers the current subtarget does not have.
  if (!subtargetHasRegister(*TRI, Reg))
    return false;
  return true;
}
1736 
1737 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
1738   const auto &Tok = Parser.getTok();
1739   SMLoc StartLoc = Tok.getLoc();
1740   SMLoc EndLoc = Tok.getEndLoc();
1741   RegisterKind RegKind;
1742   unsigned Reg, RegNum, RegWidth, DwordRegIndex;
1743 
1744   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
1745     return nullptr;
1746   }
1747   KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
1748   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false);
1749 }
1750 
// Parse an absolute integer expression into Val; returns true on error.
// With AbsMod set, a bare numeric token followed by '|' is parsed as a
// primary expression so the trailing '|' of an |x| modifier is not consumed.
bool
AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool AbsMod) {
  if (AbsMod && getLexer().peekTok().is(AsmToken::Pipe) &&
      (getLexer().getKind() == AsmToken::Integer ||
       getLexer().getKind() == AsmToken::Real)) {
    // This is a workaround for handling operands like these:
    //     |1.0|
    //     |-1|
    // This syntax is not compatible with syntax of standard
    // MC expressions (due to the trailing '|').

    SMLoc EndLoc;
    const MCExpr *Expr;

    if (getParser().parsePrimaryExpr(Expr, EndLoc)) {
      return true;
    }

    // Success iff the expression folds to a constant.
    return !Expr->evaluateAsAbsolute(Val);
  }

  return getParser().parseAbsoluteExpression(Val);
}
1774 
// Parse an integer or FP literal with an optional leading '-'. FP values are
// stored as the bit pattern of a double with the IsFPImm flag set. AbsMod
// enables the |value| workaround in parseAbsoluteExpr.
OperandMatchResultTy
AMDGPUAsmParser::parseImm(OperandVector &Operands, bool AbsMod) {
  // TODO: add syntactic sugar for 1/(2*PI)
  bool Minus = false;
  if (getLexer().getKind() == AsmToken::Minus) {
    // Consume '-' only when a numeric token follows; otherwise it may be
    // part of something else (e.g. an expression), so report no-match.
    const AsmToken NextToken = getLexer().peekTok();
    if (!NextToken.is(AsmToken::Integer) &&
        !NextToken.is(AsmToken::Real)) {
        return MatchOperand_NoMatch;
    }
    Minus = true;
    Parser.Lex();
  }

  SMLoc S = Parser.getTok().getLoc();
  switch(getLexer().getKind()) {
  case AsmToken::Integer: {
    int64_t IntVal;
    if (parseAbsoluteExpr(IntVal, AbsMod))
      return MatchOperand_ParseFail;
    if (Minus)
      IntVal *= -1;
    Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
    return MatchOperand_Success;
  }
  case AsmToken::Real: {
    int64_t IntVal;
    if (parseAbsoluteExpr(IntVal, AbsMod))
      return MatchOperand_ParseFail;

    // The lexer stores real tokens as double bits in an int64_t.
    APFloat F(BitsToDouble(IntVal));
    if (Minus)
      F.changeSign();
    Operands.push_back(
        AMDGPUOperand::CreateImm(this, F.bitcastToAPInt().getZExtValue(), S,
                                 AMDGPUOperand::ImmTyNone, true));
    return MatchOperand_Success;
  }
  default:
    return MatchOperand_NoMatch;
  }
}
1817 
1818 OperandMatchResultTy
1819 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
1820   if (auto R = parseRegister()) {
1821     assert(R->isReg());
1822     R->Reg.IsForcedVOP3 = isForcedVOP3();
1823     Operands.push_back(std::move(R));
1824     return MatchOperand_Success;
1825   }
1826   return MatchOperand_NoMatch;
1827 }
1828 
1829 OperandMatchResultTy
1830 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool AbsMod) {
1831   auto res = parseImm(Operands, AbsMod);
1832   if (res != MatchOperand_NoMatch) {
1833     return res;
1834   }
1835 
1836   return parseReg(Operands);
1837 }
1838 
// Parse an operand that may carry floating-point input modifiers: a leading
// '-' (floating-point neg for non-literals), 'neg(...)', 'abs(...)' or
// '|...|'. When AllowImm is false only registers are accepted.
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Negate = false, Negate2 = false, Abs = false, Abs2 = false;

  if (getLexer().getKind()== AsmToken::Minus) {
    const AsmToken NextToken = getLexer().peekTok();

    // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
    if (NextToken.is(AsmToken::Minus)) {
      Error(Parser.getTok().getLoc(), "invalid syntax, expected 'neg' modifier");
      return MatchOperand_ParseFail;
    }

    // '-' followed by an integer literal N should be interpreted as integer
    // negation rather than a floating-point NEG modifier applied to N.
    // Besides being counter-intuitive, such use of floating-point NEG modifier
    // results in different meaning of integer literals used with VOP1/2/C
    // and VOP3, for example:
    //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
    //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
    // Negative fp literals should be handled likewise for uniformity
    if (!NextToken.is(AsmToken::Integer) && !NextToken.is(AsmToken::Real)) {
      Parser.Lex();
      Negate = true;
    }
  }

  // Optional 'neg(' wrapper; mutually exclusive with a consumed leading '-'.
  if (getLexer().getKind() == AsmToken::Identifier &&
      Parser.getTok().getString() == "neg") {
    if (Negate) {
      Error(Parser.getTok().getLoc(), "expected register or immediate");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Negate2 = true;
    if (getLexer().isNot(AsmToken::LParen)) {
      Error(Parser.getTok().getLoc(), "expected left paren after neg");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
  }

  // Optional 'abs(' wrapper.
  if (getLexer().getKind() == AsmToken::Identifier &&
      Parser.getTok().getString() == "abs") {
    Parser.Lex();
    Abs2 = true;
    if (getLexer().isNot(AsmToken::LParen)) {
      Error(Parser.getTok().getLoc(), "expected left paren after abs");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
  }

  // Optional '|' abs syntax; mutually exclusive with 'abs(...)'.
  if (getLexer().getKind() == AsmToken::Pipe) {
    if (Abs2) {
      Error(Parser.getTok().getLoc(), "expected register or immediate");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Abs = true;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return Res;
  }

  // Consume the closing delimiters and record the modifiers.
  AMDGPUOperand::Modifiers Mods;
  if (Abs) {
    if (getLexer().getKind() != AsmToken::Pipe) {
      Error(Parser.getTok().getLoc(), "expected vertical bar");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Mods.Abs = true;
  }
  if (Abs2) {
    if (getLexer().isNot(AsmToken::RParen)) {
      Error(Parser.getTok().getLoc(), "expected closing parentheses");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Mods.Abs = true;
  }

  if (Negate) {
    Mods.Neg = true;
  } else if (Negate2) {
    if (getLexer().isNot(AsmToken::RParen)) {
      Error(Parser.getTok().getLoc(), "expected closing parentheses");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Mods.Neg = true;
  }

  // Attach the collected modifiers to the operand just parsed.
  if (Mods.hasFPModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    Op.setModifiers(Mods);
  }
  return MatchOperand_Success;
}
1947 
1948 OperandMatchResultTy
1949 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
1950                                                bool AllowImm) {
1951   bool Sext = false;
1952 
1953   if (getLexer().getKind() == AsmToken::Identifier &&
1954       Parser.getTok().getString() == "sext") {
1955     Parser.Lex();
1956     Sext = true;
1957     if (getLexer().isNot(AsmToken::LParen)) {
1958       Error(Parser.getTok().getLoc(), "expected left paren after sext");
1959       return MatchOperand_ParseFail;
1960     }
1961     Parser.Lex();
1962   }
1963 
1964   OperandMatchResultTy Res;
1965   if (AllowImm) {
1966     Res = parseRegOrImm(Operands);
1967   } else {
1968     Res = parseReg(Operands);
1969   }
1970   if (Res != MatchOperand_Success) {
1971     return Res;
1972   }
1973 
1974   AMDGPUOperand::Modifiers Mods;
1975   if (Sext) {
1976     if (getLexer().isNot(AsmToken::RParen)) {
1977       Error(Parser.getTok().getLoc(), "expected closing parentheses");
1978       return MatchOperand_ParseFail;
1979     }
1980     Parser.Lex();
1981     Mods.Sext = true;
1982   }
1983 
1984   if (Mods.hasIntModifiers()) {
1985     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
1986     Op.setModifiers(Mods);
1987   }
1988 
1989   return MatchOperand_Success;
1990 }
1991 
// Parse a register operand (no immediates) with optional FP modifiers.
OperandMatchResultTy
AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
  return parseRegOrImmWithFPInputMods(Operands, false);
}
1996 
// Parse a register operand (no immediates) with an optional sext modifier.
OperandMatchResultTy
AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
  return parseRegOrImmWithIntInputMods(Operands, false);
}
2001 
2002 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2003   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2004   if (Reg) {
2005     Operands.push_back(std::move(Reg));
2006     return MatchOperand_Success;
2007   }
2008 
2009   const AsmToken &Tok = Parser.getTok();
2010   if (Tok.getString() == "off") {
2011     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Tok.getLoc(),
2012                                                 AMDGPUOperand::ImmTyOff, false));
2013     Parser.Lex();
2014     return MatchOperand_Success;
2015   }
2016 
2017   return MatchOperand_NoMatch;
2018 }
2019 
// Target-specific filtering applied after the generated matcher: reject
// candidates whose encoding family conflicts with a user-forced encoding
// and enforce a few per-opcode operand constraints.
unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;

  // Reject the match when the user forced a specific encoding
  // (_e32/_e64/_dpp/_sdwa suffix) and this candidate is from another family.
  if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
      (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
      (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
      (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
    return Match_InvalidOperand;

  // VOP3 opcodes flagged VOPAsmPrefer32Bit should be encoded as e32 unless
  // the 64-bit encoding was explicitly requested.
  if ((TSFlags & SIInstrFlags::VOP3) &&
      (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
      getForcedEncodingSize() != 64)
    return Match_PreferE32;

  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    // v_mac_f32/16 allow only dst_sel == DWORD;
    auto OpNum =
        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
    const auto &Op = Inst.getOperand(OpNum);
    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
      return Match_InvalidOperand;
    }
  }

  // Subtargets without flat-instruction offsets only accept offset == 0.
  if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) {
    // FIXME: Produces error without correct column reported.
    auto OpNum =
        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset);
    const auto &Op = Inst.getOperand(OpNum);
    if (Op.getImm() != 0)
      return Match_InvalidOperand;
  }

  return Match_Success;
}
2056 
2057 // What asm variants we should check
2058 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2059   if (getForcedEncodingSize() == 32) {
2060     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2061     return makeArrayRef(Variants);
2062   }
2063 
2064   if (isForcedVOP3()) {
2065     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2066     return makeArrayRef(Variants);
2067   }
2068 
2069   if (isForcedSDWA()) {
2070     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2071                                         AMDGPUAsmVariants::SDWA9};
2072     return makeArrayRef(Variants);
2073   }
2074 
2075   if (isForcedDPP()) {
2076     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2077     return makeArrayRef(Variants);
2078   }
2079 
2080   static const unsigned Variants[] = {
2081     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2082     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2083   };
2084 
2085   return makeArrayRef(Variants);
2086 }
2087 
2088 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2089   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2090   const unsigned Num = Desc.getNumImplicitUses();
2091   for (unsigned i = 0; i < Num; ++i) {
2092     unsigned Reg = Desc.ImplicitUses[i];
2093     switch (Reg) {
2094     case AMDGPU::FLAT_SCR:
2095     case AMDGPU::VCC:
2096     case AMDGPU::M0:
2097       return Reg;
2098     default:
2099       break;
2100     }
2101   }
2102   return AMDGPU::NoRegister;
2103 }
2104 
// NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
                                       unsigned OpIdx) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());

  // Only SI source operands can hold inline constants.
  if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
    return false;
  }

  const MCOperand &MO = Inst.getOperand(OpIdx);

  int64_t Val = MO.getImm();
  auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);

  switch (OpSize) { // expected operand size
  case 8:
    return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
  case 4:
    return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
  case 2: {
    // 16-bit operands: packed v2i16/v2f16 operand types have their own rule.
    const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
      return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
    } else {
      return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
    }
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}
2140 
2141 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2142   const MCOperand &MO = Inst.getOperand(OpIdx);
2143   if (MO.isImm()) {
2144     return !isInlineConstant(Inst, OpIdx);
2145   }
2146   return !MO.isReg() ||
2147          isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
2148 }
2149 
// Enforce the hardware limit of a single constant-bus read (SGPR, literal
// or expression) across the sources of a VALU instruction.
bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  unsigned ConstantBusUseCount = 0;

  if (Desc.TSFlags &
      (SIInstrFlags::VOPC |
       SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
       SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
       SIInstrFlags::SDWA)) {
    // Check special imm operands (used by madmk, etc)
    if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
      ++ConstantBusUseCount;
    }

    // An implicit SGPR read (FLAT_SCR, VCC or M0) also occupies the bus.
    unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
    if (SGPRUsed != AMDGPU::NoRegister) {
      ++ConstantBusUseCount;
    }

    const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
    const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
    const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

    for (int OpIdx : OpIndices) {
      if (OpIdx == -1) break;

      const MCOperand &MO = Inst.getOperand(OpIdx);
      if (usesConstantBus(Inst, OpIdx)) {
        if (MO.isReg()) {
          // The same register read twice counts only once, hence the
          // SGPRUsed tracking.
          const unsigned Reg = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersections like these
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but they are disabled anyway.
          // Note that this code mimics SIInstrInfo::verifyInstruction
          if (Reg != SGPRUsed) {
            ++ConstantBusUseCount;
          }
          SGPRUsed = Reg;
        } else { // Expression or a literal
          ++ConstantBusUseCount;
        }
      }
    }
  }

  return ConstantBusUseCount <= 1;
}
2202 
// For instructions whose vdst operand carries the EARLY_CLOBBER constraint,
// require the destination register to be disjoint from every source register.
bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);

  // Nothing to check unless there is an early-clobber vdst.
  const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
  if (DstIdx == -1 ||
      Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
    return true;
  }

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();

  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

  assert(DstIdx != -1);
  const MCOperand &Dst = Inst.getOperand(DstIdx);
  assert(Dst.isReg());
  const unsigned DstReg = mc2PseudoReg(Dst.getReg());

  const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };

  // Fail if any source register overlaps the destination register.
  for (int SrcIdx : SrcIndices) {
    if (SrcIdx == -1) break;
    const MCOperand &Src = Inst.getOperand(SrcIdx);
    if (Src.isReg()) {
      const unsigned SrcReg = mc2PseudoReg(Src.getReg());
      if (isRegIntersect(DstReg, SrcReg, TRI)) {
        return false;
      }
    }
  }

  return true;
}
2239 
2240 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2241 
2242   const unsigned Opc = Inst.getOpcode();
2243   const MCInstrDesc &Desc = MII.get(Opc);
2244 
2245   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2246     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2247     assert(ClampIdx != -1);
2248     return Inst.getOperand(ClampIdx).getImm() == 0;
2249   }
2250 
2251   return true;
2252 }
2253 
2254 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
2255                                           const SMLoc &IDLoc) {
2256   if (!validateConstantBusLimitations(Inst)) {
2257     Error(IDLoc,
2258       "invalid operand (violates constant bus restrictions)");
2259     return false;
2260   }
2261   if (!validateEarlyClobberLimitations(Inst)) {
2262     Error(IDLoc,
2263       "destination must be different than all sources");
2264     return false;
2265   }
2266   if (!validateIntClampSupported(Inst)) {
2267     Error(IDLoc,
2268       "integer clamping is not supported on this GPU");
2269     return false;
2270   }
2271 
2272   return true;
2273 }
2274 
static std::string AMDGPUMnemonicSpellCheck(StringRef S, uint64_t FBS,
                                            unsigned VariantID = 0);

// Match the parsed operands against each eligible asm variant and emit the
// instruction on success. When every variant fails, diagnose using the most
// specific failure status observed across the variants.
bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  MCInst Inst;
  unsigned Result = Match_Success;
  for (auto Variant : getMatchedVariants()) {
    uint64_t EI;
    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
                                  Variant);
    // We order match statuses from least to most specific. We use most specific
    // status as resulting
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
    if ((R == Match_Success) ||
        (R == Match_PreferE32) ||
        (R == Match_MissingFeature && Result != Match_PreferE32) ||
        (R == Match_InvalidOperand && Result != Match_MissingFeature
                                   && Result != Match_PreferE32) ||
        (R == Match_MnemonicFail   && Result != Match_InvalidOperand
                                   && Result != Match_MissingFeature
                                   && Result != Match_PreferE32)) {
      Result = R;
      ErrorInfo = EI;
    }
    if (R == Match_Success)
      break;
  }

  switch (Result) {
  default: break;
  case Match_Success:
    // Run target-specific semantic checks before emitting.
    if (!validateInstruction(Inst, IDLoc)) {
      return true;
    }
    Inst.setLoc(IDLoc);
    Out.EmitInstruction(Inst, getSTI());
    return false;

  case Match_MissingFeature:
    return Error(IDLoc, "instruction not supported on this GPU");

  case Match_MnemonicFail: {
    // Unknown mnemonic: suggest the closest valid one, if any.
    uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
    std::string Suggestion = AMDGPUMnemonicSpellCheck(
        ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
    return Error(IDLoc, "invalid instruction" + Suggestion,
                 ((AMDGPUOperand &)*Operands[0]).getLocRange());
  }

  case Match_InvalidOperand: {
    // Point the diagnostic at the offending operand when its index is known.
    SMLoc ErrorLoc = IDLoc;
    if (ErrorInfo != ~0ULL) {
      if (ErrorInfo >= Operands.size()) {
        return Error(IDLoc, "too few operands for instruction");
      }
      ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
      if (ErrorLoc == SMLoc())
        ErrorLoc = IDLoc;
    }
    return Error(ErrorLoc, "invalid operand for instruction");
  }

  case Match_PreferE32:
    return Error(IDLoc, "internal error: instruction without _e64 suffix "
                        "should be encoded as e32");
  }
  llvm_unreachable("Implement any new match types added!");
}
2347 
2348 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
2349   int64_t Tmp = -1;
2350   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
2351     return true;
2352   }
2353   if (getParser().parseAbsoluteExpression(Tmp)) {
2354     return true;
2355   }
2356   Ret = static_cast<uint32_t>(Tmp);
2357   return false;
2358 }
2359 
2360 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
2361                                                uint32_t &Minor) {
2362   if (ParseAsAbsoluteExpression(Major))
2363     return TokError("invalid major version");
2364 
2365   if (getLexer().isNot(AsmToken::Comma))
2366     return TokError("minor version number required, comma expected");
2367   Lex();
2368 
2369   if (ParseAsAbsoluteExpression(Minor))
2370     return TokError("invalid minor version");
2371 
2372   return false;
2373 }
2374 
2375 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
2376   uint32_t Major;
2377   uint32_t Minor;
2378 
2379   if (ParseDirectiveMajorMinor(Major, Minor))
2380     return true;
2381 
2382   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
2383   return false;
2384 }
2385 
// Handle .hsa_code_object_isa. With no arguments, the ISA version of the
// targeted GPU is emitted; otherwise the directive supplies
// "major, minor, stepping, "vendor", "arch"" explicitly.
bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
  uint32_t Major;
  uint32_t Minor;
  uint32_t Stepping;
  StringRef VendorName;
  StringRef ArchName;

  // If this directive has no arguments, then use the ISA version for the
  // targeted GPU.
  if (getLexer().is(AsmToken::EndOfStatement)) {
    AMDGPU::IsaInfo::IsaVersion ISA =
        AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
    getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
                                                      ISA.Stepping,
                                                      "AMD", "AMDGPU");
    return false;
  }

  if (ParseDirectiveMajorMinor(Major, Minor))
    return true;

  if (getLexer().isNot(AsmToken::Comma))
    return TokError("stepping version number required, comma expected");
  Lex();

  if (ParseAsAbsoluteExpression(Stepping))
    return TokError("invalid stepping version");

  if (getLexer().isNot(AsmToken::Comma))
    return TokError("vendor name required, comma expected");
  Lex();

  if (getLexer().isNot(AsmToken::String))
    return TokError("invalid vendor name");

  VendorName = getLexer().getTok().getStringContents();
  Lex();

  if (getLexer().isNot(AsmToken::Comma))
    return TokError("arch name required, comma expected");
  Lex();

  if (getLexer().isNot(AsmToken::String))
    return TokError("invalid arch name");

  ArchName = getLexer().getTok().getStringContents();
  Lex();

  getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
                                                    VendorName, ArchName);
  return false;
}
2438 
2439 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
2440                                                amd_kernel_code_t &Header) {
2441   SmallString<40> ErrStr;
2442   raw_svector_ostream Err(ErrStr);
2443   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
2444     return TokError(Err.str());
2445   }
2446   Lex();
2447   return false;
2448 }
2449 
2450 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
2451   amd_kernel_code_t Header;
2452   AMDGPU::initDefaultAMDKernelCodeT(Header, getFeatureBits());
2453 
2454   while (true) {
2455     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
2456     // will set the current token to EndOfStatement.
2457     while(getLexer().is(AsmToken::EndOfStatement))
2458       Lex();
2459 
2460     if (getLexer().isNot(AsmToken::Identifier))
2461       return TokError("expected value identifier or .end_amd_kernel_code_t");
2462 
2463     StringRef ID = getLexer().getTok().getIdentifier();
2464     Lex();
2465 
2466     if (ID == ".end_amd_kernel_code_t")
2467       break;
2468 
2469     if (ParseAMDKernelCodeTValue(ID, Header))
2470       return true;
2471   }
2472 
2473   getTargetStreamer().EmitAMDKernelCodeT(Header);
2474 
2475   return false;
2476 }
2477 
2478 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
2479   if (getLexer().isNot(AsmToken::Identifier))
2480     return TokError("expected symbol name");
2481 
2482   StringRef KernelName = Parser.getTok().getString();
2483 
2484   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
2485                                            ELF::STT_AMDGPU_HSA_KERNEL);
2486   Lex();
2487   KernelScope.initialize(getContext());
2488   return false;
2489 }
2490 
// Handle .amd_amdgpu_isa: the version string in the directive must match the
// one derived from the subtarget; it is emitted on success.
bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
    return Error(getParser().getTok().getLoc(),
                 ".amd_amdgpu_isa directive is not available on non-amdgcn "
                 "architectures");
  }

  auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();

  // Build the expected version string from the subtarget for comparison.
  std::string ISAVersionStringFromSTI;
  raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
  IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);

  if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
    return Error(getParser().getTok().getLoc(),
                 ".amd_amdgpu_isa directive does not match triple and/or mcpu "
                 "arguments specified through the command line");
  }

  getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
  Lex();

  return false;
}
2515 
// Handle the HSA metadata begin directive: collect the raw YAML text
// verbatim (whitespace preserved) until the matching end directive, then
// hand the string to the target streamer for validation and emission.
bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
    return Error(getParser().getTok().getLoc(),
                 (Twine(HSAMD::AssemblerDirectiveBegin) + Twine(" directive is "
                 "not available on non-amdhsa OSes")).str());
  }

  std::string HSAMetadataString;
  raw_string_ostream YamlStream(HSAMetadataString);

  // YAML is whitespace-sensitive, so keep space tokens while collecting.
  getLexer().setSkipSpace(false);

  bool FoundEnd = false;
  while (!getLexer().is(AsmToken::Eof)) {
    // Copy leading whitespace through to the YAML buffer.
    while (getLexer().is(AsmToken::Space)) {
      YamlStream << getLexer().getTok().getString();
      Lex();
    }

    // Stop at the end directive.
    if (getLexer().is(AsmToken::Identifier)) {
      StringRef ID = getLexer().getTok().getIdentifier();
      if (ID == AMDGPU::HSAMD::AssemblerDirectiveEnd) {
        Lex();
        FoundEnd = true;
        break;
      }
    }

    // Append the rest of the statement plus a statement separator.
    YamlStream << Parser.parseStringToEndOfStatement()
               << getContext().getAsmInfo()->getSeparatorString();

    Parser.eatToEndOfStatement();
  }

  // Restore normal lexing before reporting anything.
  getLexer().setSkipSpace(true);

  if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
    return TokError(Twine("expected directive ") +
                    Twine(HSAMD::AssemblerDirectiveEnd) + Twine(" not found"));
  }

  YamlStream.flush();

  if (!getTargetStreamer().EmitHSAMetadata(HSAMetadataString))
    return Error(getParser().getTok().getLoc(), "invalid HSA metadata");

  return false;
}
2564 
2565 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
2566   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
2567     return Error(getParser().getTok().getLoc(),
2568                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
2569                  "not available on non-amdpal OSes")).str());
2570   }
2571 
2572   PALMD::Metadata PALMetadata;
2573   for (;;) {
2574     uint32_t Value;
2575     if (ParseAsAbsoluteExpression(Value)) {
2576       return TokError(Twine("invalid value in ") +
2577                       Twine(PALMD::AssemblerDirective));
2578     }
2579     PALMetadata.push_back(Value);
2580     if (getLexer().isNot(AsmToken::Comma))
2581       break;
2582     Lex();
2583   }
2584   getTargetStreamer().EmitPALMetadata(PALMetadata);
2585   return false;
2586 }
2587 
2588 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
2589   StringRef IDVal = DirectiveID.getString();
2590 
2591   if (IDVal == ".hsa_code_object_version")
2592     return ParseDirectiveHSACodeObjectVersion();
2593 
2594   if (IDVal == ".hsa_code_object_isa")
2595     return ParseDirectiveHSACodeObjectISA();
2596 
2597   if (IDVal == ".amd_kernel_code_t")
2598     return ParseDirectiveAMDKernelCodeT();
2599 
2600   if (IDVal == ".amdgpu_hsa_kernel")
2601     return ParseDirectiveAMDGPUHsaKernel();
2602 
2603   if (IDVal == ".amd_amdgpu_isa")
2604     return ParseDirectiveISAVersion();
2605 
2606   if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
2607     return ParseDirectiveHSAMetadata();
2608 
2609   if (IDVal == PALMD::AssemblerDirective)
2610     return ParseDirectivePALMetadata();
2611 
2612   return true;
2613 }
2614 
// Check whether RegNo exists on the current subtarget generation.
bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
                                           unsigned RegNo) const {

  // ttmp12..ttmp15 (and any register aliasing them) are accepted only on GFX9.
  for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
       R.isValid(); ++R) {
    if (*R == RegNo)
      return isGFX9();
  }

  switch (RegNo) {
  case AMDGPU::TBA:
  case AMDGPU::TBA_LO:
  case AMDGPU::TBA_HI:
  case AMDGPU::TMA:
  case AMDGPU::TMA_LO:
  case AMDGPU::TMA_HI:
    // TBA/TMA are not accepted on GFX9.
    return !isGFX9();
  case AMDGPU::XNACK_MASK:
  case AMDGPU::XNACK_MASK_LO:
  case AMDGPU::XNACK_MASK_HI:
    // xnack_mask requires the XNACK feature and a post-SI/CI target.
    return !isCI() && !isSI() && hasXNACK();
  default:
    break;
  }

  if (isCI())
    return true;

  if (isSI()) {
    // No flat_scr
    switch (RegNo) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::FLAT_SCR_LO:
    case AMDGPU::FLAT_SCR_HI:
      return false;
    default:
      return true;
    }
  }

  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
  // SI/CI have.
  for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
       R.isValid(); ++R) {
    if (*R == RegNo)
      return false;
  }

  return true;
}
2665 
// Parse a single instruction operand: try the generated custom operand
// parsers first, then a register/immediate, then a generic MC expression,
// and finally accept a bare identifier token (e.g. a flag like 'gds').
OperandMatchResultTy
AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
  // Try to parse with a custom parser
  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);

  // If we successfully parsed the operand or if there as an error parsing,
  // we are done.
  //
  // If we are parsing after we reach EndOfStatement then this means we
  // are appending default values to the Operands list.  This is only done
  // by custom parser, so we shouldn't continue on to the generic parsing.
  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
      getLexer().is(AsmToken::EndOfStatement))
    return ResTy;

  ResTy = parseRegOrImm(Operands);

  if (ResTy == MatchOperand_Success)
    return ResTy;

  const auto &Tok = Parser.getTok();
  SMLoc S = Tok.getLoc();

  // Fall back to a general MC expression (parseExpression returns false on
  // success).
  const MCExpr *Expr = nullptr;
  if (!Parser.parseExpression(Expr)) {
    Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
    return MatchOperand_Success;
  }

  // Possibly this is an instruction flag like 'gds'.
  if (Tok.getKind() == AsmToken::Identifier) {
    Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S));
    Parser.Lex();
    return MatchOperand_Success;
  }

  return MatchOperand_NoMatch;
}
2704 
2705 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
2706   // Clear any forced encodings from the previous instruction.
2707   setForcedEncodingSize(0);
2708   setForcedDPP(false);
2709   setForcedSDWA(false);
2710 
2711   if (Name.endswith("_e64")) {
2712     setForcedEncodingSize(64);
2713     return Name.substr(0, Name.size() - 4);
2714   } else if (Name.endswith("_e32")) {
2715     setForcedEncodingSize(32);
2716     return Name.substr(0, Name.size() - 4);
2717   } else if (Name.endswith("_dpp")) {
2718     setForcedDPP(true);
2719     return Name.substr(0, Name.size() - 4);
2720   } else if (Name.endswith("_sdwa")) {
2721     setForcedSDWA(true);
2722     return Name.substr(0, Name.size() - 5);
2723   }
2724   return Name;
2725 }
2726 
2727 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
2728                                        StringRef Name,
2729                                        SMLoc NameLoc, OperandVector &Operands) {
2730   // Add the instruction mnemonic
2731   Name = parseMnemonicSuffix(Name);
2732   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
2733 
2734   while (!getLexer().is(AsmToken::EndOfStatement)) {
2735     OperandMatchResultTy Res = parseOperand(Operands, Name);
2736 
2737     // Eat the comma or space if there is one.
2738     if (getLexer().is(AsmToken::Comma))
2739       Parser.Lex();
2740 
2741     switch (Res) {
2742       case MatchOperand_Success: break;
2743       case MatchOperand_ParseFail:
2744         Error(getLexer().getLoc(), "failed parsing operand.");
2745         while (!getLexer().is(AsmToken::EndOfStatement)) {
2746           Parser.Lex();
2747         }
2748         return true;
2749       case MatchOperand_NoMatch:
2750         Error(getLexer().getLoc(), "not a valid operand.");
2751         while (!getLexer().is(AsmToken::EndOfStatement)) {
2752           Parser.Lex();
2753         }
2754         return true;
2755     }
2756   }
2757 
2758   return false;
2759 }
2760 
2761 //===----------------------------------------------------------------------===//
2762 // Utility functions
2763 //===----------------------------------------------------------------------===//
2764 
2765 OperandMatchResultTy
2766 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) {
2767   switch(getLexer().getKind()) {
2768     default: return MatchOperand_NoMatch;
2769     case AsmToken::Identifier: {
2770       StringRef Name = Parser.getTok().getString();
2771       if (!Name.equals(Prefix)) {
2772         return MatchOperand_NoMatch;
2773       }
2774 
2775       Parser.Lex();
2776       if (getLexer().isNot(AsmToken::Colon))
2777         return MatchOperand_ParseFail;
2778 
2779       Parser.Lex();
2780 
2781       bool IsMinus = false;
2782       if (getLexer().getKind() == AsmToken::Minus) {
2783         Parser.Lex();
2784         IsMinus = true;
2785       }
2786 
2787       if (getLexer().isNot(AsmToken::Integer))
2788         return MatchOperand_ParseFail;
2789 
2790       if (getParser().parseAbsoluteExpression(Int))
2791         return MatchOperand_ParseFail;
2792 
2793       if (IsMinus)
2794         Int = -Int;
2795       break;
2796     }
2797   }
2798   return MatchOperand_Success;
2799 }
2800 
2801 OperandMatchResultTy
2802 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
2803                                     AMDGPUOperand::ImmTy ImmTy,
2804                                     bool (*ConvertResult)(int64_t&)) {
2805   SMLoc S = Parser.getTok().getLoc();
2806   int64_t Value = 0;
2807 
2808   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
2809   if (Res != MatchOperand_Success)
2810     return Res;
2811 
2812   if (ConvertResult && !ConvertResult(Value)) {
2813     return MatchOperand_ParseFail;
2814   }
2815 
2816   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
2817   return MatchOperand_Success;
2818 }
2819 
2820 OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix(
2821   const char *Prefix,
2822   OperandVector &Operands,
2823   AMDGPUOperand::ImmTy ImmTy,
2824   bool (*ConvertResult)(int64_t&)) {
2825   StringRef Name = Parser.getTok().getString();
2826   if (!Name.equals(Prefix))
2827     return MatchOperand_NoMatch;
2828 
2829   Parser.Lex();
2830   if (getLexer().isNot(AsmToken::Colon))
2831     return MatchOperand_ParseFail;
2832 
2833   Parser.Lex();
2834   if (getLexer().isNot(AsmToken::LBrac))
2835     return MatchOperand_ParseFail;
2836   Parser.Lex();
2837 
2838   unsigned Val = 0;
2839   SMLoc S = Parser.getTok().getLoc();
2840 
2841   // FIXME: How to verify the number of elements matches the number of src
2842   // operands?
2843   for (int I = 0; I < 4; ++I) {
2844     if (I != 0) {
2845       if (getLexer().is(AsmToken::RBrac))
2846         break;
2847 
2848       if (getLexer().isNot(AsmToken::Comma))
2849         return MatchOperand_ParseFail;
2850       Parser.Lex();
2851     }
2852 
2853     if (getLexer().isNot(AsmToken::Integer))
2854       return MatchOperand_ParseFail;
2855 
2856     int64_t Op;
2857     if (getParser().parseAbsoluteExpression(Op))
2858       return MatchOperand_ParseFail;
2859 
2860     if (Op != 0 && Op != 1)
2861       return MatchOperand_ParseFail;
2862     Val |= (Op << I);
2863   }
2864 
2865   Parser.Lex();
2866   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
2867   return MatchOperand_Success;
2868 }
2869 
2870 OperandMatchResultTy
2871 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
2872                                AMDGPUOperand::ImmTy ImmTy) {
2873   int64_t Bit = 0;
2874   SMLoc S = Parser.getTok().getLoc();
2875 
2876   // We are at the end of the statement, and this is a default argument, so
2877   // use a default value.
2878   if (getLexer().isNot(AsmToken::EndOfStatement)) {
2879     switch(getLexer().getKind()) {
2880       case AsmToken::Identifier: {
2881         StringRef Tok = Parser.getTok().getString();
2882         if (Tok == Name) {
2883           Bit = 1;
2884           Parser.Lex();
2885         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
2886           Bit = 0;
2887           Parser.Lex();
2888         } else {
2889           return MatchOperand_NoMatch;
2890         }
2891         break;
2892       }
2893       default:
2894         return MatchOperand_NoMatch;
2895     }
2896   }
2897 
2898   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
2899   return MatchOperand_Success;
2900 }
2901 
2902 static void addOptionalImmOperand(
2903   MCInst& Inst, const OperandVector& Operands,
2904   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
2905   AMDGPUOperand::ImmTy ImmT,
2906   int64_t Default = 0) {
2907   auto i = OptionalIdx.find(ImmT);
2908   if (i != OptionalIdx.end()) {
2909     unsigned Idx = i->second;
2910     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
2911   } else {
2912     Inst.addOperand(MCOperand::createImm(Default));
2913   }
2914 }
2915 
2916 OperandMatchResultTy
2917 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
2918   if (getLexer().isNot(AsmToken::Identifier)) {
2919     return MatchOperand_NoMatch;
2920   }
2921   StringRef Tok = Parser.getTok().getString();
2922   if (Tok != Prefix) {
2923     return MatchOperand_NoMatch;
2924   }
2925 
2926   Parser.Lex();
2927   if (getLexer().isNot(AsmToken::Colon)) {
2928     return MatchOperand_ParseFail;
2929   }
2930 
2931   Parser.Lex();
2932   if (getLexer().isNot(AsmToken::Identifier)) {
2933     return MatchOperand_ParseFail;
2934   }
2935 
2936   Value = Parser.getTok().getString();
2937   return MatchOperand_Success;
2938 }
2939 
2940 //===----------------------------------------------------------------------===//
2941 // ds
2942 //===----------------------------------------------------------------------===//
2943 
2944 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
2945                                     const OperandVector &Operands) {
2946   OptionalImmIndexMap OptionalIdx;
2947 
2948   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
2949     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
2950 
2951     // Add the register arguments
2952     if (Op.isReg()) {
2953       Op.addRegOperands(Inst, 1);
2954       continue;
2955     }
2956 
2957     // Handle optional arguments
2958     OptionalIdx[Op.getImmTy()] = i;
2959   }
2960 
2961   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
2962   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
2963   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
2964 
2965   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
2966 }
2967 
2968 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
2969                                 bool IsGdsHardcoded) {
2970   OptionalImmIndexMap OptionalIdx;
2971 
2972   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
2973     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
2974 
2975     // Add the register arguments
2976     if (Op.isReg()) {
2977       Op.addRegOperands(Inst, 1);
2978       continue;
2979     }
2980 
2981     if (Op.isToken() && Op.getToken() == "gds") {
2982       IsGdsHardcoded = true;
2983       continue;
2984     }
2985 
2986     // Handle optional arguments
2987     OptionalIdx[Op.getImmTy()] = i;
2988   }
2989 
2990   AMDGPUOperand::ImmTy OffsetType =
2991     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si ||
2992      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
2993                                                       AMDGPUOperand::ImmTyOffset;
2994 
2995   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
2996 
2997   if (!IsGdsHardcoded) {
2998     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
2999   }
3000   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
3001 }
3002 
void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  // MCInst indices of the four source slots; used below to build the
  // enable mask and to compact sources in compressed mode.
  unsigned OperandIdx[4];
  unsigned EnMask = 0;
  int SrcIdx = 0;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Op.addRegOperands(Inst, 1);
      ++SrcIdx;
      continue;
    }

    // An "off" source is represented as NoRegister in the MCInst.
    if (Op.isOff()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
      ++SrcIdx;
      continue;
    }

    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // A literal "done" token adds no MCInst operand; skip it here.
    if (Op.isToken() && Op.getToken() == "done")
      continue;

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  // Exactly four source slots must have been filled (regs or "off").
  assert(SrcIdx == 4);

  bool Compr = false;
  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
    // In compressed mode, slot 1 takes the value written in slot 2 and the
    // upper two slots are disabled.
    Compr = true;
    Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
    Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
    Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
  }

  // Build the enable mask: one bit per active source, or two bits per
  // active source pair in compressed mode.
  for (auto i = 0; i < SrcIdx; ++i) {
    if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
      EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);

  Inst.addOperand(MCOperand::createImm(EnMask));
}
3063 
3064 //===----------------------------------------------------------------------===//
3065 // s_waitcnt
3066 //===----------------------------------------------------------------------===//
3067 
3068 static bool
3069 encodeCnt(
3070   const AMDGPU::IsaInfo::IsaVersion ISA,
3071   int64_t &IntVal,
3072   int64_t CntVal,
3073   bool Saturate,
3074   unsigned (*encode)(const IsaInfo::IsaVersion &Version, unsigned, unsigned),
3075   unsigned (*decode)(const IsaInfo::IsaVersion &Version, unsigned))
3076 {
3077   bool Failed = false;
3078 
3079   IntVal = encode(ISA, IntVal, CntVal);
3080   if (CntVal != decode(ISA, IntVal)) {
3081     if (Saturate) {
3082       IntVal = encode(ISA, IntVal, -1);
3083     } else {
3084       Failed = true;
3085     }
3086   }
3087   return Failed;
3088 }
3089 
bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
  // Parse one "<name>(<value>)" clause of an s_waitcnt expression and fold
  // the value into IntVal.  Returns true on failure.
  StringRef CntName = Parser.getTok().getString();
  int64_t CntVal;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::LParen))
    return true;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Integer))
    return true;

  SMLoc ValLoc = Parser.getTok().getLoc();
  if (getParser().parseAbsoluteExpression(CntVal))
    return true;

  AMDGPU::IsaInfo::IsaVersion ISA =
      AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());

  bool Failed = true;
  // A "_sat" suffix requests clamping of out-of-range values instead of an
  // error (see encodeCnt).
  bool Sat = CntName.endswith("_sat");

  if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
  }

  if (Failed) {
    Error(ValLoc, "too large value for " + CntName);
    return true;
  }

  if (getLexer().isNot(AsmToken::RParen)) {
    return true;
  }

  Parser.Lex();
  // Consume an optional '&' or ',' separator, but only when another
  // counter clause (an identifier) follows it.
  if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
    const AsmToken NextToken = getLexer().peekTok();
    if (NextToken.is(AsmToken::Identifier)) {
      Parser.Lex();
    }
  }

  return false;
}
3139 
3140 OperandMatchResultTy
3141 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
3142   AMDGPU::IsaInfo::IsaVersion ISA =
3143       AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
3144   int64_t Waitcnt = getWaitcntBitMask(ISA);
3145   SMLoc S = Parser.getTok().getLoc();
3146 
3147   switch(getLexer().getKind()) {
3148     default: return MatchOperand_ParseFail;
3149     case AsmToken::Integer:
3150       // The operand can be an integer value.
3151       if (getParser().parseAbsoluteExpression(Waitcnt))
3152         return MatchOperand_ParseFail;
3153       break;
3154 
3155     case AsmToken::Identifier:
3156       do {
3157         if (parseCnt(Waitcnt))
3158           return MatchOperand_ParseFail;
3159       } while(getLexer().isNot(AsmToken::EndOfStatement));
3160       break;
3161   }
3162   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
3163   return MatchOperand_Success;
3164 }
3165 
bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
                                          int64_t &Width) {
  using namespace llvm::AMDGPU::Hwreg;

  // Parse "hwreg(<id>[, <offset>, <width>])".  Returns true on a syntax
  // error; range checking of the collected values is left to the caller
  // (parseHwreg).
  if (Parser.getTok().getString() != "hwreg")
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::LParen))
    return true;
  Parser.Lex();

  if (getLexer().is(AsmToken::Identifier)) {
    // Symbolic register name; ID_UNKNOWN_ is kept when the name is not
    // found in IdSymbolic, so the caller can diagnose it.
    HwReg.IsSymbolic = true;
    HwReg.Id = ID_UNKNOWN_;
    const StringRef tok = Parser.getTok().getString();
    for (int i = ID_SYMBOLIC_FIRST_; i < ID_SYMBOLIC_LAST_; ++i) {
      if (tok == IdSymbolic[i]) {
        HwReg.Id = i;
        break;
      }
    }
    Parser.Lex();
  } else {
    // Numeric register id.
    HwReg.IsSymbolic = false;
    if (getLexer().isNot(AsmToken::Integer))
      return true;
    if (getParser().parseAbsoluteExpression(HwReg.Id))
      return true;
  }

  // Short form: "hwreg(<id>)" with default offset/width.
  if (getLexer().is(AsmToken::RParen)) {
    Parser.Lex();
    return false;
  }

  // optional params
  if (getLexer().isNot(AsmToken::Comma))
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::Integer))
    return true;
  if (getParser().parseAbsoluteExpression(Offset))
    return true;

  if (getLexer().isNot(AsmToken::Comma))
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::Integer))
    return true;
  if (getParser().parseAbsoluteExpression(Width))
    return true;

  if (getLexer().isNot(AsmToken::RParen))
    return true;
  Parser.Lex();

  return false;
}
3227 
OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
  using namespace llvm::AMDGPU::Hwreg;

  // Parse a hwreg operand: either a raw 16-bit immediate or a symbolic
  // "hwreg(...)" construct packed into id/offset/width-1 bit fields.
  int64_t Imm16Val = 0;
  SMLoc S = Parser.getTok().getLoc();

  switch(getLexer().getKind()) {
    default: return MatchOperand_NoMatch;
    case AsmToken::Integer:
      // The operand can be an integer value.
      if (getParser().parseAbsoluteExpression(Imm16Val))
        return MatchOperand_NoMatch;
      if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
        Error(S, "invalid immediate: only 16-bit values are legal");
        // Do not return error code, but create an imm operand anyway and proceed
        // to the next operand, if any. That avoids unneccessary error messages.
      }
      break;

    case AsmToken::Identifier: {
        OperandInfoTy HwReg(ID_UNKNOWN_);
        int64_t Offset = OFFSET_DEFAULT_;
        int64_t Width = WIDTH_M1_DEFAULT_ + 1;
        if (parseHwregConstruct(HwReg, Offset, Width))
          return MatchOperand_ParseFail;
        // Diagnose out-of-range fields but keep parsing (same rationale as
        // the integer case above).
        if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) {
          if (HwReg.IsSymbolic)
            Error(S, "invalid symbolic name of hardware register");
          else
            Error(S, "invalid code of hardware register: only 6-bit values are legal");
        }
        if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset))
          Error(S, "invalid bit offset: only 5-bit values are legal");
        if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1))
          Error(S, "invalid bitfield width: only values from 1 to 32 are legal");
        // Pack id, offset and width-1 into the 16-bit immediate.
        Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_);
      }
      break;
  }
  Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
  return MatchOperand_Success;
}
3270 
// Any parsed immediate is acceptable as an s_waitcnt operand.
bool AMDGPUOperand::isSWaitCnt() const {
  return isImm();
}
3274 
// A hwreg operand is an immediate produced by parseHwreg.
bool AMDGPUOperand::isHwreg() const {
  return isImmTy(ImmTyHwreg);
}
3278 
3279 bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) {
3280   using namespace llvm::AMDGPU::SendMsg;
3281 
3282   if (Parser.getTok().getString() != "sendmsg")
3283     return true;
3284   Parser.Lex();
3285 
3286   if (getLexer().isNot(AsmToken::LParen))
3287     return true;
3288   Parser.Lex();
3289 
3290   if (getLexer().is(AsmToken::Identifier)) {
3291     Msg.IsSymbolic = true;
3292     Msg.Id = ID_UNKNOWN_;
3293     const std::string tok = Parser.getTok().getString();
3294     for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
3295       switch(i) {
3296         default: continue; // Omit gaps.
3297         case ID_INTERRUPT: case ID_GS: case ID_GS_DONE:  case ID_SYSMSG: break;
3298       }
3299       if (tok == IdSymbolic[i]) {
3300         Msg.Id = i;
3301         break;
3302       }
3303     }
3304     Parser.Lex();
3305   } else {
3306     Msg.IsSymbolic = false;
3307     if (getLexer().isNot(AsmToken::Integer))
3308       return true;
3309     if (getParser().parseAbsoluteExpression(Msg.Id))
3310       return true;
3311     if (getLexer().is(AsmToken::Integer))
3312       if (getParser().parseAbsoluteExpression(Msg.Id))
3313         Msg.Id = ID_UNKNOWN_;
3314   }
3315   if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest.
3316     return false;
3317 
3318   if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) {
3319     if (getLexer().isNot(AsmToken::RParen))
3320       return true;
3321     Parser.Lex();
3322     return false;
3323   }
3324 
3325   if (getLexer().isNot(AsmToken::Comma))
3326     return true;
3327   Parser.Lex();
3328 
3329   assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG);
3330   Operation.Id = ID_UNKNOWN_;
3331   if (getLexer().is(AsmToken::Identifier)) {
3332     Operation.IsSymbolic = true;
3333     const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
3334     const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
3335     const int L = (Msg.Id == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
3336     const StringRef Tok = Parser.getTok().getString();
3337     for (int i = F; i < L; ++i) {
3338       if (Tok == S[i]) {
3339         Operation.Id = i;
3340         break;
3341       }
3342     }
3343     Parser.Lex();
3344   } else {
3345     Operation.IsSymbolic = false;
3346     if (getLexer().isNot(AsmToken::Integer))
3347       return true;
3348     if (getParser().parseAbsoluteExpression(Operation.Id))
3349       return true;
3350   }
3351 
3352   if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
3353     // Stream id is optional.
3354     if (getLexer().is(AsmToken::RParen)) {
3355       Parser.Lex();
3356       return false;
3357     }
3358 
3359     if (getLexer().isNot(AsmToken::Comma))
3360       return true;
3361     Parser.Lex();
3362 
3363     if (getLexer().isNot(AsmToken::Integer))
3364       return true;
3365     if (getParser().parseAbsoluteExpression(StreamId))
3366       return true;
3367   }
3368 
3369   if (getLexer().isNot(AsmToken::RParen))
3370     return true;
3371   Parser.Lex();
3372   return false;
3373 }
3374 
3375 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
3376   if (getLexer().getKind() != AsmToken::Identifier)
3377     return MatchOperand_NoMatch;
3378 
3379   StringRef Str = Parser.getTok().getString();
3380   int Slot = StringSwitch<int>(Str)
3381     .Case("p10", 0)
3382     .Case("p20", 1)
3383     .Case("p0", 2)
3384     .Default(-1);
3385 
3386   SMLoc S = Parser.getTok().getLoc();
3387   if (Slot == -1)
3388     return MatchOperand_ParseFail;
3389 
3390   Parser.Lex();
3391   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
3392                                               AMDGPUOperand::ImmTyInterpSlot));
3393   return MatchOperand_Success;
3394 }
3395 
OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
  // Parse "attr<N>.<chan>" into two immediates: the attribute number and
  // the channel (x/y/z/w).
  if (getLexer().getKind() != AsmToken::Identifier)
    return MatchOperand_NoMatch;

  StringRef Str = Parser.getTok().getString();
  if (!Str.startswith("attr"))
    return MatchOperand_NoMatch;

  // The last two characters select the channel.
  StringRef Chan = Str.take_back(2);
  int AttrChan = StringSwitch<int>(Chan)
    .Case(".x", 0)
    .Case(".y", 1)
    .Case(".z", 2)
    .Case(".w", 3)
    .Default(-1);
  if (AttrChan == -1)
    return MatchOperand_ParseFail;

  // Strip the channel suffix and the "attr" prefix, leaving the number.
  Str = Str.drop_back(2).drop_front(4);

  uint8_t Attr;
  if (Str.getAsInteger(10, Attr))
    return MatchOperand_ParseFail;

  SMLoc S = Parser.getTok().getLoc();
  Parser.Lex();
  if (Attr > 63) {
    // Diagnose but return success (with no operands pushed) so parsing of
    // the statement can continue.
    Error(S, "out of bounds attr");
    return MatchOperand_Success;
  }

  // Point the channel operand's location at the ".<chan>" substring of the
  // original token.
  SMLoc SChan = SMLoc::getFromPointer(Chan.data());

  Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
                                              AMDGPUOperand::ImmTyInterpAttr));
  Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
                                              AMDGPUOperand::ImmTyAttrChan));
  return MatchOperand_Success;
}
3435 
// Report an invalid exp target at the current token's location.
void AMDGPUAsmParser::errorExpTgt() {
  Error(Parser.getTok().getLoc(), "invalid exp target");
}
3439 
OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
                                                      uint8_t &Val) {
  // Translate an exp target name into its encoding:
  //   mrt0..mrt7 -> 0..7, mrtz -> 8, null -> 9,
  //   pos0..pos3 -> 12..15, param0..param31 -> 32..63.
  // Out-of-range numbers are diagnosed via errorExpTgt() but still return
  // MatchOperand_Success so parsing can continue.
  if (Str == "null") {
    Val = 9;
    return MatchOperand_Success;
  }

  if (Str.startswith("mrt")) {
    Str = Str.drop_front(3);
    if (Str == "z") { // == mrtz
      Val = 8;
      return MatchOperand_Success;
    }

    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    if (Val > 7)
      errorExpTgt();

    return MatchOperand_Success;
  }

  if (Str.startswith("pos")) {
    Str = Str.drop_front(3);
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    if (Val > 3)
      errorExpTgt();

    // pos targets start at encoding 12.
    Val += 12;
    return MatchOperand_Success;
  }

  if (Str.startswith("param")) {
    Str = Str.drop_front(5);
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    if (Val >= 32)
      errorExpTgt();

    // param targets start at encoding 32.
    Val += 32;
    return MatchOperand_Success;
  }

  // Explicit escape hatch for writing raw invalid targets.
  if (Str.startswith("invalid_target_")) {
    Str = Str.drop_front(15);
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    errorExpTgt();
    return MatchOperand_Success;
  }

  return MatchOperand_NoMatch;
}
3498 
3499 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
3500   uint8_t Val;
3501   StringRef Str = Parser.getTok().getString();
3502 
3503   auto Res = parseExpTgtImpl(Str, Val);
3504   if (Res != MatchOperand_Success)
3505     return Res;
3506 
3507   SMLoc S = Parser.getTok().getLoc();
3508   Parser.Lex();
3509 
3510   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
3511                                               AMDGPUOperand::ImmTyExpTgt));
3512   return MatchOperand_Success;
3513 }
3514 
OperandMatchResultTy
AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SendMsg;

  // Parse an s_sendmsg operand: either a raw 16-bit immediate or a
  // symbolic "sendmsg(...)" construct packed into msg/op/stream fields.
  int64_t Imm16Val = 0;
  SMLoc S = Parser.getTok().getLoc();

  switch(getLexer().getKind()) {
  default:
    return MatchOperand_NoMatch;
  case AsmToken::Integer:
    // The operand can be an integer value.
    if (getParser().parseAbsoluteExpression(Imm16Val))
      return MatchOperand_NoMatch;
    if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
      Error(S, "invalid immediate: only 16-bit values are legal");
      // Do not return error code, but create an imm operand anyway and proceed
      // to the next operand, if any. That avoids unneccessary error messages.
    }
    break;
  case AsmToken::Identifier: {
      OperandInfoTy Msg(ID_UNKNOWN_);
      OperandInfoTy Operation(OP_UNKNOWN_);
      int64_t StreamId = STREAM_ID_DEFAULT_;
      if (parseSendMsgConstruct(Msg, Operation, StreamId))
        return MatchOperand_ParseFail;
      // The do/while(false) below is a structured way to bail out of the
      // validation cascade at the first error while still emitting an
      // operand afterwards.
      do {
        // Validate and encode message ID.
        if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE)
                || Msg.Id == ID_SYSMSG)) {
          if (Msg.IsSymbolic)
            Error(S, "invalid/unsupported symbolic name of message");
          else
            Error(S, "invalid/unsupported code of message");
          break;
        }
        Imm16Val = (Msg.Id << ID_SHIFT_);
        // Validate and encode operation ID.
        if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) {
          if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) {
            if (Operation.IsSymbolic)
              Error(S, "invalid symbolic name of GS_OP");
            else
              Error(S, "invalid code of GS_OP: only 2-bit values are legal");
            break;
          }
          if (Operation.Id == OP_GS_NOP
              && Msg.Id != ID_GS_DONE) {
            Error(S, "invalid GS_OP: NOP is for GS_DONE only");
            break;
          }
          Imm16Val |= (Operation.Id << OP_SHIFT_);
        }
        if (Msg.Id == ID_SYSMSG) {
          if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) {
            if (Operation.IsSymbolic)
              Error(S, "invalid/unsupported symbolic name of SYSMSG_OP");
            else
              Error(S, "invalid/unsupported code of SYSMSG_OP");
            break;
          }
          Imm16Val |= (Operation.Id << OP_SHIFT_);
        }
        // Validate and encode stream ID.
        if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
          if (! (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) {
            Error(S, "invalid stream id: only 2-bit values are legal");
            break;
          }
          Imm16Val |= (StreamId << STREAM_ID_SHIFT_);
        }
      } while (false);
    }
    break;
  }
  Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
  return MatchOperand_Success;
}
3593 
// A sendmsg operand is an immediate produced by parseSendMsgOp.
bool AMDGPUOperand::isSendMsg() const {
  return isImmTy(ImmTySendMsg);
}
3597 
3598 //===----------------------------------------------------------------------===//
3599 // parser helpers
3600 //===----------------------------------------------------------------------===//
3601 
3602 bool
3603 AMDGPUAsmParser::trySkipId(const StringRef Id) {
3604   if (getLexer().getKind() == AsmToken::Identifier &&
3605       Parser.getTok().getString() == Id) {
3606     Parser.Lex();
3607     return true;
3608   }
3609   return false;
3610 }
3611 
3612 bool
3613 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
3614   if (getLexer().getKind() == Kind) {
3615     Parser.Lex();
3616     return true;
3617   }
3618   return false;
3619 }
3620 
3621 bool
3622 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
3623                            const StringRef ErrMsg) {
3624   if (!trySkipToken(Kind)) {
3625     Error(Parser.getTok().getLoc(), ErrMsg);
3626     return false;
3627   }
3628   return true;
3629 }
3630 
// Parse an absolute expression into Imm.  Note the inverted convention
// relative to MCAsmParser: this returns true on SUCCESS.
bool
AMDGPUAsmParser::parseExpr(int64_t &Imm) {
  return !getParser().parseAbsoluteExpression(Imm);
}
3635 
3636 bool
3637 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
3638   SMLoc S = Parser.getTok().getLoc();
3639   if (getLexer().getKind() == AsmToken::String) {
3640     Val = Parser.getTok().getStringContents();
3641     Parser.Lex();
3642     return true;
3643   } else {
3644     Error(S, ErrMsg);
3645     return false;
3646   }
3647 }
3648 
3649 //===----------------------------------------------------------------------===//
3650 // swizzle
3651 //===----------------------------------------------------------------------===//
3652 
3653 LLVM_READNONE
3654 static unsigned
3655 encodeBitmaskPerm(const unsigned AndMask,
3656                   const unsigned OrMask,
3657                   const unsigned XorMask) {
3658   using namespace llvm::AMDGPU::Swizzle;
3659 
3660   return BITMASK_PERM_ENC |
3661          (AndMask << BITMASK_AND_SHIFT) |
3662          (OrMask  << BITMASK_OR_SHIFT)  |
3663          (XorMask << BITMASK_XOR_SHIFT);
3664 }
3665 
3666 bool
3667 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
3668                                       const unsigned MinVal,
3669                                       const unsigned MaxVal,
3670                                       const StringRef ErrMsg) {
3671   for (unsigned i = 0; i < OpNum; ++i) {
3672     if (!skipToken(AsmToken::Comma, "expected a comma")){
3673       return false;
3674     }
3675     SMLoc ExprLoc = Parser.getTok().getLoc();
3676     if (!parseExpr(Op[i])) {
3677       return false;
3678     }
3679     if (Op[i] < MinVal || Op[i] > MaxVal) {
3680       Error(ExprLoc, ErrMsg);
3681       return false;
3682     }
3683   }
3684 
3685   return true;
3686 }
3687 
3688 bool
3689 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
3690   using namespace llvm::AMDGPU::Swizzle;
3691 
3692   int64_t Lane[LANE_NUM];
3693   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
3694                            "expected a 2-bit lane id")) {
3695     Imm = QUAD_PERM_ENC;
3696     for (auto i = 0; i < LANE_NUM; ++i) {
3697       Imm |= Lane[i] << (LANE_SHIFT * i);
3698     }
3699     return true;
3700   }
3701   return false;
3702 }
3703 
3704 bool
3705 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
3706   using namespace llvm::AMDGPU::Swizzle;
3707 
3708   SMLoc S = Parser.getTok().getLoc();
3709   int64_t GroupSize;
3710   int64_t LaneIdx;
3711 
3712   if (!parseSwizzleOperands(1, &GroupSize,
3713                             2, 32,
3714                             "group size must be in the interval [2,32]")) {
3715     return false;
3716   }
3717   if (!isPowerOf2_64(GroupSize)) {
3718     Error(S, "group size must be a power of two");
3719     return false;
3720   }
3721   if (parseSwizzleOperands(1, &LaneIdx,
3722                            0, GroupSize - 1,
3723                            "lane id must be in the interval [0,group size - 1]")) {
3724     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
3725     return true;
3726   }
3727   return false;
3728 }
3729 
3730 bool
3731 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
3732   using namespace llvm::AMDGPU::Swizzle;
3733 
3734   SMLoc S = Parser.getTok().getLoc();
3735   int64_t GroupSize;
3736 
3737   if (!parseSwizzleOperands(1, &GroupSize,
3738       2, 32, "group size must be in the interval [2,32]")) {
3739     return false;
3740   }
3741   if (!isPowerOf2_64(GroupSize)) {
3742     Error(S, "group size must be a power of two");
3743     return false;
3744   }
3745 
3746   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
3747   return true;
3748 }
3749 
3750 bool
3751 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
3752   using namespace llvm::AMDGPU::Swizzle;
3753 
3754   SMLoc S = Parser.getTok().getLoc();
3755   int64_t GroupSize;
3756 
3757   if (!parseSwizzleOperands(1, &GroupSize,
3758       1, 16, "group size must be in the interval [1,16]")) {
3759     return false;
3760   }
3761   if (!isPowerOf2_64(GroupSize)) {
3762     Error(S, "group size must be a power of two");
3763     return false;
3764   }
3765 
3766   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
3767   return true;
3768 }
3769 
3770 bool
3771 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
3772   using namespace llvm::AMDGPU::Swizzle;
3773 
3774   if (!skipToken(AsmToken::Comma, "expected a comma")) {
3775     return false;
3776   }
3777 
3778   StringRef Ctl;
3779   SMLoc StrLoc = Parser.getTok().getLoc();
3780   if (!parseString(Ctl)) {
3781     return false;
3782   }
3783   if (Ctl.size() != BITMASK_WIDTH) {
3784     Error(StrLoc, "expected a 5-character mask");
3785     return false;
3786   }
3787 
3788   unsigned AndMask = 0;
3789   unsigned OrMask = 0;
3790   unsigned XorMask = 0;
3791 
3792   for (size_t i = 0; i < Ctl.size(); ++i) {
3793     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
3794     switch(Ctl[i]) {
3795     default:
3796       Error(StrLoc, "invalid mask");
3797       return false;
3798     case '0':
3799       break;
3800     case '1':
3801       OrMask |= Mask;
3802       break;
3803     case 'p':
3804       AndMask |= Mask;
3805       break;
3806     case 'i':
3807       AndMask |= Mask;
3808       XorMask |= Mask;
3809       break;
3810     }
3811   }
3812 
3813   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
3814   return true;
3815 }
3816 
3817 bool
3818 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
3819 
3820   SMLoc OffsetLoc = Parser.getTok().getLoc();
3821 
3822   if (!parseExpr(Imm)) {
3823     return false;
3824   }
3825   if (!isUInt<16>(Imm)) {
3826     Error(OffsetLoc, "expected a 16-bit offset");
3827     return false;
3828   }
3829   return true;
3830 }
3831 
3832 bool
3833 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
3834   using namespace llvm::AMDGPU::Swizzle;
3835 
3836   if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
3837 
3838     SMLoc ModeLoc = Parser.getTok().getLoc();
3839     bool Ok = false;
3840 
3841     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
3842       Ok = parseSwizzleQuadPerm(Imm);
3843     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
3844       Ok = parseSwizzleBitmaskPerm(Imm);
3845     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
3846       Ok = parseSwizzleBroadcast(Imm);
3847     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
3848       Ok = parseSwizzleSwap(Imm);
3849     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
3850       Ok = parseSwizzleReverse(Imm);
3851     } else {
3852       Error(ModeLoc, "expected a swizzle mode");
3853     }
3854 
3855     return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
3856   }
3857 
3858   return false;
3859 }
3860 
3861 OperandMatchResultTy
3862 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
3863   SMLoc S = Parser.getTok().getLoc();
3864   int64_t Imm = 0;
3865 
3866   if (trySkipId("offset")) {
3867 
3868     bool Ok = false;
3869     if (skipToken(AsmToken::Colon, "expected a colon")) {
3870       if (trySkipId("swizzle")) {
3871         Ok = parseSwizzleMacro(Imm);
3872       } else {
3873         Ok = parseSwizzleOffset(Imm);
3874       }
3875     }
3876 
3877     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
3878 
3879     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
3880   } else {
3881     // Swizzle "offset" operand is optional.
3882     // If it is omitted, try parsing other optional operands.
3883     return parseOptionalOpr(Operands);
3884   }
3885 }
3886 
3887 bool
3888 AMDGPUOperand::isSwizzle() const {
3889   return isImmTy(ImmTySwizzle);
3890 }
3891 
3892 //===----------------------------------------------------------------------===//
3893 // sopp branch targets
3894 //===----------------------------------------------------------------------===//
3895 
3896 OperandMatchResultTy
3897 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
3898   SMLoc S = Parser.getTok().getLoc();
3899 
3900   switch (getLexer().getKind()) {
3901     default: return MatchOperand_ParseFail;
3902     case AsmToken::Integer: {
3903       int64_t Imm;
3904       if (getParser().parseAbsoluteExpression(Imm))
3905         return MatchOperand_ParseFail;
3906       Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S));
3907       return MatchOperand_Success;
3908     }
3909 
3910     case AsmToken::Identifier:
3911       Operands.push_back(AMDGPUOperand::CreateExpr(this,
3912           MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
3913                                   Parser.getTok().getString()), getContext()), S));
3914       Parser.Lex();
3915       return MatchOperand_Success;
3916   }
3917 }
3918 
3919 //===----------------------------------------------------------------------===//
3920 // mubuf
3921 //===----------------------------------------------------------------------===//
3922 
3923 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
3924   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
3925 }
3926 
3927 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
3928   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
3929 }
3930 
3931 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultTFE() const {
3932   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyTFE);
3933 }
3934 
// Convert parsed MUBUF operands into MCInst operands in encoding order.
// IsAtomic: glc is hard-coded in the mnemonic and is not emitted here.
// IsAtomicReturn: the instruction returns a value, so $vdata_in must also
// be inserted as $vdata (the two are tied).
void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
                               const OperandVector &Operands,
                               bool IsAtomic, bool IsAtomicReturn) {
  OptionalImmIndexMap OptionalIdx;
  // An atomic-with-return instruction must also be atomic.
  assert(IsAtomicReturn ? IsAtomic : true);

  // Operands[0] is the mnemonic; start at 1.
  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string.  There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments: record where each named modifier appeared;
    // they are appended below in the fixed order the encoding expects.
    OptionalIdx[Op.getImmTy()] = i;
  }

  // Copy $vdata_in operand and insert as $vdata for MUBUF_Atomic RTN insns.
  if (IsAtomicReturn) {
    MCInst::iterator I = Inst.begin(); // $vdata_in is always at the beginning.
    Inst.insert(I, *I);
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
  if (!IsAtomic) { // glc is hard-coded.
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
  }
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
}
3980 
// Convert parsed MTBUF operands into MCInst operands in encoding order.
// Mirrors cvtMubufImpl, with the additional dfmt/nfmt format modifiers.
void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  // Operands[0] is the mnemonic; start at 1.
  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string.  There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments: record where each named modifier appeared;
    // they are appended below in the fixed order the encoding expects.
    OptionalIdx[Op.getImmTy()] = i;
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDFMT);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyNFMT);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
}
4018 
4019 //===----------------------------------------------------------------------===//
4020 // mimg
4021 //===----------------------------------------------------------------------===//
4022 
// Convert parsed MIMG operands into MCInst operands in encoding order.
// If IsAtomic, the data source is tied to the destination: the same register
// operand is added twice (once as dst, once as src).
void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
                              bool IsAtomic) {
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  // Emit the result registers (defs) first.
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (IsAtomic) {
    // Add src, same as dst
    ((AMDGPUOperand &)*Operands[I]).addRegOperands(Inst, 1);
  }

  OptionalImmIndexMap OptionalIdx;

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);

    // Add the register arguments
    if (Op.isRegOrImm()) {
      Op.addRegOrImmOperands(Inst, 1);
      continue;
    } else if (Op.isImmModifier()) {
      // Record where each named modifier appeared; they are appended below
      // in the fixed order the encoding expects.
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unexpected operand type");
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
}
4061 
4062 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
4063   cvtMIMG(Inst, Operands, true);
4064 }
4065 
4066 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDMask() const {
4067   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDMask);
4068 }
4069 
4070 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultUNorm() const {
4071   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyUNorm);
4072 }
4073 
4074 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDA() const {
4075   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDA);
4076 }
4077 
4078 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultR128() const {
4079   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyR128);
4080 }
4081 
4082 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultLWE() const {
4083   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyLWE);
4084 }
4085 
4086 //===----------------------------------------------------------------------===//
4087 // smrd
4088 //===----------------------------------------------------------------------===//
4089 
4090 bool AMDGPUOperand::isSMRDOffset8() const {
4091   return isImm() && isUInt<8>(getImm());
4092 }
4093 
4094 bool AMDGPUOperand::isSMRDOffset20() const {
4095   return isImm() && isUInt<20>(getImm());
4096 }
4097 
4098 bool AMDGPUOperand::isSMRDLiteralOffset() const {
4099   // 32-bit literals are only supported on CI and we only want to use them
4100   // when the offset is > 8-bits.
4101   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
4102 }
4103 
4104 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
4105   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4106 }
4107 
4108 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
4109   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4110 }
4111 
4112 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
4113   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4114 }
4115 
4116 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
4117   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4118 }
4119 
4120 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
4121   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4122 }
4123 
4124 //===----------------------------------------------------------------------===//
4125 // vop3
4126 //===----------------------------------------------------------------------===//
4127 
// Map an omod multiplier written in the source (1, 2 or 4) to its
// encoded value (0, 1 or 2). Returns false for any other input,
// leaving the value untouched.
static bool ConvertOmodMul(int64_t &Mul) {
  switch (Mul) {
  case 1: Mul = 0; return true;
  case 2: Mul = 1; return true;
  case 4: Mul = 2; return true;
  default: return false;
  }
}
4135 
// Map an omod divisor written in the source (1 or 2) to its encoded
// value (0 or 3). Returns false for any other input, leaving the value
// untouched.
static bool ConvertOmodDiv(int64_t &Div) {
  switch (Div) {
  case 1: Div = 0; return true;
  case 2: Div = 3; return true;
  default: return false;
  }
}
4149 
// Map a DPP bound_ctrl value written in the source (0 or -1) to its
// encoded value (1 or 0 respectively). Returns false for any other
// input, leaving the value untouched.
static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
  switch (BoundCtrl) {
  case 0:  BoundCtrl = 1; return true;
  case -1: BoundCtrl = 0; return true;
  default: return false;
  }
}
4163 
// Note: the order in this table matches the order of operands in AsmString.
// Each entry is {name, immediate type, is-a-bare-bit flag, value converter}.
// Bit operands (IsBit == true) are parsed with parseNamedBit; the rest take
// a "name:value" form and may post-process the value via ConvertResult.
static const OptionalOperand AMDGPUOptionalOperandTable[] = {
  {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
  {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
  {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
  {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
  {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
  {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
  {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
  {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
  {"dfmt",    AMDGPUOperand::ImmTyDFMT, false, nullptr},
  {"nfmt",    AMDGPUOperand::ImmTyNFMT, false, nullptr},
  {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
  {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
  {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
  {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
  {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
  {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
  {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
  {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
  {"r128",    AMDGPUOperand::ImmTyR128,  true, nullptr},
  {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
  {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
  {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
  {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
  {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
  {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
  {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
  {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
  {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
  {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
  {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
  {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
  {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
  {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
  {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}
};
4201 
4202 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
4203   unsigned size = Operands.size();
4204   assert(size > 0);
4205 
4206   OperandMatchResultTy res = parseOptionalOpr(Operands);
4207 
4208   // This is a hack to enable hardcoded mandatory operands which follow
4209   // optional operands.
4210   //
4211   // Current design assumes that all operands after the first optional operand
4212   // are also optional. However implementation of some instructions violates
4213   // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
4214   //
4215   // To alleviate this problem, we have to (implicitly) parse extra operands
4216   // to make sure autogenerated parser of custom operands never hit hardcoded
4217   // mandatory operands.
4218 
4219   if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {
4220 
4221     // We have parsed the first optional operand.
4222     // Parse as many operands as necessary to skip all mandatory operands.
4223 
4224     for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
4225       if (res != MatchOperand_Success ||
4226           getLexer().is(AsmToken::EndOfStatement)) break;
4227       if (getLexer().is(AsmToken::Comma)) Parser.Lex();
4228       res = parseOptionalOpr(Operands);
4229     }
4230   }
4231 
4232   return res;
4233 }
4234 
4235 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
4236   OperandMatchResultTy res;
4237   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
4238     // try to parse any optional operand here
4239     if (Op.IsBit) {
4240       res = parseNamedBit(Op.Name, Operands, Op.Type);
4241     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
4242       res = parseOModOperand(Operands);
4243     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
4244                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
4245                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
4246       res = parseSDWASel(Operands, Op.Name, Op.Type);
4247     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
4248       res = parseSDWADstUnused(Operands);
4249     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
4250                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
4251                Op.Type == AMDGPUOperand::ImmTyNegLo ||
4252                Op.Type == AMDGPUOperand::ImmTyNegHi) {
4253       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
4254                                         Op.ConvertResult);
4255     } else {
4256       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
4257     }
4258     if (res != MatchOperand_NoMatch) {
4259       return res;
4260     }
4261   }
4262   return MatchOperand_NoMatch;
4263 }
4264 
4265 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
4266   StringRef Name = Parser.getTok().getString();
4267   if (Name == "mul") {
4268     return parseIntWithPrefix("mul", Operands,
4269                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
4270   }
4271 
4272   if (Name == "div") {
4273     return parseIntWithPrefix("div", Operands,
4274                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
4275   }
4276 
4277   return MatchOperand_NoMatch;
4278 }
4279 
// Convert a VOP3 instruction whose op_sel also carries a destination bit.
// After the regular VOP3P conversion, the op_sel bit just past the last
// source operand (bit SrcNum) is transferred into src0_modifiers as
// DST_OP_SEL, which is where the encoding keeps the destination selector.
void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
  cvtVOP3P(Inst, Operands);

  int Opc = Inst.getOpcode();

  // Count how many src operands this opcode actually has (1 to 3).
  int SrcNum;
  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  for (SrcNum = 0;
       SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
       ++SrcNum);
  assert(SrcNum > 0);

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

  // The bit after the last source bit is the destination op_sel bit.
  if ((OpSel & (1 << SrcNum)) != 0) {
    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
    uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
    Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
  }
}
4303 
4304 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
4305       // 1. This operand is input modifiers
4306   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
4307       // 2. This is not last operand
4308       && Desc.NumOperands > (OpNum + 1)
4309       // 3. Next operand is register class
4310       && Desc.OpInfo[OpNum + 1].RegClass != -1
4311       // 4. Next register is not tied to any other operand
4312       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
4313 }
4314 
// Convert a parsed VOP3 interpolation instruction (v_interp_*) into an
// MCInst, handling the interp-specific slot/attr/channel immediates in
// addition to the usual modifiers.
void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
{
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  // Operands[0] is the mnemonic; emit result registers (defs) first.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      // Source with FP input modifiers occupies two MCInst slots.
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isInterpSlot() ||
               Op.isInterpAttr() ||
               Op.isAttrChan()) {
      // Interp slot / attribute / channel are emitted as raw immediates.
      Inst.addOperand(MCOperand::createImm(Op.Imm.Val));
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  // Append optional modifiers only when the opcode actually encodes them.
  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }
}
4353 
// Convert a parsed VOP3 instruction into an MCInst. Positions of named
// optional modifiers are collected into OptionalIdx so callers (e.g.
// cvtVOP3P) can continue appending operands after this returns.
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                              OptionalImmIndexMap &OptionalIdx) {
  unsigned Opc = Inst.getOpcode();

  // Operands[0] is the mnemonic; emit result registers (defs) first.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
    // This instruction has src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        // A modified source occupies two MCInst slots (modifiers + value).
        Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
      } else if (Op.isImmModifier()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else if (Op.isRegOrImm()) {
        Op.addRegOrImmOperands(Inst, 1);
      } else {
        llvm_unreachable("unhandled operand type");
      }
    }
  } else {
    // No src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (Op.isMod()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        Op.addRegOrImmOperands(Inst, 1);
      }
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }

  // special case v_mac_{f16, f32}:
  // it has src2 register operand that is tied to dst operand
  // we don't allow modifiers for this operand in assembler so src2_modifiers
  // should be 0
  if (Opc == AMDGPU::V_MAC_F32_e64_si || Opc == AMDGPU::V_MAC_F32_e64_vi ||
      Opc == AMDGPU::V_MAC_F16_e64_vi) {
    auto it = Inst.begin();
    std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
    ++it;
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}
4411 
4412 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
4413   OptionalImmIndexMap OptionalIdx;
4414   cvtVOP3(Inst, Operands, OptionalIdx);
4415 }
4416 
// Convert a parsed VOP3P (packed) instruction into an MCInst. The per-lane
// op_sel / op_sel_hi / neg_lo / neg_hi operands are first appended as
// standalone immediates, then their bits are folded into the per-source
// srcN_modifiers operands, which is where the encoding expects them.
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
                               const OperandVector &Operands) {
  OptionalImmIndexMap OptIdx;
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

  cvtVOP3(Inst, Operands, OptIdx);

  // Opcodes with a vdst_in operand tie it to vdst: duplicate operand 0.
  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
    assert(!IsPacked);
    Inst.addOperand(Inst.getOperand(0));
  }

  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
  // instruction, and then figure out where to actually put the modifiers

  addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    // Packed ops default op_sel_hi to all-ones (-1) when it was not written.
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1) {
    assert(IsPacked);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
  }

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);

  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelHiIdx != -1) {
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
  }

  if (NegLoIdx != -1) {
    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
    NegLo = Inst.getOperand(NegLoIdx).getImm();
    NegHi = Inst.getOperand(NegHiIdx).getImm();
  }

  // Scatter bit J of each packed operand into srcJ_modifiers.
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    uint32_t ModVal = 0;

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}
4499 
4500 //===----------------------------------------------------------------------===//
4501 // dpp
4502 //===----------------------------------------------------------------------===//
4503 
4504 bool AMDGPUOperand::isDPPCtrl() const {
4505   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
4506   if (result) {
4507     int64_t Imm = getImm();
4508     return ((Imm >= 0x000) && (Imm <= 0x0ff)) ||
4509            ((Imm >= 0x101) && (Imm <= 0x10f)) ||
4510            ((Imm >= 0x111) && (Imm <= 0x11f)) ||
4511            ((Imm >= 0x121) && (Imm <= 0x12f)) ||
4512            (Imm == 0x130) ||
4513            (Imm == 0x134) ||
4514            (Imm == 0x138) ||
4515            (Imm == 0x13c) ||
4516            (Imm == 0x140) ||
4517            (Imm == 0x141) ||
4518            (Imm == 0x142) ||
4519            (Imm == 0x143);
4520   }
4521   return false;
4522 }
4523 
4524 bool AMDGPUOperand::isGPRIdxMode() const {
4525   return isImm() && isUInt<4>(getImm());
4526 }
4527 
4528 bool AMDGPUOperand::isS16Imm() const {
4529   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
4530 }
4531 
4532 bool AMDGPUOperand::isU16Imm() const {
4533   return isImm() && isUInt<16>(getImm());
4534 }
4535 
/// Parse a DPP control operand and fold it into the 9-bit dpp_ctrl encoding:
///   quad_perm:[a,b,c,d]          -> 0x000..0x0ff (a | b<<2 | c<<4 | d<<6)
///   row_shl/row_shr/row_ror:N    -> 0x100/0x110/0x120 | N  (N in 1..15)
///   wave_shl/rol/shr/ror:1       -> 0x130 / 0x134 / 0x138 / 0x13c
///   row_mirror, row_half_mirror  -> 0x140, 0x141
///   row_bcast:15, row_bcast:31   -> 0x142, 0x143
/// Pushes an ImmTyDppCtrl immediate operand on success.
OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  StringRef Prefix;
  int64_t Int;

  // All dpp_ctrl forms begin with an identifier prefix.
  if (getLexer().getKind() == AsmToken::Identifier) {
    Prefix = Parser.getTok().getString();
  } else {
    return MatchOperand_NoMatch;
  }

  if (Prefix == "row_mirror") {
    Int = 0x140;
    Parser.Lex();
  } else if (Prefix == "row_half_mirror") {
    Int = 0x141;
    Parser.Lex();
  } else {
    // Check to prevent parseDPPCtrlOps from eating invalid tokens
    if (Prefix != "quad_perm"
        && Prefix != "row_shl"
        && Prefix != "row_shr"
        && Prefix != "row_ror"
        && Prefix != "wave_shl"
        && Prefix != "wave_rol"
        && Prefix != "wave_shr"
        && Prefix != "wave_ror"
        && Prefix != "row_bcast") {
      return MatchOperand_NoMatch;
    }

    // Known prefix; from here on a malformed operand is a hard parse error.
    Parser.Lex();
    if (getLexer().isNot(AsmToken::Colon))
      return MatchOperand_ParseFail;

    if (Prefix == "quad_perm") {
      // quad_perm:[%d,%d,%d,%d]
      Parser.Lex();
      if (getLexer().isNot(AsmToken::LBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();

      // First lane selector goes into bits [1:0]; each must be 0..3.
      if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
        return MatchOperand_ParseFail;

      // Remaining three selectors fill bits [3:2], [5:4], [7:6].
      for (int i = 0; i < 3; ++i) {
        if (getLexer().isNot(AsmToken::Comma))
          return MatchOperand_ParseFail;
        Parser.Lex();

        int64_t Temp;
        if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
          return MatchOperand_ParseFail;
        const int shift = i*2 + 2;
        Int += (Temp << shift);
      }

      if (getLexer().isNot(AsmToken::RBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();
    } else {
      // sel:%d
      Parser.Lex();
      if (getParser().parseAbsoluteExpression(Int))
        return MatchOperand_ParseFail;

      // Validate the selector range for the given prefix and OR/assign
      // the corresponding base encoding.
      if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
        Int |= 0x100;
      } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
        Int |= 0x110;
      } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
        Int |= 0x120;
      } else if (Prefix == "wave_shl" && 1 == Int) {
        Int = 0x130;
      } else if (Prefix == "wave_rol" && 1 == Int) {
        Int = 0x134;
      } else if (Prefix == "wave_shr" && 1 == Int) {
        Int = 0x138;
      } else if (Prefix == "wave_ror" && 1 == Int) {
        Int = 0x13C;
      } else if (Prefix == "row_bcast") {
        // Only broadcast widths 15 and 31 exist in hardware.
        if (Int == 15) {
          Int = 0x142;
        } else if (Int == 31) {
          Int = 0x143;
        } else {
          return MatchOperand_ParseFail;
        }
      } else {
        return MatchOperand_ParseFail;
      }
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}
4634 
4635 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
4636   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
4637 }
4638 
4639 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
4640   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
4641 }
4642 
4643 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
4644   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
4645 }
4646 
4647 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
4648   OptionalImmIndexMap OptionalIdx;
4649 
4650   unsigned I = 1;
4651   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4652   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
4653     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
4654   }
4655 
4656   // All DPP instructions with at least one source operand have a fake "old"
4657   // source at the beginning that's tied to the dst operand. Handle it here.
4658   if (Desc.getNumOperands() >= 2)
4659     Inst.addOperand(Inst.getOperand(0));
4660 
4661   for (unsigned E = Operands.size(); I != E; ++I) {
4662     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4663     // Add the register arguments
4664     if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
4665       // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
4666       // Skip it.
4667       continue;
4668     } if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
4669       Op.addRegWithFPInputModsOperands(Inst, 2);
4670     } else if (Op.isDPPCtrl()) {
4671       Op.addImmOperands(Inst, 1);
4672     } else if (Op.isImm()) {
4673       // Handle optional arguments
4674       OptionalIdx[Op.getImmTy()] = I;
4675     } else {
4676       llvm_unreachable("Invalid operand type");
4677     }
4678   }
4679 
4680   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
4681   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
4682   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
4683 }
4684 
4685 //===----------------------------------------------------------------------===//
4686 // sdwa
4687 //===----------------------------------------------------------------------===//
4688 
4689 OperandMatchResultTy
4690 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
4691                               AMDGPUOperand::ImmTy Type) {
4692   using namespace llvm::AMDGPU::SDWA;
4693 
4694   SMLoc S = Parser.getTok().getLoc();
4695   StringRef Value;
4696   OperandMatchResultTy res;
4697 
4698   res = parseStringWithPrefix(Prefix, Value);
4699   if (res != MatchOperand_Success) {
4700     return res;
4701   }
4702 
4703   int64_t Int;
4704   Int = StringSwitch<int64_t>(Value)
4705         .Case("BYTE_0", SdwaSel::BYTE_0)
4706         .Case("BYTE_1", SdwaSel::BYTE_1)
4707         .Case("BYTE_2", SdwaSel::BYTE_2)
4708         .Case("BYTE_3", SdwaSel::BYTE_3)
4709         .Case("WORD_0", SdwaSel::WORD_0)
4710         .Case("WORD_1", SdwaSel::WORD_1)
4711         .Case("DWORD", SdwaSel::DWORD)
4712         .Default(0xffffffff);
4713   Parser.Lex(); // eat last token
4714 
4715   if (Int == 0xffffffff) {
4716     return MatchOperand_ParseFail;
4717   }
4718 
4719   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
4720   return MatchOperand_Success;
4721 }
4722 
4723 OperandMatchResultTy
4724 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
4725   using namespace llvm::AMDGPU::SDWA;
4726 
4727   SMLoc S = Parser.getTok().getLoc();
4728   StringRef Value;
4729   OperandMatchResultTy res;
4730 
4731   res = parseStringWithPrefix("dst_unused", Value);
4732   if (res != MatchOperand_Success) {
4733     return res;
4734   }
4735 
4736   int64_t Int;
4737   Int = StringSwitch<int64_t>(Value)
4738         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
4739         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
4740         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
4741         .Default(0xffffffff);
4742   Parser.Lex(); // eat last token
4743 
4744   if (Int == 0xffffffff) {
4745     return MatchOperand_ParseFail;
4746   }
4747 
4748   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
4749   return MatchOperand_Success;
4750 }
4751 
// SDWA operand conversion for VOP1 instructions.
void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}
4755 
// SDWA operand conversion for VOP2 instructions.
void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}
4759 
// SDWA operand conversion for VOP2b instructions: these carry implicit VCC
// operands, so the "vcc" tokens must be skipped (skipVcc = true).
void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
}
4763 
// SDWA operand conversion for VOPC instructions; the "vcc" destination token
// is skipped only on VI, where it is implicit in the encoding.
void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}
4767 
/// Shared SDWA conversion: builds \p Inst from the parsed \p Operands for a
/// VOP1/VOP2/VOPC SDWA instruction (\p BasicInstType selects which optional
/// operand set is appended). When \p skipVcc is set, "vcc" tokens that are
/// implicit in the encoding are dropped rather than emitted as operands.
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType, bool skipVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool skippedVcc = false;

  // Operands[0] is the mnemonic token; defs start at index 1.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (skipVcc && !skippedVcc && Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on previous iteration.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
        skippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        skippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    skippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi/gfx9 has no optional sdwa arguments; everything else
    // gets the optional immediates appended with their hardware defaults.
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      // omod is present only on some VOP1 sdwa encodings (gfx9).
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      // VOPC has no dst_sel/dst_unused: the destination is VCC, not a VGPR.
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // special case v_mac_{f16, f32}:
  // it has src2 register operand that is tied to dst operand
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
    auto it = Inst.begin();
    std::advance(
      it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}
4855 
4856 /// Force static initialization.
4857 extern "C" void LLVMInitializeAMDGPUAsmParser() {
4858   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
4859   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
4860 }
4861 
4862 #define GET_REGISTER_MATCHER
4863 #define GET_MATCHER_IMPLEMENTATION
4864 #define GET_MNEMONIC_SPELL_CHECKER
4865 #include "AMDGPUGenAsmMatcher.inc"
4866 
4867 // This fuction should be defined after auto-generated include so that we have
4868 // MatchClassKind enum defined
4869 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
4870                                                      unsigned Kind) {
4871   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
4872   // But MatchInstructionImpl() expects to meet token and fails to validate
4873   // operand. This method checks if we are given immediate operand but expect to
4874   // get corresponding token.
4875   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
4876   switch (Kind) {
4877   case MCK_addr64:
4878     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
4879   case MCK_gds:
4880     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
4881   case MCK_glc:
4882     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
4883   case MCK_idxen:
4884     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
4885   case MCK_offen:
4886     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
4887   case MCK_SSrcB32:
4888     // When operands have expression values, they will return true for isToken,
4889     // because it is not possible to distinguish between a token and an
4890     // expression at parse time. MatchInstructionImpl() will always try to
4891     // match an operand as a token, when isToken returns true, and when the
4892     // name of the expression is not a valid token, the match will fail,
4893     // so we need to handle it here.
4894     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
4895   case MCK_SSrcF32:
4896     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
4897   case MCK_SoppBrTarget:
4898     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
4899   case MCK_VReg32OrOff:
4900     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
4901   case MCK_InterpSlot:
4902     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
4903   case MCK_Attr:
4904     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
4905   case MCK_AttrChan:
4906     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
4907   default:
4908     return Match_InvalidOperand;
4909   }
4910 }
4911