1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "AMDGPU.h"
11 #include "AMDKernelCodeT.h"
12 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
13 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
14 #include "SIDefines.h"
15 #include "Utils/AMDGPUAsmUtils.h"
16 #include "Utils/AMDGPUBaseInfo.h"
17 #include "Utils/AMDKernelCodeTUtils.h"
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SmallBitVector.h"
23 #include "llvm/ADT/SmallString.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/ADT/StringSwitch.h"
26 #include "llvm/ADT/Twine.h"
27 #include "llvm/BinaryFormat/ELF.h"
28 #include "llvm/CodeGen/MachineValueType.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/Casting.h"
46 #include "llvm/Support/Compiler.h"
47 #include "llvm/Support/ErrorHandling.h"
48 #include "llvm/Support/MathExtras.h"
49 #include "llvm/Support/SMLoc.h"
50 #include "llvm/Support/TargetRegistry.h"
51 #include "llvm/Support/raw_ostream.h"
52 #include <algorithm>
53 #include <cassert>
54 #include <cstdint>
55 #include <cstring>
56 #include <iterator>
57 #include <map>
58 #include <memory>
59 #include <string>
60 
61 using namespace llvm;
62 using namespace llvm::AMDGPU;
63 
64 namespace {
65 
66 class AMDGPUAsmParser;
67 
68 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };
69 
70 //===----------------------------------------------------------------------===//
71 // Operand
72 //===----------------------------------------------------------------------===//
73 
74 class AMDGPUOperand : public MCParsedAsmOperand {
75   enum KindTy {
76     Token,
77     Immediate,
78     Register,
79     Expression
80   } Kind;
81 
82   SMLoc StartLoc, EndLoc;
83   const AMDGPUAsmParser *AsmParser;
84 
85 public:
86   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
87     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
88 
89   using Ptr = std::unique_ptr<AMDGPUOperand>;
90 
91   struct Modifiers {
92     bool Abs = false;
93     bool Neg = false;
94     bool Sext = false;
95 
96     bool hasFPModifiers() const { return Abs || Neg; }
97     bool hasIntModifiers() const { return Sext; }
98     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
99 
100     int64_t getFPModifiersOperand() const {
101       int64_t Operand = 0;
102       Operand |= Abs ? SISrcMods::ABS : 0;
103       Operand |= Neg ? SISrcMods::NEG : 0;
104       return Operand;
105     }
106 
107     int64_t getIntModifiersOperand() const {
108       int64_t Operand = 0;
109       Operand |= Sext ? SISrcMods::SEXT : 0;
110       return Operand;
111     }
112 
113     int64_t getModifiersOperand() const {
114       assert(!(hasFPModifiers() && hasIntModifiers())
115            && "fp and int modifiers should not be used simultaneously");
116       if (hasFPModifiers()) {
117         return getFPModifiersOperand();
118       } else if (hasIntModifiers()) {
119         return getIntModifiersOperand();
120       } else {
121         return 0;
122       }
123     }
124 
125     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
126   };
127 
128   enum ImmTy {
129     ImmTyNone,
130     ImmTyGDS,
131     ImmTyOffen,
132     ImmTyIdxen,
133     ImmTyAddr64,
134     ImmTyOffset,
135     ImmTyOffset0,
136     ImmTyOffset1,
137     ImmTyGLC,
138     ImmTySLC,
139     ImmTyTFE,
140     ImmTyClampSI,
141     ImmTyOModSI,
142     ImmTyDppCtrl,
143     ImmTyDppRowMask,
144     ImmTyDppBankMask,
145     ImmTyDppBoundCtrl,
146     ImmTySdwaDstSel,
147     ImmTySdwaSrc0Sel,
148     ImmTySdwaSrc1Sel,
149     ImmTySdwaDstUnused,
150     ImmTyDMask,
151     ImmTyUNorm,
152     ImmTyDA,
153     ImmTyR128,
154     ImmTyLWE,
155     ImmTyExpTgt,
156     ImmTyExpCompr,
157     ImmTyExpVM,
158     ImmTyDFMT,
159     ImmTyNFMT,
160     ImmTyHwreg,
161     ImmTyOff,
162     ImmTySendMsg,
163     ImmTyInterpSlot,
164     ImmTyInterpAttr,
165     ImmTyAttrChan,
166     ImmTyOpSel,
167     ImmTyOpSelHi,
168     ImmTyNegLo,
169     ImmTyNegHi,
170     ImmTySwizzle,
171     ImmTyHigh
172   };
173 
174   struct TokOp {
175     const char *Data;
176     unsigned Length;
177   };
178 
179   struct ImmOp {
180     int64_t Val;
181     ImmTy Type;
182     bool IsFPImm;
183     Modifiers Mods;
184   };
185 
186   struct RegOp {
187     unsigned RegNo;
188     bool IsForcedVOP3;
189     Modifiers Mods;
190   };
191 
192   union {
193     TokOp Tok;
194     ImmOp Imm;
195     RegOp Reg;
196     const MCExpr *Expr;
197   };
198 
199   bool isToken() const override {
200     if (Kind == Token)
201       return true;
202 
203     if (Kind != Expression || !Expr)
204       return false;
205 
206     // When parsing operands, we can't always tell if something was meant to be
207     // a token, like 'gds', or an expression that references a global variable.
208     // In this case, we assume the string is an expression, and if we need to
209     // interpret is a token, then we treat the symbol name as the token.
210     return isa<MCSymbolRefExpr>(Expr);
211   }
212 
213   bool isImm() const override {
214     return Kind == Immediate;
215   }
216 
217   bool isInlinableImm(MVT type) const;
218   bool isLiteralImm(MVT type) const;
219 
220   bool isRegKind() const {
221     return Kind == Register;
222   }
223 
224   bool isReg() const override {
225     return isRegKind() && !hasModifiers();
226   }
227 
228   bool isRegOrImmWithInputMods(MVT type) const {
229     return isRegKind() || isInlinableImm(type);
230   }
231 
232   bool isRegOrImmWithInt16InputMods() const {
233     return isRegOrImmWithInputMods(MVT::i16);
234   }
235 
236   bool isRegOrImmWithInt32InputMods() const {
237     return isRegOrImmWithInputMods(MVT::i32);
238   }
239 
240   bool isRegOrImmWithInt64InputMods() const {
241     return isRegOrImmWithInputMods(MVT::i64);
242   }
243 
244   bool isRegOrImmWithFP16InputMods() const {
245     return isRegOrImmWithInputMods(MVT::f16);
246   }
247 
248   bool isRegOrImmWithFP32InputMods() const {
249     return isRegOrImmWithInputMods(MVT::f32);
250   }
251 
252   bool isRegOrImmWithFP64InputMods() const {
253     return isRegOrImmWithInputMods(MVT::f64);
254   }
255 
256   bool isVReg() const {
257     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
258            isRegClass(AMDGPU::VReg_64RegClassID) ||
259            isRegClass(AMDGPU::VReg_96RegClassID) ||
260            isRegClass(AMDGPU::VReg_128RegClassID) ||
261            isRegClass(AMDGPU::VReg_256RegClassID) ||
262            isRegClass(AMDGPU::VReg_512RegClassID);
263   }
264 
265   bool isVReg32OrOff() const {
266     return isOff() || isRegClass(AMDGPU::VGPR_32RegClassID);
267   }
268 
269   bool isSDWARegKind() const;
270 
271   bool isImmTy(ImmTy ImmT) const {
272     return isImm() && Imm.Type == ImmT;
273   }
274 
275   bool isImmModifier() const {
276     return isImm() && Imm.Type != ImmTyNone;
277   }
278 
279   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
280   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
281   bool isDMask() const { return isImmTy(ImmTyDMask); }
282   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
283   bool isDA() const { return isImmTy(ImmTyDA); }
284   bool isR128() const { return isImmTy(ImmTyUNorm); }
285   bool isLWE() const { return isImmTy(ImmTyLWE); }
286   bool isOff() const { return isImmTy(ImmTyOff); }
287   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
288   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
289   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
290   bool isOffen() const { return isImmTy(ImmTyOffen); }
291   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
292   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
293   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
294   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<16>(getImm()); }
295   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
296 
297   bool isOffsetU12() const { return isImmTy(ImmTyOffset) && isUInt<12>(getImm()); }
298   bool isOffsetS13() const { return isImmTy(ImmTyOffset) && isInt<13>(getImm()); }
299   bool isGDS() const { return isImmTy(ImmTyGDS); }
300   bool isGLC() const { return isImmTy(ImmTyGLC); }
301   bool isSLC() const { return isImmTy(ImmTySLC); }
302   bool isTFE() const { return isImmTy(ImmTyTFE); }
303   bool isDFMT() const { return isImmTy(ImmTyDFMT) && isUInt<8>(getImm()); }
304   bool isNFMT() const { return isImmTy(ImmTyNFMT) && isUInt<8>(getImm()); }
305   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
306   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
307   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
308   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
309   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
310   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
311   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
312   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
313   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
314   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
315   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
316   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
317   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
318   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
319   bool isHigh() const { return isImmTy(ImmTyHigh); }
320 
321   bool isMod() const {
322     return isClampSI() || isOModSI();
323   }
324 
325   bool isRegOrImm() const {
326     return isReg() || isImm();
327   }
328 
329   bool isRegClass(unsigned RCID) const;
330 
331   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
332     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
333   }
334 
335   bool isSCSrcB16() const {
336     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
337   }
338 
339   bool isSCSrcV2B16() const {
340     return isSCSrcB16();
341   }
342 
343   bool isSCSrcB32() const {
344     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
345   }
346 
347   bool isSCSrcB64() const {
348     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
349   }
350 
351   bool isSCSrcF16() const {
352     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
353   }
354 
355   bool isSCSrcV2F16() const {
356     return isSCSrcF16();
357   }
358 
359   bool isSCSrcF32() const {
360     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
361   }
362 
363   bool isSCSrcF64() const {
364     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
365   }
366 
367   bool isSSrcB32() const {
368     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
369   }
370 
371   bool isSSrcB16() const {
372     return isSCSrcB16() || isLiteralImm(MVT::i16);
373   }
374 
375   bool isSSrcV2B16() const {
376     llvm_unreachable("cannot happen");
377     return isSSrcB16();
378   }
379 
380   bool isSSrcB64() const {
381     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
382     // See isVSrc64().
383     return isSCSrcB64() || isLiteralImm(MVT::i64);
384   }
385 
386   bool isSSrcF32() const {
387     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
388   }
389 
390   bool isSSrcF64() const {
391     return isSCSrcB64() || isLiteralImm(MVT::f64);
392   }
393 
394   bool isSSrcF16() const {
395     return isSCSrcB16() || isLiteralImm(MVT::f16);
396   }
397 
398   bool isSSrcV2F16() const {
399     llvm_unreachable("cannot happen");
400     return isSSrcF16();
401   }
402 
403   bool isVCSrcB32() const {
404     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
405   }
406 
407   bool isVCSrcB64() const {
408     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
409   }
410 
411   bool isVCSrcB16() const {
412     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
413   }
414 
415   bool isVCSrcV2B16() const {
416     return isVCSrcB16();
417   }
418 
419   bool isVCSrcF32() const {
420     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
421   }
422 
423   bool isVCSrcF64() const {
424     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
425   }
426 
427   bool isVCSrcF16() const {
428     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
429   }
430 
431   bool isVCSrcV2F16() const {
432     return isVCSrcF16();
433   }
434 
435   bool isVSrcB32() const {
436     return isVCSrcF32() || isLiteralImm(MVT::i32);
437   }
438 
439   bool isVSrcB64() const {
440     return isVCSrcF64() || isLiteralImm(MVT::i64);
441   }
442 
443   bool isVSrcB16() const {
444     return isVCSrcF16() || isLiteralImm(MVT::i16);
445   }
446 
447   bool isVSrcV2B16() const {
448     llvm_unreachable("cannot happen");
449     return isVSrcB16();
450   }
451 
452   bool isVSrcF32() const {
453     return isVCSrcF32() || isLiteralImm(MVT::f32);
454   }
455 
456   bool isVSrcF64() const {
457     return isVCSrcF64() || isLiteralImm(MVT::f64);
458   }
459 
460   bool isVSrcF16() const {
461     return isVCSrcF16() || isLiteralImm(MVT::f16);
462   }
463 
464   bool isVSrcV2F16() const {
465     llvm_unreachable("cannot happen");
466     return isVSrcF16();
467   }
468 
469   bool isKImmFP32() const {
470     return isLiteralImm(MVT::f32);
471   }
472 
473   bool isKImmFP16() const {
474     return isLiteralImm(MVT::f16);
475   }
476 
477   bool isMem() const override {
478     return false;
479   }
480 
481   bool isExpr() const {
482     return Kind == Expression;
483   }
484 
485   bool isSoppBrTarget() const {
486     return isExpr() || isImm();
487   }
488 
489   bool isSWaitCnt() const;
490   bool isHwreg() const;
491   bool isSendMsg() const;
492   bool isSwizzle() const;
493   bool isSMRDOffset8() const;
494   bool isSMRDOffset20() const;
495   bool isSMRDLiteralOffset() const;
496   bool isDPPCtrl() const;
497   bool isGPRIdxMode() const;
498   bool isS16Imm() const;
499   bool isU16Imm() const;
500 
  // Return the symbol name of an Expression operand so it can be matched as
  // if it were a plain token (see the Expression case in isToken()).
  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }
506 
507   StringRef getToken() const {
508     assert(isToken());
509 
510     if (Kind == Expression)
511       return getExpressionAsToken();
512 
513     return StringRef(Tok.Data, Tok.Length);
514   }
515 
516   int64_t getImm() const {
517     assert(isImm());
518     return Imm.Val;
519   }
520 
521   ImmTy getImmTy() const {
522     assert(isImm());
523     return Imm.Type;
524   }
525 
526   unsigned getReg() const override {
527     return Reg.RegNo;
528   }
529 
530   SMLoc getStartLoc() const override {
531     return StartLoc;
532   }
533 
534   SMLoc getEndLoc() const override {
535     return EndLoc;
536   }
537 
  // Modifiers are only meaningful on registers and on plain (typeless)
  // immediates; both union alternatives carry a Mods field.
  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }
542 
543   void setModifiers(Modifiers Mods) {
544     assert(isRegKind() || isImmTy(ImmTyNone));
545     if (isRegKind())
546       Reg.Mods = Mods;
547     else
548       Imm.Mods = Mods;
549   }
550 
551   bool hasModifiers() const {
552     return getModifiers().hasModifiers();
553   }
554 
555   bool hasFPModifiers() const {
556     return getModifiers().hasFPModifiers();
557   }
558 
559   bool hasIntModifiers() const {
560     return getModifiers().hasIntModifiers();
561   }
562 
563   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
564 
565   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
566 
567   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
568 
569   template <unsigned Bitwidth>
570   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
571 
572   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
573     addKImmFPOperands<16>(Inst, N);
574   }
575 
576   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
577     addKImmFPOperands<32>(Inst, N);
578   }
579 
580   void addRegOperands(MCInst &Inst, unsigned N) const;
581 
582   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
583     if (isRegKind())
584       addRegOperands(Inst, N);
585     else if (isExpr())
586       Inst.addOperand(MCOperand::createExpr(Expr));
587     else
588       addImmOperands(Inst, N);
589   }
590 
591   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
592     Modifiers Mods = getModifiers();
593     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
594     if (isRegKind()) {
595       addRegOperands(Inst, N);
596     } else {
597       addImmOperands(Inst, N, false);
598     }
599   }
600 
601   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
602     assert(!hasIntModifiers());
603     addRegOrImmWithInputModsOperands(Inst, N);
604   }
605 
606   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
607     assert(!hasFPModifiers());
608     addRegOrImmWithInputModsOperands(Inst, N);
609   }
610 
611   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
612     Modifiers Mods = getModifiers();
613     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
614     assert(isRegKind());
615     addRegOperands(Inst, N);
616   }
617 
618   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
619     assert(!hasIntModifiers());
620     addRegWithInputModsOperands(Inst, N);
621   }
622 
623   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
624     assert(!hasFPModifiers());
625     addRegWithInputModsOperands(Inst, N);
626   }
627 
628   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
629     if (isImm())
630       addImmOperands(Inst, N);
631     else {
632       assert(isExpr());
633       Inst.addOperand(MCOperand::createExpr(Expr));
634     }
635   }
636 
637   static void printImmTy(raw_ostream& OS, ImmTy Type) {
638     switch (Type) {
639     case ImmTyNone: OS << "None"; break;
640     case ImmTyGDS: OS << "GDS"; break;
641     case ImmTyOffen: OS << "Offen"; break;
642     case ImmTyIdxen: OS << "Idxen"; break;
643     case ImmTyAddr64: OS << "Addr64"; break;
644     case ImmTyOffset: OS << "Offset"; break;
645     case ImmTyOffset0: OS << "Offset0"; break;
646     case ImmTyOffset1: OS << "Offset1"; break;
647     case ImmTyGLC: OS << "GLC"; break;
648     case ImmTySLC: OS << "SLC"; break;
649     case ImmTyTFE: OS << "TFE"; break;
650     case ImmTyDFMT: OS << "DFMT"; break;
651     case ImmTyNFMT: OS << "NFMT"; break;
652     case ImmTyClampSI: OS << "ClampSI"; break;
653     case ImmTyOModSI: OS << "OModSI"; break;
654     case ImmTyDppCtrl: OS << "DppCtrl"; break;
655     case ImmTyDppRowMask: OS << "DppRowMask"; break;
656     case ImmTyDppBankMask: OS << "DppBankMask"; break;
657     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
658     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
659     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
660     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
661     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
662     case ImmTyDMask: OS << "DMask"; break;
663     case ImmTyUNorm: OS << "UNorm"; break;
664     case ImmTyDA: OS << "DA"; break;
665     case ImmTyR128: OS << "R128"; break;
666     case ImmTyLWE: OS << "LWE"; break;
667     case ImmTyOff: OS << "Off"; break;
668     case ImmTyExpTgt: OS << "ExpTgt"; break;
669     case ImmTyExpCompr: OS << "ExpCompr"; break;
670     case ImmTyExpVM: OS << "ExpVM"; break;
671     case ImmTyHwreg: OS << "Hwreg"; break;
672     case ImmTySendMsg: OS << "SendMsg"; break;
673     case ImmTyInterpSlot: OS << "InterpSlot"; break;
674     case ImmTyInterpAttr: OS << "InterpAttr"; break;
675     case ImmTyAttrChan: OS << "AttrChan"; break;
676     case ImmTyOpSel: OS << "OpSel"; break;
677     case ImmTyOpSelHi: OS << "OpSelHi"; break;
678     case ImmTyNegLo: OS << "NegLo"; break;
679     case ImmTyNegHi: OS << "NegHi"; break;
680     case ImmTySwizzle: OS << "Swizzle"; break;
681     case ImmTyHigh: OS << "High"; break;
682     }
683   }
684 
685   void print(raw_ostream &OS) const override {
686     switch (Kind) {
687     case Register:
688       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
689       break;
690     case Immediate:
691       OS << '<' << getImm();
692       if (getImmTy() != ImmTyNone) {
693         OS << " type: "; printImmTy(OS, getImmTy());
694       }
695       OS << " mods: " << Imm.Mods << '>';
696       break;
697     case Token:
698       OS << '\'' << getToken() << '\'';
699       break;
700     case Expression:
701       OS << "<expr " << *Expr << '>';
702       break;
703     }
704   }
705 
706   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
707                                       int64_t Val, SMLoc Loc,
708                                       ImmTy Type = ImmTyNone,
709                                       bool IsFPImm = false) {
710     auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
711     Op->Imm.Val = Val;
712     Op->Imm.IsFPImm = IsFPImm;
713     Op->Imm.Type = Type;
714     Op->Imm.Mods = Modifiers();
715     Op->StartLoc = Loc;
716     Op->EndLoc = Loc;
717     return Op;
718   }
719 
  // Build a Token operand over Str (the operand does not own the text).
  // NOTE(review): HasExplicitEncodingSize is currently unused in this body;
  // kept only for interface compatibility with existing callers.
  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }
730 
731   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
732                                       unsigned RegNo, SMLoc S,
733                                       SMLoc E,
734                                       bool ForceVOP3) {
735     auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
736     Op->Reg.RegNo = RegNo;
737     Op->Reg.Mods = Modifiers();
738     Op->Reg.IsForcedVOP3 = ForceVOP3;
739     Op->StartLoc = S;
740     Op->EndLoc = E;
741     return Op;
742   }
743 
744   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
745                                        const class MCExpr *Expr, SMLoc S) {
746     auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
747     Op->Expr = Expr;
748     Op->StartLoc = S;
749     Op->EndLoc = S;
750     return Op;
751   }
752 };
753 
754 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
755   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
756   return OS;
757 }
758 
759 //===----------------------------------------------------------------------===//
760 // AsmParser
761 //===----------------------------------------------------------------------===//
762 
763 // Holds info related to the current kernel, e.g. count of SGPRs used.
764 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
765 // .amdgpu_hsa_kernel or at EOF.
766 class KernelScopeInfo {
767   int SgprIndexUnusedMin = -1;
768   int VgprIndexUnusedMin = -1;
769   MCContext *Ctx = nullptr;
770 
771   void usesSgprAt(int i) {
772     if (i >= SgprIndexUnusedMin) {
773       SgprIndexUnusedMin = ++i;
774       if (Ctx) {
775         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
776         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
777       }
778     }
779   }
780 
781   void usesVgprAt(int i) {
782     if (i >= VgprIndexUnusedMin) {
783       VgprIndexUnusedMin = ++i;
784       if (Ctx) {
785         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
786         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
787       }
788     }
789   }
790 
791 public:
792   KernelScopeInfo() = default;
793 
794   void initialize(MCContext &Context) {
795     Ctx = &Context;
796     usesSgprAt(SgprIndexUnusedMin = -1);
797     usesVgprAt(VgprIndexUnusedMin = -1);
798   }
799 
800   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
801     switch (RegKind) {
802       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
803       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
804       default: break;
805     }
806   }
807 };
808 
809 class AMDGPUAsmParser : public MCTargetAsmParser {
810   MCAsmParser &Parser;
811 
812   unsigned ForcedEncodingSize = 0;
813   bool ForcedDPP = false;
814   bool ForcedSDWA = false;
815   KernelScopeInfo KernelScope;
816 
817   /// @name Auto-generated Match Functions
818   /// {
819 
820 #define GET_ASSEMBLER_HEADER
821 #include "AMDGPUGenAsmMatcher.inc"
822 
823   /// }
824 
825 private:
826   bool ParseAsAbsoluteExpression(uint32_t &Ret);
827   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
828   bool ParseDirectiveHSACodeObjectVersion();
829   bool ParseDirectiveHSACodeObjectISA();
830   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
831   bool ParseDirectiveAMDKernelCodeT();
832   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
833   bool ParseDirectiveAMDGPUHsaKernel();
834 
835   bool ParseDirectiveISAVersion();
836   bool ParseDirectiveHSAMetadata();
837   bool ParseDirectivePALMetadata();
838 
839   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
840                              RegisterKind RegKind, unsigned Reg1,
841                              unsigned RegNum);
842   bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
843                            unsigned& RegNum, unsigned& RegWidth,
844                            unsigned *DwordRegIndex);
845   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
846                     bool IsAtomic, bool IsAtomicReturn);
847   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
848                  bool IsGdsHardcoded);
849 
850 public:
851   enum AMDGPUMatchResultTy {
852     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
853   };
854 
855   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
856 
  // Construct the parser. Order matters here: default features are toggled
  // on first (when none were supplied), the available-feature mask is
  // computed from the final bits, and only then are the ISA-version symbols
  // and the kernel-scope counters set up in the MC context.
  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
               const MCInstrInfo &MII,
               const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("SOUTHERN_ISLANDS");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is none suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
      AMDGPU::IsaInfo::IsaVersion ISA =
          AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
      MCContext &Ctx = getContext();
      // Expose the ISA version to assembly sources as
      // .option.machine_version_{major,minor,stepping}.
      MCSymbol *Sym =
          Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
      Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
      Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
      Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
      Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
      Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
    }
    // Publishes initial .kernel.{s,v}gpr_count symbols into the context.
    KernelScope.initialize(getContext());
  }
888 
889   bool isSI() const {
890     return AMDGPU::isSI(getSTI());
891   }
892 
893   bool isCI() const {
894     return AMDGPU::isCI(getSTI());
895   }
896 
897   bool isVI() const {
898     return AMDGPU::isVI(getSTI());
899   }
900 
901   bool isGFX9() const {
902     return AMDGPU::isGFX9(getSTI());
903   }
904 
905   bool hasInv2PiInlineImm() const {
906     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
907   }
908 
909   bool hasFlatOffsets() const {
910     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
911   }
912 
913   bool hasSGPR102_SGPR103() const {
914     return !isVI();
915   }
916 
917   bool hasIntClamp() const {
918     return getFeatureBits()[AMDGPU::FeatureIntClamp];
919   }
920 
921   AMDGPUTargetStreamer &getTargetStreamer() {
922     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
923     return static_cast<AMDGPUTargetStreamer &>(TS);
924   }
925 
926   const MCRegisterInfo *getMRI() const {
927     // We need this const_cast because for some reason getContext() is not const
928     // in MCAsmParser.
929     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
930   }
931 
932   const MCInstrInfo *getMII() const {
933     return &MII;
934   }
935 
936   const FeatureBitset &getFeatureBits() const {
937     return getSTI().getFeatureBits();
938   }
939 
940   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
941   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
942   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
943 
944   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
945   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
946   bool isForcedDPP() const { return ForcedDPP; }
947   bool isForcedSDWA() const { return ForcedSDWA; }
948   ArrayRef<unsigned> getMatchedVariants() const;
949 
  // Register parsing entry points; the override feeds the generic
  // MCTargetAsmParser interface.
  std::unique_ptr<AMDGPUOperand> parseRegister();
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  // Parse an operand of the form "prefix:<integer>" into Int.
  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  // As above, but appends the result to Operands as an immediate of type
  // ImmTy, optionally transformed through ConvertResult.
  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy parseOperandArrayWithPrefix(
    const char *Prefix,
    OperandVector &Operands,
    AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
    bool (*ConvertResult)(int64_t&) = nullptr);

  // Parse a bare keyword operand (e.g. "gds") as a 0/1 immediate.
  OperandMatchResultTy
  parseNamedBit(const char *Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value);

  // Immediate/register operand parsing; AbsMod enables the |value| syntax.
  bool parseAbsoluteExpr(int64_t &Val, bool AbsMod = false);
  OperandMatchResultTy parseImm(OperandVector &Operands, bool AbsMod = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);

  // DS/EXP instruction operand converters (MCInst finalization).
  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);
1003 
private:
  // One symbolic-or-numeric field of a composite operand such as sendmsg()
  // or hwreg(); IsSymbolic records whether a name (vs. a number) was used.
  struct OperandInfoTy {
    int64_t Id;
    bool IsSymbolic = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
  bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);

  void errorExpTgt();
  OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);

  // Post-match validation of the assembled MCInst against target rules.
  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
  bool validateConstantBusLimitations(const MCInst &Inst);
  bool validateEarlyClobberLimitations(const MCInst &Inst);
  bool validateIntClampSupported(const MCInst &Inst);
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  // Small lexer helpers shared by the custom operand parsers.
  bool trySkipId(const StringRef Id);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseExpr(int64_t &Imm);
1031 
public:
  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);

  // Export / message / interpolation / branch operand parsers.
  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);

  // DS swizzle operand: parses OpNum values into Op, each within
  // [MinVal, MaxVal], reporting ErrMsg on failure.
  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  // MUBUF/MTBUF converters; the bool flags select atomic/atomic-with-return
  // handling in cvtMubufImpl.
  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  // Default (omitted-operand) values for optional memory operand bits.
  AMDGPUOperand::Ptr defaultGLC() const;
  AMDGPUOperand::Ptr defaultSLC() const;
  AMDGPUOperand::Ptr defaultTFE() const;

  AMDGPUOperand::Ptr defaultDMask() const;
  AMDGPUOperand::Ptr defaultUNorm() const;
  AMDGPUOperand::Ptr defaultDA() const;
  AMDGPUOperand::Ptr defaultR128() const;
  AMDGPUOperand::Ptr defaultLWE() const;
  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMRDOffset20() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultOffsetU12() const;
  AMDGPUOperand::Ptr defaultOffsetS13() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  // VOP3/VOP3P converters.
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3PImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsPacked);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P_NotPacked(MCInst &Inst, const OperandVector &Operands);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  // MIMG (image) converters.
  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);

  // DPP support.
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands);

  // SDWA support.
  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                uint64_t BasicInstType, bool skipVcc = false);
1106 };
1107 
// Table entry describing one optional/named instruction operand.
struct OptionalOperand {
  const char *Name;                // keyword as written in the asm source
  AMDGPUOperand::ImmTy Type;       // immediate type produced when parsed
  bool IsBit;                      // true if this is a single-bit flag operand
  bool (*ConvertResult)(int64_t&); // optional post-parse value conversion
};
1114 
1115 } // end anonymous namespace
1116 
1117 // May be called with integer type with equivalent bitwidth.
1118 static const fltSemantics *getFltSemantics(unsigned Size) {
1119   switch (Size) {
1120   case 4:
1121     return &APFloat::IEEEsingle();
1122   case 8:
1123     return &APFloat::IEEEdouble();
1124   case 2:
1125     return &APFloat::IEEEhalf();
1126   default:
1127     llvm_unreachable("unsupported fp type");
1128   }
1129 }
1130 
1131 static const fltSemantics *getFltSemantics(MVT VT) {
1132   return getFltSemantics(VT.getSizeInBits() / 8);
1133 }
1134 
// Map an MCOI operand type to the floating-point semantics of its literal.
static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  // 32-bit operands.
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    return &APFloat::IEEEsingle();
  // 64-bit operands.
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return &APFloat::IEEEdouble();
  // 16-bit scalar and packed-16-bit operands (per-lane semantics are half).
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}
1158 
1159 //===----------------------------------------------------------------------===//
1160 // Operand
1161 //===----------------------------------------------------------------------===//
1162 
1163 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1164   bool Lost;
1165 
1166   // Convert literal to single precision
1167   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1168                                                APFloat::rmNearestTiesToEven,
1169                                                &Lost);
1170   // We allow precision lost but not overflow or underflow
1171   if (Status != APFloat::opOK &&
1172       Lost &&
1173       ((Status & APFloat::opOverflow)  != 0 ||
1174        (Status & APFloat::opUnderflow) != 0)) {
1175     return false;
1176   }
1177 
1178   return true;
1179 }
1180 
// Whether this immediate can be encoded as a hardware inline constant for an
// operand of the given machine value type (no extra literal dword needed).
bool AMDGPUOperand::isInlinableImm(MVT type) const {
  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    // Narrow the 64-bit fp token down to the operand's width; the conversion
    // must stay in range for the literal to remain a candidate.
    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return AMDGPU::isInlinableLiteral16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (type.getScalarSizeInBits() == 16) {
    return AMDGPU::isInlinableLiteral16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}
1230 
// Whether this immediate may be encoded as an extra literal dword for an
// operand of the given type (as opposed to an inline constant).
bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isUIntN(Size, Imm.Val) || isIntN(Size, Imm.Val);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set low 64-bits of literal to zeroes but we accept this literals
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // Narrower fp operand: the token must convert without going out of range.
  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, type);
}
1271 
1272 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1273   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1274 }
1275 
1276 bool AMDGPUOperand::isSDWARegKind() const {
1277   if (AsmParser->isVI())
1278     return isVReg();
1279   else if (AsmParser->isGFX9())
1280     return isRegKind();
1281   else
1282     return false;
1283 }
1284 
1285 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1286 {
1287   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1288   assert(Size == 2 || Size == 4 || Size == 8);
1289 
1290   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1291 
1292   if (Imm.Mods.Abs) {
1293     Val &= ~FpSignMask;
1294   }
1295   if (Imm.Mods.Neg) {
1296     Val ^= FpSignMask;
1297   }
1298 
1299   return Val;
1300 }
1301 
1302 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1303   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1304                              Inst.getNumOperands())) {
1305     addLiteralImmOperand(Inst, Imm.Val,
1306                          ApplyModifiers &
1307                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1308   } else {
1309     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1310     Inst.addOperand(MCOperand::createImm(Imm.Val));
1311   }
1312 }
1313 
// Encode Val into Inst for a literal-accepting source operand, choosing
// between an inline constant and a literal encoding based on the operand's
// declared type. ApplyModifiers folds abs/neg into the bits first.
void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    // FP tokens are kept as 64-bit doubles; int tokens use the operand width.
    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;

  if (Imm.IsFPImm) { // We got fp literal token
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        return;
      }

      // Non-inlineable
      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check if low 32 bits are zeros
        if (Literal.getLoBits(32) != 0) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
          "Can't encode literal as exact 64-bit floating-point operand. "
          "Low 32-bits will be set to zero");
        }

        // Only the high 32 bits of the double are encoded.
        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
        return;
      }

      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them. This case should be checked earlier
      // in predicate methods (isLiteralImm())
      llvm_unreachable("fp literal in 64-bit integer instruction.");

    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert literal to single precision
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision lost but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()

      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      if (OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
          OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
        // Packed operands replicate the 16-bit value into both halves.
        ImmVal |= (ImmVal << 16);
      }

      Inst.addOperand(MCOperand::createImm(ImmVal));
      return;
    }
    default:
      llvm_unreachable("invalid operand size");
    }

    return;
  }

   // We got int literal token.
  // Only sign extend inline immediates.
  // FIXME: No errors on truncation
  switch (OpTy) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    if (isInt<32>(Val) &&
        AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    // Not inlinable: truncate to 32 bits and encode as literal.
    Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
    return;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
    return;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    if (isInt<16>(Val) &&
        AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffff));
    return;

  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
    auto LiteralVal = static_cast<uint16_t>(Literal.getLoBits(16).getZExtValue());
    assert(AMDGPU::isInlinableLiteral16(LiteralVal,
                                        AsmParser->hasInv2PiInlineImm()));

    // Replicate the 16-bit inline value into both packed halves.
    uint32_t ImmVal = static_cast<uint32_t>(LiteralVal) << 16 |
                      static_cast<uint32_t>(LiteralVal);
    Inst.addOperand(MCOperand::createImm(ImmVal));
    return;
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}
1452 
// Append this immediate as a KIMM (instruction-embedded fp constant) operand
// of the given bit width, converting fp tokens to the target semantics.
template <unsigned Bitwidth>
void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
  APInt Literal(64, Imm.Val);

  if (!Imm.IsFPImm) {
    // We got int literal token.
    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
    return;
  }

  // FP token: round the double to Bitwidth-sized fp and emit its raw bits.
  bool Lost;
  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
  FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
                    APFloat::rmNearestTiesToEven, &Lost);
  Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
}
1469 
1470 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1471   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1472 }
1473 
1474 //===----------------------------------------------------------------------===//
1475 // AsmParser
1476 //===----------------------------------------------------------------------===//
1477 
1478 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1479   if (Is == IS_VGPR) {
1480     switch (RegWidth) {
1481       default: return -1;
1482       case 1: return AMDGPU::VGPR_32RegClassID;
1483       case 2: return AMDGPU::VReg_64RegClassID;
1484       case 3: return AMDGPU::VReg_96RegClassID;
1485       case 4: return AMDGPU::VReg_128RegClassID;
1486       case 8: return AMDGPU::VReg_256RegClassID;
1487       case 16: return AMDGPU::VReg_512RegClassID;
1488     }
1489   } else if (Is == IS_TTMP) {
1490     switch (RegWidth) {
1491       default: return -1;
1492       case 1: return AMDGPU::TTMP_32RegClassID;
1493       case 2: return AMDGPU::TTMP_64RegClassID;
1494       case 4: return AMDGPU::TTMP_128RegClassID;
1495     }
1496   } else if (Is == IS_SGPR) {
1497     switch (RegWidth) {
1498       default: return -1;
1499       case 1: return AMDGPU::SGPR_32RegClassID;
1500       case 2: return AMDGPU::SGPR_64RegClassID;
1501       case 4: return AMDGPU::SGPR_128RegClassID;
1502       case 8: return AMDGPU::SReg_256RegClassID;
1503       case 16: return AMDGPU::SReg_512RegClassID;
1504     }
1505   }
1506   return -1;
1507 }
1508 
// Map a special-register name (e.g. "vcc", "exec_lo") to its register
// number; returns 0 (no register) when the name is not a special register.
static unsigned getSpecialRegForName(StringRef RegName) {
  return StringSwitch<unsigned>(RegName)
    .Case("exec", AMDGPU::EXEC)
    .Case("vcc", AMDGPU::VCC)
    .Case("flat_scratch", AMDGPU::FLAT_SCR)
    .Case("m0", AMDGPU::M0)
    .Case("scc", AMDGPU::SCC)
    .Case("tba", AMDGPU::TBA)
    .Case("tma", AMDGPU::TMA)
    .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
    .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
    .Case("vcc_lo", AMDGPU::VCC_LO)
    .Case("vcc_hi", AMDGPU::VCC_HI)
    .Case("exec_lo", AMDGPU::EXEC_LO)
    .Case("exec_hi", AMDGPU::EXEC_HI)
    .Case("tma_lo", AMDGPU::TMA_LO)
    .Case("tma_hi", AMDGPU::TMA_HI)
    .Case("tba_lo", AMDGPU::TBA_LO)
    .Case("tba_hi", AMDGPU::TBA_HI)
    .Default(0);
}
1530 
1531 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1532                                     SMLoc &EndLoc) {
1533   auto R = parseRegister();
1534   if (!R) return true;
1535   assert(R->isReg());
1536   RegNo = R->getReg();
1537   StartLoc = R->getStartLoc();
1538   EndLoc = R->getEndLoc();
1539   return false;
1540 }
1541 
1542 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1543                                             RegisterKind RegKind, unsigned Reg1,
1544                                             unsigned RegNum) {
1545   switch (RegKind) {
1546   case IS_SPECIAL:
1547     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1548       Reg = AMDGPU::EXEC;
1549       RegWidth = 2;
1550       return true;
1551     }
1552     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1553       Reg = AMDGPU::FLAT_SCR;
1554       RegWidth = 2;
1555       return true;
1556     }
1557     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1558       Reg = AMDGPU::VCC;
1559       RegWidth = 2;
1560       return true;
1561     }
1562     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1563       Reg = AMDGPU::TBA;
1564       RegWidth = 2;
1565       return true;
1566     }
1567     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
1568       Reg = AMDGPU::TMA;
1569       RegWidth = 2;
1570       return true;
1571     }
1572     return false;
1573   case IS_VGPR:
1574   case IS_SGPR:
1575   case IS_TTMP:
1576     if (Reg1 != Reg + RegWidth) {
1577       return false;
1578     }
1579     RegWidth++;
1580     return true;
1581   default:
1582     llvm_unreachable("unexpected register kind");
1583   }
1584 }
1585 
// Parse one register reference in any of the accepted forms: a special
// register name, a single register ("v5"), a range ("v[2:5]"), or a
// bracketed list of consecutive registers ("[s0,s1]"). On success fills
// RegKind/Reg/RegNum/RegWidth (and the dword index, if requested) and
// returns true; returns false on any syntax or validity error.
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          unsigned *DwordRegIndex) {
  if (DwordRegIndex) { *DwordRegIndex = 0; }
  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (getLexer().is(AsmToken::Identifier)) {
    StringRef RegName = Parser.getTok().getString();
    if ((Reg = getSpecialRegForName(RegName))) {
      Parser.Lex();
      RegKind = IS_SPECIAL;
    } else {
      // Classify by prefix: 'v', 's', or "ttmp".
      unsigned RegNumIndex = 0;
      if (RegName[0] == 'v') {
        RegNumIndex = 1;
        RegKind = IS_VGPR;
      } else if (RegName[0] == 's') {
        RegNumIndex = 1;
        RegKind = IS_SGPR;
      } else if (RegName.startswith("ttmp")) {
        RegNumIndex = strlen("ttmp");
        RegKind = IS_TTMP;
      } else {
        return false;
      }
      if (RegName.size() > RegNumIndex) {
        // Single 32-bit register: vXX.
        if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
          return false;
        Parser.Lex();
        RegWidth = 1;
      } else {
        // Range of registers: v[XX:YY]. ":YY" is optional.
        Parser.Lex();
        int64_t RegLo, RegHi;
        if (getLexer().isNot(AsmToken::LBrac))
          return false;
        Parser.Lex();

        if (getParser().parseAbsoluteExpression(RegLo))
          return false;

        const bool isRBrace = getLexer().is(AsmToken::RBrac);
        if (!isRBrace && getLexer().isNot(AsmToken::Colon))
          return false;
        Parser.Lex();

        if (isRBrace) {
          // "v[XX]" — single-register range.
          RegHi = RegLo;
        } else {
          if (getParser().parseAbsoluteExpression(RegHi))
            return false;

          if (getLexer().isNot(AsmToken::RBrac))
            return false;
          Parser.Lex();
        }
        RegNum = (unsigned) RegLo;
        RegWidth = (RegHi - RegLo) + 1;
      }
    }
  } else if (getLexer().is(AsmToken::LBrac)) {
    // List of consecutive registers: [s0,s1,s2,s3]
    Parser.Lex();
    if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
      return false;
    if (RegWidth != 1)
      return false;
    RegisterKind RegKind1;
    unsigned Reg1, RegNum1, RegWidth1;
    do {
      if (getLexer().is(AsmToken::Comma)) {
        Parser.Lex();
      } else if (getLexer().is(AsmToken::RBrac)) {
        Parser.Lex();
        break;
      } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
        // Each element must be a single register of the same kind, and
        // consecutive with the run parsed so far.
        if (RegWidth1 != 1) {
          return false;
        }
        if (RegKind1 != RegKind) {
          return false;
        }
        if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
          return false;
        }
      } else {
        return false;
      }
    } while (true);
  } else {
    return false;
  }
  // Resolve the parsed kind/number/width to an actual MC register.
  switch (RegKind) {
  case IS_SPECIAL:
    RegNum = 0;
    RegWidth = 1;
    break;
  case IS_VGPR:
  case IS_SGPR:
  case IS_TTMP:
  {
    unsigned Size = 1;
    if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
      // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
      Size = std::min(RegWidth, 4u);
    }
    if (RegNum % Size != 0)
      return false;
    if (DwordRegIndex) { *DwordRegIndex = RegNum; }
    // Convert the dword index to an index within the register class.
    RegNum = RegNum / Size;
    int RCID = getRegClass(RegKind, RegWidth);
    if (RCID == -1)
      return false;
    const MCRegisterClass RC = TRI->getRegClass(RCID);
    if (RegNum >= RC.getNumRegs())
      return false;
    Reg = RC.getRegister(RegNum);
    break;
  }

  default:
    llvm_unreachable("unexpected register kind");
  }

  // Finally reject registers the current subtarget does not have.
  if (!subtargetHasRegister(*TRI, Reg))
    return false;
  return true;
}
1714 
1715 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
1716   const auto &Tok = Parser.getTok();
1717   SMLoc StartLoc = Tok.getLoc();
1718   SMLoc EndLoc = Tok.getEndLoc();
1719   RegisterKind RegKind;
1720   unsigned Reg, RegNum, RegWidth, DwordRegIndex;
1721 
1722   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
1723     return nullptr;
1724   }
1725   KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
1726   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false);
1727 }
1728 
1729 bool
1730 AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool AbsMod) {
1731   if (AbsMod && getLexer().peekTok().is(AsmToken::Pipe) &&
1732       (getLexer().getKind() == AsmToken::Integer ||
1733        getLexer().getKind() == AsmToken::Real)) {
1734     // This is a workaround for handling operands like these:
1735     //     |1.0|
1736     //     |-1|
1737     // This syntax is not compatible with syntax of standard
1738     // MC expressions (due to the trailing '|').
1739 
1740     SMLoc EndLoc;
1741     const MCExpr *Expr;
1742 
1743     if (getParser().parsePrimaryExpr(Expr, EndLoc)) {
1744       return true;
1745     }
1746 
1747     return !Expr->evaluateAsAbsolute(Val);
1748   }
1749 
1750   return getParser().parseAbsoluteExpression(Val);
1751 }
1752 
// Parse an integer or floating-point immediate (with optional leading '-')
// and append it to Operands. AbsMod enables the |value| workaround in
// parseAbsoluteExpr.
OperandMatchResultTy
AMDGPUAsmParser::parseImm(OperandVector &Operands, bool AbsMod) {
  // TODO: add syntactic sugar for 1/(2*PI)
  bool Minus = false;
  if (getLexer().getKind() == AsmToken::Minus) {
    Minus = true;
    Parser.Lex();
  }

  SMLoc S = Parser.getTok().getLoc();
  switch(getLexer().getKind()) {
  case AsmToken::Integer: {
    int64_t IntVal;
    if (parseAbsoluteExpr(IntVal, AbsMod))
      return MatchOperand_ParseFail;
    if (Minus)
      IntVal *= -1;
    Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
    return MatchOperand_Success;
  }
  case AsmToken::Real: {
    // The lexer stores fp literals as the raw bit pattern of a double.
    int64_t IntVal;
    if (parseAbsoluteExpr(IntVal, AbsMod))
      return MatchOperand_ParseFail;

    APFloat F(BitsToDouble(IntVal));
    if (Minus)
      F.changeSign();
    Operands.push_back(
        AMDGPUOperand::CreateImm(this, F.bitcastToAPInt().getZExtValue(), S,
                                 AMDGPUOperand::ImmTyNone, true));
    return MatchOperand_Success;
  }
  default:
    // A consumed '-' with no following literal is a hard parse failure.
    return Minus ? MatchOperand_ParseFail : MatchOperand_NoMatch;
  }
}
1790 
1791 OperandMatchResultTy
1792 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
1793   if (auto R = parseRegister()) {
1794     assert(R->isReg());
1795     R->Reg.IsForcedVOP3 = isForcedVOP3();
1796     Operands.push_back(std::move(R));
1797     return MatchOperand_Success;
1798   }
1799   return MatchOperand_NoMatch;
1800 }
1801 
1802 OperandMatchResultTy
1803 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool AbsMod) {
1804   auto res = parseImm(Operands, AbsMod);
1805   if (res != MatchOperand_NoMatch) {
1806     return res;
1807   }
1808 
1809   return parseReg(Operands);
1810 }
1811 
// Parse an operand with optional floating-point input modifiers:
//   -x, neg(x), |x|, abs(x)
// Negate/Abs track the '-' / '|...|' spellings; Negate2/Abs2 track the
// functional 'neg(...)' / 'abs(...)' spellings. Returns ParseFail on any
// malformed or ambiguous combination.
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Negate = false, Negate2 = false, Abs = false, Abs2 = false;

  if (getLexer().getKind()== AsmToken::Minus) {
    const AsmToken NextToken = getLexer().peekTok();

    // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
    if (NextToken.is(AsmToken::Minus)) {
      Error(Parser.getTok().getLoc(), "invalid syntax, expected 'neg' modifier");
      return MatchOperand_ParseFail;
    }

    // '-' followed by an integer literal N should be interpreted as integer
    // negation rather than a floating-point NEG modifier applied to N.
    // Besides being counter-intuitive, such use of the floating-point NEG
    // modifier results in different meaning of integer literals used with
    // VOP1/2/C and VOP3, for example:
    //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
    //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
    // Negative fp literals should be handled likewise for uniformity.
    // So only treat '-' as a NEG modifier when it is NOT followed by a
    // numeric literal (the literal case is handled later by parseImm).
    if (!NextToken.is(AsmToken::Integer) && !NextToken.is(AsmToken::Real)) {
      Parser.Lex();
      Negate = true;
    }
  }

  // 'neg(...)' form; cannot be combined with a leading '-'.
  if (getLexer().getKind() == AsmToken::Identifier &&
      Parser.getTok().getString() == "neg") {
    if (Negate) {
      Error(Parser.getTok().getLoc(), "expected register or immediate");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Negate2 = true;
    if (getLexer().isNot(AsmToken::LParen)) {
      Error(Parser.getTok().getLoc(), "expected left paren after neg");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
  }

  // 'abs(...)' form.
  if (getLexer().getKind() == AsmToken::Identifier &&
      Parser.getTok().getString() == "abs") {
    Parser.Lex();
    Abs2 = true;
    if (getLexer().isNot(AsmToken::LParen)) {
      Error(Parser.getTok().getLoc(), "expected left paren after abs");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
  }

  // '|x|' form; cannot be combined with 'abs(...)'.
  if (getLexer().getKind() == AsmToken::Pipe) {
    if (Abs2) {
      Error(Parser.getTok().getLoc(), "expected register or immediate");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Abs = true;
  }

  // Parse the operand itself. Abs is passed through so parseAbsoluteExpr
  // can handle the trailing '|' of |literal|.
  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return Res;
  }

  // Consume the closing delimiters and record the modifiers.
  AMDGPUOperand::Modifiers Mods;
  if (Abs) {
    if (getLexer().getKind() != AsmToken::Pipe) {
      Error(Parser.getTok().getLoc(), "expected vertical bar");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Mods.Abs = true;
  }
  if (Abs2) {
    if (getLexer().isNot(AsmToken::RParen)) {
      Error(Parser.getTok().getLoc(), "expected closing parentheses");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Mods.Abs = true;
  }

  if (Negate) {
    Mods.Neg = true;
  } else if (Negate2) {
    if (getLexer().isNot(AsmToken::RParen)) {
      Error(Parser.getTok().getLoc(), "expected closing parentheses");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Mods.Neg = true;
  }

  // Attach the collected modifiers to the operand just parsed.
  if (Mods.hasFPModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    Op.setModifiers(Mods);
  }
  return MatchOperand_Success;
}
1920 
1921 OperandMatchResultTy
1922 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
1923                                                bool AllowImm) {
1924   bool Sext = false;
1925 
1926   if (getLexer().getKind() == AsmToken::Identifier &&
1927       Parser.getTok().getString() == "sext") {
1928     Parser.Lex();
1929     Sext = true;
1930     if (getLexer().isNot(AsmToken::LParen)) {
1931       Error(Parser.getTok().getLoc(), "expected left paren after sext");
1932       return MatchOperand_ParseFail;
1933     }
1934     Parser.Lex();
1935   }
1936 
1937   OperandMatchResultTy Res;
1938   if (AllowImm) {
1939     Res = parseRegOrImm(Operands);
1940   } else {
1941     Res = parseReg(Operands);
1942   }
1943   if (Res != MatchOperand_Success) {
1944     return Res;
1945   }
1946 
1947   AMDGPUOperand::Modifiers Mods;
1948   if (Sext) {
1949     if (getLexer().isNot(AsmToken::RParen)) {
1950       Error(Parser.getTok().getLoc(), "expected closing parentheses");
1951       return MatchOperand_ParseFail;
1952     }
1953     Parser.Lex();
1954     Mods.Sext = true;
1955   }
1956 
1957   if (Mods.hasIntModifiers()) {
1958     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
1959     Op.setModifiers(Mods);
1960   }
1961 
1962   return MatchOperand_Success;
1963 }
1964 
1965 OperandMatchResultTy
1966 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
1967   return parseRegOrImmWithFPInputMods(Operands, false);
1968 }
1969 
1970 OperandMatchResultTy
1971 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
1972   return parseRegOrImmWithIntInputMods(Operands, false);
1973 }
1974 
1975 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
1976   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
1977   if (Reg) {
1978     Operands.push_back(std::move(Reg));
1979     return MatchOperand_Success;
1980   }
1981 
1982   const AsmToken &Tok = Parser.getTok();
1983   if (Tok.getString() == "off") {
1984     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Tok.getLoc(),
1985                                                 AMDGPUOperand::ImmTyOff, false));
1986     Parser.Lex();
1987     return MatchOperand_Success;
1988   }
1989 
1990   return MatchOperand_NoMatch;
1991 }
1992 
1993 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
1994   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
1995 
1996   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
1997       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
1998       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
1999       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2000     return Match_InvalidOperand;
2001 
2002   if ((TSFlags & SIInstrFlags::VOP3) &&
2003       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2004       getForcedEncodingSize() != 64)
2005     return Match_PreferE32;
2006 
2007   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2008       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2009     // v_mac_f32/16 allow only dst_sel == DWORD;
2010     auto OpNum =
2011         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2012     const auto &Op = Inst.getOperand(OpNum);
2013     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2014       return Match_InvalidOperand;
2015     }
2016   }
2017 
2018   if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) {
2019     // FIXME: Produces error without correct column reported.
2020     auto OpNum =
2021         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset);
2022     const auto &Op = Inst.getOperand(OpNum);
2023     if (Op.getImm() != 0)
2024       return Match_InvalidOperand;
2025   }
2026 
2027   return Match_Success;
2028 }
2029 
2030 // What asm variants we should check
2031 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2032   if (getForcedEncodingSize() == 32) {
2033     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2034     return makeArrayRef(Variants);
2035   }
2036 
2037   if (isForcedVOP3()) {
2038     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2039     return makeArrayRef(Variants);
2040   }
2041 
2042   if (isForcedSDWA()) {
2043     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2044                                         AMDGPUAsmVariants::SDWA9};
2045     return makeArrayRef(Variants);
2046   }
2047 
2048   if (isForcedDPP()) {
2049     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2050     return makeArrayRef(Variants);
2051   }
2052 
2053   static const unsigned Variants[] = {
2054     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2055     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2056   };
2057 
2058   return makeArrayRef(Variants);
2059 }
2060 
2061 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2062   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2063   const unsigned Num = Desc.getNumImplicitUses();
2064   for (unsigned i = 0; i < Num; ++i) {
2065     unsigned Reg = Desc.ImplicitUses[i];
2066     switch (Reg) {
2067     case AMDGPU::FLAT_SCR:
2068     case AMDGPU::VCC:
2069     case AMDGPU::M0:
2070       return Reg;
2071     default:
2072       break;
2073     }
2074   }
2075   return AMDGPU::NoRegister;
2076 }
2077 
// NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
// NOTE(review): assumes the operand at OpIdx is an immediate; callers
// (e.g. usesConstantBus) check MO.isImm() before calling — confirm for
// any new call sites.
bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
                                       unsigned OpIdx) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());

  // Only SI source operands can hold inline constants at all.
  if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
    return false;
  }

  const MCOperand &MO = Inst.getOperand(OpIdx);

  int64_t Val = MO.getImm();
  auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);

  switch (OpSize) { // expected operand size, in bytes
  case 8:
    return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
  case 4:
    return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
  case 2: {
    // Packed 16-bit operand types use the v2x16 inlinability rules; all
    // other 16-bit operands use the scalar 16-bit rules.
    const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
      return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
    } else {
      return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
    }
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}
2113 
2114 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2115   const MCOperand &MO = Inst.getOperand(OpIdx);
2116   if (MO.isImm()) {
2117     return !isInlineConstant(Inst, OpIdx);
2118   }
2119   return !MO.isReg() ||
2120          isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
2121 }
2122 
// Enforce the VALU constant-bus limit: across implicit SGPR reads, special
// immediate operands and the src0..src2 operands, at most one distinct
// constant-bus value may be used. Mirrors SIInstrInfo::verifyInstruction.
bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  unsigned ConstantBusUseCount = 0;

  // Only VALU encodings are subject to the limit.
  if (Desc.TSFlags &
      (SIInstrFlags::VOPC |
       SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
       SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
       SIInstrFlags::SDWA)) {
    // Check special imm operands (used by madmk, etc)
    if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
      ++ConstantBusUseCount;
    }

    // An implicit SGPR read (e.g. VCC, M0, FLAT_SCR) also uses the bus.
    unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
    if (SGPRUsed != AMDGPU::NoRegister) {
      ++ConstantBusUseCount;
    }

    const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
    const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
    const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

    for (int OpIdx : OpIndices) {
      if (OpIdx == -1) break;

      const MCOperand &MO = Inst.getOperand(OpIdx);
      if (usesConstantBus(Inst, OpIdx)) {
        if (MO.isReg()) {
          const unsigned Reg = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersections like these
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but they are disabled anyway.
          // Note that this code mimics SIInstrInfo::verifyInstruction
          // Re-reading the same SGPR (exact match only) is free; a
          // different register counts as a new bus use.
          if (Reg != SGPRUsed) {
            ++ConstantBusUseCount;
          }
          SGPRUsed = Reg;
        } else { // Expression or a literal
          ++ConstantBusUseCount;
        }
      }
    }
  }

  return ConstantBusUseCount <= 1;
}
2175 
2176 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2177   const unsigned Opcode = Inst.getOpcode();
2178   const MCInstrDesc &Desc = MII.get(Opcode);
2179 
2180   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2181   if (DstIdx == -1 ||
2182       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2183     return true;
2184   }
2185 
2186   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2187 
2188   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2189   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2190   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2191 
2192   assert(DstIdx != -1);
2193   const MCOperand &Dst = Inst.getOperand(DstIdx);
2194   assert(Dst.isReg());
2195   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2196 
2197   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2198 
2199   for (int SrcIdx : SrcIndices) {
2200     if (SrcIdx == -1) break;
2201     const MCOperand &Src = Inst.getOperand(SrcIdx);
2202     if (Src.isReg()) {
2203       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2204       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2205         return false;
2206       }
2207     }
2208   }
2209 
2210   return true;
2211 }
2212 
2213 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2214 
2215   const unsigned Opc = Inst.getOpcode();
2216   const MCInstrDesc &Desc = MII.get(Opc);
2217 
2218   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2219     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2220     assert(ClampIdx != -1);
2221     return Inst.getOperand(ClampIdx).getImm() == 0;
2222   }
2223 
2224   return true;
2225 }
2226 
2227 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
2228                                           const SMLoc &IDLoc) {
2229   if (!validateConstantBusLimitations(Inst)) {
2230     Error(IDLoc,
2231       "invalid operand (violates constant bus restrictions)");
2232     return false;
2233   }
2234   if (!validateEarlyClobberLimitations(Inst)) {
2235     Error(IDLoc,
2236       "destination must be different than all sources");
2237     return false;
2238   }
2239   if (!validateIntClampSupported(Inst)) {
2240     Error(IDLoc,
2241       "integer clamping is not supported on this GPU");
2242     return false;
2243   }
2244 
2245   return true;
2246 }
2247 
// Try the instruction against every applicable asm variant, keep the most
// specific match status, then either emit the instruction or report the
// best diagnostic.
bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  MCInst Inst;
  unsigned Result = Match_Success;
  for (auto Variant : getMatchedVariants()) {
    uint64_t EI;
    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
                                  Variant);
    // We order match statuses from least to most specific. We use most specific
    // status as resulting
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
    // A status only overwrites the accumulated Result when it is at least
    // as specific as everything seen so far.
    if ((R == Match_Success) ||
        (R == Match_PreferE32) ||
        (R == Match_MissingFeature && Result != Match_PreferE32) ||
        (R == Match_InvalidOperand && Result != Match_MissingFeature
                                   && Result != Match_PreferE32) ||
        (R == Match_MnemonicFail   && Result != Match_InvalidOperand
                                   && Result != Match_MissingFeature
                                   && Result != Match_PreferE32)) {
      Result = R;
      ErrorInfo = EI;
    }
    if (R == Match_Success)
      break;
  }

  switch (Result) {
  default: break;
  case Match_Success:
    // A matched instruction must still pass target-specific validation
    // before it is emitted.
    if (!validateInstruction(Inst, IDLoc)) {
      return true;
    }
    Inst.setLoc(IDLoc);
    Out.EmitInstruction(Inst, getSTI());
    return false;

  case Match_MissingFeature:
    return Error(IDLoc, "instruction not supported on this GPU");

  case Match_MnemonicFail:
    return Error(IDLoc, "unrecognized instruction mnemonic");

  case Match_InvalidOperand: {
    // Point the error at the offending operand when the matcher reported
    // which one it was; fall back to the instruction location.
    SMLoc ErrorLoc = IDLoc;
    if (ErrorInfo != ~0ULL) {
      if (ErrorInfo >= Operands.size()) {
        return Error(IDLoc, "too few operands for instruction");
      }
      ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
      if (ErrorLoc == SMLoc())
        ErrorLoc = IDLoc;
    }
    return Error(ErrorLoc, "invalid operand for instruction");
  }

  case Match_PreferE32:
    return Error(IDLoc, "internal error: instruction without _e64 suffix "
                        "should be encoded as e32");
  }
  llvm_unreachable("Implement any new match types added!");
}
2312 
2313 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
2314   int64_t Tmp = -1;
2315   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
2316     return true;
2317   }
2318   if (getParser().parseAbsoluteExpression(Tmp)) {
2319     return true;
2320   }
2321   Ret = static_cast<uint32_t>(Tmp);
2322   return false;
2323 }
2324 
2325 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
2326                                                uint32_t &Minor) {
2327   if (ParseAsAbsoluteExpression(Major))
2328     return TokError("invalid major version");
2329 
2330   if (getLexer().isNot(AsmToken::Comma))
2331     return TokError("minor version number required, comma expected");
2332   Lex();
2333 
2334   if (ParseAsAbsoluteExpression(Minor))
2335     return TokError("invalid minor version");
2336 
2337   return false;
2338 }
2339 
2340 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
2341   uint32_t Major;
2342   uint32_t Minor;
2343 
2344   if (ParseDirectiveMajorMinor(Major, Minor))
2345     return true;
2346 
2347   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
2348   return false;
2349 }
2350 
// .hsa_code_object_isa [<major>, <minor>, <stepping>, "<vendor>", "<arch>"]
// With no arguments, the ISA version of the targeted GPU is emitted.
bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
  uint32_t Major;
  uint32_t Minor;
  uint32_t Stepping;
  StringRef VendorName;
  StringRef ArchName;

  // If this directive has no arguments, then use the ISA version for the
  // targeted GPU.
  if (getLexer().is(AsmToken::EndOfStatement)) {
    AMDGPU::IsaInfo::IsaVersion ISA =
        AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
    getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
                                                      ISA.Stepping,
                                                      "AMD", "AMDGPU");
    return false;
  }

  if (ParseDirectiveMajorMinor(Major, Minor))
    return true;

  if (getLexer().isNot(AsmToken::Comma))
    return TokError("stepping version number required, comma expected");
  Lex(); // Eat the comma.

  if (ParseAsAbsoluteExpression(Stepping))
    return TokError("invalid stepping version");

  if (getLexer().isNot(AsmToken::Comma))
    return TokError("vendor name required, comma expected");
  Lex(); // Eat the comma.

  if (getLexer().isNot(AsmToken::String))
    return TokError("invalid vendor name");

  VendorName = getLexer().getTok().getStringContents();
  Lex(); // Eat the string.

  if (getLexer().isNot(AsmToken::Comma))
    return TokError("arch name required, comma expected");
  Lex(); // Eat the comma.

  if (getLexer().isNot(AsmToken::String))
    return TokError("invalid arch name");

  ArchName = getLexer().getTok().getStringContents();
  Lex(); // Eat the string.

  getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
                                                    VendorName, ArchName);
  return false;
}
2403 
2404 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
2405                                                amd_kernel_code_t &Header) {
2406   SmallString<40> ErrStr;
2407   raw_svector_ostream Err(ErrStr);
2408   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
2409     return TokError(Err.str());
2410   }
2411   Lex();
2412   return false;
2413 }
2414 
2415 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
2416   amd_kernel_code_t Header;
2417   AMDGPU::initDefaultAMDKernelCodeT(Header, getFeatureBits());
2418 
2419   while (true) {
2420     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
2421     // will set the current token to EndOfStatement.
2422     while(getLexer().is(AsmToken::EndOfStatement))
2423       Lex();
2424 
2425     if (getLexer().isNot(AsmToken::Identifier))
2426       return TokError("expected value identifier or .end_amd_kernel_code_t");
2427 
2428     StringRef ID = getLexer().getTok().getIdentifier();
2429     Lex();
2430 
2431     if (ID == ".end_amd_kernel_code_t")
2432       break;
2433 
2434     if (ParseAMDKernelCodeTValue(ID, Header))
2435       return true;
2436   }
2437 
2438   getTargetStreamer().EmitAMDKernelCodeT(Header);
2439 
2440   return false;
2441 }
2442 
2443 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
2444   if (getLexer().isNot(AsmToken::Identifier))
2445     return TokError("expected symbol name");
2446 
2447   StringRef KernelName = Parser.getTok().getString();
2448 
2449   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
2450                                            ELF::STT_AMDGPU_HSA_KERNEL);
2451   Lex();
2452   KernelScope.initialize(getContext());
2453   return false;
2454 }
2455 
2456 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
2457   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
2458 
2459   std::string ISAVersionStringFromSTI;
2460   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
2461   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
2462 
2463   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
2464     return Error(getParser().getTok().getLoc(),
2465                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
2466                  "arguments specified through the command line");
2467   }
2468 
2469   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
2470   Lex();
2471 
2472   return false;
2473 }
2474 
2475 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
2476   std::string HSAMetadataString;
2477   raw_string_ostream YamlStream(HSAMetadataString);
2478 
2479   getLexer().setSkipSpace(false);
2480 
2481   bool FoundEnd = false;
2482   while (!getLexer().is(AsmToken::Eof)) {
2483     while (getLexer().is(AsmToken::Space)) {
2484       YamlStream << getLexer().getTok().getString();
2485       Lex();
2486     }
2487 
2488     if (getLexer().is(AsmToken::Identifier)) {
2489       StringRef ID = getLexer().getTok().getIdentifier();
2490       if (ID == AMDGPU::HSAMD::AssemblerDirectiveEnd) {
2491         Lex();
2492         FoundEnd = true;
2493         break;
2494       }
2495     }
2496 
2497     YamlStream << Parser.parseStringToEndOfStatement()
2498                << getContext().getAsmInfo()->getSeparatorString();
2499 
2500     Parser.eatToEndOfStatement();
2501   }
2502 
2503   getLexer().setSkipSpace(true);
2504 
2505   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
2506     return TokError(Twine("expected directive ") +
2507                     Twine(HSAMD::AssemblerDirectiveEnd) + Twine("not found"));
2508   }
2509 
2510   YamlStream.flush();
2511 
2512   if (!getTargetStreamer().EmitHSAMetadata(HSAMetadataString))
2513     return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
2514 
2515   return false;
2516 }
2517 
2518 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
2519   PALMD::Metadata PALMetadata;
2520   for (;;) {
2521     uint32_t Value;
2522     if (ParseAsAbsoluteExpression(Value)) {
2523       return TokError(Twine("invalid value in ") +
2524                       Twine(PALMD::AssemblerDirective));
2525     }
2526     PALMetadata.push_back(Value);
2527     if (getLexer().isNot(AsmToken::Comma))
2528       break;
2529     Lex();
2530   }
2531   getTargetStreamer().EmitPALMetadata(PALMetadata);
2532   return false;
2533 }
2534 
2535 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
2536   StringRef IDVal = DirectiveID.getString();
2537 
2538   if (IDVal == ".hsa_code_object_version")
2539     return ParseDirectiveHSACodeObjectVersion();
2540 
2541   if (IDVal == ".hsa_code_object_isa")
2542     return ParseDirectiveHSACodeObjectISA();
2543 
2544   if (IDVal == ".amd_kernel_code_t")
2545     return ParseDirectiveAMDKernelCodeT();
2546 
2547   if (IDVal == ".amdgpu_hsa_kernel")
2548     return ParseDirectiveAMDGPUHsaKernel();
2549 
2550   if (IDVal == ".amd_amdgpu_isa")
2551     return ParseDirectiveISAVersion();
2552 
2553   if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
2554     return ParseDirectiveHSAMetadata();
2555 
2556   if (IDVal == PALMD::AssemblerDirective)
2557     return ParseDirectivePALMetadata();
2558 
2559   return true;
2560 }
2561 
2562 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
2563                                            unsigned RegNo) const {
2564   if (isCI())
2565     return true;
2566 
2567   if (isSI()) {
2568     // No flat_scr
2569     switch (RegNo) {
2570     case AMDGPU::FLAT_SCR:
2571     case AMDGPU::FLAT_SCR_LO:
2572     case AMDGPU::FLAT_SCR_HI:
2573       return false;
2574     default:
2575       return true;
2576     }
2577   }
2578 
2579   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
2580   // SI/CI have.
2581   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
2582        R.isValid(); ++R) {
2583     if (*R == RegNo)
2584       return false;
2585   }
2586 
2587   return true;
2588 }
2589 
// Parse a single instruction operand, trying, in order: the table-generated
// custom parsers, a plain register or immediate, a generic MC expression,
// and finally a bare identifier token (instruction flags like 'gds').
OperandMatchResultTy
AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
  // Try to parse with a custom parser
  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);

  // If we successfully parsed the operand or if there was an error parsing,
  // we are done.
  //
  // If we are parsing after we reach EndOfStatement then this means we
  // are appending default values to the Operands list.  This is only done
  // by custom parser, so we shouldn't continue on to the generic parsing.
  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
      getLexer().is(AsmToken::EndOfStatement))
    return ResTy;

  // Next, try a plain register or immediate.
  ResTy = parseRegOrImm(Operands);

  if (ResTy == MatchOperand_Success)
    return ResTy;

  const auto &Tok = Parser.getTok();
  SMLoc S = Tok.getLoc();

  // Then a generic MC expression (e.g. a symbol reference).
  const MCExpr *Expr = nullptr;
  if (!Parser.parseExpression(Expr)) {
    Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
    return MatchOperand_Success;
  }

  // Possibly this is an instruction flag like 'gds'.
  if (Tok.getKind() == AsmToken::Identifier) {
    Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S));
    Parser.Lex();
    return MatchOperand_Success;
  }

  return MatchOperand_NoMatch;
}
2628 
2629 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
2630   // Clear any forced encodings from the previous instruction.
2631   setForcedEncodingSize(0);
2632   setForcedDPP(false);
2633   setForcedSDWA(false);
2634 
2635   if (Name.endswith("_e64")) {
2636     setForcedEncodingSize(64);
2637     return Name.substr(0, Name.size() - 4);
2638   } else if (Name.endswith("_e32")) {
2639     setForcedEncodingSize(32);
2640     return Name.substr(0, Name.size() - 4);
2641   } else if (Name.endswith("_dpp")) {
2642     setForcedDPP(true);
2643     return Name.substr(0, Name.size() - 4);
2644   } else if (Name.endswith("_sdwa")) {
2645     setForcedSDWA(true);
2646     return Name.substr(0, Name.size() - 5);
2647   }
2648   return Name;
2649 }
2650 
2651 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
2652                                        StringRef Name,
2653                                        SMLoc NameLoc, OperandVector &Operands) {
2654   // Add the instruction mnemonic
2655   Name = parseMnemonicSuffix(Name);
2656   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
2657 
2658   while (!getLexer().is(AsmToken::EndOfStatement)) {
2659     OperandMatchResultTy Res = parseOperand(Operands, Name);
2660 
2661     // Eat the comma or space if there is one.
2662     if (getLexer().is(AsmToken::Comma))
2663       Parser.Lex();
2664 
2665     switch (Res) {
2666       case MatchOperand_Success: break;
2667       case MatchOperand_ParseFail:
2668         Error(getLexer().getLoc(), "failed parsing operand.");
2669         while (!getLexer().is(AsmToken::EndOfStatement)) {
2670           Parser.Lex();
2671         }
2672         return true;
2673       case MatchOperand_NoMatch:
2674         Error(getLexer().getLoc(), "not a valid operand.");
2675         while (!getLexer().is(AsmToken::EndOfStatement)) {
2676           Parser.Lex();
2677         }
2678         return true;
2679     }
2680   }
2681 
2682   return false;
2683 }
2684 
2685 //===----------------------------------------------------------------------===//
2686 // Utility functions
2687 //===----------------------------------------------------------------------===//
2688 
2689 OperandMatchResultTy
2690 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) {
2691   switch(getLexer().getKind()) {
2692     default: return MatchOperand_NoMatch;
2693     case AsmToken::Identifier: {
2694       StringRef Name = Parser.getTok().getString();
2695       if (!Name.equals(Prefix)) {
2696         return MatchOperand_NoMatch;
2697       }
2698 
2699       Parser.Lex();
2700       if (getLexer().isNot(AsmToken::Colon))
2701         return MatchOperand_ParseFail;
2702 
2703       Parser.Lex();
2704 
2705       bool IsMinus = false;
2706       if (getLexer().getKind() == AsmToken::Minus) {
2707         Parser.Lex();
2708         IsMinus = true;
2709       }
2710 
2711       if (getLexer().isNot(AsmToken::Integer))
2712         return MatchOperand_ParseFail;
2713 
2714       if (getParser().parseAbsoluteExpression(Int))
2715         return MatchOperand_ParseFail;
2716 
2717       if (IsMinus)
2718         Int = -Int;
2719       break;
2720     }
2721   }
2722   return MatchOperand_Success;
2723 }
2724 
2725 OperandMatchResultTy
2726 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
2727                                     AMDGPUOperand::ImmTy ImmTy,
2728                                     bool (*ConvertResult)(int64_t&)) {
2729   SMLoc S = Parser.getTok().getLoc();
2730   int64_t Value = 0;
2731 
2732   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
2733   if (Res != MatchOperand_Success)
2734     return Res;
2735 
2736   if (ConvertResult && !ConvertResult(Value)) {
2737     return MatchOperand_ParseFail;
2738   }
2739 
2740   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
2741   return MatchOperand_Success;
2742 }
2743 
2744 OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix(
2745   const char *Prefix,
2746   OperandVector &Operands,
2747   AMDGPUOperand::ImmTy ImmTy,
2748   bool (*ConvertResult)(int64_t&)) {
2749   StringRef Name = Parser.getTok().getString();
2750   if (!Name.equals(Prefix))
2751     return MatchOperand_NoMatch;
2752 
2753   Parser.Lex();
2754   if (getLexer().isNot(AsmToken::Colon))
2755     return MatchOperand_ParseFail;
2756 
2757   Parser.Lex();
2758   if (getLexer().isNot(AsmToken::LBrac))
2759     return MatchOperand_ParseFail;
2760   Parser.Lex();
2761 
2762   unsigned Val = 0;
2763   SMLoc S = Parser.getTok().getLoc();
2764 
2765   // FIXME: How to verify the number of elements matches the number of src
2766   // operands?
2767   for (int I = 0; I < 4; ++I) {
2768     if (I != 0) {
2769       if (getLexer().is(AsmToken::RBrac))
2770         break;
2771 
2772       if (getLexer().isNot(AsmToken::Comma))
2773         return MatchOperand_ParseFail;
2774       Parser.Lex();
2775     }
2776 
2777     if (getLexer().isNot(AsmToken::Integer))
2778       return MatchOperand_ParseFail;
2779 
2780     int64_t Op;
2781     if (getParser().parseAbsoluteExpression(Op))
2782       return MatchOperand_ParseFail;
2783 
2784     if (Op != 0 && Op != 1)
2785       return MatchOperand_ParseFail;
2786     Val |= (Op << I);
2787   }
2788 
2789   Parser.Lex();
2790   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
2791   return MatchOperand_Success;
2792 }
2793 
2794 OperandMatchResultTy
2795 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
2796                                AMDGPUOperand::ImmTy ImmTy) {
2797   int64_t Bit = 0;
2798   SMLoc S = Parser.getTok().getLoc();
2799 
2800   // We are at the end of the statement, and this is a default argument, so
2801   // use a default value.
2802   if (getLexer().isNot(AsmToken::EndOfStatement)) {
2803     switch(getLexer().getKind()) {
2804       case AsmToken::Identifier: {
2805         StringRef Tok = Parser.getTok().getString();
2806         if (Tok == Name) {
2807           Bit = 1;
2808           Parser.Lex();
2809         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
2810           Bit = 0;
2811           Parser.Lex();
2812         } else {
2813           return MatchOperand_NoMatch;
2814         }
2815         break;
2816       }
2817       default:
2818         return MatchOperand_NoMatch;
2819     }
2820   }
2821 
2822   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
2823   return MatchOperand_Success;
2824 }
2825 
2826 static void addOptionalImmOperand(
2827   MCInst& Inst, const OperandVector& Operands,
2828   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
2829   AMDGPUOperand::ImmTy ImmT,
2830   int64_t Default = 0) {
2831   auto i = OptionalIdx.find(ImmT);
2832   if (i != OptionalIdx.end()) {
2833     unsigned Idx = i->second;
2834     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
2835   } else {
2836     Inst.addOperand(MCOperand::createImm(Default));
2837   }
2838 }
2839 
2840 OperandMatchResultTy
2841 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
2842   if (getLexer().isNot(AsmToken::Identifier)) {
2843     return MatchOperand_NoMatch;
2844   }
2845   StringRef Tok = Parser.getTok().getString();
2846   if (Tok != Prefix) {
2847     return MatchOperand_NoMatch;
2848   }
2849 
2850   Parser.Lex();
2851   if (getLexer().isNot(AsmToken::Colon)) {
2852     return MatchOperand_ParseFail;
2853   }
2854 
2855   Parser.Lex();
2856   if (getLexer().isNot(AsmToken::Identifier)) {
2857     return MatchOperand_ParseFail;
2858   }
2859 
2860   Value = Parser.getTok().getString();
2861   return MatchOperand_Success;
2862 }
2863 
2864 //===----------------------------------------------------------------------===//
2865 // ds
2866 //===----------------------------------------------------------------------===//
2867 
2868 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
2869                                     const OperandVector &Operands) {
2870   OptionalImmIndexMap OptionalIdx;
2871 
2872   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
2873     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
2874 
2875     // Add the register arguments
2876     if (Op.isReg()) {
2877       Op.addRegOperands(Inst, 1);
2878       continue;
2879     }
2880 
2881     // Handle optional arguments
2882     OptionalIdx[Op.getImmTy()] = i;
2883   }
2884 
2885   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
2886   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
2887   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
2888 
2889   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
2890 }
2891 
2892 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
2893                                 bool IsGdsHardcoded) {
2894   OptionalImmIndexMap OptionalIdx;
2895 
2896   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
2897     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
2898 
2899     // Add the register arguments
2900     if (Op.isReg()) {
2901       Op.addRegOperands(Inst, 1);
2902       continue;
2903     }
2904 
2905     if (Op.isToken() && Op.getToken() == "gds") {
2906       IsGdsHardcoded = true;
2907       continue;
2908     }
2909 
2910     // Handle optional arguments
2911     OptionalIdx[Op.getImmTy()] = i;
2912   }
2913 
2914   AMDGPUOperand::ImmTy OffsetType =
2915     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si ||
2916      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
2917                                                       AMDGPUOperand::ImmTyOffset;
2918 
2919   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
2920 
2921   if (!IsGdsHardcoded) {
2922     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
2923   }
2924   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
2925 }
2926 
// Convert parsed operands of an EXP instruction into MCInst form. Records the
// MCInst index of each of the four source slots so that the compressed
// variant can be rewritten in place, and derives the enable mask from which
// sources are present.
void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  unsigned OperandIdx[4]; // MCInst operand index of each source slot.
  unsigned EnMask = 0;    // Enable mask accumulated from live sources.
  int SrcIdx = 0;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Op.addRegOperands(Inst, 1);
      ++SrcIdx;
      continue;
    }

    // "off" denotes a disabled source; emit a NoRegister placeholder so the
    // slot indices stay aligned.
    if (Op.isOff()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
      ++SrcIdx;
      continue;
    }

    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // The "done" token is not emitted as an MCInst operand here.
    if (Op.isToken() && Op.getToken() == "done")
      continue;

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  assert(SrcIdx == 4);

  bool Compr = false;
  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
    // Compressed export: src1 takes src2's value and the upper two slots are
    // cleared to NoRegister.
    Compr = true;
    Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
    Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
    Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
  }

  for (auto i = 0; i < SrcIdx; ++i) {
    if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
      // In compressed form each live source enables two mask bits.
      EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);

  Inst.addOperand(MCOperand::createImm(EnMask));
}
2987 
2988 //===----------------------------------------------------------------------===//
2989 // s_waitcnt
2990 //===----------------------------------------------------------------------===//
2991 
2992 static bool
2993 encodeCnt(
2994   const AMDGPU::IsaInfo::IsaVersion ISA,
2995   int64_t &IntVal,
2996   int64_t CntVal,
2997   bool Saturate,
2998   unsigned (*encode)(const IsaInfo::IsaVersion &Version, unsigned, unsigned),
2999   unsigned (*decode)(const IsaInfo::IsaVersion &Version, unsigned))
3000 {
3001   bool Failed = false;
3002 
3003   IntVal = encode(ISA, IntVal, CntVal);
3004   if (CntVal != decode(ISA, IntVal)) {
3005     if (Saturate) {
3006       IntVal = encode(ISA, IntVal, -1);
3007     } else {
3008       Failed = true;
3009     }
3010   }
3011   return Failed;
3012 }
3013 
// Parse one "name(value)" counter term of an s_waitcnt operand and merge it
// into the packed value IntVal. Returns true on error. A "_sat" suffix on the
// counter name requests clamping instead of an error when the value does not
// fit its bitfield.
bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
  StringRef CntName = Parser.getTok().getString();
  int64_t CntVal;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::LParen))
    return true;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Integer))
    return true;

  SMLoc ValLoc = Parser.getTok().getLoc();
  if (getParser().parseAbsoluteExpression(CntVal))
    return true;

  AMDGPU::IsaInfo::IsaVersion ISA =
      AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());

  // NOTE: an unrecognized counter name leaves Failed == true, so it is
  // reported below with the (somewhat misleading) "too large value" message.
  bool Failed = true;
  bool Sat = CntName.endswith("_sat");

  if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
  }

  if (Failed) {
    Error(ValLoc, "too large value for " + CntName);
    return true;
  }

  if (getLexer().isNot(AsmToken::RParen)) {
    return true;
  }

  Parser.Lex();
  // Allow an optional '&' or ',' separator between counter terms, but only
  // consume it when another identifier (the next counter) follows.
  if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
    const AsmToken NextToken = getLexer().peekTok();
    if (NextToken.is(AsmToken::Identifier)) {
      Parser.Lex();
    }
  }

  return false;
}
3063 
3064 OperandMatchResultTy
3065 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
3066   AMDGPU::IsaInfo::IsaVersion ISA =
3067       AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
3068   int64_t Waitcnt = getWaitcntBitMask(ISA);
3069   SMLoc S = Parser.getTok().getLoc();
3070 
3071   switch(getLexer().getKind()) {
3072     default: return MatchOperand_ParseFail;
3073     case AsmToken::Integer:
3074       // The operand can be an integer value.
3075       if (getParser().parseAbsoluteExpression(Waitcnt))
3076         return MatchOperand_ParseFail;
3077       break;
3078 
3079     case AsmToken::Identifier:
3080       do {
3081         if (parseCnt(Waitcnt))
3082           return MatchOperand_ParseFail;
3083       } while(getLexer().isNot(AsmToken::EndOfStatement));
3084       break;
3085   }
3086   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
3087   return MatchOperand_Success;
3088 }
3089 
// Parse the "hwreg(<id> [, <offset>, <width>])" construct. The register may
// be given by symbolic name or numeric id. Returns true on parse failure;
// range validation of the parsed values is left to the caller (parseHwreg).
bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
                                          int64_t &Width) {
  using namespace llvm::AMDGPU::Hwreg;

  if (Parser.getTok().getString() != "hwreg")
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::LParen))
    return true;
  Parser.Lex();

  if (getLexer().is(AsmToken::Identifier)) {
    // Symbolic register name; an unknown name is recorded as ID_UNKNOWN_ and
    // diagnosed by the caller.
    HwReg.IsSymbolic = true;
    HwReg.Id = ID_UNKNOWN_;
    const StringRef tok = Parser.getTok().getString();
    for (int i = ID_SYMBOLIC_FIRST_; i < ID_SYMBOLIC_LAST_; ++i) {
      if (tok == IdSymbolic[i]) {
        HwReg.Id = i;
        break;
      }
    }
    Parser.Lex();
  } else {
    // Numeric register id.
    HwReg.IsSymbolic = false;
    if (getLexer().isNot(AsmToken::Integer))
      return true;
    if (getParser().parseAbsoluteExpression(HwReg.Id))
      return true;
  }

  if (getLexer().is(AsmToken::RParen)) {
    // No optional parameters; Offset/Width keep the caller's defaults.
    Parser.Lex();
    return false;
  }

  // optional params
  if (getLexer().isNot(AsmToken::Comma))
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::Integer))
    return true;
  if (getParser().parseAbsoluteExpression(Offset))
    return true;

  if (getLexer().isNot(AsmToken::Comma))
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::Integer))
    return true;
  if (getParser().parseAbsoluteExpression(Width))
    return true;

  if (getLexer().isNot(AsmToken::RParen))
    return true;
  Parser.Lex();

  return false;
}
3151 
// Parse an s_setreg/s_getreg operand: either a raw 16-bit immediate or the
// symbolic hwreg(...) form, which is packed into id/offset/width-1 fields.
OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
  using namespace llvm::AMDGPU::Hwreg;

  int64_t Imm16Val = 0;
  SMLoc S = Parser.getTok().getLoc();

  switch(getLexer().getKind()) {
    default: return MatchOperand_NoMatch;
    case AsmToken::Integer:
      // The operand can be an integer value.
      if (getParser().parseAbsoluteExpression(Imm16Val))
        return MatchOperand_NoMatch;
      if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
        Error(S, "invalid immediate: only 16-bit values are legal");
        // Do not return error code, but create an imm operand anyway and proceed
        // to the next operand, if any. That avoids unneccessary error messages.
      }
      break;

    case AsmToken::Identifier: {
        OperandInfoTy HwReg(ID_UNKNOWN_);
        int64_t Offset = OFFSET_DEFAULT_;
        int64_t Width = WIDTH_M1_DEFAULT_ + 1;
        if (parseHwregConstruct(HwReg, Offset, Width))
          return MatchOperand_ParseFail;
        // Validate the fields; as above, errors are reported but an operand
        // is still created so that parsing can continue.
        if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) {
          if (HwReg.IsSymbolic)
            Error(S, "invalid symbolic name of hardware register");
          else
            Error(S, "invalid code of hardware register: only 6-bit values are legal");
        }
        if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset))
          Error(S, "invalid bit offset: only 5-bit values are legal");
        if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1))
          Error(S, "invalid bitfield width: only values from 1 to 32 are legal");
        // Pack id, offset, and width-1 into the 16-bit encoding.
        Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_);
      }
      break;
  }
  Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
  return MatchOperand_Success;
}
3194 
// Any immediate qualifies as an s_waitcnt operand; encoding validity is
// checked while the counter expression is parsed.
bool AMDGPUOperand::isSWaitCnt() const {
  return isImm();
}
3198 
// A hwreg operand is an immediate tagged ImmTyHwreg by parseHwreg.
bool AMDGPUOperand::isHwreg() const {
  return isImmTy(ImmTyHwreg);
}
3202 
// Parse the "sendmsg(<msg> [, <op> [, <stream>]])" construct. Message and
// operation may be given by symbolic name or numeric code. Returns true on
// parse failure; validation of the parsed ids is done by parseSendMsgOp.
bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) {
  using namespace llvm::AMDGPU::SendMsg;

  if (Parser.getTok().getString() != "sendmsg")
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::LParen))
    return true;
  Parser.Lex();

  if (getLexer().is(AsmToken::Identifier)) {
    // Symbolic message name; unknown names are left as ID_UNKNOWN_.
    Msg.IsSymbolic = true;
    Msg.Id = ID_UNKNOWN_;
    const std::string tok = Parser.getTok().getString();
    for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
      switch(i) {
        default: continue; // Omit gaps.
        case ID_INTERRUPT: case ID_GS: case ID_GS_DONE:  case ID_SYSMSG: break;
      }
      if (tok == IdSymbolic[i]) {
        Msg.Id = i;
        break;
      }
    }
    Parser.Lex();
  } else {
    // Numeric message id.
    Msg.IsSymbolic = false;
    if (getLexer().isNot(AsmToken::Integer))
      return true;
    if (getParser().parseAbsoluteExpression(Msg.Id))
      return true;
    // NOTE(review): this re-parse of a second adjacent integer looks odd --
    // presumably it downgrades input like "sendmsg(1 2)" to ID_UNKNOWN_;
    // confirm the intent before changing.
    if (getLexer().is(AsmToken::Integer))
      if (getParser().parseAbsoluteExpression(Msg.Id))
        Msg.Id = ID_UNKNOWN_;
  }
  if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest.
    return false;

  if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) {
    // Only GS/GS_DONE/SYSMSG take an operation; anything else must close now.
    if (getLexer().isNot(AsmToken::RParen))
      return true;
    Parser.Lex();
    return false;
  }

  if (getLexer().isNot(AsmToken::Comma))
    return true;
  Parser.Lex();

  assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG);
  Operation.Id = ID_UNKNOWN_;
  if (getLexer().is(AsmToken::Identifier)) {
    // Symbolic operation name; the lookup table depends on the message kind.
    Operation.IsSymbolic = true;
    const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
    const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
    const int L = (Msg.Id == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
    const StringRef Tok = Parser.getTok().getString();
    for (int i = F; i < L; ++i) {
      if (Tok == S[i]) {
        Operation.Id = i;
        break;
      }
    }
    Parser.Lex();
  } else {
    // Numeric operation id.
    Operation.IsSymbolic = false;
    if (getLexer().isNot(AsmToken::Integer))
      return true;
    if (getParser().parseAbsoluteExpression(Operation.Id))
      return true;
  }

  if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
    // Stream id is optional.
    if (getLexer().is(AsmToken::RParen)) {
      Parser.Lex();
      return false;
    }

    if (getLexer().isNot(AsmToken::Comma))
      return true;
    Parser.Lex();

    if (getLexer().isNot(AsmToken::Integer))
      return true;
    if (getParser().parseAbsoluteExpression(StreamId))
      return true;
  }

  if (getLexer().isNot(AsmToken::RParen))
    return true;
  Parser.Lex();
  return false;
}
3298 
3299 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
3300   if (getLexer().getKind() != AsmToken::Identifier)
3301     return MatchOperand_NoMatch;
3302 
3303   StringRef Str = Parser.getTok().getString();
3304   int Slot = StringSwitch<int>(Str)
3305     .Case("p10", 0)
3306     .Case("p20", 1)
3307     .Case("p0", 2)
3308     .Default(-1);
3309 
3310   SMLoc S = Parser.getTok().getLoc();
3311   if (Slot == -1)
3312     return MatchOperand_ParseFail;
3313 
3314   Parser.Lex();
3315   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
3316                                               AMDGPUOperand::ImmTyInterpSlot));
3317   return MatchOperand_Success;
3318 }
3319 
// Parse an interpolation attribute of the form "attr<N>.<chan>" (for example
// "attr32.y") into two immediates: the attribute number and the channel.
OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
  if (getLexer().getKind() != AsmToken::Identifier)
    return MatchOperand_NoMatch;

  StringRef Str = Parser.getTok().getString();
  if (!Str.startswith("attr"))
    return MatchOperand_NoMatch;

  // The last two characters select the channel (.x/.y/.z/.w).
  StringRef Chan = Str.take_back(2);
  int AttrChan = StringSwitch<int>(Chan)
    .Case(".x", 0)
    .Case(".y", 1)
    .Case(".z", 2)
    .Case(".w", 3)
    .Default(-1);
  if (AttrChan == -1)
    return MatchOperand_ParseFail;

  // Strip the ".<chan>" suffix and the "attr" prefix, leaving the number.
  Str = Str.drop_back(2).drop_front(4);

  uint8_t Attr;
  if (Str.getAsInteger(10, Attr))
    return MatchOperand_ParseFail;

  SMLoc S = Parser.getTok().getLoc();
  Parser.Lex();
  if (Attr > 63) {
    // Report the range error but still return success (without pushing
    // operands) so parsing can continue.
    Error(S, "out of bounds attr");
    return MatchOperand_Success;
  }

  // Point the channel operand's location at the ".<chan>" text.
  SMLoc SChan = SMLoc::getFromPointer(Chan.data());

  Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
                                              AMDGPUOperand::ImmTyInterpAttr));
  Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
                                              AMDGPUOperand::ImmTyAttrChan));
  return MatchOperand_Success;
}
3359 
// Report an "invalid exp target" diagnostic at the current token.
void AMDGPUAsmParser::errorExpTgt() {
  Error(Parser.getTok().getLoc(), "invalid exp target");
}
3363 
3364 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
3365                                                       uint8_t &Val) {
3366   if (Str == "null") {
3367     Val = 9;
3368     return MatchOperand_Success;
3369   }
3370 
3371   if (Str.startswith("mrt")) {
3372     Str = Str.drop_front(3);
3373     if (Str == "z") { // == mrtz
3374       Val = 8;
3375       return MatchOperand_Success;
3376     }
3377 
3378     if (Str.getAsInteger(10, Val))
3379       return MatchOperand_ParseFail;
3380 
3381     if (Val > 7)
3382       errorExpTgt();
3383 
3384     return MatchOperand_Success;
3385   }
3386 
3387   if (Str.startswith("pos")) {
3388     Str = Str.drop_front(3);
3389     if (Str.getAsInteger(10, Val))
3390       return MatchOperand_ParseFail;
3391 
3392     if (Val > 3)
3393       errorExpTgt();
3394 
3395     Val += 12;
3396     return MatchOperand_Success;
3397   }
3398 
3399   if (Str.startswith("param")) {
3400     Str = Str.drop_front(5);
3401     if (Str.getAsInteger(10, Val))
3402       return MatchOperand_ParseFail;
3403 
3404     if (Val >= 32)
3405       errorExpTgt();
3406 
3407     Val += 32;
3408     return MatchOperand_Success;
3409   }
3410 
3411   if (Str.startswith("invalid_target_")) {
3412     Str = Str.drop_front(15);
3413     if (Str.getAsInteger(10, Val))
3414       return MatchOperand_ParseFail;
3415 
3416     errorExpTgt();
3417     return MatchOperand_Success;
3418   }
3419 
3420   return MatchOperand_NoMatch;
3421 }
3422 
3423 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
3424   uint8_t Val;
3425   StringRef Str = Parser.getTok().getString();
3426 
3427   auto Res = parseExpTgtImpl(Str, Val);
3428   if (Res != MatchOperand_Success)
3429     return Res;
3430 
3431   SMLoc S = Parser.getTok().getLoc();
3432   Parser.Lex();
3433 
3434   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
3435                                               AMDGPUOperand::ImmTyExpTgt));
3436   return MatchOperand_Success;
3437 }
3438 
// Parse an s_sendmsg operand: either a raw 16-bit immediate or the symbolic
// sendmsg(...) form. Validates the message/operation/stream ids and packs
// them into the 16-bit encoding. Validation errors are reported but an
// operand is still emitted so parsing can continue.
OperandMatchResultTy
AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SendMsg;

  int64_t Imm16Val = 0;
  SMLoc S = Parser.getTok().getLoc();

  switch(getLexer().getKind()) {
  default:
    return MatchOperand_NoMatch;
  case AsmToken::Integer:
    // The operand can be an integer value.
    if (getParser().parseAbsoluteExpression(Imm16Val))
      return MatchOperand_NoMatch;
    if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
      Error(S, "invalid immediate: only 16-bit values are legal");
      // Do not return error code, but create an imm operand anyway and proceed
      // to the next operand, if any. That avoids unneccessary error messages.
    }
    break;
  case AsmToken::Identifier: {
      OperandInfoTy Msg(ID_UNKNOWN_);
      OperandInfoTy Operation(OP_UNKNOWN_);
      int64_t StreamId = STREAM_ID_DEFAULT_;
      if (parseSendMsgConstruct(Msg, Operation, StreamId))
        return MatchOperand_ParseFail;
      // The do/while(false) lets validation bail out early via break while
      // still falling through to emit the operand.
      do {
        // Validate and encode message ID.
        if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE)
                || Msg.Id == ID_SYSMSG)) {
          if (Msg.IsSymbolic)
            Error(S, "invalid/unsupported symbolic name of message");
          else
            Error(S, "invalid/unsupported code of message");
          break;
        }
        Imm16Val = (Msg.Id << ID_SHIFT_);
        // Validate and encode operation ID.
        if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) {
          if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) {
            if (Operation.IsSymbolic)
              Error(S, "invalid symbolic name of GS_OP");
            else
              Error(S, "invalid code of GS_OP: only 2-bit values are legal");
            break;
          }
          if (Operation.Id == OP_GS_NOP
              && Msg.Id != ID_GS_DONE) {
            Error(S, "invalid GS_OP: NOP is for GS_DONE only");
            break;
          }
          Imm16Val |= (Operation.Id << OP_SHIFT_);
        }
        if (Msg.Id == ID_SYSMSG) {
          if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) {
            if (Operation.IsSymbolic)
              Error(S, "invalid/unsupported symbolic name of SYSMSG_OP");
            else
              Error(S, "invalid/unsupported code of SYSMSG_OP");
            break;
          }
          Imm16Val |= (Operation.Id << OP_SHIFT_);
        }
        // Validate and encode stream ID.
        if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
          if (! (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) {
            Error(S, "invalid stream id: only 2-bit values are legal");
            break;
          }
          Imm16Val |= (StreamId << STREAM_ID_SHIFT_);
        }
      } while (false);
    }
    break;
  }
  Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
  return MatchOperand_Success;
}
3517 
// A sendmsg operand is an immediate tagged ImmTySendMsg by parseSendMsgOp.
bool AMDGPUOperand::isSendMsg() const {
  return isImmTy(ImmTySendMsg);
}
3521 
3522 //===----------------------------------------------------------------------===//
3523 // parser helpers
3524 //===----------------------------------------------------------------------===//
3525 
3526 bool
3527 AMDGPUAsmParser::trySkipId(const StringRef Id) {
3528   if (getLexer().getKind() == AsmToken::Identifier &&
3529       Parser.getTok().getString() == Id) {
3530     Parser.Lex();
3531     return true;
3532   }
3533   return false;
3534 }
3535 
3536 bool
3537 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
3538   if (getLexer().getKind() == Kind) {
3539     Parser.Lex();
3540     return true;
3541   }
3542   return false;
3543 }
3544 
3545 bool
3546 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
3547                            const StringRef ErrMsg) {
3548   if (!trySkipToken(Kind)) {
3549     Error(Parser.getTok().getLoc(), ErrMsg);
3550     return false;
3551   }
3552   return true;
3553 }
3554 
3555 bool
3556 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
3557   return !getParser().parseAbsoluteExpression(Imm);
3558 }
3559 
3560 bool
3561 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
3562   SMLoc S = Parser.getTok().getLoc();
3563   if (getLexer().getKind() == AsmToken::String) {
3564     Val = Parser.getTok().getStringContents();
3565     Parser.Lex();
3566     return true;
3567   } else {
3568     Error(S, ErrMsg);
3569     return false;
3570   }
3571 }
3572 
3573 //===----------------------------------------------------------------------===//
3574 // swizzle
3575 //===----------------------------------------------------------------------===//
3576 
3577 LLVM_READNONE
3578 static unsigned
3579 encodeBitmaskPerm(const unsigned AndMask,
3580                   const unsigned OrMask,
3581                   const unsigned XorMask) {
3582   using namespace llvm::AMDGPU::Swizzle;
3583 
3584   return BITMASK_PERM_ENC |
3585          (AndMask << BITMASK_AND_SHIFT) |
3586          (OrMask  << BITMASK_OR_SHIFT)  |
3587          (XorMask << BITMASK_XOR_SHIFT);
3588 }
3589 
3590 bool
3591 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
3592                                       const unsigned MinVal,
3593                                       const unsigned MaxVal,
3594                                       const StringRef ErrMsg) {
3595   for (unsigned i = 0; i < OpNum; ++i) {
3596     if (!skipToken(AsmToken::Comma, "expected a comma")){
3597       return false;
3598     }
3599     SMLoc ExprLoc = Parser.getTok().getLoc();
3600     if (!parseExpr(Op[i])) {
3601       return false;
3602     }
3603     if (Op[i] < MinVal || Op[i] > MaxVal) {
3604       Error(ExprLoc, ErrMsg);
3605       return false;
3606     }
3607   }
3608 
3609   return true;
3610 }
3611 
3612 bool
3613 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
3614   using namespace llvm::AMDGPU::Swizzle;
3615 
3616   int64_t Lane[LANE_NUM];
3617   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
3618                            "expected a 2-bit lane id")) {
3619     Imm = QUAD_PERM_ENC;
3620     for (auto i = 0; i < LANE_NUM; ++i) {
3621       Imm |= Lane[i] << (LANE_SHIFT * i);
3622     }
3623     return true;
3624   }
3625   return false;
3626 }
3627 
// Parse a BROADCAST swizzle: a power-of-two group size in [2,32]
// followed by a lane index within the group. Encoded as a bitmask
// perm (see encodeBitmaskPerm) rather than a dedicated mode.
bool
AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc S = Parser.getTok().getLoc();
  int64_t GroupSize;
  int64_t LaneIdx;

  if (!parseSwizzleOperands(1, &GroupSize,
                            2, 32,
                            "group size must be in the interval [2,32]")) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(S, "group size must be a power of two");
    return false;
  }
  if (parseSwizzleOperands(1, &LaneIdx,
                           0, GroupSize - 1,
                           "lane id must be in the interval [0,group size - 1]")) {
    // AND mask (BITMASK_MAX - GroupSize + 1) clears the low
    // log2(GroupSize) lane-id bits; the OR mask substitutes LaneIdx,
    // so every lane in the group reads from LaneIdx.
    Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
    return true;
  }
  return false;
}
3653 
3654 bool
3655 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
3656   using namespace llvm::AMDGPU::Swizzle;
3657 
3658   SMLoc S = Parser.getTok().getLoc();
3659   int64_t GroupSize;
3660 
3661   if (!parseSwizzleOperands(1, &GroupSize,
3662       2, 32, "group size must be in the interval [2,32]")) {
3663     return false;
3664   }
3665   if (!isPowerOf2_64(GroupSize)) {
3666     Error(S, "group size must be a power of two");
3667     return false;
3668   }
3669 
3670   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
3671   return true;
3672 }
3673 
3674 bool
3675 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
3676   using namespace llvm::AMDGPU::Swizzle;
3677 
3678   SMLoc S = Parser.getTok().getLoc();
3679   int64_t GroupSize;
3680 
3681   if (!parseSwizzleOperands(1, &GroupSize,
3682       1, 16, "group size must be in the interval [1,16]")) {
3683     return false;
3684   }
3685   if (!isPowerOf2_64(GroupSize)) {
3686     Error(S, "group size must be a power of two");
3687     return false;
3688   }
3689 
3690   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
3691   return true;
3692 }
3693 
3694 bool
3695 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
3696   using namespace llvm::AMDGPU::Swizzle;
3697 
3698   if (!skipToken(AsmToken::Comma, "expected a comma")) {
3699     return false;
3700   }
3701 
3702   StringRef Ctl;
3703   SMLoc StrLoc = Parser.getTok().getLoc();
3704   if (!parseString(Ctl)) {
3705     return false;
3706   }
3707   if (Ctl.size() != BITMASK_WIDTH) {
3708     Error(StrLoc, "expected a 5-character mask");
3709     return false;
3710   }
3711 
3712   unsigned AndMask = 0;
3713   unsigned OrMask = 0;
3714   unsigned XorMask = 0;
3715 
3716   for (size_t i = 0; i < Ctl.size(); ++i) {
3717     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
3718     switch(Ctl[i]) {
3719     default:
3720       Error(StrLoc, "invalid mask");
3721       return false;
3722     case '0':
3723       break;
3724     case '1':
3725       OrMask |= Mask;
3726       break;
3727     case 'p':
3728       AndMask |= Mask;
3729       break;
3730     case 'i':
3731       AndMask |= Mask;
3732       XorMask |= Mask;
3733       break;
3734     }
3735   }
3736 
3737   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
3738   return true;
3739 }
3740 
3741 bool
3742 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
3743 
3744   SMLoc OffsetLoc = Parser.getTok().getLoc();
3745 
3746   if (!parseExpr(Imm)) {
3747     return false;
3748   }
3749   if (!isUInt<16>(Imm)) {
3750     Error(OffsetLoc, "expected a 16-bit offset");
3751     return false;
3752   }
3753   return true;
3754 }
3755 
3756 bool
3757 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
3758   using namespace llvm::AMDGPU::Swizzle;
3759 
3760   if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
3761 
3762     SMLoc ModeLoc = Parser.getTok().getLoc();
3763     bool Ok = false;
3764 
3765     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
3766       Ok = parseSwizzleQuadPerm(Imm);
3767     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
3768       Ok = parseSwizzleBitmaskPerm(Imm);
3769     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
3770       Ok = parseSwizzleBroadcast(Imm);
3771     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
3772       Ok = parseSwizzleSwap(Imm);
3773     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
3774       Ok = parseSwizzleReverse(Imm);
3775     } else {
3776       Error(ModeLoc, "expected a swizzle mode");
3777     }
3778 
3779     return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
3780   }
3781 
3782   return false;
3783 }
3784 
3785 OperandMatchResultTy
3786 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
3787   SMLoc S = Parser.getTok().getLoc();
3788   int64_t Imm = 0;
3789 
3790   if (trySkipId("offset")) {
3791 
3792     bool Ok = false;
3793     if (skipToken(AsmToken::Colon, "expected a colon")) {
3794       if (trySkipId("swizzle")) {
3795         Ok = parseSwizzleMacro(Imm);
3796       } else {
3797         Ok = parseSwizzleOffset(Imm);
3798       }
3799     }
3800 
3801     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
3802 
3803     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
3804   } else {
3805     return MatchOperand_NoMatch;
3806   }
3807 }
3808 
// True if this operand is an immediate tagged as a swizzle encoding.
bool
AMDGPUOperand::isSwizzle() const {
  return isImmTy(ImmTySwizzle);
}
3813 
3814 //===----------------------------------------------------------------------===//
3815 // sopp branch targets
3816 //===----------------------------------------------------------------------===//
3817 
3818 OperandMatchResultTy
3819 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
3820   SMLoc S = Parser.getTok().getLoc();
3821 
3822   switch (getLexer().getKind()) {
3823     default: return MatchOperand_ParseFail;
3824     case AsmToken::Integer: {
3825       int64_t Imm;
3826       if (getParser().parseAbsoluteExpression(Imm))
3827         return MatchOperand_ParseFail;
3828       Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S));
3829       return MatchOperand_Success;
3830     }
3831 
3832     case AsmToken::Identifier:
3833       Operands.push_back(AMDGPUOperand::CreateExpr(this,
3834           MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
3835                                   Parser.getTok().getString()), getContext()), S));
3836       Parser.Lex();
3837       return MatchOperand_Success;
3838   }
3839 }
3840 
3841 //===----------------------------------------------------------------------===//
3842 // mubuf
3843 //===----------------------------------------------------------------------===//
3844 
// Placeholder GLC operand (value 0) used when the modifier is omitted.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
}
3848 
// Placeholder SLC operand (value 0) used when the modifier is omitted.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
}
3852 
// Placeholder TFE operand (value 0) used when the modifier is omitted.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultTFE() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyTFE);
}
3856 
// Build an MCInst for a MUBUF instruction from the parsed operand
// list. Registers and the literal soffset are appended in parse order;
// optional immediates are recorded by type and appended at the end
// (offset, then glc unless hard-coded, slc, tfe) with defaults when
// omitted. For atomics with return, $vdata_in is duplicated as $vdata.
void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
                               const OperandVector &Operands,
                               bool IsAtomic, bool IsAtomicReturn) {
  OptionalImmIndexMap OptionalIdx;
  // An atomic-with-return must also be an atomic.
  assert(IsAtomicReturn ? IsAtomic : true);

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string.  There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  // Copy $vdata_in operand and insert as $vdata for MUBUF_Atomic RTN insns.
  if (IsAtomicReturn) {
    MCInst::iterator I = Inst.begin(); // $vdata_in is always at the beginning.
    Inst.insert(I, *I);
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
  if (!IsAtomic) { // glc is hard-coded.
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
  }
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
}
3902 
// Build an MCInst for an MTBUF instruction. Same scheme as
// cvtMubufImpl, but the optional-immediate tail is
// offset, dfmt, nfmt, glc, slc, tfe.
void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string.  There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDFMT);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyNFMT);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
}
3940 
3941 //===----------------------------------------------------------------------===//
3942 // mimg
3943 //===----------------------------------------------------------------------===//
3944 
// Build an MCInst for a MIMG instruction. Defs come first; for
// atomics the data source register is the same operand as the
// destination. Optional immediate modifiers are appended in the fixed
// MIMG order (dmask, unorm, glc, da, r128, tfe, lwe, slc).
void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
                              bool IsAtomic) {
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (IsAtomic) {
    // Add src, same as dst
    ((AMDGPUOperand &)*Operands[I]).addRegOperands(Inst, 1);
  }

  OptionalImmIndexMap OptionalIdx;

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);

    // Add the register arguments
    if (Op.isRegOrImm()) {
      Op.addRegOrImmOperands(Inst, 1);
      continue;
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unexpected operand type");
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
}
3983 
// MIMG atomic conversion: same as cvtMIMG with IsAtomic set.
void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
  cvtMIMG(Inst, Operands, true);
}
3987 
// Placeholder dmask operand (value 0) used when the modifier is omitted.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDMask() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDMask);
}
3991 
// Placeholder unorm operand (value 0) used when the modifier is omitted.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultUNorm() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyUNorm);
}
3995 
// Placeholder da operand (value 0) used when the modifier is omitted.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDA() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDA);
}
3999 
// Placeholder r128 operand (value 0) used when the modifier is omitted.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultR128() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyR128);
}
4003 
// Placeholder lwe operand (value 0) used when the modifier is omitted.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultLWE() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyLWE);
}
4007 
4008 //===----------------------------------------------------------------------===//
4009 // smrd
4010 //===----------------------------------------------------------------------===//
4011 
// Immediate that fits in an 8-bit unsigned SMRD offset field.
bool AMDGPUOperand::isSMRDOffset8() const {
  return isImm() && isUInt<8>(getImm());
}
4015 
// Immediate that fits in a 20-bit unsigned SMRD offset field.
bool AMDGPUOperand::isSMRDOffset20() const {
  return isImm() && isUInt<20>(getImm());
}
4019 
// Immediate requiring the 32-bit SMRD literal-offset form: too large
// for 8 bits but still representable in 32.
bool AMDGPUOperand::isSMRDLiteralOffset() const {
  // 32-bit literals are only supported on CI and we only want to use them
  // when the offset is > 8-bits.
  return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
}
4025 
// Default (zero) offset operand for the 8-bit SMRD offset form.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}
4029 
// Default (zero) offset operand for the 20-bit SMRD offset form.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}
4033 
// Default (zero) offset operand for the SMRD literal-offset form.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}
4037 
// Default (zero) operand for a 12-bit unsigned offset field.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}
4041 
// Default (zero) operand for a 13-bit signed offset field.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}
4045 
4046 //===----------------------------------------------------------------------===//
4047 // vop3
4048 //===----------------------------------------------------------------------===//
4049 
// Convert an omod "mul:N" operand value into its encoding:
// 1 -> 0, 2 -> 1, 4 -> 2. Any other multiplier is rejected.
static bool ConvertOmodMul(int64_t &Mul) {
  switch (Mul) {
  case 1: Mul = 0; return true;
  case 2: Mul = 1; return true;
  case 4: Mul = 2; return true;
  default: return false;
  }
}
4057 
// Convert an omod "div:N" operand value into its encoding:
// 1 -> 0, 2 -> 3. Any other divisor is rejected.
static bool ConvertOmodDiv(int64_t &Div) {
  switch (Div) {
  case 1: Div = 0; return true;
  case 2: Div = 3; return true;
  default: return false;
  }
}
4071 
// Convert a parsed bound_ctrl value into its encoding:
// 0 -> 1, -1 -> 0. Any other value is rejected.
static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
  switch (BoundCtrl) {
  case 0:  BoundCtrl = 1; return true;
  case -1: BoundCtrl = 0; return true;
  default: return false;
  }
}
4085 
// Note: the order in this table matches the order of operands in AsmString.
// Columns: operand name as written in assembly, its ImmTy tag, whether it
// is a bare flag (IsBit) or a "name:value" operand, and an optional
// value-conversion callback applied after parsing.
static const OptionalOperand AMDGPUOptionalOperandTable[] = {
  {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
  {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
  {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
  {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
  {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
  {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
  {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
  {"dfmt",    AMDGPUOperand::ImmTyDFMT, false, nullptr},
  {"nfmt",    AMDGPUOperand::ImmTyNFMT, false, nullptr},
  {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
  {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
  {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
  {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
  {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
  {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
  {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
  {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
  {"r128",    AMDGPUOperand::ImmTyR128,  true, nullptr},
  {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
  {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
  {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
  {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
  {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
  {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
  {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
  {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
  {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
  {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
  {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
  {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
  {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
  {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
  {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}
};
4122 
4123 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
4124   OperandMatchResultTy res;
4125   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
4126     // try to parse any optional operand here
4127     if (Op.IsBit) {
4128       res = parseNamedBit(Op.Name, Operands, Op.Type);
4129     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
4130       res = parseOModOperand(Operands);
4131     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
4132                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
4133                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
4134       res = parseSDWASel(Operands, Op.Name, Op.Type);
4135     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
4136       res = parseSDWADstUnused(Operands);
4137     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
4138                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
4139                Op.Type == AMDGPUOperand::ImmTyNegLo ||
4140                Op.Type == AMDGPUOperand::ImmTyNegHi) {
4141       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
4142                                         Op.ConvertResult);
4143     } else {
4144       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
4145     }
4146     if (res != MatchOperand_NoMatch) {
4147       return res;
4148     }
4149   }
4150   return MatchOperand_NoMatch;
4151 }
4152 
4153 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
4154   StringRef Name = Parser.getTok().getString();
4155   if (Name == "mul") {
4156     return parseIntWithPrefix("mul", Operands,
4157                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
4158   }
4159 
4160   if (Name == "div") {
4161     return parseIntWithPrefix("div", Operands,
4162                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
4163   }
4164 
4165   return MatchOperand_NoMatch;
4166 }
4167 
// VOP3 conversion for opcodes where the destination's op_sel bit is
// encoded in src0_modifiers: after the regular VOP3P conversion, move
// op_sel bit <number-of-srcs> into src0_modifiers as DST_OP_SEL.
void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
  cvtVOP3P(Inst, Operands);

  int Opc = Inst.getOpcode();

  // Count how many src operands this opcode has (1..3).
  int SrcNum;
  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  for (SrcNum = 0;
       SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
       ++SrcNum);
  assert(SrcNum > 0);

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

  // The bit just past the last source select is the destination select.
  if ((OpSel & (1 << SrcNum)) != 0) {
    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
    uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
    Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
  }
}
4191 
4192 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
4193       // 1. This operand is input modifiers
4194   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
4195       // 2. This is not last operand
4196       && Desc.NumOperands > (OpNum + 1)
4197       // 3. Next operand is register class
4198       && Desc.OpInfo[OpNum + 1].RegClass != -1
4199       // 4. Next register is not tied to any other operand
4200       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
4201 }
4202 
// Build an MCInst for a VOP3 interpolation instruction. Sources with
// modifiers are added as (modifiers, value) pairs; interp slot/attr/
// attr_chan operands are added as raw immediates; optional high/clamp/
// omod are appended only when the opcode has those operands.
void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
{
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  // Defs first.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isInterpSlot() ||
               Op.isInterpAttr() ||
               Op.isAttrChan()) {
      Inst.addOperand(MCOperand::createImm(Op.Imm.Val));
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }
}
4241 
// Shared VOP3 conversion. Sources with modifiers are added as
// (modifiers, value) pairs; optional clamp/omod are appended when the
// opcode has them. Optional-immediate indices are returned through
// OptionalIdx so callers (e.g. cvtVOP3PImpl) can append more.
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                              OptionalImmIndexMap &OptionalIdx) {
  unsigned Opc = Inst.getOpcode();

  // Defs first.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
    // This instruction has src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
      } else if (Op.isImmModifier()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else if (Op.isRegOrImm()) {
        Op.addRegOrImmOperands(Inst, 1);
      } else {
        llvm_unreachable("unhandled operand type");
      }
    }
  } else {
    // No src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (Op.isMod()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        Op.addRegOrImmOperands(Inst, 1);
      }
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }

  // special case v_mac_{f16, f32}:
  // it has src2 register operand that is tied to dst operand
  // we don't allow modifiers for this operand in assembler so src2_modifiers
  // should be 0
  if (Opc == AMDGPU::V_MAC_F32_e64_si || Opc == AMDGPU::V_MAC_F32_e64_vi ||
      Opc == AMDGPU::V_MAC_F16_e64_vi) {
    auto it = Inst.begin();
    std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
    ++it;
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}
4299 
// VOP3 conversion entry point with a throwaway optional-index map.
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  cvtVOP3(Inst, Operands, OptionalIdx);
}
4304 
// Convert a VOP3P instruction. After the regular VOP3 conversion, the
// op_sel/op_sel_hi/neg_lo/neg_hi operands are appended, then each of
// their per-source bits is folded into the corresponding
// src*_modifiers operand. IsPacked distinguishes true packed-math
// opcodes (which have neg_lo/neg_hi) from opcodes reusing the VOP3P
// encoding.
void AMDGPUAsmParser::cvtVOP3PImpl(MCInst &Inst,
                                   const OperandVector &Operands,
                                   bool IsPacked) {
  OptionalImmIndexMap OptIdx;
  int Opc = Inst.getOpcode();

  cvtVOP3(Inst, Operands, OptIdx);

  // Opcodes with a vdst_in operand take the destination as an extra
  // tied input.
  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
    assert(!IsPacked);
    Inst.addOperand(Inst.getOperand(0));
  }

  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
  // instruction, and then figure out where to actually put the modifiers

  addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    // TODO: Should we change the printing to match?
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1) {
    assert(IsPacked);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
  }

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);

  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelHiIdx != -1) {
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
  }

  if (NegLoIdx != -1) {
    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
    NegLo = Inst.getOperand(NegLoIdx).getImm();
    NegHi = Inst.getOperand(NegHiIdx).getImm();
  }

  // Fold bit J of each packed operand into src J's modifiers.
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    uint32_t ModVal = 0;

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}
4386 
// VOP3P conversion for packed-math opcodes.
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
  cvtVOP3PImpl(Inst, Operands, true);
}
4390 
// VOP3P conversion for opcodes that use the encoding but are not
// packed math.
void AMDGPUAsmParser::cvtVOP3P_NotPacked(MCInst &Inst,
                                         const OperandVector &Operands) {
  cvtVOP3PImpl(Inst, Operands, false);
}
4395 
4396 //===----------------------------------------------------------------------===//
4397 // dpp
4398 //===----------------------------------------------------------------------===//
4399 
4400 bool AMDGPUOperand::isDPPCtrl() const {
4401   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
4402   if (result) {
4403     int64_t Imm = getImm();
4404     return ((Imm >= 0x000) && (Imm <= 0x0ff)) ||
4405            ((Imm >= 0x101) && (Imm <= 0x10f)) ||
4406            ((Imm >= 0x111) && (Imm <= 0x11f)) ||
4407            ((Imm >= 0x121) && (Imm <= 0x12f)) ||
4408            (Imm == 0x130) ||
4409            (Imm == 0x134) ||
4410            (Imm == 0x138) ||
4411            (Imm == 0x13c) ||
4412            (Imm == 0x140) ||
4413            (Imm == 0x141) ||
4414            (Imm == 0x142) ||
4415            (Imm == 0x143);
4416   }
4417   return false;
4418 }
4419 
// Immediate that fits the 4-bit GPR index mode field.
bool AMDGPUOperand::isGPRIdxMode() const {
  return isImm() && isUInt<4>(getImm());
}
4423 
// Immediate representable in 16 bits, signed or unsigned.
bool AMDGPUOperand::isS16Imm() const {
  return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
}
4427 
// Immediate representable in 16 unsigned bits.
bool AMDGPUOperand::isU16Imm() const {
  return isImm() && isUInt<16>(getImm());
}
4431 
// Parse the dpp_ctrl operand into its 9-bit encoding. Accepted forms:
//   row_mirror / row_half_mirror        (bare keywords)
//   quad_perm:[a,b,c,d]                 (four 2-bit lane selects)
//   row_shl/row_shr/row_ror:1-15
//   wave_shl/wave_rol/wave_shr/wave_ror:1
//   row_bcast:15 or row_bcast:31
OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  StringRef Prefix;
  int64_t Int;

  if (getLexer().getKind() == AsmToken::Identifier) {
    Prefix = Parser.getTok().getString();
  } else {
    return MatchOperand_NoMatch;
  }

  if (Prefix == "row_mirror") {
    Int = 0x140;
    Parser.Lex();
  } else if (Prefix == "row_half_mirror") {
    Int = 0x141;
    Parser.Lex();
  } else {
    // Check to prevent parseDPPCtrlOps from eating invalid tokens
    if (Prefix != "quad_perm"
        && Prefix != "row_shl"
        && Prefix != "row_shr"
        && Prefix != "row_ror"
        && Prefix != "wave_shl"
        && Prefix != "wave_rol"
        && Prefix != "wave_shr"
        && Prefix != "wave_ror"
        && Prefix != "row_bcast") {
      return MatchOperand_NoMatch;
    }

    Parser.Lex();
    if (getLexer().isNot(AsmToken::Colon))
      return MatchOperand_ParseFail;

    if (Prefix == "quad_perm") {
      // quad_perm:[%d,%d,%d,%d]
      Parser.Lex();
      if (getLexer().isNot(AsmToken::LBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();

      // First lane select occupies the low 2 bits.
      if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
        return MatchOperand_ParseFail;

      // Remaining three lane selects, 2 bits each.
      for (int i = 0; i < 3; ++i) {
        if (getLexer().isNot(AsmToken::Comma))
          return MatchOperand_ParseFail;
        Parser.Lex();

        int64_t Temp;
        if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
          return MatchOperand_ParseFail;
        const int shift = i*2 + 2;
        Int += (Temp << shift);
      }

      if (getLexer().isNot(AsmToken::RBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();
    } else {
      // sel:%d
      Parser.Lex();
      if (getParser().parseAbsoluteExpression(Int))
        return MatchOperand_ParseFail;

      // Fold the parsed value into the encoding for its prefix,
      // rejecting out-of-range values.
      if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
        Int |= 0x100;
      } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
        Int |= 0x110;
      } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
        Int |= 0x120;
      } else if (Prefix == "wave_shl" && 1 == Int) {
        Int = 0x130;
      } else if (Prefix == "wave_rol" && 1 == Int) {
        Int = 0x134;
      } else if (Prefix == "wave_shr" && 1 == Int) {
        Int = 0x138;
      } else if (Prefix == "wave_ror" && 1 == Int) {
        Int = 0x13C;
      } else if (Prefix == "row_bcast") {
        if (Int == 15) {
          Int = 0x142;
        } else if (Int == 31) {
          Int = 0x143;
        } else {
          return MatchOperand_ParseFail;
        }
      } else {
        return MatchOperand_ParseFail;
      }
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}
4530 
4531 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
4532   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
4533 }
4534 
4535 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
4536   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
4537 }
4538 
4539 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
4540   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
4541 }
4542 
4543 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
4544   OptionalImmIndexMap OptionalIdx;
4545 
4546   unsigned I = 1;
4547   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4548   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
4549     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
4550   }
4551 
4552   // All DPP instructions with at least one source operand have a fake "old"
4553   // source at the beginning that's tied to the dst operand. Handle it here.
4554   if (Desc.getNumOperands() >= 2)
4555     Inst.addOperand(Inst.getOperand(0));
4556 
4557   for (unsigned E = Operands.size(); I != E; ++I) {
4558     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4559     // Add the register arguments
4560     if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
4561       // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
4562       // Skip it.
4563       continue;
4564     } if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
4565       Op.addRegWithFPInputModsOperands(Inst, 2);
4566     } else if (Op.isDPPCtrl()) {
4567       Op.addImmOperands(Inst, 1);
4568     } else if (Op.isImm()) {
4569       // Handle optional arguments
4570       OptionalIdx[Op.getImmTy()] = I;
4571     } else {
4572       llvm_unreachable("Invalid operand type");
4573     }
4574   }
4575 
4576   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
4577   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
4578   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
4579 }
4580 
4581 //===----------------------------------------------------------------------===//
4582 // sdwa
4583 //===----------------------------------------------------------------------===//
4584 
4585 OperandMatchResultTy
4586 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
4587                               AMDGPUOperand::ImmTy Type) {
4588   using namespace llvm::AMDGPU::SDWA;
4589 
4590   SMLoc S = Parser.getTok().getLoc();
4591   StringRef Value;
4592   OperandMatchResultTy res;
4593 
4594   res = parseStringWithPrefix(Prefix, Value);
4595   if (res != MatchOperand_Success) {
4596     return res;
4597   }
4598 
4599   int64_t Int;
4600   Int = StringSwitch<int64_t>(Value)
4601         .Case("BYTE_0", SdwaSel::BYTE_0)
4602         .Case("BYTE_1", SdwaSel::BYTE_1)
4603         .Case("BYTE_2", SdwaSel::BYTE_2)
4604         .Case("BYTE_3", SdwaSel::BYTE_3)
4605         .Case("WORD_0", SdwaSel::WORD_0)
4606         .Case("WORD_1", SdwaSel::WORD_1)
4607         .Case("DWORD", SdwaSel::DWORD)
4608         .Default(0xffffffff);
4609   Parser.Lex(); // eat last token
4610 
4611   if (Int == 0xffffffff) {
4612     return MatchOperand_ParseFail;
4613   }
4614 
4615   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
4616   return MatchOperand_Success;
4617 }
4618 
4619 OperandMatchResultTy
4620 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
4621   using namespace llvm::AMDGPU::SDWA;
4622 
4623   SMLoc S = Parser.getTok().getLoc();
4624   StringRef Value;
4625   OperandMatchResultTy res;
4626 
4627   res = parseStringWithPrefix("dst_unused", Value);
4628   if (res != MatchOperand_Success) {
4629     return res;
4630   }
4631 
4632   int64_t Int;
4633   Int = StringSwitch<int64_t>(Value)
4634         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
4635         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
4636         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
4637         .Default(0xffffffff);
4638   Parser.Lex(); // eat last token
4639 
4640   if (Int == 0xffffffff) {
4641     return MatchOperand_ParseFail;
4642   }
4643 
4644   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
4645   return MatchOperand_Success;
4646 }
4647 
/// Convert parsed operands for a VOP1 SDWA instruction.
void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}
4651 
/// Convert parsed operands for a VOP2 SDWA instruction.
void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}
4655 
/// Convert parsed operands for a VOP2b SDWA instruction; the explicit "vcc"
/// token is skipped (see cvtSDWA).
void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
}
4659 
/// Convert parsed operands for a VOPC SDWA instruction. The leading "vcc"
/// operand is skipped only on VI — presumably it is implicit in that
/// encoding; confirm against the SDWA instruction definitions.
void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}
4663 
/// Shared conversion routine for SDWA instructions.
///
/// Adds the defs, then walks the remaining parsed operands: optionally
/// skipping an explicit "vcc" token (VOP2b/VOPC forms), adding registers
/// with their input modifiers, and recording optional immediates by type.
/// Finally appends the optional clamp/omod/sel/dst_unused immediates in the
/// order required for \p BasicInstType, and duplicates dst as the tied src2
/// for v_mac_{f16,f32}.
///
/// \param BasicInstType  one of SIInstrFlags::VOP1/VOP2/VOPC
/// \param skipVcc        whether an explicit "vcc" operand may be dropped
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType, bool skipVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  // True only while the immediately preceding operand was a skipped vcc;
  // prevents dropping two vcc tokens in a row.
  bool skippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (skipVcc && !skippedVcc && Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on previous iteration.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
        skippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        skippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    skippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi/gfx9 has no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      // omod is only present on some opcodes (e.g. gfx9 forms); add it only
      // when the operand exists in the descriptor.
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      // VOPC has no dst_sel/dst_unused — the destination is vcc.
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // special case v_mac_{f16, f32}:
  // it has src2 register operand that is tied to dst operand
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
    auto it = Inst.begin();
    std::advance(
      it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}
4751 
4752 /// Force static initialization.
4753 extern "C" void LLVMInitializeAMDGPUAsmParser() {
4754   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
4755   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
4756 }
4757 
4758 #define GET_REGISTER_MATCHER
4759 #define GET_MATCHER_IMPLEMENTATION
4760 #include "AMDGPUGenAsmMatcher.inc"
4761 
// This function must be defined after the auto-generated include above so
// that the MatchClassKind enum is available.
4764 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
4765                                                      unsigned Kind) {
4766   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
4767   // But MatchInstructionImpl() expects to meet token and fails to validate
4768   // operand. This method checks if we are given immediate operand but expect to
4769   // get corresponding token.
4770   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
4771   switch (Kind) {
4772   case MCK_addr64:
4773     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
4774   case MCK_gds:
4775     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
4776   case MCK_glc:
4777     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
4778   case MCK_idxen:
4779     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
4780   case MCK_offen:
4781     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
4782   case MCK_SSrcB32:
4783     // When operands have expression values, they will return true for isToken,
4784     // because it is not possible to distinguish between a token and an
4785     // expression at parse time. MatchInstructionImpl() will always try to
4786     // match an operand as a token, when isToken returns true, and when the
4787     // name of the expression is not a valid token, the match will fail,
4788     // so we need to handle it here.
4789     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
4790   case MCK_SSrcF32:
4791     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
4792   case MCK_SoppBrTarget:
4793     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
4794   case MCK_VReg32OrOff:
4795     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
4796   case MCK_InterpSlot:
4797     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
4798   case MCK_Attr:
4799     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
4800   case MCK_AttrChan:
4801     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
4802   default:
4803     return Match_InvalidOperand;
4804   }
4805 }
4806