1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "AMDGPU.h"
11 #include "AMDKernelCodeT.h"
12 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
13 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
14 #include "SIDefines.h"
15 #include "Utils/AMDGPUAsmUtils.h"
16 #include "Utils/AMDGPUBaseInfo.h"
17 #include "Utils/AMDKernelCodeTUtils.h"
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SmallBitVector.h"
23 #include "llvm/ADT/SmallString.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/ADT/StringSwitch.h"
26 #include "llvm/ADT/Twine.h"
27 #include "llvm/BinaryFormat/ELF.h"
28 #include "llvm/CodeGen/MachineValueType.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/Casting.h"
46 #include "llvm/Support/Compiler.h"
47 #include "llvm/Support/ErrorHandling.h"
48 #include "llvm/Support/MathExtras.h"
49 #include "llvm/Support/SMLoc.h"
50 #include "llvm/Support/TargetRegistry.h"
51 #include "llvm/Support/raw_ostream.h"
52 #include <algorithm>
53 #include <cassert>
54 #include <cstdint>
55 #include <cstring>
56 #include <iterator>
57 #include <map>
58 #include <memory>
59 #include <string>
60 
61 using namespace llvm;
62 using namespace llvm::AMDGPU;
63 
64 namespace {
65 
66 class AMDGPUAsmParser;
67 
68 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };
69 
70 //===----------------------------------------------------------------------===//
71 // Operand
72 //===----------------------------------------------------------------------===//
73 
74 class AMDGPUOperand : public MCParsedAsmOperand {
75   enum KindTy {
76     Token,
77     Immediate,
78     Register,
79     Expression
80   } Kind;
81 
82   SMLoc StartLoc, EndLoc;
83   const AMDGPUAsmParser *AsmParser;
84 
85 public:
86   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
87     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
88 
89   using Ptr = std::unique_ptr<AMDGPUOperand>;
90 
91   struct Modifiers {
92     bool Abs = false;
93     bool Neg = false;
94     bool Sext = false;
95 
96     bool hasFPModifiers() const { return Abs || Neg; }
97     bool hasIntModifiers() const { return Sext; }
98     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
99 
100     int64_t getFPModifiersOperand() const {
101       int64_t Operand = 0;
102       Operand |= Abs ? SISrcMods::ABS : 0;
103       Operand |= Neg ? SISrcMods::NEG : 0;
104       return Operand;
105     }
106 
107     int64_t getIntModifiersOperand() const {
108       int64_t Operand = 0;
109       Operand |= Sext ? SISrcMods::SEXT : 0;
110       return Operand;
111     }
112 
113     int64_t getModifiersOperand() const {
114       assert(!(hasFPModifiers() && hasIntModifiers())
115            && "fp and int modifiers should not be used simultaneously");
116       if (hasFPModifiers()) {
117         return getFPModifiersOperand();
118       } else if (hasIntModifiers()) {
119         return getIntModifiersOperand();
120       } else {
121         return 0;
122       }
123     }
124 
125     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
126   };
127 
128   enum ImmTy {
129     ImmTyNone,
130     ImmTyGDS,
131     ImmTyOffen,
132     ImmTyIdxen,
133     ImmTyAddr64,
134     ImmTyOffset,
135     ImmTyOffset0,
136     ImmTyOffset1,
137     ImmTyGLC,
138     ImmTySLC,
139     ImmTyTFE,
140     ImmTyClampSI,
141     ImmTyOModSI,
142     ImmTyDppCtrl,
143     ImmTyDppRowMask,
144     ImmTyDppBankMask,
145     ImmTyDppBoundCtrl,
146     ImmTySdwaDstSel,
147     ImmTySdwaSrc0Sel,
148     ImmTySdwaSrc1Sel,
149     ImmTySdwaDstUnused,
150     ImmTyDMask,
151     ImmTyUNorm,
152     ImmTyDA,
153     ImmTyR128,
154     ImmTyLWE,
155     ImmTyExpTgt,
156     ImmTyExpCompr,
157     ImmTyExpVM,
158     ImmTyDFMT,
159     ImmTyNFMT,
160     ImmTyHwreg,
161     ImmTyOff,
162     ImmTySendMsg,
163     ImmTyInterpSlot,
164     ImmTyInterpAttr,
165     ImmTyAttrChan,
166     ImmTyOpSel,
167     ImmTyOpSelHi,
168     ImmTyNegLo,
169     ImmTyNegHi,
170     ImmTySwizzle,
171     ImmTyHigh
172   };
173 
174   struct TokOp {
175     const char *Data;
176     unsigned Length;
177   };
178 
179   struct ImmOp {
180     int64_t Val;
181     ImmTy Type;
182     bool IsFPImm;
183     Modifiers Mods;
184   };
185 
186   struct RegOp {
187     unsigned RegNo;
188     bool IsForcedVOP3;
189     Modifiers Mods;
190   };
191 
192   union {
193     TokOp Tok;
194     ImmOp Imm;
195     RegOp Reg;
196     const MCExpr *Expr;
197   };
198 
199   bool isToken() const override {
200     if (Kind == Token)
201       return true;
202 
203     if (Kind != Expression || !Expr)
204       return false;
205 
206     // When parsing operands, we can't always tell if something was meant to be
207     // a token, like 'gds', or an expression that references a global variable.
208     // In this case, we assume the string is an expression, and if we need to
209     // interpret is a token, then we treat the symbol name as the token.
210     return isa<MCSymbolRefExpr>(Expr);
211   }
212 
213   bool isImm() const override {
214     return Kind == Immediate;
215   }
216 
217   bool isInlinableImm(MVT type) const;
218   bool isLiteralImm(MVT type) const;
219 
220   bool isRegKind() const {
221     return Kind == Register;
222   }
223 
224   bool isReg() const override {
225     return isRegKind() && !hasModifiers();
226   }
227 
228   bool isRegOrImmWithInputMods(MVT type) const {
229     return isRegKind() || isInlinableImm(type);
230   }
231 
232   bool isRegOrImmWithInt16InputMods() const {
233     return isRegOrImmWithInputMods(MVT::i16);
234   }
235 
236   bool isRegOrImmWithInt32InputMods() const {
237     return isRegOrImmWithInputMods(MVT::i32);
238   }
239 
240   bool isRegOrImmWithInt64InputMods() const {
241     return isRegOrImmWithInputMods(MVT::i64);
242   }
243 
244   bool isRegOrImmWithFP16InputMods() const {
245     return isRegOrImmWithInputMods(MVT::f16);
246   }
247 
248   bool isRegOrImmWithFP32InputMods() const {
249     return isRegOrImmWithInputMods(MVT::f32);
250   }
251 
252   bool isRegOrImmWithFP64InputMods() const {
253     return isRegOrImmWithInputMods(MVT::f64);
254   }
255 
256   bool isVReg() const {
257     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
258            isRegClass(AMDGPU::VReg_64RegClassID) ||
259            isRegClass(AMDGPU::VReg_96RegClassID) ||
260            isRegClass(AMDGPU::VReg_128RegClassID) ||
261            isRegClass(AMDGPU::VReg_256RegClassID) ||
262            isRegClass(AMDGPU::VReg_512RegClassID);
263   }
264 
265   bool isVReg32OrOff() const {
266     return isOff() || isRegClass(AMDGPU::VGPR_32RegClassID);
267   }
268 
269   bool isSDWARegKind() const;
270 
271   bool isImmTy(ImmTy ImmT) const {
272     return isImm() && Imm.Type == ImmT;
273   }
274 
275   bool isImmModifier() const {
276     return isImm() && Imm.Type != ImmTyNone;
277   }
278 
279   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
280   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
281   bool isDMask() const { return isImmTy(ImmTyDMask); }
282   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
283   bool isDA() const { return isImmTy(ImmTyDA); }
284   bool isR128() const { return isImmTy(ImmTyUNorm); }
285   bool isLWE() const { return isImmTy(ImmTyLWE); }
286   bool isOff() const { return isImmTy(ImmTyOff); }
287   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
288   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
289   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
290   bool isOffen() const { return isImmTy(ImmTyOffen); }
291   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
292   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
293   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
294   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<16>(getImm()); }
295   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
296 
297   bool isOffsetU12() const { return isImmTy(ImmTyOffset) && isUInt<12>(getImm()); }
298   bool isOffsetS13() const { return isImmTy(ImmTyOffset) && isInt<13>(getImm()); }
299   bool isGDS() const { return isImmTy(ImmTyGDS); }
300   bool isGLC() const { return isImmTy(ImmTyGLC); }
301   bool isSLC() const { return isImmTy(ImmTySLC); }
302   bool isTFE() const { return isImmTy(ImmTyTFE); }
303   bool isDFMT() const { return isImmTy(ImmTyDFMT) && isUInt<8>(getImm()); }
304   bool isNFMT() const { return isImmTy(ImmTyNFMT) && isUInt<8>(getImm()); }
305   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
306   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
307   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
308   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
309   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
310   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
311   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
312   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
313   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
314   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
315   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
316   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
317   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
318   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
319   bool isHigh() const { return isImmTy(ImmTyHigh); }
320 
321   bool isMod() const {
322     return isClampSI() || isOModSI();
323   }
324 
325   bool isRegOrImm() const {
326     return isReg() || isImm();
327   }
328 
329   bool isRegClass(unsigned RCID) const;
330 
331   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
332     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
333   }
334 
335   bool isSCSrcB16() const {
336     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
337   }
338 
339   bool isSCSrcV2B16() const {
340     return isSCSrcB16();
341   }
342 
343   bool isSCSrcB32() const {
344     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
345   }
346 
347   bool isSCSrcB64() const {
348     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
349   }
350 
351   bool isSCSrcF16() const {
352     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
353   }
354 
355   bool isSCSrcV2F16() const {
356     return isSCSrcF16();
357   }
358 
359   bool isSCSrcF32() const {
360     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
361   }
362 
363   bool isSCSrcF64() const {
364     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
365   }
366 
367   bool isSSrcB32() const {
368     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
369   }
370 
371   bool isSSrcB16() const {
372     return isSCSrcB16() || isLiteralImm(MVT::i16);
373   }
374 
375   bool isSSrcV2B16() const {
376     llvm_unreachable("cannot happen");
377     return isSSrcB16();
378   }
379 
380   bool isSSrcB64() const {
381     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
382     // See isVSrc64().
383     return isSCSrcB64() || isLiteralImm(MVT::i64);
384   }
385 
386   bool isSSrcF32() const {
387     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
388   }
389 
390   bool isSSrcF64() const {
391     return isSCSrcB64() || isLiteralImm(MVT::f64);
392   }
393 
394   bool isSSrcF16() const {
395     return isSCSrcB16() || isLiteralImm(MVT::f16);
396   }
397 
398   bool isSSrcV2F16() const {
399     llvm_unreachable("cannot happen");
400     return isSSrcF16();
401   }
402 
403   bool isVCSrcB32() const {
404     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
405   }
406 
407   bool isVCSrcB64() const {
408     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
409   }
410 
411   bool isVCSrcB16() const {
412     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
413   }
414 
415   bool isVCSrcV2B16() const {
416     return isVCSrcB16();
417   }
418 
419   bool isVCSrcF32() const {
420     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
421   }
422 
423   bool isVCSrcF64() const {
424     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
425   }
426 
427   bool isVCSrcF16() const {
428     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
429   }
430 
431   bool isVCSrcV2F16() const {
432     return isVCSrcF16();
433   }
434 
435   bool isVSrcB32() const {
436     return isVCSrcF32() || isLiteralImm(MVT::i32);
437   }
438 
439   bool isVSrcB64() const {
440     return isVCSrcF64() || isLiteralImm(MVT::i64);
441   }
442 
443   bool isVSrcB16() const {
444     return isVCSrcF16() || isLiteralImm(MVT::i16);
445   }
446 
447   bool isVSrcV2B16() const {
448     llvm_unreachable("cannot happen");
449     return isVSrcB16();
450   }
451 
452   bool isVSrcF32() const {
453     return isVCSrcF32() || isLiteralImm(MVT::f32);
454   }
455 
456   bool isVSrcF64() const {
457     return isVCSrcF64() || isLiteralImm(MVT::f64);
458   }
459 
460   bool isVSrcF16() const {
461     return isVCSrcF16() || isLiteralImm(MVT::f16);
462   }
463 
464   bool isVSrcV2F16() const {
465     llvm_unreachable("cannot happen");
466     return isVSrcF16();
467   }
468 
469   bool isKImmFP32() const {
470     return isLiteralImm(MVT::f32);
471   }
472 
473   bool isKImmFP16() const {
474     return isLiteralImm(MVT::f16);
475   }
476 
477   bool isMem() const override {
478     return false;
479   }
480 
481   bool isExpr() const {
482     return Kind == Expression;
483   }
484 
485   bool isSoppBrTarget() const {
486     return isExpr() || isImm();
487   }
488 
489   bool isSWaitCnt() const;
490   bool isHwreg() const;
491   bool isSendMsg() const;
492   bool isSwizzle() const;
493   bool isSMRDOffset8() const;
494   bool isSMRDOffset20() const;
495   bool isSMRDLiteralOffset() const;
496   bool isDPPCtrl() const;
497   bool isGPRIdxMode() const;
498   bool isS16Imm() const;
499   bool isU16Imm() const;
500 
501   StringRef getExpressionAsToken() const {
502     assert(isExpr());
503     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
504     return S->getSymbol().getName();
505   }
506 
507   StringRef getToken() const {
508     assert(isToken());
509 
510     if (Kind == Expression)
511       return getExpressionAsToken();
512 
513     return StringRef(Tok.Data, Tok.Length);
514   }
515 
516   int64_t getImm() const {
517     assert(isImm());
518     return Imm.Val;
519   }
520 
521   ImmTy getImmTy() const {
522     assert(isImm());
523     return Imm.Type;
524   }
525 
526   unsigned getReg() const override {
527     return Reg.RegNo;
528   }
529 
530   SMLoc getStartLoc() const override {
531     return StartLoc;
532   }
533 
534   SMLoc getEndLoc() const override {
535     return EndLoc;
536   }
537 
538   Modifiers getModifiers() const {
539     assert(isRegKind() || isImmTy(ImmTyNone));
540     return isRegKind() ? Reg.Mods : Imm.Mods;
541   }
542 
543   void setModifiers(Modifiers Mods) {
544     assert(isRegKind() || isImmTy(ImmTyNone));
545     if (isRegKind())
546       Reg.Mods = Mods;
547     else
548       Imm.Mods = Mods;
549   }
550 
551   bool hasModifiers() const {
552     return getModifiers().hasModifiers();
553   }
554 
555   bool hasFPModifiers() const {
556     return getModifiers().hasFPModifiers();
557   }
558 
559   bool hasIntModifiers() const {
560     return getModifiers().hasIntModifiers();
561   }
562 
563   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
564 
565   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
566 
567   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
568 
569   template <unsigned Bitwidth>
570   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
571 
572   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
573     addKImmFPOperands<16>(Inst, N);
574   }
575 
576   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
577     addKImmFPOperands<32>(Inst, N);
578   }
579 
580   void addRegOperands(MCInst &Inst, unsigned N) const;
581 
582   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
583     if (isRegKind())
584       addRegOperands(Inst, N);
585     else if (isExpr())
586       Inst.addOperand(MCOperand::createExpr(Expr));
587     else
588       addImmOperands(Inst, N);
589   }
590 
591   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
592     Modifiers Mods = getModifiers();
593     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
594     if (isRegKind()) {
595       addRegOperands(Inst, N);
596     } else {
597       addImmOperands(Inst, N, false);
598     }
599   }
600 
601   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
602     assert(!hasIntModifiers());
603     addRegOrImmWithInputModsOperands(Inst, N);
604   }
605 
606   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
607     assert(!hasFPModifiers());
608     addRegOrImmWithInputModsOperands(Inst, N);
609   }
610 
611   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
612     Modifiers Mods = getModifiers();
613     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
614     assert(isRegKind());
615     addRegOperands(Inst, N);
616   }
617 
618   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
619     assert(!hasIntModifiers());
620     addRegWithInputModsOperands(Inst, N);
621   }
622 
623   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
624     assert(!hasFPModifiers());
625     addRegWithInputModsOperands(Inst, N);
626   }
627 
628   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
629     if (isImm())
630       addImmOperands(Inst, N);
631     else {
632       assert(isExpr());
633       Inst.addOperand(MCOperand::createExpr(Expr));
634     }
635   }
636 
637   static void printImmTy(raw_ostream& OS, ImmTy Type) {
638     switch (Type) {
639     case ImmTyNone: OS << "None"; break;
640     case ImmTyGDS: OS << "GDS"; break;
641     case ImmTyOffen: OS << "Offen"; break;
642     case ImmTyIdxen: OS << "Idxen"; break;
643     case ImmTyAddr64: OS << "Addr64"; break;
644     case ImmTyOffset: OS << "Offset"; break;
645     case ImmTyOffset0: OS << "Offset0"; break;
646     case ImmTyOffset1: OS << "Offset1"; break;
647     case ImmTyGLC: OS << "GLC"; break;
648     case ImmTySLC: OS << "SLC"; break;
649     case ImmTyTFE: OS << "TFE"; break;
650     case ImmTyDFMT: OS << "DFMT"; break;
651     case ImmTyNFMT: OS << "NFMT"; break;
652     case ImmTyClampSI: OS << "ClampSI"; break;
653     case ImmTyOModSI: OS << "OModSI"; break;
654     case ImmTyDppCtrl: OS << "DppCtrl"; break;
655     case ImmTyDppRowMask: OS << "DppRowMask"; break;
656     case ImmTyDppBankMask: OS << "DppBankMask"; break;
657     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
658     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
659     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
660     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
661     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
662     case ImmTyDMask: OS << "DMask"; break;
663     case ImmTyUNorm: OS << "UNorm"; break;
664     case ImmTyDA: OS << "DA"; break;
665     case ImmTyR128: OS << "R128"; break;
666     case ImmTyLWE: OS << "LWE"; break;
667     case ImmTyOff: OS << "Off"; break;
668     case ImmTyExpTgt: OS << "ExpTgt"; break;
669     case ImmTyExpCompr: OS << "ExpCompr"; break;
670     case ImmTyExpVM: OS << "ExpVM"; break;
671     case ImmTyHwreg: OS << "Hwreg"; break;
672     case ImmTySendMsg: OS << "SendMsg"; break;
673     case ImmTyInterpSlot: OS << "InterpSlot"; break;
674     case ImmTyInterpAttr: OS << "InterpAttr"; break;
675     case ImmTyAttrChan: OS << "AttrChan"; break;
676     case ImmTyOpSel: OS << "OpSel"; break;
677     case ImmTyOpSelHi: OS << "OpSelHi"; break;
678     case ImmTyNegLo: OS << "NegLo"; break;
679     case ImmTyNegHi: OS << "NegHi"; break;
680     case ImmTySwizzle: OS << "Swizzle"; break;
681     case ImmTyHigh: OS << "High"; break;
682     }
683   }
684 
685   void print(raw_ostream &OS) const override {
686     switch (Kind) {
687     case Register:
688       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
689       break;
690     case Immediate:
691       OS << '<' << getImm();
692       if (getImmTy() != ImmTyNone) {
693         OS << " type: "; printImmTy(OS, getImmTy());
694       }
695       OS << " mods: " << Imm.Mods << '>';
696       break;
697     case Token:
698       OS << '\'' << getToken() << '\'';
699       break;
700     case Expression:
701       OS << "<expr " << *Expr << '>';
702       break;
703     }
704   }
705 
706   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
707                                       int64_t Val, SMLoc Loc,
708                                       ImmTy Type = ImmTyNone,
709                                       bool IsFPImm = false) {
710     auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
711     Op->Imm.Val = Val;
712     Op->Imm.IsFPImm = IsFPImm;
713     Op->Imm.Type = Type;
714     Op->Imm.Mods = Modifiers();
715     Op->StartLoc = Loc;
716     Op->EndLoc = Loc;
717     return Op;
718   }
719 
720   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
721                                         StringRef Str, SMLoc Loc,
722                                         bool HasExplicitEncodingSize = true) {
723     auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
724     Res->Tok.Data = Str.data();
725     Res->Tok.Length = Str.size();
726     Res->StartLoc = Loc;
727     Res->EndLoc = Loc;
728     return Res;
729   }
730 
731   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
732                                       unsigned RegNo, SMLoc S,
733                                       SMLoc E,
734                                       bool ForceVOP3) {
735     auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
736     Op->Reg.RegNo = RegNo;
737     Op->Reg.Mods = Modifiers();
738     Op->Reg.IsForcedVOP3 = ForceVOP3;
739     Op->StartLoc = S;
740     Op->EndLoc = E;
741     return Op;
742   }
743 
744   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
745                                        const class MCExpr *Expr, SMLoc S) {
746     auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
747     Op->Expr = Expr;
748     Op->StartLoc = S;
749     Op->EndLoc = S;
750     return Op;
751   }
752 };
753 
754 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
755   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
756   return OS;
757 }
758 
759 //===----------------------------------------------------------------------===//
760 // AsmParser
761 //===----------------------------------------------------------------------===//
762 
763 // Holds info related to the current kernel, e.g. count of SGPRs used.
764 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
765 // .amdgpu_hsa_kernel or at EOF.
766 class KernelScopeInfo {
767   int SgprIndexUnusedMin = -1;
768   int VgprIndexUnusedMin = -1;
769   MCContext *Ctx = nullptr;
770 
771   void usesSgprAt(int i) {
772     if (i >= SgprIndexUnusedMin) {
773       SgprIndexUnusedMin = ++i;
774       if (Ctx) {
775         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
776         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
777       }
778     }
779   }
780 
781   void usesVgprAt(int i) {
782     if (i >= VgprIndexUnusedMin) {
783       VgprIndexUnusedMin = ++i;
784       if (Ctx) {
785         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
786         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
787       }
788     }
789   }
790 
791 public:
792   KernelScopeInfo() = default;
793 
794   void initialize(MCContext &Context) {
795     Ctx = &Context;
796     usesSgprAt(SgprIndexUnusedMin = -1);
797     usesVgprAt(VgprIndexUnusedMin = -1);
798   }
799 
800   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
801     switch (RegKind) {
802       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
803       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
804       default: break;
805     }
806   }
807 };
808 
809 class AMDGPUAsmParser : public MCTargetAsmParser {
810   MCAsmParser &Parser;
811 
812   unsigned ForcedEncodingSize = 0;
813   bool ForcedDPP = false;
814   bool ForcedSDWA = false;
815   KernelScopeInfo KernelScope;
816 
817   /// @name Auto-generated Match Functions
818   /// {
819 
820 #define GET_ASSEMBLER_HEADER
821 #include "AMDGPUGenAsmMatcher.inc"
822 
823   /// }
824 
825 private:
826   bool ParseAsAbsoluteExpression(uint32_t &Ret);
827   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
828   bool ParseDirectiveHSACodeObjectVersion();
829   bool ParseDirectiveHSACodeObjectISA();
830   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
831   bool ParseDirectiveAMDKernelCodeT();
832   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
833   bool ParseDirectiveAMDGPUHsaKernel();
834 
835   bool ParseDirectiveISAVersion();
836   bool ParseDirectiveHSAMetadata();
837   bool ParseDirectivePALMetadata();
838 
839   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
840                              RegisterKind RegKind, unsigned Reg1,
841                              unsigned RegNum);
842   bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
843                            unsigned& RegNum, unsigned& RegWidth,
844                            unsigned *DwordRegIndex);
845   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
846                     bool IsAtomic, bool IsAtomicReturn);
847   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
848                  bool IsGdsHardcoded);
849 
850 public:
851   enum AMDGPUMatchResultTy {
852     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
853   };
854 
855   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
856 
857   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
858                const MCInstrInfo &MII,
859                const MCTargetOptions &Options)
860       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
861     MCAsmParserExtension::Initialize(Parser);
862 
863     if (getFeatureBits().none()) {
864       // Set default features.
865       copySTI().ToggleFeature("SOUTHERN_ISLANDS");
866     }
867 
868     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
869 
870     {
871       // TODO: make those pre-defined variables read-only.
872       // Currently there is none suitable machinery in the core llvm-mc for this.
873       // MCSymbol::isRedefinable is intended for another purpose, and
874       // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
875       AMDGPU::IsaInfo::IsaVersion ISA =
876           AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
877       MCContext &Ctx = getContext();
878       MCSymbol *Sym =
879           Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
880       Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
881       Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
882       Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
883       Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
884       Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
885     }
886     KernelScope.initialize(getContext());
887   }
888 
889   bool isSI() const {
890     return AMDGPU::isSI(getSTI());
891   }
892 
893   bool isCI() const {
894     return AMDGPU::isCI(getSTI());
895   }
896 
897   bool isVI() const {
898     return AMDGPU::isVI(getSTI());
899   }
900 
901   bool isGFX9() const {
902     return AMDGPU::isGFX9(getSTI());
903   }
904 
905   bool hasInv2PiInlineImm() const {
906     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
907   }
908 
909   bool hasFlatOffsets() const {
910     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
911   }
912 
913   bool hasSGPR102_SGPR103() const {
914     return !isVI();
915   }
916 
917   bool hasIntClamp() const {
918     return getFeatureBits()[AMDGPU::FeatureIntClamp];
919   }
920 
921   AMDGPUTargetStreamer &getTargetStreamer() {
922     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
923     return static_cast<AMDGPUTargetStreamer &>(TS);
924   }
925 
926   const MCRegisterInfo *getMRI() const {
927     // We need this const_cast because for some reason getContext() is not const
928     // in MCAsmParser.
929     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
930   }
931 
932   const MCInstrInfo *getMII() const {
933     return &MII;
934   }
935 
936   const FeatureBitset &getFeatureBits() const {
937     return getSTI().getFeatureBits();
938   }
939 
940   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
941   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
942   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
943 
944   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
945   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
946   bool isForcedDPP() const { return ForcedDPP; }
947   bool isForcedSDWA() const { return ForcedSDWA; }
948   ArrayRef<unsigned> getMatchedVariants() const;
949 
950   std::unique_ptr<AMDGPUOperand> parseRegister();
951   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
952   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
953   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
954                                       unsigned Kind) override;
955   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
956                                OperandVector &Operands, MCStreamer &Out,
957                                uint64_t &ErrorInfo,
958                                bool MatchingInlineAsm) override;
959   bool ParseDirective(AsmToken DirectiveID) override;
960   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
961   StringRef parseMnemonicSuffix(StringRef Name);
962   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
963                         SMLoc NameLoc, OperandVector &Operands) override;
964   //bool ProcessInstruction(MCInst &Inst);
965 
966   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
967 
968   OperandMatchResultTy
969   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
970                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
971                      bool (*ConvertResult)(int64_t &) = nullptr);
972 
973   OperandMatchResultTy parseOperandArrayWithPrefix(
974     const char *Prefix,
975     OperandVector &Operands,
976     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
977     bool (*ConvertResult)(int64_t&) = nullptr);
978 
979   OperandMatchResultTy
980   parseNamedBit(const char *Name, OperandVector &Operands,
981                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
982   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
983                                              StringRef &Value);
984 
985   bool parseAbsoluteExpr(int64_t &Val, bool AbsMod = false);
986   OperandMatchResultTy parseImm(OperandVector &Operands, bool AbsMod = false);
987   OperandMatchResultTy parseReg(OperandVector &Operands);
988   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool AbsMod = false);
989   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
990   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
991   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
992   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
993   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
994 
995   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
996   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
997   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
998   void cvtExp(MCInst &Inst, const OperandVector &Operands);
999 
1000   bool parseCnt(int64_t &IntVal);
1001   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1002   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1003 
1004 private:
1005   struct OperandInfoTy {
1006     int64_t Id;
1007     bool IsSymbolic = false;
1008 
1009     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1010   };
1011 
1012   bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
1013   bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1014 
1015   void errorExpTgt();
1016   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1017 
1018   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
1019   bool validateConstantBusLimitations(const MCInst &Inst);
1020   bool validateEarlyClobberLimitations(const MCInst &Inst);
1021   bool validateIntClampSupported(const MCInst &Inst);
1022   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1023   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1024   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1025 
1026   bool trySkipId(const StringRef Id);
1027   bool trySkipToken(const AsmToken::TokenKind Kind);
1028   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1029   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1030   bool parseExpr(int64_t &Imm);
1031 
1032 public:
1033   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1034 
1035   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1036   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1037   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1038   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1039   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1040 
1041   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1042                             const unsigned MinVal,
1043                             const unsigned MaxVal,
1044                             const StringRef ErrMsg);
1045   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1046   bool parseSwizzleOffset(int64_t &Imm);
1047   bool parseSwizzleMacro(int64_t &Imm);
1048   bool parseSwizzleQuadPerm(int64_t &Imm);
1049   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1050   bool parseSwizzleBroadcast(int64_t &Imm);
1051   bool parseSwizzleSwap(int64_t &Imm);
1052   bool parseSwizzleReverse(int64_t &Imm);
1053 
1054   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1055   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1056   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1057   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1058 
1059   AMDGPUOperand::Ptr defaultGLC() const;
1060   AMDGPUOperand::Ptr defaultSLC() const;
1061   AMDGPUOperand::Ptr defaultTFE() const;
1062 
1063   AMDGPUOperand::Ptr defaultDMask() const;
1064   AMDGPUOperand::Ptr defaultUNorm() const;
1065   AMDGPUOperand::Ptr defaultDA() const;
1066   AMDGPUOperand::Ptr defaultR128() const;
1067   AMDGPUOperand::Ptr defaultLWE() const;
1068   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1069   AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1070   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1071   AMDGPUOperand::Ptr defaultOffsetU12() const;
1072   AMDGPUOperand::Ptr defaultOffsetS13() const;
1073 
1074   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1075 
1076   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1077                OptionalImmIndexMap &OptionalIdx);
1078   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1079   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1080   void cvtVOP3PImpl(MCInst &Inst, const OperandVector &Operands,
1081                     bool IsPacked);
1082   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1083   void cvtVOP3P_NotPacked(MCInst &Inst, const OperandVector &Operands);
1084 
1085   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1086 
1087   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1088                bool IsAtomic = false);
1089   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1090 
1091   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1092   AMDGPUOperand::Ptr defaultRowMask() const;
1093   AMDGPUOperand::Ptr defaultBankMask() const;
1094   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1095   void cvtDPP(MCInst &Inst, const OperandVector &Operands);
1096 
1097   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1098                                     AMDGPUOperand::ImmTy Type);
1099   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1100   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1101   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1102   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1103   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1104   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1105                 uint64_t BasicInstType, bool skipVcc = false);
1106 };
1107 
1108 struct OptionalOperand {
1109   const char *Name;
1110   AMDGPUOperand::ImmTy Type;
1111   bool IsBit;
1112   bool (*ConvertResult)(int64_t&);
1113 };
1114 
1115 } // end anonymous namespace
1116 
1117 // May be called with integer type with equivalent bitwidth.
1118 static const fltSemantics *getFltSemantics(unsigned Size) {
1119   switch (Size) {
1120   case 4:
1121     return &APFloat::IEEEsingle();
1122   case 8:
1123     return &APFloat::IEEEdouble();
1124   case 2:
1125     return &APFloat::IEEEhalf();
1126   default:
1127     llvm_unreachable("unsupported fp type");
1128   }
1129 }
1130 
1131 static const fltSemantics *getFltSemantics(MVT VT) {
1132   return getFltSemantics(VT.getSizeInBits() / 8);
1133 }
1134 
1135 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1136   switch (OperandType) {
1137   case AMDGPU::OPERAND_REG_IMM_INT32:
1138   case AMDGPU::OPERAND_REG_IMM_FP32:
1139   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1140   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1141     return &APFloat::IEEEsingle();
1142   case AMDGPU::OPERAND_REG_IMM_INT64:
1143   case AMDGPU::OPERAND_REG_IMM_FP64:
1144   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1145   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1146     return &APFloat::IEEEdouble();
1147   case AMDGPU::OPERAND_REG_IMM_INT16:
1148   case AMDGPU::OPERAND_REG_IMM_FP16:
1149   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1150   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1151   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1152   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1153     return &APFloat::IEEEhalf();
1154   default:
1155     llvm_unreachable("unsupported fp type");
1156   }
1157 }
1158 
1159 //===----------------------------------------------------------------------===//
1160 // Operand
1161 //===----------------------------------------------------------------------===//
1162 
1163 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1164   bool Lost;
1165 
1166   // Convert literal to single precision
1167   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1168                                                APFloat::rmNearestTiesToEven,
1169                                                &Lost);
1170   // We allow precision lost but not overflow or underflow
1171   if (Status != APFloat::opOK &&
1172       Lost &&
1173       ((Status & APFloat::opOverflow)  != 0 ||
1174        (Status & APFloat::opUnderflow) != 0)) {
1175     return false;
1176   }
1177 
1178   return true;
1179 }
1180 
1181 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1182   if (!isImmTy(ImmTyNone)) {
1183     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1184     return false;
1185   }
1186   // TODO: We should avoid using host float here. It would be better to
1187   // check the float bit values which is what a few other places do.
1188   // We've had bot failures before due to weird NaN support on mips hosts.
1189 
1190   APInt Literal(64, Imm.Val);
1191 
1192   if (Imm.IsFPImm) { // We got fp literal token
1193     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1194       return AMDGPU::isInlinableLiteral64(Imm.Val,
1195                                           AsmParser->hasInv2PiInlineImm());
1196     }
1197 
1198     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1199     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1200       return false;
1201 
1202     if (type.getScalarSizeInBits() == 16) {
1203       return AMDGPU::isInlinableLiteral16(
1204         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1205         AsmParser->hasInv2PiInlineImm());
1206     }
1207 
1208     // Check if single precision literal is inlinable
1209     return AMDGPU::isInlinableLiteral32(
1210       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1211       AsmParser->hasInv2PiInlineImm());
1212   }
1213 
1214   // We got int literal token.
1215   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1216     return AMDGPU::isInlinableLiteral64(Imm.Val,
1217                                         AsmParser->hasInv2PiInlineImm());
1218   }
1219 
1220   if (type.getScalarSizeInBits() == 16) {
1221     return AMDGPU::isInlinableLiteral16(
1222       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1223       AsmParser->hasInv2PiInlineImm());
1224   }
1225 
1226   return AMDGPU::isInlinableLiteral32(
1227     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1228     AsmParser->hasInv2PiInlineImm());
1229 }
1230 
1231 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1232   // Check that this immediate can be added as literal
1233   if (!isImmTy(ImmTyNone)) {
1234     return false;
1235   }
1236 
1237   if (!Imm.IsFPImm) {
1238     // We got int literal token.
1239 
1240     if (type == MVT::f64 && hasFPModifiers()) {
1241       // Cannot apply fp modifiers to int literals preserving the same semantics
1242       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1243       // disable these cases.
1244       return false;
1245     }
1246 
1247     unsigned Size = type.getSizeInBits();
1248     if (Size == 64)
1249       Size = 32;
1250 
1251     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1252     // types.
1253     return isUIntN(Size, Imm.Val) || isIntN(Size, Imm.Val);
1254   }
1255 
1256   // We got fp literal token
1257   if (type == MVT::f64) { // Expected 64-bit fp operand
1258     // We would set low 64-bits of literal to zeroes but we accept this literals
1259     return true;
1260   }
1261 
1262   if (type == MVT::i64) { // Expected 64-bit int operand
1263     // We don't allow fp literals in 64-bit integer instructions. It is
1264     // unclear how we should encode them.
1265     return false;
1266   }
1267 
1268   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1269   return canLosslesslyConvertToFPType(FPLiteral, type);
1270 }
1271 
1272 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1273   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1274 }
1275 
1276 bool AMDGPUOperand::isSDWARegKind() const {
1277   if (AsmParser->isVI())
1278     return isVReg();
1279   else if (AsmParser->isGFX9())
1280     return isRegKind();
1281   else
1282     return false;
1283 }
1284 
1285 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1286 {
1287   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1288   assert(Size == 2 || Size == 4 || Size == 8);
1289 
1290   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1291 
1292   if (Imm.Mods.Abs) {
1293     Val &= ~FpSignMask;
1294   }
1295   if (Imm.Mods.Neg) {
1296     Val ^= FpSignMask;
1297   }
1298 
1299   return Val;
1300 }
1301 
1302 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1303   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1304                              Inst.getNumOperands())) {
1305     addLiteralImmOperand(Inst, Imm.Val,
1306                          ApplyModifiers &
1307                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1308   } else {
1309     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1310     Inst.addOperand(MCOperand::createImm(Imm.Val));
1311   }
1312 }
1313 
1314 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1315   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1316   auto OpNum = Inst.getNumOperands();
1317   // Check that this operand accepts literals
1318   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1319 
1320   if (ApplyModifiers) {
1321     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1322     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1323     Val = applyInputFPModifiers(Val, Size);
1324   }
1325 
1326   APInt Literal(64, Val);
1327   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1328 
1329   if (Imm.IsFPImm) { // We got fp literal token
1330     switch (OpTy) {
1331     case AMDGPU::OPERAND_REG_IMM_INT64:
1332     case AMDGPU::OPERAND_REG_IMM_FP64:
1333     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1334     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1335       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1336                                        AsmParser->hasInv2PiInlineImm())) {
1337         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1338         return;
1339       }
1340 
1341       // Non-inlineable
1342       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1343         // For fp operands we check if low 32 bits are zeros
1344         if (Literal.getLoBits(32) != 0) {
1345           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1346           "Can't encode literal as exact 64-bit floating-point operand. "
1347           "Low 32-bits will be set to zero");
1348         }
1349 
1350         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1351         return;
1352       }
1353 
1354       // We don't allow fp literals in 64-bit integer instructions. It is
1355       // unclear how we should encode them. This case should be checked earlier
1356       // in predicate methods (isLiteralImm())
1357       llvm_unreachable("fp literal in 64-bit integer instruction.");
1358 
1359     case AMDGPU::OPERAND_REG_IMM_INT32:
1360     case AMDGPU::OPERAND_REG_IMM_FP32:
1361     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1362     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1363     case AMDGPU::OPERAND_REG_IMM_INT16:
1364     case AMDGPU::OPERAND_REG_IMM_FP16:
1365     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1366     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1367     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1368     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
1369       bool lost;
1370       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1371       // Convert literal to single precision
1372       FPLiteral.convert(*getOpFltSemantics(OpTy),
1373                         APFloat::rmNearestTiesToEven, &lost);
1374       // We allow precision lost but not overflow or underflow. This should be
1375       // checked earlier in isLiteralImm()
1376 
1377       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1378       if (OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
1379           OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
1380         ImmVal |= (ImmVal << 16);
1381       }
1382 
1383       Inst.addOperand(MCOperand::createImm(ImmVal));
1384       return;
1385     }
1386     default:
1387       llvm_unreachable("invalid operand size");
1388     }
1389 
1390     return;
1391   }
1392 
1393    // We got int literal token.
1394   // Only sign extend inline immediates.
1395   // FIXME: No errors on truncation
1396   switch (OpTy) {
1397   case AMDGPU::OPERAND_REG_IMM_INT32:
1398   case AMDGPU::OPERAND_REG_IMM_FP32:
1399   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1400   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1401     if (isInt<32>(Val) &&
1402         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1403                                      AsmParser->hasInv2PiInlineImm())) {
1404       Inst.addOperand(MCOperand::createImm(Val));
1405       return;
1406     }
1407 
1408     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1409     return;
1410 
1411   case AMDGPU::OPERAND_REG_IMM_INT64:
1412   case AMDGPU::OPERAND_REG_IMM_FP64:
1413   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1414   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1415     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1416       Inst.addOperand(MCOperand::createImm(Val));
1417       return;
1418     }
1419 
1420     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1421     return;
1422 
1423   case AMDGPU::OPERAND_REG_IMM_INT16:
1424   case AMDGPU::OPERAND_REG_IMM_FP16:
1425   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1426   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1427     if (isInt<16>(Val) &&
1428         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1429                                      AsmParser->hasInv2PiInlineImm())) {
1430       Inst.addOperand(MCOperand::createImm(Val));
1431       return;
1432     }
1433 
1434     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1435     return;
1436 
1437   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1438   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
1439     auto LiteralVal = static_cast<uint16_t>(Literal.getLoBits(16).getZExtValue());
1440     assert(AMDGPU::isInlinableLiteral16(LiteralVal,
1441                                         AsmParser->hasInv2PiInlineImm()));
1442 
1443     uint32_t ImmVal = static_cast<uint32_t>(LiteralVal) << 16 |
1444                       static_cast<uint32_t>(LiteralVal);
1445     Inst.addOperand(MCOperand::createImm(ImmVal));
1446     return;
1447   }
1448   default:
1449     llvm_unreachable("invalid operand size");
1450   }
1451 }
1452 
1453 template <unsigned Bitwidth>
1454 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1455   APInt Literal(64, Imm.Val);
1456 
1457   if (!Imm.IsFPImm) {
1458     // We got int literal token.
1459     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1460     return;
1461   }
1462 
1463   bool Lost;
1464   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1465   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1466                     APFloat::rmNearestTiesToEven, &Lost);
1467   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1468 }
1469 
1470 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1471   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1472 }
1473 
1474 //===----------------------------------------------------------------------===//
1475 // AsmParser
1476 //===----------------------------------------------------------------------===//
1477 
1478 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1479   if (Is == IS_VGPR) {
1480     switch (RegWidth) {
1481       default: return -1;
1482       case 1: return AMDGPU::VGPR_32RegClassID;
1483       case 2: return AMDGPU::VReg_64RegClassID;
1484       case 3: return AMDGPU::VReg_96RegClassID;
1485       case 4: return AMDGPU::VReg_128RegClassID;
1486       case 8: return AMDGPU::VReg_256RegClassID;
1487       case 16: return AMDGPU::VReg_512RegClassID;
1488     }
1489   } else if (Is == IS_TTMP) {
1490     switch (RegWidth) {
1491       default: return -1;
1492       case 1: return AMDGPU::TTMP_32RegClassID;
1493       case 2: return AMDGPU::TTMP_64RegClassID;
1494       case 4: return AMDGPU::TTMP_128RegClassID;
1495     }
1496   } else if (Is == IS_SGPR) {
1497     switch (RegWidth) {
1498       default: return -1;
1499       case 1: return AMDGPU::SGPR_32RegClassID;
1500       case 2: return AMDGPU::SGPR_64RegClassID;
1501       case 4: return AMDGPU::SGPR_128RegClassID;
1502       case 8: return AMDGPU::SReg_256RegClassID;
1503       case 16: return AMDGPU::SReg_512RegClassID;
1504     }
1505   }
1506   return -1;
1507 }
1508 
1509 static unsigned getSpecialRegForName(StringRef RegName) {
1510   return StringSwitch<unsigned>(RegName)
1511     .Case("exec", AMDGPU::EXEC)
1512     .Case("vcc", AMDGPU::VCC)
1513     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1514     .Case("m0", AMDGPU::M0)
1515     .Case("scc", AMDGPU::SCC)
1516     .Case("tba", AMDGPU::TBA)
1517     .Case("tma", AMDGPU::TMA)
1518     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1519     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1520     .Case("vcc_lo", AMDGPU::VCC_LO)
1521     .Case("vcc_hi", AMDGPU::VCC_HI)
1522     .Case("exec_lo", AMDGPU::EXEC_LO)
1523     .Case("exec_hi", AMDGPU::EXEC_HI)
1524     .Case("tma_lo", AMDGPU::TMA_LO)
1525     .Case("tma_hi", AMDGPU::TMA_HI)
1526     .Case("tba_lo", AMDGPU::TBA_LO)
1527     .Case("tba_hi", AMDGPU::TBA_HI)
1528     .Default(0);
1529 }
1530 
1531 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1532                                     SMLoc &EndLoc) {
1533   auto R = parseRegister();
1534   if (!R) return true;
1535   assert(R->isReg());
1536   RegNo = R->getReg();
1537   StartLoc = R->getStartLoc();
1538   EndLoc = R->getEndLoc();
1539   return false;
1540 }
1541 
1542 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1543                                             RegisterKind RegKind, unsigned Reg1,
1544                                             unsigned RegNum) {
1545   switch (RegKind) {
1546   case IS_SPECIAL:
1547     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1548       Reg = AMDGPU::EXEC;
1549       RegWidth = 2;
1550       return true;
1551     }
1552     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1553       Reg = AMDGPU::FLAT_SCR;
1554       RegWidth = 2;
1555       return true;
1556     }
1557     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1558       Reg = AMDGPU::VCC;
1559       RegWidth = 2;
1560       return true;
1561     }
1562     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1563       Reg = AMDGPU::TBA;
1564       RegWidth = 2;
1565       return true;
1566     }
1567     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
1568       Reg = AMDGPU::TMA;
1569       RegWidth = 2;
1570       return true;
1571     }
1572     return false;
1573   case IS_VGPR:
1574   case IS_SGPR:
1575   case IS_TTMP:
1576     if (Reg1 != Reg + RegWidth) {
1577       return false;
1578     }
1579     RegWidth++;
1580     return true;
1581   default:
1582     llvm_unreachable("unexpected register kind");
1583   }
1584 }
1585 
1586 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1587                                           unsigned &RegNum, unsigned &RegWidth,
1588                                           unsigned *DwordRegIndex) {
1589   if (DwordRegIndex) { *DwordRegIndex = 0; }
1590   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
1591   if (getLexer().is(AsmToken::Identifier)) {
1592     StringRef RegName = Parser.getTok().getString();
1593     if ((Reg = getSpecialRegForName(RegName))) {
1594       Parser.Lex();
1595       RegKind = IS_SPECIAL;
1596     } else {
1597       unsigned RegNumIndex = 0;
1598       if (RegName[0] == 'v') {
1599         RegNumIndex = 1;
1600         RegKind = IS_VGPR;
1601       } else if (RegName[0] == 's') {
1602         RegNumIndex = 1;
1603         RegKind = IS_SGPR;
1604       } else if (RegName.startswith("ttmp")) {
1605         RegNumIndex = strlen("ttmp");
1606         RegKind = IS_TTMP;
1607       } else {
1608         return false;
1609       }
1610       if (RegName.size() > RegNumIndex) {
1611         // Single 32-bit register: vXX.
1612         if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
1613           return false;
1614         Parser.Lex();
1615         RegWidth = 1;
1616       } else {
1617         // Range of registers: v[XX:YY]. ":YY" is optional.
1618         Parser.Lex();
1619         int64_t RegLo, RegHi;
1620         if (getLexer().isNot(AsmToken::LBrac))
1621           return false;
1622         Parser.Lex();
1623 
1624         if (getParser().parseAbsoluteExpression(RegLo))
1625           return false;
1626 
1627         const bool isRBrace = getLexer().is(AsmToken::RBrac);
1628         if (!isRBrace && getLexer().isNot(AsmToken::Colon))
1629           return false;
1630         Parser.Lex();
1631 
1632         if (isRBrace) {
1633           RegHi = RegLo;
1634         } else {
1635           if (getParser().parseAbsoluteExpression(RegHi))
1636             return false;
1637 
1638           if (getLexer().isNot(AsmToken::RBrac))
1639             return false;
1640           Parser.Lex();
1641         }
1642         RegNum = (unsigned) RegLo;
1643         RegWidth = (RegHi - RegLo) + 1;
1644       }
1645     }
1646   } else if (getLexer().is(AsmToken::LBrac)) {
1647     // List of consecutive registers: [s0,s1,s2,s3]
1648     Parser.Lex();
1649     if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
1650       return false;
1651     if (RegWidth != 1)
1652       return false;
1653     RegisterKind RegKind1;
1654     unsigned Reg1, RegNum1, RegWidth1;
1655     do {
1656       if (getLexer().is(AsmToken::Comma)) {
1657         Parser.Lex();
1658       } else if (getLexer().is(AsmToken::RBrac)) {
1659         Parser.Lex();
1660         break;
1661       } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
1662         if (RegWidth1 != 1) {
1663           return false;
1664         }
1665         if (RegKind1 != RegKind) {
1666           return false;
1667         }
1668         if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
1669           return false;
1670         }
1671       } else {
1672         return false;
1673       }
1674     } while (true);
1675   } else {
1676     return false;
1677   }
1678   switch (RegKind) {
1679   case IS_SPECIAL:
1680     RegNum = 0;
1681     RegWidth = 1;
1682     break;
1683   case IS_VGPR:
1684   case IS_SGPR:
1685   case IS_TTMP:
1686   {
1687     unsigned Size = 1;
1688     if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
1689       // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
1690       Size = std::min(RegWidth, 4u);
1691     }
1692     if (RegNum % Size != 0)
1693       return false;
1694     if (DwordRegIndex) { *DwordRegIndex = RegNum; }
1695     RegNum = RegNum / Size;
1696     int RCID = getRegClass(RegKind, RegWidth);
1697     if (RCID == -1)
1698       return false;
1699     const MCRegisterClass RC = TRI->getRegClass(RCID);
1700     if (RegNum >= RC.getNumRegs())
1701       return false;
1702     Reg = RC.getRegister(RegNum);
1703     break;
1704   }
1705 
1706   default:
1707     llvm_unreachable("unexpected register kind");
1708   }
1709 
1710   if (!subtargetHasRegister(*TRI, Reg))
1711     return false;
1712   return true;
1713 }
1714 
1715 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
1716   const auto &Tok = Parser.getTok();
1717   SMLoc StartLoc = Tok.getLoc();
1718   SMLoc EndLoc = Tok.getEndLoc();
1719   RegisterKind RegKind;
1720   unsigned Reg, RegNum, RegWidth, DwordRegIndex;
1721 
1722   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
1723     return nullptr;
1724   }
1725   KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
1726   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false);
1727 }
1728 
1729 bool
1730 AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool AbsMod) {
1731   if (AbsMod && getLexer().peekTok().is(AsmToken::Pipe) &&
1732       (getLexer().getKind() == AsmToken::Integer ||
1733        getLexer().getKind() == AsmToken::Real)) {
1734     // This is a workaround for handling operands like these:
1735     //     |1.0|
1736     //     |-1|
1737     // This syntax is not compatible with syntax of standard
1738     // MC expressions (due to the trailing '|').
1739 
1740     SMLoc EndLoc;
1741     const MCExpr *Expr;
1742 
1743     if (getParser().parsePrimaryExpr(Expr, EndLoc)) {
1744       return true;
1745     }
1746 
1747     return !Expr->evaluateAsAbsolute(Val);
1748   }
1749 
1750   return getParser().parseAbsoluteExpression(Val);
1751 }
1752 
1753 OperandMatchResultTy
1754 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool AbsMod) {
1755   // TODO: add syntactic sugar for 1/(2*PI)
1756   bool Minus = false;
1757   if (getLexer().getKind() == AsmToken::Minus) {
1758     Minus = true;
1759     Parser.Lex();
1760   }
1761 
1762   SMLoc S = Parser.getTok().getLoc();
1763   switch(getLexer().getKind()) {
1764   case AsmToken::Integer: {
1765     int64_t IntVal;
1766     if (parseAbsoluteExpr(IntVal, AbsMod))
1767       return MatchOperand_ParseFail;
1768     if (Minus)
1769       IntVal *= -1;
1770     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
1771     return MatchOperand_Success;
1772   }
1773   case AsmToken::Real: {
1774     int64_t IntVal;
1775     if (parseAbsoluteExpr(IntVal, AbsMod))
1776       return MatchOperand_ParseFail;
1777 
1778     APFloat F(BitsToDouble(IntVal));
1779     if (Minus)
1780       F.changeSign();
1781     Operands.push_back(
1782         AMDGPUOperand::CreateImm(this, F.bitcastToAPInt().getZExtValue(), S,
1783                                  AMDGPUOperand::ImmTyNone, true));
1784     return MatchOperand_Success;
1785   }
1786   default:
1787     return Minus ? MatchOperand_ParseFail : MatchOperand_NoMatch;
1788   }
1789 }
1790 
1791 OperandMatchResultTy
1792 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
1793   if (auto R = parseRegister()) {
1794     assert(R->isReg());
1795     R->Reg.IsForcedVOP3 = isForcedVOP3();
1796     Operands.push_back(std::move(R));
1797     return MatchOperand_Success;
1798   }
1799   return MatchOperand_NoMatch;
1800 }
1801 
1802 OperandMatchResultTy
1803 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool AbsMod) {
1804   auto res = parseImm(Operands, AbsMod);
1805   if (res != MatchOperand_NoMatch) {
1806     return res;
1807   }
1808 
1809   return parseReg(Operands);
1810 }
1811 
1812 OperandMatchResultTy
1813 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
1814                                               bool AllowImm) {
1815   bool Negate = false, Negate2 = false, Abs = false, Abs2 = false;
1816 
1817   if (getLexer().getKind()== AsmToken::Minus) {
1818     const AsmToken NextToken = getLexer().peekTok();
1819 
1820     // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
1821     if (NextToken.is(AsmToken::Minus)) {
1822       Error(Parser.getTok().getLoc(), "invalid syntax, expected 'neg' modifier");
1823       return MatchOperand_ParseFail;
1824     }
1825 
1826     // '-' followed by an integer literal N should be interpreted as integer
1827     // negation rather than a floating-point NEG modifier applied to N.
1828     // Beside being contr-intuitive, such use of floating-point NEG modifier
1829     // results in different meaning of integer literals used with VOP1/2/C
1830     // and VOP3, for example:
1831     //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
1832     //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
1833     // Negative fp literals should be handled likewise for unifomtity
1834     if (!NextToken.is(AsmToken::Integer) && !NextToken.is(AsmToken::Real)) {
1835       Parser.Lex();
1836       Negate = true;
1837     }
1838   }
1839 
1840   if (getLexer().getKind() == AsmToken::Identifier &&
1841       Parser.getTok().getString() == "neg") {
1842     if (Negate) {
1843       Error(Parser.getTok().getLoc(), "expected register or immediate");
1844       return MatchOperand_ParseFail;
1845     }
1846     Parser.Lex();
1847     Negate2 = true;
1848     if (getLexer().isNot(AsmToken::LParen)) {
1849       Error(Parser.getTok().getLoc(), "expected left paren after neg");
1850       return MatchOperand_ParseFail;
1851     }
1852     Parser.Lex();
1853   }
1854 
1855   if (getLexer().getKind() == AsmToken::Identifier &&
1856       Parser.getTok().getString() == "abs") {
1857     Parser.Lex();
1858     Abs2 = true;
1859     if (getLexer().isNot(AsmToken::LParen)) {
1860       Error(Parser.getTok().getLoc(), "expected left paren after abs");
1861       return MatchOperand_ParseFail;
1862     }
1863     Parser.Lex();
1864   }
1865 
1866   if (getLexer().getKind() == AsmToken::Pipe) {
1867     if (Abs2) {
1868       Error(Parser.getTok().getLoc(), "expected register or immediate");
1869       return MatchOperand_ParseFail;
1870     }
1871     Parser.Lex();
1872     Abs = true;
1873   }
1874 
1875   OperandMatchResultTy Res;
1876   if (AllowImm) {
1877     Res = parseRegOrImm(Operands, Abs);
1878   } else {
1879     Res = parseReg(Operands);
1880   }
1881   if (Res != MatchOperand_Success) {
1882     return Res;
1883   }
1884 
1885   AMDGPUOperand::Modifiers Mods;
1886   if (Abs) {
1887     if (getLexer().getKind() != AsmToken::Pipe) {
1888       Error(Parser.getTok().getLoc(), "expected vertical bar");
1889       return MatchOperand_ParseFail;
1890     }
1891     Parser.Lex();
1892     Mods.Abs = true;
1893   }
1894   if (Abs2) {
1895     if (getLexer().isNot(AsmToken::RParen)) {
1896       Error(Parser.getTok().getLoc(), "expected closing parentheses");
1897       return MatchOperand_ParseFail;
1898     }
1899     Parser.Lex();
1900     Mods.Abs = true;
1901   }
1902 
1903   if (Negate) {
1904     Mods.Neg = true;
1905   } else if (Negate2) {
1906     if (getLexer().isNot(AsmToken::RParen)) {
1907       Error(Parser.getTok().getLoc(), "expected closing parentheses");
1908       return MatchOperand_ParseFail;
1909     }
1910     Parser.Lex();
1911     Mods.Neg = true;
1912   }
1913 
1914   if (Mods.hasFPModifiers()) {
1915     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
1916     Op.setModifiers(Mods);
1917   }
1918   return MatchOperand_Success;
1919 }
1920 
1921 OperandMatchResultTy
1922 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
1923                                                bool AllowImm) {
1924   bool Sext = false;
1925 
1926   if (getLexer().getKind() == AsmToken::Identifier &&
1927       Parser.getTok().getString() == "sext") {
1928     Parser.Lex();
1929     Sext = true;
1930     if (getLexer().isNot(AsmToken::LParen)) {
1931       Error(Parser.getTok().getLoc(), "expected left paren after sext");
1932       return MatchOperand_ParseFail;
1933     }
1934     Parser.Lex();
1935   }
1936 
1937   OperandMatchResultTy Res;
1938   if (AllowImm) {
1939     Res = parseRegOrImm(Operands);
1940   } else {
1941     Res = parseReg(Operands);
1942   }
1943   if (Res != MatchOperand_Success) {
1944     return Res;
1945   }
1946 
1947   AMDGPUOperand::Modifiers Mods;
1948   if (Sext) {
1949     if (getLexer().isNot(AsmToken::RParen)) {
1950       Error(Parser.getTok().getLoc(), "expected closing parentheses");
1951       return MatchOperand_ParseFail;
1952     }
1953     Parser.Lex();
1954     Mods.Sext = true;
1955   }
1956 
1957   if (Mods.hasIntModifiers()) {
1958     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
1959     Op.setModifiers(Mods);
1960   }
1961 
1962   return MatchOperand_Success;
1963 }
1964 
1965 OperandMatchResultTy
1966 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
1967   return parseRegOrImmWithFPInputMods(Operands, false);
1968 }
1969 
1970 OperandMatchResultTy
1971 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
1972   return parseRegOrImmWithIntInputMods(Operands, false);
1973 }
1974 
1975 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
1976   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
1977   if (Reg) {
1978     Operands.push_back(std::move(Reg));
1979     return MatchOperand_Success;
1980   }
1981 
1982   const AsmToken &Tok = Parser.getTok();
1983   if (Tok.getString() == "off") {
1984     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Tok.getLoc(),
1985                                                 AMDGPUOperand::ImmTyOff, false));
1986     Parser.Lex();
1987     return MatchOperand_Success;
1988   }
1989 
1990   return MatchOperand_NoMatch;
1991 }
1992 
1993 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
1994   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
1995 
1996   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
1997       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
1998       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
1999       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2000     return Match_InvalidOperand;
2001 
2002   if ((TSFlags & SIInstrFlags::VOP3) &&
2003       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2004       getForcedEncodingSize() != 64)
2005     return Match_PreferE32;
2006 
2007   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2008       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2009     // v_mac_f32/16 allow only dst_sel == DWORD;
2010     auto OpNum =
2011         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2012     const auto &Op = Inst.getOperand(OpNum);
2013     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2014       return Match_InvalidOperand;
2015     }
2016   }
2017 
2018   if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) {
2019     // FIXME: Produces error without correct column reported.
2020     auto OpNum =
2021         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset);
2022     const auto &Op = Inst.getOperand(OpNum);
2023     if (Op.getImm() != 0)
2024       return Match_InvalidOperand;
2025   }
2026 
2027   return Match_Success;
2028 }
2029 
2030 // What asm variants we should check
2031 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2032   if (getForcedEncodingSize() == 32) {
2033     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2034     return makeArrayRef(Variants);
2035   }
2036 
2037   if (isForcedVOP3()) {
2038     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2039     return makeArrayRef(Variants);
2040   }
2041 
2042   if (isForcedSDWA()) {
2043     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2044                                         AMDGPUAsmVariants::SDWA9};
2045     return makeArrayRef(Variants);
2046   }
2047 
2048   if (isForcedDPP()) {
2049     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2050     return makeArrayRef(Variants);
2051   }
2052 
2053   static const unsigned Variants[] = {
2054     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2055     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2056   };
2057 
2058   return makeArrayRef(Variants);
2059 }
2060 
2061 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2062   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2063   const unsigned Num = Desc.getNumImplicitUses();
2064   for (unsigned i = 0; i < Num; ++i) {
2065     unsigned Reg = Desc.ImplicitUses[i];
2066     switch (Reg) {
2067     case AMDGPU::FLAT_SCR:
2068     case AMDGPU::VCC:
2069     case AMDGPU::M0:
2070       return Reg;
2071     default:
2072       break;
2073     }
2074   }
2075   return AMDGPU::NoRegister;
2076 }
2077 
2078 // NB: This code is correct only when used to check constant
2079 // bus limitations because GFX7 support no f16 inline constants.
2080 // Note that there are no cases when a GFX7 opcode violates
2081 // constant bus limitations due to the use of an f16 constant.
2082 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2083                                        unsigned OpIdx) const {
2084   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2085 
2086   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2087     return false;
2088   }
2089 
2090   const MCOperand &MO = Inst.getOperand(OpIdx);
2091 
2092   int64_t Val = MO.getImm();
2093   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2094 
2095   switch (OpSize) { // expected operand size
2096   case 8:
2097     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2098   case 4:
2099     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2100   case 2: {
2101     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2102     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2103         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
2104       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2105     } else {
2106       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2107     }
2108   }
2109   default:
2110     llvm_unreachable("invalid operand size");
2111   }
2112 }
2113 
2114 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2115   const MCOperand &MO = Inst.getOperand(OpIdx);
2116   if (MO.isImm()) {
2117     return !isInlineConstant(Inst, OpIdx);
2118   }
2119   return !MO.isReg() ||
2120          isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
2121 }
2122 
2123 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2124   const unsigned Opcode = Inst.getOpcode();
2125   const MCInstrDesc &Desc = MII.get(Opcode);
2126   unsigned ConstantBusUseCount = 0;
2127 
2128   if (Desc.TSFlags &
2129       (SIInstrFlags::VOPC |
2130        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2131        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2132        SIInstrFlags::SDWA)) {
2133     // Check special imm operands (used by madmk, etc)
2134     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2135       ++ConstantBusUseCount;
2136     }
2137 
2138     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2139     if (SGPRUsed != AMDGPU::NoRegister) {
2140       ++ConstantBusUseCount;
2141     }
2142 
2143     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2144     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2145     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2146 
2147     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2148 
2149     for (int OpIdx : OpIndices) {
2150       if (OpIdx == -1) break;
2151 
2152       const MCOperand &MO = Inst.getOperand(OpIdx);
2153       if (usesConstantBus(Inst, OpIdx)) {
2154         if (MO.isReg()) {
2155           const unsigned Reg = mc2PseudoReg(MO.getReg());
2156           // Pairs of registers with a partial intersections like these
2157           //   s0, s[0:1]
2158           //   flat_scratch_lo, flat_scratch
2159           //   flat_scratch_lo, flat_scratch_hi
2160           // are theoretically valid but they are disabled anyway.
2161           // Note that this code mimics SIInstrInfo::verifyInstruction
2162           if (Reg != SGPRUsed) {
2163             ++ConstantBusUseCount;
2164           }
2165           SGPRUsed = Reg;
2166         } else { // Expression or a literal
2167           ++ConstantBusUseCount;
2168         }
2169       }
2170     }
2171   }
2172 
2173   return ConstantBusUseCount <= 1;
2174 }
2175 
2176 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2177   const unsigned Opcode = Inst.getOpcode();
2178   const MCInstrDesc &Desc = MII.get(Opcode);
2179 
2180   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2181   if (DstIdx == -1 ||
2182       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2183     return true;
2184   }
2185 
2186   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2187 
2188   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2189   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2190   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2191 
2192   assert(DstIdx != -1);
2193   const MCOperand &Dst = Inst.getOperand(DstIdx);
2194   assert(Dst.isReg());
2195   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2196 
2197   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2198 
2199   for (int SrcIdx : SrcIndices) {
2200     if (SrcIdx == -1) break;
2201     const MCOperand &Src = Inst.getOperand(SrcIdx);
2202     if (Src.isReg()) {
2203       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2204       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2205         return false;
2206       }
2207     }
2208   }
2209 
2210   return true;
2211 }
2212 
2213 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2214 
2215   const unsigned Opc = Inst.getOpcode();
2216   const MCInstrDesc &Desc = MII.get(Opc);
2217 
2218   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2219     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2220     assert(ClampIdx != -1);
2221     return Inst.getOperand(ClampIdx).getImm() == 0;
2222   }
2223 
2224   return true;
2225 }
2226 
2227 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
2228                                           const SMLoc &IDLoc) {
2229   if (!validateConstantBusLimitations(Inst)) {
2230     Error(IDLoc,
2231       "invalid operand (violates constant bus restrictions)");
2232     return false;
2233   }
2234   if (!validateEarlyClobberLimitations(Inst)) {
2235     Error(IDLoc,
2236       "destination must be different than all sources");
2237     return false;
2238   }
2239   if (!validateIntClampSupported(Inst)) {
2240     Error(IDLoc,
2241       "integer clamping is not supported on this GPU");
2242     return false;
2243   }
2244 
2245   return true;
2246 }
2247 
2248 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2249                                               OperandVector &Operands,
2250                                               MCStreamer &Out,
2251                                               uint64_t &ErrorInfo,
2252                                               bool MatchingInlineAsm) {
2253   MCInst Inst;
2254   unsigned Result = Match_Success;
2255   for (auto Variant : getMatchedVariants()) {
2256     uint64_t EI;
2257     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
2258                                   Variant);
2259     // We order match statuses from least to most specific. We use most specific
2260     // status as resulting
2261     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
2262     if ((R == Match_Success) ||
2263         (R == Match_PreferE32) ||
2264         (R == Match_MissingFeature && Result != Match_PreferE32) ||
2265         (R == Match_InvalidOperand && Result != Match_MissingFeature
2266                                    && Result != Match_PreferE32) ||
2267         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
2268                                    && Result != Match_MissingFeature
2269                                    && Result != Match_PreferE32)) {
2270       Result = R;
2271       ErrorInfo = EI;
2272     }
2273     if (R == Match_Success)
2274       break;
2275   }
2276 
2277   switch (Result) {
2278   default: break;
2279   case Match_Success:
2280     if (!validateInstruction(Inst, IDLoc)) {
2281       return true;
2282     }
2283     Inst.setLoc(IDLoc);
2284     Out.EmitInstruction(Inst, getSTI());
2285     return false;
2286 
2287   case Match_MissingFeature:
2288     return Error(IDLoc, "instruction not supported on this GPU");
2289 
2290   case Match_MnemonicFail:
2291     return Error(IDLoc, "unrecognized instruction mnemonic");
2292 
2293   case Match_InvalidOperand: {
2294     SMLoc ErrorLoc = IDLoc;
2295     if (ErrorInfo != ~0ULL) {
2296       if (ErrorInfo >= Operands.size()) {
2297         return Error(IDLoc, "too few operands for instruction");
2298       }
2299       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
2300       if (ErrorLoc == SMLoc())
2301         ErrorLoc = IDLoc;
2302     }
2303     return Error(ErrorLoc, "invalid operand for instruction");
2304   }
2305 
2306   case Match_PreferE32:
2307     return Error(IDLoc, "internal error: instruction without _e64 suffix "
2308                         "should be encoded as e32");
2309   }
2310   llvm_unreachable("Implement any new match types added!");
2311 }
2312 
2313 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
2314   int64_t Tmp = -1;
2315   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
2316     return true;
2317   }
2318   if (getParser().parseAbsoluteExpression(Tmp)) {
2319     return true;
2320   }
2321   Ret = static_cast<uint32_t>(Tmp);
2322   return false;
2323 }
2324 
2325 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
2326                                                uint32_t &Minor) {
2327   if (ParseAsAbsoluteExpression(Major))
2328     return TokError("invalid major version");
2329 
2330   if (getLexer().isNot(AsmToken::Comma))
2331     return TokError("minor version number required, comma expected");
2332   Lex();
2333 
2334   if (ParseAsAbsoluteExpression(Minor))
2335     return TokError("invalid minor version");
2336 
2337   return false;
2338 }
2339 
2340 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
2341   uint32_t Major;
2342   uint32_t Minor;
2343 
2344   if (ParseDirectiveMajorMinor(Major, Minor))
2345     return true;
2346 
2347   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
2348   return false;
2349 }
2350 
2351 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
2352   uint32_t Major;
2353   uint32_t Minor;
2354   uint32_t Stepping;
2355   StringRef VendorName;
2356   StringRef ArchName;
2357 
2358   // If this directive has no arguments, then use the ISA version for the
2359   // targeted GPU.
2360   if (getLexer().is(AsmToken::EndOfStatement)) {
2361     AMDGPU::IsaInfo::IsaVersion ISA =
2362         AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
2363     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
2364                                                       ISA.Stepping,
2365                                                       "AMD", "AMDGPU");
2366     return false;
2367   }
2368 
2369   if (ParseDirectiveMajorMinor(Major, Minor))
2370     return true;
2371 
2372   if (getLexer().isNot(AsmToken::Comma))
2373     return TokError("stepping version number required, comma expected");
2374   Lex();
2375 
2376   if (ParseAsAbsoluteExpression(Stepping))
2377     return TokError("invalid stepping version");
2378 
2379   if (getLexer().isNot(AsmToken::Comma))
2380     return TokError("vendor name required, comma expected");
2381   Lex();
2382 
2383   if (getLexer().isNot(AsmToken::String))
2384     return TokError("invalid vendor name");
2385 
2386   VendorName = getLexer().getTok().getStringContents();
2387   Lex();
2388 
2389   if (getLexer().isNot(AsmToken::Comma))
2390     return TokError("arch name required, comma expected");
2391   Lex();
2392 
2393   if (getLexer().isNot(AsmToken::String))
2394     return TokError("invalid arch name");
2395 
2396   ArchName = getLexer().getTok().getStringContents();
2397   Lex();
2398 
2399   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
2400                                                     VendorName, ArchName);
2401   return false;
2402 }
2403 
2404 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
2405                                                amd_kernel_code_t &Header) {
2406   SmallString<40> ErrStr;
2407   raw_svector_ostream Err(ErrStr);
2408   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
2409     return TokError(Err.str());
2410   }
2411   Lex();
2412   return false;
2413 }
2414 
2415 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
2416   amd_kernel_code_t Header;
2417   AMDGPU::initDefaultAMDKernelCodeT(Header, getFeatureBits());
2418 
2419   while (true) {
2420     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
2421     // will set the current token to EndOfStatement.
2422     while(getLexer().is(AsmToken::EndOfStatement))
2423       Lex();
2424 
2425     if (getLexer().isNot(AsmToken::Identifier))
2426       return TokError("expected value identifier or .end_amd_kernel_code_t");
2427 
2428     StringRef ID = getLexer().getTok().getIdentifier();
2429     Lex();
2430 
2431     if (ID == ".end_amd_kernel_code_t")
2432       break;
2433 
2434     if (ParseAMDKernelCodeTValue(ID, Header))
2435       return true;
2436   }
2437 
2438   getTargetStreamer().EmitAMDKernelCodeT(Header);
2439 
2440   return false;
2441 }
2442 
2443 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
2444   if (getLexer().isNot(AsmToken::Identifier))
2445     return TokError("expected symbol name");
2446 
2447   StringRef KernelName = Parser.getTok().getString();
2448 
2449   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
2450                                            ELF::STT_AMDGPU_HSA_KERNEL);
2451   Lex();
2452   KernelScope.initialize(getContext());
2453   return false;
2454 }
2455 
2456 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
2457   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
2458     return Error(getParser().getTok().getLoc(),
2459                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
2460                  "architectures");
2461   }
2462 
2463   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
2464 
2465   std::string ISAVersionStringFromSTI;
2466   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
2467   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
2468 
2469   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
2470     return Error(getParser().getTok().getLoc(),
2471                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
2472                  "arguments specified through the command line");
2473   }
2474 
2475   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
2476   Lex();
2477 
2478   return false;
2479 }
2480 
2481 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
2482   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
2483     return Error(getParser().getTok().getLoc(),
2484                  (Twine(HSAMD::AssemblerDirectiveBegin) + Twine(" directive is "
2485                  "not available on non-amdhsa OSes")).str());
2486   }
2487 
2488   std::string HSAMetadataString;
2489   raw_string_ostream YamlStream(HSAMetadataString);
2490 
2491   getLexer().setSkipSpace(false);
2492 
2493   bool FoundEnd = false;
2494   while (!getLexer().is(AsmToken::Eof)) {
2495     while (getLexer().is(AsmToken::Space)) {
2496       YamlStream << getLexer().getTok().getString();
2497       Lex();
2498     }
2499 
2500     if (getLexer().is(AsmToken::Identifier)) {
2501       StringRef ID = getLexer().getTok().getIdentifier();
2502       if (ID == AMDGPU::HSAMD::AssemblerDirectiveEnd) {
2503         Lex();
2504         FoundEnd = true;
2505         break;
2506       }
2507     }
2508 
2509     YamlStream << Parser.parseStringToEndOfStatement()
2510                << getContext().getAsmInfo()->getSeparatorString();
2511 
2512     Parser.eatToEndOfStatement();
2513   }
2514 
2515   getLexer().setSkipSpace(true);
2516 
2517   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
2518     return TokError(Twine("expected directive ") +
2519                     Twine(HSAMD::AssemblerDirectiveEnd) + Twine(" not found"));
2520   }
2521 
2522   YamlStream.flush();
2523 
2524   if (!getTargetStreamer().EmitHSAMetadata(HSAMetadataString))
2525     return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
2526 
2527   return false;
2528 }
2529 
2530 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
2531   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
2532     return Error(getParser().getTok().getLoc(),
2533                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
2534                  "not available on non-amdpal OSes")).str());
2535   }
2536 
2537   PALMD::Metadata PALMetadata;
2538   for (;;) {
2539     uint32_t Value;
2540     if (ParseAsAbsoluteExpression(Value)) {
2541       return TokError(Twine("invalid value in ") +
2542                       Twine(PALMD::AssemblerDirective));
2543     }
2544     PALMetadata.push_back(Value);
2545     if (getLexer().isNot(AsmToken::Comma))
2546       break;
2547     Lex();
2548   }
2549   getTargetStreamer().EmitPALMetadata(PALMetadata);
2550   return false;
2551 }
2552 
2553 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
2554   StringRef IDVal = DirectiveID.getString();
2555 
2556   if (IDVal == ".hsa_code_object_version")
2557     return ParseDirectiveHSACodeObjectVersion();
2558 
2559   if (IDVal == ".hsa_code_object_isa")
2560     return ParseDirectiveHSACodeObjectISA();
2561 
2562   if (IDVal == ".amd_kernel_code_t")
2563     return ParseDirectiveAMDKernelCodeT();
2564 
2565   if (IDVal == ".amdgpu_hsa_kernel")
2566     return ParseDirectiveAMDGPUHsaKernel();
2567 
2568   if (IDVal == ".amd_amdgpu_isa")
2569     return ParseDirectiveISAVersion();
2570 
2571   if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
2572     return ParseDirectiveHSAMetadata();
2573 
2574   if (IDVal == PALMD::AssemblerDirective)
2575     return ParseDirectivePALMetadata();
2576 
2577   return true;
2578 }
2579 
2580 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
2581                                            unsigned RegNo) const {
2582   if (isCI())
2583     return true;
2584 
2585   if (isSI()) {
2586     // No flat_scr
2587     switch (RegNo) {
2588     case AMDGPU::FLAT_SCR:
2589     case AMDGPU::FLAT_SCR_LO:
2590     case AMDGPU::FLAT_SCR_HI:
2591       return false;
2592     default:
2593       return true;
2594     }
2595   }
2596 
2597   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
2598   // SI/CI have.
2599   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
2600        R.isValid(); ++R) {
2601     if (*R == RegNo)
2602       return false;
2603   }
2604 
2605   return true;
2606 }
2607 
2608 OperandMatchResultTy
2609 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
2610   // Try to parse with a custom parser
2611   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
2612 
2613   // If we successfully parsed the operand or if there as an error parsing,
2614   // we are done.
2615   //
2616   // If we are parsing after we reach EndOfStatement then this means we
2617   // are appending default values to the Operands list.  This is only done
2618   // by custom parser, so we shouldn't continue on to the generic parsing.
2619   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
2620       getLexer().is(AsmToken::EndOfStatement))
2621     return ResTy;
2622 
2623   ResTy = parseRegOrImm(Operands);
2624 
2625   if (ResTy == MatchOperand_Success)
2626     return ResTy;
2627 
2628   const auto &Tok = Parser.getTok();
2629   SMLoc S = Tok.getLoc();
2630 
2631   const MCExpr *Expr = nullptr;
2632   if (!Parser.parseExpression(Expr)) {
2633     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2634     return MatchOperand_Success;
2635   }
2636 
2637   // Possibly this is an instruction flag like 'gds'.
2638   if (Tok.getKind() == AsmToken::Identifier) {
2639     Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S));
2640     Parser.Lex();
2641     return MatchOperand_Success;
2642   }
2643 
2644   return MatchOperand_NoMatch;
2645 }
2646 
2647 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
2648   // Clear any forced encodings from the previous instruction.
2649   setForcedEncodingSize(0);
2650   setForcedDPP(false);
2651   setForcedSDWA(false);
2652 
2653   if (Name.endswith("_e64")) {
2654     setForcedEncodingSize(64);
2655     return Name.substr(0, Name.size() - 4);
2656   } else if (Name.endswith("_e32")) {
2657     setForcedEncodingSize(32);
2658     return Name.substr(0, Name.size() - 4);
2659   } else if (Name.endswith("_dpp")) {
2660     setForcedDPP(true);
2661     return Name.substr(0, Name.size() - 4);
2662   } else if (Name.endswith("_sdwa")) {
2663     setForcedSDWA(true);
2664     return Name.substr(0, Name.size() - 5);
2665   }
2666   return Name;
2667 }
2668 
2669 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
2670                                        StringRef Name,
2671                                        SMLoc NameLoc, OperandVector &Operands) {
2672   // Add the instruction mnemonic
2673   Name = parseMnemonicSuffix(Name);
2674   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
2675 
2676   while (!getLexer().is(AsmToken::EndOfStatement)) {
2677     OperandMatchResultTy Res = parseOperand(Operands, Name);
2678 
2679     // Eat the comma or space if there is one.
2680     if (getLexer().is(AsmToken::Comma))
2681       Parser.Lex();
2682 
2683     switch (Res) {
2684       case MatchOperand_Success: break;
2685       case MatchOperand_ParseFail:
2686         Error(getLexer().getLoc(), "failed parsing operand.");
2687         while (!getLexer().is(AsmToken::EndOfStatement)) {
2688           Parser.Lex();
2689         }
2690         return true;
2691       case MatchOperand_NoMatch:
2692         Error(getLexer().getLoc(), "not a valid operand.");
2693         while (!getLexer().is(AsmToken::EndOfStatement)) {
2694           Parser.Lex();
2695         }
2696         return true;
2697     }
2698   }
2699 
2700   return false;
2701 }
2702 
2703 //===----------------------------------------------------------------------===//
2704 // Utility functions
2705 //===----------------------------------------------------------------------===//
2706 
2707 OperandMatchResultTy
2708 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) {
2709   switch(getLexer().getKind()) {
2710     default: return MatchOperand_NoMatch;
2711     case AsmToken::Identifier: {
2712       StringRef Name = Parser.getTok().getString();
2713       if (!Name.equals(Prefix)) {
2714         return MatchOperand_NoMatch;
2715       }
2716 
2717       Parser.Lex();
2718       if (getLexer().isNot(AsmToken::Colon))
2719         return MatchOperand_ParseFail;
2720 
2721       Parser.Lex();
2722 
2723       bool IsMinus = false;
2724       if (getLexer().getKind() == AsmToken::Minus) {
2725         Parser.Lex();
2726         IsMinus = true;
2727       }
2728 
2729       if (getLexer().isNot(AsmToken::Integer))
2730         return MatchOperand_ParseFail;
2731 
2732       if (getParser().parseAbsoluteExpression(Int))
2733         return MatchOperand_ParseFail;
2734 
2735       if (IsMinus)
2736         Int = -Int;
2737       break;
2738     }
2739   }
2740   return MatchOperand_Success;
2741 }
2742 
2743 OperandMatchResultTy
2744 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
2745                                     AMDGPUOperand::ImmTy ImmTy,
2746                                     bool (*ConvertResult)(int64_t&)) {
2747   SMLoc S = Parser.getTok().getLoc();
2748   int64_t Value = 0;
2749 
2750   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
2751   if (Res != MatchOperand_Success)
2752     return Res;
2753 
2754   if (ConvertResult && !ConvertResult(Value)) {
2755     return MatchOperand_ParseFail;
2756   }
2757 
2758   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
2759   return MatchOperand_Success;
2760 }
2761 
2762 OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix(
2763   const char *Prefix,
2764   OperandVector &Operands,
2765   AMDGPUOperand::ImmTy ImmTy,
2766   bool (*ConvertResult)(int64_t&)) {
2767   StringRef Name = Parser.getTok().getString();
2768   if (!Name.equals(Prefix))
2769     return MatchOperand_NoMatch;
2770 
2771   Parser.Lex();
2772   if (getLexer().isNot(AsmToken::Colon))
2773     return MatchOperand_ParseFail;
2774 
2775   Parser.Lex();
2776   if (getLexer().isNot(AsmToken::LBrac))
2777     return MatchOperand_ParseFail;
2778   Parser.Lex();
2779 
2780   unsigned Val = 0;
2781   SMLoc S = Parser.getTok().getLoc();
2782 
2783   // FIXME: How to verify the number of elements matches the number of src
2784   // operands?
2785   for (int I = 0; I < 4; ++I) {
2786     if (I != 0) {
2787       if (getLexer().is(AsmToken::RBrac))
2788         break;
2789 
2790       if (getLexer().isNot(AsmToken::Comma))
2791         return MatchOperand_ParseFail;
2792       Parser.Lex();
2793     }
2794 
2795     if (getLexer().isNot(AsmToken::Integer))
2796       return MatchOperand_ParseFail;
2797 
2798     int64_t Op;
2799     if (getParser().parseAbsoluteExpression(Op))
2800       return MatchOperand_ParseFail;
2801 
2802     if (Op != 0 && Op != 1)
2803       return MatchOperand_ParseFail;
2804     Val |= (Op << I);
2805   }
2806 
2807   Parser.Lex();
2808   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
2809   return MatchOperand_Success;
2810 }
2811 
2812 OperandMatchResultTy
2813 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
2814                                AMDGPUOperand::ImmTy ImmTy) {
2815   int64_t Bit = 0;
2816   SMLoc S = Parser.getTok().getLoc();
2817 
2818   // We are at the end of the statement, and this is a default argument, so
2819   // use a default value.
2820   if (getLexer().isNot(AsmToken::EndOfStatement)) {
2821     switch(getLexer().getKind()) {
2822       case AsmToken::Identifier: {
2823         StringRef Tok = Parser.getTok().getString();
2824         if (Tok == Name) {
2825           Bit = 1;
2826           Parser.Lex();
2827         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
2828           Bit = 0;
2829           Parser.Lex();
2830         } else {
2831           return MatchOperand_NoMatch;
2832         }
2833         break;
2834       }
2835       default:
2836         return MatchOperand_NoMatch;
2837     }
2838   }
2839 
2840   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
2841   return MatchOperand_Success;
2842 }
2843 
2844 static void addOptionalImmOperand(
2845   MCInst& Inst, const OperandVector& Operands,
2846   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
2847   AMDGPUOperand::ImmTy ImmT,
2848   int64_t Default = 0) {
2849   auto i = OptionalIdx.find(ImmT);
2850   if (i != OptionalIdx.end()) {
2851     unsigned Idx = i->second;
2852     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
2853   } else {
2854     Inst.addOperand(MCOperand::createImm(Default));
2855   }
2856 }
2857 
2858 OperandMatchResultTy
2859 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
2860   if (getLexer().isNot(AsmToken::Identifier)) {
2861     return MatchOperand_NoMatch;
2862   }
2863   StringRef Tok = Parser.getTok().getString();
2864   if (Tok != Prefix) {
2865     return MatchOperand_NoMatch;
2866   }
2867 
2868   Parser.Lex();
2869   if (getLexer().isNot(AsmToken::Colon)) {
2870     return MatchOperand_ParseFail;
2871   }
2872 
2873   Parser.Lex();
2874   if (getLexer().isNot(AsmToken::Identifier)) {
2875     return MatchOperand_ParseFail;
2876   }
2877 
2878   Value = Parser.getTok().getString();
2879   return MatchOperand_Success;
2880 }
2881 
2882 //===----------------------------------------------------------------------===//
2883 // ds
2884 //===----------------------------------------------------------------------===//
2885 
2886 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
2887                                     const OperandVector &Operands) {
2888   OptionalImmIndexMap OptionalIdx;
2889 
2890   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
2891     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
2892 
2893     // Add the register arguments
2894     if (Op.isReg()) {
2895       Op.addRegOperands(Inst, 1);
2896       continue;
2897     }
2898 
2899     // Handle optional arguments
2900     OptionalIdx[Op.getImmTy()] = i;
2901   }
2902 
2903   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
2904   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
2905   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
2906 
2907   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
2908 }
2909 
2910 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
2911                                 bool IsGdsHardcoded) {
2912   OptionalImmIndexMap OptionalIdx;
2913 
2914   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
2915     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
2916 
2917     // Add the register arguments
2918     if (Op.isReg()) {
2919       Op.addRegOperands(Inst, 1);
2920       continue;
2921     }
2922 
2923     if (Op.isToken() && Op.getToken() == "gds") {
2924       IsGdsHardcoded = true;
2925       continue;
2926     }
2927 
2928     // Handle optional arguments
2929     OptionalIdx[Op.getImmTy()] = i;
2930   }
2931 
2932   AMDGPUOperand::ImmTy OffsetType =
2933     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si ||
2934      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
2935                                                       AMDGPUOperand::ImmTyOffset;
2936 
2937   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
2938 
2939   if (!IsGdsHardcoded) {
2940     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
2941   }
2942   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
2943 }
2944 
2945 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
2946   OptionalImmIndexMap OptionalIdx;
2947 
2948   unsigned OperandIdx[4];
2949   unsigned EnMask = 0;
2950   int SrcIdx = 0;
2951 
2952   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
2953     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
2954 
2955     // Add the register arguments
2956     if (Op.isReg()) {
2957       assert(SrcIdx < 4);
2958       OperandIdx[SrcIdx] = Inst.size();
2959       Op.addRegOperands(Inst, 1);
2960       ++SrcIdx;
2961       continue;
2962     }
2963 
2964     if (Op.isOff()) {
2965       assert(SrcIdx < 4);
2966       OperandIdx[SrcIdx] = Inst.size();
2967       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
2968       ++SrcIdx;
2969       continue;
2970     }
2971 
2972     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
2973       Op.addImmOperands(Inst, 1);
2974       continue;
2975     }
2976 
2977     if (Op.isToken() && Op.getToken() == "done")
2978       continue;
2979 
2980     // Handle optional arguments
2981     OptionalIdx[Op.getImmTy()] = i;
2982   }
2983 
2984   assert(SrcIdx == 4);
2985 
2986   bool Compr = false;
2987   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
2988     Compr = true;
2989     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
2990     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
2991     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
2992   }
2993 
2994   for (auto i = 0; i < SrcIdx; ++i) {
2995     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
2996       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
2997     }
2998   }
2999 
3000   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
3001   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
3002 
3003   Inst.addOperand(MCOperand::createImm(EnMask));
3004 }
3005 
3006 //===----------------------------------------------------------------------===//
3007 // s_waitcnt
3008 //===----------------------------------------------------------------------===//
3009 
3010 static bool
3011 encodeCnt(
3012   const AMDGPU::IsaInfo::IsaVersion ISA,
3013   int64_t &IntVal,
3014   int64_t CntVal,
3015   bool Saturate,
3016   unsigned (*encode)(const IsaInfo::IsaVersion &Version, unsigned, unsigned),
3017   unsigned (*decode)(const IsaInfo::IsaVersion &Version, unsigned))
3018 {
3019   bool Failed = false;
3020 
3021   IntVal = encode(ISA, IntVal, CntVal);
3022   if (CntVal != decode(ISA, IntVal)) {
3023     if (Saturate) {
3024       IntVal = encode(ISA, IntVal, -1);
3025     } else {
3026       Failed = true;
3027     }
3028   }
3029   return Failed;
3030 }
3031 
3032 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
3033   StringRef CntName = Parser.getTok().getString();
3034   int64_t CntVal;
3035 
3036   Parser.Lex();
3037   if (getLexer().isNot(AsmToken::LParen))
3038     return true;
3039 
3040   Parser.Lex();
3041   if (getLexer().isNot(AsmToken::Integer))
3042     return true;
3043 
3044   SMLoc ValLoc = Parser.getTok().getLoc();
3045   if (getParser().parseAbsoluteExpression(CntVal))
3046     return true;
3047 
3048   AMDGPU::IsaInfo::IsaVersion ISA =
3049       AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
3050 
3051   bool Failed = true;
3052   bool Sat = CntName.endswith("_sat");
3053 
3054   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
3055     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
3056   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
3057     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
3058   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
3059     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
3060   }
3061 
3062   if (Failed) {
3063     Error(ValLoc, "too large value for " + CntName);
3064     return true;
3065   }
3066 
3067   if (getLexer().isNot(AsmToken::RParen)) {
3068     return true;
3069   }
3070 
3071   Parser.Lex();
3072   if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
3073     const AsmToken NextToken = getLexer().peekTok();
3074     if (NextToken.is(AsmToken::Identifier)) {
3075       Parser.Lex();
3076     }
3077   }
3078 
3079   return false;
3080 }
3081 
3082 OperandMatchResultTy
3083 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
3084   AMDGPU::IsaInfo::IsaVersion ISA =
3085       AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
3086   int64_t Waitcnt = getWaitcntBitMask(ISA);
3087   SMLoc S = Parser.getTok().getLoc();
3088 
3089   switch(getLexer().getKind()) {
3090     default: return MatchOperand_ParseFail;
3091     case AsmToken::Integer:
3092       // The operand can be an integer value.
3093       if (getParser().parseAbsoluteExpression(Waitcnt))
3094         return MatchOperand_ParseFail;
3095       break;
3096 
3097     case AsmToken::Identifier:
3098       do {
3099         if (parseCnt(Waitcnt))
3100           return MatchOperand_ParseFail;
3101       } while(getLexer().isNot(AsmToken::EndOfStatement));
3102       break;
3103   }
3104   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
3105   return MatchOperand_Success;
3106 }
3107 
3108 bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
3109                                           int64_t &Width) {
3110   using namespace llvm::AMDGPU::Hwreg;
3111 
3112   if (Parser.getTok().getString() != "hwreg")
3113     return true;
3114   Parser.Lex();
3115 
3116   if (getLexer().isNot(AsmToken::LParen))
3117     return true;
3118   Parser.Lex();
3119 
3120   if (getLexer().is(AsmToken::Identifier)) {
3121     HwReg.IsSymbolic = true;
3122     HwReg.Id = ID_UNKNOWN_;
3123     const StringRef tok = Parser.getTok().getString();
3124     for (int i = ID_SYMBOLIC_FIRST_; i < ID_SYMBOLIC_LAST_; ++i) {
3125       if (tok == IdSymbolic[i]) {
3126         HwReg.Id = i;
3127         break;
3128       }
3129     }
3130     Parser.Lex();
3131   } else {
3132     HwReg.IsSymbolic = false;
3133     if (getLexer().isNot(AsmToken::Integer))
3134       return true;
3135     if (getParser().parseAbsoluteExpression(HwReg.Id))
3136       return true;
3137   }
3138 
3139   if (getLexer().is(AsmToken::RParen)) {
3140     Parser.Lex();
3141     return false;
3142   }
3143 
3144   // optional params
3145   if (getLexer().isNot(AsmToken::Comma))
3146     return true;
3147   Parser.Lex();
3148 
3149   if (getLexer().isNot(AsmToken::Integer))
3150     return true;
3151   if (getParser().parseAbsoluteExpression(Offset))
3152     return true;
3153 
3154   if (getLexer().isNot(AsmToken::Comma))
3155     return true;
3156   Parser.Lex();
3157 
3158   if (getLexer().isNot(AsmToken::Integer))
3159     return true;
3160   if (getParser().parseAbsoluteExpression(Width))
3161     return true;
3162 
3163   if (getLexer().isNot(AsmToken::RParen))
3164     return true;
3165   Parser.Lex();
3166 
3167   return false;
3168 }
3169 
3170 OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
3171   using namespace llvm::AMDGPU::Hwreg;
3172 
3173   int64_t Imm16Val = 0;
3174   SMLoc S = Parser.getTok().getLoc();
3175 
3176   switch(getLexer().getKind()) {
3177     default: return MatchOperand_NoMatch;
3178     case AsmToken::Integer:
3179       // The operand can be an integer value.
3180       if (getParser().parseAbsoluteExpression(Imm16Val))
3181         return MatchOperand_NoMatch;
3182       if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
3183         Error(S, "invalid immediate: only 16-bit values are legal");
3184         // Do not return error code, but create an imm operand anyway and proceed
3185         // to the next operand, if any. That avoids unneccessary error messages.
3186       }
3187       break;
3188 
3189     case AsmToken::Identifier: {
3190         OperandInfoTy HwReg(ID_UNKNOWN_);
3191         int64_t Offset = OFFSET_DEFAULT_;
3192         int64_t Width = WIDTH_M1_DEFAULT_ + 1;
3193         if (parseHwregConstruct(HwReg, Offset, Width))
3194           return MatchOperand_ParseFail;
3195         if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) {
3196           if (HwReg.IsSymbolic)
3197             Error(S, "invalid symbolic name of hardware register");
3198           else
3199             Error(S, "invalid code of hardware register: only 6-bit values are legal");
3200         }
3201         if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset))
3202           Error(S, "invalid bit offset: only 5-bit values are legal");
3203         if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1))
3204           Error(S, "invalid bitfield width: only values from 1 to 32 are legal");
3205         Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_);
3206       }
3207       break;
3208   }
3209   Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
3210   return MatchOperand_Success;
3211 }
3212 
3213 bool AMDGPUOperand::isSWaitCnt() const {
3214   return isImm();
3215 }
3216 
3217 bool AMDGPUOperand::isHwreg() const {
3218   return isImmTy(ImmTyHwreg);
3219 }
3220 
3221 bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) {
3222   using namespace llvm::AMDGPU::SendMsg;
3223 
3224   if (Parser.getTok().getString() != "sendmsg")
3225     return true;
3226   Parser.Lex();
3227 
3228   if (getLexer().isNot(AsmToken::LParen))
3229     return true;
3230   Parser.Lex();
3231 
3232   if (getLexer().is(AsmToken::Identifier)) {
3233     Msg.IsSymbolic = true;
3234     Msg.Id = ID_UNKNOWN_;
3235     const std::string tok = Parser.getTok().getString();
3236     for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
3237       switch(i) {
3238         default: continue; // Omit gaps.
3239         case ID_INTERRUPT: case ID_GS: case ID_GS_DONE:  case ID_SYSMSG: break;
3240       }
3241       if (tok == IdSymbolic[i]) {
3242         Msg.Id = i;
3243         break;
3244       }
3245     }
3246     Parser.Lex();
3247   } else {
3248     Msg.IsSymbolic = false;
3249     if (getLexer().isNot(AsmToken::Integer))
3250       return true;
3251     if (getParser().parseAbsoluteExpression(Msg.Id))
3252       return true;
3253     if (getLexer().is(AsmToken::Integer))
3254       if (getParser().parseAbsoluteExpression(Msg.Id))
3255         Msg.Id = ID_UNKNOWN_;
3256   }
3257   if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest.
3258     return false;
3259 
3260   if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) {
3261     if (getLexer().isNot(AsmToken::RParen))
3262       return true;
3263     Parser.Lex();
3264     return false;
3265   }
3266 
3267   if (getLexer().isNot(AsmToken::Comma))
3268     return true;
3269   Parser.Lex();
3270 
3271   assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG);
3272   Operation.Id = ID_UNKNOWN_;
3273   if (getLexer().is(AsmToken::Identifier)) {
3274     Operation.IsSymbolic = true;
3275     const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
3276     const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
3277     const int L = (Msg.Id == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
3278     const StringRef Tok = Parser.getTok().getString();
3279     for (int i = F; i < L; ++i) {
3280       if (Tok == S[i]) {
3281         Operation.Id = i;
3282         break;
3283       }
3284     }
3285     Parser.Lex();
3286   } else {
3287     Operation.IsSymbolic = false;
3288     if (getLexer().isNot(AsmToken::Integer))
3289       return true;
3290     if (getParser().parseAbsoluteExpression(Operation.Id))
3291       return true;
3292   }
3293 
3294   if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
3295     // Stream id is optional.
3296     if (getLexer().is(AsmToken::RParen)) {
3297       Parser.Lex();
3298       return false;
3299     }
3300 
3301     if (getLexer().isNot(AsmToken::Comma))
3302       return true;
3303     Parser.Lex();
3304 
3305     if (getLexer().isNot(AsmToken::Integer))
3306       return true;
3307     if (getParser().parseAbsoluteExpression(StreamId))
3308       return true;
3309   }
3310 
3311   if (getLexer().isNot(AsmToken::RParen))
3312     return true;
3313   Parser.Lex();
3314   return false;
3315 }
3316 
3317 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
3318   if (getLexer().getKind() != AsmToken::Identifier)
3319     return MatchOperand_NoMatch;
3320 
3321   StringRef Str = Parser.getTok().getString();
3322   int Slot = StringSwitch<int>(Str)
3323     .Case("p10", 0)
3324     .Case("p20", 1)
3325     .Case("p0", 2)
3326     .Default(-1);
3327 
3328   SMLoc S = Parser.getTok().getLoc();
3329   if (Slot == -1)
3330     return MatchOperand_ParseFail;
3331 
3332   Parser.Lex();
3333   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
3334                                               AMDGPUOperand::ImmTyInterpSlot));
3335   return MatchOperand_Success;
3336 }
3337 
3338 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
3339   if (getLexer().getKind() != AsmToken::Identifier)
3340     return MatchOperand_NoMatch;
3341 
3342   StringRef Str = Parser.getTok().getString();
3343   if (!Str.startswith("attr"))
3344     return MatchOperand_NoMatch;
3345 
3346   StringRef Chan = Str.take_back(2);
3347   int AttrChan = StringSwitch<int>(Chan)
3348     .Case(".x", 0)
3349     .Case(".y", 1)
3350     .Case(".z", 2)
3351     .Case(".w", 3)
3352     .Default(-1);
3353   if (AttrChan == -1)
3354     return MatchOperand_ParseFail;
3355 
3356   Str = Str.drop_back(2).drop_front(4);
3357 
3358   uint8_t Attr;
3359   if (Str.getAsInteger(10, Attr))
3360     return MatchOperand_ParseFail;
3361 
3362   SMLoc S = Parser.getTok().getLoc();
3363   Parser.Lex();
3364   if (Attr > 63) {
3365     Error(S, "out of bounds attr");
3366     return MatchOperand_Success;
3367   }
3368 
3369   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
3370 
3371   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
3372                                               AMDGPUOperand::ImmTyInterpAttr));
3373   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
3374                                               AMDGPUOperand::ImmTyAttrChan));
3375   return MatchOperand_Success;
3376 }
3377 
3378 void AMDGPUAsmParser::errorExpTgt() {
3379   Error(Parser.getTok().getLoc(), "invalid exp target");
3380 }
3381 
3382 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
3383                                                       uint8_t &Val) {
3384   if (Str == "null") {
3385     Val = 9;
3386     return MatchOperand_Success;
3387   }
3388 
3389   if (Str.startswith("mrt")) {
3390     Str = Str.drop_front(3);
3391     if (Str == "z") { // == mrtz
3392       Val = 8;
3393       return MatchOperand_Success;
3394     }
3395 
3396     if (Str.getAsInteger(10, Val))
3397       return MatchOperand_ParseFail;
3398 
3399     if (Val > 7)
3400       errorExpTgt();
3401 
3402     return MatchOperand_Success;
3403   }
3404 
3405   if (Str.startswith("pos")) {
3406     Str = Str.drop_front(3);
3407     if (Str.getAsInteger(10, Val))
3408       return MatchOperand_ParseFail;
3409 
3410     if (Val > 3)
3411       errorExpTgt();
3412 
3413     Val += 12;
3414     return MatchOperand_Success;
3415   }
3416 
3417   if (Str.startswith("param")) {
3418     Str = Str.drop_front(5);
3419     if (Str.getAsInteger(10, Val))
3420       return MatchOperand_ParseFail;
3421 
3422     if (Val >= 32)
3423       errorExpTgt();
3424 
3425     Val += 32;
3426     return MatchOperand_Success;
3427   }
3428 
3429   if (Str.startswith("invalid_target_")) {
3430     Str = Str.drop_front(15);
3431     if (Str.getAsInteger(10, Val))
3432       return MatchOperand_ParseFail;
3433 
3434     errorExpTgt();
3435     return MatchOperand_Success;
3436   }
3437 
3438   return MatchOperand_NoMatch;
3439 }
3440 
3441 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
3442   uint8_t Val;
3443   StringRef Str = Parser.getTok().getString();
3444 
3445   auto Res = parseExpTgtImpl(Str, Val);
3446   if (Res != MatchOperand_Success)
3447     return Res;
3448 
3449   SMLoc S = Parser.getTok().getLoc();
3450   Parser.Lex();
3451 
3452   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
3453                                               AMDGPUOperand::ImmTyExpTgt));
3454   return MatchOperand_Success;
3455 }
3456 
3457 OperandMatchResultTy
3458 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
3459   using namespace llvm::AMDGPU::SendMsg;
3460 
3461   int64_t Imm16Val = 0;
3462   SMLoc S = Parser.getTok().getLoc();
3463 
3464   switch(getLexer().getKind()) {
3465   default:
3466     return MatchOperand_NoMatch;
3467   case AsmToken::Integer:
3468     // The operand can be an integer value.
3469     if (getParser().parseAbsoluteExpression(Imm16Val))
3470       return MatchOperand_NoMatch;
3471     if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
3472       Error(S, "invalid immediate: only 16-bit values are legal");
3473       // Do not return error code, but create an imm operand anyway and proceed
3474       // to the next operand, if any. That avoids unneccessary error messages.
3475     }
3476     break;
3477   case AsmToken::Identifier: {
3478       OperandInfoTy Msg(ID_UNKNOWN_);
3479       OperandInfoTy Operation(OP_UNKNOWN_);
3480       int64_t StreamId = STREAM_ID_DEFAULT_;
3481       if (parseSendMsgConstruct(Msg, Operation, StreamId))
3482         return MatchOperand_ParseFail;
3483       do {
3484         // Validate and encode message ID.
3485         if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE)
3486                 || Msg.Id == ID_SYSMSG)) {
3487           if (Msg.IsSymbolic)
3488             Error(S, "invalid/unsupported symbolic name of message");
3489           else
3490             Error(S, "invalid/unsupported code of message");
3491           break;
3492         }
3493         Imm16Val = (Msg.Id << ID_SHIFT_);
3494         // Validate and encode operation ID.
3495         if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) {
3496           if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) {
3497             if (Operation.IsSymbolic)
3498               Error(S, "invalid symbolic name of GS_OP");
3499             else
3500               Error(S, "invalid code of GS_OP: only 2-bit values are legal");
3501             break;
3502           }
3503           if (Operation.Id == OP_GS_NOP
3504               && Msg.Id != ID_GS_DONE) {
3505             Error(S, "invalid GS_OP: NOP is for GS_DONE only");
3506             break;
3507           }
3508           Imm16Val |= (Operation.Id << OP_SHIFT_);
3509         }
3510         if (Msg.Id == ID_SYSMSG) {
3511           if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) {
3512             if (Operation.IsSymbolic)
3513               Error(S, "invalid/unsupported symbolic name of SYSMSG_OP");
3514             else
3515               Error(S, "invalid/unsupported code of SYSMSG_OP");
3516             break;
3517           }
3518           Imm16Val |= (Operation.Id << OP_SHIFT_);
3519         }
3520         // Validate and encode stream ID.
3521         if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
3522           if (! (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) {
3523             Error(S, "invalid stream id: only 2-bit values are legal");
3524             break;
3525           }
3526           Imm16Val |= (StreamId << STREAM_ID_SHIFT_);
3527         }
3528       } while (false);
3529     }
3530     break;
3531   }
3532   Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
3533   return MatchOperand_Success;
3534 }
3535 
3536 bool AMDGPUOperand::isSendMsg() const {
3537   return isImmTy(ImmTySendMsg);
3538 }
3539 
3540 //===----------------------------------------------------------------------===//
3541 // parser helpers
3542 //===----------------------------------------------------------------------===//
3543 
3544 bool
3545 AMDGPUAsmParser::trySkipId(const StringRef Id) {
3546   if (getLexer().getKind() == AsmToken::Identifier &&
3547       Parser.getTok().getString() == Id) {
3548     Parser.Lex();
3549     return true;
3550   }
3551   return false;
3552 }
3553 
3554 bool
3555 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
3556   if (getLexer().getKind() == Kind) {
3557     Parser.Lex();
3558     return true;
3559   }
3560   return false;
3561 }
3562 
3563 bool
3564 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
3565                            const StringRef ErrMsg) {
3566   if (!trySkipToken(Kind)) {
3567     Error(Parser.getTok().getLoc(), ErrMsg);
3568     return false;
3569   }
3570   return true;
3571 }
3572 
3573 bool
3574 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
3575   return !getParser().parseAbsoluteExpression(Imm);
3576 }
3577 
3578 bool
3579 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
3580   SMLoc S = Parser.getTok().getLoc();
3581   if (getLexer().getKind() == AsmToken::String) {
3582     Val = Parser.getTok().getStringContents();
3583     Parser.Lex();
3584     return true;
3585   } else {
3586     Error(S, ErrMsg);
3587     return false;
3588   }
3589 }
3590 
3591 //===----------------------------------------------------------------------===//
3592 // swizzle
3593 //===----------------------------------------------------------------------===//
3594 
3595 LLVM_READNONE
3596 static unsigned
3597 encodeBitmaskPerm(const unsigned AndMask,
3598                   const unsigned OrMask,
3599                   const unsigned XorMask) {
3600   using namespace llvm::AMDGPU::Swizzle;
3601 
3602   return BITMASK_PERM_ENC |
3603          (AndMask << BITMASK_AND_SHIFT) |
3604          (OrMask  << BITMASK_OR_SHIFT)  |
3605          (XorMask << BITMASK_XOR_SHIFT);
3606 }
3607 
3608 bool
3609 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
3610                                       const unsigned MinVal,
3611                                       const unsigned MaxVal,
3612                                       const StringRef ErrMsg) {
3613   for (unsigned i = 0; i < OpNum; ++i) {
3614     if (!skipToken(AsmToken::Comma, "expected a comma")){
3615       return false;
3616     }
3617     SMLoc ExprLoc = Parser.getTok().getLoc();
3618     if (!parseExpr(Op[i])) {
3619       return false;
3620     }
3621     if (Op[i] < MinVal || Op[i] > MaxVal) {
3622       Error(ExprLoc, ErrMsg);
3623       return false;
3624     }
3625   }
3626 
3627   return true;
3628 }
3629 
3630 bool
3631 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
3632   using namespace llvm::AMDGPU::Swizzle;
3633 
3634   int64_t Lane[LANE_NUM];
3635   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
3636                            "expected a 2-bit lane id")) {
3637     Imm = QUAD_PERM_ENC;
3638     for (auto i = 0; i < LANE_NUM; ++i) {
3639       Imm |= Lane[i] << (LANE_SHIFT * i);
3640     }
3641     return true;
3642   }
3643   return false;
3644 }
3645 
3646 bool
3647 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
3648   using namespace llvm::AMDGPU::Swizzle;
3649 
3650   SMLoc S = Parser.getTok().getLoc();
3651   int64_t GroupSize;
3652   int64_t LaneIdx;
3653 
3654   if (!parseSwizzleOperands(1, &GroupSize,
3655                             2, 32,
3656                             "group size must be in the interval [2,32]")) {
3657     return false;
3658   }
3659   if (!isPowerOf2_64(GroupSize)) {
3660     Error(S, "group size must be a power of two");
3661     return false;
3662   }
3663   if (parseSwizzleOperands(1, &LaneIdx,
3664                            0, GroupSize - 1,
3665                            "lane id must be in the interval [0,group size - 1]")) {
3666     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
3667     return true;
3668   }
3669   return false;
3670 }
3671 
3672 bool
3673 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
3674   using namespace llvm::AMDGPU::Swizzle;
3675 
3676   SMLoc S = Parser.getTok().getLoc();
3677   int64_t GroupSize;
3678 
3679   if (!parseSwizzleOperands(1, &GroupSize,
3680       2, 32, "group size must be in the interval [2,32]")) {
3681     return false;
3682   }
3683   if (!isPowerOf2_64(GroupSize)) {
3684     Error(S, "group size must be a power of two");
3685     return false;
3686   }
3687 
3688   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
3689   return true;
3690 }
3691 
3692 bool
3693 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
3694   using namespace llvm::AMDGPU::Swizzle;
3695 
3696   SMLoc S = Parser.getTok().getLoc();
3697   int64_t GroupSize;
3698 
3699   if (!parseSwizzleOperands(1, &GroupSize,
3700       1, 16, "group size must be in the interval [1,16]")) {
3701     return false;
3702   }
3703   if (!isPowerOf2_64(GroupSize)) {
3704     Error(S, "group size must be a power of two");
3705     return false;
3706   }
3707 
3708   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
3709   return true;
3710 }
3711 
3712 bool
3713 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
3714   using namespace llvm::AMDGPU::Swizzle;
3715 
3716   if (!skipToken(AsmToken::Comma, "expected a comma")) {
3717     return false;
3718   }
3719 
3720   StringRef Ctl;
3721   SMLoc StrLoc = Parser.getTok().getLoc();
3722   if (!parseString(Ctl)) {
3723     return false;
3724   }
3725   if (Ctl.size() != BITMASK_WIDTH) {
3726     Error(StrLoc, "expected a 5-character mask");
3727     return false;
3728   }
3729 
3730   unsigned AndMask = 0;
3731   unsigned OrMask = 0;
3732   unsigned XorMask = 0;
3733 
3734   for (size_t i = 0; i < Ctl.size(); ++i) {
3735     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
3736     switch(Ctl[i]) {
3737     default:
3738       Error(StrLoc, "invalid mask");
3739       return false;
3740     case '0':
3741       break;
3742     case '1':
3743       OrMask |= Mask;
3744       break;
3745     case 'p':
3746       AndMask |= Mask;
3747       break;
3748     case 'i':
3749       AndMask |= Mask;
3750       XorMask |= Mask;
3751       break;
3752     }
3753   }
3754 
3755   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
3756   return true;
3757 }
3758 
3759 bool
3760 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
3761 
3762   SMLoc OffsetLoc = Parser.getTok().getLoc();
3763 
3764   if (!parseExpr(Imm)) {
3765     return false;
3766   }
3767   if (!isUInt<16>(Imm)) {
3768     Error(OffsetLoc, "expected a 16-bit offset");
3769     return false;
3770   }
3771   return true;
3772 }
3773 
3774 bool
3775 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
3776   using namespace llvm::AMDGPU::Swizzle;
3777 
3778   if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
3779 
3780     SMLoc ModeLoc = Parser.getTok().getLoc();
3781     bool Ok = false;
3782 
3783     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
3784       Ok = parseSwizzleQuadPerm(Imm);
3785     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
3786       Ok = parseSwizzleBitmaskPerm(Imm);
3787     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
3788       Ok = parseSwizzleBroadcast(Imm);
3789     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
3790       Ok = parseSwizzleSwap(Imm);
3791     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
3792       Ok = parseSwizzleReverse(Imm);
3793     } else {
3794       Error(ModeLoc, "expected a swizzle mode");
3795     }
3796 
3797     return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
3798   }
3799 
3800   return false;
3801 }
3802 
3803 OperandMatchResultTy
3804 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
3805   SMLoc S = Parser.getTok().getLoc();
3806   int64_t Imm = 0;
3807 
3808   if (trySkipId("offset")) {
3809 
3810     bool Ok = false;
3811     if (skipToken(AsmToken::Colon, "expected a colon")) {
3812       if (trySkipId("swizzle")) {
3813         Ok = parseSwizzleMacro(Imm);
3814       } else {
3815         Ok = parseSwizzleOffset(Imm);
3816       }
3817     }
3818 
3819     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
3820 
3821     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
3822   } else {
3823     return MatchOperand_NoMatch;
3824   }
3825 }
3826 
3827 bool
3828 AMDGPUOperand::isSwizzle() const {
3829   return isImmTy(ImmTySwizzle);
3830 }
3831 
3832 //===----------------------------------------------------------------------===//
3833 // sopp branch targets
3834 //===----------------------------------------------------------------------===//
3835 
3836 OperandMatchResultTy
3837 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
3838   SMLoc S = Parser.getTok().getLoc();
3839 
3840   switch (getLexer().getKind()) {
3841     default: return MatchOperand_ParseFail;
3842     case AsmToken::Integer: {
3843       int64_t Imm;
3844       if (getParser().parseAbsoluteExpression(Imm))
3845         return MatchOperand_ParseFail;
3846       Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S));
3847       return MatchOperand_Success;
3848     }
3849 
3850     case AsmToken::Identifier:
3851       Operands.push_back(AMDGPUOperand::CreateExpr(this,
3852           MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
3853                                   Parser.getTok().getString()), getContext()), S));
3854       Parser.Lex();
3855       return MatchOperand_Success;
3856   }
3857 }
3858 
3859 //===----------------------------------------------------------------------===//
3860 // mubuf
3861 //===----------------------------------------------------------------------===//
3862 
3863 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
3864   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
3865 }
3866 
3867 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
3868   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
3869 }
3870 
3871 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultTFE() const {
3872   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyTFE);
3873 }
3874 
3875 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
3876                                const OperandVector &Operands,
3877                                bool IsAtomic, bool IsAtomicReturn) {
3878   OptionalImmIndexMap OptionalIdx;
3879   assert(IsAtomicReturn ? IsAtomic : true);
3880 
3881   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3882     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3883 
3884     // Add the register arguments
3885     if (Op.isReg()) {
3886       Op.addRegOperands(Inst, 1);
3887       continue;
3888     }
3889 
3890     // Handle the case where soffset is an immediate
3891     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
3892       Op.addImmOperands(Inst, 1);
3893       continue;
3894     }
3895 
3896     // Handle tokens like 'offen' which are sometimes hard-coded into the
3897     // asm string.  There are no MCInst operands for these.
3898     if (Op.isToken()) {
3899       continue;
3900     }
3901     assert(Op.isImm());
3902 
3903     // Handle optional arguments
3904     OptionalIdx[Op.getImmTy()] = i;
3905   }
3906 
3907   // Copy $vdata_in operand and insert as $vdata for MUBUF_Atomic RTN insns.
3908   if (IsAtomicReturn) {
3909     MCInst::iterator I = Inst.begin(); // $vdata_in is always at the beginning.
3910     Inst.insert(I, *I);
3911   }
3912 
3913   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
3914   if (!IsAtomic) { // glc is hard-coded.
3915     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
3916   }
3917   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
3918   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
3919 }
3920 
3921 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
3922   OptionalImmIndexMap OptionalIdx;
3923 
3924   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3925     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3926 
3927     // Add the register arguments
3928     if (Op.isReg()) {
3929       Op.addRegOperands(Inst, 1);
3930       continue;
3931     }
3932 
3933     // Handle the case where soffset is an immediate
3934     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
3935       Op.addImmOperands(Inst, 1);
3936       continue;
3937     }
3938 
3939     // Handle tokens like 'offen' which are sometimes hard-coded into the
3940     // asm string.  There are no MCInst operands for these.
3941     if (Op.isToken()) {
3942       continue;
3943     }
3944     assert(Op.isImm());
3945 
3946     // Handle optional arguments
3947     OptionalIdx[Op.getImmTy()] = i;
3948   }
3949 
3950   addOptionalImmOperand(Inst, Operands, OptionalIdx,
3951                         AMDGPUOperand::ImmTyOffset);
3952   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDFMT);
3953   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyNFMT);
3954   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
3955   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
3956   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
3957 }
3958 
3959 //===----------------------------------------------------------------------===//
3960 // mimg
3961 //===----------------------------------------------------------------------===//
3962 
3963 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
3964                               bool IsAtomic) {
3965   unsigned I = 1;
3966   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3967   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
3968     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
3969   }
3970 
3971   if (IsAtomic) {
3972     // Add src, same as dst
3973     ((AMDGPUOperand &)*Operands[I]).addRegOperands(Inst, 1);
3974   }
3975 
3976   OptionalImmIndexMap OptionalIdx;
3977 
3978   for (unsigned E = Operands.size(); I != E; ++I) {
3979     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
3980 
3981     // Add the register arguments
3982     if (Op.isRegOrImm()) {
3983       Op.addRegOrImmOperands(Inst, 1);
3984       continue;
3985     } else if (Op.isImmModifier()) {
3986       OptionalIdx[Op.getImmTy()] = I;
3987     } else {
3988       llvm_unreachable("unexpected operand type");
3989     }
3990   }
3991 
3992   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
3993   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
3994   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
3995   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
3996   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128);
3997   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
3998   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
3999   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
4000 }
4001 
4002 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
4003   cvtMIMG(Inst, Operands, true);
4004 }
4005 
4006 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDMask() const {
4007   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDMask);
4008 }
4009 
4010 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultUNorm() const {
4011   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyUNorm);
4012 }
4013 
4014 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDA() const {
4015   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDA);
4016 }
4017 
4018 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultR128() const {
4019   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyR128);
4020 }
4021 
4022 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultLWE() const {
4023   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyLWE);
4024 }
4025 
4026 //===----------------------------------------------------------------------===//
4027 // smrd
4028 //===----------------------------------------------------------------------===//
4029 
4030 bool AMDGPUOperand::isSMRDOffset8() const {
4031   return isImm() && isUInt<8>(getImm());
4032 }
4033 
4034 bool AMDGPUOperand::isSMRDOffset20() const {
4035   return isImm() && isUInt<20>(getImm());
4036 }
4037 
4038 bool AMDGPUOperand::isSMRDLiteralOffset() const {
4039   // 32-bit literals are only supported on CI and we only want to use them
4040   // when the offset is > 8-bits.
4041   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
4042 }
4043 
4044 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
4045   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4046 }
4047 
4048 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
4049   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4050 }
4051 
4052 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
4053   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4054 }
4055 
4056 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
4057   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4058 }
4059 
4060 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
4061   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4062 }
4063 
4064 //===----------------------------------------------------------------------===//
4065 // vop3
4066 //===----------------------------------------------------------------------===//
4067 
4068 static bool ConvertOmodMul(int64_t &Mul) {
4069   if (Mul != 1 && Mul != 2 && Mul != 4)
4070     return false;
4071 
4072   Mul >>= 1;
4073   return true;
4074 }
4075 
4076 static bool ConvertOmodDiv(int64_t &Div) {
4077   if (Div == 1) {
4078     Div = 0;
4079     return true;
4080   }
4081 
4082   if (Div == 2) {
4083     Div = 3;
4084     return true;
4085   }
4086 
4087   return false;
4088 }
4089 
4090 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
4091   if (BoundCtrl == 0) {
4092     BoundCtrl = 1;
4093     return true;
4094   }
4095 
4096   if (BoundCtrl == -1) {
4097     BoundCtrl = 0;
4098     return true;
4099   }
4100 
4101   return false;
4102 }
4103 
4104 // Note: the order in this table matches the order of operands in AsmString.
4105 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
4106   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
4107   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
4108   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
4109   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
4110   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
4111   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
4112   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
4113   {"dfmt",    AMDGPUOperand::ImmTyDFMT, false, nullptr},
4114   {"nfmt",    AMDGPUOperand::ImmTyNFMT, false, nullptr},
4115   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
4116   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
4117   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
4118   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
4119   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
4120   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
4121   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
4122   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
4123   {"r128",    AMDGPUOperand::ImmTyR128,  true, nullptr},
4124   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
4125   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
4126   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
4127   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
4128   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
4129   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
4130   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
4131   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
4132   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
4133   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
4134   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
4135   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
4136   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
4137   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
4138   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}
4139 };
4140 
4141 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
4142   OperandMatchResultTy res;
4143   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
4144     // try to parse any optional operand here
4145     if (Op.IsBit) {
4146       res = parseNamedBit(Op.Name, Operands, Op.Type);
4147     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
4148       res = parseOModOperand(Operands);
4149     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
4150                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
4151                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
4152       res = parseSDWASel(Operands, Op.Name, Op.Type);
4153     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
4154       res = parseSDWADstUnused(Operands);
4155     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
4156                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
4157                Op.Type == AMDGPUOperand::ImmTyNegLo ||
4158                Op.Type == AMDGPUOperand::ImmTyNegHi) {
4159       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
4160                                         Op.ConvertResult);
4161     } else {
4162       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
4163     }
4164     if (res != MatchOperand_NoMatch) {
4165       return res;
4166     }
4167   }
4168   return MatchOperand_NoMatch;
4169 }
4170 
4171 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
4172   StringRef Name = Parser.getTok().getString();
4173   if (Name == "mul") {
4174     return parseIntWithPrefix("mul", Operands,
4175                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
4176   }
4177 
4178   if (Name == "div") {
4179     return parseIntWithPrefix("div", Operands,
4180                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
4181   }
4182 
4183   return MatchOperand_NoMatch;
4184 }
4185 
4186 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
4187   cvtVOP3P(Inst, Operands);
4188 
4189   int Opc = Inst.getOpcode();
4190 
4191   int SrcNum;
4192   const int Ops[] = { AMDGPU::OpName::src0,
4193                       AMDGPU::OpName::src1,
4194                       AMDGPU::OpName::src2 };
4195   for (SrcNum = 0;
4196        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
4197        ++SrcNum);
4198   assert(SrcNum > 0);
4199 
4200   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4201   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4202 
4203   if ((OpSel & (1 << SrcNum)) != 0) {
4204     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
4205     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
4206     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
4207   }
4208 }
4209 
4210 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
4211       // 1. This operand is input modifiers
4212   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
4213       // 2. This is not last operand
4214       && Desc.NumOperands > (OpNum + 1)
4215       // 3. Next operand is register class
4216       && Desc.OpInfo[OpNum + 1].RegClass != -1
4217       // 4. Next register is not tied to any other operand
4218       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
4219 }
4220 
4221 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
4222 {
4223   OptionalImmIndexMap OptionalIdx;
4224   unsigned Opc = Inst.getOpcode();
4225 
4226   unsigned I = 1;
4227   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4228   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
4229     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
4230   }
4231 
4232   for (unsigned E = Operands.size(); I != E; ++I) {
4233     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4234     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
4235       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
4236     } else if (Op.isInterpSlot() ||
4237                Op.isInterpAttr() ||
4238                Op.isAttrChan()) {
4239       Inst.addOperand(MCOperand::createImm(Op.Imm.Val));
4240     } else if (Op.isImmModifier()) {
4241       OptionalIdx[Op.getImmTy()] = I;
4242     } else {
4243       llvm_unreachable("unhandled operand type");
4244     }
4245   }
4246 
4247   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
4248     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
4249   }
4250 
4251   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
4252     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
4253   }
4254 
4255   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
4256     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
4257   }
4258 }
4259 
4260 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
4261                               OptionalImmIndexMap &OptionalIdx) {
4262   unsigned Opc = Inst.getOpcode();
4263 
4264   unsigned I = 1;
4265   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4266   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
4267     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
4268   }
4269 
4270   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
4271     // This instruction has src modifiers
4272     for (unsigned E = Operands.size(); I != E; ++I) {
4273       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4274       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
4275         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
4276       } else if (Op.isImmModifier()) {
4277         OptionalIdx[Op.getImmTy()] = I;
4278       } else if (Op.isRegOrImm()) {
4279         Op.addRegOrImmOperands(Inst, 1);
4280       } else {
4281         llvm_unreachable("unhandled operand type");
4282       }
4283     }
4284   } else {
4285     // No src modifiers
4286     for (unsigned E = Operands.size(); I != E; ++I) {
4287       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4288       if (Op.isMod()) {
4289         OptionalIdx[Op.getImmTy()] = I;
4290       } else {
4291         Op.addRegOrImmOperands(Inst, 1);
4292       }
4293     }
4294   }
4295 
4296   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
4297     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
4298   }
4299 
4300   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
4301     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
4302   }
4303 
4304   // special case v_mac_{f16, f32}:
4305   // it has src2 register operand that is tied to dst operand
4306   // we don't allow modifiers for this operand in assembler so src2_modifiers
4307   // should be 0
4308   if (Opc == AMDGPU::V_MAC_F32_e64_si || Opc == AMDGPU::V_MAC_F32_e64_vi ||
4309       Opc == AMDGPU::V_MAC_F16_e64_vi) {
4310     auto it = Inst.begin();
4311     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
4312     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
4313     ++it;
4314     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
4315   }
4316 }
4317 
4318 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
4319   OptionalImmIndexMap OptionalIdx;
4320   cvtVOP3(Inst, Operands, OptionalIdx);
4321 }
4322 
4323 void AMDGPUAsmParser::cvtVOP3PImpl(MCInst &Inst,
4324                                    const OperandVector &Operands,
4325                                    bool IsPacked) {
4326   OptionalImmIndexMap OptIdx;
4327   int Opc = Inst.getOpcode();
4328 
4329   cvtVOP3(Inst, Operands, OptIdx);
4330 
4331   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
4332     assert(!IsPacked);
4333     Inst.addOperand(Inst.getOperand(0));
4334   }
4335 
4336   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
4337   // instruction, and then figure out where to actually put the modifiers
4338 
4339   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
4340 
4341   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4342   if (OpSelHiIdx != -1) {
4343     // TODO: Should we change the printing to match?
4344     int DefaultVal = IsPacked ? -1 : 0;
4345     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
4346                           DefaultVal);
4347   }
4348 
4349   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
4350   if (NegLoIdx != -1) {
4351     assert(IsPacked);
4352     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
4353     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
4354   }
4355 
4356   const int Ops[] = { AMDGPU::OpName::src0,
4357                       AMDGPU::OpName::src1,
4358                       AMDGPU::OpName::src2 };
4359   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
4360                          AMDGPU::OpName::src1_modifiers,
4361                          AMDGPU::OpName::src2_modifiers };
4362 
4363   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4364 
4365   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4366   unsigned OpSelHi = 0;
4367   unsigned NegLo = 0;
4368   unsigned NegHi = 0;
4369 
4370   if (OpSelHiIdx != -1) {
4371     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
4372   }
4373 
4374   if (NegLoIdx != -1) {
4375     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
4376     NegLo = Inst.getOperand(NegLoIdx).getImm();
4377     NegHi = Inst.getOperand(NegHiIdx).getImm();
4378   }
4379 
4380   for (int J = 0; J < 3; ++J) {
4381     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
4382     if (OpIdx == -1)
4383       break;
4384 
4385     uint32_t ModVal = 0;
4386 
4387     if ((OpSel & (1 << J)) != 0)
4388       ModVal |= SISrcMods::OP_SEL_0;
4389 
4390     if ((OpSelHi & (1 << J)) != 0)
4391       ModVal |= SISrcMods::OP_SEL_1;
4392 
4393     if ((NegLo & (1 << J)) != 0)
4394       ModVal |= SISrcMods::NEG;
4395 
4396     if ((NegHi & (1 << J)) != 0)
4397       ModVal |= SISrcMods::NEG_HI;
4398 
4399     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
4400 
4401     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
4402   }
4403 }
4404 
4405 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
4406   cvtVOP3PImpl(Inst, Operands, true);
4407 }
4408 
4409 void AMDGPUAsmParser::cvtVOP3P_NotPacked(MCInst &Inst,
4410                                          const OperandVector &Operands) {
4411   cvtVOP3PImpl(Inst, Operands, false);
4412 }
4413 
4414 //===----------------------------------------------------------------------===//
4415 // dpp
4416 //===----------------------------------------------------------------------===//
4417 
4418 bool AMDGPUOperand::isDPPCtrl() const {
4419   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
4420   if (result) {
4421     int64_t Imm = getImm();
4422     return ((Imm >= 0x000) && (Imm <= 0x0ff)) ||
4423            ((Imm >= 0x101) && (Imm <= 0x10f)) ||
4424            ((Imm >= 0x111) && (Imm <= 0x11f)) ||
4425            ((Imm >= 0x121) && (Imm <= 0x12f)) ||
4426            (Imm == 0x130) ||
4427            (Imm == 0x134) ||
4428            (Imm == 0x138) ||
4429            (Imm == 0x13c) ||
4430            (Imm == 0x140) ||
4431            (Imm == 0x141) ||
4432            (Imm == 0x142) ||
4433            (Imm == 0x143);
4434   }
4435   return false;
4436 }
4437 
4438 bool AMDGPUOperand::isGPRIdxMode() const {
4439   return isImm() && isUInt<4>(getImm());
4440 }
4441 
4442 bool AMDGPUOperand::isS16Imm() const {
4443   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
4444 }
4445 
4446 bool AMDGPUOperand::isU16Imm() const {
4447   return isImm() && isUInt<16>(getImm());
4448 }
4449 
4450 OperandMatchResultTy
4451 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
4452   SMLoc S = Parser.getTok().getLoc();
4453   StringRef Prefix;
4454   int64_t Int;
4455 
4456   if (getLexer().getKind() == AsmToken::Identifier) {
4457     Prefix = Parser.getTok().getString();
4458   } else {
4459     return MatchOperand_NoMatch;
4460   }
4461 
4462   if (Prefix == "row_mirror") {
4463     Int = 0x140;
4464     Parser.Lex();
4465   } else if (Prefix == "row_half_mirror") {
4466     Int = 0x141;
4467     Parser.Lex();
4468   } else {
4469     // Check to prevent parseDPPCtrlOps from eating invalid tokens
4470     if (Prefix != "quad_perm"
4471         && Prefix != "row_shl"
4472         && Prefix != "row_shr"
4473         && Prefix != "row_ror"
4474         && Prefix != "wave_shl"
4475         && Prefix != "wave_rol"
4476         && Prefix != "wave_shr"
4477         && Prefix != "wave_ror"
4478         && Prefix != "row_bcast") {
4479       return MatchOperand_NoMatch;
4480     }
4481 
4482     Parser.Lex();
4483     if (getLexer().isNot(AsmToken::Colon))
4484       return MatchOperand_ParseFail;
4485 
4486     if (Prefix == "quad_perm") {
4487       // quad_perm:[%d,%d,%d,%d]
4488       Parser.Lex();
4489       if (getLexer().isNot(AsmToken::LBrac))
4490         return MatchOperand_ParseFail;
4491       Parser.Lex();
4492 
4493       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
4494         return MatchOperand_ParseFail;
4495 
4496       for (int i = 0; i < 3; ++i) {
4497         if (getLexer().isNot(AsmToken::Comma))
4498           return MatchOperand_ParseFail;
4499         Parser.Lex();
4500 
4501         int64_t Temp;
4502         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
4503           return MatchOperand_ParseFail;
4504         const int shift = i*2 + 2;
4505         Int += (Temp << shift);
4506       }
4507 
4508       if (getLexer().isNot(AsmToken::RBrac))
4509         return MatchOperand_ParseFail;
4510       Parser.Lex();
4511     } else {
4512       // sel:%d
4513       Parser.Lex();
4514       if (getParser().parseAbsoluteExpression(Int))
4515         return MatchOperand_ParseFail;
4516 
4517       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
4518         Int |= 0x100;
4519       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
4520         Int |= 0x110;
4521       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
4522         Int |= 0x120;
4523       } else if (Prefix == "wave_shl" && 1 == Int) {
4524         Int = 0x130;
4525       } else if (Prefix == "wave_rol" && 1 == Int) {
4526         Int = 0x134;
4527       } else if (Prefix == "wave_shr" && 1 == Int) {
4528         Int = 0x138;
4529       } else if (Prefix == "wave_ror" && 1 == Int) {
4530         Int = 0x13C;
4531       } else if (Prefix == "row_bcast") {
4532         if (Int == 15) {
4533           Int = 0x142;
4534         } else if (Int == 31) {
4535           Int = 0x143;
4536         } else {
4537           return MatchOperand_ParseFail;
4538         }
4539       } else {
4540         return MatchOperand_ParseFail;
4541       }
4542     }
4543   }
4544 
4545   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
4546   return MatchOperand_Success;
4547 }
4548 
4549 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
4550   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
4551 }
4552 
4553 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
4554   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
4555 }
4556 
4557 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
4558   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
4559 }
4560 
4561 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
4562   OptionalImmIndexMap OptionalIdx;
4563 
4564   unsigned I = 1;
4565   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4566   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
4567     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
4568   }
4569 
4570   // All DPP instructions with at least one source operand have a fake "old"
4571   // source at the beginning that's tied to the dst operand. Handle it here.
4572   if (Desc.getNumOperands() >= 2)
4573     Inst.addOperand(Inst.getOperand(0));
4574 
4575   for (unsigned E = Operands.size(); I != E; ++I) {
4576     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4577     // Add the register arguments
4578     if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
4579       // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
4580       // Skip it.
4581       continue;
4582     } if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
4583       Op.addRegWithFPInputModsOperands(Inst, 2);
4584     } else if (Op.isDPPCtrl()) {
4585       Op.addImmOperands(Inst, 1);
4586     } else if (Op.isImm()) {
4587       // Handle optional arguments
4588       OptionalIdx[Op.getImmTy()] = I;
4589     } else {
4590       llvm_unreachable("Invalid operand type");
4591     }
4592   }
4593 
4594   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
4595   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
4596   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
4597 }
4598 
4599 //===----------------------------------------------------------------------===//
4600 // sdwa
4601 //===----------------------------------------------------------------------===//
4602 
4603 OperandMatchResultTy
4604 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
4605                               AMDGPUOperand::ImmTy Type) {
4606   using namespace llvm::AMDGPU::SDWA;
4607 
4608   SMLoc S = Parser.getTok().getLoc();
4609   StringRef Value;
4610   OperandMatchResultTy res;
4611 
4612   res = parseStringWithPrefix(Prefix, Value);
4613   if (res != MatchOperand_Success) {
4614     return res;
4615   }
4616 
4617   int64_t Int;
4618   Int = StringSwitch<int64_t>(Value)
4619         .Case("BYTE_0", SdwaSel::BYTE_0)
4620         .Case("BYTE_1", SdwaSel::BYTE_1)
4621         .Case("BYTE_2", SdwaSel::BYTE_2)
4622         .Case("BYTE_3", SdwaSel::BYTE_3)
4623         .Case("WORD_0", SdwaSel::WORD_0)
4624         .Case("WORD_1", SdwaSel::WORD_1)
4625         .Case("DWORD", SdwaSel::DWORD)
4626         .Default(0xffffffff);
4627   Parser.Lex(); // eat last token
4628 
4629   if (Int == 0xffffffff) {
4630     return MatchOperand_ParseFail;
4631   }
4632 
4633   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
4634   return MatchOperand_Success;
4635 }
4636 
4637 OperandMatchResultTy
4638 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
4639   using namespace llvm::AMDGPU::SDWA;
4640 
4641   SMLoc S = Parser.getTok().getLoc();
4642   StringRef Value;
4643   OperandMatchResultTy res;
4644 
4645   res = parseStringWithPrefix("dst_unused", Value);
4646   if (res != MatchOperand_Success) {
4647     return res;
4648   }
4649 
4650   int64_t Int;
4651   Int = StringSwitch<int64_t>(Value)
4652         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
4653         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
4654         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
4655         .Default(0xffffffff);
4656   Parser.Lex(); // eat last token
4657 
4658   if (Int == 0xffffffff) {
4659     return MatchOperand_ParseFail;
4660   }
4661 
4662   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
4663   return MatchOperand_Success;
4664 }
4665 
4666 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
4667   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
4668 }
4669 
4670 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
4671   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
4672 }
4673 
4674 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
4675   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
4676 }
4677 
4678 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
4679   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
4680 }
4681 
4682 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
4683                               uint64_t BasicInstType, bool skipVcc) {
4684   using namespace llvm::AMDGPU::SDWA;
4685 
4686   OptionalImmIndexMap OptionalIdx;
4687   bool skippedVcc = false;
4688 
4689   unsigned I = 1;
4690   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4691   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
4692     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
4693   }
4694 
4695   for (unsigned E = Operands.size(); I != E; ++I) {
4696     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4697     if (skipVcc && !skippedVcc && Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
4698       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
4699       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
4700       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
4701       // Skip VCC only if we didn't skip it on previous iteration.
4702       if (BasicInstType == SIInstrFlags::VOP2 &&
4703           (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
4704         skippedVcc = true;
4705         continue;
4706       } else if (BasicInstType == SIInstrFlags::VOPC &&
4707                  Inst.getNumOperands() == 0) {
4708         skippedVcc = true;
4709         continue;
4710       }
4711     }
4712     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
4713       Op.addRegWithInputModsOperands(Inst, 2);
4714     } else if (Op.isImm()) {
4715       // Handle optional arguments
4716       OptionalIdx[Op.getImmTy()] = I;
4717     } else {
4718       llvm_unreachable("Invalid operand type");
4719     }
4720     skippedVcc = false;
4721   }
4722 
4723   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
4724       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
4725     // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
4726     switch (BasicInstType) {
4727     case SIInstrFlags::VOP1:
4728       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
4729       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
4730         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
4731       }
4732       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
4733       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
4734       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
4735       break;
4736 
4737     case SIInstrFlags::VOP2:
4738       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
4739       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
4740         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
4741       }
4742       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
4743       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
4744       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
4745       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
4746       break;
4747 
4748     case SIInstrFlags::VOPC:
4749       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
4750       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
4751       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
4752       break;
4753 
4754     default:
4755       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
4756     }
4757   }
4758 
4759   // special case v_mac_{f16, f32}:
4760   // it has src2 register operand that is tied to dst operand
4761   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
4762       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
4763     auto it = Inst.begin();
4764     std::advance(
4765       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
4766     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
4767   }
4768 }
4769 
4770 /// Force static initialization.
4771 extern "C" void LLVMInitializeAMDGPUAsmParser() {
4772   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
4773   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
4774 }
4775 
4776 #define GET_REGISTER_MATCHER
4777 #define GET_MATCHER_IMPLEMENTATION
4778 #include "AMDGPUGenAsmMatcher.inc"
4779 
4780 // This fuction should be defined after auto-generated include so that we have
4781 // MatchClassKind enum defined
4782 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
4783                                                      unsigned Kind) {
4784   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
4785   // But MatchInstructionImpl() expects to meet token and fails to validate
4786   // operand. This method checks if we are given immediate operand but expect to
4787   // get corresponding token.
4788   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
4789   switch (Kind) {
4790   case MCK_addr64:
4791     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
4792   case MCK_gds:
4793     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
4794   case MCK_glc:
4795     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
4796   case MCK_idxen:
4797     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
4798   case MCK_offen:
4799     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
4800   case MCK_SSrcB32:
4801     // When operands have expression values, they will return true for isToken,
4802     // because it is not possible to distinguish between a token and an
4803     // expression at parse time. MatchInstructionImpl() will always try to
4804     // match an operand as a token, when isToken returns true, and when the
4805     // name of the expression is not a valid token, the match will fail,
4806     // so we need to handle it here.
4807     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
4808   case MCK_SSrcF32:
4809     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
4810   case MCK_SoppBrTarget:
4811     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
4812   case MCK_VReg32OrOff:
4813     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
4814   case MCK_InterpSlot:
4815     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
4816   case MCK_Attr:
4817     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
4818   case MCK_AttrChan:
4819     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
4820   default:
4821     return Match_InvalidOperand;
4822   }
4823 }
4824