1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/BinaryFormat/ELF.h"
24 #include "llvm/MC/MCAsmInfo.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCExpr.h"
27 #include "llvm/MC/MCInst.h"
28 #include "llvm/MC/MCInstrDesc.h"
29 #include "llvm/MC/MCParser/MCAsmLexer.h"
30 #include "llvm/MC/MCParser/MCAsmParser.h"
31 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
32 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
33 #include "llvm/MC/MCSymbol.h"
34 #include "llvm/MC/TargetRegistry.h"
35 #include "llvm/Support/AMDGPUMetadata.h"
36 #include "llvm/Support/AMDHSAKernelDescriptor.h"
37 #include "llvm/Support/Casting.h"
38 #include "llvm/Support/MachineValueType.h"
39 #include "llvm/Support/MathExtras.h"
40 #include "llvm/Support/TargetParser.h"
41 
42 using namespace llvm;
43 using namespace llvm::AMDGPU;
44 using namespace llvm::amdhsa;
45 
46 namespace {
47 
48 class AMDGPUAsmParser;
49 
50 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
51 
52 //===----------------------------------------------------------------------===//
53 // Operand
54 //===----------------------------------------------------------------------===//
55 
56 class AMDGPUOperand : public MCParsedAsmOperand {
57   enum KindTy {
58     Token,
59     Immediate,
60     Register,
61     Expression
62   } Kind;
63 
64   SMLoc StartLoc, EndLoc;
65   const AMDGPUAsmParser *AsmParser;
66 
67 public:
68   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
69       : Kind(Kind_), AsmParser(AsmParser_) {}
70 
71   using Ptr = std::unique_ptr<AMDGPUOperand>;
72 
73   struct Modifiers {
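    // Source operand modifiers parsed from assembly: the FP modifiers neg and
    // abs (written e.g. as '-v1' and '|v1|') and the integer modifier sext
    // (written as 'sext(v1)').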
74     bool Abs = false;
75     bool Neg = false;
76     bool Sext = false;
77 
78     bool hasFPModifiers() const { return Abs || Neg; }
79     bool hasIntModifiers() const { return Sext; }
80     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
81 
82     int64_t getFPModifiersOperand() const {
83       int64_t Operand = 0;
84       Operand |= Abs ? SISrcMods::ABS : 0u;
85       Operand |= Neg ? SISrcMods::NEG : 0u;
86       return Operand;
87     }
88 
89     int64_t getIntModifiersOperand() const {
90       int64_t Operand = 0;
91       Operand |= Sext ? SISrcMods::SEXT : 0u;
92       return Operand;
93     }
94 
95     int64_t getModifiersOperand() const {
96       assert(!(hasFPModifiers() && hasIntModifiers())
97            && "fp and int modifiers should not be used simultaneously");
98       if (hasFPModifiers()) {
99         return getFPModifiersOperand();
100       } else if (hasIntModifiers()) {
101         return getIntModifiersOperand();
102       } else {
103         return 0;
104       }
105     }
106 
107     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
108   };
109 
110   enum ImmTy {
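    // Tag identifying which named immediate operand (e.g. 'offset', 'gds',
    // DPP/SDWA controls) a parsed immediate represents, so it can be matched
    // against the operand class the instruction expects.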
111     ImmTyNone,
112     ImmTyGDS,
113     ImmTyLDS,
114     ImmTyOffen,
115     ImmTyIdxen,
116     ImmTyAddr64,
117     ImmTyOffset,
118     ImmTyInstOffset,
119     ImmTyOffset0,
120     ImmTyOffset1,
121     ImmTyCPol,
122     ImmTySWZ,
123     ImmTyTFE,
124     ImmTyD16,
125     ImmTyClampSI,
126     ImmTyOModSI,
127     ImmTySdwaDstSel,
128     ImmTySdwaSrc0Sel,
129     ImmTySdwaSrc1Sel,
130     ImmTySdwaDstUnused,
131     ImmTyDMask,
132     ImmTyDim,
133     ImmTyUNorm,
134     ImmTyDA,
135     ImmTyR128A16,
136     ImmTyA16,
137     ImmTyLWE,
138     ImmTyExpTgt,
139     ImmTyExpCompr,
140     ImmTyExpVM,
141     ImmTyFORMAT,
142     ImmTyHwreg,
143     ImmTyOff,
144     ImmTySendMsg,
145     ImmTyInterpSlot,
146     ImmTyInterpAttr,
147     ImmTyAttrChan,
148     ImmTyOpSel,
149     ImmTyOpSelHi,
150     ImmTyNegLo,
151     ImmTyNegHi,
152     ImmTyDPP8,
153     ImmTyDppCtrl,
154     ImmTyDppRowMask,
155     ImmTyDppBankMask,
156     ImmTyDppBoundCtrl,
157     ImmTyDppFi,
158     ImmTySwizzle,
159     ImmTyGprIdxMode,
160     ImmTyHigh,
161     ImmTyBLGP,
162     ImmTyCBSZ,
163     ImmTyABID,
164     ImmTyEndpgm,
165     ImmTyWaitVDST,
166     ImmTyWaitEXP,
167   };
168 
169   enum ImmKindTy {
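    // How an immediate operand is expected to be encoded: not yet classified,
    // as a literal constant, or as an inline constant.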
170     ImmKindTyNone,
171     ImmKindTyLiteral,
172     ImmKindTyConst,
173   };
174 
175 private:
176   struct TokOp {
177     const char *Data;
178     unsigned Length;
179   };
180 
181   struct ImmOp {
182     int64_t Val;
183     ImmTy Type;
184     bool IsFPImm;
185     mutable ImmKindTy Kind;
186     Modifiers Mods;
187   };
188 
189   struct RegOp {
190     unsigned RegNo;
191     Modifiers Mods;
192   };
193 
194   union {
195     TokOp Tok;
196     ImmOp Imm;
197     RegOp Reg;
198     const MCExpr *Expr;
199   };
200 
201 public:
202   bool isToken() const override {
203     if (Kind == Token)
204       return true;
205 
206     // When parsing operands, we can't always tell if something was meant to be
207     // a token, like 'gds', or an expression that references a global variable.
208     // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
210     return isSymbolRefExpr();
211   }
212 
213   bool isSymbolRefExpr() const {
214     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
215   }
216 
217   bool isImm() const override {
218     return Kind == Immediate;
219   }
220 
221   void setImmKindNone() const {
222     assert(isImm());
223     Imm.Kind = ImmKindTyNone;
224   }
225 
226   void setImmKindLiteral() const {
227     assert(isImm());
228     Imm.Kind = ImmKindTyLiteral;
229   }
230 
231   void setImmKindConst() const {
232     assert(isImm());
233     Imm.Kind = ImmKindTyConst;
234   }
235 
236   bool IsImmKindLiteral() const {
237     return isImm() && Imm.Kind == ImmKindTyLiteral;
238   }
239 
240   bool isImmKindConst() const {
241     return isImm() && Imm.Kind == ImmKindTyConst;
242   }
243 
244   bool isInlinableImm(MVT type) const;
245   bool isLiteralImm(MVT type) const;
246 
247   bool isRegKind() const {
248     return Kind == Register;
249   }
250 
251   bool isReg() const override {
252     return isRegKind() && !hasModifiers();
253   }
254 
255   bool isRegOrInline(unsigned RCID, MVT type) const {
256     return isRegClass(RCID) || isInlinableImm(type);
257   }
258 
259   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
260     return isRegOrInline(RCID, type) || isLiteralImm(type);
261   }
262 
263   bool isRegOrImmWithInt16InputMods() const {
264     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
265   }
266 
267   bool isRegOrImmWithInt32InputMods() const {
268     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
269   }
270 
271   bool isRegOrInlineImmWithInt16InputMods() const {
272     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
273   }
274 
275   bool isRegOrInlineImmWithInt32InputMods() const {
276     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
277   }
278 
279   bool isRegOrImmWithInt64InputMods() const {
280     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
281   }
282 
283   bool isRegOrImmWithFP16InputMods() const {
284     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
285   }
286 
287   bool isRegOrImmWithFP32InputMods() const {
288     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
289   }
290 
291   bool isRegOrImmWithFP64InputMods() const {
292     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
293   }
294 
295   bool isRegOrInlineImmWithFP16InputMods() const {
296     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16);
297   }
298 
299   bool isRegOrInlineImmWithFP32InputMods() const {
300     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
301   }
302 
304   bool isVReg() const {
305     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
306            isRegClass(AMDGPU::VReg_64RegClassID) ||
307            isRegClass(AMDGPU::VReg_96RegClassID) ||
308            isRegClass(AMDGPU::VReg_128RegClassID) ||
309            isRegClass(AMDGPU::VReg_160RegClassID) ||
310            isRegClass(AMDGPU::VReg_192RegClassID) ||
311            isRegClass(AMDGPU::VReg_256RegClassID) ||
312            isRegClass(AMDGPU::VReg_512RegClassID) ||
313            isRegClass(AMDGPU::VReg_1024RegClassID);
314   }
315 
316   bool isVReg32() const {
317     return isRegClass(AMDGPU::VGPR_32RegClassID);
318   }
319 
320   bool isVReg32OrOff() const {
321     return isOff() || isVReg32();
322   }
323 
324   bool isNull() const {
325     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
326   }
327 
328   bool isVRegWithInputMods() const;
329 
330   bool isSDWAOperand(MVT type) const;
331   bool isSDWAFP16Operand() const;
332   bool isSDWAFP32Operand() const;
333   bool isSDWAInt16Operand() const;
334   bool isSDWAInt32Operand() const;
335 
336   bool isImmTy(ImmTy ImmT) const {
337     return isImm() && Imm.Type == ImmT;
338   }
339 
340   bool isImmModifier() const {
341     return isImm() && Imm.Type != ImmTyNone;
342   }
343 
344   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
345   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
346   bool isDMask() const { return isImmTy(ImmTyDMask); }
347   bool isDim() const { return isImmTy(ImmTyDim); }
348   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
349   bool isDA() const { return isImmTy(ImmTyDA); }
350   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
351   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
352   bool isLWE() const { return isImmTy(ImmTyLWE); }
353   bool isOff() const { return isImmTy(ImmTyOff); }
354   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
355   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
356   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
357   bool isOffen() const { return isImmTy(ImmTyOffen); }
358   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
359   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
360   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
361   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
362   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
363 
364   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
365   bool isGDS() const { return isImmTy(ImmTyGDS); }
366   bool isLDS() const { return isImmTy(ImmTyLDS); }
367   bool isCPol() const { return isImmTy(ImmTyCPol); }
368   bool isSWZ() const { return isImmTy(ImmTySWZ); }
369   bool isTFE() const { return isImmTy(ImmTyTFE); }
370   bool isD16() const { return isImmTy(ImmTyD16); }
371   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
372   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
373   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
374   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
375   bool isFI() const { return isImmTy(ImmTyDppFi); }
376   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
377   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
378   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
379   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
380   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
381   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
382   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
383   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
384   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
385   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
386   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
387   bool isHigh() const { return isImmTy(ImmTyHigh); }
388 
389   bool isMod() const {
390     return isClampSI() || isOModSI();
391   }
392 
393   bool isRegOrImm() const {
394     return isReg() || isImm();
395   }
396 
397   bool isRegClass(unsigned RCID) const;
398 
399   bool isInlineValue() const;
400 
401   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
402     return isRegOrInline(RCID, type) && !hasModifiers();
403   }
404 
405   bool isSCSrcB16() const {
406     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
407   }
408 
409   bool isSCSrcV2B16() const {
410     return isSCSrcB16();
411   }
412 
413   bool isSCSrcB32() const {
414     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
415   }
416 
417   bool isSCSrcB64() const {
418     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
419   }
420 
421   bool isBoolReg() const;
422 
423   bool isSCSrcF16() const {
424     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
425   }
426 
427   bool isSCSrcV2F16() const {
428     return isSCSrcF16();
429   }
430 
431   bool isSCSrcF32() const {
432     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
433   }
434 
435   bool isSCSrcF64() const {
436     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
437   }
438 
439   bool isSSrcB32() const {
440     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
441   }
442 
443   bool isSSrcB16() const {
444     return isSCSrcB16() || isLiteralImm(MVT::i16);
445   }
446 
447   bool isSSrcV2B16() const {
448     llvm_unreachable("cannot happen");
449     return isSSrcB16();
450   }
451 
452   bool isSSrcB64() const {
453     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
454     // See isVSrc64().
455     return isSCSrcB64() || isLiteralImm(MVT::i64);
456   }
457 
458   bool isSSrcF32() const {
459     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
460   }
461 
462   bool isSSrcF64() const {
463     return isSCSrcB64() || isLiteralImm(MVT::f64);
464   }
465 
466   bool isSSrcF16() const {
467     return isSCSrcB16() || isLiteralImm(MVT::f16);
468   }
469 
470   bool isSSrcV2F16() const {
471     llvm_unreachable("cannot happen");
472     return isSSrcF16();
473   }
474 
475   bool isSSrcV2FP32() const {
476     llvm_unreachable("cannot happen");
477     return isSSrcF32();
478   }
479 
480   bool isSCSrcV2FP32() const {
481     llvm_unreachable("cannot happen");
482     return isSCSrcF32();
483   }
484 
485   bool isSSrcV2INT32() const {
486     llvm_unreachable("cannot happen");
487     return isSSrcB32();
488   }
489 
490   bool isSCSrcV2INT32() const {
491     llvm_unreachable("cannot happen");
492     return isSCSrcB32();
493   }
494 
495   bool isSSrcOrLdsB32() const {
496     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
497            isLiteralImm(MVT::i32) || isExpr();
498   }
499 
500   bool isVCSrcB32() const {
501     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
502   }
503 
504   bool isVCSrcB64() const {
505     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
506   }
507 
508   bool isVCSrcB16() const {
509     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
510   }
511 
512   bool isVCSrcV2B16() const {
513     return isVCSrcB16();
514   }
515 
516   bool isVCSrcF32() const {
517     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
518   }
519 
520   bool isVCSrcF64() const {
521     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
522   }
523 
524   bool isVCSrcF16() const {
525     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
526   }
527 
528   bool isVCSrcV2F16() const {
529     return isVCSrcF16();
530   }
531 
532   bool isVSrcB32() const {
533     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
534   }
535 
536   bool isVSrcB64() const {
537     return isVCSrcF64() || isLiteralImm(MVT::i64);
538   }
539 
540   bool isVSrcB16() const {
541     return isVCSrcB16() || isLiteralImm(MVT::i16);
542   }
543 
544   bool isVSrcV2B16() const {
545     return isVSrcB16() || isLiteralImm(MVT::v2i16);
546   }
547 
548   bool isVCSrcV2FP32() const {
549     return isVCSrcF64();
550   }
551 
552   bool isVSrcV2FP32() const {
553     return isVSrcF64() || isLiteralImm(MVT::v2f32);
554   }
555 
556   bool isVCSrcV2INT32() const {
557     return isVCSrcB64();
558   }
559 
560   bool isVSrcV2INT32() const {
561     return isVSrcB64() || isLiteralImm(MVT::v2i32);
562   }
563 
564   bool isVSrcF32() const {
565     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
566   }
567 
568   bool isVSrcF64() const {
569     return isVCSrcF64() || isLiteralImm(MVT::f64);
570   }
571 
572   bool isVSrcF16() const {
573     return isVCSrcF16() || isLiteralImm(MVT::f16);
574   }
575 
576   bool isVSrcV2F16() const {
577     return isVSrcF16() || isLiteralImm(MVT::v2f16);
578   }
579 
580   bool isVISrcB32() const {
581     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
582   }
583 
584   bool isVISrcB16() const {
585     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
586   }
587 
588   bool isVISrcV2B16() const {
589     return isVISrcB16();
590   }
591 
592   bool isVISrcF32() const {
593     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
594   }
595 
596   bool isVISrcF16() const {
597     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
598   }
599 
600   bool isVISrcV2F16() const {
601     return isVISrcF16() || isVISrcB32();
602   }
603 
604   bool isVISrc_64B64() const {
605     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
606   }
607 
608   bool isVISrc_64F64() const {
609     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
610   }
611 
612   bool isVISrc_64V2FP32() const {
613     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
614   }
615 
616   bool isVISrc_64V2INT32() const {
617     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
618   }
619 
620   bool isVISrc_256B64() const {
621     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
622   }
623 
624   bool isVISrc_256F64() const {
625     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
626   }
627 
628   bool isVISrc_128B16() const {
629     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
630   }
631 
632   bool isVISrc_128V2B16() const {
633     return isVISrc_128B16();
634   }
635 
636   bool isVISrc_128B32() const {
637     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
638   }
639 
640   bool isVISrc_128F32() const {
641     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
642   }
643 
644   bool isVISrc_256V2FP32() const {
645     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
646   }
647 
648   bool isVISrc_256V2INT32() const {
649     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
650   }
651 
652   bool isVISrc_512B32() const {
653     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
654   }
655 
656   bool isVISrc_512B16() const {
657     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
658   }
659 
660   bool isVISrc_512V2B16() const {
661     return isVISrc_512B16();
662   }
663 
664   bool isVISrc_512F32() const {
665     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
666   }
667 
668   bool isVISrc_512F16() const {
669     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
670   }
671 
672   bool isVISrc_512V2F16() const {
673     return isVISrc_512F16() || isVISrc_512B32();
674   }
675 
676   bool isVISrc_1024B32() const {
677     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
678   }
679 
680   bool isVISrc_1024B16() const {
681     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
682   }
683 
684   bool isVISrc_1024V2B16() const {
685     return isVISrc_1024B16();
686   }
687 
688   bool isVISrc_1024F32() const {
689     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
690   }
691 
692   bool isVISrc_1024F16() const {
693     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
694   }
695 
696   bool isVISrc_1024V2F16() const {
697     return isVISrc_1024F16() || isVISrc_1024B32();
698   }
699 
700   bool isAISrcB32() const {
701     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
702   }
703 
704   bool isAISrcB16() const {
705     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
706   }
707 
708   bool isAISrcV2B16() const {
709     return isAISrcB16();
710   }
711 
712   bool isAISrcF32() const {
713     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
714   }
715 
716   bool isAISrcF16() const {
717     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
718   }
719 
720   bool isAISrcV2F16() const {
721     return isAISrcF16() || isAISrcB32();
722   }
723 
724   bool isAISrc_64B64() const {
725     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
726   }
727 
728   bool isAISrc_64F64() const {
729     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
730   }
731 
732   bool isAISrc_128B32() const {
733     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
734   }
735 
736   bool isAISrc_128B16() const {
737     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
738   }
739 
740   bool isAISrc_128V2B16() const {
741     return isAISrc_128B16();
742   }
743 
744   bool isAISrc_128F32() const {
745     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
746   }
747 
748   bool isAISrc_128F16() const {
749     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
750   }
751 
752   bool isAISrc_128V2F16() const {
753     return isAISrc_128F16() || isAISrc_128B32();
754   }
755 
756   bool isVISrc_128F16() const {
757     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
758   }
759 
760   bool isVISrc_128V2F16() const {
761     return isVISrc_128F16() || isVISrc_128B32();
762   }
763 
764   bool isAISrc_256B64() const {
765     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
766   }
767 
768   bool isAISrc_256F64() const {
769     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
770   }
771 
772   bool isAISrc_512B32() const {
773     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
774   }
775 
776   bool isAISrc_512B16() const {
777     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
778   }
779 
780   bool isAISrc_512V2B16() const {
781     return isAISrc_512B16();
782   }
783 
784   bool isAISrc_512F32() const {
785     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
786   }
787 
788   bool isAISrc_512F16() const {
789     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
790   }
791 
792   bool isAISrc_512V2F16() const {
793     return isAISrc_512F16() || isAISrc_512B32();
794   }
795 
796   bool isAISrc_1024B32() const {
797     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
798   }
799 
800   bool isAISrc_1024B16() const {
801     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
802   }
803 
804   bool isAISrc_1024V2B16() const {
805     return isAISrc_1024B16();
806   }
807 
808   bool isAISrc_1024F32() const {
809     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
810   }
811 
812   bool isAISrc_1024F16() const {
813     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
814   }
815 
816   bool isAISrc_1024V2F16() const {
817     return isAISrc_1024F16() || isAISrc_1024B32();
818   }
819 
820   bool isKImmFP32() const {
821     return isLiteralImm(MVT::f32);
822   }
823 
824   bool isKImmFP16() const {
825     return isLiteralImm(MVT::f16);
826   }
827 
828   bool isMem() const override {
829     return false;
830   }
831 
832   bool isExpr() const {
833     return Kind == Expression;
834   }
835 
836   bool isSoppBrTarget() const {
837     return isExpr() || isImm();
838   }
839 
840   bool isSWaitCnt() const;
841   bool isDepCtr() const;
842   bool isSDelayAlu() const;
843   bool isHwreg() const;
844   bool isSendMsg() const;
845   bool isSwizzle() const;
846   bool isSMRDOffset8() const;
847   bool isSMEMOffset() const;
848   bool isSMRDLiteralOffset() const;
849   bool isDPP8() const;
850   bool isDPPCtrl() const;
851   bool isBLGP() const;
852   bool isCBSZ() const;
853   bool isABID() const;
854   bool isGPRIdxMode() const;
855   bool isS16Imm() const;
856   bool isU16Imm() const;
857   bool isEndpgm() const;
858   bool isWaitVDST() const;
859   bool isWaitEXP() const;
860 
861   StringRef getExpressionAsToken() const {
862     assert(isExpr());
863     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
864     return S->getSymbol().getName();
865   }
866 
867   StringRef getToken() const {
868     assert(isToken());
869 
870     if (Kind == Expression)
871       return getExpressionAsToken();
872 
873     return StringRef(Tok.Data, Tok.Length);
874   }
875 
876   int64_t getImm() const {
877     assert(isImm());
878     return Imm.Val;
879   }
880 
881   void setImm(int64_t Val) {
882     assert(isImm());
883     Imm.Val = Val;
884   }
885 
886   ImmTy getImmTy() const {
887     assert(isImm());
888     return Imm.Type;
889   }
890 
891   unsigned getReg() const override {
892     assert(isRegKind());
893     return Reg.RegNo;
894   }
895 
896   SMLoc getStartLoc() const override {
897     return StartLoc;
898   }
899 
900   SMLoc getEndLoc() const override {
901     return EndLoc;
902   }
903 
904   SMRange getLocRange() const {
905     return SMRange(StartLoc, EndLoc);
906   }
907 
908   Modifiers getModifiers() const {
909     assert(isRegKind() || isImmTy(ImmTyNone));
910     return isRegKind() ? Reg.Mods : Imm.Mods;
911   }
912 
913   void setModifiers(Modifiers Mods) {
914     assert(isRegKind() || isImmTy(ImmTyNone));
915     if (isRegKind())
916       Reg.Mods = Mods;
917     else
918       Imm.Mods = Mods;
919   }
920 
921   bool hasModifiers() const {
922     return getModifiers().hasModifiers();
923   }
924 
925   bool hasFPModifiers() const {
926     return getModifiers().hasFPModifiers();
927   }
928 
929   bool hasIntModifiers() const {
930     return getModifiers().hasIntModifiers();
931   }
932 
933   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
934 
935   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
936 
937   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
938 
939   template <unsigned Bitwidth>
940   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
941 
942   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
943     addKImmFPOperands<16>(Inst, N);
944   }
945 
946   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
947     addKImmFPOperands<32>(Inst, N);
948   }
949 
950   void addRegOperands(MCInst &Inst, unsigned N) const;
951 
952   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
953     addRegOperands(Inst, N);
954   }
955 
956   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
957     if (isRegKind())
958       addRegOperands(Inst, N);
959     else if (isExpr())
960       Inst.addOperand(MCOperand::createExpr(Expr));
961     else
962       addImmOperands(Inst, N);
963   }
964 
965   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
966     Modifiers Mods = getModifiers();
967     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
968     if (isRegKind()) {
969       addRegOperands(Inst, N);
970     } else {
971       addImmOperands(Inst, N, false);
972     }
973   }
974 
975   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
976     assert(!hasIntModifiers());
977     addRegOrImmWithInputModsOperands(Inst, N);
978   }
979 
980   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
981     assert(!hasFPModifiers());
982     addRegOrImmWithInputModsOperands(Inst, N);
983   }
984 
985   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
986     Modifiers Mods = getModifiers();
987     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
988     assert(isRegKind());
989     addRegOperands(Inst, N);
990   }
991 
992   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
993     assert(!hasIntModifiers());
994     addRegWithInputModsOperands(Inst, N);
995   }
996 
997   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
998     assert(!hasFPModifiers());
999     addRegWithInputModsOperands(Inst, N);
1000   }
1001 
1002   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
1003     if (isImm())
1004       addImmOperands(Inst, N);
1005     else {
1006       assert(isExpr());
1007       Inst.addOperand(MCOperand::createExpr(Expr));
1008     }
1009   }
1010 
1011   static void printImmTy(raw_ostream& OS, ImmTy Type) {
1012     switch (Type) {
1013     case ImmTyNone: OS << "None"; break;
1014     case ImmTyGDS: OS << "GDS"; break;
1015     case ImmTyLDS: OS << "LDS"; break;
1016     case ImmTyOffen: OS << "Offen"; break;
1017     case ImmTyIdxen: OS << "Idxen"; break;
1018     case ImmTyAddr64: OS << "Addr64"; break;
1019     case ImmTyOffset: OS << "Offset"; break;
1020     case ImmTyInstOffset: OS << "InstOffset"; break;
1021     case ImmTyOffset0: OS << "Offset0"; break;
1022     case ImmTyOffset1: OS << "Offset1"; break;
1023     case ImmTyCPol: OS << "CPol"; break;
1024     case ImmTySWZ: OS << "SWZ"; break;
1025     case ImmTyTFE: OS << "TFE"; break;
1026     case ImmTyD16: OS << "D16"; break;
1027     case ImmTyFORMAT: OS << "FORMAT"; break;
1028     case ImmTyClampSI: OS << "ClampSI"; break;
1029     case ImmTyOModSI: OS << "OModSI"; break;
1030     case ImmTyDPP8: OS << "DPP8"; break;
1031     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1032     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1033     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1034     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1035     case ImmTyDppFi: OS << "FI"; break;
1036     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1037     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1038     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1039     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1040     case ImmTyDMask: OS << "DMask"; break;
1041     case ImmTyDim: OS << "Dim"; break;
1042     case ImmTyUNorm: OS << "UNorm"; break;
1043     case ImmTyDA: OS << "DA"; break;
1044     case ImmTyR128A16: OS << "R128A16"; break;
1045     case ImmTyA16: OS << "A16"; break;
1046     case ImmTyLWE: OS << "LWE"; break;
1047     case ImmTyOff: OS << "Off"; break;
1048     case ImmTyExpTgt: OS << "ExpTgt"; break;
1049     case ImmTyExpCompr: OS << "ExpCompr"; break;
1050     case ImmTyExpVM: OS << "ExpVM"; break;
1051     case ImmTyHwreg: OS << "Hwreg"; break;
1052     case ImmTySendMsg: OS << "SendMsg"; break;
1053     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1054     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1055     case ImmTyAttrChan: OS << "AttrChan"; break;
1056     case ImmTyOpSel: OS << "OpSel"; break;
1057     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1058     case ImmTyNegLo: OS << "NegLo"; break;
1059     case ImmTyNegHi: OS << "NegHi"; break;
1060     case ImmTySwizzle: OS << "Swizzle"; break;
1061     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1062     case ImmTyHigh: OS << "High"; break;
1063     case ImmTyBLGP: OS << "BLGP"; break;
1064     case ImmTyCBSZ: OS << "CBSZ"; break;
1065     case ImmTyABID: OS << "ABID"; break;
1066     case ImmTyEndpgm: OS << "Endpgm"; break;
1067     case ImmTyWaitVDST: OS << "WaitVDST"; break;
1068     case ImmTyWaitEXP: OS << "WaitEXP"; break;
1069     }
1070   }
1071 
1072   void print(raw_ostream &OS) const override {
1073     switch (Kind) {
1074     case Register:
1075       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1076       break;
1077     case Immediate:
1078       OS << '<' << getImm();
1079       if (getImmTy() != ImmTyNone) {
1080         OS << " type: "; printImmTy(OS, getImmTy());
1081       }
1082       OS << " mods: " << Imm.Mods << '>';
1083       break;
1084     case Token:
1085       OS << '\'' << getToken() << '\'';
1086       break;
1087     case Expression:
1088       OS << "<expr " << *Expr << '>';
1089       break;
1090     }
1091   }
1092 
1093   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1094                                       int64_t Val, SMLoc Loc,
1095                                       ImmTy Type = ImmTyNone,
1096                                       bool IsFPImm = false) {
1097     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1098     Op->Imm.Val = Val;
1099     Op->Imm.IsFPImm = IsFPImm;
1100     Op->Imm.Kind = ImmKindTyNone;
1101     Op->Imm.Type = Type;
1102     Op->Imm.Mods = Modifiers();
1103     Op->StartLoc = Loc;
1104     Op->EndLoc = Loc;
1105     return Op;
1106   }
1107 
1108   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1109                                         StringRef Str, SMLoc Loc,
1110                                         bool HasExplicitEncodingSize = true) {
1111     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1112     Res->Tok.Data = Str.data();
1113     Res->Tok.Length = Str.size();
1114     Res->StartLoc = Loc;
1115     Res->EndLoc = Loc;
1116     return Res;
1117   }
1118 
1119   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1120                                       unsigned RegNo, SMLoc S,
1121                                       SMLoc E) {
1122     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1123     Op->Reg.RegNo = RegNo;
1124     Op->Reg.Mods = Modifiers();
1125     Op->StartLoc = S;
1126     Op->EndLoc = E;
1127     return Op;
1128   }
1129 
1130   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1131                                        const class MCExpr *Expr, SMLoc S) {
1132     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1133     Op->Expr = Expr;
1134     Op->StartLoc = S;
1135     Op->EndLoc = S;
1136     return Op;
1137   }
1138 };
1139 
1140 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1141   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1142   return OS;
1143 }
1144 
1145 //===----------------------------------------------------------------------===//
1146 // AsmParser
1147 //===----------------------------------------------------------------------===//
1148 
// Holds info related to the current kernel, e.g. the count of SGPRs used.
// A kernel scope begins at an .amdgpu_hsa_kernel directive and ends at the
// next .amdgpu_hsa_kernel directive or at EOF.
1152 class KernelScopeInfo {
1153   int SgprIndexUnusedMin = -1;
1154   int VgprIndexUnusedMin = -1;
1155   int AgprIndexUnusedMin = -1;
1156   MCContext *Ctx = nullptr;
1157   MCSubtargetInfo const *MSTI = nullptr;
1158 
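  // Record that SGPR with index i is used and update the .kernel.sgpr_count
  // symbol accordingly.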
1159   void usesSgprAt(int i) {
1160     if (i >= SgprIndexUnusedMin) {
1161       SgprIndexUnusedMin = ++i;
1162       if (Ctx) {
1163         MCSymbol* const Sym =
1164           Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1165         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1166       }
1167     }
1168   }
1169 
1170   void usesVgprAt(int i) {
1171     if (i >= VgprIndexUnusedMin) {
1172       VgprIndexUnusedMin = ++i;
1173       if (Ctx) {
1174         MCSymbol* const Sym =
1175           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1176         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1177                                          VgprIndexUnusedMin);
1178         Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1179       }
1180     }
1181   }
1182 
1183   void usesAgprAt(int i) {
    // On targets without MAI instructions any AGPR use is rejected later, in
    // AMDGPUAsmParser::MatchAndEmitInstruction, so there is nothing to track.
1185     if (!hasMAIInsts(*MSTI))
1186       return;
1187 
1188     if (i >= AgprIndexUnusedMin) {
1189       AgprIndexUnusedMin = ++i;
1190       if (Ctx) {
1191         MCSymbol* const Sym =
1192           Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1193         Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1194 
1195         // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1196         MCSymbol* const vSym =
1197           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1198         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1199                                          VgprIndexUnusedMin);
1200         vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1201       }
1202     }
1203   }
1204 
1205 public:
1206   KernelScopeInfo() = default;
1207 
1208   void initialize(MCContext &Context) {
1209     Ctx = &Context;
1210     MSTI = Ctx->getSubtargetInfo();
1211 
1212     usesSgprAt(SgprIndexUnusedMin = -1);
1213     usesVgprAt(VgprIndexUnusedMin = -1);
1214     if (hasMAIInsts(*MSTI)) {
1215       usesAgprAt(AgprIndexUnusedMin = -1);
1216     }
1217   }
1218 
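  // Record a use of a register of the given kind, starting at dword index
  // DwordRegIndex and spanning RegWidth bits, so the per-kernel GPR count
  // symbols stay up to date.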
1219   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1220                     unsigned RegWidth) {
1221     switch (RegKind) {
1222     case IS_SGPR:
1223       usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1224       break;
1225     case IS_AGPR:
1226       usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1227       break;
1228     case IS_VGPR:
1229       usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1230       break;
1231     default:
1232       break;
1233     }
1234   }
1235 };
1236 
1237 class AMDGPUAsmParser : public MCTargetAsmParser {
1238   MCAsmParser &Parser;
1239 
1240   // Number of extra operands parsed after the first optional operand.
1241   // This may be necessary to skip hardcoded mandatory operands.
1242   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1243 
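  // Encoding forced by a mnemonic suffix such as '_e32', '_e64', '_dpp' or
  // '_sdwa', if any (see parseMnemonicSuffix and the setForced* helpers
  // below).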
1244   unsigned ForcedEncodingSize = 0;
1245   bool ForcedDPP = false;
1246   bool ForcedSDWA = false;
1247   KernelScopeInfo KernelScope;
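  // Cache policy modifiers already parsed for the current instruction; used
  // when parsing cache policy operands (see parseCPol) to diagnose duplicates.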
1248   unsigned CPolSeen;
1249 
1250   /// @name Auto-generated Match Functions
1251   /// {
1252 
1253 #define GET_ASSEMBLER_HEADER
1254 #include "AMDGPUGenAsmMatcher.inc"
1255 
1256   /// }
1257 
1258 private:
1259   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1260   bool OutOfRangeError(SMRange Range);
  /// Calculate the VGPR/SGPR blocks required for the given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
1263   ///
1264   /// \param Features [in] Target features, used for bug corrections.
1265   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1266   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1267   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1268   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1269   /// descriptor field, if valid.
1270   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1271   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1272   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1273   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1274   /// \param VGPRBlocks [out] Result VGPR block count.
1275   /// \param SGPRBlocks [out] Result SGPR block count.
1276   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1277                           bool FlatScrUsed, bool XNACKUsed,
1278                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1279                           SMRange VGPRRange, unsigned NextFreeSGPR,
1280                           SMRange SGPRRange, unsigned &VGPRBlocks,
1281                           unsigned &SGPRBlocks);
1282   bool ParseDirectiveAMDGCNTarget();
1283   bool ParseDirectiveAMDHSAKernel();
1284   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1285   bool ParseDirectiveHSACodeObjectVersion();
1286   bool ParseDirectiveHSACodeObjectISA();
1287   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1288   bool ParseDirectiveAMDKernelCodeT();
1289   // TODO: Possibly make subtargetHasRegister const.
1290   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1291   bool ParseDirectiveAMDGPUHsaKernel();
1292 
1293   bool ParseDirectiveISAVersion();
1294   bool ParseDirectiveHSAMetadata();
1295   bool ParseDirectivePALMetadataBegin();
1296   bool ParseDirectivePALMetadata();
1297   bool ParseDirectiveAMDGPULDS();
1298 
1299   /// Common code to parse out a block of text (typically YAML) between start and
1300   /// end directives.
1301   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1302                            const char *AssemblerDirectiveEnd,
1303                            std::string &CollectString);
1304 
1305   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1306                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1307   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1308                            unsigned &RegNum, unsigned &RegWidth,
1309                            bool RestoreOnFailure = false);
1310   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1311                            unsigned &RegNum, unsigned &RegWidth,
1312                            SmallVectorImpl<AsmToken> &Tokens);
1313   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1314                            unsigned &RegWidth,
1315                            SmallVectorImpl<AsmToken> &Tokens);
1316   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1317                            unsigned &RegWidth,
1318                            SmallVectorImpl<AsmToken> &Tokens);
1319   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1320                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1321   bool ParseRegRange(unsigned& Num, unsigned& Width);
1322   unsigned getRegularReg(RegisterKind RegKind,
1323                          unsigned RegNum,
1324                          unsigned RegWidth,
1325                          SMLoc Loc);
1326 
1327   bool isRegister();
1328   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1329   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1330   void initializeGprCountSymbol(RegisterKind RegKind);
1331   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1332                              unsigned RegWidth);
1333   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1334                     bool IsAtomic, bool IsLds = false);
1335   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1336                  bool IsGdsHardcoded);
1337 
1338 public:
1339   enum AMDGPUMatchResultTy {
1340     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1341   };
1342   enum OperandMode {
1343     OperandMode_Default,
1344     OperandMode_NSA,
1345   };
1346 
1347   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1348 
1349   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1350                const MCInstrInfo &MII,
1351                const MCTargetOptions &Options)
1352       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1353     MCAsmParserExtension::Initialize(Parser);
1354 
1355     if (getFeatureBits().none()) {
1356       // Set default features.
1357       copySTI().ToggleFeature("southern-islands");
1358     }
1359 
1360     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1361 
1362     {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this:
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific
      // target.
1367       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1368       MCContext &Ctx = getContext();
1369       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1370         MCSymbol *Sym =
1371             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1372         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1373         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1374         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1375         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1376         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1377       } else {
1378         MCSymbol *Sym =
1379             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1380         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1381         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1382         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1383         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1384         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1385       }
1386       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1387         initializeGprCountSymbol(IS_VGPR);
1388         initializeGprCountSymbol(IS_SGPR);
1389       } else
1390         KernelScope.initialize(getContext());
1391     }
1392   }
1393 
1394   bool hasMIMG_R128() const {
1395     return AMDGPU::hasMIMG_R128(getSTI());
1396   }
1397 
1398   bool hasPackedD16() const {
1399     return AMDGPU::hasPackedD16(getSTI());
1400   }
1401 
1402   bool hasGFX10A16() const {
1403     return AMDGPU::hasGFX10A16(getSTI());
1404   }
1405 
1406   bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1407 
1408   bool isSI() const {
1409     return AMDGPU::isSI(getSTI());
1410   }
1411 
1412   bool isCI() const {
1413     return AMDGPU::isCI(getSTI());
1414   }
1415 
1416   bool isVI() const {
1417     return AMDGPU::isVI(getSTI());
1418   }
1419 
1420   bool isGFX9() const {
1421     return AMDGPU::isGFX9(getSTI());
1422   }
1423 
  // TODO: isGFX90A is also true for GFX940. We need to clean this up.
1425   bool isGFX90A() const {
1426     return AMDGPU::isGFX90A(getSTI());
1427   }
1428 
1429   bool isGFX940() const {
1430     return AMDGPU::isGFX940(getSTI());
1431   }
1432 
1433   bool isGFX9Plus() const {
1434     return AMDGPU::isGFX9Plus(getSTI());
1435   }
1436 
1437   bool isGFX10() const {
1438     return AMDGPU::isGFX10(getSTI());
1439   }
1440 
1441   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1442 
1443   bool isGFX11() const {
1444     return AMDGPU::isGFX11(getSTI());
1445   }
1446 
1447   bool isGFX11Plus() const {
1448     return AMDGPU::isGFX11Plus(getSTI());
1449   }
1450 
1451   bool isGFX10_BEncoding() const {
1452     return AMDGPU::isGFX10_BEncoding(getSTI());
1453   }
1454 
1455   bool hasInv2PiInlineImm() const {
1456     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1457   }
1458 
1459   bool hasFlatOffsets() const {
1460     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1461   }
1462 
1463   bool hasArchitectedFlatScratch() const {
1464     return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1465   }
1466 
1467   bool hasSGPR102_SGPR103() const {
1468     return !isVI() && !isGFX9();
1469   }
1470 
1471   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1472 
1473   bool hasIntClamp() const {
1474     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1475   }
1476 
1477   AMDGPUTargetStreamer &getTargetStreamer() {
1478     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1479     return static_cast<AMDGPUTargetStreamer &>(TS);
1480   }
1481 
1482   const MCRegisterInfo *getMRI() const {
1483     // We need this const_cast because for some reason getContext() is not const
1484     // in MCAsmParser.
1485     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1486   }
1487 
1488   const MCInstrInfo *getMII() const {
1489     return &MII;
1490   }
1491 
1492   const FeatureBitset &getFeatureBits() const {
1493     return getSTI().getFeatureBits();
1494   }
1495 
1496   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1497   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1498   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1499 
1500   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1501   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1502   bool isForcedDPP() const { return ForcedDPP; }
1503   bool isForcedSDWA() const { return ForcedSDWA; }
1504   ArrayRef<unsigned> getMatchedVariants() const;
1505   StringRef getMatchedVariantName() const;
1506 
1507   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1508   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1509                      bool RestoreOnFailure);
1510   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1511   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1512                                         SMLoc &EndLoc) override;
1513   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1514   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1515                                       unsigned Kind) override;
1516   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1517                                OperandVector &Operands, MCStreamer &Out,
1518                                uint64_t &ErrorInfo,
1519                                bool MatchingInlineAsm) override;
1520   bool ParseDirective(AsmToken DirectiveID) override;
1521   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1522                                     OperandMode Mode = OperandMode_Default);
1523   StringRef parseMnemonicSuffix(StringRef Name);
1524   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1525                         SMLoc NameLoc, OperandVector &Operands) override;
1526   //bool ProcessInstruction(MCInst &Inst);
1527 
1528   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1529 
1530   OperandMatchResultTy
1531   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1532                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1533                      bool (*ConvertResult)(int64_t &) = nullptr);
1534 
1535   OperandMatchResultTy
1536   parseOperandArrayWithPrefix(const char *Prefix,
1537                               OperandVector &Operands,
1538                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1539                               bool (*ConvertResult)(int64_t&) = nullptr);
1540 
1541   OperandMatchResultTy
1542   parseNamedBit(StringRef Name, OperandVector &Operands,
1543                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1544   OperandMatchResultTy parseCPol(OperandVector &Operands);
1545   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1546                                              StringRef &Value,
1547                                              SMLoc &StringLoc);
1548 
1549   bool isModifier();
1550   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1551   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1552   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1553   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1554   bool parseSP3NegModifier();
1555   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1556   OperandMatchResultTy parseReg(OperandVector &Operands);
1557   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1558   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1559   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1560   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1561   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1562   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1563   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1564   OperandMatchResultTy parseUfmt(int64_t &Format);
1565   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1566   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1567   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1568   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1569   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1570   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1571   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1572 
1573   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1574   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1575   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1576   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1577 
1578   bool parseCnt(int64_t &IntVal);
1579   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1580 
1581   bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1582   void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1583   OperandMatchResultTy parseDepCtrOps(OperandVector &Operands);
1584 
1585   bool parseDelay(int64_t &Delay);
1586   OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands);
1587 
1588   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1589 
1590 private:
1591   struct OperandInfoTy {
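    // One field of a structured operand such as hwreg(...) or sendmsg(...):
    // its source location, numeric id, whether it was written symbolically,
    // and whether it was specified at all.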
1592     SMLoc Loc;
1593     int64_t Id;
1594     bool IsSymbolic = false;
1595     bool IsDefined = false;
1596 
1597     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1598   };
1599 
1600   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1601   bool validateSendMsg(const OperandInfoTy &Msg,
1602                        const OperandInfoTy &Op,
1603                        const OperandInfoTy &Stream);
1604 
1605   bool parseHwregBody(OperandInfoTy &HwReg,
1606                       OperandInfoTy &Offset,
1607                       OperandInfoTy &Width);
1608   bool validateHwreg(const OperandInfoTy &HwReg,
1609                      const OperandInfoTy &Offset,
1610                      const OperandInfoTy &Width);
1611 
1612   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1613   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1614   SMLoc getBLGPLoc(const OperandVector &Operands) const;
1615 
1616   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1617                       const OperandVector &Operands) const;
1618   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1619   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1620   SMLoc getLitLoc(const OperandVector &Operands) const;
1621   SMLoc getConstLoc(const OperandVector &Operands) const;
1622 
1623   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1624   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1625   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1626   bool validateSOPLiteral(const MCInst &Inst) const;
1627   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1628   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1629   bool validateIntClampSupported(const MCInst &Inst);
1630   bool validateMIMGAtomicDMask(const MCInst &Inst);
1631   bool validateMIMGGatherDMask(const MCInst &Inst);
1632   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1633   Optional<StringRef> validateMIMGDataSize(const MCInst &Inst);
1634   bool validateMIMGAddrSize(const MCInst &Inst);
1635   bool validateMIMGD16(const MCInst &Inst);
1636   bool validateMIMGDim(const MCInst &Inst);
1637   bool validateMIMGMSAA(const MCInst &Inst);
1638   bool validateOpSel(const MCInst &Inst);
1639   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1640   bool validateVccOperand(unsigned Reg) const;
1641   bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1642   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1643   bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1644   bool validateAGPRLdSt(const MCInst &Inst) const;
1645   bool validateVGPRAlign(const MCInst &Inst) const;
1646   bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1647   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1648   bool validateDivScale(const MCInst &Inst);
1649   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1650                              const SMLoc &IDLoc);
1651   bool validateFlatLdsDMA(const MCInst &Inst, const OperandVector &Operands,
1652                           const SMLoc &IDLoc);
1653   Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1654   unsigned getConstantBusLimit(unsigned Opcode) const;
1655   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1656   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1657   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1658 
1659   bool isSupportedMnemo(StringRef Mnemo,
1660                         const FeatureBitset &FBS);
1661   bool isSupportedMnemo(StringRef Mnemo,
1662                         const FeatureBitset &FBS,
1663                         ArrayRef<unsigned> Variants);
1664   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1665 
1666   bool isId(const StringRef Id) const;
1667   bool isId(const AsmToken &Token, const StringRef Id) const;
1668   bool isToken(const AsmToken::TokenKind Kind) const;
1669   bool trySkipId(const StringRef Id);
1670   bool trySkipId(const StringRef Pref, const StringRef Id);
1671   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1672   bool trySkipToken(const AsmToken::TokenKind Kind);
1673   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1674   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1675   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1676 
1677   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1678   AsmToken::TokenKind getTokenKind() const;
1679   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1680   bool parseExpr(OperandVector &Operands);
1681   StringRef getTokenStr() const;
1682   AsmToken peekToken();
1683   AsmToken getToken() const;
1684   SMLoc getLoc() const;
1685   void lex();
1686 
1687 public:
1688   void onBeginOfFile() override;
1689 
1690   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1691   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1692 
1693   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1694   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1695   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1696   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1697   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1698   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1699 
1700   bool parseSwizzleOperand(int64_t &Op,
1701                            const unsigned MinVal,
1702                            const unsigned MaxVal,
1703                            const StringRef ErrMsg,
1704                            SMLoc &Loc);
1705   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1706                             const unsigned MinVal,
1707                             const unsigned MaxVal,
1708                             const StringRef ErrMsg);
1709   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1710   bool parseSwizzleOffset(int64_t &Imm);
1711   bool parseSwizzleMacro(int64_t &Imm);
1712   bool parseSwizzleQuadPerm(int64_t &Imm);
1713   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1714   bool parseSwizzleBroadcast(int64_t &Imm);
1715   bool parseSwizzleSwap(int64_t &Imm);
1716   bool parseSwizzleReverse(int64_t &Imm);
1717 
1718   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1719   int64_t parseGPRIdxMacro();
1720 
1721   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1722   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1723   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1724   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1725 
1726   AMDGPUOperand::Ptr defaultCPol() const;
1727 
1728   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1729   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1730   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1731   AMDGPUOperand::Ptr defaultFlatOffset() const;
1732 
1733   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1734 
1735   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1736                OptionalImmIndexMap &OptionalIdx);
1737   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1738   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1739   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1740   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1741                 OptionalImmIndexMap &OptionalIdx);
1742 
1743   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1744   void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1745 
1746   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1747                bool IsAtomic = false);
1748   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1749   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1750 
1751   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1752 
1753   bool parseDimId(unsigned &Encoding);
1754   OperandMatchResultTy parseDim(OperandVector &Operands);
1755   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1756   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1757   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1758   int64_t parseDPPCtrlSel(StringRef Ctrl);
1759   int64_t parseDPPCtrlPerm();
1760   AMDGPUOperand::Ptr defaultRowMask() const;
1761   AMDGPUOperand::Ptr defaultBankMask() const;
1762   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1763   AMDGPUOperand::Ptr defaultFI() const;
1764   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1765   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1766   void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1767   void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) { cvtVOP3DPP(Inst, Operands, true); }
1768 
1769   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1770                                     AMDGPUOperand::ImmTy Type);
1771   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1772   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1773   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1774   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1775   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1776   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1777   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1778                uint64_t BasicInstType,
1779                bool SkipDstVcc = false,
1780                bool SkipSrcVcc = false);
1781 
1782   AMDGPUOperand::Ptr defaultBLGP() const;
1783   AMDGPUOperand::Ptr defaultCBSZ() const;
1784   AMDGPUOperand::Ptr defaultABID() const;
1785 
1786   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1787   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1788 
1789   AMDGPUOperand::Ptr defaultWaitVDST() const;
1790   AMDGPUOperand::Ptr defaultWaitEXP() const;
1791 };
1792 
1793 struct OptionalOperand {
1794   const char *Name;
1795   AMDGPUOperand::ImmTy Type;
1796   bool IsBit;
1797   bool (*ConvertResult)(int64_t&);
1798 };
1799 
1800 } // end anonymous namespace
1801 
1802 // May be called with an integer type of equivalent bit width.
1803 static const fltSemantics *getFltSemantics(unsigned Size) {
1804   switch (Size) {
1805   case 4:
1806     return &APFloat::IEEEsingle();
1807   case 8:
1808     return &APFloat::IEEEdouble();
1809   case 2:
1810     return &APFloat::IEEEhalf();
1811   default:
1812     llvm_unreachable("unsupported fp type");
1813   }
1814 }
1815 
1816 static const fltSemantics *getFltSemantics(MVT VT) {
1817   return getFltSemantics(VT.getSizeInBits() / 8);
1818 }
1819 
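// Map an operand type to the floating-point semantics used when parsing its
// literals (single, double, or half precision).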
1820 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1821   switch (OperandType) {
1822   case AMDGPU::OPERAND_REG_IMM_INT32:
1823   case AMDGPU::OPERAND_REG_IMM_FP32:
1824   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1825   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1826   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1827   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1828   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1829   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1830   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1831   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1832   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1833   case AMDGPU::OPERAND_KIMM32:
1834     return &APFloat::IEEEsingle();
1835   case AMDGPU::OPERAND_REG_IMM_INT64:
1836   case AMDGPU::OPERAND_REG_IMM_FP64:
1837   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1838   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1839   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1840     return &APFloat::IEEEdouble();
1841   case AMDGPU::OPERAND_REG_IMM_INT16:
1842   case AMDGPU::OPERAND_REG_IMM_FP16:
1843   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1844   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1845   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1846   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1847   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1848   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1849   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1850   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1851   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1852   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1853   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1854   case AMDGPU::OPERAND_KIMM16:
1855     return &APFloat::IEEEhalf();
1856   default:
1857     llvm_unreachable("unsupported fp type");
1858   }
1859 }
1860 
1861 //===----------------------------------------------------------------------===//
1862 // Operand
1863 //===----------------------------------------------------------------------===//
1864 
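// Check whether an fp literal can be converted to the floating-point
// semantics of VT. Precision loss is acceptable; overflow or underflow is not.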
1865 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1866   bool Lost;
1867 
1868   // Convert the literal to the floating-point semantics of the expected type.
1869   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1870                                                APFloat::rmNearestTiesToEven,
1871                                                &Lost);
1872   // We allow precision loss but not overflow or underflow.
1873   if (Status != APFloat::opOK &&
1874       Lost &&
1875       ((Status & APFloat::opOverflow)  != 0 ||
1876        (Status & APFloat::opUnderflow) != 0)) {
1877     return false;
1878   }
1879 
1880   return true;
1881 }
1882 
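// Return true if Val fits in Size bits as either a signed or an unsigned
// value, i.e. it can be truncated without losing information.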
1883 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1884   return isUIntN(Size, Val) || isIntN(Size, Val);
1885 }
1886 
1887 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1888   if (VT.getScalarType() == MVT::i16) {
1889     // Inline fp immediates are broken for i16 operands, so only accept integer inline constants.
1890     return isInlinableIntLiteral(Val);
1891   }
1892 
1893   // f16/v2f16 operands work correctly for all values.
1894   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1895 }
1896 
1897 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1898 
1899   // This is a hack to enable named inline values like
1900   // shared_base with both 32-bit and 64-bit operands.
1901   // Note that these values are defined as
1902   // 32-bit operands only.
1903   if (isInlineValue()) {
1904     return true;
1905   }
1906 
1907   if (!isImmTy(ImmTyNone)) {
1908     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1909     return false;
1910   }
1911   // TODO: We should avoid using host floats here. It would be better to
1912   // check the float bit values, which is what a few other places do.
1913   // We've had bot failures before due to weird NaN support on MIPS hosts.
1914 
1915   APInt Literal(64, Imm.Val);
1916 
1917   if (Imm.IsFPImm) { // We got fp literal token
1918     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1919       return AMDGPU::isInlinableLiteral64(Imm.Val,
1920                                           AsmParser->hasInv2PiInlineImm());
1921     }
1922 
1923     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1924     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1925       return false;
1926 
1927     if (type.getScalarSizeInBits() == 16) {
1928       return isInlineableLiteralOp16(
1929         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1930         type, AsmParser->hasInv2PiInlineImm());
1931     }
1932 
1933     // Check if single precision literal is inlinable
1934     return AMDGPU::isInlinableLiteral32(
1935       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1936       AsmParser->hasInv2PiInlineImm());
1937   }
1938 
1939   // We got int literal token.
1940   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1941     return AMDGPU::isInlinableLiteral64(Imm.Val,
1942                                         AsmParser->hasInv2PiInlineImm());
1943   }
1944 
1945   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1946     return false;
1947   }
1948 
1949   if (type.getScalarSizeInBits() == 16) {
1950     return isInlineableLiteralOp16(
1951       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1952       type, AsmParser->hasInv2PiInlineImm());
1953   }
1954 
1955   return AMDGPU::isInlinableLiteral32(
1956     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1957     AsmParser->hasInv2PiInlineImm());
1958 }
1959 
1960 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1961   // Check that this immediate can be added as a literal.
1962   if (!isImmTy(ImmTyNone)) {
1963     return false;
1964   }
1965 
1966   if (!Imm.IsFPImm) {
1967     // We got int literal token.
1968 
1969     if (type == MVT::f64 && hasFPModifiers()) {
1970       // FP modifiers cannot be applied to int literals while preserving the same
1971       // semantics for VOP1/2/C and VOP3, because of integer truncation. To avoid
1972       // ambiguity, disallow these cases.
1973       return false;
1974     }
1975 
1976     unsigned Size = type.getSizeInBits();
1977     if (Size == 64)
1978       Size = 32;
1979 
1980     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1981     // types.
1982     return isSafeTruncation(Imm.Val, Size);
1983   }
1984 
1985   // We got fp literal token
1986   if (type == MVT::f64) { // Expected 64-bit fp operand
1987     // The low 32 bits of the literal would be set to zero, but such literals are still accepted.
1988     return true;
1989   }
1990 
1991   if (type == MVT::i64) { // Expected 64-bit int operand
1992     // We don't allow fp literals in 64-bit integer instructions. It is
1993     // unclear how we should encode them.
1994     return false;
1995   }
1996 
1997   // We allow fp literals with f16x2 operands assuming that the specified
1998   // literal goes into the lower half and the upper half is zero. We also
1999   // require that the literal may be losslessly converted to f16.
2000   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
2001                      (type == MVT::v2i16)? MVT::i16 :
2002                      (type == MVT::v2f32)? MVT::f32 : type;
2003 
2004   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2005   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2006 }
2007 
2008 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2009   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2010 }
2011 
2012 bool AMDGPUOperand::isVRegWithInputMods() const {
2013   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2014          // GFX90A allows DPP on 64-bit operands.
2015          (isRegClass(AMDGPU::VReg_64RegClassID) &&
2016           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
2017 }
2018 
2019 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2020   if (AsmParser->isVI())
2021     return isVReg32();
2022   else if (AsmParser->isGFX9Plus())
2023     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2024   else
2025     return false;
2026 }
2027 
2028 bool AMDGPUOperand::isSDWAFP16Operand() const {
2029   return isSDWAOperand(MVT::f16);
2030 }
2031 
2032 bool AMDGPUOperand::isSDWAFP32Operand() const {
2033   return isSDWAOperand(MVT::f32);
2034 }
2035 
2036 bool AMDGPUOperand::isSDWAInt16Operand() const {
2037   return isSDWAOperand(MVT::i16);
2038 }
2039 
2040 bool AMDGPUOperand::isSDWAInt32Operand() const {
2041   return isSDWAOperand(MVT::i32);
2042 }
2043 
2044 bool AMDGPUOperand::isBoolReg() const {
2045   auto FB = AsmParser->getFeatureBits();
2046   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
2047                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
2048 }
2049 
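// Apply the 'abs'/'neg' input modifiers directly to the bit pattern of an fp
// immediate: 'abs' clears the sign bit and 'neg' flips it for the given size.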
2050 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2051 {
2052   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2053   assert(Size == 2 || Size == 4 || Size == 8);
2054 
2055   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2056 
2057   if (Imm.Mods.Abs) {
2058     Val &= ~FpSignMask;
2059   }
2060   if (Imm.Mods.Neg) {
2061     Val ^= FpSignMask;
2062   }
2063 
2064   return Val;
2065 }
2066 
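// Add an immediate operand to Inst. If the target operand accepts literals,
// dispatch to addLiteralImmOperand (optionally applying fp modifiers);
// otherwise add the raw immediate value.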
2067 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2068   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2069                              Inst.getNumOperands())) {
2070     addLiteralImmOperand(Inst, Imm.Val,
2071                          ApplyModifiers &&
2072                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2073   } else {
2074     assert(!isImmTy(ImmTyNone) || !hasModifiers());
2075     Inst.addOperand(MCOperand::createImm(Imm.Val));
2076     setImmKindNone();
2077   }
2078 }
2079 
2080 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2081   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2082   auto OpNum = Inst.getNumOperands();
2083   // Check that this operand accepts literals
2084   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2085 
2086   if (ApplyModifiers) {
2087     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2088     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2089     Val = applyInputFPModifiers(Val, Size);
2090   }
2091 
2092   APInt Literal(64, Val);
2093   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
2094 
2095   if (Imm.IsFPImm) { // We got fp literal token
2096     switch (OpTy) {
2097     case AMDGPU::OPERAND_REG_IMM_INT64:
2098     case AMDGPU::OPERAND_REG_IMM_FP64:
2099     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2100     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2101     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2102       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2103                                        AsmParser->hasInv2PiInlineImm())) {
2104         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2105         setImmKindConst();
2106         return;
2107       }
2108 
2109       // Non-inlineable
2110       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2111         // For fp operands we check whether the low 32 bits are zero.
2112         if (Literal.getLoBits(32) != 0) {
2113           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2114           "Can't encode literal as exact 64-bit floating-point operand. "
2115           "Low 32-bits will be set to zero");
2116         }
2117 
2118         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2119         setImmKindLiteral();
2120         return;
2121       }
2122 
2123       // We don't allow fp literals in 64-bit integer instructions. It is
2124       // unclear how we should encode them. This case should be checked earlier
2125       // in predicate methods (isLiteralImm())
2126       llvm_unreachable("fp literal in 64-bit integer instruction.");
2127 
2128     case AMDGPU::OPERAND_REG_IMM_INT32:
2129     case AMDGPU::OPERAND_REG_IMM_FP32:
2130     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2131     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2132     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2133     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2134     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2135     case AMDGPU::OPERAND_REG_IMM_INT16:
2136     case AMDGPU::OPERAND_REG_IMM_FP16:
2137     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2138     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2139     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2140     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2141     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2142     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2143     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2144     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2145     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2146     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2147     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2148     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2149     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2150     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2151     case AMDGPU::OPERAND_REG_IMM_V2INT32:
2152     case AMDGPU::OPERAND_KIMM32:
2153     case AMDGPU::OPERAND_KIMM16: {
2154       bool lost;
2155       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2156       // Convert the literal to the floating-point semantics of the operand type.
2157       FPLiteral.convert(*getOpFltSemantics(OpTy),
2158                         APFloat::rmNearestTiesToEven, &lost);
2159       // We allow precision loss but not overflow or underflow. This should be
2160       // checked earlier in isLiteralImm().
2161 
2162       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2163       Inst.addOperand(MCOperand::createImm(ImmVal));
2164       setImmKindLiteral();
2165       return;
2166     }
2167     default:
2168       llvm_unreachable("invalid operand size");
2169     }
2170 
2171     return;
2172   }
2173 
2174   // We got int literal token.
2175   // Only sign extend inline immediates.
2176   switch (OpTy) {
2177   case AMDGPU::OPERAND_REG_IMM_INT32:
2178   case AMDGPU::OPERAND_REG_IMM_FP32:
2179   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2180   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2181   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2182   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2183   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2184   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2185   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2186   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2187   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2188   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2189   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2190     if (isSafeTruncation(Val, 32) &&
2191         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2192                                      AsmParser->hasInv2PiInlineImm())) {
2193       Inst.addOperand(MCOperand::createImm(Val));
2194       setImmKindConst();
2195       return;
2196     }
2197 
2198     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2199     setImmKindLiteral();
2200     return;
2201 
2202   case AMDGPU::OPERAND_REG_IMM_INT64:
2203   case AMDGPU::OPERAND_REG_IMM_FP64:
2204   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2205   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2206   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2207     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2208       Inst.addOperand(MCOperand::createImm(Val));
2209       setImmKindConst();
2210       return;
2211     }
2212 
2213     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2214     setImmKindLiteral();
2215     return;
2216 
2217   case AMDGPU::OPERAND_REG_IMM_INT16:
2218   case AMDGPU::OPERAND_REG_IMM_FP16:
2219   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2220   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2221   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2222   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2223   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2224     if (isSafeTruncation(Val, 16) &&
2225         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2226                                      AsmParser->hasInv2PiInlineImm())) {
2227       Inst.addOperand(MCOperand::createImm(Val));
2228       setImmKindConst();
2229       return;
2230     }
2231 
2232     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2233     setImmKindLiteral();
2234     return;
2235 
2236   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2237   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2238   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2239   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2240     assert(isSafeTruncation(Val, 16));
2241     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2242                                         AsmParser->hasInv2PiInlineImm()));
2243 
2244     Inst.addOperand(MCOperand::createImm(Val));
2245     return;
2246   }
2247   case AMDGPU::OPERAND_KIMM32:
2248     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2249     setImmKindNone();
2250     return;
2251   case AMDGPU::OPERAND_KIMM16:
2252     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2253     setImmKindNone();
2254     return;
2255   default:
2256     llvm_unreachable("invalid operand size");
2257   }
2258 }
2259 
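// Add a KImm (literal constant) operand of the given bit width. Integer
// tokens are truncated to Bitwidth; fp tokens are first converted to the
// corresponding floating-point format.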
2260 template <unsigned Bitwidth>
2261 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2262   APInt Literal(64, Imm.Val);
2263   setImmKindNone();
2264 
2265   if (!Imm.IsFPImm) {
2266     // We got int literal token.
2267     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2268     return;
2269   }
2270 
2271   bool Lost;
2272   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2273   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2274                     APFloat::rmNearestTiesToEven, &Lost);
2275   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2276 }
2277 
2278 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2279   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2280 }
2281 
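// Check whether Reg is one of the named special registers that are encoded
// as inline constants (e.g. shared_base, vccz, null).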
2282 static bool isInlineValue(unsigned Reg) {
2283   switch (Reg) {
2284   case AMDGPU::SRC_SHARED_BASE:
2285   case AMDGPU::SRC_SHARED_LIMIT:
2286   case AMDGPU::SRC_PRIVATE_BASE:
2287   case AMDGPU::SRC_PRIVATE_LIMIT:
2288   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2289     return true;
2290   case AMDGPU::SRC_VCCZ:
2291   case AMDGPU::SRC_EXECZ:
2292   case AMDGPU::SRC_SCC:
2293     return true;
2294   case AMDGPU::SGPR_NULL:
2295     return true;
2296   default:
2297     return false;
2298   }
2299 }
2300 
2301 bool AMDGPUOperand::isInlineValue() const {
2302   return isRegKind() && ::isInlineValue(getReg());
2303 }
2304 
2305 //===----------------------------------------------------------------------===//
2306 // AsmParser
2307 //===----------------------------------------------------------------------===//
2308 
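// Return the register class ID for a register of the given kind and width in
// bits, or -1 if there is no matching class.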
2309 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2310   if (Is == IS_VGPR) {
2311     switch (RegWidth) {
2312       default: return -1;
2313       case 32:
2314         return AMDGPU::VGPR_32RegClassID;
2315       case 64:
2316         return AMDGPU::VReg_64RegClassID;
2317       case 96:
2318         return AMDGPU::VReg_96RegClassID;
2319       case 128:
2320         return AMDGPU::VReg_128RegClassID;
2321       case 160:
2322         return AMDGPU::VReg_160RegClassID;
2323       case 192:
2324         return AMDGPU::VReg_192RegClassID;
2325       case 224:
2326         return AMDGPU::VReg_224RegClassID;
2327       case 256:
2328         return AMDGPU::VReg_256RegClassID;
2329       case 512:
2330         return AMDGPU::VReg_512RegClassID;
2331       case 1024:
2332         return AMDGPU::VReg_1024RegClassID;
2333     }
2334   } else if (Is == IS_TTMP) {
2335     switch (RegWidth) {
2336       default: return -1;
2337       case 32:
2338         return AMDGPU::TTMP_32RegClassID;
2339       case 64:
2340         return AMDGPU::TTMP_64RegClassID;
2341       case 128:
2342         return AMDGPU::TTMP_128RegClassID;
2343       case 256:
2344         return AMDGPU::TTMP_256RegClassID;
2345       case 512:
2346         return AMDGPU::TTMP_512RegClassID;
2347     }
2348   } else if (Is == IS_SGPR) {
2349     switch (RegWidth) {
2350       default: return -1;
2351       case 32:
2352         return AMDGPU::SGPR_32RegClassID;
2353       case 64:
2354         return AMDGPU::SGPR_64RegClassID;
2355       case 96:
2356         return AMDGPU::SGPR_96RegClassID;
2357       case 128:
2358         return AMDGPU::SGPR_128RegClassID;
2359       case 160:
2360         return AMDGPU::SGPR_160RegClassID;
2361       case 192:
2362         return AMDGPU::SGPR_192RegClassID;
2363       case 224:
2364         return AMDGPU::SGPR_224RegClassID;
2365       case 256:
2366         return AMDGPU::SGPR_256RegClassID;
2367       case 512:
2368         return AMDGPU::SGPR_512RegClassID;
2369     }
2370   } else if (Is == IS_AGPR) {
2371     switch (RegWidth) {
2372       default: return -1;
2373       case 32:
2374         return AMDGPU::AGPR_32RegClassID;
2375       case 64:
2376         return AMDGPU::AReg_64RegClassID;
2377       case 96:
2378         return AMDGPU::AReg_96RegClassID;
2379       case 128:
2380         return AMDGPU::AReg_128RegClassID;
2381       case 160:
2382         return AMDGPU::AReg_160RegClassID;
2383       case 192:
2384         return AMDGPU::AReg_192RegClassID;
2385       case 224:
2386         return AMDGPU::AReg_224RegClassID;
2387       case 256:
2388         return AMDGPU::AReg_256RegClassID;
2389       case 512:
2390         return AMDGPU::AReg_512RegClassID;
2391       case 1024:
2392         return AMDGPU::AReg_1024RegClassID;
2393     }
2394   }
2395   return -1;
2396 }
2397 
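// Map a special register name (including its 'src_'-prefixed alias, where one
// exists) to the corresponding register, or NoRegister if unrecognized.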
2398 static unsigned getSpecialRegForName(StringRef RegName) {
2399   return StringSwitch<unsigned>(RegName)
2400     .Case("exec", AMDGPU::EXEC)
2401     .Case("vcc", AMDGPU::VCC)
2402     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2403     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2404     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2405     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2406     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2407     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2408     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2409     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2410     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2411     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2412     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2413     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2414     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2415     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2416     .Case("m0", AMDGPU::M0)
2417     .Case("vccz", AMDGPU::SRC_VCCZ)
2418     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2419     .Case("execz", AMDGPU::SRC_EXECZ)
2420     .Case("src_execz", AMDGPU::SRC_EXECZ)
2421     .Case("scc", AMDGPU::SRC_SCC)
2422     .Case("src_scc", AMDGPU::SRC_SCC)
2423     .Case("tba", AMDGPU::TBA)
2424     .Case("tma", AMDGPU::TMA)
2425     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2426     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2427     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2428     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2429     .Case("vcc_lo", AMDGPU::VCC_LO)
2430     .Case("vcc_hi", AMDGPU::VCC_HI)
2431     .Case("exec_lo", AMDGPU::EXEC_LO)
2432     .Case("exec_hi", AMDGPU::EXEC_HI)
2433     .Case("tma_lo", AMDGPU::TMA_LO)
2434     .Case("tma_hi", AMDGPU::TMA_HI)
2435     .Case("tba_lo", AMDGPU::TBA_LO)
2436     .Case("tba_hi", AMDGPU::TBA_HI)
2437     .Case("pc", AMDGPU::PC_REG)
2438     .Case("null", AMDGPU::SGPR_NULL)
2439     .Default(AMDGPU::NoRegister);
2440 }
2441 
2442 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2443                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2444   auto R = parseRegister();
2445   if (!R) return true;
2446   assert(R->isReg());
2447   RegNo = R->getReg();
2448   StartLoc = R->getStartLoc();
2449   EndLoc = R->getEndLoc();
2450   return false;
2451 }
2452 
2453 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2454                                     SMLoc &EndLoc) {
2455   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2456 }
2457 
2458 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2459                                                        SMLoc &StartLoc,
2460                                                        SMLoc &EndLoc) {
2461   bool Result =
2462       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2463   bool PendingErrors = getParser().hasPendingError();
2464   getParser().clearPendingErrors();
2465   if (PendingErrors)
2466     return MatchOperand_ParseFail;
2467   if (Result)
2468     return MatchOperand_NoMatch;
2469   return MatchOperand_Success;
2470 }
2471 
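// Append Reg1 to the register list currently described by Reg and RegWidth.
// Halves of special registers are merged into their 64-bit counterparts;
// regular registers must have consecutive indices and grow the width by 32.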
2472 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2473                                             RegisterKind RegKind, unsigned Reg1,
2474                                             SMLoc Loc) {
2475   switch (RegKind) {
2476   case IS_SPECIAL:
2477     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2478       Reg = AMDGPU::EXEC;
2479       RegWidth = 64;
2480       return true;
2481     }
2482     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2483       Reg = AMDGPU::FLAT_SCR;
2484       RegWidth = 64;
2485       return true;
2486     }
2487     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2488       Reg = AMDGPU::XNACK_MASK;
2489       RegWidth = 64;
2490       return true;
2491     }
2492     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2493       Reg = AMDGPU::VCC;
2494       RegWidth = 64;
2495       return true;
2496     }
2497     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2498       Reg = AMDGPU::TBA;
2499       RegWidth = 64;
2500       return true;
2501     }
2502     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2503       Reg = AMDGPU::TMA;
2504       RegWidth = 64;
2505       return true;
2506     }
2507     Error(Loc, "register does not fit in the list");
2508     return false;
2509   case IS_VGPR:
2510   case IS_SGPR:
2511   case IS_AGPR:
2512   case IS_TTMP:
2513     if (Reg1 != Reg + RegWidth / 32) {
2514       Error(Loc, "registers in a list must have consecutive indices");
2515       return false;
2516     }
2517     RegWidth += 32;
2518     return true;
2519   default:
2520     llvm_unreachable("unexpected register kind");
2521   }
2522 }
2523 
2524 struct RegInfo {
2525   StringLiteral Name;
2526   RegisterKind Kind;
2527 };
2528 
2529 static constexpr RegInfo RegularRegisters[] = {
2530   {{"v"},    IS_VGPR},
2531   {{"s"},    IS_SGPR},
2532   {{"ttmp"}, IS_TTMP},
2533   {{"acc"},  IS_AGPR},
2534   {{"a"},    IS_AGPR},
2535 };
2536 
2537 static bool isRegularReg(RegisterKind Kind) {
2538   return Kind == IS_VGPR ||
2539          Kind == IS_SGPR ||
2540          Kind == IS_TTMP ||
2541          Kind == IS_AGPR;
2542 }
2543 
2544 static const RegInfo* getRegularRegInfo(StringRef Str) {
2545   for (const RegInfo &Reg : RegularRegisters)
2546     if (Str.startswith(Reg.Name))
2547       return &Reg;
2548   return nullptr;
2549 }
2550 
2551 static bool getRegNum(StringRef Str, unsigned& Num) {
2552   return !Str.getAsInteger(10, Num);
2553 }
2554 
2555 bool
2556 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2557                             const AsmToken &NextToken) const {
2558 
2559   // A list of consecutive registers: [s0,s1,s2,s3]
2560   if (Token.is(AsmToken::LBrac))
2561     return true;
2562 
2563   if (!Token.is(AsmToken::Identifier))
2564     return false;
2565 
2566   // A single register like s0 or a range of registers like s[0:1]
2567 
2568   StringRef Str = Token.getString();
2569   const RegInfo *Reg = getRegularRegInfo(Str);
2570   if (Reg) {
2571     StringRef RegName = Reg->Name;
2572     StringRef RegSuffix = Str.substr(RegName.size());
2573     if (!RegSuffix.empty()) {
2574       unsigned Num;
2575       // A single register with an index: rXX
2576       if (getRegNum(RegSuffix, Num))
2577         return true;
2578     } else {
2579       // A range of registers: r[XX:YY].
2580       if (NextToken.is(AsmToken::LBrac))
2581         return true;
2582     }
2583   }
2584 
2585   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2586 }
2587 
2588 bool
2589 AMDGPUAsmParser::isRegister()
2590 {
2591   return isRegister(getToken(), peekToken());
2592 }
2593 
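// Compute the MC register for a regular (v/s/a/ttmp) register from its kind,
// starting index, and width in bits, enforcing SGPR/TTMP alignment rules.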
2594 unsigned
2595 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2596                                unsigned RegNum,
2597                                unsigned RegWidth,
2598                                SMLoc Loc) {
2599 
2600   assert(isRegularReg(RegKind));
2601 
2602   unsigned AlignSize = 1;
2603   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2604     // SGPR and TTMP registers must be aligned.
2605     // Max required alignment is 4 dwords.
2606     AlignSize = std::min(RegWidth / 32, 4u);
2607   }
2608 
2609   if (RegNum % AlignSize != 0) {
2610     Error(Loc, "invalid register alignment");
2611     return AMDGPU::NoRegister;
2612   }
2613 
2614   unsigned RegIdx = RegNum / AlignSize;
2615   int RCID = getRegClass(RegKind, RegWidth);
2616   if (RCID == -1) {
2617     Error(Loc, "invalid or unsupported register size");
2618     return AMDGPU::NoRegister;
2619   }
2620 
2621   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2622   const MCRegisterClass RC = TRI->getRegClass(RCID);
2623   if (RegIdx >= RC.getNumRegs()) {
2624     Error(Loc, "register index is out of range");
2625     return AMDGPU::NoRegister;
2626   }
2627 
2628   return RC.getRegister(RegIdx);
2629 }
2630 
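// Parse a register index or range in square brackets, e.g. [8] or [8:11],
// returning the starting index and the total width in bits.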
2631 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2632   int64_t RegLo, RegHi;
2633   if (!skipToken(AsmToken::LBrac, "missing register index"))
2634     return false;
2635 
2636   SMLoc FirstIdxLoc = getLoc();
2637   SMLoc SecondIdxLoc;
2638 
2639   if (!parseExpr(RegLo))
2640     return false;
2641 
2642   if (trySkipToken(AsmToken::Colon)) {
2643     SecondIdxLoc = getLoc();
2644     if (!parseExpr(RegHi))
2645       return false;
2646   } else {
2647     RegHi = RegLo;
2648   }
2649 
2650   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2651     return false;
2652 
2653   if (!isUInt<32>(RegLo)) {
2654     Error(FirstIdxLoc, "invalid register index");
2655     return false;
2656   }
2657 
2658   if (!isUInt<32>(RegHi)) {
2659     Error(SecondIdxLoc, "invalid register index");
2660     return false;
2661   }
2662 
2663   if (RegLo > RegHi) {
2664     Error(FirstIdxLoc, "first register index should not exceed second index");
2665     return false;
2666   }
2667 
2668   Num = static_cast<unsigned>(RegLo);
2669   RegWidth = 32 * ((RegHi - RegLo) + 1);
2670   return true;
2671 }
2672 
2673 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2674                                           unsigned &RegNum, unsigned &RegWidth,
2675                                           SmallVectorImpl<AsmToken> &Tokens) {
2676   assert(isToken(AsmToken::Identifier));
2677   unsigned Reg = getSpecialRegForName(getTokenStr());
2678   if (Reg) {
2679     RegNum = 0;
2680     RegWidth = 32;
2681     RegKind = IS_SPECIAL;
2682     Tokens.push_back(getToken());
2683     lex(); // skip register name
2684   }
2685   return Reg;
2686 }
2687 
2688 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2689                                           unsigned &RegNum, unsigned &RegWidth,
2690                                           SmallVectorImpl<AsmToken> &Tokens) {
2691   assert(isToken(AsmToken::Identifier));
2692   StringRef RegName = getTokenStr();
2693   auto Loc = getLoc();
2694 
2695   const RegInfo *RI = getRegularRegInfo(RegName);
2696   if (!RI) {
2697     Error(Loc, "invalid register name");
2698     return AMDGPU::NoRegister;
2699   }
2700 
2701   Tokens.push_back(getToken());
2702   lex(); // skip register name
2703 
2704   RegKind = RI->Kind;
2705   StringRef RegSuffix = RegName.substr(RI->Name.size());
2706   if (!RegSuffix.empty()) {
2707     // Single 32-bit register: vXX.
2708     if (!getRegNum(RegSuffix, RegNum)) {
2709       Error(Loc, "invalid register index");
2710       return AMDGPU::NoRegister;
2711     }
2712     RegWidth = 32;
2713   } else {
2714     // Range of registers: v[XX:YY]. ":YY" is optional.
2715     if (!ParseRegRange(RegNum, RegWidth))
2716       return AMDGPU::NoRegister;
2717   }
2718 
2719   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2720 }
2721 
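// Parse a list of consecutive 32-bit registers of the same kind, e.g.
// [s0,s1,s2,s3], and combine them into a single wide register.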
2722 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2723                                        unsigned &RegWidth,
2724                                        SmallVectorImpl<AsmToken> &Tokens) {
2725   unsigned Reg = AMDGPU::NoRegister;
2726   auto ListLoc = getLoc();
2727 
2728   if (!skipToken(AsmToken::LBrac,
2729                  "expected a register or a list of registers")) {
2730     return AMDGPU::NoRegister;
2731   }
2732 
2733   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2734 
2735   auto Loc = getLoc();
2736   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2737     return AMDGPU::NoRegister;
2738   if (RegWidth != 32) {
2739     Error(Loc, "expected a single 32-bit register");
2740     return AMDGPU::NoRegister;
2741   }
2742 
2743   for (; trySkipToken(AsmToken::Comma); ) {
2744     RegisterKind NextRegKind;
2745     unsigned NextReg, NextRegNum, NextRegWidth;
2746     Loc = getLoc();
2747 
2748     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2749                              NextRegNum, NextRegWidth,
2750                              Tokens)) {
2751       return AMDGPU::NoRegister;
2752     }
2753     if (NextRegWidth != 32) {
2754       Error(Loc, "expected a single 32-bit register");
2755       return AMDGPU::NoRegister;
2756     }
2757     if (NextRegKind != RegKind) {
2758       Error(Loc, "registers in a list must be of the same kind");
2759       return AMDGPU::NoRegister;
2760     }
2761     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2762       return AMDGPU::NoRegister;
2763   }
2764 
2765   if (!skipToken(AsmToken::RBrac,
2766                  "expected a comma or a closing square bracket")) {
2767     return AMDGPU::NoRegister;
2768   }
2769 
2770   if (isRegularReg(RegKind))
2771     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2772 
2773   return Reg;
2774 }
2775 
2776 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2777                                           unsigned &RegNum, unsigned &RegWidth,
2778                                           SmallVectorImpl<AsmToken> &Tokens) {
2779   auto Loc = getLoc();
2780   Reg = AMDGPU::NoRegister;
2781 
2782   if (isToken(AsmToken::Identifier)) {
2783     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2784     if (Reg == AMDGPU::NoRegister)
2785       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2786   } else {
2787     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2788   }
2789 
2790   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2791   if (Reg == AMDGPU::NoRegister) {
2792     assert(Parser.hasPendingError());
2793     return false;
2794   }
2795 
2796   if (!subtargetHasRegister(*TRI, Reg)) {
2797     if (Reg == AMDGPU::SGPR_NULL) {
2798       Error(Loc, "'null' operand is not supported on this GPU");
2799     } else {
2800       Error(Loc, "register not available on this GPU");
2801     }
2802     return false;
2803   }
2804 
2805   return true;
2806 }
2807 
2808 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2809                                           unsigned &RegNum, unsigned &RegWidth,
2810                                           bool RestoreOnFailure /*=false*/) {
2811   Reg = AMDGPU::NoRegister;
2812 
2813   SmallVector<AsmToken, 1> Tokens;
2814   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2815     if (RestoreOnFailure) {
2816       while (!Tokens.empty()) {
2817         getLexer().UnLex(Tokens.pop_back_val());
2818       }
2819     }
2820     return true;
2821   }
2822   return false;
2823 }
2824 
2825 Optional<StringRef>
2826 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2827   switch (RegKind) {
2828   case IS_VGPR:
2829     return StringRef(".amdgcn.next_free_vgpr");
2830   case IS_SGPR:
2831     return StringRef(".amdgcn.next_free_sgpr");
2832   default:
2833     return None;
2834   }
2835 }
2836 
2837 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2838   auto SymbolName = getGprCountSymbolName(RegKind);
2839   assert(SymbolName && "initializing invalid register kind");
2840   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2841   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2842 }
2843 
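// Bump the .amdgcn.next_free_{v,s}gpr symbol if this register use extends
// past the current maximum index.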
2844 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2845                                             unsigned DwordRegIndex,
2846                                             unsigned RegWidth) {
2847   // Symbols are only defined for GCN targets
2848   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2849     return true;
2850 
2851   auto SymbolName = getGprCountSymbolName(RegKind);
2852   if (!SymbolName)
2853     return true;
2854   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2855 
2856   int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
2857   int64_t OldCount;
2858 
2859   if (!Sym->isVariable())
2860     return !Error(getLoc(),
2861                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2862   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2863     return !Error(
2864         getLoc(),
2865         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2866 
2867   if (OldCount <= NewMax)
2868     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2869 
2870   return true;
2871 }
2872 
2873 std::unique_ptr<AMDGPUOperand>
2874 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2875   const auto &Tok = getToken();
2876   SMLoc StartLoc = Tok.getLoc();
2877   SMLoc EndLoc = Tok.getEndLoc();
2878   RegisterKind RegKind;
2879   unsigned Reg, RegNum, RegWidth;
2880 
2881   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2882     return nullptr;
2883   }
2884   if (isHsaAbiVersion3AndAbove(&getSTI())) {
2885     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2886       return nullptr;
2887   } else
2888     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2889   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2890 }
2891 
2892 OperandMatchResultTy
2893 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2894   // TODO: add syntactic sugar for 1/(2*PI)
2895 
2896   assert(!isRegister());
2897   assert(!isModifier());
2898 
2899   const auto& Tok = getToken();
2900   const auto& NextTok = peekToken();
2901   bool IsReal = Tok.is(AsmToken::Real);
2902   SMLoc S = getLoc();
2903   bool Negate = false;
2904 
2905   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2906     lex();
2907     IsReal = true;
2908     Negate = true;
2909   }
2910 
2911   if (IsReal) {
2912     // Floating-point expressions are not supported.
2913     // Only floating-point literals with an
2914     // optional sign are allowed.
2915 
2916     StringRef Num = getTokenStr();
2917     lex();
2918 
2919     APFloat RealVal(APFloat::IEEEdouble());
2920     auto roundMode = APFloat::rmNearestTiesToEven;
2921     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2922       return MatchOperand_ParseFail;
2923     }
2924     if (Negate)
2925       RealVal.changeSign();
2926 
2927     Operands.push_back(
2928       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2929                                AMDGPUOperand::ImmTyNone, true));
2930 
2931     return MatchOperand_Success;
2932 
2933   } else {
2934     int64_t IntVal;
2935     const MCExpr *Expr;
2936     SMLoc S = getLoc();
2937 
2938     if (HasSP3AbsModifier) {
2939       // This is a workaround for handling expressions
2940       // used as arguments of the SP3 'abs' modifier, for example:
2941       //     |1.0|
2942       //     |-1|
2943       //     |1+x|
2944       // This syntax is not compatible with the syntax of standard
2945       // MC expressions (due to the trailing '|').
2946       SMLoc EndLoc;
2947       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2948         return MatchOperand_ParseFail;
2949     } else {
2950       if (Parser.parseExpression(Expr))
2951         return MatchOperand_ParseFail;
2952     }
2953 
2954     if (Expr->evaluateAsAbsolute(IntVal)) {
2955       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2956     } else {
2957       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2958     }
2959 
2960     return MatchOperand_Success;
2961   }
2962 
2963   return MatchOperand_NoMatch;
2964 }
2965 
2966 OperandMatchResultTy
2967 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2968   if (!isRegister())
2969     return MatchOperand_NoMatch;
2970 
2971   if (auto R = parseRegister()) {
2972     assert(R->isReg());
2973     Operands.push_back(std::move(R));
2974     return MatchOperand_Success;
2975   }
2976   return MatchOperand_ParseFail;
2977 }
2978 
2979 OperandMatchResultTy
2980 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2981   auto res = parseReg(Operands);
2982   if (res != MatchOperand_NoMatch) {
2983     return res;
2984   } else if (isModifier()) {
2985     return MatchOperand_NoMatch;
2986   } else {
2987     return parseImm(Operands, HasSP3AbsMod);
2988   }
2989 }
2990 
2991 bool
2992 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2993   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2994     const auto &str = Token.getString();
2995     return str == "abs" || str == "neg" || str == "sext";
2996   }
2997   return false;
2998 }
2999 
3000 bool
3001 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3002   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3003 }
3004 
3005 bool
3006 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3007   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3008 }
3009 
3010 bool
3011 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3012   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3013 }
3014 
3015 // Check if this is an operand modifier or an opcode modifier
3016 // which may look like an expression but is not. We should
3017 // avoid parsing these modifiers as expressions. Currently
3018 // recognized sequences are:
3019 //   |...|
3020 //   abs(...)
3021 //   neg(...)
3022 //   sext(...)
3023 //   -reg
3024 //   -|...|
3025 //   -abs(...)
3026 //   name:...
3027 // Note that simple opcode modifiers like 'gds' may be parsed as
3028 // expressions; this is a special case. See getExpressionAsToken.
3029 //
3030 bool
3031 AMDGPUAsmParser::isModifier() {
3032 
3033   AsmToken Tok = getToken();
3034   AsmToken NextToken[2];
3035   peekTokens(NextToken);
3036 
3037   return isOperandModifier(Tok, NextToken[0]) ||
3038          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3039          isOpcodeModifierWithVal(Tok, NextToken[0]);
3040 }
3041 
3042 // Check if the current token is an SP3 'neg' modifier.
3043 // Currently this modifier is allowed in the following contexts:
3044 //
3045 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3046 // 2. Before an 'abs' modifier: -abs(...)
3047 // 3. Before an SP3 'abs' modifier: -|...|
3048 //
3049 // In all other cases "-" is handled as a part
3050 // of an expression that follows the sign.
3051 //
3052 // Note: When "-" is followed by an integer literal,
3053 // it is interpreted as integer negation rather than
3054 // a floating-point NEG modifier applied to the literal.
3055 // Besides being counterintuitive, such use of the floating-point
3056 // NEG modifier would result in different meanings
3057 // for integer literals used with VOP1/2/C and VOP3,
3058 // for example:
3059 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3060 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3061 // Negative fp literals with a preceding "-" are
3062 // handled likewise for uniformity.
3063 //
3064 bool
3065 AMDGPUAsmParser::parseSP3NegModifier() {
3066 
3067   AsmToken NextToken[2];
3068   peekTokens(NextToken);
3069 
3070   if (isToken(AsmToken::Minus) &&
3071       (isRegister(NextToken[0], NextToken[1]) ||
3072        NextToken[0].is(AsmToken::Pipe) ||
3073        isId(NextToken[0], "abs"))) {
3074     lex();
3075     return true;
3076   }
3077 
3078   return false;
3079 }
3080 
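// Parse a register or immediate together with optional fp input modifiers,
// accepting both named syntax, e.g. neg(v0) or abs(2.0), and SP3 syntax,
// e.g. -v0 or |v1|.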
3081 OperandMatchResultTy
3082 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3083                                               bool AllowImm) {
3084   bool Neg, SP3Neg;
3085   bool Abs, SP3Abs;
3086   SMLoc Loc;
3087 
3088   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3089   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
3090     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3091     return MatchOperand_ParseFail;
3092   }
3093 
3094   SP3Neg = parseSP3NegModifier();
3095 
3096   Loc = getLoc();
3097   Neg = trySkipId("neg");
3098   if (Neg && SP3Neg) {
3099     Error(Loc, "expected register or immediate");
3100     return MatchOperand_ParseFail;
3101   }
3102   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3103     return MatchOperand_ParseFail;
3104 
3105   Abs = trySkipId("abs");
3106   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3107     return MatchOperand_ParseFail;
3108 
3109   Loc = getLoc();
3110   SP3Abs = trySkipToken(AsmToken::Pipe);
3111   if (Abs && SP3Abs) {
3112     Error(Loc, "expected register or immediate");
3113     return MatchOperand_ParseFail;
3114   }
3115 
3116   OperandMatchResultTy Res;
3117   if (AllowImm) {
3118     Res = parseRegOrImm(Operands, SP3Abs);
3119   } else {
3120     Res = parseReg(Operands);
3121   }
3122   if (Res != MatchOperand_Success) {
3123     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
3124   }
3125 
3126   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3127     return MatchOperand_ParseFail;
3128   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3129     return MatchOperand_ParseFail;
3130   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3131     return MatchOperand_ParseFail;
3132 
3133   AMDGPUOperand::Modifiers Mods;
3134   Mods.Abs = Abs || SP3Abs;
3135   Mods.Neg = Neg || SP3Neg;
3136 
3137   if (Mods.hasFPModifiers()) {
3138     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3139     if (Op.isExpr()) {
3140       Error(Op.getStartLoc(), "expected an absolute expression");
3141       return MatchOperand_ParseFail;
3142     }
3143     Op.setModifiers(Mods);
3144   }
3145   return MatchOperand_Success;
3146 }
3147 
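// Parse a register or immediate operand with an optional integer input
// modifier "sext(...)".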
3148 OperandMatchResultTy
3149 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3150                                                bool AllowImm) {
3151   bool Sext = trySkipId("sext");
3152   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3153     return MatchOperand_ParseFail;
3154 
3155   OperandMatchResultTy Res;
3156   if (AllowImm) {
3157     Res = parseRegOrImm(Operands);
3158   } else {
3159     Res = parseReg(Operands);
3160   }
3161   if (Res != MatchOperand_Success) {
3162     return Sext? MatchOperand_ParseFail : Res;
3163   }
3164 
3165   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3166     return MatchOperand_ParseFail;
3167 
3168   AMDGPUOperand::Modifiers Mods;
3169   Mods.Sext = Sext;
3170 
3171   if (Mods.hasIntModifiers()) {
3172     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3173     if (Op.isExpr()) {
3174       Error(Op.getStartLoc(), "expected an absolute expression");
3175       return MatchOperand_ParseFail;
3176     }
3177     Op.setModifiers(Mods);
3178   }
3179 
3180   return MatchOperand_Success;
3181 }
3182 
3183 OperandMatchResultTy
3184 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3185   return parseRegOrImmWithFPInputMods(Operands, false);
3186 }
3187 
3188 OperandMatchResultTy
3189 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3190   return parseRegOrImmWithIntInputMods(Operands, false);
3191 }
3192 
3193 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3194   auto Loc = getLoc();
3195   if (trySkipId("off")) {
3196     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3197                                                 AMDGPUOperand::ImmTyOff, false));
3198     return MatchOperand_Success;
3199   }
3200 
3201   if (!isRegister())
3202     return MatchOperand_NoMatch;
3203 
3204   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3205   if (Reg) {
3206     Operands.push_back(std::move(Reg));
3207     return MatchOperand_Success;
3208   }
3209 
  return MatchOperand_ParseFail;
}
3213 
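// Reject a match if it contradicts an encoding forced by the user
// (e.g. a VOP3 match when a 32-bit encoding was requested), and handle
// opcodes with extra target-specific matching constraints.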
3214 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3215   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3216 
3217   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3218       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3219       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3220       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3221     return Match_InvalidOperand;
3222 
3223   if ((TSFlags & SIInstrFlags::VOP3) &&
3224       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3225       getForcedEncodingSize() != 64)
3226     return Match_PreferE32;
3227 
3228   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3229       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3230     // v_mac_f32/16 allow only dst_sel == DWORD;
3231     auto OpNum =
3232         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3233     const auto &Op = Inst.getOperand(OpNum);
3234     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3235       return Match_InvalidOperand;
3236     }
3237   }
3238 
3239   return Match_Success;
3240 }
3241 
3242 static ArrayRef<unsigned> getAllVariants() {
3243   static const unsigned Variants[] = {
3244     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3245     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
3246     AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
3247   };
3248 
3249   return makeArrayRef(Variants);
3250 }
3251 
3252 // What asm variants we should check
3253 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3254   if (isForcedDPP() && isForcedVOP3()) {
3255     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3256     return makeArrayRef(Variants);
3257   }
3258   if (getForcedEncodingSize() == 32) {
3259     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3260     return makeArrayRef(Variants);
3261   }
3262 
3263   if (isForcedVOP3()) {
3264     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3265     return makeArrayRef(Variants);
3266   }
3267 
3268   if (isForcedSDWA()) {
3269     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3270                                         AMDGPUAsmVariants::SDWA9};
3271     return makeArrayRef(Variants);
3272   }
3273 
3274   if (isForcedDPP()) {
3275     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3276     return makeArrayRef(Variants);
3277   }
3278 
3279   return getAllVariants();
3280 }
3281 
3282 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3283   if (isForcedDPP() && isForcedVOP3())
3284     return "e64_dpp";
3285 
3286   if (getForcedEncodingSize() == 32)
3287     return "e32";
3288 
3289   if (isForcedVOP3())
3290     return "e64";
3291 
3292   if (isForcedSDWA())
3293     return "sdwa";
3294 
3295   if (isForcedDPP())
3296     return "dpp";
3297 
3298   return "";
3299 }
3300 
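// Return an SGPR (VCC, FLAT_SCR or M0) implicitly read by this VOP
// instruction, or NoRegister if it reads none.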
3301 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3302   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3303   const unsigned Num = Desc.getNumImplicitUses();
3304   for (unsigned i = 0; i < Num; ++i) {
3305     unsigned Reg = Desc.ImplicitUses[i];
3306     switch (Reg) {
3307     case AMDGPU::FLAT_SCR:
3308     case AMDGPU::VCC:
3309     case AMDGPU::VCC_LO:
3310     case AMDGPU::VCC_HI:
3311     case AMDGPU::M0:
3312       return Reg;
3313     default:
3314       break;
3315     }
3316   }
3317   return AMDGPU::NoRegister;
3318 }
3319 
// NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
3322 // Note that there are no cases when a GFX7 opcode violates
3323 // constant bus limitations due to the use of an f16 constant.
3324 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3325                                        unsigned OpIdx) const {
3326   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3327 
3328   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3329     return false;
3330   }
3331 
3332   const MCOperand &MO = Inst.getOperand(OpIdx);
3333 
3334   int64_t Val = MO.getImm();
3335   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3336 
3337   switch (OpSize) { // expected operand size
3338   case 8:
3339     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3340   case 4:
3341     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3342   case 2: {
3343     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3344     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3345         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3346         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3347       return AMDGPU::isInlinableIntLiteral(Val);
3348 
3349     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3350         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3351         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3352       return AMDGPU::isInlinableIntLiteralV216(Val);
3353 
3354     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3355         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3356         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3357       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3358 
3359     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3360   }
3361   default:
3362     llvm_unreachable("invalid operand size");
3363   }
3364 }
3365 
3366 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3367   if (!isGFX10Plus())
3368     return 1;
3369 
3370   switch (Opcode) {
3371   // 64-bit shift instructions can use only one scalar value input
3372   case AMDGPU::V_LSHLREV_B64_e64:
3373   case AMDGPU::V_LSHLREV_B64_gfx10:
3374   case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3375   case AMDGPU::V_LSHRREV_B64_e64:
3376   case AMDGPU::V_LSHRREV_B64_gfx10:
3377   case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3378   case AMDGPU::V_ASHRREV_I64_e64:
3379   case AMDGPU::V_ASHRREV_I64_gfx10:
3380   case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3381   case AMDGPU::V_LSHL_B64_e64:
3382   case AMDGPU::V_LSHR_B64_e64:
3383   case AMDGPU::V_ASHR_I64_e64:
3384     return 1;
3385   default:
3386     return 2;
3387   }
3388 }
3389 
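// An operand occupies the constant bus if it is an SGPR other than null,
// a literal that is not an inline constant, or an expression.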
3390 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3391   const MCOperand &MO = Inst.getOperand(OpIdx);
3392   if (MO.isImm()) {
3393     return !isInlineConstant(Inst, OpIdx);
3394   } else if (MO.isReg()) {
3395     auto Reg = MO.getReg();
3396     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3397     auto PReg = mc2PseudoReg(Reg);
3398     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3399   } else {
3400     return true;
3401   }
3402 }
3403 
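// Check that the instruction does not read more scalar values via the
// constant bus than the target allows (see getConstantBusLimit).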
3404 bool
3405 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3406                                                 const OperandVector &Operands) {
3407   const unsigned Opcode = Inst.getOpcode();
3408   const MCInstrDesc &Desc = MII.get(Opcode);
3409   unsigned LastSGPR = AMDGPU::NoRegister;
3410   unsigned ConstantBusUseCount = 0;
3411   unsigned NumLiterals = 0;
3412   unsigned LiteralSize;
3413 
3414   if (Desc.TSFlags &
3415       (SIInstrFlags::VOPC |
3416        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3417        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3418        SIInstrFlags::SDWA)) {
3419     // Check special imm operands (used by madmk, etc)
3420     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3421       ++NumLiterals;
3422       LiteralSize = 4;
3423     }
3424 
3425     SmallDenseSet<unsigned> SGPRsUsed;
3426     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3427     if (SGPRUsed != AMDGPU::NoRegister) {
3428       SGPRsUsed.insert(SGPRUsed);
3429       ++ConstantBusUseCount;
3430     }
3431 
3432     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3433     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3434     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3435 
3436     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3437 
3438     for (int OpIdx : OpIndices) {
3439       if (OpIdx == -1) break;
3440 
3441       const MCOperand &MO = Inst.getOperand(OpIdx);
3442       if (usesConstantBus(Inst, OpIdx)) {
3443         if (MO.isReg()) {
3444           LastSGPR = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersection like these
3446           //   s0, s[0:1]
3447           //   flat_scratch_lo, flat_scratch
3448           //   flat_scratch_lo, flat_scratch_hi
3449           // are theoretically valid but they are disabled anyway.
3450           // Note that this code mimics SIInstrInfo::verifyInstruction
3451           if (!SGPRsUsed.count(LastSGPR)) {
3452             SGPRsUsed.insert(LastSGPR);
3453             ++ConstantBusUseCount;
3454           }
3455         } else { // Expression or a literal
3456 
3457           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3458             continue; // special operand like VINTERP attr_chan
3459 
3460           // An instruction may use only one literal.
3461           // This has been validated on the previous step.
3462           // See validateVOPLiteral.
3463           // This literal may be used as more than one operand.
3464           // If all these operands are of the same size,
3465           // this literal counts as one scalar value.
3466           // Otherwise it counts as 2 scalar values.
3467           // See "GFX10 Shader Programming", section 3.6.2.3.
3468 
3469           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3470           if (Size < 4) Size = 4;
3471 
3472           if (NumLiterals == 0) {
3473             NumLiterals = 1;
3474             LiteralSize = Size;
3475           } else if (LiteralSize != Size) {
3476             NumLiterals = 2;
3477           }
3478         }
3479       }
3480     }
3481   }
3482   ConstantBusUseCount += NumLiterals;
3483 
3484   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3485     return true;
3486 
3487   SMLoc LitLoc = getLitLoc(Operands);
3488   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3489   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3490   Error(Loc, "invalid operand (violates constant bus restrictions)");
3491   return false;
3492 }
3493 
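// For instructions with an early-clobber destination, check that no source
// register overlaps the destination register.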
3494 bool
3495 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3496                                                  const OperandVector &Operands) {
3497   const unsigned Opcode = Inst.getOpcode();
3498   const MCInstrDesc &Desc = MII.get(Opcode);
3499 
3500   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3501   if (DstIdx == -1 ||
3502       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3503     return true;
3504   }
3505 
3506   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3507 
3508   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3509   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3510   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3511 
3512   assert(DstIdx != -1);
3513   const MCOperand &Dst = Inst.getOperand(DstIdx);
3514   assert(Dst.isReg());
3515 
3516   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3517 
3518   for (int SrcIdx : SrcIndices) {
3519     if (SrcIdx == -1) break;
3520     const MCOperand &Src = Inst.getOperand(SrcIdx);
3521     if (Src.isReg()) {
3522       if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) {
3523         const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3524         Error(getRegLoc(SrcReg, Operands),
3525           "destination must be different than all sources");
3526         return false;
3527       }
3528     }
3529   }
3530 
3531   return true;
3532 }
3533 
3534 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3535 
3536   const unsigned Opc = Inst.getOpcode();
3537   const MCInstrDesc &Desc = MII.get(Opc);
3538 
3539   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3540     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3541     assert(ClampIdx != -1);
3542     return Inst.getOperand(ClampIdx).getImm() == 0;
3543   }
3544 
3545   return true;
3546 }
3547 
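// Check that the vdata register size of a MIMG instruction matches the
// number of components implied by dmask, tfe and packed d16.
// Returns an error message on mismatch, None otherwise.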
3548 Optional<StringRef> AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3549 
3550   const unsigned Opc = Inst.getOpcode();
3551   const MCInstrDesc &Desc = MII.get(Opc);
3552 
3553   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3554     return None;
3555 
3556   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3557   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3558   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3559 
3560   assert(VDataIdx != -1);
3561 
3562   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3563     return None;
3564 
3565   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3566   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3567   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3568   if (DMask == 0)
3569     DMask = 1;
3570 
3571   bool isPackedD16 = false;
3572   unsigned DataSize =
3573     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3574   if (hasPackedD16()) {
3575     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3576     isPackedD16 = D16Idx >= 0;
3577     if (isPackedD16 && Inst.getOperand(D16Idx).getImm())
3578       DataSize = (DataSize + 1) / 2;
3579   }
3580 
3581   if ((VDataSize / 4) == DataSize + TFESize)
3582     return None;
3583 
3584   return StringRef(isPackedD16
3585                        ? "image data size does not match dmask, d16 and tfe"
3586                        : "image data size does not match dmask and tfe");
3587 }
3588 
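// On GFX10+, check that the number of address registers of a MIMG
// instruction matches its dim, a16 and g16 settings.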
3589 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3590   const unsigned Opc = Inst.getOpcode();
3591   const MCInstrDesc &Desc = MII.get(Opc);
3592 
3593   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3594     return true;
3595 
3596   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3597 
3598   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3599       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3600   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3601   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3602   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3603   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3604 
3605   assert(VAddr0Idx != -1);
3606   assert(SrsrcIdx != -1);
3607   assert(SrsrcIdx > VAddr0Idx);
3608 
3609   if (DimIdx == -1)
3610     return true; // intersect_ray
3611 
3612   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3613   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3614   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3615   unsigned ActualAddrSize =
3616       IsNSA ? SrsrcIdx - VAddr0Idx
3617             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3618   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3619 
3620   unsigned ExpectedAddrSize =
3621       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3622 
3623   if (!IsNSA) {
3624     if (ExpectedAddrSize > 8)
3625       ExpectedAddrSize = 16;
3626 
3627     // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3628     // This provides backward compatibility for assembly created
3629     // before 160b/192b/224b types were directly supported.
3630     if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3631       return true;
3632   }
3633 
3634   return ActualAddrSize == ExpectedAddrSize;
3635 }
3636 
3637 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3638 
3639   const unsigned Opc = Inst.getOpcode();
3640   const MCInstrDesc &Desc = MII.get(Opc);
3641 
3642   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3643     return true;
3644   if (!Desc.mayLoad() || !Desc.mayStore())
3645     return true; // Not atomic
3646 
3647   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3648   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3649 
3650   // This is an incomplete check because image_atomic_cmpswap
3651   // may only use 0x3 and 0xf while other atomic operations
3652   // may use 0x1 and 0x3. However these limitations are
3653   // verified when we check that dmask matches dst size.
3654   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3655 }
3656 
3657 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3658 
3659   const unsigned Opc = Inst.getOpcode();
3660   const MCInstrDesc &Desc = MII.get(Opc);
3661 
3662   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3663     return true;
3664 
3665   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3666   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3667 
3668   // GATHER4 instructions use dmask in a different fashion compared to
3669   // other MIMG instructions. The only useful DMASK values are
3670   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3671   // (red,red,red,red) etc.) The ISA document doesn't mention
3672   // this.
3673   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3674 }
3675 
3676 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3677   const unsigned Opc = Inst.getOpcode();
3678   const MCInstrDesc &Desc = MII.get(Opc);
3679 
3680   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3681     return true;
3682 
3683   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3684   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3685       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3686 
3687   if (!BaseOpcode->MSAA)
3688     return true;
3689 
3690   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3691   assert(DimIdx != -1);
3692 
3693   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3694   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3695 
3696   return DimInfo->MSAA;
3697 }
3698 
static bool IsMovrelsSDWAOpcode(const unsigned Opcode) {
3701   switch (Opcode) {
3702   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3703   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3704   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3705     return true;
3706   default:
3707     return false;
3708   }
3709 }
3710 
// movrels* opcodes should only allow VGPRs as src0.
// This is specified in the .td description for vop1/vop3,
3713 // but sdwa is handled differently. See isSDWAOperand.
3714 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3715                                       const OperandVector &Operands) {
3716 
3717   const unsigned Opc = Inst.getOpcode();
3718   const MCInstrDesc &Desc = MII.get(Opc);
3719 
3720   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3721     return true;
3722 
3723   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3724   assert(Src0Idx != -1);
3725 
3726   SMLoc ErrLoc;
3727   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3728   if (Src0.isReg()) {
3729     auto Reg = mc2PseudoReg(Src0.getReg());
3730     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3731     if (!isSGPR(Reg, TRI))
3732       return true;
3733     ErrLoc = getRegLoc(Reg, Operands);
3734   } else {
3735     ErrLoc = getConstLoc(Operands);
3736   }
3737 
3738   Error(ErrLoc, "source operand must be a VGPR");
3739   return false;
3740 }
3741 
3742 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3743                                           const OperandVector &Operands) {
3744 
3745   const unsigned Opc = Inst.getOpcode();
3746 
3747   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3748     return true;
3749 
3750   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3751   assert(Src0Idx != -1);
3752 
3753   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3754   if (!Src0.isReg())
3755     return true;
3756 
3757   auto Reg = mc2PseudoReg(Src0.getReg());
3758   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3759   if (!isGFX90A() && isSGPR(Reg, TRI)) {
3760     Error(getRegLoc(Reg, Operands),
3761           "source operand must be either a VGPR or an inline constant");
3762     return false;
3763   }
3764 
3765   return true;
3766 }
3767 
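// For MFMA instructions with a result wider than 128 bits, src2 must either
// be the same register as dst or must not overlap it at all.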
3768 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3769                                    const OperandVector &Operands) {
3770   const unsigned Opc = Inst.getOpcode();
3771   const MCInstrDesc &Desc = MII.get(Opc);
3772 
3773   if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3774     return true;
3775 
3776   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3777   if (Src2Idx == -1)
3778     return true;
3779 
3780   const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3781   if (!Src2.isReg())
3782     return true;
3783 
3784   MCRegister Src2Reg = Src2.getReg();
3785   MCRegister DstReg = Inst.getOperand(0).getReg();
3786   if (Src2Reg == DstReg)
3787     return true;
3788 
3789   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3790   if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
3791     return true;
3792 
3793   if (TRI->regsOverlap(Src2Reg, DstReg)) {
3794     Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3795           "source 2 operand must not partially overlap with dst");
3796     return false;
3797   }
3798 
3799   return true;
3800 }
3801 
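// v_div_scale_* do not accept the 'abs' modifier on any of their sources.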
3802 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3803   switch (Inst.getOpcode()) {
3804   default:
3805     return true;
3806   case V_DIV_SCALE_F32_gfx6_gfx7:
3807   case V_DIV_SCALE_F32_vi:
3808   case V_DIV_SCALE_F32_gfx10:
3809   case V_DIV_SCALE_F64_gfx6_gfx7:
3810   case V_DIV_SCALE_F64_vi:
3811   case V_DIV_SCALE_F64_gfx10:
3812     break;
3813   }
3814 
3815   // TODO: Check that src0 = src1 or src2.
3816 
  for (auto Name : {AMDGPU::OpName::src0_modifiers,
                    AMDGPU::OpName::src1_modifiers,
                    AMDGPU::OpName::src2_modifiers}) {
3820     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3821             .getImm() &
3822         SISrcMods::ABS) {
3823       return false;
3824     }
3825   }
3826 
3827   return true;
3828 }
3829 
3830 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3831 
3832   const unsigned Opc = Inst.getOpcode();
3833   const MCInstrDesc &Desc = MII.get(Opc);
3834 
3835   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3836     return true;
3837 
3838   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3839   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3840     if (isCI() || isSI())
3841       return false;
3842   }
3843 
3844   return true;
3845 }
3846 
3847 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3848   const unsigned Opc = Inst.getOpcode();
3849   const MCInstrDesc &Desc = MII.get(Opc);
3850 
3851   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3852     return true;
3853 
3854   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3855   if (DimIdx < 0)
3856     return true;
3857 
3858   long Imm = Inst.getOperand(DimIdx).getImm();
3859   if (Imm < 0 || Imm >= 8)
3860     return false;
3861 
3862   return true;
3863 }
3864 
static bool IsRevOpcode(const unsigned Opcode) {
3867   switch (Opcode) {
3868   case AMDGPU::V_SUBREV_F32_e32:
3869   case AMDGPU::V_SUBREV_F32_e64:
3870   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3871   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3872   case AMDGPU::V_SUBREV_F32_e32_vi:
3873   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3874   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3875   case AMDGPU::V_SUBREV_F32_e64_vi:
3876 
3877   case AMDGPU::V_SUBREV_CO_U32_e32:
3878   case AMDGPU::V_SUBREV_CO_U32_e64:
3879   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3880   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3881 
3882   case AMDGPU::V_SUBBREV_U32_e32:
3883   case AMDGPU::V_SUBBREV_U32_e64:
3884   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3885   case AMDGPU::V_SUBBREV_U32_e32_vi:
3886   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3887   case AMDGPU::V_SUBBREV_U32_e64_vi:
3888 
3889   case AMDGPU::V_SUBREV_U32_e32:
3890   case AMDGPU::V_SUBREV_U32_e64:
3891   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3892   case AMDGPU::V_SUBREV_U32_e32_vi:
3893   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3894   case AMDGPU::V_SUBREV_U32_e64_vi:
3895 
3896   case AMDGPU::V_SUBREV_F16_e32:
3897   case AMDGPU::V_SUBREV_F16_e64:
3898   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3899   case AMDGPU::V_SUBREV_F16_e32_vi:
3900   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3901   case AMDGPU::V_SUBREV_F16_e64_vi:
3902 
3903   case AMDGPU::V_SUBREV_U16_e32:
3904   case AMDGPU::V_SUBREV_U16_e64:
3905   case AMDGPU::V_SUBREV_U16_e32_vi:
3906   case AMDGPU::V_SUBREV_U16_e64_vi:
3907 
3908   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3909   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3910   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3911 
3912   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3913   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3914 
3915   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3916   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3917 
3918   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3919   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3920 
3921   case AMDGPU::V_LSHRREV_B32_e32:
3922   case AMDGPU::V_LSHRREV_B32_e64:
3923   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3924   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3925   case AMDGPU::V_LSHRREV_B32_e32_vi:
3926   case AMDGPU::V_LSHRREV_B32_e64_vi:
3927   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3928   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3929 
3930   case AMDGPU::V_ASHRREV_I32_e32:
3931   case AMDGPU::V_ASHRREV_I32_e64:
3932   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3933   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3934   case AMDGPU::V_ASHRREV_I32_e32_vi:
3935   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3936   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3937   case AMDGPU::V_ASHRREV_I32_e64_vi:
3938 
3939   case AMDGPU::V_LSHLREV_B32_e32:
3940   case AMDGPU::V_LSHLREV_B32_e64:
3941   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3942   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3943   case AMDGPU::V_LSHLREV_B32_e32_vi:
3944   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3945   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3946   case AMDGPU::V_LSHLREV_B32_e64_vi:
3947 
3948   case AMDGPU::V_LSHLREV_B16_e32:
3949   case AMDGPU::V_LSHLREV_B16_e64:
3950   case AMDGPU::V_LSHLREV_B16_e32_vi:
3951   case AMDGPU::V_LSHLREV_B16_e64_vi:
3952   case AMDGPU::V_LSHLREV_B16_gfx10:
3953 
3954   case AMDGPU::V_LSHRREV_B16_e32:
3955   case AMDGPU::V_LSHRREV_B16_e64:
3956   case AMDGPU::V_LSHRREV_B16_e32_vi:
3957   case AMDGPU::V_LSHRREV_B16_e64_vi:
3958   case AMDGPU::V_LSHRREV_B16_gfx10:
3959 
3960   case AMDGPU::V_ASHRREV_I16_e32:
3961   case AMDGPU::V_ASHRREV_I16_e64:
3962   case AMDGPU::V_ASHRREV_I16_e32_vi:
3963   case AMDGPU::V_ASHRREV_I16_e64_vi:
3964   case AMDGPU::V_ASHRREV_I16_gfx10:
3965 
3966   case AMDGPU::V_LSHLREV_B64_e64:
3967   case AMDGPU::V_LSHLREV_B64_gfx10:
3968   case AMDGPU::V_LSHLREV_B64_vi:
3969 
3970   case AMDGPU::V_LSHRREV_B64_e64:
3971   case AMDGPU::V_LSHRREV_B64_gfx10:
3972   case AMDGPU::V_LSHRREV_B64_vi:
3973 
3974   case AMDGPU::V_ASHRREV_I64_e64:
3975   case AMDGPU::V_ASHRREV_I64_gfx10:
3976   case AMDGPU::V_ASHRREV_I64_vi:
3977 
3978   case AMDGPU::V_PK_LSHLREV_B16:
3979   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3980   case AMDGPU::V_PK_LSHLREV_B16_vi:
3981 
3982   case AMDGPU::V_PK_LSHRREV_B16:
3983   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3984   case AMDGPU::V_PK_LSHRREV_B16_vi:
3985   case AMDGPU::V_PK_ASHRREV_I16:
3986   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3987   case AMDGPU::V_PK_ASHRREV_I16_vi:
3988     return true;
3989   default:
3990     return false;
3991   }
3992 }
3993 
3994 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3995 
3996   using namespace SIInstrFlags;
3997   const unsigned Opcode = Inst.getOpcode();
3998   const MCInstrDesc &Desc = MII.get(Opcode);
3999 
4000   // lds_direct register is defined so that it can be used
4001   // with 9-bit operands only. Ignore encodings which do not accept these.
4002   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4003   if ((Desc.TSFlags & Enc) == 0)
4004     return None;
4005 
4006   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4007     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4008     if (SrcIdx == -1)
4009       break;
4010     const auto &Src = Inst.getOperand(SrcIdx);
4011     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4012 
4013       if (isGFX90A() || isGFX11Plus())
4014         return StringRef("lds_direct is not supported on this GPU");
4015 
4016       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4017         return StringRef("lds_direct cannot be used with this instruction");
4018 
4019       if (SrcName != OpName::src0)
4020         return StringRef("lds_direct may be used as src0 only");
4021     }
4022   }
4023 
4024   return None;
4025 }
4026 
4027 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4028   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4029     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4030     if (Op.isFlatOffset())
4031       return Op.getStartLoc();
4032   }
4033   return getLoc();
4034 }
4035 
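// Check that a FLAT offset is supported on this GPU and fits into the
// available offset bits (signed for global/scratch, unsigned otherwise).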
4036 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4037                                          const OperandVector &Operands) {
4038   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4039   if ((TSFlags & SIInstrFlags::FLAT) == 0)
4040     return true;
4041 
4042   auto Opcode = Inst.getOpcode();
4043   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4044   assert(OpNum != -1);
4045 
4046   const auto &Op = Inst.getOperand(OpNum);
4047   if (!hasFlatOffsets() && Op.getImm() != 0) {
4048     Error(getFlatOffsetLoc(Operands),
4049           "flat offset modifier is not supported on this GPU");
4050     return false;
4051   }
4052 
4053   // For FLAT segment the offset must be positive;
4054   // MSB is ignored and forced to zero.
4055   if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
4056     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
4057     if (!isIntN(OffsetSize, Op.getImm())) {
4058       Error(getFlatOffsetLoc(Operands),
4059             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4060       return false;
4061     }
4062   } else {
4063     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
4064     if (!isUIntN(OffsetSize, Op.getImm())) {
4065       Error(getFlatOffsetLoc(Operands),
4066             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4067       return false;
4068     }
4069   }
4070 
4071   return true;
4072 }
4073 
4074 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4075   // Start with second operand because SMEM Offset cannot be dst or src0.
4076   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4077     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4078     if (Op.isSMEMOffset())
4079       return Op.getStartLoc();
4080   }
4081   return getLoc();
4082 }
4083 
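// Check that an SMEM offset is encodable on this target
// (20-bit unsigned on VI and for buffers, 21-bit signed otherwise).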
4084 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4085                                          const OperandVector &Operands) {
4086   if (isCI() || isSI())
4087     return true;
4088 
4089   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4090   if ((TSFlags & SIInstrFlags::SMRD) == 0)
4091     return true;
4092 
4093   auto Opcode = Inst.getOpcode();
4094   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4095   if (OpNum == -1)
4096     return true;
4097 
4098   const auto &Op = Inst.getOperand(OpNum);
4099   if (!Op.isImm())
4100     return true;
4101 
4102   uint64_t Offset = Op.getImm();
4103   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4104   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4105       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4106     return true;
4107 
4108   Error(getSMEMOffsetLoc(Operands),
4109         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
4110                                "expected a 21-bit signed offset");
4111 
4112   return false;
4113 }
4114 
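// SOP2/SOPC instructions may use at most one unique literal or expression
// across their source operands.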
4115 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4116   unsigned Opcode = Inst.getOpcode();
4117   const MCInstrDesc &Desc = MII.get(Opcode);
4118   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4119     return true;
4120 
4121   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4122   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4123 
4124   const int OpIndices[] = { Src0Idx, Src1Idx };
4125 
4126   unsigned NumExprs = 0;
4127   unsigned NumLiterals = 0;
4128   uint32_t LiteralValue;
4129 
4130   for (int OpIdx : OpIndices) {
4131     if (OpIdx == -1) break;
4132 
4133     const MCOperand &MO = Inst.getOperand(OpIdx);
    // Exclude special imm operands (like those used by s_set_gpr_idx_on)
4135     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4136       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4137         uint32_t Value = static_cast<uint32_t>(MO.getImm());
4138         if (NumLiterals == 0 || LiteralValue != Value) {
4139           LiteralValue = Value;
4140           ++NumLiterals;
4141         }
4142       } else if (MO.isExpr()) {
4143         ++NumExprs;
4144       }
4145     }
4146   }
4147 
4148   return NumLiterals + NumExprs <= 1;
4149 }
4150 
4151 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4152   const unsigned Opc = Inst.getOpcode();
4153   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
4154       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
4155     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4156     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4157 
4158     if (OpSel & ~3)
4159       return false;
4160   }
4161 
4162   if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) {
4163     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4164     if (OpSelIdx != -1) {
4165       if (Inst.getOperand(OpSelIdx).getImm() != 0)
4166         return false;
4167     }
4168     int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4169     if (OpSelHiIdx != -1) {
4170       if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4171         return false;
4172     }
4173   }
4174 
4175   return true;
4176 }
4177 
4178 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4179                                   const OperandVector &Operands) {
4180   const unsigned Opc = Inst.getOpcode();
4181   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4182   if (DppCtrlIdx < 0)
4183     return true;
4184   unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4185 
4186   if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
4187     // DPP64 is supported for row_newbcast only.
4188     int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4189     if (Src0Idx >= 0 &&
4190         getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
4191       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4192       Error(S, "64 bit dpp only supports row_newbcast");
4193       return false;
4194     }
4195   }
4196 
4197   return true;
4198 }
4199 
4200 // Check if VCC register matches wavefront size
4201 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4202   auto FB = getFeatureBits();
4203   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4204     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4205 }
4206 
4207 // One unique literal can be used. VOP3 literal is only allowed in GFX10+
4208 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4209                                          const OperandVector &Operands) {
4210   unsigned Opcode = Inst.getOpcode();
4211   const MCInstrDesc &Desc = MII.get(Opcode);
4212   const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
4213   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4214       ImmIdx == -1)
4215     return true;
4216 
4217   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4218   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4219   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
4220 
4221   const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};
4222 
4223   unsigned NumExprs = 0;
4224   unsigned NumLiterals = 0;
4225   uint32_t LiteralValue;
4226 
4227   for (int OpIdx : OpIndices) {
4228     if (OpIdx == -1)
4229       continue;
4230 
4231     const MCOperand &MO = Inst.getOperand(OpIdx);
4232     if (!MO.isImm() && !MO.isExpr())
4233       continue;
4234     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4235       continue;
4236 
4237     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4238         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4239       Error(getConstLoc(Operands),
4240             "inline constants are not allowed for this operand");
4241       return false;
4242     }
4243 
4244     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4245       uint32_t Value = static_cast<uint32_t>(MO.getImm());
4246       if (NumLiterals == 0 || LiteralValue != Value) {
4247         LiteralValue = Value;
4248         ++NumLiterals;
4249       }
4250     } else if (MO.isExpr()) {
4251       ++NumExprs;
4252     }
4253   }
4254   NumLiterals += NumExprs;
4255 
4256   if (!NumLiterals)
4257     return true;
4258 
4259   if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4260     Error(getLitLoc(Operands), "literal operands are not supported");
4261     return false;
4262   }
4263 
4264   if (NumLiterals > 1) {
4265     Error(getLitLoc(Operands), "only one literal operand is allowed");
4266     return false;
4267   }
4268 
4269   return true;
4270 }
4271 
4272 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4273 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4274                          const MCRegisterInfo *MRI) {
4275   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4276   if (OpIdx < 0)
4277     return -1;
4278 
4279   const MCOperand &Op = Inst.getOperand(OpIdx);
4280   if (!Op.isReg())
4281     return -1;
4282 
4283   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4284   auto Reg = Sub ? Sub : Op.getReg();
4285   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4286   return AGPR32.contains(Reg) ? 1 : 0;
4287 }
4288 
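// Data and destination operands of memory and DS instructions must be
// either all VGPRs or all AGPRs; AGPR forms are only valid on targets
// with gfx90a instructions.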
4289 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4290   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4291   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4292                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4293                   SIInstrFlags::DS)) == 0)
4294     return true;
4295 
4296   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4297                                                       : AMDGPU::OpName::vdata;
4298 
4299   const MCRegisterInfo *MRI = getMRI();
4300   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4301   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4302 
4303   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4304     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4305     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4306       return false;
4307   }
4308 
4309   auto FB = getFeatureBits();
4310   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4311     if (DataAreg < 0 || DstAreg < 0)
4312       return true;
4313     return DstAreg == DataAreg;
4314   }
4315 
4316   return DstAreg < 1 && DataAreg < 1;
4317 }
4318 
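// On targets with gfx90a instructions, VGPR and AGPR tuples must start
// at an even register index.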
4319 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4320   auto FB = getFeatureBits();
4321   if (!FB[AMDGPU::FeatureGFX90AInsts])
4322     return true;
4323 
4324   const MCRegisterInfo *MRI = getMRI();
4325   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4326   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4327   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4328     const MCOperand &Op = Inst.getOperand(I);
4329     if (!Op.isReg())
4330       continue;
4331 
4332     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4333     if (!Sub)
4334       continue;
4335 
4336     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4337       return false;
4338     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4339       return false;
4340   }
4341 
4342   return true;
4343 }
4344 
4345 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4346   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4347     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4348     if (Op.isBLGP())
4349       return Op.getStartLoc();
4350   }
4351   return SMLoc();
4352 }
4353 
4354 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4355                                    const OperandVector &Operands) {
4356   unsigned Opc = Inst.getOpcode();
4357   int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4358   if (BlgpIdx == -1)
4359     return true;
4360   SMLoc BLGPLoc = getBLGPLoc(Operands);
4361   if (!BLGPLoc.isValid())
4362     return true;
4363   bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:");
4364   auto FB = getFeatureBits();
4365   bool UsesNeg = false;
4366   if (FB[AMDGPU::FeatureGFX940Insts]) {
4367     switch (Opc) {
4368     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4369     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4370     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4371     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4372       UsesNeg = true;
4373     }
4374   }
4375 
4376   if (IsNeg == UsesNeg)
4377     return true;
4378 
4379   Error(BLGPLoc,
4380         UsesNeg ? "invalid modifier: blgp is not supported"
4381                 : "invalid modifier: neg is not supported");
4382 
4383   return false;
4384 }
4385 
4386 // gfx90a has an undocumented limitation:
4387 // DS_GWS opcodes must use even aligned registers.
4388 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4389                                   const OperandVector &Operands) {
4390   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4391     return true;
4392 
4393   int Opc = Inst.getOpcode();
4394   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4395       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4396     return true;
4397 
4398   const MCRegisterInfo *MRI = getMRI();
4399   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4400   int Data0Pos =
4401       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4402   assert(Data0Pos != -1);
4403   auto Reg = Inst.getOperand(Data0Pos).getReg();
4404   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4405   if (RegIdx & 1) {
4406     SMLoc RegLoc = getRegLoc(Reg, Operands);
4407     Error(RegLoc, "vgpr must be even aligned");
4408     return false;
4409   }
4410 
4411   return true;
4412 }
4413 
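// Validate cache policy bits (glc/slc/dlc/scc): SMEM restrictions,
// scc availability, and the glc/sc0 requirements of atomics.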
4414 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4415                                             const OperandVector &Operands,
4416                                             const SMLoc &IDLoc) {
4417   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4418                                            AMDGPU::OpName::cpol);
4419   if (CPolPos == -1)
4420     return true;
4421 
4422   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4423 
4424   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4425   if (TSFlags & SIInstrFlags::SMRD) {
4426     if (CPol && (isSI() || isCI())) {
4427       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4428       Error(S, "cache policy is not supported for SMRD instructions");
4429       return false;
4430     }
4431     if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4432       Error(IDLoc, "invalid cache policy for SMEM instruction");
4433       return false;
4434     }
4435   }
4436 
4437   if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4438     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4439     StringRef CStr(S.getPointer());
4440     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4441     Error(S, "scc is not supported on this GPU");
4442     return false;
4443   }
4444 
4445   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4446     return true;
4447 
4448   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4449     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4450       Error(IDLoc, isGFX940() ? "instruction must use sc0"
4451                               : "instruction must use glc");
4452       return false;
4453     }
4454   } else {
4455     if (CPol & CPol::GLC) {
4456       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4457       StringRef CStr(S.getPointer());
4458       S = SMLoc::getFromPointer(
4459           &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4460       Error(S, isGFX940() ? "instruction must not use sc0"
4461                           : "instruction must not use glc");
4462       return false;
4463     }
4464   }
4465 
4466   return true;
4467 }
4468 
4469 bool AMDGPUAsmParser::validateFlatLdsDMA(const MCInst &Inst,
4470                                          const OperandVector &Operands,
4471                                          const SMLoc &IDLoc) {
4472   if (isGFX940())
4473     return true;
4474 
4475   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4476   if ((TSFlags & (SIInstrFlags::VALU | SIInstrFlags::FLAT)) !=
4477       (SIInstrFlags::VALU | SIInstrFlags::FLAT))
4478     return true;
4479   // This is FLAT LDS DMA.
4480 
4481   SMLoc S = getImmLoc(AMDGPUOperand::ImmTyLDS, Operands);
4482   StringRef CStr(S.getPointer());
4483   if (!CStr.startswith("lds")) {
    // This is an incorrectly selected LDS DMA version of a FLAT load opcode.
    // The LDS version should have the 'lds' modifier, but since it follows the
    // optional operands its absence is ignored by the matcher.
4487     Error(IDLoc, "invalid operands for instruction");
4488     return false;
4489   }
4490 
4491   return true;
4492 }
4493 
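// Run target-specific semantic checks that the generic matcher cannot
// express. Reports an error and returns false on the first violation.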
4494 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4495                                           const SMLoc &IDLoc,
4496                                           const OperandVector &Operands) {
4497   if (auto ErrMsg = validateLdsDirect(Inst)) {
4498     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4499     return false;
4500   }
4501   if (!validateSOPLiteral(Inst)) {
4502     Error(getLitLoc(Operands),
4503       "only one literal operand is allowed");
4504     return false;
4505   }
4506   if (!validateVOPLiteral(Inst, Operands)) {
4507     return false;
4508   }
4509   if (!validateConstantBusLimitations(Inst, Operands)) {
4510     return false;
4511   }
4512   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4513     return false;
4514   }
4515   if (!validateIntClampSupported(Inst)) {
4516     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4517       "integer clamping is not supported on this GPU");
4518     return false;
4519   }
4520   if (!validateOpSel(Inst)) {
4521     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4522       "invalid op_sel operand");
4523     return false;
4524   }
4525   if (!validateDPP(Inst, Operands)) {
4526     return false;
4527   }
4528   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4529   if (!validateMIMGD16(Inst)) {
4530     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4531       "d16 modifier is not supported on this GPU");
4532     return false;
4533   }
4534   if (!validateMIMGDim(Inst)) {
4535     Error(IDLoc, "dim modifier is required on this GPU");
4536     return false;
4537   }
4538   if (!validateMIMGMSAA(Inst)) {
4539     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4540           "invalid dim; must be MSAA type");
4541     return false;
4542   }
4543   if (auto ErrMsg = validateMIMGDataSize(Inst)) {
4544     Error(IDLoc, *ErrMsg);
4545     return false;
4546   }
4547   if (!validateMIMGAddrSize(Inst)) {
4548     Error(IDLoc,
4549       "image address size does not match dim and a16");
4550     return false;
4551   }
4552   if (!validateMIMGAtomicDMask(Inst)) {
4553     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4554       "invalid atomic image dmask");
4555     return false;
4556   }
4557   if (!validateMIMGGatherDMask(Inst)) {
4558     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4559       "invalid image_gather dmask: only one bit must be set");
4560     return false;
4561   }
4562   if (!validateMovrels(Inst, Operands)) {
4563     return false;
4564   }
4565   if (!validateFlatOffset(Inst, Operands)) {
4566     return false;
4567   }
4568   if (!validateSMEMOffset(Inst, Operands)) {
4569     return false;
4570   }
4571   if (!validateMAIAccWrite(Inst, Operands)) {
4572     return false;
4573   }
4574   if (!validateMFMA(Inst, Operands)) {
4575     return false;
4576   }
4577   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4578     return false;
4579   }
4580 
4581   if (!validateAGPRLdSt(Inst)) {
4582     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4583     ? "invalid register class: data and dst should be all VGPR or AGPR"
4584     : "invalid register class: agpr loads and stores not supported on this GPU"
4585     );
4586     return false;
4587   }
4588   if (!validateVGPRAlign(Inst)) {
4589     Error(IDLoc,
4590       "invalid register class: vgpr tuples must be 64 bit aligned");
4591     return false;
4592   }
4593   if (!validateGWS(Inst, Operands)) {
4594     return false;
4595   }
4596 
4597   if (!validateBLGP(Inst, Operands)) {
4598     return false;
4599   }
4600 
4601   if (!validateDivScale(Inst)) {
4602     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4603     return false;
4604   }
4609   if (!validateFlatLdsDMA(Inst, Operands, IDLoc)) {
4610     return false;
4611   }
4612 
4613   return true;
4614 }
4615 
4616 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4617                                             const FeatureBitset &FBS,
4618                                             unsigned VariantID = 0);
4619 
4620 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4621                                 const FeatureBitset &AvailableFeatures,
4622                                 unsigned VariantID);
4623 
4624 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4625                                        const FeatureBitset &FBS) {
4626   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4627 }
4628 
4629 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4630                                        const FeatureBitset &FBS,
4631                                        ArrayRef<unsigned> Variants) {
4632   for (auto Variant : Variants) {
4633     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4634       return true;
4635   }
4636 
4637   return false;
4638 }
4639 
4640 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4641                                                   const SMLoc &IDLoc) {
4642   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4643 
4644   // Check if requested instruction variant is supported.
4645   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4646     return false;
4647 
4648   // This instruction is not supported.
4649   // Clear any other pending errors because they are no longer relevant.
4650   getParser().clearPendingErrors();
4651 
4652   // Requested instruction variant is not supported.
4653   // Check if any other variants are supported.
4654   StringRef VariantName = getMatchedVariantName();
4655   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4656     return Error(IDLoc,
4657                  Twine(VariantName,
4658                        " variant of this instruction is not supported"));
4659   }
4660 
4661   // Finally check if this instruction is supported on any other GPU.
4662   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4663     return Error(IDLoc, "instruction not supported on this GPU");
4664   }
4665 
4666   // Instruction not supported on any GPU. Probably a typo.
4667   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4668   return Error(IDLoc, "invalid instruction" + Suggestion);
4669 }
4670 
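// Try all applicable asm variants, keep the most specific match status,
// and on a successful match validate and emit the instruction.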
4671 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4672                                               OperandVector &Operands,
4673                                               MCStreamer &Out,
4674                                               uint64_t &ErrorInfo,
4675                                               bool MatchingInlineAsm) {
4676   MCInst Inst;
4677   unsigned Result = Match_Success;
4678   for (auto Variant : getMatchedVariants()) {
4679     uint64_t EI;
4680     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4681                                   Variant);
    // We order match statuses from least to most specific and use the most
    // specific one as the result:
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4685     if ((R == Match_Success) ||
4686         (R == Match_PreferE32) ||
4687         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4688         (R == Match_InvalidOperand && Result != Match_MissingFeature
4689                                    && Result != Match_PreferE32) ||
4690         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4691                                    && Result != Match_MissingFeature
4692                                    && Result != Match_PreferE32)) {
4693       Result = R;
4694       ErrorInfo = EI;
4695     }
4696     if (R == Match_Success)
4697       break;
4698   }
4699 
4700   if (Result == Match_Success) {
4701     if (!validateInstruction(Inst, IDLoc, Operands)) {
4702       return true;
4703     }
4704     Inst.setLoc(IDLoc);
4705     Out.emitInstruction(Inst, getSTI());
4706     return false;
4707   }
4708 
4709   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4710   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4711     return true;
4712   }
4713 
4714   switch (Result) {
4715   default: break;
4716   case Match_MissingFeature:
4717     // It has been verified that the specified instruction
4718     // mnemonic is valid. A match was found but it requires
4719     // features which are not supported on this GPU.
4720     return Error(IDLoc, "operands are not valid for this GPU or mode");
4721 
4722   case Match_InvalidOperand: {
4723     SMLoc ErrorLoc = IDLoc;
4724     if (ErrorInfo != ~0ULL) {
4725       if (ErrorInfo >= Operands.size()) {
4726         return Error(IDLoc, "too few operands for instruction");
4727       }
4728       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4729       if (ErrorLoc == SMLoc())
4730         ErrorLoc = IDLoc;
4731     }
4732     return Error(ErrorLoc, "invalid operand for instruction");
4733   }
4734 
4735   case Match_PreferE32:
4736     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4737                         "should be encoded as e32");
4738   case Match_MnemonicFail:
4739     llvm_unreachable("Invalid instructions should have been handled already");
4740   }
4741   llvm_unreachable("Implement any new match types added!");
4742 }
4743 
4744 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4745   int64_t Tmp = -1;
4746   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4747     return true;
4748   }
4749   if (getParser().parseAbsoluteExpression(Tmp)) {
4750     return true;
4751   }
4752   Ret = static_cast<uint32_t>(Tmp);
4753   return false;
4754 }
4755 
4756 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4757                                                uint32_t &Minor) {
4758   if (ParseAsAbsoluteExpression(Major))
4759     return TokError("invalid major version");
4760 
4761   if (!trySkipToken(AsmToken::Comma))
4762     return TokError("minor version number required, comma expected");
4763 
4764   if (ParseAsAbsoluteExpression(Minor))
4765     return TokError("invalid minor version");
4766 
4767   return false;
4768 }
4769 
4770 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4771   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4772     return TokError("directive only supported for amdgcn architecture");
4773 
4774   std::string TargetIDDirective;
4775   SMLoc TargetStart = getTok().getLoc();
4776   if (getParser().parseEscapedString(TargetIDDirective))
4777     return true;
4778 
4779   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4780   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4781     return getParser().Error(TargetRange.Start,
4782         (Twine(".amdgcn_target directive's target id ") +
4783          Twine(TargetIDDirective) +
4784          Twine(" does not match the specified target id ") +
4785          Twine(getTargetStreamer().getTargetID()->toString())).str());
4786 
4787   return false;
4788 }
4789 
4790 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4791   return Error(Range.Start, "value out of range", Range);
4792 }
4793 
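// Convert the raw VGPR/SGPR counts from the .amdhsa_next_free_* directives
// into the granulated block counts stored in the kernel descriptor, accounting
// for extra SGPRs (VCC, flat scratch, XNACK) and per-generation SGPR limits.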
4794 bool AMDGPUAsmParser::calculateGPRBlocks(
4795     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4796     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4797     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4798     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4799   // TODO(scott.linder): These calculations are duplicated from
4800   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4801   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4802 
4803   unsigned NumVGPRs = NextFreeVGPR;
4804   unsigned NumSGPRs = NextFreeSGPR;
4805 
4806   if (Version.Major >= 10)
4807     NumSGPRs = 0;
4808   else {
4809     unsigned MaxAddressableNumSGPRs =
4810         IsaInfo::getAddressableNumSGPRs(&getSTI());
4811 
4812     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4813         NumSGPRs > MaxAddressableNumSGPRs)
4814       return OutOfRangeError(SGPRRange);
4815 
4816     NumSGPRs +=
4817         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4818 
4819     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4820         NumSGPRs > MaxAddressableNumSGPRs)
4821       return OutOfRangeError(SGPRRange);
4822 
4823     if (Features.test(FeatureSGPRInitBug))
4824       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4825   }
4826 
4827   VGPRBlocks =
4828       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4829   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4830 
4831   return false;
4832 }
4833 
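// Parse an ".amdhsa_kernel <name>" ... ".end_amdhsa_kernel" block. Each line
// holds one .amdhsa_* key/value directive that fills in a field of the kernel
// descriptor, which is emitted through the target streamer once the block is
// complete.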
4834 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4835   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4836     return TokError("directive only supported for amdgcn architecture");
4837 
4838   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4839     return TokError("directive only supported for amdhsa OS");
4840 
4841   StringRef KernelName;
4842   if (getParser().parseIdentifier(KernelName))
4843     return true;
4844 
4845   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4846 
4847   StringSet<> Seen;
4848 
4849   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4850 
4851   SMRange VGPRRange;
4852   uint64_t NextFreeVGPR = 0;
4853   uint64_t AccumOffset = 0;
4854   uint64_t SharedVGPRCount = 0;
4855   SMRange SGPRRange;
4856   uint64_t NextFreeSGPR = 0;
4857 
4858   // Count the number of user SGPRs implied from the enabled feature bits.
4859   unsigned ImpliedUserSGPRCount = 0;
4860 
4861   // Track if the asm explicitly contains the directive for the user SGPR
4862   // count.
4863   Optional<unsigned> ExplicitUserSGPRCount;
4864   bool ReserveVCC = true;
4865   bool ReserveFlatScr = true;
4866   Optional<bool> EnableWavefrontSize32;
4867 
4868   while (true) {
4869     while (trySkipToken(AsmToken::EndOfStatement));
4870 
4871     StringRef ID;
4872     SMRange IDRange = getTok().getLocRange();
4873     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4874       return true;
4875 
4876     if (ID == ".end_amdhsa_kernel")
4877       break;
4878 
4879     if (Seen.find(ID) != Seen.end())
4880       return TokError(".amdhsa_ directives cannot be repeated");
4881     Seen.insert(ID);
4882 
4883     SMLoc ValStart = getLoc();
4884     int64_t IVal;
4885     if (getParser().parseAbsoluteExpression(IVal))
4886       return true;
4887     SMLoc ValEnd = getLoc();
4888     SMRange ValRange = SMRange(ValStart, ValEnd);
4889 
4890     if (IVal < 0)
4891       return OutOfRangeError(ValRange);
4892 
4893     uint64_t Val = IVal;
4894 
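// Range-check VALUE against the bit width of ENTRY and store it into the
// corresponding bitfield of FIELD in the kernel descriptor.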
4895 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4896   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4897     return OutOfRangeError(RANGE);                                             \
4898   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4899 
4900     if (ID == ".amdhsa_group_segment_fixed_size") {
4901       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4902         return OutOfRangeError(ValRange);
4903       KD.group_segment_fixed_size = Val;
4904     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4905       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4906         return OutOfRangeError(ValRange);
4907       KD.private_segment_fixed_size = Val;
4908     } else if (ID == ".amdhsa_kernarg_size") {
4909       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4910         return OutOfRangeError(ValRange);
4911       KD.kernarg_size = Val;
4912     } else if (ID == ".amdhsa_user_sgpr_count") {
4913       ExplicitUserSGPRCount = Val;
4914     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4915       if (hasArchitectedFlatScratch())
4916         return Error(IDRange.Start,
4917                      "directive is not supported with architected flat scratch",
4918                      IDRange);
4919       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4920                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4921                        Val, ValRange);
4922       if (Val)
4923         ImpliedUserSGPRCount += 4;
4924     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4925       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4926                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4927                        ValRange);
4928       if (Val)
4929         ImpliedUserSGPRCount += 2;
4930     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4931       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4932                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4933                        ValRange);
4934       if (Val)
4935         ImpliedUserSGPRCount += 2;
4936     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4937       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4938                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4939                        Val, ValRange);
4940       if (Val)
4941         ImpliedUserSGPRCount += 2;
4942     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4943       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4944                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4945                        ValRange);
4946       if (Val)
4947         ImpliedUserSGPRCount += 2;
4948     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4949       if (hasArchitectedFlatScratch())
4950         return Error(IDRange.Start,
4951                      "directive is not supported with architected flat scratch",
4952                      IDRange);
4953       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4954                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4955                        ValRange);
4956       if (Val)
4957         ImpliedUserSGPRCount += 2;
4958     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4959       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4960                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4961                        Val, ValRange);
4962       if (Val)
4963         ImpliedUserSGPRCount += 1;
4964     } else if (ID == ".amdhsa_wavefront_size32") {
4965       if (IVersion.Major < 10)
4966         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4967       EnableWavefrontSize32 = Val;
4968       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4969                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4970                        Val, ValRange);
4971     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4972       if (hasArchitectedFlatScratch())
4973         return Error(IDRange.Start,
4974                      "directive is not supported with architected flat scratch",
4975                      IDRange);
4976       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4977                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4978     } else if (ID == ".amdhsa_enable_private_segment") {
4979       if (!hasArchitectedFlatScratch())
4980         return Error(
4981             IDRange.Start,
4982             "directive is not supported without architected flat scratch",
4983             IDRange);
4984       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4985                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4986     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4987       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4988                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4989                        ValRange);
4990     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4991       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4992                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4993                        ValRange);
4994     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4995       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4996                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4997                        ValRange);
4998     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4999       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5000                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
5001                        ValRange);
5002     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
5003       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5004                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
5005                        ValRange);
5006     } else if (ID == ".amdhsa_next_free_vgpr") {
5007       VGPRRange = ValRange;
5008       NextFreeVGPR = Val;
5009     } else if (ID == ".amdhsa_next_free_sgpr") {
5010       SGPRRange = ValRange;
5011       NextFreeSGPR = Val;
5012     } else if (ID == ".amdhsa_accum_offset") {
5013       if (!isGFX90A())
5014         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5015       AccumOffset = Val;
5016     } else if (ID == ".amdhsa_reserve_vcc") {
5017       if (!isUInt<1>(Val))
5018         return OutOfRangeError(ValRange);
5019       ReserveVCC = Val;
5020     } else if (ID == ".amdhsa_reserve_flat_scratch") {
5021       if (IVersion.Major < 7)
5022         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
5023       if (hasArchitectedFlatScratch())
5024         return Error(IDRange.Start,
5025                      "directive is not supported with architected flat scratch",
5026                      IDRange);
5027       if (!isUInt<1>(Val))
5028         return OutOfRangeError(ValRange);
5029       ReserveFlatScr = Val;
5030     } else if (ID == ".amdhsa_reserve_xnack_mask") {
5031       if (IVersion.Major < 8)
5032         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5033       if (!isUInt<1>(Val))
5034         return OutOfRangeError(ValRange);
5035       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
        return getParser().Error(
            IDRange.Start,
            ".amdhsa_reserve_xnack_mask does not match target id", IDRange);
5038     } else if (ID == ".amdhsa_float_round_mode_32") {
5039       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5040                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
5041     } else if (ID == ".amdhsa_float_round_mode_16_64") {
5042       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5043                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
5044     } else if (ID == ".amdhsa_float_denorm_mode_32") {
5045       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5046                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
5047     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5048       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5049                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
5050                        ValRange);
5051     } else if (ID == ".amdhsa_dx10_clamp") {
5052       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5053                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
5054     } else if (ID == ".amdhsa_ieee_mode") {
5055       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
5056                        Val, ValRange);
5057     } else if (ID == ".amdhsa_fp16_overflow") {
5058       if (IVersion.Major < 9)
5059         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5060       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
5061                        ValRange);
5062     } else if (ID == ".amdhsa_tg_split") {
5063       if (!isGFX90A())
5064         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5065       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
5066                        ValRange);
5067     } else if (ID == ".amdhsa_workgroup_processor_mode") {
5068       if (IVersion.Major < 10)
5069         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5070       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
5071                        ValRange);
5072     } else if (ID == ".amdhsa_memory_ordered") {
5073       if (IVersion.Major < 10)
5074         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5075       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
5076                        ValRange);
5077     } else if (ID == ".amdhsa_forward_progress") {
5078       if (IVersion.Major < 10)
5079         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5080       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
5081                        ValRange);
5082     } else if (ID == ".amdhsa_shared_vgpr_count") {
5083       if (IVersion.Major < 10)
5084         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5085       SharedVGPRCount = Val;
5086       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5087                        COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val,
5088                        ValRange);
5089     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5090       PARSE_BITS_ENTRY(
5091           KD.compute_pgm_rsrc2,
5092           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
5093           ValRange);
5094     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5095       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5096                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5097                        Val, ValRange);
5098     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5099       PARSE_BITS_ENTRY(
5100           KD.compute_pgm_rsrc2,
5101           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
5102           ValRange);
5103     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5104       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5105                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5106                        Val, ValRange);
5107     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5108       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5109                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5110                        Val, ValRange);
5111     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5112       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5113                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5114                        Val, ValRange);
5115     } else if (ID == ".amdhsa_exception_int_div_zero") {
5116       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5117                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5118                        Val, ValRange);
5119     } else {
5120       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5121     }
5122 
5123 #undef PARSE_BITS_ENTRY
5124   }
5125 
5126   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
5127     return TokError(".amdhsa_next_free_vgpr directive is required");
5128 
5129   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
5130     return TokError(".amdhsa_next_free_sgpr directive is required");
5131 
5132   unsigned VGPRBlocks;
5133   unsigned SGPRBlocks;
5134   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5135                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5136                          EnableWavefrontSize32, NextFreeVGPR,
5137                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5138                          SGPRBlocks))
5139     return true;
5140 
5141   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5142           VGPRBlocks))
5143     return OutOfRangeError(VGPRRange);
5144   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5145                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
5146 
5147   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5148           SGPRBlocks))
5149     return OutOfRangeError(SGPRRange);
5150   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5151                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
5152                   SGPRBlocks);
5153 
5154   if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
    return TokError(".amdhsa_user_sgpr_count smaller than implied by "
                    "enabled user SGPRs");
5157 
5158   unsigned UserSGPRCount =
5159       ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5160 
5161   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5162     return TokError("too many user SGPRs enabled");
5163   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
5164                   UserSGPRCount);
5165 
5166   if (isGFX90A()) {
5167     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
5168       return TokError(".amdhsa_accum_offset directive is required");
5169     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5170       return TokError("accum_offset should be in range [4..256] in "
5171                       "increments of 4");
5172     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5173       return TokError("accum_offset exceeds total VGPR allocation");
5174     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5175                     (AccumOffset / 4 - 1));
5176   }
5177 
5178   if (IVersion.Major == 10) {
    // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
5180     if (SharedVGPRCount && EnableWavefrontSize32) {
5181       return TokError("shared_vgpr_count directive not valid on "
5182                       "wavefront size 32");
5183     }
5184     if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5185       return TokError("shared_vgpr_count*2 + "
5186                       "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
                      "exceed 63");
5188     }
5189   }
5190 
5191   getTargetStreamer().EmitAmdhsaKernelDescriptor(
5192       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
5193       ReserveFlatScr);
5194   return false;
5195 }
5196 
5197 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
5198   uint32_t Major;
5199   uint32_t Minor;
5200 
5201   if (ParseDirectiveMajorMinor(Major, Minor))
5202     return true;
5203 
5204   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
5205   return false;
5206 }
5207 
5208 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
5209   uint32_t Major;
5210   uint32_t Minor;
5211   uint32_t Stepping;
5212   StringRef VendorName;
5213   StringRef ArchName;
5214 
5215   // If this directive has no arguments, then use the ISA version for the
5216   // targeted GPU.
5217   if (isToken(AsmToken::EndOfStatement)) {
5218     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5219     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
5220                                                         ISA.Stepping,
5221                                                         "AMD", "AMDGPU");
5222     return false;
5223   }
5224 
5225   if (ParseDirectiveMajorMinor(Major, Minor))
5226     return true;
5227 
5228   if (!trySkipToken(AsmToken::Comma))
5229     return TokError("stepping version number required, comma expected");
5230 
5231   if (ParseAsAbsoluteExpression(Stepping))
5232     return TokError("invalid stepping version");
5233 
5234   if (!trySkipToken(AsmToken::Comma))
5235     return TokError("vendor name required, comma expected");
5236 
5237   if (!parseString(VendorName, "invalid vendor name"))
5238     return true;
5239 
5240   if (!trySkipToken(AsmToken::Comma))
5241     return TokError("arch name required, comma expected");
5242 
5243   if (!parseString(ArchName, "invalid arch name"))
5244     return true;
5245 
5246   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
5247                                                       VendorName, ArchName);
5248   return false;
5249 }
5250 
5251 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5252                                                amd_kernel_code_t &Header) {
5253   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5254   // assembly for backwards compatibility.
5255   if (ID == "max_scratch_backing_memory_byte_size") {
5256     Parser.eatToEndOfStatement();
5257     return false;
5258   }
5259 
5260   SmallString<40> ErrStr;
5261   raw_svector_ostream Err(ErrStr);
5262   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5263     return TokError(Err.str());
5264   }
5265   Lex();
5266 
5267   if (ID == "enable_wavefront_size32") {
5268     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5269       if (!isGFX10Plus())
5270         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5271       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5272         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5273     } else {
5274       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5275         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5276     }
5277   }
5278 
5279   if (ID == "wavefront_size") {
5280     if (Header.wavefront_size == 5) {
5281       if (!isGFX10Plus())
5282         return TokError("wavefront_size=5 is only allowed on GFX10+");
5283       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5284         return TokError("wavefront_size=5 requires +WavefrontSize32");
5285     } else if (Header.wavefront_size == 6) {
5286       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5287         return TokError("wavefront_size=6 requires +WavefrontSize64");
5288     }
5289   }
5290 
5291   if (ID == "enable_wgp_mode") {
5292     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5293         !isGFX10Plus())
5294       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5295   }
5296 
5297   if (ID == "enable_mem_ordered") {
5298     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5299         !isGFX10Plus())
5300       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5301   }
5302 
5303   if (ID == "enable_fwd_progress") {
5304     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5305         !isGFX10Plus())
5306       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5307   }
5308 
5309   return false;
5310 }
5311 
5312 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5313   amd_kernel_code_t Header;
5314   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5315 
5316   while (true) {
5317     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
5318     // will set the current token to EndOfStatement.
    while (trySkipToken(AsmToken::EndOfStatement));
5320 
5321     StringRef ID;
5322     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5323       return true;
5324 
5325     if (ID == ".end_amd_kernel_code_t")
5326       break;
5327 
5328     if (ParseAMDKernelCodeTValue(ID, Header))
5329       return true;
5330   }
5331 
5332   getTargetStreamer().EmitAMDKernelCodeT(Header);
5333 
5334   return false;
5335 }
5336 
5337 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5338   StringRef KernelName;
5339   if (!parseId(KernelName, "expected symbol name"))
5340     return true;
5341 
5342   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5343                                            ELF::STT_AMDGPU_HSA_KERNEL);
5344 
5345   KernelScope.initialize(getContext());
5346   return false;
5347 }
5348 
5349 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5350   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5351     return Error(getLoc(),
5352                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5353                  "architectures");
5354   }
5355 
5356   auto TargetIDDirective = getLexer().getTok().getStringContents();
5357   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5358     return Error(getParser().getTok().getLoc(), "target id must match options");
5359 
5360   getTargetStreamer().EmitISAVersion();
5361   Lex();
5362 
5363   return false;
5364 }
5365 
5366 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5367   const char *AssemblerDirectiveBegin;
5368   const char *AssemblerDirectiveEnd;
5369   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5370       isHsaAbiVersion3AndAbove(&getSTI())
5371           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5372                             HSAMD::V3::AssemblerDirectiveEnd)
5373           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5374                             HSAMD::AssemblerDirectiveEnd);
5375 
5376   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5377     return Error(getLoc(),
5378                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5379                  "not available on non-amdhsa OSes")).str());
5380   }
5381 
5382   std::string HSAMetadataString;
5383   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5384                           HSAMetadataString))
5385     return true;
5386 
5387   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5388     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5389       return Error(getLoc(), "invalid HSA metadata");
5390   } else {
5391     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5392       return Error(getLoc(), "invalid HSA metadata");
5393   }
5394 
5395   return false;
5396 }
5397 
5398 /// Common code to parse out a block of text (typically YAML) between start and
5399 /// end directives.
5400 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5401                                           const char *AssemblerDirectiveEnd,
5402                                           std::string &CollectString) {
5403 
5404   raw_string_ostream CollectStream(CollectString);
5405 
5406   getLexer().setSkipSpace(false);
5407 
5408   bool FoundEnd = false;
5409   while (!isToken(AsmToken::Eof)) {
5410     while (isToken(AsmToken::Space)) {
5411       CollectStream << getTokenStr();
5412       Lex();
5413     }
5414 
5415     if (trySkipId(AssemblerDirectiveEnd)) {
5416       FoundEnd = true;
5417       break;
5418     }
5419 
5420     CollectStream << Parser.parseStringToEndOfStatement()
5421                   << getContext().getAsmInfo()->getSeparatorString();
5422 
5423     Parser.eatToEndOfStatement();
5424   }
5425 
5426   getLexer().setSkipSpace(true);
5427 
5428   if (isToken(AsmToken::Eof) && !FoundEnd) {
5429     return TokError(Twine("expected directive ") +
5430                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5431   }
5432 
5433   CollectStream.flush();
5434   return false;
5435 }
5436 
5437 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5438 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5439   std::string String;
5440   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5441                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5442     return true;
5443 
5444   auto PALMetadata = getTargetStreamer().getPALMetadata();
5445   if (!PALMetadata->setFromString(String))
5446     return Error(getLoc(), "invalid PAL metadata");
5447   return false;
5448 }
5449 
5450 /// Parse the assembler directive for old linear-format PAL metadata.
5451 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5452   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5453     return Error(getLoc(),
5454                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5455                  "not available on non-amdpal OSes")).str());
5456   }
5457 
5458   auto PALMetadata = getTargetStreamer().getPALMetadata();
5459   PALMetadata->setLegacy();
5460   for (;;) {
5461     uint32_t Key, Value;
5462     if (ParseAsAbsoluteExpression(Key)) {
5463       return TokError(Twine("invalid value in ") +
5464                       Twine(PALMD::AssemblerDirective));
5465     }
5466     if (!trySkipToken(AsmToken::Comma)) {
5467       return TokError(Twine("expected an even number of values in ") +
5468                       Twine(PALMD::AssemblerDirective));
5469     }
5470     if (ParseAsAbsoluteExpression(Value)) {
5471       return TokError(Twine("invalid value in ") +
5472                       Twine(PALMD::AssemblerDirective));
5473     }
5474     PALMetadata->setRegister(Key, Value);
5475     if (!trySkipToken(AsmToken::Comma))
5476       break;
5477   }
5478   return false;
5479 }
5480 
5481 /// ParseDirectiveAMDGPULDS
5482 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
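///
/// Example (symbol name illustrative):
///   .amdgpu_lds lds_data, 512, 16
/// reserves 512 bytes of LDS for lds_data with 16-byte alignment; the
/// alignment defaults to 4 when omitted.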
5483 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5484   if (getParser().checkForValidSection())
5485     return true;
5486 
5487   StringRef Name;
5488   SMLoc NameLoc = getLoc();
5489   if (getParser().parseIdentifier(Name))
5490     return TokError("expected identifier in directive");
5491 
5492   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5493   if (parseToken(AsmToken::Comma, "expected ','"))
5494     return true;
5495 
5496   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5497 
5498   int64_t Size;
5499   SMLoc SizeLoc = getLoc();
5500   if (getParser().parseAbsoluteExpression(Size))
5501     return true;
5502   if (Size < 0)
5503     return Error(SizeLoc, "size must be non-negative");
5504   if (Size > LocalMemorySize)
5505     return Error(SizeLoc, "size is too large");
5506 
5507   int64_t Alignment = 4;
5508   if (trySkipToken(AsmToken::Comma)) {
5509     SMLoc AlignLoc = getLoc();
5510     if (getParser().parseAbsoluteExpression(Alignment))
5511       return true;
5512     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5513       return Error(AlignLoc, "alignment must be a power of two");
5514 
5515     // Alignment larger than the size of LDS is possible in theory, as long
    // as the linker manages to place the symbol at address 0, but we do want
5517     // to make sure the alignment fits nicely into a 32-bit integer.
5518     if (Alignment >= 1u << 31)
5519       return Error(AlignLoc, "alignment is too large");
5520   }
5521 
5522   if (parseEOL())
5523     return true;
5524 
5525   Symbol->redefineIfPossible();
5526   if (!Symbol->isUndefined())
5527     return Error(NameLoc, "invalid symbol redefinition");
5528 
5529   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5530   return false;
5531 }
5532 
5533 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5534   StringRef IDVal = DirectiveID.getString();
5535 
5536   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5537     if (IDVal == ".amdhsa_kernel")
5538      return ParseDirectiveAMDHSAKernel();
5539 
5540     // TODO: Restructure/combine with PAL metadata directive.
5541     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5542       return ParseDirectiveHSAMetadata();
5543   } else {
5544     if (IDVal == ".hsa_code_object_version")
5545       return ParseDirectiveHSACodeObjectVersion();
5546 
5547     if (IDVal == ".hsa_code_object_isa")
5548       return ParseDirectiveHSACodeObjectISA();
5549 
5550     if (IDVal == ".amd_kernel_code_t")
5551       return ParseDirectiveAMDKernelCodeT();
5552 
5553     if (IDVal == ".amdgpu_hsa_kernel")
5554       return ParseDirectiveAMDGPUHsaKernel();
5555 
5556     if (IDVal == ".amd_amdgpu_isa")
5557       return ParseDirectiveISAVersion();
5558 
5559     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5560       return ParseDirectiveHSAMetadata();
5561   }
5562 
5563   if (IDVal == ".amdgcn_target")
5564     return ParseDirectiveAMDGCNTarget();
5565 
5566   if (IDVal == ".amdgpu_lds")
5567     return ParseDirectiveAMDGPULDS();
5568 
5569   if (IDVal == PALMD::AssemblerDirectiveBegin)
5570     return ParseDirectivePALMetadataBegin();
5571 
5572   if (IDVal == PALMD::AssemblerDirective)
5573     return ParseDirectivePALMetadata();
5574 
5575   return true;
5576 }
5577 
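// Return true if the physical register RegNo exists on the current subtarget.
// Registers such as the TTMP12-15 tuple, SGPR104/SGPR105, xnack_mask and
// flat_scratch are only valid on specific generations.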
5578 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5579                                            unsigned RegNo) {
5580 
5581   if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
5582     return isGFX9Plus();
5583 
5584   // GFX10 has 2 more SGPRs 104 and 105.
5585   if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
5586     return hasSGPR104_SGPR105();
5587 
5588   switch (RegNo) {
5589   case AMDGPU::SRC_SHARED_BASE:
5590   case AMDGPU::SRC_SHARED_LIMIT:
5591   case AMDGPU::SRC_PRIVATE_BASE:
5592   case AMDGPU::SRC_PRIVATE_LIMIT:
5593   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5594     return isGFX9Plus();
5595   case AMDGPU::TBA:
5596   case AMDGPU::TBA_LO:
5597   case AMDGPU::TBA_HI:
5598   case AMDGPU::TMA:
5599   case AMDGPU::TMA_LO:
5600   case AMDGPU::TMA_HI:
5601     return !isGFX9Plus();
5602   case AMDGPU::XNACK_MASK:
5603   case AMDGPU::XNACK_MASK_LO:
5604   case AMDGPU::XNACK_MASK_HI:
5605     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5606   case AMDGPU::SGPR_NULL:
5607     return isGFX10Plus();
5608   default:
5609     break;
5610   }
5611 
5612   if (isCI())
5613     return true;
5614 
5615   if (isSI() || isGFX10Plus()) {
5616     // No flat_scr on SI.
5617     // On GFX10 flat scratch is not a valid register operand and can only be
5618     // accessed with s_setreg/s_getreg.
5619     switch (RegNo) {
5620     case AMDGPU::FLAT_SCR:
5621     case AMDGPU::FLAT_SCR_LO:
5622     case AMDGPU::FLAT_SCR_HI:
5623       return false;
5624     default:
5625       return true;
5626     }
5627   }
5628 
5629   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5630   // SI/CI have.
5631   if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
5632     return hasSGPR102_SGPR103();
5633 
5634   return true;
5635 }
5636 
5637 OperandMatchResultTy
5638 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5639                               OperandMode Mode) {
5640   // Try to parse with a custom parser
5641   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5642 
  // If we successfully parsed the operand or if there was an error parsing,
  // we are done.
  //
  // If we are parsing after we reach EndOfStatement then this means we
  // are appending default values to the Operands list. This is only done
  // by a custom parser, so we shouldn't continue on to the generic parsing.
5649   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5650       isToken(AsmToken::EndOfStatement))
5651     return ResTy;
5652 
5653   SMLoc RBraceLoc;
5654   SMLoc LBraceLoc = getLoc();
5655   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5656     unsigned Prefix = Operands.size();
5657 
5658     for (;;) {
5659       auto Loc = getLoc();
5660       ResTy = parseReg(Operands);
5661       if (ResTy == MatchOperand_NoMatch)
5662         Error(Loc, "expected a register");
5663       if (ResTy != MatchOperand_Success)
5664         return MatchOperand_ParseFail;
5665 
5666       RBraceLoc = getLoc();
5667       if (trySkipToken(AsmToken::RBrac))
5668         break;
5669 
5670       if (!skipToken(AsmToken::Comma,
5671                      "expected a comma or a closing square bracket")) {
5672         return MatchOperand_ParseFail;
5673       }
5674     }
5675 
5676     if (Operands.size() - Prefix > 1) {
5677       Operands.insert(Operands.begin() + Prefix,
5678                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5679       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5680     }
5681 
5682     return MatchOperand_Success;
5683   }
5684 
5685   return parseRegOrImm(Operands);
5686 }
5687 
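// Strip a forced-encoding suffix ("_e32", "_e64", "_dpp", "_sdwa" or
// "_e64_dpp") from the mnemonic, record the corresponding forced encoding for
// the matcher, and return the bare mnemonic.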
5688 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5689   // Clear any forced encodings from the previous instruction.
5690   setForcedEncodingSize(0);
5691   setForcedDPP(false);
5692   setForcedSDWA(false);
5693 
5694   if (Name.endswith("_e64_dpp")) {
5695     setForcedDPP(true);
5696     setForcedEncodingSize(64);
5697     return Name.substr(0, Name.size() - 8);
5698   } else if (Name.endswith("_e64")) {
5699     setForcedEncodingSize(64);
5700     return Name.substr(0, Name.size() - 4);
5701   } else if (Name.endswith("_e32")) {
5702     setForcedEncodingSize(32);
5703     return Name.substr(0, Name.size() - 4);
5704   } else if (Name.endswith("_dpp")) {
5705     setForcedDPP(true);
5706     return Name.substr(0, Name.size() - 4);
5707   } else if (Name.endswith("_sdwa")) {
5708     setForcedSDWA(true);
5709     return Name.substr(0, Name.size() - 5);
5710   }
5711   return Name;
5712 }
5713 
5714 static void applyMnemonicAliases(StringRef &Mnemonic,
5715                                  const FeatureBitset &Features,
5716                                  unsigned VariantID);
5717 
5718 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5719                                        StringRef Name,
5720                                        SMLoc NameLoc, OperandVector &Operands) {
5721   // Add the instruction mnemonic
5722   Name = parseMnemonicSuffix(Name);
5723 
5724   // If the target architecture uses MnemonicAlias, call it here to parse
5725   // operands correctly.
5726   applyMnemonicAliases(Name, getAvailableFeatures(), 0);
5727 
5728   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5729 
5730   bool IsMIMG = Name.startswith("image_");
5731 
5732   while (!trySkipToken(AsmToken::EndOfStatement)) {
5733     OperandMode Mode = OperandMode_Default;
5734     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5735       Mode = OperandMode_NSA;
5736     CPolSeen = 0;
5737     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5738 
5739     if (Res != MatchOperand_Success) {
5740       checkUnsupportedInstruction(Name, NameLoc);
5741       if (!Parser.hasPendingError()) {
5742         // FIXME: use real operand location rather than the current location.
5743         StringRef Msg =
5744           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5745                                             "not a valid operand.";
5746         Error(getLoc(), Msg);
5747       }
5748       while (!trySkipToken(AsmToken::EndOfStatement)) {
5749         lex();
5750       }
5751       return true;
5752     }
5753 
5754     // Eat the comma or space if there is one.
5755     trySkipToken(AsmToken::Comma);
5756   }
5757 
5758   return false;
5759 }
5760 
5761 //===----------------------------------------------------------------------===//
5762 // Utility functions
5763 //===----------------------------------------------------------------------===//
5764 
5765 OperandMatchResultTy
5766 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5767 
5768   if (!trySkipId(Prefix, AsmToken::Colon))
5769     return MatchOperand_NoMatch;
5770 
5771   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5772 }
5773 
5774 OperandMatchResultTy
5775 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5776                                     AMDGPUOperand::ImmTy ImmTy,
5777                                     bool (*ConvertResult)(int64_t&)) {
5778   SMLoc S = getLoc();
5779   int64_t Value = 0;
5780 
5781   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5782   if (Res != MatchOperand_Success)
5783     return Res;
5784 
5785   if (ConvertResult && !ConvertResult(Value)) {
5786     Error(S, "invalid " + StringRef(Prefix) + " value.");
5787   }
5788 
5789   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5790   return MatchOperand_Success;
5791 }
5792 
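// Parse a prefixed array of single-bit values, e.g. "op_sel:[0,1,1,0]", with
// at most four elements, and pack them into one immediate operand.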
5793 OperandMatchResultTy
5794 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5795                                              OperandVector &Operands,
5796                                              AMDGPUOperand::ImmTy ImmTy,
5797                                              bool (*ConvertResult)(int64_t&)) {
5798   SMLoc S = getLoc();
5799   if (!trySkipId(Prefix, AsmToken::Colon))
5800     return MatchOperand_NoMatch;
5801 
5802   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5803     return MatchOperand_ParseFail;
5804 
5805   unsigned Val = 0;
5806   const unsigned MaxSize = 4;
5807 
5808   // FIXME: How to verify the number of elements matches the number of src
5809   // operands?
5810   for (int I = 0; ; ++I) {
5811     int64_t Op;
5812     SMLoc Loc = getLoc();
5813     if (!parseExpr(Op))
5814       return MatchOperand_ParseFail;
5815 
5816     if (Op != 0 && Op != 1) {
5817       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5818       return MatchOperand_ParseFail;
5819     }
5820 
5821     Val |= (Op << I);
5822 
5823     if (trySkipToken(AsmToken::RBrac))
5824       break;
5825 
5826     if (I + 1 == MaxSize) {
5827       Error(getLoc(), "expected a closing square bracket");
5828       return MatchOperand_ParseFail;
5829     }
5830 
5831     if (!skipToken(AsmToken::Comma, "expected a comma"))
5832       return MatchOperand_ParseFail;
5833   }
5834 
5835   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5836   return MatchOperand_Success;
5837 }
5838 
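// Parse a named boolean modifier written either as "<name>" or "no<name>"
// (for example "r128" / "nor128") into a 1 or 0 immediate operand, with
// GPU-specific checks for the r128 and a16 modifiers.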
5839 OperandMatchResultTy
5840 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5841                                AMDGPUOperand::ImmTy ImmTy) {
5842   int64_t Bit;
5843   SMLoc S = getLoc();
5844 
5845   if (trySkipId(Name)) {
5846     Bit = 1;
5847   } else if (trySkipId("no", Name)) {
5848     Bit = 0;
5849   } else {
5850     return MatchOperand_NoMatch;
5851   }
5852 
5853   if (Name == "r128" && !hasMIMG_R128()) {
5854     Error(S, "r128 modifier is not supported on this GPU");
5855     return MatchOperand_ParseFail;
5856   }
5857   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5858     Error(S, "a16 modifier is not supported on this GPU");
5859     return MatchOperand_ParseFail;
5860   }
5861 
5862   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5863     ImmTy = AMDGPUOperand::ImmTyR128A16;
5864 
5865   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5866   return MatchOperand_Success;
5867 }
5868 
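// Parse cache-policy modifiers. On GFX940, for mnemonics not starting with
// "s_", accept sc0/nt/sc1 and their "no" forms; otherwise accept
// glc/slc/dlc/scc and their "no" forms. Duplicate modifiers are rejected, and
// the resulting bits are folded into a single ImmTyCPol operand.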
5869 OperandMatchResultTy
5870 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5871   unsigned CPolOn = 0;
5872   unsigned CPolOff = 0;
5873   SMLoc S = getLoc();
5874 
5875   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5876   if (isGFX940() && !Mnemo.startswith("s_")) {
5877     if (trySkipId("sc0"))
5878       CPolOn = AMDGPU::CPol::SC0;
5879     else if (trySkipId("nosc0"))
5880       CPolOff = AMDGPU::CPol::SC0;
5881     else if (trySkipId("nt"))
5882       CPolOn = AMDGPU::CPol::NT;
5883     else if (trySkipId("nont"))
5884       CPolOff = AMDGPU::CPol::NT;
5885     else if (trySkipId("sc1"))
5886       CPolOn = AMDGPU::CPol::SC1;
5887     else if (trySkipId("nosc1"))
5888       CPolOff = AMDGPU::CPol::SC1;
5889     else
5890       return MatchOperand_NoMatch;
5891   }
5892   else if (trySkipId("glc"))
5893     CPolOn = AMDGPU::CPol::GLC;
5894   else if (trySkipId("noglc"))
5895     CPolOff = AMDGPU::CPol::GLC;
5896   else if (trySkipId("slc"))
5897     CPolOn = AMDGPU::CPol::SLC;
5898   else if (trySkipId("noslc"))
5899     CPolOff = AMDGPU::CPol::SLC;
5900   else if (trySkipId("dlc"))
5901     CPolOn = AMDGPU::CPol::DLC;
5902   else if (trySkipId("nodlc"))
5903     CPolOff = AMDGPU::CPol::DLC;
5904   else if (trySkipId("scc"))
5905     CPolOn = AMDGPU::CPol::SCC;
5906   else if (trySkipId("noscc"))
5907     CPolOff = AMDGPU::CPol::SCC;
5908   else
5909     return MatchOperand_NoMatch;
5910 
5911   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5912     Error(S, "dlc modifier is not supported on this GPU");
5913     return MatchOperand_ParseFail;
5914   }
5915 
5916   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5917     Error(S, "scc modifier is not supported on this GPU");
5918     return MatchOperand_ParseFail;
5919   }
5920 
5921   if (CPolSeen & (CPolOn | CPolOff)) {
5922     Error(S, "duplicate cache policy modifier");
5923     return MatchOperand_ParseFail;
5924   }
5925 
5926   CPolSeen |= (CPolOn | CPolOff);
5927 
5928   for (unsigned I = 1; I != Operands.size(); ++I) {
5929     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5930     if (Op.isCPol()) {
5931       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5932       return MatchOperand_Success;
5933     }
5934   }
5935 
5936   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5937                                               AMDGPUOperand::ImmTyCPol));
5938 
5939   return MatchOperand_Success;
5940 }
5941 
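// Append an optional immediate to the MCInst: use the operand recorded in
// OptionalIdx if it was parsed, otherwise emit the provided default value.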
5942 static void addOptionalImmOperand(
5943   MCInst& Inst, const OperandVector& Operands,
5944   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5945   AMDGPUOperand::ImmTy ImmT,
5946   int64_t Default = 0) {
5947   auto i = OptionalIdx.find(ImmT);
5948   if (i != OptionalIdx.end()) {
5949     unsigned Idx = i->second;
5950     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5951   } else {
5952     Inst.addOperand(MCOperand::createImm(Default));
5953   }
5954 }
5955 
5956 OperandMatchResultTy
5957 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5958                                        StringRef &Value,
5959                                        SMLoc &StringLoc) {
5960   if (!trySkipId(Prefix, AsmToken::Colon))
5961     return MatchOperand_NoMatch;
5962 
5963   StringLoc = getLoc();
5964   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5965                                                   : MatchOperand_ParseFail;
5966 }
5967 
5968 //===----------------------------------------------------------------------===//
5969 // MTBUF format
5970 //===----------------------------------------------------------------------===//
5971 
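// Try to parse "<Pref>:<value>". Returns false if a parse error or an
// out-of-range value was diagnosed; returns true otherwise, updating Fmt only
// when the prefixed value was actually present.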
5972 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5973                                   int64_t MaxVal,
5974                                   int64_t &Fmt) {
5975   int64_t Val;
5976   SMLoc Loc = getLoc();
5977 
5978   auto Res = parseIntWithPrefix(Pref, Val);
5979   if (Res == MatchOperand_ParseFail)
5980     return false;
5981   if (Res == MatchOperand_NoMatch)
5982     return true;
5983 
5984   if (Val < 0 || Val > MaxVal) {
5985     Error(Loc, Twine("out of range ", StringRef(Pref)));
5986     return false;
5987   }
5988 
5989   Fmt = Val;
5990   return true;
5991 }
5992 
5993 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5994 // values to live in a joint format operand in the MCInst encoding.
5995 OperandMatchResultTy
5996 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5997   using namespace llvm::AMDGPU::MTBUFFormat;
5998 
5999   int64_t Dfmt = DFMT_UNDEF;
6000   int64_t Nfmt = NFMT_UNDEF;
6001 
6002   // dfmt and nfmt can appear in either order, and each is optional.
6003   for (int I = 0; I < 2; ++I) {
6004     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
6005       return MatchOperand_ParseFail;
6006 
6007     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
6008       return MatchOperand_ParseFail;
6009     }
6010     // Skip optional comma between dfmt/nfmt
6011     // but guard against 2 commas following each other.
6012     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
6013         !peekToken().is(AsmToken::Comma)) {
6014       trySkipToken(AsmToken::Comma);
6015     }
6016   }
6017 
6018   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
6019     return MatchOperand_NoMatch;
6020 
6021   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6022   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6023 
6024   Format = encodeDfmtNfmt(Dfmt, Nfmt);
6025   return MatchOperand_Success;
6026 }
6027 
6028 OperandMatchResultTy
6029 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
6030   using namespace llvm::AMDGPU::MTBUFFormat;
6031 
6032   int64_t Fmt = UFMT_UNDEF;
6033 
6034   if (!tryParseFmt("format", UFMT_MAX, Fmt))
6035     return MatchOperand_ParseFail;
6036 
6037   if (Fmt == UFMT_UNDEF)
6038     return MatchOperand_NoMatch;
6039 
6040   Format = Fmt;
6041   return MatchOperand_Success;
6042 }
6043 
6044 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6045                                     int64_t &Nfmt,
6046                                     StringRef FormatStr,
6047                                     SMLoc Loc) {
6048   using namespace llvm::AMDGPU::MTBUFFormat;
6049   int64_t Format;
6050 
6051   Format = getDfmt(FormatStr);
6052   if (Format != DFMT_UNDEF) {
6053     Dfmt = Format;
6054     return true;
6055   }
6056 
6057   Format = getNfmt(FormatStr, getSTI());
6058   if (Format != NFMT_UNDEF) {
6059     Nfmt = Format;
6060     return true;
6061   }
6062 
6063   Error(Loc, "unsupported format");
6064   return false;
6065 }
6066 
6067 OperandMatchResultTy
6068 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
6069                                           SMLoc FormatLoc,
6070                                           int64_t &Format) {
6071   using namespace llvm::AMDGPU::MTBUFFormat;
6072 
6073   int64_t Dfmt = DFMT_UNDEF;
6074   int64_t Nfmt = NFMT_UNDEF;
6075   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
6076     return MatchOperand_ParseFail;
6077 
6078   if (trySkipToken(AsmToken::Comma)) {
6079     StringRef Str;
6080     SMLoc Loc = getLoc();
6081     if (!parseId(Str, "expected a format string") ||
6082         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
6083       return MatchOperand_ParseFail;
6084     }
6085     if (Dfmt == DFMT_UNDEF) {
6086       Error(Loc, "duplicate numeric format");
6087       return MatchOperand_ParseFail;
6088     } else if (Nfmt == NFMT_UNDEF) {
6089       Error(Loc, "duplicate data format");
6090       return MatchOperand_ParseFail;
6091     }
6092   }
6093 
6094   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6095   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6096 
6097   if (isGFX10Plus()) {
6098     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
6099     if (Ufmt == UFMT_UNDEF) {
6100       Error(FormatLoc, "unsupported format");
6101       return MatchOperand_ParseFail;
6102     }
6103     Format = Ufmt;
6104   } else {
6105     Format = encodeDfmtNfmt(Dfmt, Nfmt);
6106   }
6107 
6108   return MatchOperand_Success;
6109 }
6110 
6111 OperandMatchResultTy
6112 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6113                                             SMLoc Loc,
6114                                             int64_t &Format) {
6115   using namespace llvm::AMDGPU::MTBUFFormat;
6116 
6117   auto Id = getUnifiedFormat(FormatStr, getSTI());
6118   if (Id == UFMT_UNDEF)
6119     return MatchOperand_NoMatch;
6120 
6121   if (!isGFX10Plus()) {
6122     Error(Loc, "unified format is not supported on this GPU");
6123     return MatchOperand_ParseFail;
6124   }
6125 
6126   Format = Id;
6127   return MatchOperand_Success;
6128 }
6129 
6130 OperandMatchResultTy
6131 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6132   using namespace llvm::AMDGPU::MTBUFFormat;
6133   SMLoc Loc = getLoc();
6134 
6135   if (!parseExpr(Format))
6136     return MatchOperand_ParseFail;
6137   if (!isValidFormatEncoding(Format, getSTI())) {
6138     Error(Loc, "out of range format");
6139     return MatchOperand_ParseFail;
6140   }
6141 
6142   return MatchOperand_Success;
6143 }
6144 
6145 OperandMatchResultTy
6146 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6147   using namespace llvm::AMDGPU::MTBUFFormat;
6148 
6149   if (!trySkipId("format", AsmToken::Colon))
6150     return MatchOperand_NoMatch;
6151 
6152   if (trySkipToken(AsmToken::LBrac)) {
6153     StringRef FormatStr;
6154     SMLoc Loc = getLoc();
6155     if (!parseId(FormatStr, "expected a format string"))
6156       return MatchOperand_ParseFail;
6157 
6158     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6159     if (Res == MatchOperand_NoMatch)
6160       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6161     if (Res != MatchOperand_Success)
6162       return Res;
6163 
6164     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6165       return MatchOperand_ParseFail;
6166 
6167     return MatchOperand_Success;
6168   }
6169 
6170   return parseNumericFormat(Format);
6171 }
6172 
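// Parse the format operand of an MTBUF instruction. The format may be given
// either before or after soffset; when it follows soffset, the placeholder
// format operand pushed below is updated in place once the value is known.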
6173 OperandMatchResultTy
6174 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
6175   using namespace llvm::AMDGPU::MTBUFFormat;
6176 
6177   int64_t Format = getDefaultFormatEncoding(getSTI());
6178   OperandMatchResultTy Res;
6179   SMLoc Loc = getLoc();
6180 
6181   // Parse legacy format syntax.
6182   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
6183   if (Res == MatchOperand_ParseFail)
6184     return Res;
6185 
6186   bool FormatFound = (Res == MatchOperand_Success);
6187 
6188   Operands.push_back(
6189     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6190 
6191   if (FormatFound)
6192     trySkipToken(AsmToken::Comma);
6193 
6194   if (isToken(AsmToken::EndOfStatement)) {
6195     // We are expecting an soffset operand,
6196     // but let the matcher handle the error.
6197     return MatchOperand_Success;
6198   }
6199 
6200   // Parse soffset.
6201   Res = parseRegOrImm(Operands);
6202   if (Res != MatchOperand_Success)
6203     return Res;
6204 
6205   trySkipToken(AsmToken::Comma);
6206 
6207   if (!FormatFound) {
6208     Res = parseSymbolicOrNumericFormat(Format);
6209     if (Res == MatchOperand_ParseFail)
6210       return Res;
6211     if (Res == MatchOperand_Success) {
6212       auto Size = Operands.size();
6213       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
6214       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6215       Op.setImm(Format);
6216     }
6217     return MatchOperand_Success;
6218   }
6219 
6220   if (isId("format") && peekToken().is(AsmToken::Colon)) {
6221     Error(getLoc(), "duplicate format");
6222     return MatchOperand_ParseFail;
6223   }
6224   return MatchOperand_Success;
6225 }
6226 
6227 //===----------------------------------------------------------------------===//
6228 // ds
6229 //===----------------------------------------------------------------------===//
6230 
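// Convert DS instructions that take the split offset0/offset1 operands.
// The implicit m0 register operand is appended last.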
6231 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
6232                                     const OperandVector &Operands) {
6233   OptionalImmIndexMap OptionalIdx;
6234 
6235   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6236     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6237 
6238     // Add the register arguments
6239     if (Op.isReg()) {
6240       Op.addRegOperands(Inst, 1);
6241       continue;
6242     }
6243 
6244     // Handle optional arguments
6245     OptionalIdx[Op.getImmTy()] = i;
6246   }
6247 
6248   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
6249   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
6250   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6251 
6252   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6253 }
6254 
6255 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
6256                                 bool IsGdsHardcoded) {
6257   OptionalImmIndexMap OptionalIdx;
6258 
6259   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6260     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6261 
6262     // Add the register arguments
6263     if (Op.isReg()) {
6264       Op.addRegOperands(Inst, 1);
6265       continue;
6266     }
6267 
6268     if (Op.isToken() && Op.getToken() == "gds") {
6269       IsGdsHardcoded = true;
6270       continue;
6271     }
6272 
6273     // Handle optional arguments
6274     OptionalIdx[Op.getImmTy()] = i;
6275   }
6276 
6277   AMDGPUOperand::ImmTy OffsetType =
6278     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
6279      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
6280      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
6281                                                       AMDGPUOperand::ImmTyOffset;
6282 
6283   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
6284 
6285   if (!IsGdsHardcoded) {
6286     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6287   }
6288   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6289 }
6290 
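// Convert parsed export (exp) operands and compute the enable mask: each
// source that is not "off" enables one bit of the mask, or two bits when
// "compr" is specified (in which case the source operands are repacked first).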
6291 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
6292   OptionalImmIndexMap OptionalIdx;
6293 
6294   unsigned OperandIdx[4];
6295   unsigned EnMask = 0;
6296   int SrcIdx = 0;
6297 
6298   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6299     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6300 
6301     // Add the register arguments
6302     if (Op.isReg()) {
6303       assert(SrcIdx < 4);
6304       OperandIdx[SrcIdx] = Inst.size();
6305       Op.addRegOperands(Inst, 1);
6306       ++SrcIdx;
6307       continue;
6308     }
6309 
6310     if (Op.isOff()) {
6311       assert(SrcIdx < 4);
6312       OperandIdx[SrcIdx] = Inst.size();
6313       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
6314       ++SrcIdx;
6315       continue;
6316     }
6317 
6318     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
6319       Op.addImmOperands(Inst, 1);
6320       continue;
6321     }
6322 
6323     if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
6324       continue;
6325 
6326     // Handle optional arguments
6327     OptionalIdx[Op.getImmTy()] = i;
6328   }
6329 
6330   assert(SrcIdx == 4);
6331 
6332   bool Compr = false;
6333   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
6334     Compr = true;
6335     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
6336     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
6337     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
6338   }
6339 
6340   for (auto i = 0; i < SrcIdx; ++i) {
6341     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
6342       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
6343     }
6344   }
6345 
6346   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
6347   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
6348 
6349   Inst.addOperand(MCOperand::createImm(EnMask));
6350 }
6351 
6352 //===----------------------------------------------------------------------===//
6353 // s_waitcnt
6354 //===----------------------------------------------------------------------===//
6355 
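// Pack CntVal into the counter field of IntVal selected by 'encode'. Returns
// true on failure, i.e. when the value does not survive an encode/decode
// round trip; with Saturate set, an oversized value is clamped to the field
// maximum instead of failing.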
6356 static bool
6357 encodeCnt(
6358   const AMDGPU::IsaVersion ISA,
6359   int64_t &IntVal,
6360   int64_t CntVal,
6361   bool Saturate,
6362   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6363   unsigned (*decode)(const IsaVersion &Version, unsigned))
6364 {
6365   bool Failed = false;
6366 
6367   IntVal = encode(ISA, IntVal, CntVal);
6368   if (CntVal != decode(ISA, IntVal)) {
6369     if (Saturate) {
6370       IntVal = encode(ISA, IntVal, -1);
6371     } else {
6372       Failed = true;
6373     }
6374   }
6375   return Failed;
6376 }
6377 
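// Parse a single counter specifier of an s_waitcnt operand, e.g. "vmcnt(0)".
// A "_sat" suffix (e.g. "vmcnt_sat") clamps an out-of-range value instead of
// reporting an error. Specifiers may be separated by '&' or ','.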
6378 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6379 
6380   SMLoc CntLoc = getLoc();
6381   StringRef CntName = getTokenStr();
6382 
6383   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6384       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6385     return false;
6386 
6387   int64_t CntVal;
6388   SMLoc ValLoc = getLoc();
6389   if (!parseExpr(CntVal))
6390     return false;
6391 
6392   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6393 
6394   bool Failed = true;
6395   bool Sat = CntName.endswith("_sat");
6396 
6397   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6398     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6399   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6400     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6401   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6402     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6403   } else {
6404     Error(CntLoc, "invalid counter name " + CntName);
6405     return false;
6406   }
6407 
6408   if (Failed) {
6409     Error(ValLoc, "too large value for " + CntName);
6410     return false;
6411   }
6412 
6413   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6414     return false;
6415 
6416   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6417     if (isToken(AsmToken::EndOfStatement)) {
6418       Error(getLoc(), "expected a counter name");
6419       return false;
6420     }
6421   }
6422 
6423   return true;
6424 }
6425 
6426 OperandMatchResultTy
6427 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
6428   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6429   int64_t Waitcnt = getWaitcntBitMask(ISA);
6430   SMLoc S = getLoc();
6431 
6432   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6433     while (!isToken(AsmToken::EndOfStatement)) {
6434       if (!parseCnt(Waitcnt))
6435         return MatchOperand_ParseFail;
6436     }
6437   } else {
6438     if (!parseExpr(Waitcnt))
6439       return MatchOperand_ParseFail;
6440   }
6441 
6442   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6443   return MatchOperand_Success;
6444 }
6445 
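// Parse one field of an s_delay_alu operand, e.g. "instid0(VALU_DEP_1)" or
// "instskip(NEXT)". The fields are packed into the delay value at bit
// offsets 0 (instid0), 4 (instskip) and 7 (instid1).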
6446 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
6447   SMLoc FieldLoc = getLoc();
6448   StringRef FieldName = getTokenStr();
6449   if (!skipToken(AsmToken::Identifier, "expected a field name") ||
6450       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6451     return false;
6452 
6453   SMLoc ValueLoc = getLoc();
6454   StringRef ValueName = getTokenStr();
6455   if (!skipToken(AsmToken::Identifier, "expected a value name") ||
6456       !skipToken(AsmToken::RParen, "expected a right parenthesis"))
6457     return false;
6458 
6459   unsigned Shift;
6460   if (FieldName == "instid0") {
6461     Shift = 0;
6462   } else if (FieldName == "instskip") {
6463     Shift = 4;
6464   } else if (FieldName == "instid1") {
6465     Shift = 7;
6466   } else {
6467     Error(FieldLoc, "invalid field name " + FieldName);
6468     return false;
6469   }
6470 
6471   int Value;
6472   if (Shift == 4) {
6473     // Parse values for instskip.
6474     Value = StringSwitch<int>(ValueName)
6475                 .Case("SAME", 0)
6476                 .Case("NEXT", 1)
6477                 .Case("SKIP_1", 2)
6478                 .Case("SKIP_2", 3)
6479                 .Case("SKIP_3", 4)
6480                 .Case("SKIP_4", 5)
6481                 .Default(-1);
6482   } else {
6483     // Parse values for instid0 and instid1.
6484     Value = StringSwitch<int>(ValueName)
6485                 .Case("NO_DEP", 0)
6486                 .Case("VALU_DEP_1", 1)
6487                 .Case("VALU_DEP_2", 2)
6488                 .Case("VALU_DEP_3", 3)
6489                 .Case("VALU_DEP_4", 4)
6490                 .Case("TRANS32_DEP_1", 5)
6491                 .Case("TRANS32_DEP_2", 6)
6492                 .Case("TRANS32_DEP_3", 7)
6493                 .Case("FMA_ACCUM_CYCLE_1", 8)
6494                 .Case("SALU_CYCLE_1", 9)
6495                 .Case("SALU_CYCLE_2", 10)
6496                 .Case("SALU_CYCLE_3", 11)
6497                 .Default(-1);
6498   }
6499   if (Value < 0) {
6500     Error(ValueLoc, "invalid value name " + ValueName);
6501     return false;
6502   }
6503 
6504   Delay |= Value << Shift;
6505   return true;
6506 }
6507 
6508 OperandMatchResultTy
6509 AMDGPUAsmParser::parseSDelayAluOps(OperandVector &Operands) {
6510   int64_t Delay = 0;
6511   SMLoc S = getLoc();
6512 
6513   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6514     do {
6515       if (!parseDelay(Delay))
6516         return MatchOperand_ParseFail;
6517     } while (trySkipToken(AsmToken::Pipe));
6518   } else {
6519     if (!parseExpr(Delay))
6520       return MatchOperand_ParseFail;
6521   }
6522 
6523   Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
6524   return MatchOperand_Success;
6525 }
6526 
6527 bool
6528 AMDGPUOperand::isSWaitCnt() const {
6529   return isImm();
6530 }
6531 
6532 bool AMDGPUOperand::isSDelayAlu() const { return isImm(); }
6533 
6534 //===----------------------------------------------------------------------===//
6535 // DepCtr
6536 //===----------------------------------------------------------------------===//
6537 
6538 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
6539                                   StringRef DepCtrName) {
6540   switch (ErrorId) {
6541   case OPR_ID_UNKNOWN:
6542     Error(Loc, Twine("invalid counter name ", DepCtrName));
6543     return;
6544   case OPR_ID_UNSUPPORTED:
6545     Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
6546     return;
6547   case OPR_ID_DUPLICATE:
6548     Error(Loc, Twine("duplicate counter name ", DepCtrName));
6549     return;
6550   case OPR_VAL_INVALID:
6551     Error(Loc, Twine("invalid value for ", DepCtrName));
6552     return;
6553   default:
6554     assert(false);
6555   }
6556 }
6557 
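// Parse a single "<counter>(<value>)" item of a depctr operand. UsedOprMask
// accumulates the bits of the counter fields that have already been
// specified, which is also how duplicate counters get diagnosed; items may be
// separated by '&' or ','.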
6558 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
6559 
6560   using namespace llvm::AMDGPU::DepCtr;
6561 
6562   SMLoc DepCtrLoc = getLoc();
6563   StringRef DepCtrName = getTokenStr();
6564 
6565   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6566       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6567     return false;
6568 
6569   int64_t ExprVal;
6570   if (!parseExpr(ExprVal))
6571     return false;
6572 
6573   unsigned PrevOprMask = UsedOprMask;
6574   int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
6575 
6576   if (CntVal < 0) {
6577     depCtrError(DepCtrLoc, CntVal, DepCtrName);
6578     return false;
6579   }
6580 
6581   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6582     return false;
6583 
6584   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6585     if (isToken(AsmToken::EndOfStatement)) {
6586       Error(getLoc(), "expected a counter name");
6587       return false;
6588     }
6589   }
6590 
6591   unsigned CntValMask = PrevOprMask ^ UsedOprMask;
6592   DepCtr = (DepCtr & ~CntValMask) | CntVal;
6593   return true;
6594 }
6595 
6596 OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) {
6597   using namespace llvm::AMDGPU::DepCtr;
6598 
6599   int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
6600   SMLoc Loc = getLoc();
6601 
6602   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6603     unsigned UsedOprMask = 0;
6604     while (!isToken(AsmToken::EndOfStatement)) {
6605       if (!parseDepCtr(DepCtr, UsedOprMask))
6606         return MatchOperand_ParseFail;
6607     }
6608   } else {
6609     if (!parseExpr(DepCtr))
6610       return MatchOperand_ParseFail;
6611   }
6612 
6613   Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
6614   return MatchOperand_Success;
6615 }
6616 
6617 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
6618 
6619 //===----------------------------------------------------------------------===//
6620 // hwreg
6621 //===----------------------------------------------------------------------===//
6622 
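// Parse the parenthesized body of a "hwreg(...)" operand: a register given by
// name or by numeric code, optionally followed by a bit offset and a bitfield
// width, i.e. hwreg(<reg>) or hwreg(<reg>, <offset>, <width>).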
6623 bool
6624 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6625                                 OperandInfoTy &Offset,
6626                                 OperandInfoTy &Width) {
6627   using namespace llvm::AMDGPU::Hwreg;
6628 
6629   // The register may be specified by name or using a numeric code
6630   HwReg.Loc = getLoc();
6631   if (isToken(AsmToken::Identifier) &&
6632       (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6633     HwReg.IsSymbolic = true;
6634     lex(); // skip register name
6635   } else if (!parseExpr(HwReg.Id, "a register name")) {
6636     return false;
6637   }
6638 
6639   if (trySkipToken(AsmToken::RParen))
6640     return true;
6641 
6642   // parse optional params
6643   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6644     return false;
6645 
6646   Offset.Loc = getLoc();
6647   if (!parseExpr(Offset.Id))
6648     return false;
6649 
6650   if (!skipToken(AsmToken::Comma, "expected a comma"))
6651     return false;
6652 
6653   Width.Loc = getLoc();
6654   return parseExpr(Width.Id) &&
6655          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6656 }
6657 
6658 bool
6659 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6660                                const OperandInfoTy &Offset,
6661                                const OperandInfoTy &Width) {
6662 
6663   using namespace llvm::AMDGPU::Hwreg;
6664 
6665   if (HwReg.IsSymbolic) {
6666     if (HwReg.Id == OPR_ID_UNSUPPORTED) {
6667       Error(HwReg.Loc,
6668             "specified hardware register is not supported on this GPU");
6669       return false;
6670     }
6671   } else {
6672     if (!isValidHwreg(HwReg.Id)) {
6673       Error(HwReg.Loc,
6674             "invalid code of hardware register: only 6-bit values are legal");
6675       return false;
6676     }
6677   }
6678   if (!isValidHwregOffset(Offset.Id)) {
6679     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6680     return false;
6681   }
6682   if (!isValidHwregWidth(Width.Id)) {
6683     Error(Width.Loc,
6684           "invalid bitfield width: only values from 1 to 32 are legal");
6685     return false;
6686   }
6687   return true;
6688 }
6689 
6690 OperandMatchResultTy
6691 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6692   using namespace llvm::AMDGPU::Hwreg;
6693 
6694   int64_t ImmVal = 0;
6695   SMLoc Loc = getLoc();
6696 
6697   if (trySkipId("hwreg", AsmToken::LParen)) {
6698     OperandInfoTy HwReg(OPR_ID_UNKNOWN);
6699     OperandInfoTy Offset(OFFSET_DEFAULT_);
6700     OperandInfoTy Width(WIDTH_DEFAULT_);
6701     if (parseHwregBody(HwReg, Offset, Width) &&
6702         validateHwreg(HwReg, Offset, Width)) {
6703       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6704     } else {
6705       return MatchOperand_ParseFail;
6706     }
6707   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6708     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6709       Error(Loc, "invalid immediate: only 16-bit values are legal");
6710       return MatchOperand_ParseFail;
6711     }
6712   } else {
6713     return MatchOperand_ParseFail;
6714   }
6715 
6716   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6717   return MatchOperand_Success;
6718 }
6719 
6720 bool AMDGPUOperand::isHwreg() const {
6721   return isImmTy(ImmTyHwreg);
6722 }
6723 
6724 //===----------------------------------------------------------------------===//
6725 // sendmsg
6726 //===----------------------------------------------------------------------===//
6727 
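// Parse the parenthesized body of a "sendmsg(...)" operand: a message given
// by name or by numeric code, optionally followed by an operation and a
// stream id, i.e. sendmsg(<msg>[, <op>[, <stream>]]).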
6728 bool
6729 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6730                                   OperandInfoTy &Op,
6731                                   OperandInfoTy &Stream) {
6732   using namespace llvm::AMDGPU::SendMsg;
6733 
6734   Msg.Loc = getLoc();
6735   if (isToken(AsmToken::Identifier) &&
6736       (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6737     Msg.IsSymbolic = true;
6738     lex(); // skip message name
6739   } else if (!parseExpr(Msg.Id, "a message name")) {
6740     return false;
6741   }
6742 
6743   if (trySkipToken(AsmToken::Comma)) {
6744     Op.IsDefined = true;
6745     Op.Loc = getLoc();
6746     if (isToken(AsmToken::Identifier) &&
6747         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6748       lex(); // skip operation name
6749     } else if (!parseExpr(Op.Id, "an operation name")) {
6750       return false;
6751     }
6752 
6753     if (trySkipToken(AsmToken::Comma)) {
6754       Stream.IsDefined = true;
6755       Stream.Loc = getLoc();
6756       if (!parseExpr(Stream.Id))
6757         return false;
6758     }
6759   }
6760 
6761   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6762 }
6763 
6764 bool
6765 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6766                                  const OperandInfoTy &Op,
6767                                  const OperandInfoTy &Stream) {
6768   using namespace llvm::AMDGPU::SendMsg;
6769 
6770   // Validation strictness depends on whether the message is specified
6771   // in a symbolic or in a numeric form. In the latter case,
6772   // only the possibility of encoding the value is checked.
6773   bool Strict = Msg.IsSymbolic;
6774 
6775   if (Strict) {
6776     if (Msg.Id == OPR_ID_UNSUPPORTED) {
6777       Error(Msg.Loc, "specified message id is not supported on this GPU");
6778       return false;
6779     }
6780   } else {
6781     if (!isValidMsgId(Msg.Id, getSTI())) {
6782       Error(Msg.Loc, "invalid message id");
6783       return false;
6784     }
6785   }
6786   if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) {
6787     if (Op.IsDefined) {
6788       Error(Op.Loc, "message does not support operations");
6789     } else {
6790       Error(Msg.Loc, "missing message operation");
6791     }
6792     return false;
6793   }
6794   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6795     Error(Op.Loc, "invalid operation id");
6796     return false;
6797   }
6798   if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) &&
6799       Stream.IsDefined) {
6800     Error(Stream.Loc, "message operation does not support streams");
6801     return false;
6802   }
6803   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6804     Error(Stream.Loc, "invalid message stream id");
6805     return false;
6806   }
6807   return true;
6808 }
6809 
6810 OperandMatchResultTy
6811 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6812   using namespace llvm::AMDGPU::SendMsg;
6813 
6814   int64_t ImmVal = 0;
6815   SMLoc Loc = getLoc();
6816 
6817   if (trySkipId("sendmsg", AsmToken::LParen)) {
6818     OperandInfoTy Msg(OPR_ID_UNKNOWN);
6819     OperandInfoTy Op(OP_NONE_);
6820     OperandInfoTy Stream(STREAM_ID_NONE_);
6821     if (parseSendMsgBody(Msg, Op, Stream) &&
6822         validateSendMsg(Msg, Op, Stream)) {
6823       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6824     } else {
6825       return MatchOperand_ParseFail;
6826     }
6827   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6828     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6829       Error(Loc, "invalid immediate: only 16-bit values are legal");
6830       return MatchOperand_ParseFail;
6831     }
6832   } else {
6833     return MatchOperand_ParseFail;
6834   }
6835 
6836   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6837   return MatchOperand_Success;
6838 }
6839 
6840 bool AMDGPUOperand::isSendMsg() const {
6841   return isImmTy(ImmTySendMsg);
6842 }
6843 
6844 //===----------------------------------------------------------------------===//
6845 // v_interp
6846 //===----------------------------------------------------------------------===//
6847 
6848 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6849   StringRef Str;
6850   SMLoc S = getLoc();
6851 
6852   if (!parseId(Str))
6853     return MatchOperand_NoMatch;
6854 
6855   int Slot = StringSwitch<int>(Str)
6856     .Case("p10", 0)
6857     .Case("p20", 1)
6858     .Case("p0", 2)
6859     .Default(-1);
6860 
6861   if (Slot == -1) {
6862     Error(S, "invalid interpolation slot");
6863     return MatchOperand_ParseFail;
6864   }
6865 
6866   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6867                                               AMDGPUOperand::ImmTyInterpSlot));
6868   return MatchOperand_Success;
6869 }
6870 
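// Parse an interpolation attribute of the form "attr<N>.<chan>", where <N> is
// an attribute number in the range 0..63 and <chan> is one of x, y, z or w.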
6871 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6872   StringRef Str;
6873   SMLoc S = getLoc();
6874 
6875   if (!parseId(Str))
6876     return MatchOperand_NoMatch;
6877 
6878   if (!Str.startswith("attr")) {
6879     Error(S, "invalid interpolation attribute");
6880     return MatchOperand_ParseFail;
6881   }
6882 
6883   StringRef Chan = Str.take_back(2);
6884   int AttrChan = StringSwitch<int>(Chan)
6885     .Case(".x", 0)
6886     .Case(".y", 1)
6887     .Case(".z", 2)
6888     .Case(".w", 3)
6889     .Default(-1);
6890   if (AttrChan == -1) {
6891     Error(S, "invalid or missing interpolation attribute channel");
6892     return MatchOperand_ParseFail;
6893   }
6894 
6895   Str = Str.drop_back(2).drop_front(4);
6896 
6897   uint8_t Attr;
6898   if (Str.getAsInteger(10, Attr)) {
6899     Error(S, "invalid or missing interpolation attribute number");
6900     return MatchOperand_ParseFail;
6901   }
6902 
6903   if (Attr > 63) {
6904     Error(S, "out of bounds interpolation attribute number");
6905     return MatchOperand_ParseFail;
6906   }
6907 
6908   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6909 
6910   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6911                                               AMDGPUOperand::ImmTyInterpAttr));
6912   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6913                                               AMDGPUOperand::ImmTyAttrChan));
6914   return MatchOperand_Success;
6915 }
6916 
6917 //===----------------------------------------------------------------------===//
6918 // exp
6919 //===----------------------------------------------------------------------===//
6920 
6921 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6922   using namespace llvm::AMDGPU::Exp;
6923 
6924   StringRef Str;
6925   SMLoc S = getLoc();
6926 
6927   if (!parseId(Str))
6928     return MatchOperand_NoMatch;
6929 
6930   unsigned Id = getTgtId(Str);
6931   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6932     Error(S, (Id == ET_INVALID) ?
6933                 "invalid exp target" :
6934                 "exp target is not supported on this GPU");
6935     return MatchOperand_ParseFail;
6936   }
6937 
6938   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6939                                               AMDGPUOperand::ImmTyExpTgt));
6940   return MatchOperand_Success;
6941 }
6942 
6943 //===----------------------------------------------------------------------===//
6944 // parser helpers
6945 //===----------------------------------------------------------------------===//
6946 
6947 bool
6948 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6949   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6950 }
6951 
6952 bool
6953 AMDGPUAsmParser::isId(const StringRef Id) const {
6954   return isId(getToken(), Id);
6955 }
6956 
6957 bool
6958 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6959   return getTokenKind() == Kind;
6960 }
6961 
6962 bool
6963 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6964   if (isId(Id)) {
6965     lex();
6966     return true;
6967   }
6968   return false;
6969 }
6970 
6971 bool
6972 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6973   if (isToken(AsmToken::Identifier)) {
6974     StringRef Tok = getTokenStr();
6975     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6976       lex();
6977       return true;
6978     }
6979   }
6980   return false;
6981 }
6982 
6983 bool
6984 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6985   if (isId(Id) && peekToken().is(Kind)) {
6986     lex();
6987     lex();
6988     return true;
6989   }
6990   return false;
6991 }
6992 
6993 bool
6994 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6995   if (isToken(Kind)) {
6996     lex();
6997     return true;
6998   }
6999   return false;
7000 }
7001 
7002 bool
7003 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
7004                            const StringRef ErrMsg) {
7005   if (!trySkipToken(Kind)) {
7006     Error(getLoc(), ErrMsg);
7007     return false;
7008   }
7009   return true;
7010 }
7011 
7012 bool
7013 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
7014   SMLoc S = getLoc();
7015 
7016   const MCExpr *Expr;
7017   if (Parser.parseExpression(Expr))
7018     return false;
7019 
7020   if (Expr->evaluateAsAbsolute(Imm))
7021     return true;
7022 
7023   if (Expected.empty()) {
7024     Error(S, "expected absolute expression");
7025   } else {
7026     Error(S, Twine("expected ", Expected) +
7027              Twine(" or an absolute expression"));
7028   }
7029   return false;
7030 }
7031 
7032 bool
7033 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
7034   SMLoc S = getLoc();
7035 
7036   const MCExpr *Expr;
7037   if (Parser.parseExpression(Expr))
7038     return false;
7039 
7040   int64_t IntVal;
7041   if (Expr->evaluateAsAbsolute(IntVal)) {
7042     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
7043   } else {
7044     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
7045   }
7046   return true;
7047 }
7048 
7049 bool
7050 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
7051   if (isToken(AsmToken::String)) {
7052     Val = getToken().getStringContents();
7053     lex();
7054     return true;
7055   } else {
7056     Error(getLoc(), ErrMsg);
7057     return false;
7058   }
7059 }
7060 
7061 bool
7062 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
7063   if (isToken(AsmToken::Identifier)) {
7064     Val = getTokenStr();
7065     lex();
7066     return true;
7067   } else {
7068     if (!ErrMsg.empty())
7069       Error(getLoc(), ErrMsg);
7070     return false;
7071   }
7072 }
7073 
7074 AsmToken
7075 AMDGPUAsmParser::getToken() const {
7076   return Parser.getTok();
7077 }
7078 
7079 AsmToken
7080 AMDGPUAsmParser::peekToken() {
7081   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
7082 }
7083 
7084 void
7085 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
7086   auto TokCount = getLexer().peekTokens(Tokens);
7087 
7088   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
7089     Tokens[Idx] = AsmToken(AsmToken::Error, "");
7090 }
7091 
7092 AsmToken::TokenKind
7093 AMDGPUAsmParser::getTokenKind() const {
7094   return getLexer().getKind();
7095 }
7096 
7097 SMLoc
7098 AMDGPUAsmParser::getLoc() const {
7099   return getToken().getLoc();
7100 }
7101 
7102 StringRef
7103 AMDGPUAsmParser::getTokenStr() const {
7104   return getToken().getString();
7105 }
7106 
7107 void
7108 AMDGPUAsmParser::lex() {
7109   Parser.Lex();
7110 }
7111 
7112 SMLoc
7113 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
7114                                const OperandVector &Operands) const {
7115   for (unsigned i = Operands.size() - 1; i > 0; --i) {
7116     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7117     if (Test(Op))
7118       return Op.getStartLoc();
7119   }
7120   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
7121 }
7122 
7123 SMLoc
7124 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
7125                            const OperandVector &Operands) const {
7126   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
7127   return getOperandLoc(Test, Operands);
7128 }
7129 
7130 SMLoc
7131 AMDGPUAsmParser::getRegLoc(unsigned Reg,
7132                            const OperandVector &Operands) const {
7133   auto Test = [=](const AMDGPUOperand& Op) {
7134     return Op.isRegKind() && Op.getReg() == Reg;
7135   };
7136   return getOperandLoc(Test, Operands);
7137 }
7138 
7139 SMLoc
7140 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
7141   auto Test = [](const AMDGPUOperand& Op) {
7142     return Op.IsImmKindLiteral() || Op.isExpr();
7143   };
7144   return getOperandLoc(Test, Operands);
7145 }
7146 
7147 SMLoc
7148 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
7149   auto Test = [](const AMDGPUOperand& Op) {
7150     return Op.isImmKindConst();
7151   };
7152   return getOperandLoc(Test, Operands);
7153 }
7154 
7155 //===----------------------------------------------------------------------===//
7156 // swizzle
7157 //===----------------------------------------------------------------------===//
7158 
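// Build the BITMASK_PERM swizzle encoding from its three 5-bit masks. The
// broadcast, swap and reverse swizzle macros below are expressed in terms of
// this encoding.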
7159 LLVM_READNONE
7160 static unsigned
7161 encodeBitmaskPerm(const unsigned AndMask,
7162                   const unsigned OrMask,
7163                   const unsigned XorMask) {
7164   using namespace llvm::AMDGPU::Swizzle;
7165 
7166   return BITMASK_PERM_ENC |
7167          (AndMask << BITMASK_AND_SHIFT) |
7168          (OrMask  << BITMASK_OR_SHIFT)  |
7169          (XorMask << BITMASK_XOR_SHIFT);
7170 }
7171 
7172 bool
7173 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
7174                                      const unsigned MinVal,
7175                                      const unsigned MaxVal,
7176                                      const StringRef ErrMsg,
7177                                      SMLoc &Loc) {
7178   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7179     return false;
7180   }
7181   Loc = getLoc();
7182   if (!parseExpr(Op)) {
7183     return false;
7184   }
7185   if (Op < MinVal || Op > MaxVal) {
7186     Error(Loc, ErrMsg);
7187     return false;
7188   }
7189 
7190   return true;
7191 }
7192 
7193 bool
7194 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
7195                                       const unsigned MinVal,
7196                                       const unsigned MaxVal,
7197                                       const StringRef ErrMsg) {
7198   SMLoc Loc;
7199   for (unsigned i = 0; i < OpNum; ++i) {
7200     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
7201       return false;
7202   }
7203 
7204   return true;
7205 }
7206 
7207 bool
7208 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7209   using namespace llvm::AMDGPU::Swizzle;
7210 
7211   int64_t Lane[LANE_NUM];
7212   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7213                            "expected a 2-bit lane id")) {
7214     Imm = QUAD_PERM_ENC;
7215     for (unsigned I = 0; I < LANE_NUM; ++I) {
7216       Imm |= Lane[I] << (LANE_SHIFT * I);
7217     }
7218     return true;
7219   }
7220   return false;
7221 }
7222 
7223 bool
7224 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
7225   using namespace llvm::AMDGPU::Swizzle;
7226 
7227   SMLoc Loc;
7228   int64_t GroupSize;
7229   int64_t LaneIdx;
7230 
7231   if (!parseSwizzleOperand(GroupSize,
7232                            2, 32,
7233                            "group size must be in the interval [2,32]",
7234                            Loc)) {
7235     return false;
7236   }
7237   if (!isPowerOf2_64(GroupSize)) {
7238     Error(Loc, "group size must be a power of two");
7239     return false;
7240   }
7241   if (parseSwizzleOperand(LaneIdx,
7242                           0, GroupSize - 1,
7243                           "lane id must be in the interval [0,group size - 1]",
7244                           Loc)) {
7245     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
7246     return true;
7247   }
7248   return false;
7249 }
7250 
7251 bool
7252 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
7253   using namespace llvm::AMDGPU::Swizzle;
7254 
7255   SMLoc Loc;
7256   int64_t GroupSize;
7257 
7258   if (!parseSwizzleOperand(GroupSize,
7259                            2, 32,
7260                            "group size must be in the interval [2,32]",
7261                            Loc)) {
7262     return false;
7263   }
7264   if (!isPowerOf2_64(GroupSize)) {
7265     Error(Loc, "group size must be a power of two");
7266     return false;
7267   }
7268 
7269   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
7270   return true;
7271 }
7272 
7273 bool
7274 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
7275   using namespace llvm::AMDGPU::Swizzle;
7276 
7277   SMLoc Loc;
7278   int64_t GroupSize;
7279 
7280   if (!parseSwizzleOperand(GroupSize,
7281                            1, 16,
7282                            "group size must be in the interval [1,16]",
7283                            Loc)) {
7284     return false;
7285   }
7286   if (!isPowerOf2_64(GroupSize)) {
7287     Error(Loc, "group size must be a power of two");
7288     return false;
7289   }
7290 
7291   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
7292   return true;
7293 }
7294 
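// Parse the quoted control string of the BITMASK_PERM swizzle macro. Each of
// its 5 characters controls one bit of the lane id, most significant bit
// first: '0' forces the bit to 0, '1' forces it to 1, 'p' preserves it and
// 'i' inverts it.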
7295 bool
7296 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
7297   using namespace llvm::AMDGPU::Swizzle;
7298 
7299   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7300     return false;
7301   }
7302 
7303   StringRef Ctl;
7304   SMLoc StrLoc = getLoc();
7305   if (!parseString(Ctl)) {
7306     return false;
7307   }
7308   if (Ctl.size() != BITMASK_WIDTH) {
7309     Error(StrLoc, "expected a 5-character mask");
7310     return false;
7311   }
7312 
7313   unsigned AndMask = 0;
7314   unsigned OrMask = 0;
7315   unsigned XorMask = 0;
7316 
7317   for (size_t i = 0; i < Ctl.size(); ++i) {
7318     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
7319     switch(Ctl[i]) {
7320     default:
7321       Error(StrLoc, "invalid mask");
7322       return false;
7323     case '0':
7324       break;
7325     case '1':
7326       OrMask |= Mask;
7327       break;
7328     case 'p':
7329       AndMask |= Mask;
7330       break;
7331     case 'i':
7332       AndMask |= Mask;
7333       XorMask |= Mask;
7334       break;
7335     }
7336   }
7337 
7338   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
7339   return true;
7340 }
7341 
7342 bool
7343 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
7344 
7345   SMLoc OffsetLoc = getLoc();
7346 
7347   if (!parseExpr(Imm, "a swizzle macro")) {
7348     return false;
7349   }
7350   if (!isUInt<16>(Imm)) {
7351     Error(OffsetLoc, "expected a 16-bit offset");
7352     return false;
7353   }
7354   return true;
7355 }
7356 
7357 bool
7358 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
7359   using namespace llvm::AMDGPU::Swizzle;
7360 
7361   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
7362 
7363     SMLoc ModeLoc = getLoc();
7364     bool Ok = false;
7365 
7366     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
7367       Ok = parseSwizzleQuadPerm(Imm);
7368     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
7369       Ok = parseSwizzleBitmaskPerm(Imm);
7370     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
7371       Ok = parseSwizzleBroadcast(Imm);
7372     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
7373       Ok = parseSwizzleSwap(Imm);
7374     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
7375       Ok = parseSwizzleReverse(Imm);
7376     } else {
7377       Error(ModeLoc, "expected a swizzle mode");
7378     }
7379 
7380     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
7381   }
7382 
7383   return false;
7384 }
7385 
7386 OperandMatchResultTy
7387 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
7388   SMLoc S = getLoc();
7389   int64_t Imm = 0;
7390 
7391   if (trySkipId("offset")) {
7392 
7393     bool Ok = false;
7394     if (skipToken(AsmToken::Colon, "expected a colon")) {
7395       if (trySkipId("swizzle")) {
7396         Ok = parseSwizzleMacro(Imm);
7397       } else {
7398         Ok = parseSwizzleOffset(Imm);
7399       }
7400     }
7401 
7402     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
7403 
7404     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
7405   } else {
7406     // Swizzle "offset" operand is optional.
7407     // If it is omitted, try parsing other optional operands.
7408     return parseOptionalOpr(Operands);
7409   }
7410 }
7411 
7412 bool
7413 AMDGPUOperand::isSwizzle() const {
7414   return isImmTy(ImmTySwizzle);
7415 }
7416 
7417 //===----------------------------------------------------------------------===//
7418 // VGPR Index Mode
7419 //===----------------------------------------------------------------------===//
7420 
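// Parse the parenthesized body of a "gpr_idx(...)" operand: a comma-separated
// list of VGPR index modes (see VGPRIndexMode::IdSymbolic), each of which may
// appear at most once. An empty list, "gpr_idx()", yields OFF.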
7421 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
7422 
7423   using namespace llvm::AMDGPU::VGPRIndexMode;
7424 
7425   if (trySkipToken(AsmToken::RParen)) {
7426     return OFF;
7427   }
7428 
7429   int64_t Imm = 0;
7430 
7431   while (true) {
7432     unsigned Mode = 0;
7433     SMLoc S = getLoc();
7434 
7435     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
7436       if (trySkipId(IdSymbolic[ModeId])) {
7437         Mode = 1 << ModeId;
7438         break;
7439       }
7440     }
7441 
7442     if (Mode == 0) {
7443       Error(S, (Imm == 0)?
7444                "expected a VGPR index mode or a closing parenthesis" :
7445                "expected a VGPR index mode");
7446       return UNDEF;
7447     }
7448 
7449     if (Imm & Mode) {
7450       Error(S, "duplicate VGPR index mode");
7451       return UNDEF;
7452     }
7453     Imm |= Mode;
7454 
7455     if (trySkipToken(AsmToken::RParen))
7456       break;
7457     if (!skipToken(AsmToken::Comma,
7458                    "expected a comma or a closing parenthesis"))
7459       return UNDEF;
7460   }
7461 
7462   return Imm;
7463 }
7464 
7465 OperandMatchResultTy
7466 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
7467 
7468   using namespace llvm::AMDGPU::VGPRIndexMode;
7469 
7470   int64_t Imm = 0;
7471   SMLoc S = getLoc();
7472 
7473   if (trySkipId("gpr_idx", AsmToken::LParen)) {
7474     Imm = parseGPRIdxMacro();
7475     if (Imm == UNDEF)
7476       return MatchOperand_ParseFail;
7477   } else {
7478     if (getParser().parseAbsoluteExpression(Imm))
7479       return MatchOperand_ParseFail;
7480     if (Imm < 0 || !isUInt<4>(Imm)) {
7481       Error(S, "invalid immediate: only 4-bit values are legal");
7482       return MatchOperand_ParseFail;
7483     }
7484   }
7485 
7486   Operands.push_back(
7487       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
7488   return MatchOperand_Success;
7489 }
7490 
7491 bool AMDGPUOperand::isGPRIdxMode() const {
7492   return isImmTy(ImmTyGprIdxMode);
7493 }
7494 
7495 //===----------------------------------------------------------------------===//
7496 // sopp branch targets
7497 //===----------------------------------------------------------------------===//
7498 
7499 OperandMatchResultTy
7500 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
7501 
7502   // Make sure we are not parsing something
7503   // that looks like a label or an expression but is not.
7504   // This will improve error messages.
7505   if (isRegister() || isModifier())
7506     return MatchOperand_NoMatch;
7507 
7508   if (!parseExpr(Operands))
7509     return MatchOperand_ParseFail;
7510 
7511   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
7512   assert(Opr.isImm() || Opr.isExpr());
7513   SMLoc Loc = Opr.getStartLoc();
7514 
7515   // Currently we do not support arbitrary expressions as branch targets.
7516   // Only labels and absolute expressions are accepted.
7517   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
7518     Error(Loc, "expected an absolute expression or a label");
7519   } else if (Opr.isImm() && !Opr.isS16Imm()) {
7520     Error(Loc, "expected a 16-bit signed jump offset");
7521   }
7522 
7523   return MatchOperand_Success;
7524 }
7525 
7526 //===----------------------------------------------------------------------===//
7527 // Boolean holding registers
7528 //===----------------------------------------------------------------------===//
7529 
7530 OperandMatchResultTy
7531 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
7532   return parseReg(Operands);
7533 }
7534 
7535 //===----------------------------------------------------------------------===//
7536 // mubuf
7537 //===----------------------------------------------------------------------===//
7538 
7539 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
7540   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
7541 }
7542 
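// Convert parsed MUBUF operands into MCInst operands. For atomics, the
// no-return opcode is selected when the GLC (return) bit is absent; for
// returning atomics the first register operand is added twice to provide the
// tied source of the atomic return value.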
7543 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
7544                                    const OperandVector &Operands,
7545                                    bool IsAtomic,
7546                                    bool IsLds) {
7547   OptionalImmIndexMap OptionalIdx;
7548   unsigned FirstOperandIdx = 1;
7549   bool IsAtomicReturn = false;
7550 
7551   if (IsAtomic) {
7552     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7553       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7554       if (!Op.isCPol())
7555         continue;
7556       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7557       break;
7558     }
7559 
7560     if (!IsAtomicReturn) {
7561       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7562       if (NewOpc != -1)
7563         Inst.setOpcode(NewOpc);
7564     }
7565 
7566     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7567                       SIInstrFlags::IsAtomicRet;
7568   }
7569 
7570   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7571     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7572 
7573     // Add the register arguments
7574     if (Op.isReg()) {
7575       Op.addRegOperands(Inst, 1);
7576       // Insert a tied src for atomic return dst.
7577       // This cannot be postponed as subsequent calls to
7578       // addImmOperands rely on the correct number of MC operands.
7579       if (IsAtomicReturn && i == FirstOperandIdx)
7580         Op.addRegOperands(Inst, 1);
7581       continue;
7582     }
7583 
7584     // Handle the case where soffset is an immediate
7585     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7586       Op.addImmOperands(Inst, 1);
7587       continue;
7588     }
7589 
7590     // Handle tokens like 'offen' which are sometimes hard-coded into the
7591     // asm string.  There are no MCInst operands for these.
7592     if (Op.isToken()) {
7593       continue;
7594     }
7595     assert(Op.isImm());
7596 
7597     // Handle optional arguments
7598     OptionalIdx[Op.getImmTy()] = i;
7599   }
7600 
7601   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7602   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7603 
7604   if (!IsLds) { // tfe is not legal with lds opcodes
7605     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7606   }
7607   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7608 }
7609 
7610 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7611   OptionalImmIndexMap OptionalIdx;
7612 
7613   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7614     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7615 
7616     // Add the register arguments
7617     if (Op.isReg()) {
7618       Op.addRegOperands(Inst, 1);
7619       continue;
7620     }
7621 
7622     // Handle the case where soffset is an immediate
7623     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7624       Op.addImmOperands(Inst, 1);
7625       continue;
7626     }
7627 
7628     // Handle tokens like 'offen' which are sometimes hard-coded into the
7629     // asm string.  There are no MCInst operands for these.
7630     if (Op.isToken()) {
7631       continue;
7632     }
7633     assert(Op.isImm());
7634 
7635     // Handle optional arguments
7636     OptionalIdx[Op.getImmTy()] = i;
7637   }
7638 
7639   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7640                         AMDGPUOperand::ImmTyOffset);
7641   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7642   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7643   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7644   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7645 }
7646 
7647 //===----------------------------------------------------------------------===//
7648 // mimg
7649 //===----------------------------------------------------------------------===//
7650 
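// Convert parsed MIMG operands. For atomics the destination register is also
// added as the tied data source. Optional modifiers are appended in a fixed
// order, with dim/a16 only on GFX10+ and da only on earlier targets.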
7651 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7652                               bool IsAtomic) {
7653   unsigned I = 1;
7654   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7655   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7656     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7657   }
7658 
7659   if (IsAtomic) {
7660     // Add src, same as dst
7661     assert(Desc.getNumDefs() == 1);
7662     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7663   }
7664 
7665   OptionalImmIndexMap OptionalIdx;
7666 
7667   for (unsigned E = Operands.size(); I != E; ++I) {
7668     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7669 
7670     // Add the register arguments
7671     if (Op.isReg()) {
7672       Op.addRegOperands(Inst, 1);
7673     } else if (Op.isImmModifier()) {
7674       OptionalIdx[Op.getImmTy()] = I;
7675     } else if (!Op.isToken()) {
7676       llvm_unreachable("unexpected operand type");
7677     }
7678   }
7679 
7680   bool IsGFX10Plus = isGFX10Plus();
7681 
7682   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7683   if (IsGFX10Plus)
7684     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7685   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7686   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7687   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7688   if (IsGFX10Plus)
7689     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7690   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7691     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7692   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7693   if (!IsGFX10Plus)
7694     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7695   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7696 }
7697 
7698 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7699   cvtMIMG(Inst, Operands, true);
7700 }
7701 
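// Convert parsed SMEM atomic operands; this mirrors the atomic handling in
// cvtMubufImpl above (no-return opcode selection and the tied return source).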
7702 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7703   OptionalImmIndexMap OptionalIdx;
7704   bool IsAtomicReturn = false;
7705 
7706   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7707     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7708     if (!Op.isCPol())
7709       continue;
7710     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7711     break;
7712   }
7713 
7714   if (!IsAtomicReturn) {
7715     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7716     if (NewOpc != -1)
7717       Inst.setOpcode(NewOpc);
7718   }
7719 
7720   IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7721                     SIInstrFlags::IsAtomicRet;
7722 
7723   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7724     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7725 
7726     // Add the register arguments
7727     if (Op.isReg()) {
7728       Op.addRegOperands(Inst, 1);
7729       if (IsAtomicReturn && i == 1)
7730         Op.addRegOperands(Inst, 1);
7731       continue;
7732     }
7733 
7734     // Handle the case where soffset is an immediate
7735     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7736       Op.addImmOperands(Inst, 1);
7737       continue;
7738     }
7739 
7740     // Handle tokens like 'offen' which are sometimes hard-coded into the
7741     // asm string.  There are no MCInst operands for these.
7742     if (Op.isToken()) {
7743       continue;
7744     }
7745     assert(Op.isImm());
7746 
7747     // Handle optional arguments
7748     OptionalIdx[Op.getImmTy()] = i;
7749   }
7750 
7751   if ((int)Inst.getNumOperands() <=
7752       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7753     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7754   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7755 }
7756 
7757 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7758                                       const OperandVector &Operands) {
7759   for (unsigned I = 1; I < Operands.size(); ++I) {
7760     auto &Operand = (AMDGPUOperand &)*Operands[I];
7761     if (Operand.isReg())
7762       Operand.addRegOperands(Inst, 1);
7763   }
7764 
7765   Inst.addOperand(MCOperand::createImm(1)); // a16
7766 }
7767 
7768 //===----------------------------------------------------------------------===//
7769 // smrd
7770 //===----------------------------------------------------------------------===//
7771 
7772 bool AMDGPUOperand::isSMRDOffset8() const {
7773   return isImm() && isUInt<8>(getImm());
7774 }
7775 
7776 bool AMDGPUOperand::isSMEMOffset() const {
7777   return isImmTy(ImmTyNone) ||
7778          isImmTy(ImmTyOffset); // Offset range is checked later by validator.
7779 }
7780 
7781 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7782   // 32-bit literals are only supported on CI, and we only want to use them
7783   // when the offset does not fit in 8 bits.
7784   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7785 }
7786 
7787 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7788   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7789 }
7790 
7791 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7792   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7793 }
7794 
7795 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7796   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7797 }
7798 
7799 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7800   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7801 }
7802 
7803 //===----------------------------------------------------------------------===//
7804 // vop3
7805 //===----------------------------------------------------------------------===//
7806 
7807 static bool ConvertOmodMul(int64_t &Mul) {
7808   if (Mul != 1 && Mul != 2 && Mul != 4)
7809     return false;
7810 
7811   Mul >>= 1;
7812   return true;
7813 }
7814 
7815 static bool ConvertOmodDiv(int64_t &Div) {
7816   if (Div == 1) {
7817     Div = 0;
7818     return true;
7819   }
7820 
7821   if (Div == 2) {
7822     Div = 3;
7823     return true;
7824   }
7825 
7826   return false;
7827 }
7828 
7829 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7830 // This is intentional and ensures compatibility with sp3.
7831 // See bug 35397 for details.
7832 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7833   if (BoundCtrl == 0 || BoundCtrl == 1) {
7834     BoundCtrl = 1;
7835     return true;
7836   }
7837   return false;
7838 }
7839 
7840 // Note: the order in this table matches the order of operands in AsmString.
7841 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7842   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7843   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7844   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7845   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7846   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7847   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7848   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7849   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7850   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7851   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7852   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7853   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7854   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7855   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7856   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7857   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7858   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7859   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7860   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7861   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7862   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7863   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7864   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7865   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7866   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7867   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7868   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7869   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7870   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7871   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7872   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7873   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7874   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7875   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7876   {"dpp8",     AMDGPUOperand::ImmTyDPP8, false, nullptr},
7877   {"dpp_ctrl", AMDGPUOperand::ImmTyDppCtrl, false, nullptr},
7878   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7879   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7880   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7881   {"fi",   AMDGPUOperand::ImmTyDppFi, false, nullptr},
7882   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7883   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7884   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr},
7885   {"wait_vdst", AMDGPUOperand::ImmTyWaitVDST, false, nullptr},
7886   {"wait_exp", AMDGPUOperand::ImmTyWaitEXP, false, nullptr}
7887 };
7888 
7889 void AMDGPUAsmParser::onBeginOfFile() {
7890   if (!getParser().getStreamer().getTargetStreamer() ||
7891       getSTI().getTargetTriple().getArch() == Triple::r600)
7892     return;
7893 
7894   if (!getTargetStreamer().getTargetID())
7895     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7896 
7897   if (isHsaAbiVersion3AndAbove(&getSTI()))
7898     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7899 }
7900 
7901 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7902 
7903   OperandMatchResultTy res = parseOptionalOpr(Operands);
7904 
7905   // This is a hack to enable hardcoded mandatory operands which follow
7906   // optional operands.
7907   //
  // The current design assumes that all operands after the first optional
  // operand are also optional. However, the implementations of some
  // instructions violate this rule (see e.g. flat/global atomics, which have
  // hardcoded 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands to
  // make sure the autogenerated parser of custom operands never hits hardcoded
  // mandatory operands.
7915 
7916   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7917     if (res != MatchOperand_Success ||
7918         isToken(AsmToken::EndOfStatement))
7919       break;
7920 
7921     trySkipToken(AsmToken::Comma);
7922     res = parseOptionalOpr(Operands);
7923   }
7924 
7925   return res;
7926 }
7927 
7928 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7929   OperandMatchResultTy res;
7930   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7931     // try to parse any optional operand here
7932     if (Op.IsBit) {
7933       res = parseNamedBit(Op.Name, Operands, Op.Type);
7934     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7935       res = parseOModOperand(Operands);
7936     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7937                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7938                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7939       res = parseSDWASel(Operands, Op.Name, Op.Type);
7940     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7941       res = parseSDWADstUnused(Operands);
7942     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7943                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7944                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7945                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7946       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7947                                         Op.ConvertResult);
7948     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7949       res = parseDim(Operands);
7950     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7951       res = parseCPol(Operands);
7952     } else if (Op.Type == AMDGPUOperand::ImmTyDPP8) {
7953       res = parseDPP8(Operands);
7954     } else if (Op.Type == AMDGPUOperand::ImmTyDppCtrl) {
7955       res = parseDPPCtrl(Operands);
7956     } else {
7957       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7958       if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) {
7959         res = parseOperandArrayWithPrefix("neg", Operands,
7960                                           AMDGPUOperand::ImmTyBLGP,
7961                                           nullptr);
7962       }
7963     }
7964     if (res != MatchOperand_NoMatch) {
7965       return res;
7966     }
7967   }
7968   return MatchOperand_NoMatch;
7969 }
7970 
7971 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7972   StringRef Name = getTokenStr();
7973   if (Name == "mul") {
7974     return parseIntWithPrefix("mul", Operands,
7975                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7976   }
7977 
7978   if (Name == "div") {
7979     return parseIntWithPrefix("div", Operands,
7980                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7981   }
7982 
7983   return MatchOperand_NoMatch;
7984 }
7985 
7986 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
7987   cvtVOP3P(Inst, Operands);
7988 
7989   int Opc = Inst.getOpcode();
7990 
7991   int SrcNum;
7992   const int Ops[] = { AMDGPU::OpName::src0,
7993                       AMDGPU::OpName::src1,
7994                       AMDGPU::OpName::src2 };
7995   for (SrcNum = 0;
7996        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
7997        ++SrcNum);
7998   assert(SrcNum > 0);
7999 
8000   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8001   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8002 
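  // The op_sel bit just past the last source operand selects the destination;
  // it is encoded as DST_OP_SEL in src0_modifiers.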
8003   if ((OpSel & (1 << SrcNum)) != 0) {
8004     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
8005     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8006     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
8007   }
8008 }
8009 
8010 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
      // 1. This operand is an input modifiers operand
  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not the last operand
      && Desc.NumOperands > (OpNum + 1)
      // 3. The next operand has a register class
      && Desc.OpInfo[OpNum + 1].RegClass != -1
      // 4. The next operand is not tied to any other operand
      && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
8019 }
8020 
8021 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
8022 {
8023   OptionalImmIndexMap OptionalIdx;
8024   unsigned Opc = Inst.getOpcode();
8025 
8026   unsigned I = 1;
8027   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8028   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8029     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8030   }
8031 
8032   for (unsigned E = Operands.size(); I != E; ++I) {
8033     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8034     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8035       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8036     } else if (Op.isInterpSlot() ||
8037                Op.isInterpAttr() ||
8038                Op.isAttrChan()) {
8039       Inst.addOperand(MCOperand::createImm(Op.getImm()));
8040     } else if (Op.isImmModifier()) {
8041       OptionalIdx[Op.getImmTy()] = I;
8042     } else {
8043       llvm_unreachable("unhandled operand type");
8044     }
8045   }
8046 
8047   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
8048     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
8049   }
8050 
8051   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
8052     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8053   }
8054 
8055   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
8056     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8057   }
8058 }
8059 
8060 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
8061 {
8062   OptionalImmIndexMap OptionalIdx;
8063   unsigned Opc = Inst.getOpcode();
8064 
8065   unsigned I = 1;
8066   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8067   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8068     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8069   }
8070 
8071   for (unsigned E = Operands.size(); I != E; ++I) {
8072     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8073     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8074       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8075     } else if (Op.isImmModifier()) {
8076       OptionalIdx[Op.getImmTy()] = I;
8077     } else {
8078       llvm_unreachable("unhandled operand type");
8079     }
8080   }
8081 
8082   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8083 
8084   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8085   if (OpSelIdx != -1)
8086     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8087 
8088   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
8089 
8090   if (OpSelIdx == -1)
8091     return;
8092 
8093   const int Ops[] = { AMDGPU::OpName::src0,
8094                       AMDGPU::OpName::src1,
8095                       AMDGPU::OpName::src2 };
8096   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8097                          AMDGPU::OpName::src1_modifiers,
8098                          AMDGPU::OpName::src2_modifiers };
8099 
8100   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8101 
8102   for (int J = 0; J < 3; ++J) {
8103     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8104     if (OpIdx == -1)
8105       break;
8106 
8107     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8108     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8109 
8110     if ((OpSel & (1 << J)) != 0)
8111       ModVal |= SISrcMods::OP_SEL_0;
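    // op_sel bit 3 is the destination select; it is carried in src0_modifiers.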
8112     if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
8113         (OpSel & (1 << 3)) != 0)
8114       ModVal |= SISrcMods::DST_OP_SEL;
8115 
8116     Inst.getOperand(ModIdx).setImm(ModVal);
8117   }
8118 }
8119 
8120 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
8121                               OptionalImmIndexMap &OptionalIdx) {
8122   unsigned Opc = Inst.getOpcode();
8123 
8124   unsigned I = 1;
8125   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8126   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8127     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8128   }
8129 
8130   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
8131     // This instruction has src modifiers
8132     for (unsigned E = Operands.size(); I != E; ++I) {
8133       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8134       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8135         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8136       } else if (Op.isImmModifier()) {
8137         OptionalIdx[Op.getImmTy()] = I;
8138       } else if (Op.isRegOrImm()) {
8139         Op.addRegOrImmOperands(Inst, 1);
8140       } else {
8141         llvm_unreachable("unhandled operand type");
8142       }
8143     }
8144   } else {
8145     // No src modifiers
8146     for (unsigned E = Operands.size(); I != E; ++I) {
8147       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8148       if (Op.isMod()) {
8149         OptionalIdx[Op.getImmTy()] = I;
8150       } else {
8151         Op.addRegOrImmOperands(Inst, 1);
8152       }
8153     }
8154   }
8155 
8156   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
8157     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8158   }
8159 
8160   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
8161     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8162   }
8163 
  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // they have a src2 register operand that is tied to the dst operand.
  // We don't allow modifiers for this operand in the assembler, so
  // src2_modifiers should be 0.
8168   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
8169       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
8170       Opc == AMDGPU::V_MAC_F32_e64_vi ||
8171       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
8172       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
8173       Opc == AMDGPU::V_MAC_F16_e64_vi ||
8174       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
8175       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
8176       Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
8177       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
8178       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
8179       Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
8180       Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
8181       Opc == AMDGPU::V_FMAC_F16_e64_gfx11) {
8182     auto it = Inst.begin();
8183     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
8184     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
8185     ++it;
8186     // Copy the operand to ensure it's not invalidated when Inst grows.
8187     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
8188   }
8189 }
8190 
8191 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
8192   OptionalImmIndexMap OptionalIdx;
8193   cvtVOP3(Inst, Operands, OptionalIdx);
8194 }
8195 
8196 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
8197                                OptionalImmIndexMap &OptIdx) {
8198   const int Opc = Inst.getOpcode();
8199   const MCInstrDesc &Desc = MII.get(Opc);
8200 
8201   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
8202 
8203   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
8204     assert(!IsPacked);
8205     Inst.addOperand(Inst.getOperand(0));
8206   }
8207 
  // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
  // instruction, and then figure out where to actually put the modifiers.
8210 
8211   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8212   if (OpSelIdx != -1) {
8213     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
8214   }
8215 
8216   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
8217   if (OpSelHiIdx != -1) {
8218     int DefaultVal = IsPacked ? -1 : 0;
8219     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
8220                           DefaultVal);
8221   }
8222 
8223   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
8224   if (NegLoIdx != -1) {
8225     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
8226     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
8227   }
8228 
8229   const int Ops[] = { AMDGPU::OpName::src0,
8230                       AMDGPU::OpName::src1,
8231                       AMDGPU::OpName::src2 };
8232   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8233                          AMDGPU::OpName::src1_modifiers,
8234                          AMDGPU::OpName::src2_modifiers };
8235 
8236   unsigned OpSel = 0;
8237   unsigned OpSelHi = 0;
8238   unsigned NegLo = 0;
8239   unsigned NegHi = 0;
8240 
8241   if (OpSelIdx != -1)
8242     OpSel = Inst.getOperand(OpSelIdx).getImm();
8243 
8244   if (OpSelHiIdx != -1)
8245     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
8246 
8247   if (NegLoIdx != -1) {
8248     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
8249     NegLo = Inst.getOperand(NegLoIdx).getImm();
8250     NegHi = Inst.getOperand(NegHiIdx).getImm();
8251   }
8252 
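  // Distribute the packed op_sel/op_sel_hi/neg_lo/neg_hi bits into the
  // per-source *_modifiers operands.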
8253   for (int J = 0; J < 3; ++J) {
8254     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8255     if (OpIdx == -1)
8256       break;
8257 
8258     uint32_t ModVal = 0;
8259 
8260     if ((OpSel & (1 << J)) != 0)
8261       ModVal |= SISrcMods::OP_SEL_0;
8262 
8263     if ((OpSelHi & (1 << J)) != 0)
8264       ModVal |= SISrcMods::OP_SEL_1;
8265 
8266     if ((NegLo & (1 << J)) != 0)
8267       ModVal |= SISrcMods::NEG;
8268 
8269     if ((NegHi & (1 << J)) != 0)
8270       ModVal |= SISrcMods::NEG_HI;
8271 
8272     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8273 
8274     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
8275   }
8276 }
8277 
8278 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8279   OptionalImmIndexMap OptIdx;
8280   cvtVOP3(Inst, Operands, OptIdx);
8281   cvtVOP3P(Inst, Operands, OptIdx);
8282 }
8283 
8284 //===----------------------------------------------------------------------===//
8285 // dpp
8286 //===----------------------------------------------------------------------===//
8287 
8288 bool AMDGPUOperand::isDPP8() const {
8289   return isImmTy(ImmTyDPP8);
8290 }
8291 
8292 bool AMDGPUOperand::isDPPCtrl() const {
8293   using namespace AMDGPU::DPP;
8294 
8295   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8296   if (result) {
8297     int64_t Imm = getImm();
8298     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8299            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
8300            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8301            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
8302            (Imm == DppCtrl::WAVE_SHL1) ||
8303            (Imm == DppCtrl::WAVE_ROL1) ||
8304            (Imm == DppCtrl::WAVE_SHR1) ||
8305            (Imm == DppCtrl::WAVE_ROR1) ||
8306            (Imm == DppCtrl::ROW_MIRROR) ||
8307            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
8308            (Imm == DppCtrl::BCAST15) ||
8309            (Imm == DppCtrl::BCAST31) ||
8310            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8311            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
8312   }
8313   return false;
8314 }
8315 
8316 //===----------------------------------------------------------------------===//
8317 // mAI
8318 //===----------------------------------------------------------------------===//
8319 
8320 bool AMDGPUOperand::isBLGP() const {
8321   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8322 }
8323 
8324 bool AMDGPUOperand::isCBSZ() const {
8325   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
8326 }
8327 
8328 bool AMDGPUOperand::isABID() const {
8329   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
8330 }
8331 
8332 bool AMDGPUOperand::isS16Imm() const {
8333   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8334 }
8335 
8336 bool AMDGPUOperand::isU16Imm() const {
8337   return isImm() && isUInt<16>(getImm());
8338 }
8339 
8340 //===----------------------------------------------------------------------===//
8341 // dim
8342 //===----------------------------------------------------------------------===//
8343 
8344 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
8345   // We want to allow "dim:1D" etc.,
8346   // but the initial 1 is tokenized as an integer.
8347   std::string Token;
8348   if (isToken(AsmToken::Integer)) {
8349     SMLoc Loc = getToken().getEndLoc();
8350     Token = std::string(getTokenStr());
8351     lex();
8352     if (getLoc() != Loc)
8353       return false;
8354   }
8355 
8356   StringRef Suffix;
8357   if (!parseId(Suffix))
8358     return false;
8359   Token += Suffix;
8360 
8361   StringRef DimId = Token;
8362   if (DimId.startswith("SQ_RSRC_IMG_"))
8363     DimId = DimId.drop_front(12);
8364 
8365   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
8366   if (!DimInfo)
8367     return false;
8368 
8369   Encoding = DimInfo->Encoding;
8370   return true;
8371 }
8372 
8373 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
8374   if (!isGFX10Plus())
8375     return MatchOperand_NoMatch;
8376 
8377   SMLoc S = getLoc();
8378 
8379   if (!trySkipId("dim", AsmToken::Colon))
8380     return MatchOperand_NoMatch;
8381 
8382   unsigned Encoding;
8383   SMLoc Loc = getLoc();
8384   if (!parseDimId(Encoding)) {
8385     Error(Loc, "invalid dim value");
8386     return MatchOperand_ParseFail;
8387   }
8388 
8389   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
8390                                               AMDGPUOperand::ImmTyDim));
8391   return MatchOperand_Success;
8392 }
8393 
8394 //===----------------------------------------------------------------------===//
8395 // dpp
8396 //===----------------------------------------------------------------------===//
8397 
8398 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
8399   SMLoc S = getLoc();
8400 
8401   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
8402     return MatchOperand_NoMatch;
8403 
8404   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
8405 
8406   int64_t Sels[8];
8407 
8408   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8409     return MatchOperand_ParseFail;
8410 
8411   for (size_t i = 0; i < 8; ++i) {
8412     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8413       return MatchOperand_ParseFail;
8414 
8415     SMLoc Loc = getLoc();
8416     if (getParser().parseAbsoluteExpression(Sels[i]))
8417       return MatchOperand_ParseFail;
8418     if (0 > Sels[i] || 7 < Sels[i]) {
8419       Error(Loc, "expected a 3-bit value");
8420       return MatchOperand_ParseFail;
8421     }
8422   }
8423 
8424   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8425     return MatchOperand_ParseFail;
8426 
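  // Pack the eight 3-bit lane selects into a single immediate.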
8427   unsigned DPP8 = 0;
8428   for (size_t i = 0; i < 8; ++i)
8429     DPP8 |= (Sels[i] << (i * 3));
8430 
8431   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
8432   return MatchOperand_Success;
8433 }
8434 
8435 bool
8436 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
8437                                     const OperandVector &Operands) {
8438   if (Ctrl == "row_newbcast")
8439     return isGFX90A();
8440 
8441   if (Ctrl == "row_share" ||
8442       Ctrl == "row_xmask")
8443     return isGFX10Plus();
8444 
8445   if (Ctrl == "wave_shl" ||
8446       Ctrl == "wave_shr" ||
8447       Ctrl == "wave_rol" ||
8448       Ctrl == "wave_ror" ||
8449       Ctrl == "row_bcast")
8450     return isVI() || isGFX9();
8451 
8452   return Ctrl == "row_mirror" ||
8453          Ctrl == "row_half_mirror" ||
8454          Ctrl == "quad_perm" ||
8455          Ctrl == "row_shl" ||
8456          Ctrl == "row_shr" ||
8457          Ctrl == "row_ror";
8458 }
8459 
8460 int64_t
8461 AMDGPUAsmParser::parseDPPCtrlPerm() {
8462   // quad_perm:[%d,%d,%d,%d]
8463 
8464   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8465     return -1;
8466 
8467   int64_t Val = 0;
8468   for (int i = 0; i < 4; ++i) {
8469     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8470       return -1;
8471 
8472     int64_t Temp;
8473     SMLoc Loc = getLoc();
8474     if (getParser().parseAbsoluteExpression(Temp))
8475       return -1;
8476     if (Temp < 0 || Temp > 3) {
8477       Error(Loc, "expected a 2-bit value");
8478       return -1;
8479     }
8480 
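    // Pack each 2-bit lane select into the quad_perm value.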
8481     Val += (Temp << i * 2);
8482   }
8483 
8484   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8485     return -1;
8486 
8487   return Val;
8488 }
8489 
8490 int64_t
8491 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
8492   using namespace AMDGPU::DPP;
8493 
8494   // sel:%d
8495 
8496   int64_t Val;
8497   SMLoc Loc = getLoc();
8498 
8499   if (getParser().parseAbsoluteExpression(Val))
8500     return -1;
8501 
8502   struct DppCtrlCheck {
8503     int64_t Ctrl;
8504     int Lo;
8505     int Hi;
8506   };
8507 
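  // For each control, Ctrl is the base encoding and [Lo, Hi] is the valid
  // range of the parsed select value.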
8508   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
8509     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
8510     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
8511     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
8512     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
8513     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
8514     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
8515     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
8516     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
8517     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
8518     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
8519     .Default({-1, 0, 0});
8520 
8521   bool Valid;
8522   if (Check.Ctrl == -1) {
8523     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
8524     Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
8525   } else {
8526     Valid = Check.Lo <= Val && Val <= Check.Hi;
8527     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
8528   }
8529 
8530   if (!Valid) {
8531     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
8532     return -1;
8533   }
8534 
8535   return Val;
8536 }
8537 
8538 OperandMatchResultTy
8539 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
8540   using namespace AMDGPU::DPP;
8541 
8542   if (!isToken(AsmToken::Identifier) ||
8543       !isSupportedDPPCtrl(getTokenStr(), Operands))
8544     return MatchOperand_NoMatch;
8545 
8546   SMLoc S = getLoc();
8547   int64_t Val = -1;
8548   StringRef Ctrl;
8549 
8550   parseId(Ctrl);
8551 
8552   if (Ctrl == "row_mirror") {
8553     Val = DppCtrl::ROW_MIRROR;
8554   } else if (Ctrl == "row_half_mirror") {
8555     Val = DppCtrl::ROW_HALF_MIRROR;
8556   } else {
8557     if (skipToken(AsmToken::Colon, "expected a colon")) {
8558       if (Ctrl == "quad_perm") {
8559         Val = parseDPPCtrlPerm();
8560       } else {
8561         Val = parseDPPCtrlSel(Ctrl);
8562       }
8563     }
8564   }
8565 
8566   if (Val == -1)
8567     return MatchOperand_ParseFail;
8568 
8569   Operands.push_back(
8570     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
8571   return MatchOperand_Success;
8572 }
8573 
8574 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
8575   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
8576 }
8577 
8578 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
8579   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
8580 }
8581 
8582 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
8583   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
8584 }
8585 
8586 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
8587   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
8588 }
8589 
8590 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
8591   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
8592 }
8593 
8594 void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8595   OptionalImmIndexMap OptionalIdx;
8596   unsigned Opc = Inst.getOpcode();
8597   bool HasModifiers = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
8598   unsigned I = 1;
8599   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8600   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8601     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8602   }
8603 
8604   int Fi = 0;
8605   for (unsigned E = Operands.size(); I != E; ++I) {
8606     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8607                                             MCOI::TIED_TO);
8608     if (TiedTo != -1) {
8609       assert((unsigned)TiedTo < Inst.getNumOperands());
8610       // handle tied old or src2 for MAC instructions
8611       Inst.addOperand(Inst.getOperand(TiedTo));
8612     }
8613     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8614     // Add the register arguments
8615     if (IsDPP8 && Op.isFI()) {
8616       Fi = Op.getImm();
8617     } else if (HasModifiers &&
8618                isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8619       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8620     } else if (Op.isReg()) {
8621       Op.addRegOperands(Inst, 1);
8622     } else if (Op.isImm() &&
8623                Desc.OpInfo[Inst.getNumOperands()].RegClass != -1) {
8624       assert(!HasModifiers && "Case should be unreachable with modifiers");
8625       assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
8626       Op.addImmOperands(Inst, 1);
8627     } else if (Op.isImm()) {
8628       OptionalIdx[Op.getImmTy()] = I;
8629     } else {
8630       llvm_unreachable("unhandled operand type");
8631     }
8632   }
8633   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
8634     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8635   }
8636   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
8637     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8638   }
8639   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) {
8640     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8641   }
8642 
8643   if (IsDPP8) {
8644     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
8645     using namespace llvm::AMDGPU::DPP;
8646     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8647   } else {
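    // The default dpp_ctrl is 0xe4, i.e. quad_perm:[0,1,2,3] (identity).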
8648     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
8649     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8650     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8651     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8652     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8653       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8654     }
8655   }
8656 }
8657 
8658 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8659   OptionalImmIndexMap OptionalIdx;
8660 
8661   unsigned Opc = Inst.getOpcode();
8662   bool HasModifiers =
8663       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
8664   unsigned I = 1;
8665   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8666   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8667     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8668   }
8669 
8670   int Fi = 0;
8671   for (unsigned E = Operands.size(); I != E; ++I) {
8672     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8673                                             MCOI::TIED_TO);
8674     if (TiedTo != -1) {
8675       assert((unsigned)TiedTo < Inst.getNumOperands());
8676       // handle tied old or src2 for MAC instructions
8677       Inst.addOperand(Inst.getOperand(TiedTo));
8678     }
8679     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8680     // Add the register arguments
8681     if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32, ...) dpp uses a "vcc" token.
      // Skip it.
8684       continue;
8685     }
8686 
8687     if (IsDPP8) {
8688       if (Op.isDPP8()) {
8689         Op.addImmOperands(Inst, 1);
8690       } else if (HasModifiers &&
8691                  isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8692         Op.addRegWithFPInputModsOperands(Inst, 2);
8693       } else if (Op.isFI()) {
8694         Fi = Op.getImm();
8695       } else if (Op.isReg()) {
8696         Op.addRegOperands(Inst, 1);
8697       } else {
8698         llvm_unreachable("Invalid operand type");
8699       }
8700     } else {
8701       if (HasModifiers &&
8702           isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8703         Op.addRegWithFPInputModsOperands(Inst, 2);
8704       } else if (Op.isReg()) {
8705         Op.addRegOperands(Inst, 1);
8706       } else if (Op.isDPPCtrl()) {
8707         Op.addImmOperands(Inst, 1);
8708       } else if (Op.isImm()) {
8709         // Handle optional arguments
8710         OptionalIdx[Op.getImmTy()] = I;
8711       } else {
8712         llvm_unreachable("Invalid operand type");
8713       }
8714     }
8715   }
8716 
8717   if (IsDPP8) {
8718     using namespace llvm::AMDGPU::DPP;
8719     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8720   } else {
8721     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8722     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8723     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8724     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8725       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8726     }
8727   }
8728 }
8729 
8730 //===----------------------------------------------------------------------===//
8731 // sdwa
8732 //===----------------------------------------------------------------------===//
8733 
8734 OperandMatchResultTy
8735 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
8736                               AMDGPUOperand::ImmTy Type) {
8737   using namespace llvm::AMDGPU::SDWA;
8738 
8739   SMLoc S = getLoc();
8740   StringRef Value;
8741   OperandMatchResultTy res;
8742 
8743   SMLoc StringLoc;
8744   res = parseStringWithPrefix(Prefix, Value, StringLoc);
8745   if (res != MatchOperand_Success) {
8746     return res;
8747   }
8748 
8749   int64_t Int;
8750   Int = StringSwitch<int64_t>(Value)
8751         .Case("BYTE_0", SdwaSel::BYTE_0)
8752         .Case("BYTE_1", SdwaSel::BYTE_1)
8753         .Case("BYTE_2", SdwaSel::BYTE_2)
8754         .Case("BYTE_3", SdwaSel::BYTE_3)
8755         .Case("WORD_0", SdwaSel::WORD_0)
8756         .Case("WORD_1", SdwaSel::WORD_1)
8757         .Case("DWORD", SdwaSel::DWORD)
8758         .Default(0xffffffff);
8759 
8760   if (Int == 0xffffffff) {
8761     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8762     return MatchOperand_ParseFail;
8763   }
8764 
8765   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8766   return MatchOperand_Success;
8767 }
8768 
8769 OperandMatchResultTy
8770 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8771   using namespace llvm::AMDGPU::SDWA;
8772 
8773   SMLoc S = getLoc();
8774   StringRef Value;
8775   OperandMatchResultTy res;
8776 
8777   SMLoc StringLoc;
8778   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8779   if (res != MatchOperand_Success) {
8780     return res;
8781   }
8782 
8783   int64_t Int;
8784   Int = StringSwitch<int64_t>(Value)
8785         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8786         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8787         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8788         .Default(0xffffffff);
8789 
8790   if (Int == 0xffffffff) {
8791     Error(StringLoc, "invalid dst_unused value");
8792     return MatchOperand_ParseFail;
8793   }
8794 
8795   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8796   return MatchOperand_Success;
8797 }
8798 
8799 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8800   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8801 }
8802 
8803 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8804   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8805 }
8806 
8807 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, /*SkipDstVcc=*/true,
          /*SkipSrcVcc=*/true);
8809 }
8810 
8811 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, /*SkipDstVcc=*/false,
          /*SkipSrcVcc=*/true);
8813 }
8814 
8815 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, /*SkipDstVcc=*/isVI());
8817 }
8818 
8819 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8820                               uint64_t BasicInstType,
8821                               bool SkipDstVcc,
8822                               bool SkipSrcVcc) {
8823   using namespace llvm::AMDGPU::SDWA;
8824 
8825   OptionalImmIndexMap OptionalIdx;
8826   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8827   bool SkippedVcc = false;
8828 
8829   unsigned I = 1;
8830   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8831   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8832     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8833   }
8834 
8835   for (unsigned E = Operands.size(); I != E; ++I) {
8836     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8837     if (SkipVcc && !SkippedVcc && Op.isReg() &&
8838         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32, ...) sdwa uses a "vcc" token as dst.
      // Skip it if it's the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on the previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
8844       if (BasicInstType == SIInstrFlags::VOP2 &&
8845           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8846            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8847         SkippedVcc = true;
8848         continue;
8849       } else if (BasicInstType == SIInstrFlags::VOPC &&
8850                  Inst.getNumOperands() == 0) {
8851         SkippedVcc = true;
8852         continue;
8853       }
8854     }
8855     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8856       Op.addRegOrImmWithInputModsOperands(Inst, 2);
8857     } else if (Op.isImm()) {
8858       // Handle optional arguments
8859       OptionalIdx[Op.getImmTy()] = I;
8860     } else {
8861       llvm_unreachable("Invalid operand type");
8862     }
8863     SkippedVcc = false;
8864   }
8865 
8866   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
8867       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
8868       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // V_NOP_sdwa_{vi,gfx9,gfx10} has no optional sdwa arguments.
8870     switch (BasicInstType) {
8871     case SIInstrFlags::VOP1:
8872       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8873       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8874         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8875       }
8876       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8877       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8878       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8879       break;
8880 
8881     case SIInstrFlags::VOP2:
8882       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8883       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8884         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8885       }
8886       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8887       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8888       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8889       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8890       break;
8891 
8892     case SIInstrFlags::VOPC:
8893       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8894         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8895       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8896       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8897       break;
8898 
8899     default:
8900       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8901     }
8902   }
8903 
  // Special case v_mac_{f16, f32}:
  // they have a src2 register operand that is tied to the dst operand.
8906   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8907       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
8908     auto it = Inst.begin();
8909     std::advance(
8910       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8911     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8912   }
8913 }
8914 
8915 //===----------------------------------------------------------------------===//
8916 // mAI
8917 //===----------------------------------------------------------------------===//
8918 
8919 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8920   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8921 }
8922 
8923 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8924   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8925 }
8926 
8927 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8928   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8929 }
8930 
8931 /// Force static initialization.
8932 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8933   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8934   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8935 }
8936 
8937 #define GET_REGISTER_MATCHER
8938 #define GET_MATCHER_IMPLEMENTATION
8939 #define GET_MNEMONIC_SPELL_CHECKER
8940 #define GET_MNEMONIC_CHECKER
8941 #include "AMDGPUGenAsmMatcher.inc"
8942 
// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined.
8945 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8946                                                      unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to see a token and fails to validate
  // the operand. This method checks whether we were given an immediate operand
  // but were expected to provide the corresponding token.
8951   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8952   switch (Kind) {
8953   case MCK_addr64:
8954     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8955   case MCK_gds:
8956     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8957   case MCK_lds:
8958     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8959   case MCK_idxen:
8960     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8961   case MCK_offen:
8962     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8963   case MCK_SSrcB32:
8964     // When operands have expression values, they will return true for isToken,
8965     // because it is not possible to distinguish between a token and an
8966     // expression at parse time. MatchInstructionImpl() will always try to
8967     // match an operand as a token, when isToken returns true, and when the
8968     // name of the expression is not a valid token, the match will fail,
8969     // so we need to handle it here.
8970     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8971   case MCK_SSrcF32:
8972     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8973   case MCK_SoppBrTarget:
8974     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8975   case MCK_VReg32OrOff:
8976     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8977   case MCK_InterpSlot:
8978     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
8979   case MCK_Attr:
8980     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
8981   case MCK_AttrChan:
8982     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
8983   case MCK_ImmSMEMOffset:
8984     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
8985   case MCK_SReg_64:
8986   case MCK_SReg_64_XEXEC:
8987     // Null is defined as a 32-bit register but
8988     // it should also be enabled with 64-bit operands.
8989     // The following code enables it for SReg_64 operands
8990     // used as source and destination. Remaining source
8991     // operands are handled in isInlinableImm.
8992     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
8993   default:
8994     return Match_InvalidOperand;
8995   }
8996 }
8997 
8998 //===----------------------------------------------------------------------===//
8999 // endpgm
9000 //===----------------------------------------------------------------------===//
9001 
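// Parse the optional 16-bit immediate that may follow an endpgm mnemonic,
// e.g. "s_endpgm 10".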
9002 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
9003   SMLoc S = getLoc();
9004   int64_t Imm = 0;
9005 
9006   if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
9008     Imm = 0;
9009   }
9010 
9011   if (!isUInt<16>(Imm)) {
9012     Error(S, "expected a 16-bit value");
9013     return MatchOperand_ParseFail;
9014   }
9015 
9016   Operands.push_back(
9017       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
9018   return MatchOperand_Success;
9019 }
9020 
9021 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
9022 
9023 //===----------------------------------------------------------------------===//
9024 // LDSDIR
9025 //===----------------------------------------------------------------------===//
9026 
9027 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitVDST() const {
9028   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitVDST);
9029 }
9030 
9031 bool AMDGPUOperand::isWaitVDST() const {
9032   return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
9033 }
9034 
9035 //===----------------------------------------------------------------------===//
9036 // VINTERP
9037 //===----------------------------------------------------------------------===//
9038 
9039 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitEXP() const {
9040   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitEXP);
9041 }
9042 
9043 bool AMDGPUOperand::isWaitEXP() const {
9044   return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());
9045 }
9046