1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/BinaryFormat/ELF.h"
24 #include "llvm/MC/MCAsmInfo.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCExpr.h"
27 #include "llvm/MC/MCInst.h"
28 #include "llvm/MC/MCInstrDesc.h"
29 #include "llvm/MC/MCParser/MCAsmLexer.h"
30 #include "llvm/MC/MCParser/MCAsmParser.h"
31 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
32 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
33 #include "llvm/MC/MCSymbol.h"
34 #include "llvm/MC/TargetRegistry.h"
35 #include "llvm/Support/AMDGPUMetadata.h"
36 #include "llvm/Support/AMDHSAKernelDescriptor.h"
37 #include "llvm/Support/Casting.h"
38 #include "llvm/Support/MachineValueType.h"
39 #include "llvm/Support/MathExtras.h"
40 #include "llvm/Support/TargetParser.h"
41 
42 using namespace llvm;
43 using namespace llvm::AMDGPU;
44 using namespace llvm::amdhsa;
45 
46 namespace {
47 
48 class AMDGPUAsmParser;
49 
50 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
51 
52 //===----------------------------------------------------------------------===//
53 // Operand
54 //===----------------------------------------------------------------------===//
55 
56 class AMDGPUOperand : public MCParsedAsmOperand {
57   enum KindTy {
58     Token,
59     Immediate,
60     Register,
61     Expression
62   } Kind;
63 
64   SMLoc StartLoc, EndLoc;
65   const AMDGPUAsmParser *AsmParser;
66 
67 public:
68   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
69       : Kind(Kind_), AsmParser(AsmParser_) {}
70 
71   using Ptr = std::unique_ptr<AMDGPUOperand>;
72 
73   struct Modifiers {
74     bool Abs = false;
75     bool Neg = false;
76     bool Sext = false;
77 
78     bool hasFPModifiers() const { return Abs || Neg; }
79     bool hasIntModifiers() const { return Sext; }
80     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
81 
82     int64_t getFPModifiersOperand() const {
83       int64_t Operand = 0;
84       Operand |= Abs ? SISrcMods::ABS : 0u;
85       Operand |= Neg ? SISrcMods::NEG : 0u;
86       return Operand;
87     }
88 
89     int64_t getIntModifiersOperand() const {
90       int64_t Operand = 0;
91       Operand |= Sext ? SISrcMods::SEXT : 0u;
92       return Operand;
93     }
94 
95     int64_t getModifiersOperand() const {
96       assert(!(hasFPModifiers() && hasIntModifiers())
97            && "fp and int modifiers should not be used simultaneously");
98       if (hasFPModifiers()) {
99         return getFPModifiersOperand();
100       } else if (hasIntModifiers()) {
101         return getIntModifiersOperand();
102       } else {
103         return 0;
104       }
105     }
106 
107     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
108   };
109 
110   enum ImmTy {
111     ImmTyNone,
112     ImmTyGDS,
113     ImmTyLDS,
114     ImmTyOffen,
115     ImmTyIdxen,
116     ImmTyAddr64,
117     ImmTyOffset,
118     ImmTyInstOffset,
119     ImmTyOffset0,
120     ImmTyOffset1,
121     ImmTyCPol,
122     ImmTySWZ,
123     ImmTyTFE,
124     ImmTyD16,
125     ImmTyClampSI,
126     ImmTyOModSI,
127     ImmTySdwaDstSel,
128     ImmTySdwaSrc0Sel,
129     ImmTySdwaSrc1Sel,
130     ImmTySdwaDstUnused,
131     ImmTyDMask,
132     ImmTyDim,
133     ImmTyUNorm,
134     ImmTyDA,
135     ImmTyR128A16,
136     ImmTyA16,
137     ImmTyLWE,
138     ImmTyExpTgt,
139     ImmTyExpCompr,
140     ImmTyExpVM,
141     ImmTyFORMAT,
142     ImmTyHwreg,
143     ImmTyOff,
144     ImmTySendMsg,
145     ImmTyInterpSlot,
146     ImmTyInterpAttr,
147     ImmTyAttrChan,
148     ImmTyOpSel,
149     ImmTyOpSelHi,
150     ImmTyNegLo,
151     ImmTyNegHi,
152     ImmTyDPP8,
153     ImmTyDppCtrl,
154     ImmTyDppRowMask,
155     ImmTyDppBankMask,
156     ImmTyDppBoundCtrl,
157     ImmTyDppFi,
158     ImmTySwizzle,
159     ImmTyGprIdxMode,
160     ImmTyHigh,
161     ImmTyBLGP,
162     ImmTyCBSZ,
163     ImmTyABID,
164     ImmTyEndpgm,
165     ImmTyWaitVDST,
166     ImmTyWaitEXP,
167   };
168 
169   enum ImmKindTy {
170     ImmKindTyNone,
171     ImmKindTyLiteral,
172     ImmKindTyConst,
173   };
174 
175 private:
176   struct TokOp {
177     const char *Data;
178     unsigned Length;
179   };
180 
181   struct ImmOp {
182     int64_t Val;
183     ImmTy Type;
184     bool IsFPImm;
185     mutable ImmKindTy Kind;
186     Modifiers Mods;
187   };
188 
189   struct RegOp {
190     unsigned RegNo;
191     Modifiers Mods;
192   };
193 
194   union {
195     TokOp Tok;
196     ImmOp Imm;
197     RegOp Reg;
198     const MCExpr *Expr;
199   };
200 
201 public:
202   bool isToken() const override {
203     if (Kind == Token)
204       return true;
205 
206     // When parsing operands, we can't always tell if something was meant to be
207     // a token, like 'gds', or an expression that references a global variable.
208     // In this case, we assume the string is an expression, and if we need to
209     // interpret is a token, then we treat the symbol name as the token.
210     return isSymbolRefExpr();
211   }
212 
213   bool isSymbolRefExpr() const {
214     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
215   }
216 
217   bool isImm() const override {
218     return Kind == Immediate;
219   }
220 
221   void setImmKindNone() const {
222     assert(isImm());
223     Imm.Kind = ImmKindTyNone;
224   }
225 
226   void setImmKindLiteral() const {
227     assert(isImm());
228     Imm.Kind = ImmKindTyLiteral;
229   }
230 
231   void setImmKindConst() const {
232     assert(isImm());
233     Imm.Kind = ImmKindTyConst;
234   }
235 
236   bool IsImmKindLiteral() const {
237     return isImm() && Imm.Kind == ImmKindTyLiteral;
238   }
239 
240   bool isImmKindConst() const {
241     return isImm() && Imm.Kind == ImmKindTyConst;
242   }
243 
244   bool isInlinableImm(MVT type) const;
245   bool isLiteralImm(MVT type) const;
246 
247   bool isRegKind() const {
248     return Kind == Register;
249   }
250 
251   bool isReg() const override {
252     return isRegKind() && !hasModifiers();
253   }
254 
255   bool isRegOrInline(unsigned RCID, MVT type) const {
256     return isRegClass(RCID) || isInlinableImm(type);
257   }
258 
259   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
260     return isRegOrInline(RCID, type) || isLiteralImm(type);
261   }
262 
263   bool isRegOrImmWithInt16InputMods() const {
264     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
265   }
266 
267   bool isRegOrImmWithInt32InputMods() const {
268     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
269   }
270 
271   bool isRegOrInlineImmWithInt16InputMods() const {
272     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
273   }
274 
275   bool isRegOrInlineImmWithInt32InputMods() const {
276     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
277   }
278 
279   bool isRegOrImmWithInt64InputMods() const {
280     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
281   }
282 
283   bool isRegOrImmWithFP16InputMods() const {
284     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
285   }
286 
287   bool isRegOrImmWithFP32InputMods() const {
288     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
289   }
290 
291   bool isRegOrImmWithFP64InputMods() const {
292     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
293   }
294 
295   bool isRegOrInlineImmWithFP16InputMods() const {
296     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16);
297   }
298 
299   bool isRegOrInlineImmWithFP32InputMods() const {
300     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
301   }
302 
303 
304   bool isVReg() const {
305     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
306            isRegClass(AMDGPU::VReg_64RegClassID) ||
307            isRegClass(AMDGPU::VReg_96RegClassID) ||
308            isRegClass(AMDGPU::VReg_128RegClassID) ||
309            isRegClass(AMDGPU::VReg_160RegClassID) ||
310            isRegClass(AMDGPU::VReg_192RegClassID) ||
311            isRegClass(AMDGPU::VReg_256RegClassID) ||
312            isRegClass(AMDGPU::VReg_512RegClassID) ||
313            isRegClass(AMDGPU::VReg_1024RegClassID);
314   }
315 
316   bool isVReg32() const {
317     return isRegClass(AMDGPU::VGPR_32RegClassID);
318   }
319 
320   bool isVReg32OrOff() const {
321     return isOff() || isVReg32();
322   }
323 
324   bool isNull() const {
325     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
326   }
327 
328   bool isVRegWithInputMods() const;
329 
330   bool isSDWAOperand(MVT type) const;
331   bool isSDWAFP16Operand() const;
332   bool isSDWAFP32Operand() const;
333   bool isSDWAInt16Operand() const;
334   bool isSDWAInt32Operand() const;
335 
336   bool isImmTy(ImmTy ImmT) const {
337     return isImm() && Imm.Type == ImmT;
338   }
339 
340   bool isImmModifier() const {
341     return isImm() && Imm.Type != ImmTyNone;
342   }
343 
344   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
345   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
346   bool isDMask() const { return isImmTy(ImmTyDMask); }
347   bool isDim() const { return isImmTy(ImmTyDim); }
348   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
349   bool isDA() const { return isImmTy(ImmTyDA); }
350   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
351   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
352   bool isLWE() const { return isImmTy(ImmTyLWE); }
353   bool isOff() const { return isImmTy(ImmTyOff); }
354   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
355   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
356   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
357   bool isOffen() const { return isImmTy(ImmTyOffen); }
358   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
359   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
360   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
361   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
362   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
363 
364   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
365   bool isGDS() const { return isImmTy(ImmTyGDS); }
366   bool isLDS() const { return isImmTy(ImmTyLDS); }
367   bool isCPol() const { return isImmTy(ImmTyCPol); }
368   bool isSWZ() const { return isImmTy(ImmTySWZ); }
369   bool isTFE() const { return isImmTy(ImmTyTFE); }
370   bool isD16() const { return isImmTy(ImmTyD16); }
371   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
372   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
373   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
374   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
375   bool isFI() const { return isImmTy(ImmTyDppFi); }
376   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
377   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
378   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
379   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
380   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
381   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
382   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
383   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
384   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
385   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
386   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
387   bool isHigh() const { return isImmTy(ImmTyHigh); }
388 
389   bool isMod() const {
390     return isClampSI() || isOModSI();
391   }
392 
393   bool isRegOrImm() const {
394     return isReg() || isImm();
395   }
396 
397   bool isRegClass(unsigned RCID) const;
398 
399   bool isInlineValue() const;
400 
401   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
402     return isRegOrInline(RCID, type) && !hasModifiers();
403   }
404 
405   bool isSCSrcB16() const {
406     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
407   }
408 
409   bool isSCSrcV2B16() const {
410     return isSCSrcB16();
411   }
412 
413   bool isSCSrcB32() const {
414     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
415   }
416 
417   bool isSCSrcB64() const {
418     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
419   }
420 
421   bool isBoolReg() const;
422 
423   bool isSCSrcF16() const {
424     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
425   }
426 
427   bool isSCSrcV2F16() const {
428     return isSCSrcF16();
429   }
430 
431   bool isSCSrcF32() const {
432     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
433   }
434 
435   bool isSCSrcF64() const {
436     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
437   }
438 
439   bool isSSrcB32() const {
440     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
441   }
442 
443   bool isSSrcB16() const {
444     return isSCSrcB16() || isLiteralImm(MVT::i16);
445   }
446 
447   bool isSSrcV2B16() const {
448     llvm_unreachable("cannot happen");
449     return isSSrcB16();
450   }
451 
452   bool isSSrcB64() const {
453     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
454     // See isVSrc64().
455     return isSCSrcB64() || isLiteralImm(MVT::i64);
456   }
457 
458   bool isSSrcF32() const {
459     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
460   }
461 
462   bool isSSrcF64() const {
463     return isSCSrcB64() || isLiteralImm(MVT::f64);
464   }
465 
466   bool isSSrcF16() const {
467     return isSCSrcB16() || isLiteralImm(MVT::f16);
468   }
469 
470   bool isSSrcV2F16() const {
471     llvm_unreachable("cannot happen");
472     return isSSrcF16();
473   }
474 
475   bool isSSrcV2FP32() const {
476     llvm_unreachable("cannot happen");
477     return isSSrcF32();
478   }
479 
480   bool isSCSrcV2FP32() const {
481     llvm_unreachable("cannot happen");
482     return isSCSrcF32();
483   }
484 
485   bool isSSrcV2INT32() const {
486     llvm_unreachable("cannot happen");
487     return isSSrcB32();
488   }
489 
490   bool isSCSrcV2INT32() const {
491     llvm_unreachable("cannot happen");
492     return isSCSrcB32();
493   }
494 
495   bool isSSrcOrLdsB32() const {
496     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
497            isLiteralImm(MVT::i32) || isExpr();
498   }
499 
500   bool isVCSrcB32() const {
501     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
502   }
503 
504   bool isVCSrcB64() const {
505     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
506   }
507 
508   bool isVCSrcB16() const {
509     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
510   }
511 
512   bool isVCSrcV2B16() const {
513     return isVCSrcB16();
514   }
515 
516   bool isVCSrcF32() const {
517     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
518   }
519 
520   bool isVCSrcF64() const {
521     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
522   }
523 
524   bool isVCSrcF16() const {
525     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
526   }
527 
528   bool isVCSrcV2F16() const {
529     return isVCSrcF16();
530   }
531 
532   bool isVSrcB32() const {
533     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
534   }
535 
536   bool isVSrcB64() const {
537     return isVCSrcF64() || isLiteralImm(MVT::i64);
538   }
539 
540   bool isVSrcB16() const {
541     return isVCSrcB16() || isLiteralImm(MVT::i16);
542   }
543 
544   bool isVSrcV2B16() const {
545     return isVSrcB16() || isLiteralImm(MVT::v2i16);
546   }
547 
548   bool isVCSrcV2FP32() const {
549     return isVCSrcF64();
550   }
551 
552   bool isVSrcV2FP32() const {
553     return isVSrcF64() || isLiteralImm(MVT::v2f32);
554   }
555 
556   bool isVCSrcV2INT32() const {
557     return isVCSrcB64();
558   }
559 
560   bool isVSrcV2INT32() const {
561     return isVSrcB64() || isLiteralImm(MVT::v2i32);
562   }
563 
564   bool isVSrcF32() const {
565     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
566   }
567 
568   bool isVSrcF64() const {
569     return isVCSrcF64() || isLiteralImm(MVT::f64);
570   }
571 
572   bool isVSrcF16() const {
573     return isVCSrcF16() || isLiteralImm(MVT::f16);
574   }
575 
576   bool isVSrcV2F16() const {
577     return isVSrcF16() || isLiteralImm(MVT::v2f16);
578   }
579 
580   bool isVISrcB32() const {
581     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
582   }
583 
584   bool isVISrcB16() const {
585     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
586   }
587 
588   bool isVISrcV2B16() const {
589     return isVISrcB16();
590   }
591 
592   bool isVISrcF32() const {
593     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
594   }
595 
596   bool isVISrcF16() const {
597     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
598   }
599 
600   bool isVISrcV2F16() const {
601     return isVISrcF16() || isVISrcB32();
602   }
603 
604   bool isVISrc_64B64() const {
605     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
606   }
607 
608   bool isVISrc_64F64() const {
609     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
610   }
611 
612   bool isVISrc_64V2FP32() const {
613     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
614   }
615 
616   bool isVISrc_64V2INT32() const {
617     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
618   }
619 
620   bool isVISrc_256B64() const {
621     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
622   }
623 
624   bool isVISrc_256F64() const {
625     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
626   }
627 
628   bool isVISrc_128B16() const {
629     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
630   }
631 
632   bool isVISrc_128V2B16() const {
633     return isVISrc_128B16();
634   }
635 
636   bool isVISrc_128B32() const {
637     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
638   }
639 
640   bool isVISrc_128F32() const {
641     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
642   }
643 
644   bool isVISrc_256V2FP32() const {
645     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
646   }
647 
648   bool isVISrc_256V2INT32() const {
649     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
650   }
651 
652   bool isVISrc_512B32() const {
653     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
654   }
655 
656   bool isVISrc_512B16() const {
657     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
658   }
659 
660   bool isVISrc_512V2B16() const {
661     return isVISrc_512B16();
662   }
663 
664   bool isVISrc_512F32() const {
665     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
666   }
667 
668   bool isVISrc_512F16() const {
669     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
670   }
671 
672   bool isVISrc_512V2F16() const {
673     return isVISrc_512F16() || isVISrc_512B32();
674   }
675 
676   bool isVISrc_1024B32() const {
677     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
678   }
679 
680   bool isVISrc_1024B16() const {
681     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
682   }
683 
684   bool isVISrc_1024V2B16() const {
685     return isVISrc_1024B16();
686   }
687 
688   bool isVISrc_1024F32() const {
689     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
690   }
691 
692   bool isVISrc_1024F16() const {
693     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
694   }
695 
696   bool isVISrc_1024V2F16() const {
697     return isVISrc_1024F16() || isVISrc_1024B32();
698   }
699 
700   bool isAISrcB32() const {
701     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
702   }
703 
704   bool isAISrcB16() const {
705     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
706   }
707 
708   bool isAISrcV2B16() const {
709     return isAISrcB16();
710   }
711 
712   bool isAISrcF32() const {
713     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
714   }
715 
716   bool isAISrcF16() const {
717     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
718   }
719 
720   bool isAISrcV2F16() const {
721     return isAISrcF16() || isAISrcB32();
722   }
723 
724   bool isAISrc_64B64() const {
725     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
726   }
727 
728   bool isAISrc_64F64() const {
729     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
730   }
731 
732   bool isAISrc_128B32() const {
733     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
734   }
735 
736   bool isAISrc_128B16() const {
737     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
738   }
739 
740   bool isAISrc_128V2B16() const {
741     return isAISrc_128B16();
742   }
743 
744   bool isAISrc_128F32() const {
745     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
746   }
747 
748   bool isAISrc_128F16() const {
749     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
750   }
751 
752   bool isAISrc_128V2F16() const {
753     return isAISrc_128F16() || isAISrc_128B32();
754   }
755 
756   bool isVISrc_128F16() const {
757     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
758   }
759 
760   bool isVISrc_128V2F16() const {
761     return isVISrc_128F16() || isVISrc_128B32();
762   }
763 
764   bool isAISrc_256B64() const {
765     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
766   }
767 
768   bool isAISrc_256F64() const {
769     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
770   }
771 
772   bool isAISrc_512B32() const {
773     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
774   }
775 
776   bool isAISrc_512B16() const {
777     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
778   }
779 
780   bool isAISrc_512V2B16() const {
781     return isAISrc_512B16();
782   }
783 
784   bool isAISrc_512F32() const {
785     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
786   }
787 
788   bool isAISrc_512F16() const {
789     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
790   }
791 
792   bool isAISrc_512V2F16() const {
793     return isAISrc_512F16() || isAISrc_512B32();
794   }
795 
796   bool isAISrc_1024B32() const {
797     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
798   }
799 
800   bool isAISrc_1024B16() const {
801     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
802   }
803 
804   bool isAISrc_1024V2B16() const {
805     return isAISrc_1024B16();
806   }
807 
808   bool isAISrc_1024F32() const {
809     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
810   }
811 
812   bool isAISrc_1024F16() const {
813     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
814   }
815 
816   bool isAISrc_1024V2F16() const {
817     return isAISrc_1024F16() || isAISrc_1024B32();
818   }
819 
820   bool isKImmFP32() const {
821     return isLiteralImm(MVT::f32);
822   }
823 
824   bool isKImmFP16() const {
825     return isLiteralImm(MVT::f16);
826   }
827 
828   bool isMem() const override {
829     return false;
830   }
831 
832   bool isExpr() const {
833     return Kind == Expression;
834   }
835 
836   bool isSoppBrTarget() const {
837     return isExpr() || isImm();
838   }
839 
840   bool isSWaitCnt() const;
841   bool isDepCtr() const;
842   bool isSDelayAlu() const;
843   bool isHwreg() const;
844   bool isSendMsg() const;
845   bool isSwizzle() const;
846   bool isSMRDOffset8() const;
847   bool isSMEMOffset() const;
848   bool isSMRDLiteralOffset() const;
849   bool isDPP8() const;
850   bool isDPPCtrl() const;
851   bool isBLGP() const;
852   bool isCBSZ() const;
853   bool isABID() const;
854   bool isGPRIdxMode() const;
855   bool isS16Imm() const;
856   bool isU16Imm() const;
857   bool isEndpgm() const;
858   bool isWaitVDST() const;
859   bool isWaitEXP() const;
860 
861   StringRef getExpressionAsToken() const {
862     assert(isExpr());
863     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
864     return S->getSymbol().getName();
865   }
866 
867   StringRef getToken() const {
868     assert(isToken());
869 
870     if (Kind == Expression)
871       return getExpressionAsToken();
872 
873     return StringRef(Tok.Data, Tok.Length);
874   }
875 
876   int64_t getImm() const {
877     assert(isImm());
878     return Imm.Val;
879   }
880 
881   void setImm(int64_t Val) {
882     assert(isImm());
883     Imm.Val = Val;
884   }
885 
886   ImmTy getImmTy() const {
887     assert(isImm());
888     return Imm.Type;
889   }
890 
891   unsigned getReg() const override {
892     assert(isRegKind());
893     return Reg.RegNo;
894   }
895 
896   SMLoc getStartLoc() const override {
897     return StartLoc;
898   }
899 
900   SMLoc getEndLoc() const override {
901     return EndLoc;
902   }
903 
904   SMRange getLocRange() const {
905     return SMRange(StartLoc, EndLoc);
906   }
907 
908   Modifiers getModifiers() const {
909     assert(isRegKind() || isImmTy(ImmTyNone));
910     return isRegKind() ? Reg.Mods : Imm.Mods;
911   }
912 
913   void setModifiers(Modifiers Mods) {
914     assert(isRegKind() || isImmTy(ImmTyNone));
915     if (isRegKind())
916       Reg.Mods = Mods;
917     else
918       Imm.Mods = Mods;
919   }
920 
921   bool hasModifiers() const {
922     return getModifiers().hasModifiers();
923   }
924 
925   bool hasFPModifiers() const {
926     return getModifiers().hasFPModifiers();
927   }
928 
929   bool hasIntModifiers() const {
930     return getModifiers().hasIntModifiers();
931   }
932 
933   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
934 
935   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
936 
937   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
938 
939   template <unsigned Bitwidth>
940   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
941 
942   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
943     addKImmFPOperands<16>(Inst, N);
944   }
945 
946   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
947     addKImmFPOperands<32>(Inst, N);
948   }
949 
950   void addRegOperands(MCInst &Inst, unsigned N) const;
951 
952   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
953     addRegOperands(Inst, N);
954   }
955 
956   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
957     if (isRegKind())
958       addRegOperands(Inst, N);
959     else if (isExpr())
960       Inst.addOperand(MCOperand::createExpr(Expr));
961     else
962       addImmOperands(Inst, N);
963   }
964 
965   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
966     Modifiers Mods = getModifiers();
967     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
968     if (isRegKind()) {
969       addRegOperands(Inst, N);
970     } else {
971       addImmOperands(Inst, N, false);
972     }
973   }
974 
975   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
976     assert(!hasIntModifiers());
977     addRegOrImmWithInputModsOperands(Inst, N);
978   }
979 
980   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
981     assert(!hasFPModifiers());
982     addRegOrImmWithInputModsOperands(Inst, N);
983   }
984 
985   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
986     Modifiers Mods = getModifiers();
987     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
988     assert(isRegKind());
989     addRegOperands(Inst, N);
990   }
991 
992   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
993     assert(!hasIntModifiers());
994     addRegWithInputModsOperands(Inst, N);
995   }
996 
997   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
998     assert(!hasFPModifiers());
999     addRegWithInputModsOperands(Inst, N);
1000   }
1001 
1002   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
1003     if (isImm())
1004       addImmOperands(Inst, N);
1005     else {
1006       assert(isExpr());
1007       Inst.addOperand(MCOperand::createExpr(Expr));
1008     }
1009   }
1010 
1011   static void printImmTy(raw_ostream& OS, ImmTy Type) {
1012     switch (Type) {
1013     case ImmTyNone: OS << "None"; break;
1014     case ImmTyGDS: OS << "GDS"; break;
1015     case ImmTyLDS: OS << "LDS"; break;
1016     case ImmTyOffen: OS << "Offen"; break;
1017     case ImmTyIdxen: OS << "Idxen"; break;
1018     case ImmTyAddr64: OS << "Addr64"; break;
1019     case ImmTyOffset: OS << "Offset"; break;
1020     case ImmTyInstOffset: OS << "InstOffset"; break;
1021     case ImmTyOffset0: OS << "Offset0"; break;
1022     case ImmTyOffset1: OS << "Offset1"; break;
1023     case ImmTyCPol: OS << "CPol"; break;
1024     case ImmTySWZ: OS << "SWZ"; break;
1025     case ImmTyTFE: OS << "TFE"; break;
1026     case ImmTyD16: OS << "D16"; break;
1027     case ImmTyFORMAT: OS << "FORMAT"; break;
1028     case ImmTyClampSI: OS << "ClampSI"; break;
1029     case ImmTyOModSI: OS << "OModSI"; break;
1030     case ImmTyDPP8: OS << "DPP8"; break;
1031     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1032     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1033     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1034     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1035     case ImmTyDppFi: OS << "FI"; break;
1036     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1037     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1038     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1039     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1040     case ImmTyDMask: OS << "DMask"; break;
1041     case ImmTyDim: OS << "Dim"; break;
1042     case ImmTyUNorm: OS << "UNorm"; break;
1043     case ImmTyDA: OS << "DA"; break;
1044     case ImmTyR128A16: OS << "R128A16"; break;
1045     case ImmTyA16: OS << "A16"; break;
1046     case ImmTyLWE: OS << "LWE"; break;
1047     case ImmTyOff: OS << "Off"; break;
1048     case ImmTyExpTgt: OS << "ExpTgt"; break;
1049     case ImmTyExpCompr: OS << "ExpCompr"; break;
1050     case ImmTyExpVM: OS << "ExpVM"; break;
1051     case ImmTyHwreg: OS << "Hwreg"; break;
1052     case ImmTySendMsg: OS << "SendMsg"; break;
1053     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1054     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1055     case ImmTyAttrChan: OS << "AttrChan"; break;
1056     case ImmTyOpSel: OS << "OpSel"; break;
1057     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1058     case ImmTyNegLo: OS << "NegLo"; break;
1059     case ImmTyNegHi: OS << "NegHi"; break;
1060     case ImmTySwizzle: OS << "Swizzle"; break;
1061     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1062     case ImmTyHigh: OS << "High"; break;
1063     case ImmTyBLGP: OS << "BLGP"; break;
1064     case ImmTyCBSZ: OS << "CBSZ"; break;
1065     case ImmTyABID: OS << "ABID"; break;
1066     case ImmTyEndpgm: OS << "Endpgm"; break;
1067     case ImmTyWaitVDST: OS << "WaitVDST"; break;
1068     case ImmTyWaitEXP: OS << "WaitEXP"; break;
1069     }
1070   }
1071 
1072   void print(raw_ostream &OS) const override {
1073     switch (Kind) {
1074     case Register:
1075       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1076       break;
1077     case Immediate:
1078       OS << '<' << getImm();
1079       if (getImmTy() != ImmTyNone) {
1080         OS << " type: "; printImmTy(OS, getImmTy());
1081       }
1082       OS << " mods: " << Imm.Mods << '>';
1083       break;
1084     case Token:
1085       OS << '\'' << getToken() << '\'';
1086       break;
1087     case Expression:
1088       OS << "<expr " << *Expr << '>';
1089       break;
1090     }
1091   }
1092 
1093   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1094                                       int64_t Val, SMLoc Loc,
1095                                       ImmTy Type = ImmTyNone,
1096                                       bool IsFPImm = false) {
1097     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1098     Op->Imm.Val = Val;
1099     Op->Imm.IsFPImm = IsFPImm;
1100     Op->Imm.Kind = ImmKindTyNone;
1101     Op->Imm.Type = Type;
1102     Op->Imm.Mods = Modifiers();
1103     Op->StartLoc = Loc;
1104     Op->EndLoc = Loc;
1105     return Op;
1106   }
1107 
1108   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1109                                         StringRef Str, SMLoc Loc,
1110                                         bool HasExplicitEncodingSize = true) {
1111     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1112     Res->Tok.Data = Str.data();
1113     Res->Tok.Length = Str.size();
1114     Res->StartLoc = Loc;
1115     Res->EndLoc = Loc;
1116     return Res;
1117   }
1118 
1119   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1120                                       unsigned RegNo, SMLoc S,
1121                                       SMLoc E) {
1122     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1123     Op->Reg.RegNo = RegNo;
1124     Op->Reg.Mods = Modifiers();
1125     Op->StartLoc = S;
1126     Op->EndLoc = E;
1127     return Op;
1128   }
1129 
1130   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1131                                        const class MCExpr *Expr, SMLoc S) {
1132     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1133     Op->Expr = Expr;
1134     Op->StartLoc = S;
1135     Op->EndLoc = S;
1136     return Op;
1137   }
1138 };
1139 
1140 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1141   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1142   return OS;
1143 }
1144 
1145 //===----------------------------------------------------------------------===//
1146 // AsmParser
1147 //===----------------------------------------------------------------------===//
1148 
1149 // Holds info related to the current kernel, e.g. count of SGPRs used.
1150 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1151 // .amdgpu_hsa_kernel or at EOF.
1152 class KernelScopeInfo {
1153   int SgprIndexUnusedMin = -1;
1154   int VgprIndexUnusedMin = -1;
1155   int AgprIndexUnusedMin = -1;
1156   MCContext *Ctx = nullptr;
1157   MCSubtargetInfo const *MSTI = nullptr;
1158 
1159   void usesSgprAt(int i) {
1160     if (i >= SgprIndexUnusedMin) {
1161       SgprIndexUnusedMin = ++i;
1162       if (Ctx) {
1163         MCSymbol* const Sym =
1164           Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1165         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1166       }
1167     }
1168   }
1169 
1170   void usesVgprAt(int i) {
1171     if (i >= VgprIndexUnusedMin) {
1172       VgprIndexUnusedMin = ++i;
1173       if (Ctx) {
1174         MCSymbol* const Sym =
1175           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1176         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1177                                          VgprIndexUnusedMin);
1178         Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1179       }
1180     }
1181   }
1182 
1183   void usesAgprAt(int i) {
1184     // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
1185     if (!hasMAIInsts(*MSTI))
1186       return;
1187 
1188     if (i >= AgprIndexUnusedMin) {
1189       AgprIndexUnusedMin = ++i;
1190       if (Ctx) {
1191         MCSymbol* const Sym =
1192           Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1193         Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1194 
1195         // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1196         MCSymbol* const vSym =
1197           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1198         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1199                                          VgprIndexUnusedMin);
1200         vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1201       }
1202     }
1203   }
1204 
1205 public:
1206   KernelScopeInfo() = default;
1207 
1208   void initialize(MCContext &Context) {
1209     Ctx = &Context;
1210     MSTI = Ctx->getSubtargetInfo();
1211 
1212     usesSgprAt(SgprIndexUnusedMin = -1);
1213     usesVgprAt(VgprIndexUnusedMin = -1);
1214     if (hasMAIInsts(*MSTI)) {
1215       usesAgprAt(AgprIndexUnusedMin = -1);
1216     }
1217   }
1218 
1219   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1220                     unsigned RegWidth) {
1221     switch (RegKind) {
1222     case IS_SGPR:
1223       usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1224       break;
1225     case IS_AGPR:
1226       usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1227       break;
1228     case IS_VGPR:
1229       usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1230       break;
1231     default:
1232       break;
1233     }
1234   }
1235 };
1236 
1237 class AMDGPUAsmParser : public MCTargetAsmParser {
1238   MCAsmParser &Parser;
1239 
1240   // Number of extra operands parsed after the first optional operand.
1241   // This may be necessary to skip hardcoded mandatory operands.
1242   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1243 
1244   unsigned ForcedEncodingSize = 0;
1245   bool ForcedDPP = false;
1246   bool ForcedSDWA = false;
1247   KernelScopeInfo KernelScope;
1248   unsigned CPolSeen;
1249 
1250   /// @name Auto-generated Match Functions
1251   /// {
1252 
1253 #define GET_ASSEMBLER_HEADER
1254 #include "AMDGPUGenAsmMatcher.inc"
1255 
1256   /// }
1257 
1258 private:
1259   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1260   bool OutOfRangeError(SMRange Range);
1261   /// Calculate VGPR/SGPR blocks required for given target, reserved
1262   /// registers, and user-specified NextFreeXGPR values.
1263   ///
1264   /// \param Features [in] Target features, used for bug corrections.
1265   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1266   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1267   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1268   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1269   /// descriptor field, if valid.
1270   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1271   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1272   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1273   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1274   /// \param VGPRBlocks [out] Result VGPR block count.
1275   /// \param SGPRBlocks [out] Result SGPR block count.
1276   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1277                           bool FlatScrUsed, bool XNACKUsed,
1278                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1279                           SMRange VGPRRange, unsigned NextFreeSGPR,
1280                           SMRange SGPRRange, unsigned &VGPRBlocks,
1281                           unsigned &SGPRBlocks);
1282   bool ParseDirectiveAMDGCNTarget();
1283   bool ParseDirectiveAMDHSAKernel();
1284   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1285   bool ParseDirectiveHSACodeObjectVersion();
1286   bool ParseDirectiveHSACodeObjectISA();
1287   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1288   bool ParseDirectiveAMDKernelCodeT();
1289   // TODO: Possibly make subtargetHasRegister const.
1290   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1291   bool ParseDirectiveAMDGPUHsaKernel();
1292 
1293   bool ParseDirectiveISAVersion();
1294   bool ParseDirectiveHSAMetadata();
1295   bool ParseDirectivePALMetadataBegin();
1296   bool ParseDirectivePALMetadata();
1297   bool ParseDirectiveAMDGPULDS();
1298 
1299   /// Common code to parse out a block of text (typically YAML) between start and
1300   /// end directives.
1301   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1302                            const char *AssemblerDirectiveEnd,
1303                            std::string &CollectString);
1304 
1305   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1306                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1307   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1308                            unsigned &RegNum, unsigned &RegWidth,
1309                            bool RestoreOnFailure = false);
1310   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1311                            unsigned &RegNum, unsigned &RegWidth,
1312                            SmallVectorImpl<AsmToken> &Tokens);
1313   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1314                            unsigned &RegWidth,
1315                            SmallVectorImpl<AsmToken> &Tokens);
1316   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1317                            unsigned &RegWidth,
1318                            SmallVectorImpl<AsmToken> &Tokens);
1319   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1320                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1321   bool ParseRegRange(unsigned& Num, unsigned& Width);
1322   unsigned getRegularReg(RegisterKind RegKind,
1323                          unsigned RegNum,
1324                          unsigned RegWidth,
1325                          SMLoc Loc);
1326 
1327   bool isRegister();
1328   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1329   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1330   void initializeGprCountSymbol(RegisterKind RegKind);
1331   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1332                              unsigned RegWidth);
1333   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1334                     bool IsAtomic, bool IsLds = false);
1335   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1336                  bool IsGdsHardcoded);
1337 
1338 public:
1339   enum AMDGPUMatchResultTy {
1340     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1341   };
1342   enum OperandMode {
1343     OperandMode_Default,
1344     OperandMode_NSA,
1345   };
1346 
1347   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1348 
1349   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1350                const MCInstrInfo &MII,
1351                const MCTargetOptions &Options)
1352       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1353     MCAsmParserExtension::Initialize(Parser);
1354 
1355     if (getFeatureBits().none()) {
1356       // Set default features.
1357       copySTI().ToggleFeature("southern-islands");
1358     }
1359 
1360     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1361 
1362     {
1363       // TODO: make those pre-defined variables read-only.
1364       // Currently there is none suitable machinery in the core llvm-mc for this.
1365       // MCSymbol::isRedefinable is intended for another purpose, and
1366       // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
1367       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1368       MCContext &Ctx = getContext();
1369       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1370         MCSymbol *Sym =
1371             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1372         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1373         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1374         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1375         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1376         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1377       } else {
1378         MCSymbol *Sym =
1379             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1380         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1381         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1382         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1383         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1384         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1385       }
1386       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1387         initializeGprCountSymbol(IS_VGPR);
1388         initializeGprCountSymbol(IS_SGPR);
1389       } else
1390         KernelScope.initialize(getContext());
1391     }
1392   }
1393 
1394   bool hasMIMG_R128() const {
1395     return AMDGPU::hasMIMG_R128(getSTI());
1396   }
1397 
1398   bool hasPackedD16() const {
1399     return AMDGPU::hasPackedD16(getSTI());
1400   }
1401 
1402   bool hasGFX10A16() const {
1403     return AMDGPU::hasGFX10A16(getSTI());
1404   }
1405 
1406   bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1407 
1408   bool isSI() const {
1409     return AMDGPU::isSI(getSTI());
1410   }
1411 
1412   bool isCI() const {
1413     return AMDGPU::isCI(getSTI());
1414   }
1415 
1416   bool isVI() const {
1417     return AMDGPU::isVI(getSTI());
1418   }
1419 
1420   bool isGFX9() const {
1421     return AMDGPU::isGFX9(getSTI());
1422   }
1423 
1424   // TODO: isGFX90A is also true for GFX940. We need to clean it.
1425   bool isGFX90A() const {
1426     return AMDGPU::isGFX90A(getSTI());
1427   }
1428 
1429   bool isGFX940() const {
1430     return AMDGPU::isGFX940(getSTI());
1431   }
1432 
1433   bool isGFX9Plus() const {
1434     return AMDGPU::isGFX9Plus(getSTI());
1435   }
1436 
1437   bool isGFX10() const {
1438     return AMDGPU::isGFX10(getSTI());
1439   }
1440 
1441   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1442 
1443   bool isGFX11() const {
1444     return AMDGPU::isGFX11(getSTI());
1445   }
1446 
1447   bool isGFX11Plus() const {
1448     return AMDGPU::isGFX11Plus(getSTI());
1449   }
1450 
1451   bool isGFX10_BEncoding() const {
1452     return AMDGPU::isGFX10_BEncoding(getSTI());
1453   }
1454 
1455   bool hasInv2PiInlineImm() const {
1456     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1457   }
1458 
1459   bool hasFlatOffsets() const {
1460     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1461   }
1462 
1463   bool hasArchitectedFlatScratch() const {
1464     return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1465   }
1466 
1467   bool hasSGPR102_SGPR103() const {
1468     return !isVI() && !isGFX9();
1469   }
1470 
1471   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1472 
1473   bool hasIntClamp() const {
1474     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1475   }
1476 
1477   AMDGPUTargetStreamer &getTargetStreamer() {
1478     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1479     return static_cast<AMDGPUTargetStreamer &>(TS);
1480   }
1481 
1482   const MCRegisterInfo *getMRI() const {
1483     // We need this const_cast because for some reason getContext() is not const
1484     // in MCAsmParser.
1485     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1486   }
1487 
1488   const MCInstrInfo *getMII() const {
1489     return &MII;
1490   }
1491 
1492   const FeatureBitset &getFeatureBits() const {
1493     return getSTI().getFeatureBits();
1494   }
1495 
1496   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1497   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1498   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1499 
1500   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1501   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1502   bool isForcedDPP() const { return ForcedDPP; }
1503   bool isForcedSDWA() const { return ForcedSDWA; }
1504   ArrayRef<unsigned> getMatchedVariants() const;
1505   StringRef getMatchedVariantName() const;
1506 
1507   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1508   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1509                      bool RestoreOnFailure);
1510   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1511   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1512                                         SMLoc &EndLoc) override;
1513   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1514   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1515                                       unsigned Kind) override;
1516   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1517                                OperandVector &Operands, MCStreamer &Out,
1518                                uint64_t &ErrorInfo,
1519                                bool MatchingInlineAsm) override;
1520   bool ParseDirective(AsmToken DirectiveID) override;
1521   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1522                                     OperandMode Mode = OperandMode_Default);
1523   StringRef parseMnemonicSuffix(StringRef Name);
1524   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1525                         SMLoc NameLoc, OperandVector &Operands) override;
1526   //bool ProcessInstruction(MCInst &Inst);
1527 
1528   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1529 
1530   OperandMatchResultTy
1531   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1532                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1533                      bool (*ConvertResult)(int64_t &) = nullptr);
1534 
1535   OperandMatchResultTy
1536   parseOperandArrayWithPrefix(const char *Prefix,
1537                               OperandVector &Operands,
1538                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1539                               bool (*ConvertResult)(int64_t&) = nullptr);
1540 
1541   OperandMatchResultTy
1542   parseNamedBit(StringRef Name, OperandVector &Operands,
1543                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1544   OperandMatchResultTy parseCPol(OperandVector &Operands);
1545   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1546                                              StringRef &Value,
1547                                              SMLoc &StringLoc);
1548 
1549   bool isModifier();
1550   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1551   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1552   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1553   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1554   bool parseSP3NegModifier();
1555   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1556   OperandMatchResultTy parseReg(OperandVector &Operands);
1557   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1558   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1559   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1560   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1561   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1562   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1563   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1564   OperandMatchResultTy parseUfmt(int64_t &Format);
1565   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1566   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1567   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1568   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1569   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1570   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1571   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1572 
1573   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1574   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1575   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1576   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1577 
1578   bool parseCnt(int64_t &IntVal);
1579   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1580 
1581   bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1582   void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1583   OperandMatchResultTy parseDepCtrOps(OperandVector &Operands);
1584 
1585   bool parseDelay(int64_t &Delay);
1586   OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands);
1587 
1588   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1589 
1590 private:
1591   struct OperandInfoTy {
1592     SMLoc Loc;
1593     int64_t Id;
1594     bool IsSymbolic = false;
1595     bool IsDefined = false;
1596 
1597     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1598   };
1599 
1600   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1601   bool validateSendMsg(const OperandInfoTy &Msg,
1602                        const OperandInfoTy &Op,
1603                        const OperandInfoTy &Stream);
1604 
1605   bool parseHwregBody(OperandInfoTy &HwReg,
1606                       OperandInfoTy &Offset,
1607                       OperandInfoTy &Width);
1608   bool validateHwreg(const OperandInfoTy &HwReg,
1609                      const OperandInfoTy &Offset,
1610                      const OperandInfoTy &Width);
1611 
1612   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1613   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1614   SMLoc getBLGPLoc(const OperandVector &Operands) const;
1615 
1616   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1617                       const OperandVector &Operands) const;
1618   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1619   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1620   SMLoc getLitLoc(const OperandVector &Operands) const;
1621   SMLoc getConstLoc(const OperandVector &Operands) const;
1622 
1623   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1624   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1625   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1626   bool validateSOPLiteral(const MCInst &Inst) const;
1627   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1628   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1629   bool validateIntClampSupported(const MCInst &Inst);
1630   bool validateMIMGAtomicDMask(const MCInst &Inst);
1631   bool validateMIMGGatherDMask(const MCInst &Inst);
1632   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1633   Optional<StringRef> validateMIMGDataSize(const MCInst &Inst);
1634   bool validateMIMGAddrSize(const MCInst &Inst);
1635   bool validateMIMGD16(const MCInst &Inst);
1636   bool validateMIMGDim(const MCInst &Inst);
1637   bool validateMIMGMSAA(const MCInst &Inst);
1638   bool validateOpSel(const MCInst &Inst);
1639   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1640   bool validateVccOperand(unsigned Reg) const;
1641   bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1642   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1643   bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1644   bool validateAGPRLdSt(const MCInst &Inst) const;
1645   bool validateVGPRAlign(const MCInst &Inst) const;
1646   bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1647   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1648   bool validateDivScale(const MCInst &Inst);
1649   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1650                              const SMLoc &IDLoc);
1651   bool validateFlatLdsDMA(const MCInst &Inst, const OperandVector &Operands,
1652                           const SMLoc &IDLoc);
1653   Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1654   unsigned getConstantBusLimit(unsigned Opcode) const;
1655   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1656   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1657   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1658 
1659   bool isSupportedMnemo(StringRef Mnemo,
1660                         const FeatureBitset &FBS);
1661   bool isSupportedMnemo(StringRef Mnemo,
1662                         const FeatureBitset &FBS,
1663                         ArrayRef<unsigned> Variants);
1664   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1665 
1666   bool isId(const StringRef Id) const;
1667   bool isId(const AsmToken &Token, const StringRef Id) const;
1668   bool isToken(const AsmToken::TokenKind Kind) const;
1669   bool trySkipId(const StringRef Id);
1670   bool trySkipId(const StringRef Pref, const StringRef Id);
1671   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1672   bool trySkipToken(const AsmToken::TokenKind Kind);
1673   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1674   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1675   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1676 
1677   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1678   AsmToken::TokenKind getTokenKind() const;
1679   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1680   bool parseExpr(OperandVector &Operands);
1681   StringRef getTokenStr() const;
1682   AsmToken peekToken();
1683   AsmToken getToken() const;
1684   SMLoc getLoc() const;
1685   void lex();
1686 
1687 public:
1688   void onBeginOfFile() override;
1689 
1690   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1691   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1692 
1693   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1694   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1695   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1696   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1697   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1698   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1699 
1700   bool parseSwizzleOperand(int64_t &Op,
1701                            const unsigned MinVal,
1702                            const unsigned MaxVal,
1703                            const StringRef ErrMsg,
1704                            SMLoc &Loc);
1705   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1706                             const unsigned MinVal,
1707                             const unsigned MaxVal,
1708                             const StringRef ErrMsg);
1709   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1710   bool parseSwizzleOffset(int64_t &Imm);
1711   bool parseSwizzleMacro(int64_t &Imm);
1712   bool parseSwizzleQuadPerm(int64_t &Imm);
1713   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1714   bool parseSwizzleBroadcast(int64_t &Imm);
1715   bool parseSwizzleSwap(int64_t &Imm);
1716   bool parseSwizzleReverse(int64_t &Imm);
1717 
1718   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1719   int64_t parseGPRIdxMacro();
1720 
1721   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1722   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1723   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1724   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1725 
1726   AMDGPUOperand::Ptr defaultCPol() const;
1727 
1728   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1729   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1730   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1731   AMDGPUOperand::Ptr defaultFlatOffset() const;
1732 
1733   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1734 
1735   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1736                OptionalImmIndexMap &OptionalIdx);
1737   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1738   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1739   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1740   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1741                 OptionalImmIndexMap &OptionalIdx);
1742 
1743   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1744   void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1745 
1746   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1747                bool IsAtomic = false);
1748   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1749   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1750 
1751   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1752 
1753   bool parseDimId(unsigned &Encoding);
1754   OperandMatchResultTy parseDim(OperandVector &Operands);
1755   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1756   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1757   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1758   int64_t parseDPPCtrlSel(StringRef Ctrl);
1759   int64_t parseDPPCtrlPerm();
1760   AMDGPUOperand::Ptr defaultRowMask() const;
1761   AMDGPUOperand::Ptr defaultBankMask() const;
1762   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1763   AMDGPUOperand::Ptr defaultFI() const;
1764   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1765   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1766     cvtDPP(Inst, Operands, true);
1767   }
1768   void cvtVOPCNoDstDPP(MCInst &Inst, const OperandVector &Operands,
1769                        bool IsDPP8 = false);
1770   void cvtVOPCNoDstDPP8(MCInst &Inst, const OperandVector &Operands) {
1771     cvtVOPCNoDstDPP(Inst, Operands, true);
1772   }
1773   void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1774                   bool IsDPP8 = false);
1775   void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1776     cvtVOP3DPP(Inst, Operands, true);
1777   }
1778   void cvtVOPC64NoDstDPP(MCInst &Inst, const OperandVector &Operands,
1779                          bool IsDPP8 = false);
1780   void cvtVOPC64NoDstDPP8(MCInst &Inst, const OperandVector &Operands) {
1781     cvtVOPC64NoDstDPP(Inst, Operands, true);
1782   }
1783 
1784   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1785                                     AMDGPUOperand::ImmTy Type);
1786   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1787   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1788   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1789   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1790   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1791   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1792   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1793                uint64_t BasicInstType,
1794                bool SkipDstVcc = false,
1795                bool SkipSrcVcc = false);
1796 
1797   AMDGPUOperand::Ptr defaultBLGP() const;
1798   AMDGPUOperand::Ptr defaultCBSZ() const;
1799   AMDGPUOperand::Ptr defaultABID() const;
1800 
1801   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1802   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1803 
1804   AMDGPUOperand::Ptr defaultWaitVDST() const;
1805   AMDGPUOperand::Ptr defaultWaitEXP() const;
1806 };
1807 
1808 struct OptionalOperand {
1809   const char *Name;
1810   AMDGPUOperand::ImmTy Type;
1811   bool IsBit;
1812   bool (*ConvertResult)(int64_t&);
1813 };
1814 
1815 } // end anonymous namespace
1816 
1817 // May be called with integer type with equivalent bitwidth.
1818 static const fltSemantics *getFltSemantics(unsigned Size) {
1819   switch (Size) {
1820   case 4:
1821     return &APFloat::IEEEsingle();
1822   case 8:
1823     return &APFloat::IEEEdouble();
1824   case 2:
1825     return &APFloat::IEEEhalf();
1826   default:
1827     llvm_unreachable("unsupported fp type");
1828   }
1829 }
1830 
1831 static const fltSemantics *getFltSemantics(MVT VT) {
1832   return getFltSemantics(VT.getSizeInBits() / 8);
1833 }
1834 
1835 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1836   switch (OperandType) {
1837   case AMDGPU::OPERAND_REG_IMM_INT32:
1838   case AMDGPU::OPERAND_REG_IMM_FP32:
1839   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1840   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1841   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1842   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1843   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1844   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1845   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1846   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1847   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1848   case AMDGPU::OPERAND_KIMM32:
1849     return &APFloat::IEEEsingle();
1850   case AMDGPU::OPERAND_REG_IMM_INT64:
1851   case AMDGPU::OPERAND_REG_IMM_FP64:
1852   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1853   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1854   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1855     return &APFloat::IEEEdouble();
1856   case AMDGPU::OPERAND_REG_IMM_INT16:
1857   case AMDGPU::OPERAND_REG_IMM_FP16:
1858   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1859   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1860   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1861   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1862   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1863   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1864   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1865   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1866   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1867   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1868   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1869   case AMDGPU::OPERAND_KIMM16:
1870     return &APFloat::IEEEhalf();
1871   default:
1872     llvm_unreachable("unsupported fp type");
1873   }
1874 }
1875 
1876 //===----------------------------------------------------------------------===//
1877 // Operand
1878 //===----------------------------------------------------------------------===//
1879 
1880 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1881   bool Lost;
1882 
1883   // Convert literal to single precision
1884   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1885                                                APFloat::rmNearestTiesToEven,
1886                                                &Lost);
1887   // We allow precision lost but not overflow or underflow
1888   if (Status != APFloat::opOK &&
1889       Lost &&
1890       ((Status & APFloat::opOverflow)  != 0 ||
1891        (Status & APFloat::opUnderflow) != 0)) {
1892     return false;
1893   }
1894 
1895   return true;
1896 }
1897 
1898 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1899   return isUIntN(Size, Val) || isIntN(Size, Val);
1900 }
1901 
1902 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1903   if (VT.getScalarType() == MVT::i16) {
1904     // FP immediate values are broken.
1905     return isInlinableIntLiteral(Val);
1906   }
1907 
1908   // f16/v2f16 operands work correctly for all values.
1909   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1910 }
1911 
1912 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1913 
1914   // This is a hack to enable named inline values like
1915   // shared_base with both 32-bit and 64-bit operands.
1916   // Note that these values are defined as
1917   // 32-bit operands only.
1918   if (isInlineValue()) {
1919     return true;
1920   }
1921 
1922   if (!isImmTy(ImmTyNone)) {
1923     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1924     return false;
1925   }
1926   // TODO: We should avoid using host float here. It would be better to
1927   // check the float bit values which is what a few other places do.
1928   // We've had bot failures before due to weird NaN support on mips hosts.
1929 
1930   APInt Literal(64, Imm.Val);
1931 
1932   if (Imm.IsFPImm) { // We got fp literal token
1933     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1934       return AMDGPU::isInlinableLiteral64(Imm.Val,
1935                                           AsmParser->hasInv2PiInlineImm());
1936     }
1937 
1938     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1939     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1940       return false;
1941 
1942     if (type.getScalarSizeInBits() == 16) {
1943       return isInlineableLiteralOp16(
1944         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1945         type, AsmParser->hasInv2PiInlineImm());
1946     }
1947 
1948     // Check if single precision literal is inlinable
1949     return AMDGPU::isInlinableLiteral32(
1950       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1951       AsmParser->hasInv2PiInlineImm());
1952   }
1953 
1954   // We got int literal token.
1955   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1956     return AMDGPU::isInlinableLiteral64(Imm.Val,
1957                                         AsmParser->hasInv2PiInlineImm());
1958   }
1959 
1960   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1961     return false;
1962   }
1963 
1964   if (type.getScalarSizeInBits() == 16) {
1965     return isInlineableLiteralOp16(
1966       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1967       type, AsmParser->hasInv2PiInlineImm());
1968   }
1969 
1970   return AMDGPU::isInlinableLiteral32(
1971     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1972     AsmParser->hasInv2PiInlineImm());
1973 }
1974 
1975 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1976   // Check that this immediate can be added as literal
1977   if (!isImmTy(ImmTyNone)) {
1978     return false;
1979   }
1980 
1981   if (!Imm.IsFPImm) {
1982     // We got int literal token.
1983 
1984     if (type == MVT::f64 && hasFPModifiers()) {
1985       // Cannot apply fp modifiers to int literals preserving the same semantics
1986       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1987       // disable these cases.
1988       return false;
1989     }
1990 
1991     unsigned Size = type.getSizeInBits();
1992     if (Size == 64)
1993       Size = 32;
1994 
1995     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1996     // types.
1997     return isSafeTruncation(Imm.Val, Size);
1998   }
1999 
2000   // We got fp literal token
2001   if (type == MVT::f64) { // Expected 64-bit fp operand
2002     // We would set low 64-bits of literal to zeroes but we accept this literals
2003     return true;
2004   }
2005 
2006   if (type == MVT::i64) { // Expected 64-bit int operand
2007     // We don't allow fp literals in 64-bit integer instructions. It is
2008     // unclear how we should encode them.
2009     return false;
2010   }
2011 
2012   // We allow fp literals with f16x2 operands assuming that the specified
2013   // literal goes into the lower half and the upper half is zero. We also
2014   // require that the literal may be losslessly converted to f16.
2015   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
2016                      (type == MVT::v2i16)? MVT::i16 :
2017                      (type == MVT::v2f32)? MVT::f32 : type;
2018 
2019   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2020   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2021 }
2022 
2023 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2024   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2025 }
2026 
2027 bool AMDGPUOperand::isVRegWithInputMods() const {
2028   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2029          // GFX90A allows DPP on 64-bit operands.
2030          (isRegClass(AMDGPU::VReg_64RegClassID) &&
2031           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
2032 }
2033 
2034 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2035   if (AsmParser->isVI())
2036     return isVReg32();
2037   else if (AsmParser->isGFX9Plus())
2038     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2039   else
2040     return false;
2041 }
2042 
2043 bool AMDGPUOperand::isSDWAFP16Operand() const {
2044   return isSDWAOperand(MVT::f16);
2045 }
2046 
2047 bool AMDGPUOperand::isSDWAFP32Operand() const {
2048   return isSDWAOperand(MVT::f32);
2049 }
2050 
2051 bool AMDGPUOperand::isSDWAInt16Operand() const {
2052   return isSDWAOperand(MVT::i16);
2053 }
2054 
2055 bool AMDGPUOperand::isSDWAInt32Operand() const {
2056   return isSDWAOperand(MVT::i32);
2057 }
2058 
2059 bool AMDGPUOperand::isBoolReg() const {
2060   auto FB = AsmParser->getFeatureBits();
2061   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
2062                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
2063 }
2064 
2065 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2066 {
2067   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2068   assert(Size == 2 || Size == 4 || Size == 8);
2069 
2070   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2071 
2072   if (Imm.Mods.Abs) {
2073     Val &= ~FpSignMask;
2074   }
2075   if (Imm.Mods.Neg) {
2076     Val ^= FpSignMask;
2077   }
2078 
2079   return Val;
2080 }
2081 
2082 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2083   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2084                              Inst.getNumOperands())) {
2085     addLiteralImmOperand(Inst, Imm.Val,
2086                          ApplyModifiers &
2087                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2088   } else {
2089     assert(!isImmTy(ImmTyNone) || !hasModifiers());
2090     Inst.addOperand(MCOperand::createImm(Imm.Val));
2091     setImmKindNone();
2092   }
2093 }
2094 
2095 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2096   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2097   auto OpNum = Inst.getNumOperands();
2098   // Check that this operand accepts literals
2099   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2100 
2101   if (ApplyModifiers) {
2102     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2103     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2104     Val = applyInputFPModifiers(Val, Size);
2105   }
2106 
2107   APInt Literal(64, Val);
2108   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
2109 
2110   if (Imm.IsFPImm) { // We got fp literal token
2111     switch (OpTy) {
2112     case AMDGPU::OPERAND_REG_IMM_INT64:
2113     case AMDGPU::OPERAND_REG_IMM_FP64:
2114     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2115     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2116     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2117       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2118                                        AsmParser->hasInv2PiInlineImm())) {
2119         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2120         setImmKindConst();
2121         return;
2122       }
2123 
2124       // Non-inlineable
2125       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2126         // For fp operands we check if low 32 bits are zeros
2127         if (Literal.getLoBits(32) != 0) {
2128           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2129           "Can't encode literal as exact 64-bit floating-point operand. "
2130           "Low 32-bits will be set to zero");
2131         }
2132 
2133         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2134         setImmKindLiteral();
2135         return;
2136       }
2137 
2138       // We don't allow fp literals in 64-bit integer instructions. It is
2139       // unclear how we should encode them. This case should be checked earlier
2140       // in predicate methods (isLiteralImm())
2141       llvm_unreachable("fp literal in 64-bit integer instruction.");
2142 
2143     case AMDGPU::OPERAND_REG_IMM_INT32:
2144     case AMDGPU::OPERAND_REG_IMM_FP32:
2145     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2146     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2147     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2148     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2149     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2150     case AMDGPU::OPERAND_REG_IMM_INT16:
2151     case AMDGPU::OPERAND_REG_IMM_FP16:
2152     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2153     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2154     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2155     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2156     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2157     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2158     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2159     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2160     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2161     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2162     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2163     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2164     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2165     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2166     case AMDGPU::OPERAND_REG_IMM_V2INT32:
2167     case AMDGPU::OPERAND_KIMM32:
2168     case AMDGPU::OPERAND_KIMM16: {
2169       bool lost;
2170       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2171       // Convert literal to single precision
2172       FPLiteral.convert(*getOpFltSemantics(OpTy),
2173                         APFloat::rmNearestTiesToEven, &lost);
2174       // We allow precision lost but not overflow or underflow. This should be
2175       // checked earlier in isLiteralImm()
2176 
2177       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2178       Inst.addOperand(MCOperand::createImm(ImmVal));
2179       setImmKindLiteral();
2180       return;
2181     }
2182     default:
2183       llvm_unreachable("invalid operand size");
2184     }
2185 
2186     return;
2187   }
2188 
2189   // We got int literal token.
2190   // Only sign extend inline immediates.
2191   switch (OpTy) {
2192   case AMDGPU::OPERAND_REG_IMM_INT32:
2193   case AMDGPU::OPERAND_REG_IMM_FP32:
2194   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2195   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2196   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2197   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2198   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2199   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2200   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2201   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2202   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2203   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2204   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2205     if (isSafeTruncation(Val, 32) &&
2206         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2207                                      AsmParser->hasInv2PiInlineImm())) {
2208       Inst.addOperand(MCOperand::createImm(Val));
2209       setImmKindConst();
2210       return;
2211     }
2212 
2213     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2214     setImmKindLiteral();
2215     return;
2216 
2217   case AMDGPU::OPERAND_REG_IMM_INT64:
2218   case AMDGPU::OPERAND_REG_IMM_FP64:
2219   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2220   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2221   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2222     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2223       Inst.addOperand(MCOperand::createImm(Val));
2224       setImmKindConst();
2225       return;
2226     }
2227 
2228     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2229     setImmKindLiteral();
2230     return;
2231 
2232   case AMDGPU::OPERAND_REG_IMM_INT16:
2233   case AMDGPU::OPERAND_REG_IMM_FP16:
2234   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2235   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2236   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2237   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2238   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2239     if (isSafeTruncation(Val, 16) &&
2240         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2241                                      AsmParser->hasInv2PiInlineImm())) {
2242       Inst.addOperand(MCOperand::createImm(Val));
2243       setImmKindConst();
2244       return;
2245     }
2246 
2247     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2248     setImmKindLiteral();
2249     return;
2250 
2251   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2252   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2253   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2254   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2255     assert(isSafeTruncation(Val, 16));
2256     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2257                                         AsmParser->hasInv2PiInlineImm()));
2258 
2259     Inst.addOperand(MCOperand::createImm(Val));
2260     return;
2261   }
2262   case AMDGPU::OPERAND_KIMM32:
2263     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2264     setImmKindNone();
2265     return;
2266   case AMDGPU::OPERAND_KIMM16:
2267     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2268     setImmKindNone();
2269     return;
2270   default:
2271     llvm_unreachable("invalid operand size");
2272   }
2273 }
2274 
2275 template <unsigned Bitwidth>
2276 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2277   APInt Literal(64, Imm.Val);
2278   setImmKindNone();
2279 
2280   if (!Imm.IsFPImm) {
2281     // We got int literal token.
2282     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2283     return;
2284   }
2285 
2286   bool Lost;
2287   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2288   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2289                     APFloat::rmNearestTiesToEven, &Lost);
2290   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2291 }
2292 
2293 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2294   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2295 }
2296 
2297 static bool isInlineValue(unsigned Reg) {
2298   switch (Reg) {
2299   case AMDGPU::SRC_SHARED_BASE:
2300   case AMDGPU::SRC_SHARED_LIMIT:
2301   case AMDGPU::SRC_PRIVATE_BASE:
2302   case AMDGPU::SRC_PRIVATE_LIMIT:
2303   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2304     return true;
2305   case AMDGPU::SRC_VCCZ:
2306   case AMDGPU::SRC_EXECZ:
2307   case AMDGPU::SRC_SCC:
2308     return true;
2309   case AMDGPU::SGPR_NULL:
2310     return true;
2311   default:
2312     return false;
2313   }
2314 }
2315 
2316 bool AMDGPUOperand::isInlineValue() const {
2317   return isRegKind() && ::isInlineValue(getReg());
2318 }
2319 
2320 //===----------------------------------------------------------------------===//
2321 // AsmParser
2322 //===----------------------------------------------------------------------===//
2323 
2324 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2325   if (Is == IS_VGPR) {
2326     switch (RegWidth) {
2327       default: return -1;
2328       case 32:
2329         return AMDGPU::VGPR_32RegClassID;
2330       case 64:
2331         return AMDGPU::VReg_64RegClassID;
2332       case 96:
2333         return AMDGPU::VReg_96RegClassID;
2334       case 128:
2335         return AMDGPU::VReg_128RegClassID;
2336       case 160:
2337         return AMDGPU::VReg_160RegClassID;
2338       case 192:
2339         return AMDGPU::VReg_192RegClassID;
2340       case 224:
2341         return AMDGPU::VReg_224RegClassID;
2342       case 256:
2343         return AMDGPU::VReg_256RegClassID;
2344       case 512:
2345         return AMDGPU::VReg_512RegClassID;
2346       case 1024:
2347         return AMDGPU::VReg_1024RegClassID;
2348     }
2349   } else if (Is == IS_TTMP) {
2350     switch (RegWidth) {
2351       default: return -1;
2352       case 32:
2353         return AMDGPU::TTMP_32RegClassID;
2354       case 64:
2355         return AMDGPU::TTMP_64RegClassID;
2356       case 128:
2357         return AMDGPU::TTMP_128RegClassID;
2358       case 256:
2359         return AMDGPU::TTMP_256RegClassID;
2360       case 512:
2361         return AMDGPU::TTMP_512RegClassID;
2362     }
2363   } else if (Is == IS_SGPR) {
2364     switch (RegWidth) {
2365       default: return -1;
2366       case 32:
2367         return AMDGPU::SGPR_32RegClassID;
2368       case 64:
2369         return AMDGPU::SGPR_64RegClassID;
2370       case 96:
2371         return AMDGPU::SGPR_96RegClassID;
2372       case 128:
2373         return AMDGPU::SGPR_128RegClassID;
2374       case 160:
2375         return AMDGPU::SGPR_160RegClassID;
2376       case 192:
2377         return AMDGPU::SGPR_192RegClassID;
2378       case 224:
2379         return AMDGPU::SGPR_224RegClassID;
2380       case 256:
2381         return AMDGPU::SGPR_256RegClassID;
2382       case 512:
2383         return AMDGPU::SGPR_512RegClassID;
2384     }
2385   } else if (Is == IS_AGPR) {
2386     switch (RegWidth) {
2387       default: return -1;
2388       case 32:
2389         return AMDGPU::AGPR_32RegClassID;
2390       case 64:
2391         return AMDGPU::AReg_64RegClassID;
2392       case 96:
2393         return AMDGPU::AReg_96RegClassID;
2394       case 128:
2395         return AMDGPU::AReg_128RegClassID;
2396       case 160:
2397         return AMDGPU::AReg_160RegClassID;
2398       case 192:
2399         return AMDGPU::AReg_192RegClassID;
2400       case 224:
2401         return AMDGPU::AReg_224RegClassID;
2402       case 256:
2403         return AMDGPU::AReg_256RegClassID;
2404       case 512:
2405         return AMDGPU::AReg_512RegClassID;
2406       case 1024:
2407         return AMDGPU::AReg_1024RegClassID;
2408     }
2409   }
2410   return -1;
2411 }
2412 
2413 static unsigned getSpecialRegForName(StringRef RegName) {
2414   return StringSwitch<unsigned>(RegName)
2415     .Case("exec", AMDGPU::EXEC)
2416     .Case("vcc", AMDGPU::VCC)
2417     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2418     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2419     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2420     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2421     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2422     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2423     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2424     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2425     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2426     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2427     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2428     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2429     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2430     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2431     .Case("m0", AMDGPU::M0)
2432     .Case("vccz", AMDGPU::SRC_VCCZ)
2433     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2434     .Case("execz", AMDGPU::SRC_EXECZ)
2435     .Case("src_execz", AMDGPU::SRC_EXECZ)
2436     .Case("scc", AMDGPU::SRC_SCC)
2437     .Case("src_scc", AMDGPU::SRC_SCC)
2438     .Case("tba", AMDGPU::TBA)
2439     .Case("tma", AMDGPU::TMA)
2440     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2441     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2442     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2443     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2444     .Case("vcc_lo", AMDGPU::VCC_LO)
2445     .Case("vcc_hi", AMDGPU::VCC_HI)
2446     .Case("exec_lo", AMDGPU::EXEC_LO)
2447     .Case("exec_hi", AMDGPU::EXEC_HI)
2448     .Case("tma_lo", AMDGPU::TMA_LO)
2449     .Case("tma_hi", AMDGPU::TMA_HI)
2450     .Case("tba_lo", AMDGPU::TBA_LO)
2451     .Case("tba_hi", AMDGPU::TBA_HI)
2452     .Case("pc", AMDGPU::PC_REG)
2453     .Case("null", AMDGPU::SGPR_NULL)
2454     .Default(AMDGPU::NoRegister);
2455 }
2456 
2457 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2458                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2459   auto R = parseRegister();
2460   if (!R) return true;
2461   assert(R->isReg());
2462   RegNo = R->getReg();
2463   StartLoc = R->getStartLoc();
2464   EndLoc = R->getEndLoc();
2465   return false;
2466 }
2467 
2468 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2469                                     SMLoc &EndLoc) {
2470   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2471 }
2472 
2473 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2474                                                        SMLoc &StartLoc,
2475                                                        SMLoc &EndLoc) {
2476   bool Result =
2477       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2478   bool PendingErrors = getParser().hasPendingError();
2479   getParser().clearPendingErrors();
2480   if (PendingErrors)
2481     return MatchOperand_ParseFail;
2482   if (Result)
2483     return MatchOperand_NoMatch;
2484   return MatchOperand_Success;
2485 }
2486 
2487 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2488                                             RegisterKind RegKind, unsigned Reg1,
2489                                             SMLoc Loc) {
2490   switch (RegKind) {
2491   case IS_SPECIAL:
2492     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2493       Reg = AMDGPU::EXEC;
2494       RegWidth = 64;
2495       return true;
2496     }
2497     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2498       Reg = AMDGPU::FLAT_SCR;
2499       RegWidth = 64;
2500       return true;
2501     }
2502     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2503       Reg = AMDGPU::XNACK_MASK;
2504       RegWidth = 64;
2505       return true;
2506     }
2507     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2508       Reg = AMDGPU::VCC;
2509       RegWidth = 64;
2510       return true;
2511     }
2512     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2513       Reg = AMDGPU::TBA;
2514       RegWidth = 64;
2515       return true;
2516     }
2517     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2518       Reg = AMDGPU::TMA;
2519       RegWidth = 64;
2520       return true;
2521     }
2522     Error(Loc, "register does not fit in the list");
2523     return false;
2524   case IS_VGPR:
2525   case IS_SGPR:
2526   case IS_AGPR:
2527   case IS_TTMP:
2528     if (Reg1 != Reg + RegWidth / 32) {
2529       Error(Loc, "registers in a list must have consecutive indices");
2530       return false;
2531     }
2532     RegWidth += 32;
2533     return true;
2534   default:
2535     llvm_unreachable("unexpected register kind");
2536   }
2537 }
2538 
2539 struct RegInfo {
2540   StringLiteral Name;
2541   RegisterKind Kind;
2542 };
2543 
2544 static constexpr RegInfo RegularRegisters[] = {
2545   {{"v"},    IS_VGPR},
2546   {{"s"},    IS_SGPR},
2547   {{"ttmp"}, IS_TTMP},
2548   {{"acc"},  IS_AGPR},
2549   {{"a"},    IS_AGPR},
2550 };
2551 
2552 static bool isRegularReg(RegisterKind Kind) {
2553   return Kind == IS_VGPR ||
2554          Kind == IS_SGPR ||
2555          Kind == IS_TTMP ||
2556          Kind == IS_AGPR;
2557 }
2558 
2559 static const RegInfo* getRegularRegInfo(StringRef Str) {
2560   for (const RegInfo &Reg : RegularRegisters)
2561     if (Str.startswith(Reg.Name))
2562       return &Reg;
2563   return nullptr;
2564 }
2565 
2566 static bool getRegNum(StringRef Str, unsigned& Num) {
2567   return !Str.getAsInteger(10, Num);
2568 }
2569 
2570 bool
2571 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2572                             const AsmToken &NextToken) const {
2573 
2574   // A list of consecutive registers: [s0,s1,s2,s3]
2575   if (Token.is(AsmToken::LBrac))
2576     return true;
2577 
2578   if (!Token.is(AsmToken::Identifier))
2579     return false;
2580 
2581   // A single register like s0 or a range of registers like s[0:1]
2582 
2583   StringRef Str = Token.getString();
2584   const RegInfo *Reg = getRegularRegInfo(Str);
2585   if (Reg) {
2586     StringRef RegName = Reg->Name;
2587     StringRef RegSuffix = Str.substr(RegName.size());
2588     if (!RegSuffix.empty()) {
2589       unsigned Num;
2590       // A single register with an index: rXX
2591       if (getRegNum(RegSuffix, Num))
2592         return true;
2593     } else {
2594       // A range of registers: r[XX:YY].
2595       if (NextToken.is(AsmToken::LBrac))
2596         return true;
2597     }
2598   }
2599 
2600   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2601 }
2602 
2603 bool
2604 AMDGPUAsmParser::isRegister()
2605 {
2606   return isRegister(getToken(), peekToken());
2607 }
2608 
2609 unsigned
2610 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2611                                unsigned RegNum,
2612                                unsigned RegWidth,
2613                                SMLoc Loc) {
2614 
2615   assert(isRegularReg(RegKind));
2616 
2617   unsigned AlignSize = 1;
2618   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2619     // SGPR and TTMP registers must be aligned.
2620     // Max required alignment is 4 dwords.
2621     AlignSize = std::min(RegWidth / 32, 4u);
2622   }
2623 
2624   if (RegNum % AlignSize != 0) {
2625     Error(Loc, "invalid register alignment");
2626     return AMDGPU::NoRegister;
2627   }
2628 
2629   unsigned RegIdx = RegNum / AlignSize;
2630   int RCID = getRegClass(RegKind, RegWidth);
2631   if (RCID == -1) {
2632     Error(Loc, "invalid or unsupported register size");
2633     return AMDGPU::NoRegister;
2634   }
2635 
2636   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2637   const MCRegisterClass RC = TRI->getRegClass(RCID);
2638   if (RegIdx >= RC.getNumRegs()) {
2639     Error(Loc, "register index is out of range");
2640     return AMDGPU::NoRegister;
2641   }
2642 
2643   return RC.getRegister(RegIdx);
2644 }
2645 
2646 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2647   int64_t RegLo, RegHi;
2648   if (!skipToken(AsmToken::LBrac, "missing register index"))
2649     return false;
2650 
2651   SMLoc FirstIdxLoc = getLoc();
2652   SMLoc SecondIdxLoc;
2653 
2654   if (!parseExpr(RegLo))
2655     return false;
2656 
2657   if (trySkipToken(AsmToken::Colon)) {
2658     SecondIdxLoc = getLoc();
2659     if (!parseExpr(RegHi))
2660       return false;
2661   } else {
2662     RegHi = RegLo;
2663   }
2664 
2665   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2666     return false;
2667 
2668   if (!isUInt<32>(RegLo)) {
2669     Error(FirstIdxLoc, "invalid register index");
2670     return false;
2671   }
2672 
2673   if (!isUInt<32>(RegHi)) {
2674     Error(SecondIdxLoc, "invalid register index");
2675     return false;
2676   }
2677 
2678   if (RegLo > RegHi) {
2679     Error(FirstIdxLoc, "first register index should not exceed second index");
2680     return false;
2681   }
2682 
2683   Num = static_cast<unsigned>(RegLo);
2684   RegWidth = 32 * ((RegHi - RegLo) + 1);
2685   return true;
2686 }
2687 
2688 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2689                                           unsigned &RegNum, unsigned &RegWidth,
2690                                           SmallVectorImpl<AsmToken> &Tokens) {
2691   assert(isToken(AsmToken::Identifier));
2692   unsigned Reg = getSpecialRegForName(getTokenStr());
2693   if (Reg) {
2694     RegNum = 0;
2695     RegWidth = 32;
2696     RegKind = IS_SPECIAL;
2697     Tokens.push_back(getToken());
2698     lex(); // skip register name
2699   }
2700   return Reg;
2701 }
2702 
2703 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2704                                           unsigned &RegNum, unsigned &RegWidth,
2705                                           SmallVectorImpl<AsmToken> &Tokens) {
2706   assert(isToken(AsmToken::Identifier));
2707   StringRef RegName = getTokenStr();
2708   auto Loc = getLoc();
2709 
2710   const RegInfo *RI = getRegularRegInfo(RegName);
2711   if (!RI) {
2712     Error(Loc, "invalid register name");
2713     return AMDGPU::NoRegister;
2714   }
2715 
2716   Tokens.push_back(getToken());
2717   lex(); // skip register name
2718 
2719   RegKind = RI->Kind;
2720   StringRef RegSuffix = RegName.substr(RI->Name.size());
2721   if (!RegSuffix.empty()) {
2722     // Single 32-bit register: vXX.
2723     if (!getRegNum(RegSuffix, RegNum)) {
2724       Error(Loc, "invalid register index");
2725       return AMDGPU::NoRegister;
2726     }
2727     RegWidth = 32;
2728   } else {
2729     // Range of registers: v[XX:YY]. ":YY" is optional.
2730     if (!ParseRegRange(RegNum, RegWidth))
2731       return AMDGPU::NoRegister;
2732   }
2733 
2734   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2735 }
2736 
2737 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2738                                        unsigned &RegWidth,
2739                                        SmallVectorImpl<AsmToken> &Tokens) {
2740   unsigned Reg = AMDGPU::NoRegister;
2741   auto ListLoc = getLoc();
2742 
2743   if (!skipToken(AsmToken::LBrac,
2744                  "expected a register or a list of registers")) {
2745     return AMDGPU::NoRegister;
2746   }
2747 
2748   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2749 
2750   auto Loc = getLoc();
2751   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2752     return AMDGPU::NoRegister;
2753   if (RegWidth != 32) {
2754     Error(Loc, "expected a single 32-bit register");
2755     return AMDGPU::NoRegister;
2756   }
2757 
2758   for (; trySkipToken(AsmToken::Comma); ) {
2759     RegisterKind NextRegKind;
2760     unsigned NextReg, NextRegNum, NextRegWidth;
2761     Loc = getLoc();
2762 
2763     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2764                              NextRegNum, NextRegWidth,
2765                              Tokens)) {
2766       return AMDGPU::NoRegister;
2767     }
2768     if (NextRegWidth != 32) {
2769       Error(Loc, "expected a single 32-bit register");
2770       return AMDGPU::NoRegister;
2771     }
2772     if (NextRegKind != RegKind) {
2773       Error(Loc, "registers in a list must be of the same kind");
2774       return AMDGPU::NoRegister;
2775     }
2776     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2777       return AMDGPU::NoRegister;
2778   }
2779 
2780   if (!skipToken(AsmToken::RBrac,
2781                  "expected a comma or a closing square bracket")) {
2782     return AMDGPU::NoRegister;
2783   }
2784 
2785   if (isRegularReg(RegKind))
2786     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2787 
2788   return Reg;
2789 }
2790 
2791 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2792                                           unsigned &RegNum, unsigned &RegWidth,
2793                                           SmallVectorImpl<AsmToken> &Tokens) {
2794   auto Loc = getLoc();
2795   Reg = AMDGPU::NoRegister;
2796 
2797   if (isToken(AsmToken::Identifier)) {
2798     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2799     if (Reg == AMDGPU::NoRegister)
2800       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2801   } else {
2802     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2803   }
2804 
2805   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2806   if (Reg == AMDGPU::NoRegister) {
2807     assert(Parser.hasPendingError());
2808     return false;
2809   }
2810 
2811   if (!subtargetHasRegister(*TRI, Reg)) {
2812     if (Reg == AMDGPU::SGPR_NULL) {
2813       Error(Loc, "'null' operand is not supported on this GPU");
2814     } else {
2815       Error(Loc, "register not available on this GPU");
2816     }
2817     return false;
2818   }
2819 
2820   return true;
2821 }
2822 
2823 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2824                                           unsigned &RegNum, unsigned &RegWidth,
2825                                           bool RestoreOnFailure /*=false*/) {
2826   Reg = AMDGPU::NoRegister;
2827 
2828   SmallVector<AsmToken, 1> Tokens;
2829   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2830     if (RestoreOnFailure) {
2831       while (!Tokens.empty()) {
2832         getLexer().UnLex(Tokens.pop_back_val());
2833       }
2834     }
2835     return true;
2836   }
2837   return false;
2838 }
2839 
2840 Optional<StringRef>
2841 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2842   switch (RegKind) {
2843   case IS_VGPR:
2844     return StringRef(".amdgcn.next_free_vgpr");
2845   case IS_SGPR:
2846     return StringRef(".amdgcn.next_free_sgpr");
2847   default:
2848     return None;
2849   }
2850 }
2851 
2852 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2853   auto SymbolName = getGprCountSymbolName(RegKind);
2854   assert(SymbolName && "initializing invalid register kind");
2855   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2856   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2857 }
2858 
2859 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2860                                             unsigned DwordRegIndex,
2861                                             unsigned RegWidth) {
2862   // Symbols are only defined for GCN targets
2863   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2864     return true;
2865 
2866   auto SymbolName = getGprCountSymbolName(RegKind);
2867   if (!SymbolName)
2868     return true;
2869   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2870 
2871   int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
2872   int64_t OldCount;
2873 
2874   if (!Sym->isVariable())
2875     return !Error(getLoc(),
2876                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2877   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2878     return !Error(
2879         getLoc(),
2880         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2881 
2882   if (OldCount <= NewMax)
2883     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2884 
2885   return true;
2886 }
2887 
2888 std::unique_ptr<AMDGPUOperand>
2889 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2890   const auto &Tok = getToken();
2891   SMLoc StartLoc = Tok.getLoc();
2892   SMLoc EndLoc = Tok.getEndLoc();
2893   RegisterKind RegKind;
2894   unsigned Reg, RegNum, RegWidth;
2895 
2896   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2897     return nullptr;
2898   }
2899   if (isHsaAbiVersion3AndAbove(&getSTI())) {
2900     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2901       return nullptr;
2902   } else
2903     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2904   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2905 }
2906 
2907 OperandMatchResultTy
2908 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2909   // TODO: add syntactic sugar for 1/(2*PI)
2910 
2911   assert(!isRegister());
2912   assert(!isModifier());
2913 
2914   const auto& Tok = getToken();
2915   const auto& NextTok = peekToken();
2916   bool IsReal = Tok.is(AsmToken::Real);
2917   SMLoc S = getLoc();
2918   bool Negate = false;
2919 
2920   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2921     lex();
2922     IsReal = true;
2923     Negate = true;
2924   }
2925 
2926   if (IsReal) {
2927     // Floating-point expressions are not supported.
2928     // Can only allow floating-point literals with an
2929     // optional sign.
2930 
2931     StringRef Num = getTokenStr();
2932     lex();
2933 
2934     APFloat RealVal(APFloat::IEEEdouble());
2935     auto roundMode = APFloat::rmNearestTiesToEven;
2936     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2937       return MatchOperand_ParseFail;
2938     }
2939     if (Negate)
2940       RealVal.changeSign();
2941 
2942     Operands.push_back(
2943       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2944                                AMDGPUOperand::ImmTyNone, true));
2945 
2946     return MatchOperand_Success;
2947 
2948   } else {
2949     int64_t IntVal;
2950     const MCExpr *Expr;
2951     SMLoc S = getLoc();
2952 
2953     if (HasSP3AbsModifier) {
2954       // This is a workaround for handling expressions
2955       // as arguments of SP3 'abs' modifier, for example:
2956       //     |1.0|
2957       //     |-1|
2958       //     |1+x|
2959       // This syntax is not compatible with syntax of standard
2960       // MC expressions (due to the trailing '|').
2961       SMLoc EndLoc;
2962       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2963         return MatchOperand_ParseFail;
2964     } else {
2965       if (Parser.parseExpression(Expr))
2966         return MatchOperand_ParseFail;
2967     }
2968 
2969     if (Expr->evaluateAsAbsolute(IntVal)) {
2970       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2971     } else {
2972       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2973     }
2974 
2975     return MatchOperand_Success;
2976   }
2977 
2978   return MatchOperand_NoMatch;
2979 }
2980 
2981 OperandMatchResultTy
2982 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2983   if (!isRegister())
2984     return MatchOperand_NoMatch;
2985 
2986   if (auto R = parseRegister()) {
2987     assert(R->isReg());
2988     Operands.push_back(std::move(R));
2989     return MatchOperand_Success;
2990   }
2991   return MatchOperand_ParseFail;
2992 }
2993 
2994 OperandMatchResultTy
2995 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2996   auto res = parseReg(Operands);
2997   if (res != MatchOperand_NoMatch) {
2998     return res;
2999   } else if (isModifier()) {
3000     return MatchOperand_NoMatch;
3001   } else {
3002     return parseImm(Operands, HasSP3AbsMod);
3003   }
3004 }
3005 
3006 bool
3007 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3008   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3009     const auto &str = Token.getString();
3010     return str == "abs" || str == "neg" || str == "sext";
3011   }
3012   return false;
3013 }
3014 
3015 bool
3016 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3017   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3018 }
3019 
3020 bool
3021 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3022   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3023 }
3024 
3025 bool
3026 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3027   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3028 }
3029 
3030 // Check if this is an operand modifier or an opcode modifier
3031 // which may look like an expression but it is not. We should
3032 // avoid parsing these modifiers as expressions. Currently
3033 // recognized sequences are:
3034 //   |...|
3035 //   abs(...)
3036 //   neg(...)
3037 //   sext(...)
3038 //   -reg
3039 //   -|...|
3040 //   -abs(...)
3041 //   name:...
3042 // Note that simple opcode modifiers like 'gds' may be parsed as
3043 // expressions; this is a special case. See getExpressionAsToken.
3044 //
3045 bool
3046 AMDGPUAsmParser::isModifier() {
3047 
3048   AsmToken Tok = getToken();
3049   AsmToken NextToken[2];
3050   peekTokens(NextToken);
3051 
3052   return isOperandModifier(Tok, NextToken[0]) ||
3053          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3054          isOpcodeModifierWithVal(Tok, NextToken[0]);
3055 }
3056 
3057 // Check if the current token is an SP3 'neg' modifier.
3058 // Currently this modifier is allowed in the following context:
3059 //
3060 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3061 // 2. Before an 'abs' modifier: -abs(...)
3062 // 3. Before an SP3 'abs' modifier: -|...|
3063 //
3064 // In all other cases "-" is handled as a part
3065 // of an expression that follows the sign.
3066 //
3067 // Note: When "-" is followed by an integer literal,
3068 // this is interpreted as integer negation rather
3069 // than a floating-point NEG modifier applied to N.
3070 // Beside being contr-intuitive, such use of floating-point
3071 // NEG modifier would have resulted in different meaning
3072 // of integer literals used with VOP1/2/C and VOP3,
3073 // for example:
3074 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3075 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3076 // Negative fp literals with preceding "-" are
3077 // handled likewise for uniformity
3078 //
3079 bool
3080 AMDGPUAsmParser::parseSP3NegModifier() {
3081 
3082   AsmToken NextToken[2];
3083   peekTokens(NextToken);
3084 
3085   if (isToken(AsmToken::Minus) &&
3086       (isRegister(NextToken[0], NextToken[1]) ||
3087        NextToken[0].is(AsmToken::Pipe) ||
3088        isId(NextToken[0], "abs"))) {
3089     lex();
3090     return true;
3091   }
3092 
3093   return false;
3094 }
3095 
3096 OperandMatchResultTy
3097 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3098                                               bool AllowImm) {
3099   bool Neg, SP3Neg;
3100   bool Abs, SP3Abs;
3101   SMLoc Loc;
3102 
3103   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3104   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
3105     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3106     return MatchOperand_ParseFail;
3107   }
3108 
3109   SP3Neg = parseSP3NegModifier();
3110 
3111   Loc = getLoc();
3112   Neg = trySkipId("neg");
3113   if (Neg && SP3Neg) {
3114     Error(Loc, "expected register or immediate");
3115     return MatchOperand_ParseFail;
3116   }
3117   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3118     return MatchOperand_ParseFail;
3119 
3120   Abs = trySkipId("abs");
3121   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3122     return MatchOperand_ParseFail;
3123 
3124   Loc = getLoc();
3125   SP3Abs = trySkipToken(AsmToken::Pipe);
3126   if (Abs && SP3Abs) {
3127     Error(Loc, "expected register or immediate");
3128     return MatchOperand_ParseFail;
3129   }
3130 
3131   OperandMatchResultTy Res;
3132   if (AllowImm) {
3133     Res = parseRegOrImm(Operands, SP3Abs);
3134   } else {
3135     Res = parseReg(Operands);
3136   }
3137   if (Res != MatchOperand_Success) {
3138     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
3139   }
3140 
3141   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3142     return MatchOperand_ParseFail;
3143   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3144     return MatchOperand_ParseFail;
3145   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3146     return MatchOperand_ParseFail;
3147 
3148   AMDGPUOperand::Modifiers Mods;
3149   Mods.Abs = Abs || SP3Abs;
3150   Mods.Neg = Neg || SP3Neg;
3151 
3152   if (Mods.hasFPModifiers()) {
3153     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3154     if (Op.isExpr()) {
3155       Error(Op.getStartLoc(), "expected an absolute expression");
3156       return MatchOperand_ParseFail;
3157     }
3158     Op.setModifiers(Mods);
3159   }
3160   return MatchOperand_Success;
3161 }
3162 
3163 OperandMatchResultTy
3164 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3165                                                bool AllowImm) {
3166   bool Sext = trySkipId("sext");
3167   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3168     return MatchOperand_ParseFail;
3169 
3170   OperandMatchResultTy Res;
3171   if (AllowImm) {
3172     Res = parseRegOrImm(Operands);
3173   } else {
3174     Res = parseReg(Operands);
3175   }
3176   if (Res != MatchOperand_Success) {
3177     return Sext? MatchOperand_ParseFail : Res;
3178   }
3179 
3180   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3181     return MatchOperand_ParseFail;
3182 
3183   AMDGPUOperand::Modifiers Mods;
3184   Mods.Sext = Sext;
3185 
3186   if (Mods.hasIntModifiers()) {
3187     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3188     if (Op.isExpr()) {
3189       Error(Op.getStartLoc(), "expected an absolute expression");
3190       return MatchOperand_ParseFail;
3191     }
3192     Op.setModifiers(Mods);
3193   }
3194 
3195   return MatchOperand_Success;
3196 }
3197 
3198 OperandMatchResultTy
3199 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3200   return parseRegOrImmWithFPInputMods(Operands, false);
3201 }
3202 
3203 OperandMatchResultTy
3204 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3205   return parseRegOrImmWithIntInputMods(Operands, false);
3206 }
3207 
3208 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3209   auto Loc = getLoc();
3210   if (trySkipId("off")) {
3211     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3212                                                 AMDGPUOperand::ImmTyOff, false));
3213     return MatchOperand_Success;
3214   }
3215 
3216   if (!isRegister())
3217     return MatchOperand_NoMatch;
3218 
3219   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3220   if (Reg) {
3221     Operands.push_back(std::move(Reg));
3222     return MatchOperand_Success;
3223   }
3224 
3225   return MatchOperand_ParseFail;
3226 
3227 }
3228 
3229 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3230   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3231 
3232   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3233       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3234       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3235       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3236     return Match_InvalidOperand;
3237 
3238   if ((TSFlags & SIInstrFlags::VOP3) &&
3239       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3240       getForcedEncodingSize() != 64)
3241     return Match_PreferE32;
3242 
3243   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3244       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3245     // v_mac_f32/16 allow only dst_sel == DWORD;
3246     auto OpNum =
3247         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3248     const auto &Op = Inst.getOperand(OpNum);
3249     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3250       return Match_InvalidOperand;
3251     }
3252   }
3253 
3254   return Match_Success;
3255 }
3256 
3257 static ArrayRef<unsigned> getAllVariants() {
3258   static const unsigned Variants[] = {
3259     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3260     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
3261     AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
3262   };
3263 
3264   return makeArrayRef(Variants);
3265 }
3266 
3267 // What asm variants we should check
3268 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3269   if (isForcedDPP() && isForcedVOP3()) {
3270     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3271     return makeArrayRef(Variants);
3272   }
3273   if (getForcedEncodingSize() == 32) {
3274     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3275     return makeArrayRef(Variants);
3276   }
3277 
3278   if (isForcedVOP3()) {
3279     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3280     return makeArrayRef(Variants);
3281   }
3282 
3283   if (isForcedSDWA()) {
3284     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3285                                         AMDGPUAsmVariants::SDWA9};
3286     return makeArrayRef(Variants);
3287   }
3288 
3289   if (isForcedDPP()) {
3290     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3291     return makeArrayRef(Variants);
3292   }
3293 
3294   return getAllVariants();
3295 }
3296 
3297 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3298   if (isForcedDPP() && isForcedVOP3())
3299     return "e64_dpp";
3300 
3301   if (getForcedEncodingSize() == 32)
3302     return "e32";
3303 
3304   if (isForcedVOP3())
3305     return "e64";
3306 
3307   if (isForcedSDWA())
3308     return "sdwa";
3309 
3310   if (isForcedDPP())
3311     return "dpp";
3312 
3313   return "";
3314 }
3315 
3316 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3317   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3318   const unsigned Num = Desc.getNumImplicitUses();
3319   for (unsigned i = 0; i < Num; ++i) {
3320     unsigned Reg = Desc.ImplicitUses[i];
3321     switch (Reg) {
3322     case AMDGPU::FLAT_SCR:
3323     case AMDGPU::VCC:
3324     case AMDGPU::VCC_LO:
3325     case AMDGPU::VCC_HI:
3326     case AMDGPU::M0:
3327       return Reg;
3328     default:
3329       break;
3330     }
3331   }
3332   return AMDGPU::NoRegister;
3333 }
3334 
3335 // NB: This code is correct only when used to check constant
3336 // bus limitations because GFX7 support no f16 inline constants.
3337 // Note that there are no cases when a GFX7 opcode violates
3338 // constant bus limitations due to the use of an f16 constant.
3339 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3340                                        unsigned OpIdx) const {
3341   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3342 
3343   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3344     return false;
3345   }
3346 
3347   const MCOperand &MO = Inst.getOperand(OpIdx);
3348 
3349   int64_t Val = MO.getImm();
3350   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3351 
3352   switch (OpSize) { // expected operand size
3353   case 8:
3354     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3355   case 4:
3356     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3357   case 2: {
3358     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3359     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3360         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3361         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3362       return AMDGPU::isInlinableIntLiteral(Val);
3363 
3364     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3365         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3366         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3367       return AMDGPU::isInlinableIntLiteralV216(Val);
3368 
3369     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3370         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3371         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3372       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3373 
3374     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3375   }
3376   default:
3377     llvm_unreachable("invalid operand size");
3378   }
3379 }
3380 
3381 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3382   if (!isGFX10Plus())
3383     return 1;
3384 
3385   switch (Opcode) {
3386   // 64-bit shift instructions can use only one scalar value input
3387   case AMDGPU::V_LSHLREV_B64_e64:
3388   case AMDGPU::V_LSHLREV_B64_gfx10:
3389   case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3390   case AMDGPU::V_LSHRREV_B64_e64:
3391   case AMDGPU::V_LSHRREV_B64_gfx10:
3392   case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3393   case AMDGPU::V_ASHRREV_I64_e64:
3394   case AMDGPU::V_ASHRREV_I64_gfx10:
3395   case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3396   case AMDGPU::V_LSHL_B64_e64:
3397   case AMDGPU::V_LSHR_B64_e64:
3398   case AMDGPU::V_ASHR_I64_e64:
3399     return 1;
3400   default:
3401     return 2;
3402   }
3403 }
3404 
3405 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3406   const MCOperand &MO = Inst.getOperand(OpIdx);
3407   if (MO.isImm()) {
3408     return !isInlineConstant(Inst, OpIdx);
3409   } else if (MO.isReg()) {
3410     auto Reg = MO.getReg();
3411     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3412     auto PReg = mc2PseudoReg(Reg);
3413     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3414   } else {
3415     return true;
3416   }
3417 }
3418 
3419 bool
3420 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3421                                                 const OperandVector &Operands) {
3422   const unsigned Opcode = Inst.getOpcode();
3423   const MCInstrDesc &Desc = MII.get(Opcode);
3424   unsigned LastSGPR = AMDGPU::NoRegister;
3425   unsigned ConstantBusUseCount = 0;
3426   unsigned NumLiterals = 0;
3427   unsigned LiteralSize;
3428 
3429   if (Desc.TSFlags &
3430       (SIInstrFlags::VOPC |
3431        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3432        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3433        SIInstrFlags::SDWA)) {
3434     // Check special imm operands (used by madmk, etc)
3435     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3436       ++NumLiterals;
3437       LiteralSize = 4;
3438     }
3439 
3440     SmallDenseSet<unsigned> SGPRsUsed;
3441     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3442     if (SGPRUsed != AMDGPU::NoRegister) {
3443       SGPRsUsed.insert(SGPRUsed);
3444       ++ConstantBusUseCount;
3445     }
3446 
3447     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3448     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3449     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3450 
3451     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3452 
3453     for (int OpIdx : OpIndices) {
3454       if (OpIdx == -1) break;
3455 
3456       const MCOperand &MO = Inst.getOperand(OpIdx);
3457       if (usesConstantBus(Inst, OpIdx)) {
3458         if (MO.isReg()) {
3459           LastSGPR = mc2PseudoReg(MO.getReg());
3460           // Pairs of registers with a partial intersections like these
3461           //   s0, s[0:1]
3462           //   flat_scratch_lo, flat_scratch
3463           //   flat_scratch_lo, flat_scratch_hi
3464           // are theoretically valid but they are disabled anyway.
3465           // Note that this code mimics SIInstrInfo::verifyInstruction
3466           if (!SGPRsUsed.count(LastSGPR)) {
3467             SGPRsUsed.insert(LastSGPR);
3468             ++ConstantBusUseCount;
3469           }
3470         } else { // Expression or a literal
3471 
3472           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3473             continue; // special operand like VINTERP attr_chan
3474 
3475           // An instruction may use only one literal.
3476           // This has been validated on the previous step.
3477           // See validateVOPLiteral.
3478           // This literal may be used as more than one operand.
3479           // If all these operands are of the same size,
3480           // this literal counts as one scalar value.
3481           // Otherwise it counts as 2 scalar values.
3482           // See "GFX10 Shader Programming", section 3.6.2.3.
3483 
3484           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3485           if (Size < 4) Size = 4;
3486 
3487           if (NumLiterals == 0) {
3488             NumLiterals = 1;
3489             LiteralSize = Size;
3490           } else if (LiteralSize != Size) {
3491             NumLiterals = 2;
3492           }
3493         }
3494       }
3495     }
3496   }
3497   ConstantBusUseCount += NumLiterals;
3498 
3499   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3500     return true;
3501 
3502   SMLoc LitLoc = getLitLoc(Operands);
3503   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3504   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3505   Error(Loc, "invalid operand (violates constant bus restrictions)");
3506   return false;
3507 }
3508 
3509 bool
3510 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3511                                                  const OperandVector &Operands) {
3512   const unsigned Opcode = Inst.getOpcode();
3513   const MCInstrDesc &Desc = MII.get(Opcode);
3514 
3515   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3516   if (DstIdx == -1 ||
3517       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3518     return true;
3519   }
3520 
3521   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3522 
3523   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3524   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3525   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3526 
3527   assert(DstIdx != -1);
3528   const MCOperand &Dst = Inst.getOperand(DstIdx);
3529   assert(Dst.isReg());
3530 
3531   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3532 
3533   for (int SrcIdx : SrcIndices) {
3534     if (SrcIdx == -1) break;
3535     const MCOperand &Src = Inst.getOperand(SrcIdx);
3536     if (Src.isReg()) {
3537       if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) {
3538         const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3539         Error(getRegLoc(SrcReg, Operands),
3540           "destination must be different than all sources");
3541         return false;
3542       }
3543     }
3544   }
3545 
3546   return true;
3547 }
3548 
3549 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3550 
3551   const unsigned Opc = Inst.getOpcode();
3552   const MCInstrDesc &Desc = MII.get(Opc);
3553 
3554   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3555     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3556     assert(ClampIdx != -1);
3557     return Inst.getOperand(ClampIdx).getImm() == 0;
3558   }
3559 
3560   return true;
3561 }
3562 
3563 Optional<StringRef> AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3564 
3565   const unsigned Opc = Inst.getOpcode();
3566   const MCInstrDesc &Desc = MII.get(Opc);
3567 
3568   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3569     return None;
3570 
3571   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3572   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3573   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3574 
3575   assert(VDataIdx != -1);
3576 
3577   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3578     return None;
3579 
3580   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3581   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3582   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3583   if (DMask == 0)
3584     DMask = 1;
3585 
3586   bool isPackedD16 = false;
3587   unsigned DataSize =
3588     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3589   if (hasPackedD16()) {
3590     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3591     isPackedD16 = D16Idx >= 0;
3592     if (isPackedD16 && Inst.getOperand(D16Idx).getImm())
3593       DataSize = (DataSize + 1) / 2;
3594   }
3595 
3596   if ((VDataSize / 4) == DataSize + TFESize)
3597     return None;
3598 
3599   return StringRef(isPackedD16
3600                        ? "image data size does not match dmask, d16 and tfe"
3601                        : "image data size does not match dmask and tfe");
3602 }
3603 
3604 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3605   const unsigned Opc = Inst.getOpcode();
3606   const MCInstrDesc &Desc = MII.get(Opc);
3607 
3608   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3609     return true;
3610 
3611   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3612 
3613   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3614       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3615   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3616   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3617   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3618   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3619 
3620   assert(VAddr0Idx != -1);
3621   assert(SrsrcIdx != -1);
3622   assert(SrsrcIdx > VAddr0Idx);
3623 
3624   if (DimIdx == -1)
3625     return true; // intersect_ray
3626 
3627   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3628   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3629   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3630   unsigned ActualAddrSize =
3631       IsNSA ? SrsrcIdx - VAddr0Idx
3632             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3633   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3634 
3635   unsigned ExpectedAddrSize =
3636       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3637 
3638   if (!IsNSA) {
3639     if (ExpectedAddrSize > 8)
3640       ExpectedAddrSize = 16;
3641 
3642     // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3643     // This provides backward compatibility for assembly created
3644     // before 160b/192b/224b types were directly supported.
3645     if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3646       return true;
3647   }
3648 
3649   return ActualAddrSize == ExpectedAddrSize;
3650 }
3651 
3652 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3653 
3654   const unsigned Opc = Inst.getOpcode();
3655   const MCInstrDesc &Desc = MII.get(Opc);
3656 
3657   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3658     return true;
3659   if (!Desc.mayLoad() || !Desc.mayStore())
3660     return true; // Not atomic
3661 
3662   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3663   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3664 
3665   // This is an incomplete check because image_atomic_cmpswap
3666   // may only use 0x3 and 0xf while other atomic operations
3667   // may use 0x1 and 0x3. However these limitations are
3668   // verified when we check that dmask matches dst size.
3669   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3670 }
3671 
3672 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3673 
3674   const unsigned Opc = Inst.getOpcode();
3675   const MCInstrDesc &Desc = MII.get(Opc);
3676 
3677   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3678     return true;
3679 
3680   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3681   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3682 
3683   // GATHER4 instructions use dmask in a different fashion compared to
3684   // other MIMG instructions. The only useful DMASK values are
3685   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3686   // (red,red,red,red) etc.) The ISA document doesn't mention
3687   // this.
3688   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3689 }
3690 
3691 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3692   const unsigned Opc = Inst.getOpcode();
3693   const MCInstrDesc &Desc = MII.get(Opc);
3694 
3695   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3696     return true;
3697 
3698   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3699   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3700       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3701 
3702   if (!BaseOpcode->MSAA)
3703     return true;
3704 
3705   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3706   assert(DimIdx != -1);
3707 
3708   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3709   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3710 
3711   return DimInfo->MSAA;
3712 }
3713 
3714 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3715 {
3716   switch (Opcode) {
3717   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3718   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3719   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3720     return true;
3721   default:
3722     return false;
3723   }
3724 }
3725 
3726 // movrels* opcodes should only allow VGPRS as src0.
3727 // This is specified in .td description for vop1/vop3,
3728 // but sdwa is handled differently. See isSDWAOperand.
3729 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3730                                       const OperandVector &Operands) {
3731 
3732   const unsigned Opc = Inst.getOpcode();
3733   const MCInstrDesc &Desc = MII.get(Opc);
3734 
3735   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3736     return true;
3737 
3738   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3739   assert(Src0Idx != -1);
3740 
3741   SMLoc ErrLoc;
3742   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3743   if (Src0.isReg()) {
3744     auto Reg = mc2PseudoReg(Src0.getReg());
3745     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3746     if (!isSGPR(Reg, TRI))
3747       return true;
3748     ErrLoc = getRegLoc(Reg, Operands);
3749   } else {
3750     ErrLoc = getConstLoc(Operands);
3751   }
3752 
3753   Error(ErrLoc, "source operand must be a VGPR");
3754   return false;
3755 }
3756 
3757 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3758                                           const OperandVector &Operands) {
3759 
3760   const unsigned Opc = Inst.getOpcode();
3761 
3762   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3763     return true;
3764 
3765   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3766   assert(Src0Idx != -1);
3767 
3768   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3769   if (!Src0.isReg())
3770     return true;
3771 
3772   auto Reg = mc2PseudoReg(Src0.getReg());
3773   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3774   if (!isGFX90A() && isSGPR(Reg, TRI)) {
3775     Error(getRegLoc(Reg, Operands),
3776           "source operand must be either a VGPR or an inline constant");
3777     return false;
3778   }
3779 
3780   return true;
3781 }
3782 
3783 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3784                                    const OperandVector &Operands) {
3785   const unsigned Opc = Inst.getOpcode();
3786   const MCInstrDesc &Desc = MII.get(Opc);
3787 
3788   if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3789     return true;
3790 
3791   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3792   if (Src2Idx == -1)
3793     return true;
3794 
3795   const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3796   if (!Src2.isReg())
3797     return true;
3798 
3799   MCRegister Src2Reg = Src2.getReg();
3800   MCRegister DstReg = Inst.getOperand(0).getReg();
3801   if (Src2Reg == DstReg)
3802     return true;
3803 
3804   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3805   if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
3806     return true;
3807 
3808   if (TRI->regsOverlap(Src2Reg, DstReg)) {
3809     Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3810           "source 2 operand must not partially overlap with dst");
3811     return false;
3812   }
3813 
3814   return true;
3815 }
3816 
3817 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3818   switch (Inst.getOpcode()) {
3819   default:
3820     return true;
3821   case V_DIV_SCALE_F32_gfx6_gfx7:
3822   case V_DIV_SCALE_F32_vi:
3823   case V_DIV_SCALE_F32_gfx10:
3824   case V_DIV_SCALE_F64_gfx6_gfx7:
3825   case V_DIV_SCALE_F64_vi:
3826   case V_DIV_SCALE_F64_gfx10:
3827     break;
3828   }
3829 
3830   // TODO: Check that src0 = src1 or src2.
3831 
3832   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3833                     AMDGPU::OpName::src2_modifiers,
3834                     AMDGPU::OpName::src2_modifiers}) {
3835     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3836             .getImm() &
3837         SISrcMods::ABS) {
3838       return false;
3839     }
3840   }
3841 
3842   return true;
3843 }
3844 
3845 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3846 
3847   const unsigned Opc = Inst.getOpcode();
3848   const MCInstrDesc &Desc = MII.get(Opc);
3849 
3850   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3851     return true;
3852 
3853   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3854   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3855     if (isCI() || isSI())
3856       return false;
3857   }
3858 
3859   return true;
3860 }
3861 
3862 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3863   const unsigned Opc = Inst.getOpcode();
3864   const MCInstrDesc &Desc = MII.get(Opc);
3865 
3866   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3867     return true;
3868 
3869   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3870   if (DimIdx < 0)
3871     return true;
3872 
3873   long Imm = Inst.getOperand(DimIdx).getImm();
3874   if (Imm < 0 || Imm >= 8)
3875     return false;
3876 
3877   return true;
3878 }
3879 
3880 static bool IsRevOpcode(const unsigned Opcode)
3881 {
3882   switch (Opcode) {
3883   case AMDGPU::V_SUBREV_F32_e32:
3884   case AMDGPU::V_SUBREV_F32_e64:
3885   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3886   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3887   case AMDGPU::V_SUBREV_F32_e32_vi:
3888   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3889   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3890   case AMDGPU::V_SUBREV_F32_e64_vi:
3891 
3892   case AMDGPU::V_SUBREV_CO_U32_e32:
3893   case AMDGPU::V_SUBREV_CO_U32_e64:
3894   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3895   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3896 
3897   case AMDGPU::V_SUBBREV_U32_e32:
3898   case AMDGPU::V_SUBBREV_U32_e64:
3899   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3900   case AMDGPU::V_SUBBREV_U32_e32_vi:
3901   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3902   case AMDGPU::V_SUBBREV_U32_e64_vi:
3903 
3904   case AMDGPU::V_SUBREV_U32_e32:
3905   case AMDGPU::V_SUBREV_U32_e64:
3906   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3907   case AMDGPU::V_SUBREV_U32_e32_vi:
3908   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3909   case AMDGPU::V_SUBREV_U32_e64_vi:
3910 
3911   case AMDGPU::V_SUBREV_F16_e32:
3912   case AMDGPU::V_SUBREV_F16_e64:
3913   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3914   case AMDGPU::V_SUBREV_F16_e32_vi:
3915   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3916   case AMDGPU::V_SUBREV_F16_e64_vi:
3917 
3918   case AMDGPU::V_SUBREV_U16_e32:
3919   case AMDGPU::V_SUBREV_U16_e64:
3920   case AMDGPU::V_SUBREV_U16_e32_vi:
3921   case AMDGPU::V_SUBREV_U16_e64_vi:
3922 
3923   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3924   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3925   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3926 
3927   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3928   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3929 
3930   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3931   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3932 
3933   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3934   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3935 
3936   case AMDGPU::V_LSHRREV_B32_e32:
3937   case AMDGPU::V_LSHRREV_B32_e64:
3938   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3939   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3940   case AMDGPU::V_LSHRREV_B32_e32_vi:
3941   case AMDGPU::V_LSHRREV_B32_e64_vi:
3942   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3943   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3944 
3945   case AMDGPU::V_ASHRREV_I32_e32:
3946   case AMDGPU::V_ASHRREV_I32_e64:
3947   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3948   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3949   case AMDGPU::V_ASHRREV_I32_e32_vi:
3950   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3951   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3952   case AMDGPU::V_ASHRREV_I32_e64_vi:
3953 
3954   case AMDGPU::V_LSHLREV_B32_e32:
3955   case AMDGPU::V_LSHLREV_B32_e64:
3956   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3957   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3958   case AMDGPU::V_LSHLREV_B32_e32_vi:
3959   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3960   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3961   case AMDGPU::V_LSHLREV_B32_e64_vi:
3962 
3963   case AMDGPU::V_LSHLREV_B16_e32:
3964   case AMDGPU::V_LSHLREV_B16_e64:
3965   case AMDGPU::V_LSHLREV_B16_e32_vi:
3966   case AMDGPU::V_LSHLREV_B16_e64_vi:
3967   case AMDGPU::V_LSHLREV_B16_gfx10:
3968 
3969   case AMDGPU::V_LSHRREV_B16_e32:
3970   case AMDGPU::V_LSHRREV_B16_e64:
3971   case AMDGPU::V_LSHRREV_B16_e32_vi:
3972   case AMDGPU::V_LSHRREV_B16_e64_vi:
3973   case AMDGPU::V_LSHRREV_B16_gfx10:
3974 
3975   case AMDGPU::V_ASHRREV_I16_e32:
3976   case AMDGPU::V_ASHRREV_I16_e64:
3977   case AMDGPU::V_ASHRREV_I16_e32_vi:
3978   case AMDGPU::V_ASHRREV_I16_e64_vi:
3979   case AMDGPU::V_ASHRREV_I16_gfx10:
3980 
3981   case AMDGPU::V_LSHLREV_B64_e64:
3982   case AMDGPU::V_LSHLREV_B64_gfx10:
3983   case AMDGPU::V_LSHLREV_B64_vi:
3984 
3985   case AMDGPU::V_LSHRREV_B64_e64:
3986   case AMDGPU::V_LSHRREV_B64_gfx10:
3987   case AMDGPU::V_LSHRREV_B64_vi:
3988 
3989   case AMDGPU::V_ASHRREV_I64_e64:
3990   case AMDGPU::V_ASHRREV_I64_gfx10:
3991   case AMDGPU::V_ASHRREV_I64_vi:
3992 
3993   case AMDGPU::V_PK_LSHLREV_B16:
3994   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3995   case AMDGPU::V_PK_LSHLREV_B16_vi:
3996 
3997   case AMDGPU::V_PK_LSHRREV_B16:
3998   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3999   case AMDGPU::V_PK_LSHRREV_B16_vi:
4000   case AMDGPU::V_PK_ASHRREV_I16:
4001   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4002   case AMDGPU::V_PK_ASHRREV_I16_vi:
4003     return true;
4004   default:
4005     return false;
4006   }
4007 }
4008 
4009 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4010 
4011   using namespace SIInstrFlags;
4012   const unsigned Opcode = Inst.getOpcode();
4013   const MCInstrDesc &Desc = MII.get(Opcode);
4014 
4015   // lds_direct register is defined so that it can be used
4016   // with 9-bit operands only. Ignore encodings which do not accept these.
4017   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4018   if ((Desc.TSFlags & Enc) == 0)
4019     return None;
4020 
4021   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4022     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4023     if (SrcIdx == -1)
4024       break;
4025     const auto &Src = Inst.getOperand(SrcIdx);
4026     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4027 
4028       if (isGFX90A() || isGFX11Plus())
4029         return StringRef("lds_direct is not supported on this GPU");
4030 
4031       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4032         return StringRef("lds_direct cannot be used with this instruction");
4033 
4034       if (SrcName != OpName::src0)
4035         return StringRef("lds_direct may be used as src0 only");
4036     }
4037   }
4038 
4039   return None;
4040 }
4041 
4042 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4043   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4044     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4045     if (Op.isFlatOffset())
4046       return Op.getStartLoc();
4047   }
4048   return getLoc();
4049 }
4050 
4051 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4052                                          const OperandVector &Operands) {
4053   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4054   if ((TSFlags & SIInstrFlags::FLAT) == 0)
4055     return true;
4056 
4057   auto Opcode = Inst.getOpcode();
4058   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4059   assert(OpNum != -1);
4060 
4061   const auto &Op = Inst.getOperand(OpNum);
4062   if (!hasFlatOffsets() && Op.getImm() != 0) {
4063     Error(getFlatOffsetLoc(Operands),
4064           "flat offset modifier is not supported on this GPU");
4065     return false;
4066   }
4067 
4068   // For FLAT segment the offset must be positive;
4069   // MSB is ignored and forced to zero.
4070   if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
4071     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
4072     if (!isIntN(OffsetSize, Op.getImm())) {
4073       Error(getFlatOffsetLoc(Operands),
4074             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4075       return false;
4076     }
4077   } else {
4078     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
4079     if (!isUIntN(OffsetSize, Op.getImm())) {
4080       Error(getFlatOffsetLoc(Operands),
4081             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4082       return false;
4083     }
4084   }
4085 
4086   return true;
4087 }
4088 
4089 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4090   // Start with second operand because SMEM Offset cannot be dst or src0.
4091   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4092     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4093     if (Op.isSMEMOffset())
4094       return Op.getStartLoc();
4095   }
4096   return getLoc();
4097 }
4098 
4099 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4100                                          const OperandVector &Operands) {
4101   if (isCI() || isSI())
4102     return true;
4103 
4104   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4105   if ((TSFlags & SIInstrFlags::SMRD) == 0)
4106     return true;
4107 
4108   auto Opcode = Inst.getOpcode();
4109   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4110   if (OpNum == -1)
4111     return true;
4112 
4113   const auto &Op = Inst.getOperand(OpNum);
4114   if (!Op.isImm())
4115     return true;
4116 
4117   uint64_t Offset = Op.getImm();
4118   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4119   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4120       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4121     return true;
4122 
4123   Error(getSMEMOffsetLoc(Operands),
4124         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
4125                                "expected a 21-bit signed offset");
4126 
4127   return false;
4128 }
4129 
4130 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4131   unsigned Opcode = Inst.getOpcode();
4132   const MCInstrDesc &Desc = MII.get(Opcode);
4133   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4134     return true;
4135 
4136   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4137   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4138 
4139   const int OpIndices[] = { Src0Idx, Src1Idx };
4140 
4141   unsigned NumExprs = 0;
4142   unsigned NumLiterals = 0;
4143   uint32_t LiteralValue;
4144 
4145   for (int OpIdx : OpIndices) {
4146     if (OpIdx == -1) break;
4147 
4148     const MCOperand &MO = Inst.getOperand(OpIdx);
4149     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4150     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4151       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4152         uint32_t Value = static_cast<uint32_t>(MO.getImm());
4153         if (NumLiterals == 0 || LiteralValue != Value) {
4154           LiteralValue = Value;
4155           ++NumLiterals;
4156         }
4157       } else if (MO.isExpr()) {
4158         ++NumExprs;
4159       }
4160     }
4161   }
4162 
4163   return NumLiterals + NumExprs <= 1;
4164 }
4165 
4166 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4167   const unsigned Opc = Inst.getOpcode();
4168   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
4169       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
4170     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4171     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4172 
4173     if (OpSel & ~3)
4174       return false;
4175   }
4176 
4177   if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) {
4178     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4179     if (OpSelIdx != -1) {
4180       if (Inst.getOperand(OpSelIdx).getImm() != 0)
4181         return false;
4182     }
4183     int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4184     if (OpSelHiIdx != -1) {
4185       if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4186         return false;
4187     }
4188   }
4189 
4190   return true;
4191 }
4192 
4193 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4194                                   const OperandVector &Operands) {
4195   const unsigned Opc = Inst.getOpcode();
4196   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4197   if (DppCtrlIdx < 0)
4198     return true;
4199   unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4200 
4201   if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
4202     // DPP64 is supported for row_newbcast only.
4203     int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4204     if (Src0Idx >= 0 &&
4205         getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
4206       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4207       Error(S, "64 bit dpp only supports row_newbcast");
4208       return false;
4209     }
4210   }
4211 
4212   return true;
4213 }
4214 
4215 // Check if VCC register matches wavefront size
4216 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4217   auto FB = getFeatureBits();
4218   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4219     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4220 }
4221 
4222 // One unique literal can be used. VOP3 literal is only allowed in GFX10+
4223 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4224                                          const OperandVector &Operands) {
4225   unsigned Opcode = Inst.getOpcode();
4226   const MCInstrDesc &Desc = MII.get(Opcode);
4227   const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
4228   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4229       ImmIdx == -1)
4230     return true;
4231 
4232   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4233   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4234   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
4235 
4236   const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};
4237 
4238   unsigned NumExprs = 0;
4239   unsigned NumLiterals = 0;
4240   uint32_t LiteralValue;
4241 
4242   for (int OpIdx : OpIndices) {
4243     if (OpIdx == -1)
4244       continue;
4245 
4246     const MCOperand &MO = Inst.getOperand(OpIdx);
4247     if (!MO.isImm() && !MO.isExpr())
4248       continue;
4249     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4250       continue;
4251 
4252     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4253         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4254       Error(getConstLoc(Operands),
4255             "inline constants are not allowed for this operand");
4256       return false;
4257     }
4258 
4259     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4260       uint32_t Value = static_cast<uint32_t>(MO.getImm());
4261       if (NumLiterals == 0 || LiteralValue != Value) {
4262         LiteralValue = Value;
4263         ++NumLiterals;
4264       }
4265     } else if (MO.isExpr()) {
4266       ++NumExprs;
4267     }
4268   }
4269   NumLiterals += NumExprs;
4270 
4271   if (!NumLiterals)
4272     return true;
4273 
4274   if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4275     Error(getLitLoc(Operands), "literal operands are not supported");
4276     return false;
4277   }
4278 
4279   if (NumLiterals > 1) {
4280     Error(getLitLoc(Operands), "only one literal operand is allowed");
4281     return false;
4282   }
4283 
4284   return true;
4285 }
4286 
4287 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4288 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4289                          const MCRegisterInfo *MRI) {
4290   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4291   if (OpIdx < 0)
4292     return -1;
4293 
4294   const MCOperand &Op = Inst.getOperand(OpIdx);
4295   if (!Op.isReg())
4296     return -1;
4297 
4298   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4299   auto Reg = Sub ? Sub : Op.getReg();
4300   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4301   return AGPR32.contains(Reg) ? 1 : 0;
4302 }
4303 
4304 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4305   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4306   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4307                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4308                   SIInstrFlags::DS)) == 0)
4309     return true;
4310 
4311   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4312                                                       : AMDGPU::OpName::vdata;
4313 
4314   const MCRegisterInfo *MRI = getMRI();
4315   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4316   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4317 
4318   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4319     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4320     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4321       return false;
4322   }
4323 
4324   auto FB = getFeatureBits();
4325   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4326     if (DataAreg < 0 || DstAreg < 0)
4327       return true;
4328     return DstAreg == DataAreg;
4329   }
4330 
4331   return DstAreg < 1 && DataAreg < 1;
4332 }
4333 
4334 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4335   auto FB = getFeatureBits();
4336   if (!FB[AMDGPU::FeatureGFX90AInsts])
4337     return true;
4338 
4339   const MCRegisterInfo *MRI = getMRI();
4340   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4341   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4342   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4343     const MCOperand &Op = Inst.getOperand(I);
4344     if (!Op.isReg())
4345       continue;
4346 
4347     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4348     if (!Sub)
4349       continue;
4350 
4351     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4352       return false;
4353     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4354       return false;
4355   }
4356 
4357   return true;
4358 }
4359 
4360 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4361   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4362     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4363     if (Op.isBLGP())
4364       return Op.getStartLoc();
4365   }
4366   return SMLoc();
4367 }
4368 
4369 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4370                                    const OperandVector &Operands) {
4371   unsigned Opc = Inst.getOpcode();
4372   int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4373   if (BlgpIdx == -1)
4374     return true;
4375   SMLoc BLGPLoc = getBLGPLoc(Operands);
4376   if (!BLGPLoc.isValid())
4377     return true;
4378   bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:");
4379   auto FB = getFeatureBits();
4380   bool UsesNeg = false;
4381   if (FB[AMDGPU::FeatureGFX940Insts]) {
4382     switch (Opc) {
4383     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4384     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4385     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4386     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4387       UsesNeg = true;
4388     }
4389   }
4390 
4391   if (IsNeg == UsesNeg)
4392     return true;
4393 
4394   Error(BLGPLoc,
4395         UsesNeg ? "invalid modifier: blgp is not supported"
4396                 : "invalid modifier: neg is not supported");
4397 
4398   return false;
4399 }
4400 
4401 // gfx90a has an undocumented limitation:
4402 // DS_GWS opcodes must use even aligned registers.
4403 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4404                                   const OperandVector &Operands) {
4405   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4406     return true;
4407 
4408   int Opc = Inst.getOpcode();
4409   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4410       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4411     return true;
4412 
4413   const MCRegisterInfo *MRI = getMRI();
4414   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4415   int Data0Pos =
4416       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4417   assert(Data0Pos != -1);
4418   auto Reg = Inst.getOperand(Data0Pos).getReg();
4419   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4420   if (RegIdx & 1) {
4421     SMLoc RegLoc = getRegLoc(Reg, Operands);
4422     Error(RegLoc, "vgpr must be even aligned");
4423     return false;
4424   }
4425 
4426   return true;
4427 }
4428 
4429 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4430                                             const OperandVector &Operands,
4431                                             const SMLoc &IDLoc) {
4432   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4433                                            AMDGPU::OpName::cpol);
4434   if (CPolPos == -1)
4435     return true;
4436 
4437   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4438 
4439   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4440   if (TSFlags & SIInstrFlags::SMRD) {
4441     if (CPol && (isSI() || isCI())) {
4442       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4443       Error(S, "cache policy is not supported for SMRD instructions");
4444       return false;
4445     }
4446     if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4447       Error(IDLoc, "invalid cache policy for SMEM instruction");
4448       return false;
4449     }
4450   }
4451 
4452   if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4453     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4454     StringRef CStr(S.getPointer());
4455     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4456     Error(S, "scc is not supported on this GPU");
4457     return false;
4458   }
4459 
4460   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4461     return true;
4462 
4463   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4464     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4465       Error(IDLoc, isGFX940() ? "instruction must use sc0"
4466                               : "instruction must use glc");
4467       return false;
4468     }
4469   } else {
4470     if (CPol & CPol::GLC) {
4471       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4472       StringRef CStr(S.getPointer());
4473       S = SMLoc::getFromPointer(
4474           &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4475       Error(S, isGFX940() ? "instruction must not use sc0"
4476                           : "instruction must not use glc");
4477       return false;
4478     }
4479   }
4480 
4481   return true;
4482 }
4483 
4484 bool AMDGPUAsmParser::validateFlatLdsDMA(const MCInst &Inst,
4485                                          const OperandVector &Operands,
4486                                          const SMLoc &IDLoc) {
4487   if (isGFX940())
4488     return true;
4489 
4490   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4491   if ((TSFlags & (SIInstrFlags::VALU | SIInstrFlags::FLAT)) !=
4492       (SIInstrFlags::VALU | SIInstrFlags::FLAT))
4493     return true;
4494   // This is FLAT LDS DMA.
4495 
4496   SMLoc S = getImmLoc(AMDGPUOperand::ImmTyLDS, Operands);
4497   StringRef CStr(S.getPointer());
4498   if (!CStr.startswith("lds")) {
4499     // This is incorrectly selected LDS DMA version of a FLAT load opcode.
4500     // And LDS version should have 'lds' modifier, but it follows optional
4501     // operands so its absense is ignored by the matcher.
4502     Error(IDLoc, "invalid operands for instruction");
4503     return false;
4504   }
4505 
4506   return true;
4507 }
4508 
4509 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4510                                           const SMLoc &IDLoc,
4511                                           const OperandVector &Operands) {
4512   if (auto ErrMsg = validateLdsDirect(Inst)) {
4513     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4514     return false;
4515   }
4516   if (!validateSOPLiteral(Inst)) {
4517     Error(getLitLoc(Operands),
4518       "only one literal operand is allowed");
4519     return false;
4520   }
4521   if (!validateVOPLiteral(Inst, Operands)) {
4522     return false;
4523   }
4524   if (!validateConstantBusLimitations(Inst, Operands)) {
4525     return false;
4526   }
4527   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4528     return false;
4529   }
4530   if (!validateIntClampSupported(Inst)) {
4531     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4532       "integer clamping is not supported on this GPU");
4533     return false;
4534   }
4535   if (!validateOpSel(Inst)) {
4536     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4537       "invalid op_sel operand");
4538     return false;
4539   }
4540   if (!validateDPP(Inst, Operands)) {
4541     return false;
4542   }
4543   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4544   if (!validateMIMGD16(Inst)) {
4545     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4546       "d16 modifier is not supported on this GPU");
4547     return false;
4548   }
4549   if (!validateMIMGDim(Inst)) {
4550     Error(IDLoc, "dim modifier is required on this GPU");
4551     return false;
4552   }
4553   if (!validateMIMGMSAA(Inst)) {
4554     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4555           "invalid dim; must be MSAA type");
4556     return false;
4557   }
4558   if (auto ErrMsg = validateMIMGDataSize(Inst)) {
4559     Error(IDLoc, *ErrMsg);
4560     return false;
4561   }
4562   if (!validateMIMGAddrSize(Inst)) {
4563     Error(IDLoc,
4564       "image address size does not match dim and a16");
4565     return false;
4566   }
4567   if (!validateMIMGAtomicDMask(Inst)) {
4568     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4569       "invalid atomic image dmask");
4570     return false;
4571   }
4572   if (!validateMIMGGatherDMask(Inst)) {
4573     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4574       "invalid image_gather dmask: only one bit must be set");
4575     return false;
4576   }
4577   if (!validateMovrels(Inst, Operands)) {
4578     return false;
4579   }
4580   if (!validateFlatOffset(Inst, Operands)) {
4581     return false;
4582   }
4583   if (!validateSMEMOffset(Inst, Operands)) {
4584     return false;
4585   }
4586   if (!validateMAIAccWrite(Inst, Operands)) {
4587     return false;
4588   }
4589   if (!validateMFMA(Inst, Operands)) {
4590     return false;
4591   }
4592   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4593     return false;
4594   }
4595 
4596   if (!validateAGPRLdSt(Inst)) {
4597     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4598     ? "invalid register class: data and dst should be all VGPR or AGPR"
4599     : "invalid register class: agpr loads and stores not supported on this GPU"
4600     );
4601     return false;
4602   }
4603   if (!validateVGPRAlign(Inst)) {
4604     Error(IDLoc,
4605       "invalid register class: vgpr tuples must be 64 bit aligned");
4606     return false;
4607   }
4608   if (!validateGWS(Inst, Operands)) {
4609     return false;
4610   }
4611 
4612   if (!validateBLGP(Inst, Operands)) {
4613     return false;
4614   }
4615 
4616   if (!validateDivScale(Inst)) {
4617     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4618     return false;
4619   }
4620   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4621     return false;
4622   }
4623 
4624   if (!validateFlatLdsDMA(Inst, Operands, IDLoc)) {
4625     return false;
4626   }
4627 
4628   return true;
4629 }
4630 
4631 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4632                                             const FeatureBitset &FBS,
4633                                             unsigned VariantID = 0);
4634 
4635 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4636                                 const FeatureBitset &AvailableFeatures,
4637                                 unsigned VariantID);
4638 
4639 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4640                                        const FeatureBitset &FBS) {
4641   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4642 }
4643 
4644 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4645                                        const FeatureBitset &FBS,
4646                                        ArrayRef<unsigned> Variants) {
4647   for (auto Variant : Variants) {
4648     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4649       return true;
4650   }
4651 
4652   return false;
4653 }
4654 
4655 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4656                                                   const SMLoc &IDLoc) {
4657   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4658 
4659   // Check if requested instruction variant is supported.
4660   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4661     return false;
4662 
4663   // This instruction is not supported.
4664   // Clear any other pending errors because they are no longer relevant.
4665   getParser().clearPendingErrors();
4666 
4667   // Requested instruction variant is not supported.
4668   // Check if any other variants are supported.
4669   StringRef VariantName = getMatchedVariantName();
4670   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4671     return Error(IDLoc,
4672                  Twine(VariantName,
4673                        " variant of this instruction is not supported"));
4674   }
4675 
4676   // Finally check if this instruction is supported on any other GPU.
4677   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4678     return Error(IDLoc, "instruction not supported on this GPU");
4679   }
4680 
4681   // Instruction not supported on any GPU. Probably a typo.
4682   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4683   return Error(IDLoc, "invalid instruction" + Suggestion);
4684 }
4685 
4686 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4687                                               OperandVector &Operands,
4688                                               MCStreamer &Out,
4689                                               uint64_t &ErrorInfo,
4690                                               bool MatchingInlineAsm) {
4691   MCInst Inst;
4692   unsigned Result = Match_Success;
4693   for (auto Variant : getMatchedVariants()) {
4694     uint64_t EI;
4695     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4696                                   Variant);
4697     // We order match statuses from least to most specific. We use most specific
4698     // status as resulting
4699     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4700     if ((R == Match_Success) ||
4701         (R == Match_PreferE32) ||
4702         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4703         (R == Match_InvalidOperand && Result != Match_MissingFeature
4704                                    && Result != Match_PreferE32) ||
4705         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4706                                    && Result != Match_MissingFeature
4707                                    && Result != Match_PreferE32)) {
4708       Result = R;
4709       ErrorInfo = EI;
4710     }
4711     if (R == Match_Success)
4712       break;
4713   }
4714 
4715   if (Result == Match_Success) {
4716     if (!validateInstruction(Inst, IDLoc, Operands)) {
4717       return true;
4718     }
4719     Inst.setLoc(IDLoc);
4720     Out.emitInstruction(Inst, getSTI());
4721     return false;
4722   }
4723 
4724   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4725   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4726     return true;
4727   }
4728 
4729   switch (Result) {
4730   default: break;
4731   case Match_MissingFeature:
4732     // It has been verified that the specified instruction
4733     // mnemonic is valid. A match was found but it requires
4734     // features which are not supported on this GPU.
4735     return Error(IDLoc, "operands are not valid for this GPU or mode");
4736 
4737   case Match_InvalidOperand: {
4738     SMLoc ErrorLoc = IDLoc;
4739     if (ErrorInfo != ~0ULL) {
4740       if (ErrorInfo >= Operands.size()) {
4741         return Error(IDLoc, "too few operands for instruction");
4742       }
4743       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4744       if (ErrorLoc == SMLoc())
4745         ErrorLoc = IDLoc;
4746     }
4747     return Error(ErrorLoc, "invalid operand for instruction");
4748   }
4749 
4750   case Match_PreferE32:
4751     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4752                         "should be encoded as e32");
4753   case Match_MnemonicFail:
4754     llvm_unreachable("Invalid instructions should have been handled already");
4755   }
4756   llvm_unreachable("Implement any new match types added!");
4757 }
4758 
4759 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4760   int64_t Tmp = -1;
4761   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4762     return true;
4763   }
4764   if (getParser().parseAbsoluteExpression(Tmp)) {
4765     return true;
4766   }
4767   Ret = static_cast<uint32_t>(Tmp);
4768   return false;
4769 }
4770 
4771 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4772                                                uint32_t &Minor) {
4773   if (ParseAsAbsoluteExpression(Major))
4774     return TokError("invalid major version");
4775 
4776   if (!trySkipToken(AsmToken::Comma))
4777     return TokError("minor version number required, comma expected");
4778 
4779   if (ParseAsAbsoluteExpression(Minor))
4780     return TokError("invalid minor version");
4781 
4782   return false;
4783 }
4784 
4785 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4786   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4787     return TokError("directive only supported for amdgcn architecture");
4788 
4789   std::string TargetIDDirective;
4790   SMLoc TargetStart = getTok().getLoc();
4791   if (getParser().parseEscapedString(TargetIDDirective))
4792     return true;
4793 
4794   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4795   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4796     return getParser().Error(TargetRange.Start,
4797         (Twine(".amdgcn_target directive's target id ") +
4798          Twine(TargetIDDirective) +
4799          Twine(" does not match the specified target id ") +
4800          Twine(getTargetStreamer().getTargetID()->toString())).str());
4801 
4802   return false;
4803 }
4804 
4805 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4806   return Error(Range.Start, "value out of range", Range);
4807 }
4808 
4809 bool AMDGPUAsmParser::calculateGPRBlocks(
4810     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4811     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4812     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4813     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4814   // TODO(scott.linder): These calculations are duplicated from
4815   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4816   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4817 
4818   unsigned NumVGPRs = NextFreeVGPR;
4819   unsigned NumSGPRs = NextFreeSGPR;
4820 
4821   if (Version.Major >= 10)
4822     NumSGPRs = 0;
4823   else {
4824     unsigned MaxAddressableNumSGPRs =
4825         IsaInfo::getAddressableNumSGPRs(&getSTI());
4826 
4827     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4828         NumSGPRs > MaxAddressableNumSGPRs)
4829       return OutOfRangeError(SGPRRange);
4830 
4831     NumSGPRs +=
4832         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4833 
4834     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4835         NumSGPRs > MaxAddressableNumSGPRs)
4836       return OutOfRangeError(SGPRRange);
4837 
4838     if (Features.test(FeatureSGPRInitBug))
4839       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4840   }
4841 
4842   VGPRBlocks =
4843       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4844   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4845 
4846   return false;
4847 }
4848 
4849 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4850   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4851     return TokError("directive only supported for amdgcn architecture");
4852 
4853   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4854     return TokError("directive only supported for amdhsa OS");
4855 
4856   StringRef KernelName;
4857   if (getParser().parseIdentifier(KernelName))
4858     return true;
4859 
4860   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4861 
4862   StringSet<> Seen;
4863 
4864   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4865 
4866   SMRange VGPRRange;
4867   uint64_t NextFreeVGPR = 0;
4868   uint64_t AccumOffset = 0;
4869   uint64_t SharedVGPRCount = 0;
4870   SMRange SGPRRange;
4871   uint64_t NextFreeSGPR = 0;
4872 
4873   // Count the number of user SGPRs implied from the enabled feature bits.
4874   unsigned ImpliedUserSGPRCount = 0;
4875 
4876   // Track if the asm explicitly contains the directive for the user SGPR
4877   // count.
4878   Optional<unsigned> ExplicitUserSGPRCount;
4879   bool ReserveVCC = true;
4880   bool ReserveFlatScr = true;
4881   Optional<bool> EnableWavefrontSize32;
4882 
4883   while (true) {
4884     while (trySkipToken(AsmToken::EndOfStatement));
4885 
4886     StringRef ID;
4887     SMRange IDRange = getTok().getLocRange();
4888     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4889       return true;
4890 
4891     if (ID == ".end_amdhsa_kernel")
4892       break;
4893 
4894     if (Seen.find(ID) != Seen.end())
4895       return TokError(".amdhsa_ directives cannot be repeated");
4896     Seen.insert(ID);
4897 
4898     SMLoc ValStart = getLoc();
4899     int64_t IVal;
4900     if (getParser().parseAbsoluteExpression(IVal))
4901       return true;
4902     SMLoc ValEnd = getLoc();
4903     SMRange ValRange = SMRange(ValStart, ValEnd);
4904 
4905     if (IVal < 0)
4906       return OutOfRangeError(ValRange);
4907 
4908     uint64_t Val = IVal;
4909 
4910 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4911   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4912     return OutOfRangeError(RANGE);                                             \
4913   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4914 
4915     if (ID == ".amdhsa_group_segment_fixed_size") {
4916       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4917         return OutOfRangeError(ValRange);
4918       KD.group_segment_fixed_size = Val;
4919     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4920       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4921         return OutOfRangeError(ValRange);
4922       KD.private_segment_fixed_size = Val;
4923     } else if (ID == ".amdhsa_kernarg_size") {
4924       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4925         return OutOfRangeError(ValRange);
4926       KD.kernarg_size = Val;
4927     } else if (ID == ".amdhsa_user_sgpr_count") {
4928       ExplicitUserSGPRCount = Val;
4929     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4930       if (hasArchitectedFlatScratch())
4931         return Error(IDRange.Start,
4932                      "directive is not supported with architected flat scratch",
4933                      IDRange);
4934       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4935                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4936                        Val, ValRange);
4937       if (Val)
4938         ImpliedUserSGPRCount += 4;
4939     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4940       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4941                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4942                        ValRange);
4943       if (Val)
4944         ImpliedUserSGPRCount += 2;
4945     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4946       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4947                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4948                        ValRange);
4949       if (Val)
4950         ImpliedUserSGPRCount += 2;
4951     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4952       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4953                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4954                        Val, ValRange);
4955       if (Val)
4956         ImpliedUserSGPRCount += 2;
4957     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4958       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4959                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4960                        ValRange);
4961       if (Val)
4962         ImpliedUserSGPRCount += 2;
4963     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4964       if (hasArchitectedFlatScratch())
4965         return Error(IDRange.Start,
4966                      "directive is not supported with architected flat scratch",
4967                      IDRange);
4968       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4969                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4970                        ValRange);
4971       if (Val)
4972         ImpliedUserSGPRCount += 2;
4973     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4974       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4975                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4976                        Val, ValRange);
4977       if (Val)
4978         ImpliedUserSGPRCount += 1;
4979     } else if (ID == ".amdhsa_wavefront_size32") {
4980       if (IVersion.Major < 10)
4981         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4982       EnableWavefrontSize32 = Val;
4983       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4984                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4985                        Val, ValRange);
4986     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4987       if (hasArchitectedFlatScratch())
4988         return Error(IDRange.Start,
4989                      "directive is not supported with architected flat scratch",
4990                      IDRange);
4991       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4992                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4993     } else if (ID == ".amdhsa_enable_private_segment") {
4994       if (!hasArchitectedFlatScratch())
4995         return Error(
4996             IDRange.Start,
4997             "directive is not supported without architected flat scratch",
4998             IDRange);
4999       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5000                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5001     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
5002       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5003                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
5004                        ValRange);
5005     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
5006       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5007                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
5008                        ValRange);
5009     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
5010       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5011                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
5012                        ValRange);
5013     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
5014       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5015                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
5016                        ValRange);
5017     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
5018       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5019                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
5020                        ValRange);
5021     } else if (ID == ".amdhsa_next_free_vgpr") {
5022       VGPRRange = ValRange;
5023       NextFreeVGPR = Val;
5024     } else if (ID == ".amdhsa_next_free_sgpr") {
5025       SGPRRange = ValRange;
5026       NextFreeSGPR = Val;
5027     } else if (ID == ".amdhsa_accum_offset") {
5028       if (!isGFX90A())
5029         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5030       AccumOffset = Val;
5031     } else if (ID == ".amdhsa_reserve_vcc") {
5032       if (!isUInt<1>(Val))
5033         return OutOfRangeError(ValRange);
5034       ReserveVCC = Val;
5035     } else if (ID == ".amdhsa_reserve_flat_scratch") {
5036       if (IVersion.Major < 7)
5037         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
5038       if (hasArchitectedFlatScratch())
5039         return Error(IDRange.Start,
5040                      "directive is not supported with architected flat scratch",
5041                      IDRange);
5042       if (!isUInt<1>(Val))
5043         return OutOfRangeError(ValRange);
5044       ReserveFlatScr = Val;
5045     } else if (ID == ".amdhsa_reserve_xnack_mask") {
5046       if (IVersion.Major < 8)
5047         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5048       if (!isUInt<1>(Val))
5049         return OutOfRangeError(ValRange);
5050       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5051         return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
5052                                  IDRange);
5053     } else if (ID == ".amdhsa_float_round_mode_32") {
5054       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5055                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
5056     } else if (ID == ".amdhsa_float_round_mode_16_64") {
5057       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5058                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
5059     } else if (ID == ".amdhsa_float_denorm_mode_32") {
5060       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5061                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
5062     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5063       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5064                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
5065                        ValRange);
5066     } else if (ID == ".amdhsa_dx10_clamp") {
5067       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5068                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
5069     } else if (ID == ".amdhsa_ieee_mode") {
5070       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
5071                        Val, ValRange);
5072     } else if (ID == ".amdhsa_fp16_overflow") {
5073       if (IVersion.Major < 9)
5074         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5075       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
5076                        ValRange);
5077     } else if (ID == ".amdhsa_tg_split") {
5078       if (!isGFX90A())
5079         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5080       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
5081                        ValRange);
5082     } else if (ID == ".amdhsa_workgroup_processor_mode") {
5083       if (IVersion.Major < 10)
5084         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5085       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
5086                        ValRange);
5087     } else if (ID == ".amdhsa_memory_ordered") {
5088       if (IVersion.Major < 10)
5089         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5090       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
5091                        ValRange);
5092     } else if (ID == ".amdhsa_forward_progress") {
5093       if (IVersion.Major < 10)
5094         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5095       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
5096                        ValRange);
5097     } else if (ID == ".amdhsa_shared_vgpr_count") {
5098       if (IVersion.Major < 10)
5099         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5100       SharedVGPRCount = Val;
5101       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5102                        COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val,
5103                        ValRange);
5104     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5105       PARSE_BITS_ENTRY(
5106           KD.compute_pgm_rsrc2,
5107           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
5108           ValRange);
5109     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5110       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5111                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5112                        Val, ValRange);
5113     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5114       PARSE_BITS_ENTRY(
5115           KD.compute_pgm_rsrc2,
5116           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
5117           ValRange);
5118     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5119       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5120                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5121                        Val, ValRange);
5122     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5123       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5124                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5125                        Val, ValRange);
5126     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5127       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5128                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5129                        Val, ValRange);
5130     } else if (ID == ".amdhsa_exception_int_div_zero") {
5131       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5132                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5133                        Val, ValRange);
5134     } else {
5135       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5136     }
5137 
5138 #undef PARSE_BITS_ENTRY
5139   }
5140 
5141   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
5142     return TokError(".amdhsa_next_free_vgpr directive is required");
5143 
5144   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
5145     return TokError(".amdhsa_next_free_sgpr directive is required");
5146 
5147   unsigned VGPRBlocks;
5148   unsigned SGPRBlocks;
5149   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5150                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5151                          EnableWavefrontSize32, NextFreeVGPR,
5152                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5153                          SGPRBlocks))
5154     return true;
5155 
5156   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5157           VGPRBlocks))
5158     return OutOfRangeError(VGPRRange);
5159   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5160                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
5161 
5162   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5163           SGPRBlocks))
5164     return OutOfRangeError(SGPRRange);
5165   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5166                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
5167                   SGPRBlocks);
5168 
5169   if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5170     return TokError("amdgpu_user_sgpr_count smaller than than implied by "
5171                     "enabled user SGPRs");
5172 
5173   unsigned UserSGPRCount =
5174       ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5175 
5176   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5177     return TokError("too many user SGPRs enabled");
5178   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
5179                   UserSGPRCount);
5180 
5181   if (isGFX90A()) {
5182     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
5183       return TokError(".amdhsa_accum_offset directive is required");
5184     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5185       return TokError("accum_offset should be in range [4..256] in "
5186                       "increments of 4");
5187     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5188       return TokError("accum_offset exceeds total VGPR allocation");
5189     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5190                     (AccumOffset / 4 - 1));
5191   }
5192 
5193   if (IVersion.Major == 10) {
5194     // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS
5195     if (SharedVGPRCount && EnableWavefrontSize32) {
5196       return TokError("shared_vgpr_count directive not valid on "
5197                       "wavefront size 32");
5198     }
5199     if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5200       return TokError("shared_vgpr_count*2 + "
5201                       "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5202                       "exceed 63\n");
5203     }
5204   }
5205 
5206   getTargetStreamer().EmitAmdhsaKernelDescriptor(
5207       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
5208       ReserveFlatScr);
5209   return false;
5210 }
5211 
5212 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
5213   uint32_t Major;
5214   uint32_t Minor;
5215 
5216   if (ParseDirectiveMajorMinor(Major, Minor))
5217     return true;
5218 
5219   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
5220   return false;
5221 }
5222 
5223 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
5224   uint32_t Major;
5225   uint32_t Minor;
5226   uint32_t Stepping;
5227   StringRef VendorName;
5228   StringRef ArchName;
5229 
5230   // If this directive has no arguments, then use the ISA version for the
5231   // targeted GPU.
5232   if (isToken(AsmToken::EndOfStatement)) {
5233     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5234     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
5235                                                         ISA.Stepping,
5236                                                         "AMD", "AMDGPU");
5237     return false;
5238   }
5239 
5240   if (ParseDirectiveMajorMinor(Major, Minor))
5241     return true;
5242 
5243   if (!trySkipToken(AsmToken::Comma))
5244     return TokError("stepping version number required, comma expected");
5245 
5246   if (ParseAsAbsoluteExpression(Stepping))
5247     return TokError("invalid stepping version");
5248 
5249   if (!trySkipToken(AsmToken::Comma))
5250     return TokError("vendor name required, comma expected");
5251 
5252   if (!parseString(VendorName, "invalid vendor name"))
5253     return true;
5254 
5255   if (!trySkipToken(AsmToken::Comma))
5256     return TokError("arch name required, comma expected");
5257 
5258   if (!parseString(ArchName, "invalid arch name"))
5259     return true;
5260 
5261   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
5262                                                       VendorName, ArchName);
5263   return false;
5264 }
5265 
5266 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5267                                                amd_kernel_code_t &Header) {
5268   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5269   // assembly for backwards compatibility.
5270   if (ID == "max_scratch_backing_memory_byte_size") {
5271     Parser.eatToEndOfStatement();
5272     return false;
5273   }
5274 
5275   SmallString<40> ErrStr;
5276   raw_svector_ostream Err(ErrStr);
5277   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5278     return TokError(Err.str());
5279   }
5280   Lex();
5281 
5282   if (ID == "enable_wavefront_size32") {
5283     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5284       if (!isGFX10Plus())
5285         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5286       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5287         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5288     } else {
5289       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5290         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5291     }
5292   }
5293 
5294   if (ID == "wavefront_size") {
5295     if (Header.wavefront_size == 5) {
5296       if (!isGFX10Plus())
5297         return TokError("wavefront_size=5 is only allowed on GFX10+");
5298       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5299         return TokError("wavefront_size=5 requires +WavefrontSize32");
5300     } else if (Header.wavefront_size == 6) {
5301       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5302         return TokError("wavefront_size=6 requires +WavefrontSize64");
5303     }
5304   }
5305 
5306   if (ID == "enable_wgp_mode") {
5307     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5308         !isGFX10Plus())
5309       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5310   }
5311 
5312   if (ID == "enable_mem_ordered") {
5313     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5314         !isGFX10Plus())
5315       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5316   }
5317 
5318   if (ID == "enable_fwd_progress") {
5319     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5320         !isGFX10Plus())
5321       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5322   }
5323 
5324   return false;
5325 }
5326 
5327 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5328   amd_kernel_code_t Header;
5329   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5330 
5331   while (true) {
5332     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
5333     // will set the current token to EndOfStatement.
5334     while(trySkipToken(AsmToken::EndOfStatement));
5335 
5336     StringRef ID;
5337     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5338       return true;
5339 
5340     if (ID == ".end_amd_kernel_code_t")
5341       break;
5342 
5343     if (ParseAMDKernelCodeTValue(ID, Header))
5344       return true;
5345   }
5346 
5347   getTargetStreamer().EmitAMDKernelCodeT(Header);
5348 
5349   return false;
5350 }
5351 
5352 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5353   StringRef KernelName;
5354   if (!parseId(KernelName, "expected symbol name"))
5355     return true;
5356 
5357   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5358                                            ELF::STT_AMDGPU_HSA_KERNEL);
5359 
5360   KernelScope.initialize(getContext());
5361   return false;
5362 }
5363 
5364 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5365   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5366     return Error(getLoc(),
5367                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5368                  "architectures");
5369   }
5370 
5371   auto TargetIDDirective = getLexer().getTok().getStringContents();
5372   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5373     return Error(getParser().getTok().getLoc(), "target id must match options");
5374 
5375   getTargetStreamer().EmitISAVersion();
5376   Lex();
5377 
5378   return false;
5379 }
5380 
5381 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5382   const char *AssemblerDirectiveBegin;
5383   const char *AssemblerDirectiveEnd;
5384   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5385       isHsaAbiVersion3AndAbove(&getSTI())
5386           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5387                             HSAMD::V3::AssemblerDirectiveEnd)
5388           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5389                             HSAMD::AssemblerDirectiveEnd);
5390 
5391   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5392     return Error(getLoc(),
5393                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5394                  "not available on non-amdhsa OSes")).str());
5395   }
5396 
5397   std::string HSAMetadataString;
5398   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5399                           HSAMetadataString))
5400     return true;
5401 
5402   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5403     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5404       return Error(getLoc(), "invalid HSA metadata");
5405   } else {
5406     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5407       return Error(getLoc(), "invalid HSA metadata");
5408   }
5409 
5410   return false;
5411 }
5412 
5413 /// Common code to parse out a block of text (typically YAML) between start and
5414 /// end directives.
5415 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5416                                           const char *AssemblerDirectiveEnd,
5417                                           std::string &CollectString) {
5418 
5419   raw_string_ostream CollectStream(CollectString);
5420 
5421   getLexer().setSkipSpace(false);
5422 
5423   bool FoundEnd = false;
5424   while (!isToken(AsmToken::Eof)) {
5425     while (isToken(AsmToken::Space)) {
5426       CollectStream << getTokenStr();
5427       Lex();
5428     }
5429 
5430     if (trySkipId(AssemblerDirectiveEnd)) {
5431       FoundEnd = true;
5432       break;
5433     }
5434 
5435     CollectStream << Parser.parseStringToEndOfStatement()
5436                   << getContext().getAsmInfo()->getSeparatorString();
5437 
5438     Parser.eatToEndOfStatement();
5439   }
5440 
5441   getLexer().setSkipSpace(true);
5442 
5443   if (isToken(AsmToken::Eof) && !FoundEnd) {
5444     return TokError(Twine("expected directive ") +
5445                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5446   }
5447 
5448   CollectStream.flush();
5449   return false;
5450 }
5451 
5452 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5453 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5454   std::string String;
5455   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5456                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5457     return true;
5458 
5459   auto PALMetadata = getTargetStreamer().getPALMetadata();
5460   if (!PALMetadata->setFromString(String))
5461     return Error(getLoc(), "invalid PAL metadata");
5462   return false;
5463 }
5464 
5465 /// Parse the assembler directive for old linear-format PAL metadata.
5466 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5467   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5468     return Error(getLoc(),
5469                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5470                  "not available on non-amdpal OSes")).str());
5471   }
5472 
5473   auto PALMetadata = getTargetStreamer().getPALMetadata();
5474   PALMetadata->setLegacy();
5475   for (;;) {
5476     uint32_t Key, Value;
5477     if (ParseAsAbsoluteExpression(Key)) {
5478       return TokError(Twine("invalid value in ") +
5479                       Twine(PALMD::AssemblerDirective));
5480     }
5481     if (!trySkipToken(AsmToken::Comma)) {
5482       return TokError(Twine("expected an even number of values in ") +
5483                       Twine(PALMD::AssemblerDirective));
5484     }
5485     if (ParseAsAbsoluteExpression(Value)) {
5486       return TokError(Twine("invalid value in ") +
5487                       Twine(PALMD::AssemblerDirective));
5488     }
5489     PALMetadata->setRegister(Key, Value);
5490     if (!trySkipToken(AsmToken::Comma))
5491       break;
5492   }
5493   return false;
5494 }
5495 
5496 /// ParseDirectiveAMDGPULDS
5497 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
5498 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5499   if (getParser().checkForValidSection())
5500     return true;
5501 
5502   StringRef Name;
5503   SMLoc NameLoc = getLoc();
5504   if (getParser().parseIdentifier(Name))
5505     return TokError("expected identifier in directive");
5506 
5507   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5508   if (parseToken(AsmToken::Comma, "expected ','"))
5509     return true;
5510 
5511   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5512 
5513   int64_t Size;
5514   SMLoc SizeLoc = getLoc();
5515   if (getParser().parseAbsoluteExpression(Size))
5516     return true;
5517   if (Size < 0)
5518     return Error(SizeLoc, "size must be non-negative");
5519   if (Size > LocalMemorySize)
5520     return Error(SizeLoc, "size is too large");
5521 
5522   int64_t Alignment = 4;
5523   if (trySkipToken(AsmToken::Comma)) {
5524     SMLoc AlignLoc = getLoc();
5525     if (getParser().parseAbsoluteExpression(Alignment))
5526       return true;
5527     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5528       return Error(AlignLoc, "alignment must be a power of two");
5529 
5530     // Alignment larger than the size of LDS is possible in theory, as long
5531     // as the linker manages to place to symbol at address 0, but we do want
5532     // to make sure the alignment fits nicely into a 32-bit integer.
5533     if (Alignment >= 1u << 31)
5534       return Error(AlignLoc, "alignment is too large");
5535   }
5536 
5537   if (parseEOL())
5538     return true;
5539 
5540   Symbol->redefineIfPossible();
5541   if (!Symbol->isUndefined())
5542     return Error(NameLoc, "invalid symbol redefinition");
5543 
5544   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5545   return false;
5546 }
5547 
5548 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5549   StringRef IDVal = DirectiveID.getString();
5550 
5551   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5552     if (IDVal == ".amdhsa_kernel")
5553      return ParseDirectiveAMDHSAKernel();
5554 
5555     // TODO: Restructure/combine with PAL metadata directive.
5556     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5557       return ParseDirectiveHSAMetadata();
5558   } else {
5559     if (IDVal == ".hsa_code_object_version")
5560       return ParseDirectiveHSACodeObjectVersion();
5561 
5562     if (IDVal == ".hsa_code_object_isa")
5563       return ParseDirectiveHSACodeObjectISA();
5564 
5565     if (IDVal == ".amd_kernel_code_t")
5566       return ParseDirectiveAMDKernelCodeT();
5567 
5568     if (IDVal == ".amdgpu_hsa_kernel")
5569       return ParseDirectiveAMDGPUHsaKernel();
5570 
5571     if (IDVal == ".amd_amdgpu_isa")
5572       return ParseDirectiveISAVersion();
5573 
5574     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5575       return ParseDirectiveHSAMetadata();
5576   }
5577 
5578   if (IDVal == ".amdgcn_target")
5579     return ParseDirectiveAMDGCNTarget();
5580 
5581   if (IDVal == ".amdgpu_lds")
5582     return ParseDirectiveAMDGPULDS();
5583 
5584   if (IDVal == PALMD::AssemblerDirectiveBegin)
5585     return ParseDirectivePALMetadataBegin();
5586 
5587   if (IDVal == PALMD::AssemblerDirective)
5588     return ParseDirectivePALMetadata();
5589 
5590   return true;
5591 }
5592 
5593 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5594                                            unsigned RegNo) {
5595 
5596   if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
5597     return isGFX9Plus();
5598 
5599   // GFX10 has 2 more SGPRs 104 and 105.
5600   if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
5601     return hasSGPR104_SGPR105();
5602 
5603   switch (RegNo) {
5604   case AMDGPU::SRC_SHARED_BASE:
5605   case AMDGPU::SRC_SHARED_LIMIT:
5606   case AMDGPU::SRC_PRIVATE_BASE:
5607   case AMDGPU::SRC_PRIVATE_LIMIT:
5608   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5609     return isGFX9Plus();
5610   case AMDGPU::TBA:
5611   case AMDGPU::TBA_LO:
5612   case AMDGPU::TBA_HI:
5613   case AMDGPU::TMA:
5614   case AMDGPU::TMA_LO:
5615   case AMDGPU::TMA_HI:
5616     return !isGFX9Plus();
5617   case AMDGPU::XNACK_MASK:
5618   case AMDGPU::XNACK_MASK_LO:
5619   case AMDGPU::XNACK_MASK_HI:
5620     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5621   case AMDGPU::SGPR_NULL:
5622     return isGFX10Plus();
5623   default:
5624     break;
5625   }
5626 
5627   if (isCI())
5628     return true;
5629 
5630   if (isSI() || isGFX10Plus()) {
5631     // No flat_scr on SI.
5632     // On GFX10 flat scratch is not a valid register operand and can only be
5633     // accessed with s_setreg/s_getreg.
5634     switch (RegNo) {
5635     case AMDGPU::FLAT_SCR:
5636     case AMDGPU::FLAT_SCR_LO:
5637     case AMDGPU::FLAT_SCR_HI:
5638       return false;
5639     default:
5640       return true;
5641     }
5642   }
5643 
5644   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5645   // SI/CI have.
5646   if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
5647     return hasSGPR102_SGPR103();
5648 
5649   return true;
5650 }
5651 
5652 OperandMatchResultTy
5653 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5654                               OperandMode Mode) {
5655   // Try to parse with a custom parser
5656   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5657 
5658   // If we successfully parsed the operand or if there as an error parsing,
5659   // we are done.
5660   //
5661   // If we are parsing after we reach EndOfStatement then this means we
5662   // are appending default values to the Operands list.  This is only done
5663   // by custom parser, so we shouldn't continue on to the generic parsing.
5664   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5665       isToken(AsmToken::EndOfStatement))
5666     return ResTy;
5667 
5668   SMLoc RBraceLoc;
5669   SMLoc LBraceLoc = getLoc();
5670   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5671     unsigned Prefix = Operands.size();
5672 
5673     for (;;) {
5674       auto Loc = getLoc();
5675       ResTy = parseReg(Operands);
5676       if (ResTy == MatchOperand_NoMatch)
5677         Error(Loc, "expected a register");
5678       if (ResTy != MatchOperand_Success)
5679         return MatchOperand_ParseFail;
5680 
5681       RBraceLoc = getLoc();
5682       if (trySkipToken(AsmToken::RBrac))
5683         break;
5684 
5685       if (!skipToken(AsmToken::Comma,
5686                      "expected a comma or a closing square bracket")) {
5687         return MatchOperand_ParseFail;
5688       }
5689     }
5690 
5691     if (Operands.size() - Prefix > 1) {
5692       Operands.insert(Operands.begin() + Prefix,
5693                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5694       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5695     }
5696 
5697     return MatchOperand_Success;
5698   }
5699 
5700   return parseRegOrImm(Operands);
5701 }
5702 
5703 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5704   // Clear any forced encodings from the previous instruction.
5705   setForcedEncodingSize(0);
5706   setForcedDPP(false);
5707   setForcedSDWA(false);
5708 
5709   if (Name.endswith("_e64_dpp")) {
5710     setForcedDPP(true);
5711     setForcedEncodingSize(64);
5712     return Name.substr(0, Name.size() - 8);
5713   } else if (Name.endswith("_e64")) {
5714     setForcedEncodingSize(64);
5715     return Name.substr(0, Name.size() - 4);
5716   } else if (Name.endswith("_e32")) {
5717     setForcedEncodingSize(32);
5718     return Name.substr(0, Name.size() - 4);
5719   } else if (Name.endswith("_dpp")) {
5720     setForcedDPP(true);
5721     return Name.substr(0, Name.size() - 4);
5722   } else if (Name.endswith("_sdwa")) {
5723     setForcedSDWA(true);
5724     return Name.substr(0, Name.size() - 5);
5725   }
5726   return Name;
5727 }
5728 
5729 static void applyMnemonicAliases(StringRef &Mnemonic,
5730                                  const FeatureBitset &Features,
5731                                  unsigned VariantID);
5732 
5733 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5734                                        StringRef Name,
5735                                        SMLoc NameLoc, OperandVector &Operands) {
5736   // Add the instruction mnemonic
5737   Name = parseMnemonicSuffix(Name);
5738 
5739   // If the target architecture uses MnemonicAlias, call it here to parse
5740   // operands correctly.
5741   applyMnemonicAliases(Name, getAvailableFeatures(), 0);
5742 
5743   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5744 
5745   bool IsMIMG = Name.startswith("image_");
5746 
5747   while (!trySkipToken(AsmToken::EndOfStatement)) {
5748     OperandMode Mode = OperandMode_Default;
5749     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5750       Mode = OperandMode_NSA;
5751     CPolSeen = 0;
5752     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5753 
5754     if (Res != MatchOperand_Success) {
5755       checkUnsupportedInstruction(Name, NameLoc);
5756       if (!Parser.hasPendingError()) {
5757         // FIXME: use real operand location rather than the current location.
5758         StringRef Msg =
5759           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5760                                             "not a valid operand.";
5761         Error(getLoc(), Msg);
5762       }
5763       while (!trySkipToken(AsmToken::EndOfStatement)) {
5764         lex();
5765       }
5766       return true;
5767     }
5768 
5769     // Eat the comma or space if there is one.
5770     trySkipToken(AsmToken::Comma);
5771   }
5772 
5773   return false;
5774 }
5775 
5776 //===----------------------------------------------------------------------===//
5777 // Utility functions
5778 //===----------------------------------------------------------------------===//
5779 
5780 OperandMatchResultTy
5781 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5782 
5783   if (!trySkipId(Prefix, AsmToken::Colon))
5784     return MatchOperand_NoMatch;
5785 
5786   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5787 }
5788 
5789 OperandMatchResultTy
5790 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5791                                     AMDGPUOperand::ImmTy ImmTy,
5792                                     bool (*ConvertResult)(int64_t&)) {
5793   SMLoc S = getLoc();
5794   int64_t Value = 0;
5795 
5796   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5797   if (Res != MatchOperand_Success)
5798     return Res;
5799 
5800   if (ConvertResult && !ConvertResult(Value)) {
5801     Error(S, "invalid " + StringRef(Prefix) + " value.");
5802   }
5803 
5804   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5805   return MatchOperand_Success;
5806 }
5807 
5808 OperandMatchResultTy
5809 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5810                                              OperandVector &Operands,
5811                                              AMDGPUOperand::ImmTy ImmTy,
5812                                              bool (*ConvertResult)(int64_t&)) {
5813   SMLoc S = getLoc();
5814   if (!trySkipId(Prefix, AsmToken::Colon))
5815     return MatchOperand_NoMatch;
5816 
5817   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5818     return MatchOperand_ParseFail;
5819 
5820   unsigned Val = 0;
5821   const unsigned MaxSize = 4;
5822 
5823   // FIXME: How to verify the number of elements matches the number of src
5824   // operands?
5825   for (int I = 0; ; ++I) {
5826     int64_t Op;
5827     SMLoc Loc = getLoc();
5828     if (!parseExpr(Op))
5829       return MatchOperand_ParseFail;
5830 
5831     if (Op != 0 && Op != 1) {
5832       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5833       return MatchOperand_ParseFail;
5834     }
5835 
5836     Val |= (Op << I);
5837 
5838     if (trySkipToken(AsmToken::RBrac))
5839       break;
5840 
5841     if (I + 1 == MaxSize) {
5842       Error(getLoc(), "expected a closing square bracket");
5843       return MatchOperand_ParseFail;
5844     }
5845 
5846     if (!skipToken(AsmToken::Comma, "expected a comma"))
5847       return MatchOperand_ParseFail;
5848   }
5849 
5850   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5851   return MatchOperand_Success;
5852 }
5853 
5854 OperandMatchResultTy
5855 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5856                                AMDGPUOperand::ImmTy ImmTy) {
5857   int64_t Bit;
5858   SMLoc S = getLoc();
5859 
5860   if (trySkipId(Name)) {
5861     Bit = 1;
5862   } else if (trySkipId("no", Name)) {
5863     Bit = 0;
5864   } else {
5865     return MatchOperand_NoMatch;
5866   }
5867 
5868   if (Name == "r128" && !hasMIMG_R128()) {
5869     Error(S, "r128 modifier is not supported on this GPU");
5870     return MatchOperand_ParseFail;
5871   }
5872   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5873     Error(S, "a16 modifier is not supported on this GPU");
5874     return MatchOperand_ParseFail;
5875   }
5876 
5877   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5878     ImmTy = AMDGPUOperand::ImmTyR128A16;
5879 
5880   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5881   return MatchOperand_Success;
5882 }
5883 
5884 OperandMatchResultTy
5885 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5886   unsigned CPolOn = 0;
5887   unsigned CPolOff = 0;
5888   SMLoc S = getLoc();
5889 
5890   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5891   if (isGFX940() && !Mnemo.startswith("s_")) {
5892     if (trySkipId("sc0"))
5893       CPolOn = AMDGPU::CPol::SC0;
5894     else if (trySkipId("nosc0"))
5895       CPolOff = AMDGPU::CPol::SC0;
5896     else if (trySkipId("nt"))
5897       CPolOn = AMDGPU::CPol::NT;
5898     else if (trySkipId("nont"))
5899       CPolOff = AMDGPU::CPol::NT;
5900     else if (trySkipId("sc1"))
5901       CPolOn = AMDGPU::CPol::SC1;
5902     else if (trySkipId("nosc1"))
5903       CPolOff = AMDGPU::CPol::SC1;
5904     else
5905       return MatchOperand_NoMatch;
5906   }
5907   else if (trySkipId("glc"))
5908     CPolOn = AMDGPU::CPol::GLC;
5909   else if (trySkipId("noglc"))
5910     CPolOff = AMDGPU::CPol::GLC;
5911   else if (trySkipId("slc"))
5912     CPolOn = AMDGPU::CPol::SLC;
5913   else if (trySkipId("noslc"))
5914     CPolOff = AMDGPU::CPol::SLC;
5915   else if (trySkipId("dlc"))
5916     CPolOn = AMDGPU::CPol::DLC;
5917   else if (trySkipId("nodlc"))
5918     CPolOff = AMDGPU::CPol::DLC;
5919   else if (trySkipId("scc"))
5920     CPolOn = AMDGPU::CPol::SCC;
5921   else if (trySkipId("noscc"))
5922     CPolOff = AMDGPU::CPol::SCC;
5923   else
5924     return MatchOperand_NoMatch;
5925 
5926   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5927     Error(S, "dlc modifier is not supported on this GPU");
5928     return MatchOperand_ParseFail;
5929   }
5930 
5931   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5932     Error(S, "scc modifier is not supported on this GPU");
5933     return MatchOperand_ParseFail;
5934   }
5935 
5936   if (CPolSeen & (CPolOn | CPolOff)) {
5937     Error(S, "duplicate cache policy modifier");
5938     return MatchOperand_ParseFail;
5939   }
5940 
5941   CPolSeen |= (CPolOn | CPolOff);
5942 
5943   for (unsigned I = 1; I != Operands.size(); ++I) {
5944     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5945     if (Op.isCPol()) {
5946       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5947       return MatchOperand_Success;
5948     }
5949   }
5950 
5951   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5952                                               AMDGPUOperand::ImmTyCPol));
5953 
5954   return MatchOperand_Success;
5955 }
5956 
5957 static void addOptionalImmOperand(
5958   MCInst& Inst, const OperandVector& Operands,
5959   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5960   AMDGPUOperand::ImmTy ImmT,
5961   int64_t Default = 0) {
5962   auto i = OptionalIdx.find(ImmT);
5963   if (i != OptionalIdx.end()) {
5964     unsigned Idx = i->second;
5965     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5966   } else {
5967     Inst.addOperand(MCOperand::createImm(Default));
5968   }
5969 }
5970 
5971 OperandMatchResultTy
5972 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5973                                        StringRef &Value,
5974                                        SMLoc &StringLoc) {
5975   if (!trySkipId(Prefix, AsmToken::Colon))
5976     return MatchOperand_NoMatch;
5977 
5978   StringLoc = getLoc();
5979   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5980                                                   : MatchOperand_ParseFail;
5981 }
5982 
5983 //===----------------------------------------------------------------------===//
5984 // MTBUF format
5985 //===----------------------------------------------------------------------===//
5986 
5987 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5988                                   int64_t MaxVal,
5989                                   int64_t &Fmt) {
5990   int64_t Val;
5991   SMLoc Loc = getLoc();
5992 
5993   auto Res = parseIntWithPrefix(Pref, Val);
5994   if (Res == MatchOperand_ParseFail)
5995     return false;
5996   if (Res == MatchOperand_NoMatch)
5997     return true;
5998 
5999   if (Val < 0 || Val > MaxVal) {
6000     Error(Loc, Twine("out of range ", StringRef(Pref)));
6001     return false;
6002   }
6003 
6004   Fmt = Val;
6005   return true;
6006 }
6007 
6008 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
6009 // values to live in a joint format operand in the MCInst encoding.
6010 OperandMatchResultTy
6011 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
6012   using namespace llvm::AMDGPU::MTBUFFormat;
6013 
6014   int64_t Dfmt = DFMT_UNDEF;
6015   int64_t Nfmt = NFMT_UNDEF;
6016 
6017   // dfmt and nfmt can appear in either order, and each is optional.
6018   for (int I = 0; I < 2; ++I) {
6019     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
6020       return MatchOperand_ParseFail;
6021 
6022     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
6023       return MatchOperand_ParseFail;
6024     }
6025     // Skip optional comma between dfmt/nfmt
6026     // but guard against 2 commas following each other.
6027     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
6028         !peekToken().is(AsmToken::Comma)) {
6029       trySkipToken(AsmToken::Comma);
6030     }
6031   }
6032 
6033   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
6034     return MatchOperand_NoMatch;
6035 
6036   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6037   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6038 
6039   Format = encodeDfmtNfmt(Dfmt, Nfmt);
6040   return MatchOperand_Success;
6041 }
6042 
6043 OperandMatchResultTy
6044 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
6045   using namespace llvm::AMDGPU::MTBUFFormat;
6046 
6047   int64_t Fmt = UFMT_UNDEF;
6048 
6049   if (!tryParseFmt("format", UFMT_MAX, Fmt))
6050     return MatchOperand_ParseFail;
6051 
6052   if (Fmt == UFMT_UNDEF)
6053     return MatchOperand_NoMatch;
6054 
6055   Format = Fmt;
6056   return MatchOperand_Success;
6057 }
6058 
6059 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6060                                     int64_t &Nfmt,
6061                                     StringRef FormatStr,
6062                                     SMLoc Loc) {
6063   using namespace llvm::AMDGPU::MTBUFFormat;
6064   int64_t Format;
6065 
6066   Format = getDfmt(FormatStr);
6067   if (Format != DFMT_UNDEF) {
6068     Dfmt = Format;
6069     return true;
6070   }
6071 
6072   Format = getNfmt(FormatStr, getSTI());
6073   if (Format != NFMT_UNDEF) {
6074     Nfmt = Format;
6075     return true;
6076   }
6077 
6078   Error(Loc, "unsupported format");
6079   return false;
6080 }
6081 
6082 OperandMatchResultTy
6083 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
6084                                           SMLoc FormatLoc,
6085                                           int64_t &Format) {
6086   using namespace llvm::AMDGPU::MTBUFFormat;
6087 
6088   int64_t Dfmt = DFMT_UNDEF;
6089   int64_t Nfmt = NFMT_UNDEF;
6090   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
6091     return MatchOperand_ParseFail;
6092 
6093   if (trySkipToken(AsmToken::Comma)) {
6094     StringRef Str;
6095     SMLoc Loc = getLoc();
6096     if (!parseId(Str, "expected a format string") ||
6097         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
6098       return MatchOperand_ParseFail;
6099     }
6100     if (Dfmt == DFMT_UNDEF) {
6101       Error(Loc, "duplicate numeric format");
6102       return MatchOperand_ParseFail;
6103     } else if (Nfmt == NFMT_UNDEF) {
6104       Error(Loc, "duplicate data format");
6105       return MatchOperand_ParseFail;
6106     }
6107   }
6108 
6109   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6110   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6111 
6112   if (isGFX10Plus()) {
6113     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
6114     if (Ufmt == UFMT_UNDEF) {
6115       Error(FormatLoc, "unsupported format");
6116       return MatchOperand_ParseFail;
6117     }
6118     Format = Ufmt;
6119   } else {
6120     Format = encodeDfmtNfmt(Dfmt, Nfmt);
6121   }
6122 
6123   return MatchOperand_Success;
6124 }
6125 
6126 OperandMatchResultTy
6127 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6128                                             SMLoc Loc,
6129                                             int64_t &Format) {
6130   using namespace llvm::AMDGPU::MTBUFFormat;
6131 
6132   auto Id = getUnifiedFormat(FormatStr, getSTI());
6133   if (Id == UFMT_UNDEF)
6134     return MatchOperand_NoMatch;
6135 
6136   if (!isGFX10Plus()) {
6137     Error(Loc, "unified format is not supported on this GPU");
6138     return MatchOperand_ParseFail;
6139   }
6140 
6141   Format = Id;
6142   return MatchOperand_Success;
6143 }
6144 
6145 OperandMatchResultTy
6146 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6147   using namespace llvm::AMDGPU::MTBUFFormat;
6148   SMLoc Loc = getLoc();
6149 
6150   if (!parseExpr(Format))
6151     return MatchOperand_ParseFail;
6152   if (!isValidFormatEncoding(Format, getSTI())) {
6153     Error(Loc, "out of range format");
6154     return MatchOperand_ParseFail;
6155   }
6156 
6157   return MatchOperand_Success;
6158 }
6159 
6160 OperandMatchResultTy
6161 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6162   using namespace llvm::AMDGPU::MTBUFFormat;
6163 
6164   if (!trySkipId("format", AsmToken::Colon))
6165     return MatchOperand_NoMatch;
6166 
6167   if (trySkipToken(AsmToken::LBrac)) {
6168     StringRef FormatStr;
6169     SMLoc Loc = getLoc();
6170     if (!parseId(FormatStr, "expected a format string"))
6171       return MatchOperand_ParseFail;
6172 
6173     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6174     if (Res == MatchOperand_NoMatch)
6175       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6176     if (Res != MatchOperand_Success)
6177       return Res;
6178 
6179     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6180       return MatchOperand_ParseFail;
6181 
6182     return MatchOperand_Success;
6183   }
6184 
6185   return parseNumericFormat(Format);
6186 }
6187 
6188 OperandMatchResultTy
6189 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
6190   using namespace llvm::AMDGPU::MTBUFFormat;
6191 
6192   int64_t Format = getDefaultFormatEncoding(getSTI());
6193   OperandMatchResultTy Res;
6194   SMLoc Loc = getLoc();
6195 
6196   // Parse legacy format syntax.
6197   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
6198   if (Res == MatchOperand_ParseFail)
6199     return Res;
6200 
6201   bool FormatFound = (Res == MatchOperand_Success);
6202 
6203   Operands.push_back(
6204     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6205 
6206   if (FormatFound)
6207     trySkipToken(AsmToken::Comma);
6208 
6209   if (isToken(AsmToken::EndOfStatement)) {
6210     // We are expecting an soffset operand,
6211     // but let matcher handle the error.
6212     return MatchOperand_Success;
6213   }
6214 
6215   // Parse soffset.
6216   Res = parseRegOrImm(Operands);
6217   if (Res != MatchOperand_Success)
6218     return Res;
6219 
6220   trySkipToken(AsmToken::Comma);
6221 
6222   if (!FormatFound) {
6223     Res = parseSymbolicOrNumericFormat(Format);
6224     if (Res == MatchOperand_ParseFail)
6225       return Res;
6226     if (Res == MatchOperand_Success) {
6227       auto Size = Operands.size();
6228       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
6229       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6230       Op.setImm(Format);
6231     }
6232     return MatchOperand_Success;
6233   }
6234 
6235   if (isId("format") && peekToken().is(AsmToken::Colon)) {
6236     Error(getLoc(), "duplicate format");
6237     return MatchOperand_ParseFail;
6238   }
6239   return MatchOperand_Success;
6240 }
6241 
6242 //===----------------------------------------------------------------------===//
6243 // ds
6244 //===----------------------------------------------------------------------===//
6245 
6246 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
6247                                     const OperandVector &Operands) {
6248   OptionalImmIndexMap OptionalIdx;
6249 
6250   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6251     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6252 
6253     // Add the register arguments
6254     if (Op.isReg()) {
6255       Op.addRegOperands(Inst, 1);
6256       continue;
6257     }
6258 
6259     // Handle optional arguments
6260     OptionalIdx[Op.getImmTy()] = i;
6261   }
6262 
6263   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
6264   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
6265   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6266 
6267   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6268 }
6269 
6270 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
6271                                 bool IsGdsHardcoded) {
6272   OptionalImmIndexMap OptionalIdx;
6273 
6274   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6275     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6276 
6277     // Add the register arguments
6278     if (Op.isReg()) {
6279       Op.addRegOperands(Inst, 1);
6280       continue;
6281     }
6282 
6283     if (Op.isToken() && Op.getToken() == "gds") {
6284       IsGdsHardcoded = true;
6285       continue;
6286     }
6287 
6288     // Handle optional arguments
6289     OptionalIdx[Op.getImmTy()] = i;
6290   }
6291 
6292   AMDGPUOperand::ImmTy OffsetType =
6293     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
6294      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
6295      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
6296                                                       AMDGPUOperand::ImmTyOffset;
6297 
6298   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
6299 
6300   if (!IsGdsHardcoded) {
6301     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6302   }
6303   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6304 }
6305 
6306 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
6307   OptionalImmIndexMap OptionalIdx;
6308 
6309   unsigned OperandIdx[4];
6310   unsigned EnMask = 0;
6311   int SrcIdx = 0;
6312 
6313   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6314     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6315 
6316     // Add the register arguments
6317     if (Op.isReg()) {
6318       assert(SrcIdx < 4);
6319       OperandIdx[SrcIdx] = Inst.size();
6320       Op.addRegOperands(Inst, 1);
6321       ++SrcIdx;
6322       continue;
6323     }
6324 
6325     if (Op.isOff()) {
6326       assert(SrcIdx < 4);
6327       OperandIdx[SrcIdx] = Inst.size();
6328       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
6329       ++SrcIdx;
6330       continue;
6331     }
6332 
6333     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
6334       Op.addImmOperands(Inst, 1);
6335       continue;
6336     }
6337 
6338     if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
6339       continue;
6340 
6341     // Handle optional arguments
6342     OptionalIdx[Op.getImmTy()] = i;
6343   }
6344 
6345   assert(SrcIdx == 4);
6346 
6347   bool Compr = false;
6348   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
6349     Compr = true;
6350     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
6351     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
6352     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
6353   }
6354 
6355   for (auto i = 0; i < SrcIdx; ++i) {
6356     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
6357       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
6358     }
6359   }
6360 
6361   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
6362   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
6363 
6364   Inst.addOperand(MCOperand::createImm(EnMask));
6365 }
6366 
6367 //===----------------------------------------------------------------------===//
6368 // s_waitcnt
6369 //===----------------------------------------------------------------------===//
6370 
6371 static bool
6372 encodeCnt(
6373   const AMDGPU::IsaVersion ISA,
6374   int64_t &IntVal,
6375   int64_t CntVal,
6376   bool Saturate,
6377   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6378   unsigned (*decode)(const IsaVersion &Version, unsigned))
6379 {
6380   bool Failed = false;
6381 
6382   IntVal = encode(ISA, IntVal, CntVal);
6383   if (CntVal != decode(ISA, IntVal)) {
6384     if (Saturate) {
6385       IntVal = encode(ISA, IntVal, -1);
6386     } else {
6387       Failed = true;
6388     }
6389   }
6390   return Failed;
6391 }
6392 
6393 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6394 
6395   SMLoc CntLoc = getLoc();
6396   StringRef CntName = getTokenStr();
6397 
6398   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6399       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6400     return false;
6401 
6402   int64_t CntVal;
6403   SMLoc ValLoc = getLoc();
6404   if (!parseExpr(CntVal))
6405     return false;
6406 
6407   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6408 
6409   bool Failed = true;
6410   bool Sat = CntName.endswith("_sat");
6411 
6412   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6413     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6414   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6415     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6416   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6417     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6418   } else {
6419     Error(CntLoc, "invalid counter name " + CntName);
6420     return false;
6421   }
6422 
6423   if (Failed) {
6424     Error(ValLoc, "too large value for " + CntName);
6425     return false;
6426   }
6427 
6428   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6429     return false;
6430 
6431   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6432     if (isToken(AsmToken::EndOfStatement)) {
6433       Error(getLoc(), "expected a counter name");
6434       return false;
6435     }
6436   }
6437 
6438   return true;
6439 }
6440 
6441 OperandMatchResultTy
6442 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
6443   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6444   int64_t Waitcnt = getWaitcntBitMask(ISA);
6445   SMLoc S = getLoc();
6446 
6447   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6448     while (!isToken(AsmToken::EndOfStatement)) {
6449       if (!parseCnt(Waitcnt))
6450         return MatchOperand_ParseFail;
6451     }
6452   } else {
6453     if (!parseExpr(Waitcnt))
6454       return MatchOperand_ParseFail;
6455   }
6456 
6457   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6458   return MatchOperand_Success;
6459 }
6460 
6461 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
6462   SMLoc FieldLoc = getLoc();
6463   StringRef FieldName = getTokenStr();
6464   if (!skipToken(AsmToken::Identifier, "expected a field name") ||
6465       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6466     return false;
6467 
6468   SMLoc ValueLoc = getLoc();
6469   StringRef ValueName = getTokenStr();
6470   if (!skipToken(AsmToken::Identifier, "expected a value name") ||
6471       !skipToken(AsmToken::RParen, "expected a right parenthesis"))
6472     return false;
6473 
6474   unsigned Shift;
6475   if (FieldName == "instid0") {
6476     Shift = 0;
6477   } else if (FieldName == "instskip") {
6478     Shift = 4;
6479   } else if (FieldName == "instid1") {
6480     Shift = 7;
6481   } else {
6482     Error(FieldLoc, "invalid field name " + FieldName);
6483     return false;
6484   }
6485 
6486   int Value;
6487   if (Shift == 4) {
6488     // Parse values for instskip.
6489     Value = StringSwitch<int>(ValueName)
6490                 .Case("SAME", 0)
6491                 .Case("NEXT", 1)
6492                 .Case("SKIP_1", 2)
6493                 .Case("SKIP_2", 3)
6494                 .Case("SKIP_3", 4)
6495                 .Case("SKIP_4", 5)
6496                 .Default(-1);
6497   } else {
6498     // Parse values for instid0 and instid1.
6499     Value = StringSwitch<int>(ValueName)
6500                 .Case("NO_DEP", 0)
6501                 .Case("VALU_DEP_1", 1)
6502                 .Case("VALU_DEP_2", 2)
6503                 .Case("VALU_DEP_3", 3)
6504                 .Case("VALU_DEP_4", 4)
6505                 .Case("TRANS32_DEP_1", 5)
6506                 .Case("TRANS32_DEP_2", 6)
6507                 .Case("TRANS32_DEP_3", 7)
6508                 .Case("FMA_ACCUM_CYCLE_1", 8)
6509                 .Case("SALU_CYCLE_1", 9)
6510                 .Case("SALU_CYCLE_2", 10)
6511                 .Case("SALU_CYCLE_3", 11)
6512                 .Default(-1);
6513   }
6514   if (Value < 0) {
6515     Error(ValueLoc, "invalid value name " + ValueName);
6516     return false;
6517   }
6518 
6519   Delay |= Value << Shift;
6520   return true;
6521 }
6522 
6523 OperandMatchResultTy
6524 AMDGPUAsmParser::parseSDelayAluOps(OperandVector &Operands) {
6525   int64_t Delay = 0;
6526   SMLoc S = getLoc();
6527 
6528   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6529     do {
6530       if (!parseDelay(Delay))
6531         return MatchOperand_ParseFail;
6532     } while (trySkipToken(AsmToken::Pipe));
6533   } else {
6534     if (!parseExpr(Delay))
6535       return MatchOperand_ParseFail;
6536   }
6537 
6538   Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
6539   return MatchOperand_Success;
6540 }
6541 
6542 bool
6543 AMDGPUOperand::isSWaitCnt() const {
6544   return isImm();
6545 }
6546 
6547 bool AMDGPUOperand::isSDelayAlu() const { return isImm(); }
6548 
6549 //===----------------------------------------------------------------------===//
6550 // DepCtr
6551 //===----------------------------------------------------------------------===//
6552 
6553 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
6554                                   StringRef DepCtrName) {
6555   switch (ErrorId) {
6556   case OPR_ID_UNKNOWN:
6557     Error(Loc, Twine("invalid counter name ", DepCtrName));
6558     return;
6559   case OPR_ID_UNSUPPORTED:
6560     Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
6561     return;
6562   case OPR_ID_DUPLICATE:
6563     Error(Loc, Twine("duplicate counter name ", DepCtrName));
6564     return;
6565   case OPR_VAL_INVALID:
6566     Error(Loc, Twine("invalid value for ", DepCtrName));
6567     return;
6568   default:
6569     assert(false);
6570   }
6571 }
6572 
6573 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
6574 
6575   using namespace llvm::AMDGPU::DepCtr;
6576 
6577   SMLoc DepCtrLoc = getLoc();
6578   StringRef DepCtrName = getTokenStr();
6579 
6580   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6581       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6582     return false;
6583 
6584   int64_t ExprVal;
6585   if (!parseExpr(ExprVal))
6586     return false;
6587 
6588   unsigned PrevOprMask = UsedOprMask;
6589   int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
6590 
6591   if (CntVal < 0) {
6592     depCtrError(DepCtrLoc, CntVal, DepCtrName);
6593     return false;
6594   }
6595 
6596   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6597     return false;
6598 
6599   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6600     if (isToken(AsmToken::EndOfStatement)) {
6601       Error(getLoc(), "expected a counter name");
6602       return false;
6603     }
6604   }
6605 
6606   unsigned CntValMask = PrevOprMask ^ UsedOprMask;
6607   DepCtr = (DepCtr & ~CntValMask) | CntVal;
6608   return true;
6609 }
6610 
6611 OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) {
6612   using namespace llvm::AMDGPU::DepCtr;
6613 
6614   int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
6615   SMLoc Loc = getLoc();
6616 
6617   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6618     unsigned UsedOprMask = 0;
6619     while (!isToken(AsmToken::EndOfStatement)) {
6620       if (!parseDepCtr(DepCtr, UsedOprMask))
6621         return MatchOperand_ParseFail;
6622     }
6623   } else {
6624     if (!parseExpr(DepCtr))
6625       return MatchOperand_ParseFail;
6626   }
6627 
6628   Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
6629   return MatchOperand_Success;
6630 }
6631 
6632 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
6633 
6634 //===----------------------------------------------------------------------===//
6635 // hwreg
6636 //===----------------------------------------------------------------------===//
6637 
6638 bool
6639 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6640                                 OperandInfoTy &Offset,
6641                                 OperandInfoTy &Width) {
6642   using namespace llvm::AMDGPU::Hwreg;
6643 
6644   // The register may be specified by name or using a numeric code
6645   HwReg.Loc = getLoc();
6646   if (isToken(AsmToken::Identifier) &&
6647       (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6648     HwReg.IsSymbolic = true;
6649     lex(); // skip register name
6650   } else if (!parseExpr(HwReg.Id, "a register name")) {
6651     return false;
6652   }
6653 
6654   if (trySkipToken(AsmToken::RParen))
6655     return true;
6656 
6657   // parse optional params
6658   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6659     return false;
6660 
6661   Offset.Loc = getLoc();
6662   if (!parseExpr(Offset.Id))
6663     return false;
6664 
6665   if (!skipToken(AsmToken::Comma, "expected a comma"))
6666     return false;
6667 
6668   Width.Loc = getLoc();
6669   return parseExpr(Width.Id) &&
6670          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6671 }
6672 
6673 bool
6674 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6675                                const OperandInfoTy &Offset,
6676                                const OperandInfoTy &Width) {
6677 
6678   using namespace llvm::AMDGPU::Hwreg;
6679 
6680   if (HwReg.IsSymbolic) {
6681     if (HwReg.Id == OPR_ID_UNSUPPORTED) {
6682       Error(HwReg.Loc,
6683             "specified hardware register is not supported on this GPU");
6684       return false;
6685     }
6686   } else {
6687     if (!isValidHwreg(HwReg.Id)) {
6688       Error(HwReg.Loc,
6689             "invalid code of hardware register: only 6-bit values are legal");
6690       return false;
6691     }
6692   }
6693   if (!isValidHwregOffset(Offset.Id)) {
6694     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6695     return false;
6696   }
6697   if (!isValidHwregWidth(Width.Id)) {
6698     Error(Width.Loc,
6699           "invalid bitfield width: only values from 1 to 32 are legal");
6700     return false;
6701   }
6702   return true;
6703 }
6704 
6705 OperandMatchResultTy
6706 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6707   using namespace llvm::AMDGPU::Hwreg;
6708 
6709   int64_t ImmVal = 0;
6710   SMLoc Loc = getLoc();
6711 
6712   if (trySkipId("hwreg", AsmToken::LParen)) {
6713     OperandInfoTy HwReg(OPR_ID_UNKNOWN);
6714     OperandInfoTy Offset(OFFSET_DEFAULT_);
6715     OperandInfoTy Width(WIDTH_DEFAULT_);
6716     if (parseHwregBody(HwReg, Offset, Width) &&
6717         validateHwreg(HwReg, Offset, Width)) {
6718       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6719     } else {
6720       return MatchOperand_ParseFail;
6721     }
6722   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6723     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6724       Error(Loc, "invalid immediate: only 16-bit values are legal");
6725       return MatchOperand_ParseFail;
6726     }
6727   } else {
6728     return MatchOperand_ParseFail;
6729   }
6730 
6731   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6732   return MatchOperand_Success;
6733 }
6734 
6735 bool AMDGPUOperand::isHwreg() const {
6736   return isImmTy(ImmTyHwreg);
6737 }
6738 
6739 //===----------------------------------------------------------------------===//
6740 // sendmsg
6741 //===----------------------------------------------------------------------===//
6742 
6743 bool
6744 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6745                                   OperandInfoTy &Op,
6746                                   OperandInfoTy &Stream) {
6747   using namespace llvm::AMDGPU::SendMsg;
6748 
6749   Msg.Loc = getLoc();
6750   if (isToken(AsmToken::Identifier) &&
6751       (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6752     Msg.IsSymbolic = true;
6753     lex(); // skip message name
6754   } else if (!parseExpr(Msg.Id, "a message name")) {
6755     return false;
6756   }
6757 
6758   if (trySkipToken(AsmToken::Comma)) {
6759     Op.IsDefined = true;
6760     Op.Loc = getLoc();
6761     if (isToken(AsmToken::Identifier) &&
6762         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6763       lex(); // skip operation name
6764     } else if (!parseExpr(Op.Id, "an operation name")) {
6765       return false;
6766     }
6767 
6768     if (trySkipToken(AsmToken::Comma)) {
6769       Stream.IsDefined = true;
6770       Stream.Loc = getLoc();
6771       if (!parseExpr(Stream.Id))
6772         return false;
6773     }
6774   }
6775 
6776   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6777 }
6778 
6779 bool
6780 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6781                                  const OperandInfoTy &Op,
6782                                  const OperandInfoTy &Stream) {
6783   using namespace llvm::AMDGPU::SendMsg;
6784 
6785   // Validation strictness depends on whether message is specified
6786   // in a symbolic or in a numeric form. In the latter case
6787   // only encoding possibility is checked.
6788   bool Strict = Msg.IsSymbolic;
6789 
6790   if (Strict) {
6791     if (Msg.Id == OPR_ID_UNSUPPORTED) {
6792       Error(Msg.Loc, "specified message id is not supported on this GPU");
6793       return false;
6794     }
6795   } else {
6796     if (!isValidMsgId(Msg.Id, getSTI())) {
6797       Error(Msg.Loc, "invalid message id");
6798       return false;
6799     }
6800   }
6801   if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) {
6802     if (Op.IsDefined) {
6803       Error(Op.Loc, "message does not support operations");
6804     } else {
6805       Error(Msg.Loc, "missing message operation");
6806     }
6807     return false;
6808   }
6809   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6810     Error(Op.Loc, "invalid operation id");
6811     return false;
6812   }
6813   if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) &&
6814       Stream.IsDefined) {
6815     Error(Stream.Loc, "message operation does not support streams");
6816     return false;
6817   }
6818   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6819     Error(Stream.Loc, "invalid message stream id");
6820     return false;
6821   }
6822   return true;
6823 }
6824 
6825 OperandMatchResultTy
6826 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6827   using namespace llvm::AMDGPU::SendMsg;
6828 
6829   int64_t ImmVal = 0;
6830   SMLoc Loc = getLoc();
6831 
6832   if (trySkipId("sendmsg", AsmToken::LParen)) {
6833     OperandInfoTy Msg(OPR_ID_UNKNOWN);
6834     OperandInfoTy Op(OP_NONE_);
6835     OperandInfoTy Stream(STREAM_ID_NONE_);
6836     if (parseSendMsgBody(Msg, Op, Stream) &&
6837         validateSendMsg(Msg, Op, Stream)) {
6838       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6839     } else {
6840       return MatchOperand_ParseFail;
6841     }
6842   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6843     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6844       Error(Loc, "invalid immediate: only 16-bit values are legal");
6845       return MatchOperand_ParseFail;
6846     }
6847   } else {
6848     return MatchOperand_ParseFail;
6849   }
6850 
6851   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6852   return MatchOperand_Success;
6853 }
6854 
6855 bool AMDGPUOperand::isSendMsg() const {
6856   return isImmTy(ImmTySendMsg);
6857 }
6858 
6859 //===----------------------------------------------------------------------===//
6860 // v_interp
6861 //===----------------------------------------------------------------------===//
6862 
6863 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6864   StringRef Str;
6865   SMLoc S = getLoc();
6866 
6867   if (!parseId(Str))
6868     return MatchOperand_NoMatch;
6869 
6870   int Slot = StringSwitch<int>(Str)
6871     .Case("p10", 0)
6872     .Case("p20", 1)
6873     .Case("p0", 2)
6874     .Default(-1);
6875 
6876   if (Slot == -1) {
6877     Error(S, "invalid interpolation slot");
6878     return MatchOperand_ParseFail;
6879   }
6880 
6881   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6882                                               AMDGPUOperand::ImmTyInterpSlot));
6883   return MatchOperand_Success;
6884 }
6885 
6886 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6887   StringRef Str;
6888   SMLoc S = getLoc();
6889 
6890   if (!parseId(Str))
6891     return MatchOperand_NoMatch;
6892 
6893   if (!Str.startswith("attr")) {
6894     Error(S, "invalid interpolation attribute");
6895     return MatchOperand_ParseFail;
6896   }
6897 
6898   StringRef Chan = Str.take_back(2);
6899   int AttrChan = StringSwitch<int>(Chan)
6900     .Case(".x", 0)
6901     .Case(".y", 1)
6902     .Case(".z", 2)
6903     .Case(".w", 3)
6904     .Default(-1);
6905   if (AttrChan == -1) {
6906     Error(S, "invalid or missing interpolation attribute channel");
6907     return MatchOperand_ParseFail;
6908   }
6909 
6910   Str = Str.drop_back(2).drop_front(4);
6911 
6912   uint8_t Attr;
6913   if (Str.getAsInteger(10, Attr)) {
6914     Error(S, "invalid or missing interpolation attribute number");
6915     return MatchOperand_ParseFail;
6916   }
6917 
6918   if (Attr > 63) {
6919     Error(S, "out of bounds interpolation attribute number");
6920     return MatchOperand_ParseFail;
6921   }
6922 
6923   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6924 
6925   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6926                                               AMDGPUOperand::ImmTyInterpAttr));
6927   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6928                                               AMDGPUOperand::ImmTyAttrChan));
6929   return MatchOperand_Success;
6930 }
6931 
6932 //===----------------------------------------------------------------------===//
6933 // exp
6934 //===----------------------------------------------------------------------===//
6935 
6936 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6937   using namespace llvm::AMDGPU::Exp;
6938 
6939   StringRef Str;
6940   SMLoc S = getLoc();
6941 
6942   if (!parseId(Str))
6943     return MatchOperand_NoMatch;
6944 
6945   unsigned Id = getTgtId(Str);
6946   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6947     Error(S, (Id == ET_INVALID) ?
6948                 "invalid exp target" :
6949                 "exp target is not supported on this GPU");
6950     return MatchOperand_ParseFail;
6951   }
6952 
6953   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6954                                               AMDGPUOperand::ImmTyExpTgt));
6955   return MatchOperand_Success;
6956 }
6957 
6958 //===----------------------------------------------------------------------===//
6959 // parser helpers
6960 //===----------------------------------------------------------------------===//
6961 
6962 bool
6963 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6964   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6965 }
6966 
6967 bool
6968 AMDGPUAsmParser::isId(const StringRef Id) const {
6969   return isId(getToken(), Id);
6970 }
6971 
6972 bool
6973 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6974   return getTokenKind() == Kind;
6975 }
6976 
6977 bool
6978 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6979   if (isId(Id)) {
6980     lex();
6981     return true;
6982   }
6983   return false;
6984 }
6985 
6986 bool
6987 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6988   if (isToken(AsmToken::Identifier)) {
6989     StringRef Tok = getTokenStr();
6990     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6991       lex();
6992       return true;
6993     }
6994   }
6995   return false;
6996 }
6997 
6998 bool
6999 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
7000   if (isId(Id) && peekToken().is(Kind)) {
7001     lex();
7002     lex();
7003     return true;
7004   }
7005   return false;
7006 }
7007 
7008 bool
7009 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
7010   if (isToken(Kind)) {
7011     lex();
7012     return true;
7013   }
7014   return false;
7015 }
7016 
7017 bool
7018 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
7019                            const StringRef ErrMsg) {
7020   if (!trySkipToken(Kind)) {
7021     Error(getLoc(), ErrMsg);
7022     return false;
7023   }
7024   return true;
7025 }
7026 
7027 bool
7028 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
7029   SMLoc S = getLoc();
7030 
7031   const MCExpr *Expr;
7032   if (Parser.parseExpression(Expr))
7033     return false;
7034 
7035   if (Expr->evaluateAsAbsolute(Imm))
7036     return true;
7037 
7038   if (Expected.empty()) {
7039     Error(S, "expected absolute expression");
7040   } else {
7041     Error(S, Twine("expected ", Expected) +
7042              Twine(" or an absolute expression"));
7043   }
7044   return false;
7045 }
7046 
7047 bool
7048 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
7049   SMLoc S = getLoc();
7050 
7051   const MCExpr *Expr;
7052   if (Parser.parseExpression(Expr))
7053     return false;
7054 
7055   int64_t IntVal;
7056   if (Expr->evaluateAsAbsolute(IntVal)) {
7057     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
7058   } else {
7059     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
7060   }
7061   return true;
7062 }
7063 
7064 bool
7065 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
7066   if (isToken(AsmToken::String)) {
7067     Val = getToken().getStringContents();
7068     lex();
7069     return true;
7070   } else {
7071     Error(getLoc(), ErrMsg);
7072     return false;
7073   }
7074 }
7075 
7076 bool
7077 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
7078   if (isToken(AsmToken::Identifier)) {
7079     Val = getTokenStr();
7080     lex();
7081     return true;
7082   } else {
7083     if (!ErrMsg.empty())
7084       Error(getLoc(), ErrMsg);
7085     return false;
7086   }
7087 }
7088 
7089 AsmToken
7090 AMDGPUAsmParser::getToken() const {
7091   return Parser.getTok();
7092 }
7093 
7094 AsmToken
7095 AMDGPUAsmParser::peekToken() {
7096   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
7097 }
7098 
7099 void
7100 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
7101   auto TokCount = getLexer().peekTokens(Tokens);
7102 
7103   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
7104     Tokens[Idx] = AsmToken(AsmToken::Error, "");
7105 }
7106 
7107 AsmToken::TokenKind
7108 AMDGPUAsmParser::getTokenKind() const {
7109   return getLexer().getKind();
7110 }
7111 
7112 SMLoc
7113 AMDGPUAsmParser::getLoc() const {
7114   return getToken().getLoc();
7115 }
7116 
7117 StringRef
7118 AMDGPUAsmParser::getTokenStr() const {
7119   return getToken().getString();
7120 }
7121 
7122 void
7123 AMDGPUAsmParser::lex() {
7124   Parser.Lex();
7125 }
7126 
7127 SMLoc
7128 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
7129                                const OperandVector &Operands) const {
7130   for (unsigned i = Operands.size() - 1; i > 0; --i) {
7131     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7132     if (Test(Op))
7133       return Op.getStartLoc();
7134   }
7135   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
7136 }
7137 
7138 SMLoc
7139 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
7140                            const OperandVector &Operands) const {
7141   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
7142   return getOperandLoc(Test, Operands);
7143 }
7144 
7145 SMLoc
7146 AMDGPUAsmParser::getRegLoc(unsigned Reg,
7147                            const OperandVector &Operands) const {
7148   auto Test = [=](const AMDGPUOperand& Op) {
7149     return Op.isRegKind() && Op.getReg() == Reg;
7150   };
7151   return getOperandLoc(Test, Operands);
7152 }
7153 
7154 SMLoc
7155 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
7156   auto Test = [](const AMDGPUOperand& Op) {
7157     return Op.IsImmKindLiteral() || Op.isExpr();
7158   };
7159   return getOperandLoc(Test, Operands);
7160 }
7161 
7162 SMLoc
7163 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
7164   auto Test = [](const AMDGPUOperand& Op) {
7165     return Op.isImmKindConst();
7166   };
7167   return getOperandLoc(Test, Operands);
7168 }
7169 
7170 //===----------------------------------------------------------------------===//
7171 // swizzle
7172 //===----------------------------------------------------------------------===//
7173 
7174 LLVM_READNONE
7175 static unsigned
7176 encodeBitmaskPerm(const unsigned AndMask,
7177                   const unsigned OrMask,
7178                   const unsigned XorMask) {
7179   using namespace llvm::AMDGPU::Swizzle;
7180 
7181   return BITMASK_PERM_ENC |
7182          (AndMask << BITMASK_AND_SHIFT) |
7183          (OrMask  << BITMASK_OR_SHIFT)  |
7184          (XorMask << BITMASK_XOR_SHIFT);
7185 }
7186 
7187 bool
7188 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
7189                                      const unsigned MinVal,
7190                                      const unsigned MaxVal,
7191                                      const StringRef ErrMsg,
7192                                      SMLoc &Loc) {
7193   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7194     return false;
7195   }
7196   Loc = getLoc();
7197   if (!parseExpr(Op)) {
7198     return false;
7199   }
7200   if (Op < MinVal || Op > MaxVal) {
7201     Error(Loc, ErrMsg);
7202     return false;
7203   }
7204 
7205   return true;
7206 }
7207 
7208 bool
7209 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
7210                                       const unsigned MinVal,
7211                                       const unsigned MaxVal,
7212                                       const StringRef ErrMsg) {
7213   SMLoc Loc;
7214   for (unsigned i = 0; i < OpNum; ++i) {
7215     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
7216       return false;
7217   }
7218 
7219   return true;
7220 }
7221 
7222 bool
7223 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7224   using namespace llvm::AMDGPU::Swizzle;
7225 
7226   int64_t Lane[LANE_NUM];
7227   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7228                            "expected a 2-bit lane id")) {
7229     Imm = QUAD_PERM_ENC;
7230     for (unsigned I = 0; I < LANE_NUM; ++I) {
7231       Imm |= Lane[I] << (LANE_SHIFT * I);
7232     }
7233     return true;
7234   }
7235   return false;
7236 }
7237 
7238 bool
7239 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
7240   using namespace llvm::AMDGPU::Swizzle;
7241 
7242   SMLoc Loc;
7243   int64_t GroupSize;
7244   int64_t LaneIdx;
7245 
7246   if (!parseSwizzleOperand(GroupSize,
7247                            2, 32,
7248                            "group size must be in the interval [2,32]",
7249                            Loc)) {
7250     return false;
7251   }
7252   if (!isPowerOf2_64(GroupSize)) {
7253     Error(Loc, "group size must be a power of two");
7254     return false;
7255   }
7256   if (parseSwizzleOperand(LaneIdx,
7257                           0, GroupSize - 1,
7258                           "lane id must be in the interval [0,group size - 1]",
7259                           Loc)) {
7260     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
7261     return true;
7262   }
7263   return false;
7264 }
7265 
7266 bool
7267 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
7268   using namespace llvm::AMDGPU::Swizzle;
7269 
7270   SMLoc Loc;
7271   int64_t GroupSize;
7272 
7273   if (!parseSwizzleOperand(GroupSize,
7274                            2, 32,
7275                            "group size must be in the interval [2,32]",
7276                            Loc)) {
7277     return false;
7278   }
7279   if (!isPowerOf2_64(GroupSize)) {
7280     Error(Loc, "group size must be a power of two");
7281     return false;
7282   }
7283 
7284   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
7285   return true;
7286 }
7287 
7288 bool
7289 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
7290   using namespace llvm::AMDGPU::Swizzle;
7291 
7292   SMLoc Loc;
7293   int64_t GroupSize;
7294 
7295   if (!parseSwizzleOperand(GroupSize,
7296                            1, 16,
7297                            "group size must be in the interval [1,16]",
7298                            Loc)) {
7299     return false;
7300   }
7301   if (!isPowerOf2_64(GroupSize)) {
7302     Error(Loc, "group size must be a power of two");
7303     return false;
7304   }
7305 
7306   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
7307   return true;
7308 }
7309 
7310 bool
7311 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
7312   using namespace llvm::AMDGPU::Swizzle;
7313 
7314   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7315     return false;
7316   }
7317 
7318   StringRef Ctl;
7319   SMLoc StrLoc = getLoc();
7320   if (!parseString(Ctl)) {
7321     return false;
7322   }
7323   if (Ctl.size() != BITMASK_WIDTH) {
7324     Error(StrLoc, "expected a 5-character mask");
7325     return false;
7326   }
7327 
7328   unsigned AndMask = 0;
7329   unsigned OrMask = 0;
7330   unsigned XorMask = 0;
7331 
7332   for (size_t i = 0; i < Ctl.size(); ++i) {
7333     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
7334     switch(Ctl[i]) {
7335     default:
7336       Error(StrLoc, "invalid mask");
7337       return false;
7338     case '0':
7339       break;
7340     case '1':
7341       OrMask |= Mask;
7342       break;
7343     case 'p':
7344       AndMask |= Mask;
7345       break;
7346     case 'i':
7347       AndMask |= Mask;
7348       XorMask |= Mask;
7349       break;
7350     }
7351   }
7352 
7353   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
7354   return true;
7355 }
7356 
7357 bool
7358 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
7359 
7360   SMLoc OffsetLoc = getLoc();
7361 
7362   if (!parseExpr(Imm, "a swizzle macro")) {
7363     return false;
7364   }
7365   if (!isUInt<16>(Imm)) {
7366     Error(OffsetLoc, "expected a 16-bit offset");
7367     return false;
7368   }
7369   return true;
7370 }
7371 
7372 bool
7373 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
7374   using namespace llvm::AMDGPU::Swizzle;
7375 
7376   if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
7377 
7378     SMLoc ModeLoc = getLoc();
7379     bool Ok = false;
7380 
7381     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
7382       Ok = parseSwizzleQuadPerm(Imm);
7383     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
7384       Ok = parseSwizzleBitmaskPerm(Imm);
7385     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
7386       Ok = parseSwizzleBroadcast(Imm);
7387     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
7388       Ok = parseSwizzleSwap(Imm);
7389     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
7390       Ok = parseSwizzleReverse(Imm);
7391     } else {
7392       Error(ModeLoc, "expected a swizzle mode");
7393     }
7394 
7395     return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
7396   }
7397 
7398   return false;
7399 }
7400 
7401 OperandMatchResultTy
7402 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
7403   SMLoc S = getLoc();
7404   int64_t Imm = 0;
7405 
7406   if (trySkipId("offset")) {
7407 
7408     bool Ok = false;
7409     if (skipToken(AsmToken::Colon, "expected a colon")) {
7410       if (trySkipId("swizzle")) {
7411         Ok = parseSwizzleMacro(Imm);
7412       } else {
7413         Ok = parseSwizzleOffset(Imm);
7414       }
7415     }
7416 
7417     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
7418 
7419     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
7420   } else {
7421     // Swizzle "offset" operand is optional.
7422     // If it is omitted, try parsing other optional operands.
7423     return parseOptionalOpr(Operands);
7424   }
7425 }
7426 
7427 bool
7428 AMDGPUOperand::isSwizzle() const {
7429   return isImmTy(ImmTySwizzle);
7430 }
7431 
7432 //===----------------------------------------------------------------------===//
7433 // VGPR Index Mode
7434 //===----------------------------------------------------------------------===//
7435 
7436 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
7437 
7438   using namespace llvm::AMDGPU::VGPRIndexMode;
7439 
7440   if (trySkipToken(AsmToken::RParen)) {
7441     return OFF;
7442   }
7443 
7444   int64_t Imm = 0;
7445 
7446   while (true) {
7447     unsigned Mode = 0;
7448     SMLoc S = getLoc();
7449 
7450     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
7451       if (trySkipId(IdSymbolic[ModeId])) {
7452         Mode = 1 << ModeId;
7453         break;
7454       }
7455     }
7456 
7457     if (Mode == 0) {
7458       Error(S, (Imm == 0)?
7459                "expected a VGPR index mode or a closing parenthesis" :
7460                "expected a VGPR index mode");
7461       return UNDEF;
7462     }
7463 
7464     if (Imm & Mode) {
7465       Error(S, "duplicate VGPR index mode");
7466       return UNDEF;
7467     }
7468     Imm |= Mode;
7469 
7470     if (trySkipToken(AsmToken::RParen))
7471       break;
7472     if (!skipToken(AsmToken::Comma,
7473                    "expected a comma or a closing parenthesis"))
7474       return UNDEF;
7475   }
7476 
7477   return Imm;
7478 }
7479 
7480 OperandMatchResultTy
7481 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
7482 
7483   using namespace llvm::AMDGPU::VGPRIndexMode;
7484 
7485   int64_t Imm = 0;
7486   SMLoc S = getLoc();
7487 
7488   if (trySkipId("gpr_idx", AsmToken::LParen)) {
7489     Imm = parseGPRIdxMacro();
7490     if (Imm == UNDEF)
7491       return MatchOperand_ParseFail;
7492   } else {
7493     if (getParser().parseAbsoluteExpression(Imm))
7494       return MatchOperand_ParseFail;
7495     if (Imm < 0 || !isUInt<4>(Imm)) {
7496       Error(S, "invalid immediate: only 4-bit values are legal");
7497       return MatchOperand_ParseFail;
7498     }
7499   }
7500 
7501   Operands.push_back(
7502       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
7503   return MatchOperand_Success;
7504 }
7505 
7506 bool AMDGPUOperand::isGPRIdxMode() const {
7507   return isImmTy(ImmTyGprIdxMode);
7508 }
7509 
7510 //===----------------------------------------------------------------------===//
7511 // sopp branch targets
7512 //===----------------------------------------------------------------------===//
7513 
7514 OperandMatchResultTy
7515 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
7516 
7517   // Make sure we are not parsing something
7518   // that looks like a label or an expression but is not.
7519   // This will improve error messages.
7520   if (isRegister() || isModifier())
7521     return MatchOperand_NoMatch;
7522 
7523   if (!parseExpr(Operands))
7524     return MatchOperand_ParseFail;
7525 
7526   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
7527   assert(Opr.isImm() || Opr.isExpr());
7528   SMLoc Loc = Opr.getStartLoc();
7529 
7530   // Currently we do not support arbitrary expressions as branch targets.
7531   // Only labels and absolute expressions are accepted.
7532   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
7533     Error(Loc, "expected an absolute expression or a label");
7534   } else if (Opr.isImm() && !Opr.isS16Imm()) {
7535     Error(Loc, "expected a 16-bit signed jump offset");
7536   }
7537 
7538   return MatchOperand_Success;
7539 }
7540 
7541 //===----------------------------------------------------------------------===//
7542 // Boolean holding registers
7543 //===----------------------------------------------------------------------===//
7544 
7545 OperandMatchResultTy
7546 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
7547   return parseReg(Operands);
7548 }
7549 
7550 //===----------------------------------------------------------------------===//
7551 // mubuf
7552 //===----------------------------------------------------------------------===//
7553 
7554 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
7555   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
7556 }
7557 
7558 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
7559                                    const OperandVector &Operands,
7560                                    bool IsAtomic,
7561                                    bool IsLds) {
7562   OptionalImmIndexMap OptionalIdx;
7563   unsigned FirstOperandIdx = 1;
7564   bool IsAtomicReturn = false;
7565 
7566   if (IsAtomic) {
7567     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7568       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7569       if (!Op.isCPol())
7570         continue;
7571       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7572       break;
7573     }
7574 
7575     if (!IsAtomicReturn) {
7576       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7577       if (NewOpc != -1)
7578         Inst.setOpcode(NewOpc);
7579     }
7580 
7581     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7582                       SIInstrFlags::IsAtomicRet;
7583   }
7584 
7585   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7586     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7587 
7588     // Add the register arguments
7589     if (Op.isReg()) {
7590       Op.addRegOperands(Inst, 1);
7591       // Insert a tied src for atomic return dst.
7592       // This cannot be postponed as subsequent calls to
7593       // addImmOperands rely on correct number of MC operands.
7594       if (IsAtomicReturn && i == FirstOperandIdx)
7595         Op.addRegOperands(Inst, 1);
7596       continue;
7597     }
7598 
7599     // Handle the case where soffset is an immediate
7600     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7601       Op.addImmOperands(Inst, 1);
7602       continue;
7603     }
7604 
7605     // Handle tokens like 'offen' which are sometimes hard-coded into the
7606     // asm string.  There are no MCInst operands for these.
7607     if (Op.isToken()) {
7608       continue;
7609     }
7610     assert(Op.isImm());
7611 
7612     // Handle optional arguments
7613     OptionalIdx[Op.getImmTy()] = i;
7614   }
7615 
7616   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7617   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7618 
7619   if (!IsLds) { // tfe is not legal with lds opcodes
7620     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7621   }
7622   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7623 }
7624 
7625 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7626   OptionalImmIndexMap OptionalIdx;
7627 
7628   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7629     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7630 
7631     // Add the register arguments
7632     if (Op.isReg()) {
7633       Op.addRegOperands(Inst, 1);
7634       continue;
7635     }
7636 
7637     // Handle the case where soffset is an immediate
7638     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7639       Op.addImmOperands(Inst, 1);
7640       continue;
7641     }
7642 
7643     // Handle tokens like 'offen' which are sometimes hard-coded into the
7644     // asm string.  There are no MCInst operands for these.
7645     if (Op.isToken()) {
7646       continue;
7647     }
7648     assert(Op.isImm());
7649 
7650     // Handle optional arguments
7651     OptionalIdx[Op.getImmTy()] = i;
7652   }
7653 
7654   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7655                         AMDGPUOperand::ImmTyOffset);
7656   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7657   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7658   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7659   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7660 }
7661 
7662 //===----------------------------------------------------------------------===//
7663 // mimg
7664 //===----------------------------------------------------------------------===//
7665 
7666 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7667                               bool IsAtomic) {
7668   unsigned I = 1;
7669   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7670   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7671     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7672   }
7673 
7674   if (IsAtomic) {
7675     // Add src, same as dst
7676     assert(Desc.getNumDefs() == 1);
7677     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7678   }
7679 
7680   OptionalImmIndexMap OptionalIdx;
7681 
7682   for (unsigned E = Operands.size(); I != E; ++I) {
7683     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7684 
7685     // Add the register arguments
7686     if (Op.isReg()) {
7687       Op.addRegOperands(Inst, 1);
7688     } else if (Op.isImmModifier()) {
7689       OptionalIdx[Op.getImmTy()] = I;
7690     } else if (!Op.isToken()) {
7691       llvm_unreachable("unexpected operand type");
7692     }
7693   }
7694 
7695   bool IsGFX10Plus = isGFX10Plus();
7696 
7697   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7698   if (IsGFX10Plus)
7699     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7700   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7701   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7702   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7703   if (IsGFX10Plus)
7704     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7705   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7706     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7707   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7708   if (!IsGFX10Plus)
7709     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7710   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7711 }
7712 
7713 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7714   cvtMIMG(Inst, Operands, true);
7715 }
7716 
7717 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7718   OptionalImmIndexMap OptionalIdx;
7719   bool IsAtomicReturn = false;
7720 
7721   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7722     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7723     if (!Op.isCPol())
7724       continue;
7725     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7726     break;
7727   }
7728 
7729   if (!IsAtomicReturn) {
7730     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7731     if (NewOpc != -1)
7732       Inst.setOpcode(NewOpc);
7733   }
7734 
7735   IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7736                     SIInstrFlags::IsAtomicRet;
7737 
7738   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7739     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7740 
7741     // Add the register arguments
7742     if (Op.isReg()) {
7743       Op.addRegOperands(Inst, 1);
7744       if (IsAtomicReturn && i == 1)
7745         Op.addRegOperands(Inst, 1);
7746       continue;
7747     }
7748 
7749     // Handle the case where soffset is an immediate
7750     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7751       Op.addImmOperands(Inst, 1);
7752       continue;
7753     }
7754 
7755     // Handle tokens like 'offen' which are sometimes hard-coded into the
7756     // asm string.  There are no MCInst operands for these.
7757     if (Op.isToken()) {
7758       continue;
7759     }
7760     assert(Op.isImm());
7761 
7762     // Handle optional arguments
7763     OptionalIdx[Op.getImmTy()] = i;
7764   }
7765 
7766   if ((int)Inst.getNumOperands() <=
7767       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7768     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7769   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7770 }
7771 
7772 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7773                                       const OperandVector &Operands) {
7774   for (unsigned I = 1; I < Operands.size(); ++I) {
7775     auto &Operand = (AMDGPUOperand &)*Operands[I];
7776     if (Operand.isReg())
7777       Operand.addRegOperands(Inst, 1);
7778   }
7779 
7780   Inst.addOperand(MCOperand::createImm(1)); // a16
7781 }
7782 
7783 //===----------------------------------------------------------------------===//
7784 // smrd
7785 //===----------------------------------------------------------------------===//
7786 
7787 bool AMDGPUOperand::isSMRDOffset8() const {
7788   return isImm() && isUInt<8>(getImm());
7789 }
7790 
7791 bool AMDGPUOperand::isSMEMOffset() const {
7792   return isImmTy(ImmTyNone) ||
7793          isImmTy(ImmTyOffset); // Offset range is checked later by validator.
7794 }
7795 
7796 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7797   // 32-bit literals are only supported on CI and we only want to use them
7798   // when the offset is > 8-bits.
7799   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7800 }
7801 
7802 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7803   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7804 }
7805 
7806 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7807   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7808 }
7809 
7810 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7811   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7812 }
7813 
7814 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7815   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7816 }
7817 
7818 //===----------------------------------------------------------------------===//
7819 // vop3
7820 //===----------------------------------------------------------------------===//
7821 
7822 static bool ConvertOmodMul(int64_t &Mul) {
7823   if (Mul != 1 && Mul != 2 && Mul != 4)
7824     return false;
7825 
7826   Mul >>= 1;
7827   return true;
7828 }
7829 
7830 static bool ConvertOmodDiv(int64_t &Div) {
7831   if (Div == 1) {
7832     Div = 0;
7833     return true;
7834   }
7835 
7836   if (Div == 2) {
7837     Div = 3;
7838     return true;
7839   }
7840 
7841   return false;
7842 }
7843 
7844 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7845 // This is intentional and ensures compatibility with sp3.
7846 // See bug 35397 for details.
7847 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7848   if (BoundCtrl == 0 || BoundCtrl == 1) {
7849     BoundCtrl = 1;
7850     return true;
7851   }
7852   return false;
7853 }
7854 
7855 // Note: the order in this table matches the order of operands in AsmString.
7856 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7857   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7858   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7859   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7860   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7861   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7862   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7863   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7864   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7865   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7866   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7867   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7868   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7869   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7870   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7871   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7872   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7873   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7874   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7875   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7876   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7877   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7878   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7879   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7880   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7881   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7882   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7883   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7884   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7885   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7886   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7887   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7888   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7889   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7890   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7891   {"dpp8",     AMDGPUOperand::ImmTyDPP8, false, nullptr},
7892   {"dpp_ctrl", AMDGPUOperand::ImmTyDppCtrl, false, nullptr},
7893   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7894   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7895   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7896   {"fi",   AMDGPUOperand::ImmTyDppFi, false, nullptr},
7897   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7898   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7899   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr},
7900   {"wait_vdst", AMDGPUOperand::ImmTyWaitVDST, false, nullptr},
7901   {"wait_exp", AMDGPUOperand::ImmTyWaitEXP, false, nullptr}
7902 };
7903 
7904 void AMDGPUAsmParser::onBeginOfFile() {
7905   if (!getParser().getStreamer().getTargetStreamer() ||
7906       getSTI().getTargetTriple().getArch() == Triple::r600)
7907     return;
7908 
7909   if (!getTargetStreamer().getTargetID())
7910     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7911 
7912   if (isHsaAbiVersion3AndAbove(&getSTI()))
7913     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7914 }
7915 
7916 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7917 
7918   OperandMatchResultTy res = parseOptionalOpr(Operands);
7919 
7920   // This is a hack to enable hardcoded mandatory operands which follow
7921   // optional operands.
7922   //
7923   // Current design assumes that all operands after the first optional operand
7924   // are also optional. However implementation of some instructions violates
7925   // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
7926   //
7927   // To alleviate this problem, we have to (implicitly) parse extra operands
7928   // to make sure autogenerated parser of custom operands never hit hardcoded
7929   // mandatory operands.
7930 
7931   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7932     if (res != MatchOperand_Success ||
7933         isToken(AsmToken::EndOfStatement))
7934       break;
7935 
7936     trySkipToken(AsmToken::Comma);
7937     res = parseOptionalOpr(Operands);
7938   }
7939 
7940   return res;
7941 }
7942 
7943 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7944   OperandMatchResultTy res;
7945   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7946     // try to parse any optional operand here
7947     if (Op.IsBit) {
7948       res = parseNamedBit(Op.Name, Operands, Op.Type);
7949     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7950       res = parseOModOperand(Operands);
7951     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7952                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7953                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7954       res = parseSDWASel(Operands, Op.Name, Op.Type);
7955     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7956       res = parseSDWADstUnused(Operands);
7957     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7958                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7959                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7960                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7961       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7962                                         Op.ConvertResult);
7963     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7964       res = parseDim(Operands);
7965     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7966       res = parseCPol(Operands);
7967     } else if (Op.Type == AMDGPUOperand::ImmTyDPP8) {
7968       res = parseDPP8(Operands);
7969     } else if (Op.Type == AMDGPUOperand::ImmTyDppCtrl) {
7970       res = parseDPPCtrl(Operands);
7971     } else {
7972       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7973       if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) {
7974         res = parseOperandArrayWithPrefix("neg", Operands,
7975                                           AMDGPUOperand::ImmTyBLGP,
7976                                           nullptr);
7977       }
7978     }
7979     if (res != MatchOperand_NoMatch) {
7980       return res;
7981     }
7982   }
7983   return MatchOperand_NoMatch;
7984 }
7985 
7986 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7987   StringRef Name = getTokenStr();
7988   if (Name == "mul") {
7989     return parseIntWithPrefix("mul", Operands,
7990                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7991   }
7992 
7993   if (Name == "div") {
7994     return parseIntWithPrefix("div", Operands,
7995                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7996   }
7997 
7998   return MatchOperand_NoMatch;
7999 }
8000 
8001 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
8002   cvtVOP3P(Inst, Operands);
8003 
8004   int Opc = Inst.getOpcode();
8005 
8006   int SrcNum;
8007   const int Ops[] = { AMDGPU::OpName::src0,
8008                       AMDGPU::OpName::src1,
8009                       AMDGPU::OpName::src2 };
8010   for (SrcNum = 0;
8011        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
8012        ++SrcNum);
8013   assert(SrcNum > 0);
8014 
8015   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8016   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8017 
8018   if ((OpSel & (1 << SrcNum)) != 0) {
8019     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
8020     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8021     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
8022   }
8023 }
8024 
8025 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
8026       // 1. This operand is input modifiers
8027   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
8028       // 2. This is not last operand
8029       && Desc.NumOperands > (OpNum + 1)
8030       // 3. Next operand is register class
8031       && Desc.OpInfo[OpNum + 1].RegClass != -1
8032       // 4. Next register is not tied to any other operand
8033       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
8034 }
8035 
8036 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
8037 {
8038   OptionalImmIndexMap OptionalIdx;
8039   unsigned Opc = Inst.getOpcode();
8040 
8041   unsigned I = 1;
8042   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8043   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8044     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8045   }
8046 
8047   for (unsigned E = Operands.size(); I != E; ++I) {
8048     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8049     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8050       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8051     } else if (Op.isInterpSlot() ||
8052                Op.isInterpAttr() ||
8053                Op.isAttrChan()) {
8054       Inst.addOperand(MCOperand::createImm(Op.getImm()));
8055     } else if (Op.isImmModifier()) {
8056       OptionalIdx[Op.getImmTy()] = I;
8057     } else {
8058       llvm_unreachable("unhandled operand type");
8059     }
8060   }
8061 
8062   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
8063     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
8064   }
8065 
8066   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
8067     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8068   }
8069 
8070   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
8071     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8072   }
8073 }
8074 
8075 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
8076 {
8077   OptionalImmIndexMap OptionalIdx;
8078   unsigned Opc = Inst.getOpcode();
8079 
8080   unsigned I = 1;
8081   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8082   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8083     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8084   }
8085 
8086   for (unsigned E = Operands.size(); I != E; ++I) {
8087     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8088     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8089       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8090     } else if (Op.isImmModifier()) {
8091       OptionalIdx[Op.getImmTy()] = I;
8092     } else {
8093       llvm_unreachable("unhandled operand type");
8094     }
8095   }
8096 
8097   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8098 
8099   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8100   if (OpSelIdx != -1)
8101     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8102 
8103   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
8104 
8105   if (OpSelIdx == -1)
8106     return;
8107 
8108   const int Ops[] = { AMDGPU::OpName::src0,
8109                       AMDGPU::OpName::src1,
8110                       AMDGPU::OpName::src2 };
8111   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8112                          AMDGPU::OpName::src1_modifiers,
8113                          AMDGPU::OpName::src2_modifiers };
8114 
8115   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8116 
8117   for (int J = 0; J < 3; ++J) {
8118     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8119     if (OpIdx == -1)
8120       break;
8121 
8122     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8123     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8124 
8125     if ((OpSel & (1 << J)) != 0)
8126       ModVal |= SISrcMods::OP_SEL_0;
8127     if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
8128         (OpSel & (1 << 3)) != 0)
8129       ModVal |= SISrcMods::DST_OP_SEL;
8130 
8131     Inst.getOperand(ModIdx).setImm(ModVal);
8132   }
8133 }
8134 
8135 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
8136                               OptionalImmIndexMap &OptionalIdx) {
8137   unsigned Opc = Inst.getOpcode();
8138 
8139   unsigned I = 1;
8140   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8141   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8142     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8143   }
8144 
8145   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
8146     // This instruction has src modifiers
8147     for (unsigned E = Operands.size(); I != E; ++I) {
8148       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8149       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8150         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8151       } else if (Op.isImmModifier()) {
8152         OptionalIdx[Op.getImmTy()] = I;
8153       } else if (Op.isRegOrImm()) {
8154         Op.addRegOrImmOperands(Inst, 1);
8155       } else {
8156         llvm_unreachable("unhandled operand type");
8157       }
8158     }
8159   } else {
8160     // No src modifiers
8161     for (unsigned E = Operands.size(); I != E; ++I) {
8162       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8163       if (Op.isMod()) {
8164         OptionalIdx[Op.getImmTy()] = I;
8165       } else {
8166         Op.addRegOrImmOperands(Inst, 1);
8167       }
8168     }
8169   }
8170 
8171   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
8172     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8173   }
8174 
8175   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
8176     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8177   }
8178 
8179   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
8180   // it has src2 register operand that is tied to dst operand
8181   // we don't allow modifiers for this operand in assembler so src2_modifiers
8182   // should be 0.
8183   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
8184       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
8185       Opc == AMDGPU::V_MAC_F32_e64_vi ||
8186       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
8187       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
8188       Opc == AMDGPU::V_MAC_F16_e64_vi ||
8189       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
8190       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
8191       Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
8192       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
8193       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
8194       Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
8195       Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
8196       Opc == AMDGPU::V_FMAC_F16_e64_gfx11) {
8197     auto it = Inst.begin();
8198     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
8199     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
8200     ++it;
8201     // Copy the operand to ensure it's not invalidated when Inst grows.
8202     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
8203   }
8204 }
8205 
8206 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
8207   OptionalImmIndexMap OptionalIdx;
8208   cvtVOP3(Inst, Operands, OptionalIdx);
8209 }
8210 
8211 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
8212                                OptionalImmIndexMap &OptIdx) {
8213   const int Opc = Inst.getOpcode();
8214   const MCInstrDesc &Desc = MII.get(Opc);
8215 
8216   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
8217 
8218   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
8219     assert(!IsPacked);
8220     Inst.addOperand(Inst.getOperand(0));
8221   }
8222 
8223   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
8224   // instruction, and then figure out where to actually put the modifiers
8225 
8226   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8227   if (OpSelIdx != -1) {
8228     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
8229   }
8230 
8231   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
8232   if (OpSelHiIdx != -1) {
8233     int DefaultVal = IsPacked ? -1 : 0;
8234     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
8235                           DefaultVal);
8236   }
8237 
8238   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
8239   if (NegLoIdx != -1) {
8240     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
8241     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
8242   }
8243 
8244   const int Ops[] = { AMDGPU::OpName::src0,
8245                       AMDGPU::OpName::src1,
8246                       AMDGPU::OpName::src2 };
8247   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8248                          AMDGPU::OpName::src1_modifiers,
8249                          AMDGPU::OpName::src2_modifiers };
8250 
8251   unsigned OpSel = 0;
8252   unsigned OpSelHi = 0;
8253   unsigned NegLo = 0;
8254   unsigned NegHi = 0;
8255 
8256   if (OpSelIdx != -1)
8257     OpSel = Inst.getOperand(OpSelIdx).getImm();
8258 
8259   if (OpSelHiIdx != -1)
8260     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
8261 
8262   if (NegLoIdx != -1) {
8263     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
8264     NegLo = Inst.getOperand(NegLoIdx).getImm();
8265     NegHi = Inst.getOperand(NegHiIdx).getImm();
8266   }
8267 
8268   for (int J = 0; J < 3; ++J) {
8269     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8270     if (OpIdx == -1)
8271       break;
8272 
8273     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8274 
8275     if (ModIdx == -1)
8276       continue;
8277 
8278     uint32_t ModVal = 0;
8279 
8280     if ((OpSel & (1 << J)) != 0)
8281       ModVal |= SISrcMods::OP_SEL_0;
8282 
8283     if ((OpSelHi & (1 << J)) != 0)
8284       ModVal |= SISrcMods::OP_SEL_1;
8285 
8286     if ((NegLo & (1 << J)) != 0)
8287       ModVal |= SISrcMods::NEG;
8288 
8289     if ((NegHi & (1 << J)) != 0)
8290       ModVal |= SISrcMods::NEG_HI;
8291 
8292     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
8293   }
8294 }
8295 
8296 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8297   OptionalImmIndexMap OptIdx;
8298   cvtVOP3(Inst, Operands, OptIdx);
8299   cvtVOP3P(Inst, Operands, OptIdx);
8300 }
8301 
8302 //===----------------------------------------------------------------------===//
8303 // dpp
8304 //===----------------------------------------------------------------------===//
8305 
8306 bool AMDGPUOperand::isDPP8() const {
8307   return isImmTy(ImmTyDPP8);
8308 }
8309 
8310 bool AMDGPUOperand::isDPPCtrl() const {
8311   using namespace AMDGPU::DPP;
8312 
8313   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8314   if (result) {
8315     int64_t Imm = getImm();
8316     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8317            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
8318            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8319            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
8320            (Imm == DppCtrl::WAVE_SHL1) ||
8321            (Imm == DppCtrl::WAVE_ROL1) ||
8322            (Imm == DppCtrl::WAVE_SHR1) ||
8323            (Imm == DppCtrl::WAVE_ROR1) ||
8324            (Imm == DppCtrl::ROW_MIRROR) ||
8325            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
8326            (Imm == DppCtrl::BCAST15) ||
8327            (Imm == DppCtrl::BCAST31) ||
8328            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8329            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
8330   }
8331   return false;
8332 }
8333 
8334 //===----------------------------------------------------------------------===//
8335 // mAI
8336 //===----------------------------------------------------------------------===//
8337 
8338 bool AMDGPUOperand::isBLGP() const {
8339   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8340 }
8341 
8342 bool AMDGPUOperand::isCBSZ() const {
8343   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
8344 }
8345 
8346 bool AMDGPUOperand::isABID() const {
8347   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
8348 }
8349 
8350 bool AMDGPUOperand::isS16Imm() const {
8351   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8352 }
8353 
8354 bool AMDGPUOperand::isU16Imm() const {
8355   return isImm() && isUInt<16>(getImm());
8356 }
8357 
8358 //===----------------------------------------------------------------------===//
8359 // dim
8360 //===----------------------------------------------------------------------===//
8361 
8362 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
8363   // We want to allow "dim:1D" etc.,
8364   // but the initial 1 is tokenized as an integer.
8365   std::string Token;
8366   if (isToken(AsmToken::Integer)) {
8367     SMLoc Loc = getToken().getEndLoc();
8368     Token = std::string(getTokenStr());
8369     lex();
8370     if (getLoc() != Loc)
8371       return false;
8372   }
8373 
8374   StringRef Suffix;
8375   if (!parseId(Suffix))
8376     return false;
8377   Token += Suffix;
8378 
8379   StringRef DimId = Token;
8380   if (DimId.startswith("SQ_RSRC_IMG_"))
8381     DimId = DimId.drop_front(12);
8382 
8383   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
8384   if (!DimInfo)
8385     return false;
8386 
8387   Encoding = DimInfo->Encoding;
8388   return true;
8389 }
8390 
8391 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
8392   if (!isGFX10Plus())
8393     return MatchOperand_NoMatch;
8394 
8395   SMLoc S = getLoc();
8396 
8397   if (!trySkipId("dim", AsmToken::Colon))
8398     return MatchOperand_NoMatch;
8399 
8400   unsigned Encoding;
8401   SMLoc Loc = getLoc();
8402   if (!parseDimId(Encoding)) {
8403     Error(Loc, "invalid dim value");
8404     return MatchOperand_ParseFail;
8405   }
8406 
8407   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
8408                                               AMDGPUOperand::ImmTyDim));
8409   return MatchOperand_Success;
8410 }
8411 
8412 //===----------------------------------------------------------------------===//
8413 // dpp
8414 //===----------------------------------------------------------------------===//
8415 
8416 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
8417   SMLoc S = getLoc();
8418 
8419   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
8420     return MatchOperand_NoMatch;
8421 
8422   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
8423 
8424   int64_t Sels[8];
8425 
8426   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8427     return MatchOperand_ParseFail;
8428 
8429   for (size_t i = 0; i < 8; ++i) {
8430     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8431       return MatchOperand_ParseFail;
8432 
8433     SMLoc Loc = getLoc();
8434     if (getParser().parseAbsoluteExpression(Sels[i]))
8435       return MatchOperand_ParseFail;
8436     if (0 > Sels[i] || 7 < Sels[i]) {
8437       Error(Loc, "expected a 3-bit value");
8438       return MatchOperand_ParseFail;
8439     }
8440   }
8441 
8442   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8443     return MatchOperand_ParseFail;
8444 
8445   unsigned DPP8 = 0;
8446   for (size_t i = 0; i < 8; ++i)
8447     DPP8 |= (Sels[i] << (i * 3));
8448 
8449   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
8450   return MatchOperand_Success;
8451 }
8452 
8453 bool
8454 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
8455                                     const OperandVector &Operands) {
8456   if (Ctrl == "row_newbcast")
8457     return isGFX90A();
8458 
8459   if (Ctrl == "row_share" ||
8460       Ctrl == "row_xmask")
8461     return isGFX10Plus();
8462 
8463   if (Ctrl == "wave_shl" ||
8464       Ctrl == "wave_shr" ||
8465       Ctrl == "wave_rol" ||
8466       Ctrl == "wave_ror" ||
8467       Ctrl == "row_bcast")
8468     return isVI() || isGFX9();
8469 
8470   return Ctrl == "row_mirror" ||
8471          Ctrl == "row_half_mirror" ||
8472          Ctrl == "quad_perm" ||
8473          Ctrl == "row_shl" ||
8474          Ctrl == "row_shr" ||
8475          Ctrl == "row_ror";
8476 }
8477 
8478 int64_t
8479 AMDGPUAsmParser::parseDPPCtrlPerm() {
8480   // quad_perm:[%d,%d,%d,%d]
8481 
8482   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8483     return -1;
8484 
8485   int64_t Val = 0;
8486   for (int i = 0; i < 4; ++i) {
8487     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8488       return -1;
8489 
8490     int64_t Temp;
8491     SMLoc Loc = getLoc();
8492     if (getParser().parseAbsoluteExpression(Temp))
8493       return -1;
8494     if (Temp < 0 || Temp > 3) {
8495       Error(Loc, "expected a 2-bit value");
8496       return -1;
8497     }
8498 
8499     Val += (Temp << i * 2);
8500   }
8501 
8502   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8503     return -1;
8504 
8505   return Val;
8506 }
8507 
8508 int64_t
8509 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
8510   using namespace AMDGPU::DPP;
8511 
8512   // sel:%d
8513 
8514   int64_t Val;
8515   SMLoc Loc = getLoc();
8516 
8517   if (getParser().parseAbsoluteExpression(Val))
8518     return -1;
8519 
8520   struct DppCtrlCheck {
8521     int64_t Ctrl;
8522     int Lo;
8523     int Hi;
8524   };
8525 
8526   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
8527     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
8528     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
8529     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
8530     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
8531     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
8532     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
8533     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
8534     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
8535     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
8536     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
8537     .Default({-1, 0, 0});
8538 
8539   bool Valid;
8540   if (Check.Ctrl == -1) {
8541     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
8542     Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
8543   } else {
8544     Valid = Check.Lo <= Val && Val <= Check.Hi;
8545     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
8546   }
8547 
8548   if (!Valid) {
8549     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
8550     return -1;
8551   }
8552 
8553   return Val;
8554 }
8555 
8556 OperandMatchResultTy
8557 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
8558   using namespace AMDGPU::DPP;
8559 
8560   if (!isToken(AsmToken::Identifier) ||
8561       !isSupportedDPPCtrl(getTokenStr(), Operands))
8562     return MatchOperand_NoMatch;
8563 
8564   SMLoc S = getLoc();
8565   int64_t Val = -1;
8566   StringRef Ctrl;
8567 
8568   parseId(Ctrl);
8569 
8570   if (Ctrl == "row_mirror") {
8571     Val = DppCtrl::ROW_MIRROR;
8572   } else if (Ctrl == "row_half_mirror") {
8573     Val = DppCtrl::ROW_HALF_MIRROR;
8574   } else {
8575     if (skipToken(AsmToken::Colon, "expected a colon")) {
8576       if (Ctrl == "quad_perm") {
8577         Val = parseDPPCtrlPerm();
8578       } else {
8579         Val = parseDPPCtrlSel(Ctrl);
8580       }
8581     }
8582   }
8583 
8584   if (Val == -1)
8585     return MatchOperand_ParseFail;
8586 
8587   Operands.push_back(
8588     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
8589   return MatchOperand_Success;
8590 }
8591 
8592 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
8593   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
8594 }
8595 
8596 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
8597   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
8598 }
8599 
8600 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
8601   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
8602 }
8603 
8604 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
8605   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
8606 }
8607 
8608 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
8609   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
8610 }
8611 
8612 // Add dummy $old operand
8613 void AMDGPUAsmParser::cvtVOPC64NoDstDPP(MCInst &Inst,
8614                                         const OperandVector &Operands,
8615                                         bool IsDPP8) {
8616   Inst.addOperand(MCOperand::createReg(0));
8617   cvtVOP3DPP(Inst, Operands, IsDPP8);
8618 }
8619 
8620 void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8621   OptionalImmIndexMap OptionalIdx;
8622   unsigned Opc = Inst.getOpcode();
8623   bool HasModifiers = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
8624   unsigned I = 1;
8625   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8626   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8627     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8628   }
8629 
8630   int Fi = 0;
8631   for (unsigned E = Operands.size(); I != E; ++I) {
8632     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8633                                             MCOI::TIED_TO);
8634     if (TiedTo != -1) {
8635       assert((unsigned)TiedTo < Inst.getNumOperands());
8636       // handle tied old or src2 for MAC instructions
8637       Inst.addOperand(Inst.getOperand(TiedTo));
8638     }
8639     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8640     // Add the register arguments
8641     if (IsDPP8 && Op.isFI()) {
8642       Fi = Op.getImm();
8643     } else if (HasModifiers &&
8644                isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8645       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8646     } else if (Op.isReg()) {
8647       Op.addRegOperands(Inst, 1);
8648     } else if (Op.isImm() &&
8649                Desc.OpInfo[Inst.getNumOperands()].RegClass != -1) {
8650       assert(!HasModifiers && "Case should be unreachable with modifiers");
8651       assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
8652       Op.addImmOperands(Inst, 1);
8653     } else if (Op.isImm()) {
8654       OptionalIdx[Op.getImmTy()] = I;
8655     } else {
8656       llvm_unreachable("unhandled operand type");
8657     }
8658   }
8659   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
8660     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8661   }
8662   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
8663     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8664   }
8665   if (Desc.TSFlags & SIInstrFlags::VOP3P)
8666     cvtVOP3P(Inst, Operands, OptionalIdx);
8667   else if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) {
8668     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8669   }
8670 
8671   if (IsDPP8) {
8672     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
8673     using namespace llvm::AMDGPU::DPP;
8674     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8675   } else {
8676     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
8677     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8678     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8679     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8680     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8681       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8682     }
8683   }
8684 }
8685 
8686 // Add dummy $old operand
8687 void AMDGPUAsmParser::cvtVOPCNoDstDPP(MCInst &Inst,
8688                                       const OperandVector &Operands,
8689                                       bool IsDPP8) {
8690   Inst.addOperand(MCOperand::createReg(0));
8691   cvtDPP(Inst, Operands, IsDPP8);
8692 }
8693 
8694 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8695   OptionalImmIndexMap OptionalIdx;
8696 
8697   unsigned Opc = Inst.getOpcode();
8698   bool HasModifiers =
8699       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
8700   unsigned I = 1;
8701   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8702   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8703     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8704   }
8705 
8706   int Fi = 0;
8707   for (unsigned E = Operands.size(); I != E; ++I) {
8708     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8709                                             MCOI::TIED_TO);
8710     if (TiedTo != -1) {
8711       assert((unsigned)TiedTo < Inst.getNumOperands());
8712       // handle tied old or src2 for MAC instructions
8713       Inst.addOperand(Inst.getOperand(TiedTo));
8714     }
8715     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8716     // Add the register arguments
8717     if (Op.isReg() && validateVccOperand(Op.getReg())) {
8718       // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
8719       // Skip it.
8720       continue;
8721     }
8722 
8723     if (IsDPP8) {
8724       if (Op.isDPP8()) {
8725         Op.addImmOperands(Inst, 1);
8726       } else if (HasModifiers &&
8727                  isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8728         Op.addRegWithFPInputModsOperands(Inst, 2);
8729       } else if (Op.isFI()) {
8730         Fi = Op.getImm();
8731       } else if (Op.isReg()) {
8732         Op.addRegOperands(Inst, 1);
8733       } else {
8734         llvm_unreachable("Invalid operand type");
8735       }
8736     } else {
8737       if (HasModifiers &&
8738           isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8739         Op.addRegWithFPInputModsOperands(Inst, 2);
8740       } else if (Op.isReg()) {
8741         Op.addRegOperands(Inst, 1);
8742       } else if (Op.isDPPCtrl()) {
8743         Op.addImmOperands(Inst, 1);
8744       } else if (Op.isImm()) {
8745         // Handle optional arguments
8746         OptionalIdx[Op.getImmTy()] = I;
8747       } else {
8748         llvm_unreachable("Invalid operand type");
8749       }
8750     }
8751   }
8752 
8753   if (IsDPP8) {
8754     using namespace llvm::AMDGPU::DPP;
8755     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8756   } else {
8757     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8758     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8759     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8760     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8761       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8762     }
8763   }
8764 }
8765 
8766 //===----------------------------------------------------------------------===//
8767 // sdwa
8768 //===----------------------------------------------------------------------===//
8769 
8770 OperandMatchResultTy
8771 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
8772                               AMDGPUOperand::ImmTy Type) {
8773   using namespace llvm::AMDGPU::SDWA;
8774 
8775   SMLoc S = getLoc();
8776   StringRef Value;
8777   OperandMatchResultTy res;
8778 
8779   SMLoc StringLoc;
8780   res = parseStringWithPrefix(Prefix, Value, StringLoc);
8781   if (res != MatchOperand_Success) {
8782     return res;
8783   }
8784 
8785   int64_t Int;
8786   Int = StringSwitch<int64_t>(Value)
8787         .Case("BYTE_0", SdwaSel::BYTE_0)
8788         .Case("BYTE_1", SdwaSel::BYTE_1)
8789         .Case("BYTE_2", SdwaSel::BYTE_2)
8790         .Case("BYTE_3", SdwaSel::BYTE_3)
8791         .Case("WORD_0", SdwaSel::WORD_0)
8792         .Case("WORD_1", SdwaSel::WORD_1)
8793         .Case("DWORD", SdwaSel::DWORD)
8794         .Default(0xffffffff);
8795 
8796   if (Int == 0xffffffff) {
8797     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8798     return MatchOperand_ParseFail;
8799   }
8800 
8801   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8802   return MatchOperand_Success;
8803 }
8804 
8805 OperandMatchResultTy
8806 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8807   using namespace llvm::AMDGPU::SDWA;
8808 
8809   SMLoc S = getLoc();
8810   StringRef Value;
8811   OperandMatchResultTy res;
8812 
8813   SMLoc StringLoc;
8814   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8815   if (res != MatchOperand_Success) {
8816     return res;
8817   }
8818 
8819   int64_t Int;
8820   Int = StringSwitch<int64_t>(Value)
8821         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8822         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8823         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8824         .Default(0xffffffff);
8825 
8826   if (Int == 0xffffffff) {
8827     Error(StringLoc, "invalid dst_unused value");
8828     return MatchOperand_ParseFail;
8829   }
8830 
8831   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8832   return MatchOperand_Success;
8833 }
8834 
8835 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8836   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8837 }
8838 
8839 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8840   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8841 }
8842 
8843 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8844   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8845 }
8846 
8847 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8848   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8849 }
8850 
8851 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8852   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8853 }
8854 
8855 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8856                               uint64_t BasicInstType,
8857                               bool SkipDstVcc,
8858                               bool SkipSrcVcc) {
8859   using namespace llvm::AMDGPU::SDWA;
8860 
8861   OptionalImmIndexMap OptionalIdx;
8862   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8863   bool SkippedVcc = false;
8864 
8865   unsigned I = 1;
8866   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8867   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8868     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8869   }
8870 
8871   for (unsigned E = Operands.size(); I != E; ++I) {
8872     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8873     if (SkipVcc && !SkippedVcc && Op.isReg() &&
8874         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
8875       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
8876       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
8877       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
8878       // Skip VCC only if we didn't skip it on previous iteration.
8879       // Note that src0 and src1 occupy 2 slots each because of modifiers.
8880       if (BasicInstType == SIInstrFlags::VOP2 &&
8881           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8882            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8883         SkippedVcc = true;
8884         continue;
8885       } else if (BasicInstType == SIInstrFlags::VOPC &&
8886                  Inst.getNumOperands() == 0) {
8887         SkippedVcc = true;
8888         continue;
8889       }
8890     }
8891     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8892       Op.addRegOrImmWithInputModsOperands(Inst, 2);
8893     } else if (Op.isImm()) {
8894       // Handle optional arguments
8895       OptionalIdx[Op.getImmTy()] = I;
8896     } else {
8897       llvm_unreachable("Invalid operand type");
8898     }
8899     SkippedVcc = false;
8900   }
8901 
8902   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
8903       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
8904       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
8905     // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
8906     switch (BasicInstType) {
8907     case SIInstrFlags::VOP1:
8908       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8909       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8910         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8911       }
8912       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8913       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8914       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8915       break;
8916 
8917     case SIInstrFlags::VOP2:
8918       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8919       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8920         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8921       }
8922       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8923       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8924       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8925       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8926       break;
8927 
8928     case SIInstrFlags::VOPC:
8929       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8930         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8931       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8932       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8933       break;
8934 
8935     default:
8936       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8937     }
8938   }
8939 
8940   // special case v_mac_{f16, f32}:
8941   // it has src2 register operand that is tied to dst operand
8942   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8943       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
8944     auto it = Inst.begin();
8945     std::advance(
8946       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8947     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8948   }
8949 }
8950 
8951 //===----------------------------------------------------------------------===//
8952 // mAI
8953 //===----------------------------------------------------------------------===//
8954 
8955 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8956   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8957 }
8958 
8959 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8960   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8961 }
8962 
8963 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8964   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8965 }
8966 
8967 /// Force static initialization.
8968 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8969   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8970   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8971 }
8972 
8973 #define GET_REGISTER_MATCHER
8974 #define GET_MATCHER_IMPLEMENTATION
8975 #define GET_MNEMONIC_SPELL_CHECKER
8976 #define GET_MNEMONIC_CHECKER
8977 #include "AMDGPUGenAsmMatcher.inc"
8978 
8979 // This function should be defined after auto-generated include so that we have
8980 // MatchClassKind enum defined
8981 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8982                                                      unsigned Kind) {
8983   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
8984   // But MatchInstructionImpl() expects to meet token and fails to validate
8985   // operand. This method checks if we are given immediate operand but expect to
8986   // get corresponding token.
8987   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8988   switch (Kind) {
8989   case MCK_addr64:
8990     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8991   case MCK_gds:
8992     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8993   case MCK_lds:
8994     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8995   case MCK_idxen:
8996     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8997   case MCK_offen:
8998     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8999   case MCK_SSrcB32:
9000     // When operands have expression values, they will return true for isToken,
9001     // because it is not possible to distinguish between a token and an
9002     // expression at parse time. MatchInstructionImpl() will always try to
9003     // match an operand as a token, when isToken returns true, and when the
9004     // name of the expression is not a valid token, the match will fail,
9005     // so we need to handle it here.
9006     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
9007   case MCK_SSrcF32:
9008     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
9009   case MCK_SoppBrTarget:
9010     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
9011   case MCK_VReg32OrOff:
9012     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
9013   case MCK_InterpSlot:
9014     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
9015   case MCK_Attr:
9016     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
9017   case MCK_AttrChan:
9018     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
9019   case MCK_ImmSMEMOffset:
9020     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
9021   case MCK_SReg_64:
9022   case MCK_SReg_64_XEXEC:
9023     // Null is defined as a 32-bit register but
9024     // it should also be enabled with 64-bit operands.
9025     // The following code enables it for SReg_64 operands
9026     // used as source and destination. Remaining source
9027     // operands are handled in isInlinableImm.
9028     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
9029   default:
9030     return Match_InvalidOperand;
9031   }
9032 }
9033 
9034 //===----------------------------------------------------------------------===//
9035 // endpgm
9036 //===----------------------------------------------------------------------===//
9037 
9038 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
9039   SMLoc S = getLoc();
9040   int64_t Imm = 0;
9041 
9042   if (!parseExpr(Imm)) {
9043     // The operand is optional, if not present default to 0
9044     Imm = 0;
9045   }
9046 
9047   if (!isUInt<16>(Imm)) {
9048     Error(S, "expected a 16-bit value");
9049     return MatchOperand_ParseFail;
9050   }
9051 
9052   Operands.push_back(
9053       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
9054   return MatchOperand_Success;
9055 }
9056 
9057 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
9058 
9059 //===----------------------------------------------------------------------===//
9060 // LDSDIR
9061 //===----------------------------------------------------------------------===//
9062 
9063 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitVDST() const {
9064   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitVDST);
9065 }
9066 
9067 bool AMDGPUOperand::isWaitVDST() const {
9068   return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
9069 }
9070 
9071 //===----------------------------------------------------------------------===//
9072 // VINTERP
9073 //===----------------------------------------------------------------------===//
9074 
9075 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitEXP() const {
9076   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitEXP);
9077 }
9078 
9079 bool AMDGPUOperand::isWaitEXP() const {
9080   return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());
9081 }
9082