//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
    : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

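  // Source-operand modifiers: Abs/Neg are floating-point modifiers and Sext is
  // an integer modifier. The get*ModifiersOperand() helpers pack them into the
  // SISrcMods immediate that accompanies the operand in the MCInst.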
  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

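  // Records how an immediate operand ends up being encoded: as an inline
  // constant, as a literal, or not yet classified (see the setImmKind*()
  // helpers below).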
  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

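  // Operand payload; exactly one member is active, selected by Kind.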
  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  // "CPol_GLC1" is a MatchClass of the CPOL_GLC1 operand with the default and
  // forced value of the GLC operand.
  bool isCPol_GLC1() const { return isImmTy(ImmTyCPol); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrcF32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrcB32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcB32();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVCSrcV2FP32() const {
    return isVCSrcF64();
  }

  bool isVSrcV2FP32() const {
    return isVSrcF64() || isLiteralImm(MVT::v2f32);
  }

  bool isVCSrcV2INT32() const {
    return isVCSrcB64();
  }

  bool isVSrcV2INT32() const {
    return isVSrcB64() || isLiteralImm(MVT::v2i32);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512B32();
  }

  bool isVISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024B32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isVISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128F16() || isVISrc_128B32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// A kernel scope begins at an .amdgpu_hsa_kernel directive and ends at the
// next .amdgpu_hsa_kernel directive or at EOF.
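// For example (illustrative only), after an
//   .amdgpu_hsa_kernel add_kernel
// directive, a subsequent use of v2 (e.g. "v_add_f32 v2, v0, v1") raises the
// .kernel.vgpr_count symbol to 3.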
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;

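  // Record a use of SGPR number i and advance the minimum-unused index (and
  // the .kernel.sgpr_count symbol) past it; usesVgprAt() below is the VGPR
  // counterpart.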
  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
      case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
      case IS_AGPR: // fall through
      case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
      default: break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;
  unsigned CPolSeen;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
               const MCInstrInfo &MII,
               const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

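    // Pre-define symbols that describe the target ISA version so assembly
    // source can refer to them (.amdgcn.gfx_generation_* when targeting the
    // HSA V3 ABI, .option.machine_version_* otherwise).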
    {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific
      // target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX90A() const {
    return AMDGPU::isGFX90A(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(StringRef Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseCPol(OperandVector &Operands);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value,
                                             SMLoc &StringLoc);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
  OperandMatchResultTy parseUfmt(int64_t &Format);
  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
  OperandMatchResultTy parseNumericFormat(int64_t &Format);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
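  // One field of a composite operand such as hwreg() or sendmsg(): the parsed
  // value plus its source location and how it was specified, for diagnostics.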
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  bool parseHwregBody(OperandInfoTy &HwReg,
                      OperandInfoTy &Offset,
                      OperandInfoTy &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const OperandInfoTy &Offset,
                     const OperandInfoTy &Width);

  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                      const OperandVector &Operands) const;
  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
  SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
  SMLoc getLitLoc(const OperandVector &Operands) const;
  SMLoc getConstLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateDivScale(const MCInst &Inst);
  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                             const SMLoc &IDLoc);
  Optional<StringRef> validateLdsDirect(const MCInst &Inst);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS);
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS,
                        ArrayRef<unsigned> Variants);
  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Pref, const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseId(StringRef &Val, const StringRef ErrMsg = "");

  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm, StringRef Expected = "");
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  bool parseSwizzleOperand(int64_t &Op,
                           const unsigned MinVal,
                           const unsigned MaxVal,
                           const StringRef ErrMsg,
                           SMLoc &Loc);
  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultCPol() const;
  AMDGPUOperand::Ptr defaultCPol_GLC1() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMEMOffset() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
  void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);

  void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);

  bool parseDimId(unsigned &Encoding);
  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
  int64_t parseDPPCtrlSel(StringRef Ctrl);
  int64_t parseDPPCtrlPerm();
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1662                                     AMDGPUOperand::ImmTy Type);
1663   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1664   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1665   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1666   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1667   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1668   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1669   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1670                uint64_t BasicInstType,
1671                bool SkipDstVcc = false,
1672                bool SkipSrcVcc = false);
1673 
1674   AMDGPUOperand::Ptr defaultBLGP() const;
1675   AMDGPUOperand::Ptr defaultCBSZ() const;
1676   AMDGPUOperand::Ptr defaultABID() const;
1677 
1678   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1679   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1680 };
1681 
1682 struct OptionalOperand {
1683   const char *Name;
1684   AMDGPUOperand::ImmTy Type;
1685   bool IsBit;
1686   bool (*ConvertResult)(int64_t&);
1687 };
1688 
1689 } // end anonymous namespace
1690 
1691 // May be called with an integer type of equivalent bitwidth.
1692 static const fltSemantics *getFltSemantics(unsigned Size) {
1693   switch (Size) {
1694   case 4:
1695     return &APFloat::IEEEsingle();
1696   case 8:
1697     return &APFloat::IEEEdouble();
1698   case 2:
1699     return &APFloat::IEEEhalf();
1700   default:
1701     llvm_unreachable("unsupported fp type");
1702   }
1703 }
1704 
1705 static const fltSemantics *getFltSemantics(MVT VT) {
1706   return getFltSemantics(VT.getSizeInBits() / 8);
1707 }
1708 
1709 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1710   switch (OperandType) {
1711   case AMDGPU::OPERAND_REG_IMM_INT32:
1712   case AMDGPU::OPERAND_REG_IMM_FP32:
1713   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1714   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1715   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1716   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1717   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1718   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1719   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1720   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1721     return &APFloat::IEEEsingle();
1722   case AMDGPU::OPERAND_REG_IMM_INT64:
1723   case AMDGPU::OPERAND_REG_IMM_FP64:
1724   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1725   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1726   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1727     return &APFloat::IEEEdouble();
1728   case AMDGPU::OPERAND_REG_IMM_INT16:
1729   case AMDGPU::OPERAND_REG_IMM_FP16:
1730   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1731   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1732   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1733   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1734   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1735   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1736   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1737   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1738   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1739   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1740     return &APFloat::IEEEhalf();
1741   default:
1742     llvm_unreachable("unsupported fp type");
1743   }
1744 }
1745 
1746 //===----------------------------------------------------------------------===//
1747 // Operand
1748 //===----------------------------------------------------------------------===//
1749 
1750 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1751   bool Lost;
1752 
1753   // Convert the literal to the given floating-point type
1754   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1755                                                APFloat::rmNearestTiesToEven,
1756                                                &Lost);
1757   // We allow precision loss but not overflow or underflow
1758   if (Status != APFloat::opOK &&
1759       Lost &&
1760       ((Status & APFloat::opOverflow)  != 0 ||
1761        (Status & APFloat::opUnderflow) != 0)) {
1762     return false;
1763   }
1764 
1765   return true;
1766 }
1767 
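// Check whether Val fits in Size bits as either a signed or an unsigned
// integer, i.e. it can be truncated to Size bits without losing information.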
1768 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1769   return isUIntN(Size, Val) || isIntN(Size, Val);
1770 }
1771 
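// Check whether a literal for a 16-bit (or packed 16-bit) operand can be
// encoded as an inline constant.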
1772 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1773   if (VT.getScalarType() == MVT::i16) {
1774     // FP immediate values are broken for i16; only accept inlinable integer literals.
1775     return isInlinableIntLiteral(Val);
1776   }
1777 
1778   // f16/v2f16 operands work correctly for all values.
1779   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1780 }
1781 
1782 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1783 
1784   // This is a hack to enable named inline values like
1785   // shared_base with both 32-bit and 64-bit operands.
1786   // Note that these values are defined as
1787   // 32-bit operands only.
1788   if (isInlineValue()) {
1789     return true;
1790   }
1791 
1792   if (!isImmTy(ImmTyNone)) {
1793     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1794     return false;
1795   }
1796   // TODO: We should avoid using host float here. It would be better to
1797   // check the float bit values, which is what a few other places do.
1798   // We've had bot failures before due to weird NaN support on MIPS hosts.
1799 
1800   APInt Literal(64, Imm.Val);
1801 
1802   if (Imm.IsFPImm) { // We got fp literal token
1803     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1804       return AMDGPU::isInlinableLiteral64(Imm.Val,
1805                                           AsmParser->hasInv2PiInlineImm());
1806     }
1807 
1808     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1809     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1810       return false;
1811 
1812     if (type.getScalarSizeInBits() == 16) {
1813       return isInlineableLiteralOp16(
1814         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1815         type, AsmParser->hasInv2PiInlineImm());
1816     }
1817 
1818     // Check if the single-precision literal is inlinable
1819     return AMDGPU::isInlinableLiteral32(
1820       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1821       AsmParser->hasInv2PiInlineImm());
1822   }
1823 
1824   // We got int literal token.
1825   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1826     return AMDGPU::isInlinableLiteral64(Imm.Val,
1827                                         AsmParser->hasInv2PiInlineImm());
1828   }
1829 
1830   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1831     return false;
1832   }
1833 
1834   if (type.getScalarSizeInBits() == 16) {
1835     return isInlineableLiteralOp16(
1836       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1837       type, AsmParser->hasInv2PiInlineImm());
1838   }
1839 
1840   return AMDGPU::isInlinableLiteral32(
1841     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1842     AsmParser->hasInv2PiInlineImm());
1843 }
1844 
1845 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1846   // Check that this immediate can be added as a literal
1847   if (!isImmTy(ImmTyNone)) {
1848     return false;
1849   }
1850 
1851   if (!Imm.IsFPImm) {
1852     // We got int literal token.
1853 
1854     if (type == MVT::f64 && hasFPModifiers()) {
1855       // Cannot apply fp modifiers to int literals preserving the same semantics
1856       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1857       // disable these cases.
1858       return false;
1859     }
1860 
1861     unsigned Size = type.getSizeInBits();
1862     if (Size == 64)
1863       Size = 32;
1864 
1865     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1866     // types.
1867     return isSafeTruncation(Imm.Val, Size);
1868   }
1869 
1870   // We got fp literal token
1871   if (type == MVT::f64) { // Expected 64-bit fp operand
1872     // The low 32 bits of the literal would be set to zeroes, but we accept such literals
1873     return true;
1874   }
1875 
1876   if (type == MVT::i64) { // Expected 64-bit int operand
1877     // We don't allow fp literals in 64-bit integer instructions. It is
1878     // unclear how we should encode them.
1879     return false;
1880   }
1881 
1882   // We allow fp literals with f16x2 operands assuming that the specified
1883   // literal goes into the lower half and the upper half is zero. We also
1884   // require that the literal can be losslessly converted to f16.
1885   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1886                      (type == MVT::v2i16)? MVT::i16 :
1887                      (type == MVT::v2f32)? MVT::f32 : type;
1888 
1889   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1890   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1891 }
1892 
1893 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1894   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1895 }
1896 
1897 bool AMDGPUOperand::isVRegWithInputMods() const {
1898   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1899          // GFX90A allows DPP on 64-bit operands.
1900          (isRegClass(AMDGPU::VReg_64RegClassID) &&
1901           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1902 }
1903 
1904 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1905   if (AsmParser->isVI())
1906     return isVReg32();
1907   else if (AsmParser->isGFX9Plus())
1908     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1909   else
1910     return false;
1911 }
1912 
1913 bool AMDGPUOperand::isSDWAFP16Operand() const {
1914   return isSDWAOperand(MVT::f16);
1915 }
1916 
1917 bool AMDGPUOperand::isSDWAFP32Operand() const {
1918   return isSDWAOperand(MVT::f32);
1919 }
1920 
1921 bool AMDGPUOperand::isSDWAInt16Operand() const {
1922   return isSDWAOperand(MVT::i16);
1923 }
1924 
1925 bool AMDGPUOperand::isSDWAInt32Operand() const {
1926   return isSDWAOperand(MVT::i32);
1927 }
1928 
1929 bool AMDGPUOperand::isBoolReg() const {
1930   return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1931          (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1932 }
1933 
1934 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1935 {
1936   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1937   assert(Size == 2 || Size == 4 || Size == 8);
1938 
1939   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1940 
1941   if (Imm.Mods.Abs) {
1942     Val &= ~FpSignMask;
1943   }
1944   if (Imm.Mods.Neg) {
1945     Val ^= FpSignMask;
1946   }
1947 
1948   return Val;
1949 }
1950 
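// Add this immediate to Inst. SI source operands go through literal/inline
// constant handling; any other operand is added as a plain immediate.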
1951 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1952   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1953                              Inst.getNumOperands())) {
1954     addLiteralImmOperand(Inst, Imm.Val,
1955                          ApplyModifiers &
1956                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1957   } else {
1958     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1959     Inst.addOperand(MCOperand::createImm(Imm.Val));
1960     setImmKindNone();
1961   }
1962 }
1963 
1964 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1965   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1966   auto OpNum = Inst.getNumOperands();
1967   // Check that this operand accepts literals
1968   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1969 
1970   if (ApplyModifiers) {
1971     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1972     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1973     Val = applyInputFPModifiers(Val, Size);
1974   }
1975 
1976   APInt Literal(64, Val);
1977   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1978 
1979   if (Imm.IsFPImm) { // We got fp literal token
1980     switch (OpTy) {
1981     case AMDGPU::OPERAND_REG_IMM_INT64:
1982     case AMDGPU::OPERAND_REG_IMM_FP64:
1983     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1984     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1985     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1986       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1987                                        AsmParser->hasInv2PiInlineImm())) {
1988         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1989         setImmKindConst();
1990         return;
1991       }
1992 
1993       // Non-inlineable
1994       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1995         // For fp operands we check if low 32 bits are zeros
1996         if (Literal.getLoBits(32) != 0) {
1997           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1998           "Can't encode literal as exact 64-bit floating-point operand. "
1999           "Low 32-bits will be set to zero");
2000         }
2001 
2002         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2003         setImmKindLiteral();
2004         return;
2005       }
2006 
2007       // We don't allow fp literals in 64-bit integer instructions. It is
2008       // unclear how we should encode them. This case should be checked earlier
2009       // in predicate methods (isLiteralImm())
2010       llvm_unreachable("fp literal in 64-bit integer instruction.");
2011 
2012     case AMDGPU::OPERAND_REG_IMM_INT32:
2013     case AMDGPU::OPERAND_REG_IMM_FP32:
2014     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2015     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2016     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2017     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2018     case AMDGPU::OPERAND_REG_IMM_INT16:
2019     case AMDGPU::OPERAND_REG_IMM_FP16:
2020     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2021     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2022     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2023     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2024     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2025     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2026     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2027     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2028     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2029     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2030     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2031     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2032     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2033     case AMDGPU::OPERAND_REG_IMM_V2INT32: {
2034       bool lost;
2035       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2036       // Convert the literal to the operand's floating-point type
2037       FPLiteral.convert(*getOpFltSemantics(OpTy),
2038                         APFloat::rmNearestTiesToEven, &lost);
2039       // We allow precision loss but not overflow or underflow. This should be
2040       // checked earlier in isLiteralImm()
2041 
2042       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2043       Inst.addOperand(MCOperand::createImm(ImmVal));
2044       setImmKindLiteral();
2045       return;
2046     }
2047     default:
2048       llvm_unreachable("invalid operand size");
2049     }
2050 
2051     return;
2052   }
2053 
2054   // We got int literal token.
2055   // Only sign extend inline immediates.
2056   switch (OpTy) {
2057   case AMDGPU::OPERAND_REG_IMM_INT32:
2058   case AMDGPU::OPERAND_REG_IMM_FP32:
2059   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2060   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2061   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2062   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2063   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2064   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2065   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2066   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2067   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2068   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2069     if (isSafeTruncation(Val, 32) &&
2070         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2071                                      AsmParser->hasInv2PiInlineImm())) {
2072       Inst.addOperand(MCOperand::createImm(Val));
2073       setImmKindConst();
2074       return;
2075     }
2076 
2077     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2078     setImmKindLiteral();
2079     return;
2080 
2081   case AMDGPU::OPERAND_REG_IMM_INT64:
2082   case AMDGPU::OPERAND_REG_IMM_FP64:
2083   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2084   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2085   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2086     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2087       Inst.addOperand(MCOperand::createImm(Val));
2088       setImmKindConst();
2089       return;
2090     }
2091 
2092     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2093     setImmKindLiteral();
2094     return;
2095 
2096   case AMDGPU::OPERAND_REG_IMM_INT16:
2097   case AMDGPU::OPERAND_REG_IMM_FP16:
2098   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2099   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2100   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2101   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2102     if (isSafeTruncation(Val, 16) &&
2103         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2104                                      AsmParser->hasInv2PiInlineImm())) {
2105       Inst.addOperand(MCOperand::createImm(Val));
2106       setImmKindConst();
2107       return;
2108     }
2109 
2110     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2111     setImmKindLiteral();
2112     return;
2113 
2114   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2115   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2116   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2117   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2118     assert(isSafeTruncation(Val, 16));
2119     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2120                                         AsmParser->hasInv2PiInlineImm()));
2121 
2122     Inst.addOperand(MCOperand::createImm(Val));
2123     return;
2124   }
2125   default:
2126     llvm_unreachable("invalid operand size");
2127   }
2128 }
2129 
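// Add a KImm operand of the given bitwidth. Integer literals are truncated to
// the bitwidth; fp literals are first converted to the matching fp format.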
2130 template <unsigned Bitwidth>
2131 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2132   APInt Literal(64, Imm.Val);
2133   setImmKindNone();
2134 
2135   if (!Imm.IsFPImm) {
2136     // We got int literal token.
2137     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2138     return;
2139   }
2140 
2141   bool Lost;
2142   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2143   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2144                     APFloat::rmNearestTiesToEven, &Lost);
2145   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2146 }
2147 
2148 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2149   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2150 }
2151 
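// Check whether Reg is one of the named inline values (shared/private base
// and limit, pops_exiting_wave_id, vccz, execz, scc or null).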
2152 static bool isInlineValue(unsigned Reg) {
2153   switch (Reg) {
2154   case AMDGPU::SRC_SHARED_BASE:
2155   case AMDGPU::SRC_SHARED_LIMIT:
2156   case AMDGPU::SRC_PRIVATE_BASE:
2157   case AMDGPU::SRC_PRIVATE_LIMIT:
2158   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2159     return true;
2160   case AMDGPU::SRC_VCCZ:
2161   case AMDGPU::SRC_EXECZ:
2162   case AMDGPU::SRC_SCC:
2163     return true;
2164   case AMDGPU::SGPR_NULL:
2165     return true;
2166   default:
2167     return false;
2168   }
2169 }
2170 
2171 bool AMDGPUOperand::isInlineValue() const {
2172   return isRegKind() && ::isInlineValue(getReg());
2173 }
2174 
2175 //===----------------------------------------------------------------------===//
2176 // AsmParser
2177 //===----------------------------------------------------------------------===//
2178 
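// Map a register kind and a width in 32-bit registers to the corresponding
// register class ID, or return -1 if no such class exists.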
2179 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2180   if (Is == IS_VGPR) {
2181     switch (RegWidth) {
2182       default: return -1;
2183       case 1: return AMDGPU::VGPR_32RegClassID;
2184       case 2: return AMDGPU::VReg_64RegClassID;
2185       case 3: return AMDGPU::VReg_96RegClassID;
2186       case 4: return AMDGPU::VReg_128RegClassID;
2187       case 5: return AMDGPU::VReg_160RegClassID;
2188       case 6: return AMDGPU::VReg_192RegClassID;
2189       case 8: return AMDGPU::VReg_256RegClassID;
2190       case 16: return AMDGPU::VReg_512RegClassID;
2191       case 32: return AMDGPU::VReg_1024RegClassID;
2192     }
2193   } else if (Is == IS_TTMP) {
2194     switch (RegWidth) {
2195       default: return -1;
2196       case 1: return AMDGPU::TTMP_32RegClassID;
2197       case 2: return AMDGPU::TTMP_64RegClassID;
2198       case 4: return AMDGPU::TTMP_128RegClassID;
2199       case 8: return AMDGPU::TTMP_256RegClassID;
2200       case 16: return AMDGPU::TTMP_512RegClassID;
2201     }
2202   } else if (Is == IS_SGPR) {
2203     switch (RegWidth) {
2204       default: return -1;
2205       case 1: return AMDGPU::SGPR_32RegClassID;
2206       case 2: return AMDGPU::SGPR_64RegClassID;
2207       case 3: return AMDGPU::SGPR_96RegClassID;
2208       case 4: return AMDGPU::SGPR_128RegClassID;
2209       case 5: return AMDGPU::SGPR_160RegClassID;
2210       case 6: return AMDGPU::SGPR_192RegClassID;
2211       case 8: return AMDGPU::SGPR_256RegClassID;
2212       case 16: return AMDGPU::SGPR_512RegClassID;
2213     }
2214   } else if (Is == IS_AGPR) {
2215     switch (RegWidth) {
2216       default: return -1;
2217       case 1: return AMDGPU::AGPR_32RegClassID;
2218       case 2: return AMDGPU::AReg_64RegClassID;
2219       case 3: return AMDGPU::AReg_96RegClassID;
2220       case 4: return AMDGPU::AReg_128RegClassID;
2221       case 5: return AMDGPU::AReg_160RegClassID;
2222       case 6: return AMDGPU::AReg_192RegClassID;
2223       case 8: return AMDGPU::AReg_256RegClassID;
2224       case 16: return AMDGPU::AReg_512RegClassID;
2225       case 32: return AMDGPU::AReg_1024RegClassID;
2226     }
2227   }
2228   return -1;
2229 }
2230 
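// Map a special register name (including aliases such as "shared_base") to
// its MC register, or return NoRegister if the name is not recognized.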
2231 static unsigned getSpecialRegForName(StringRef RegName) {
2232   return StringSwitch<unsigned>(RegName)
2233     .Case("exec", AMDGPU::EXEC)
2234     .Case("vcc", AMDGPU::VCC)
2235     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2236     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2237     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2238     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2239     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2240     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2241     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2242     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2243     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2244     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2245     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2246     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2247     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2248     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2249     .Case("m0", AMDGPU::M0)
2250     .Case("vccz", AMDGPU::SRC_VCCZ)
2251     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2252     .Case("execz", AMDGPU::SRC_EXECZ)
2253     .Case("src_execz", AMDGPU::SRC_EXECZ)
2254     .Case("scc", AMDGPU::SRC_SCC)
2255     .Case("src_scc", AMDGPU::SRC_SCC)
2256     .Case("tba", AMDGPU::TBA)
2257     .Case("tma", AMDGPU::TMA)
2258     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2259     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2260     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2261     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2262     .Case("vcc_lo", AMDGPU::VCC_LO)
2263     .Case("vcc_hi", AMDGPU::VCC_HI)
2264     .Case("exec_lo", AMDGPU::EXEC_LO)
2265     .Case("exec_hi", AMDGPU::EXEC_HI)
2266     .Case("tma_lo", AMDGPU::TMA_LO)
2267     .Case("tma_hi", AMDGPU::TMA_HI)
2268     .Case("tba_lo", AMDGPU::TBA_LO)
2269     .Case("tba_hi", AMDGPU::TBA_HI)
2270     .Case("pc", AMDGPU::PC_REG)
2271     .Case("null", AMDGPU::SGPR_NULL)
2272     .Default(AMDGPU::NoRegister);
2273 }
2274 
2275 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2276                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2277   auto R = parseRegister();
2278   if (!R) return true;
2279   assert(R->isReg());
2280   RegNo = R->getReg();
2281   StartLoc = R->getStartLoc();
2282   EndLoc = R->getEndLoc();
2283   return false;
2284 }
2285 
2286 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2287                                     SMLoc &EndLoc) {
2288   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2289 }
2290 
2291 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2292                                                        SMLoc &StartLoc,
2293                                                        SMLoc &EndLoc) {
2294   bool Result =
2295       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2296   bool PendingErrors = getParser().hasPendingError();
2297   getParser().clearPendingErrors();
2298   if (PendingErrors)
2299     return MatchOperand_ParseFail;
2300   if (Result)
2301     return MatchOperand_NoMatch;
2302   return MatchOperand_Success;
2303 }
2304 
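// Add the next register (Reg1) to the register group collected from a list
// like [s0,s1,s2,s3]: lo/hi halves of special registers are merged into the
// full register, and regular registers must have consecutive indices.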
2305 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2306                                             RegisterKind RegKind, unsigned Reg1,
2307                                             SMLoc Loc) {
2308   switch (RegKind) {
2309   case IS_SPECIAL:
2310     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2311       Reg = AMDGPU::EXEC;
2312       RegWidth = 2;
2313       return true;
2314     }
2315     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2316       Reg = AMDGPU::FLAT_SCR;
2317       RegWidth = 2;
2318       return true;
2319     }
2320     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2321       Reg = AMDGPU::XNACK_MASK;
2322       RegWidth = 2;
2323       return true;
2324     }
2325     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2326       Reg = AMDGPU::VCC;
2327       RegWidth = 2;
2328       return true;
2329     }
2330     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2331       Reg = AMDGPU::TBA;
2332       RegWidth = 2;
2333       return true;
2334     }
2335     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2336       Reg = AMDGPU::TMA;
2337       RegWidth = 2;
2338       return true;
2339     }
2340     Error(Loc, "register does not fit in the list");
2341     return false;
2342   case IS_VGPR:
2343   case IS_SGPR:
2344   case IS_AGPR:
2345   case IS_TTMP:
2346     if (Reg1 != Reg + RegWidth) {
2347       Error(Loc, "registers in a list must have consecutive indices");
2348       return false;
2349     }
2350     RegWidth++;
2351     return true;
2352   default:
2353     llvm_unreachable("unexpected register kind");
2354   }
2355 }
2356 
2357 struct RegInfo {
2358   StringLiteral Name;
2359   RegisterKind Kind;
2360 };
2361 
2362 static constexpr RegInfo RegularRegisters[] = {
2363   {{"v"},    IS_VGPR},
2364   {{"s"},    IS_SGPR},
2365   {{"ttmp"}, IS_TTMP},
2366   {{"acc"},  IS_AGPR},
2367   {{"a"},    IS_AGPR},
2368 };
2369 
2370 static bool isRegularReg(RegisterKind Kind) {
2371   return Kind == IS_VGPR ||
2372          Kind == IS_SGPR ||
2373          Kind == IS_TTMP ||
2374          Kind == IS_AGPR;
2375 }
2376 
2377 static const RegInfo* getRegularRegInfo(StringRef Str) {
2378   for (const RegInfo &Reg : RegularRegisters)
2379     if (Str.startswith(Reg.Name))
2380       return &Reg;
2381   return nullptr;
2382 }
2383 
2384 static bool getRegNum(StringRef Str, unsigned& Num) {
2385   return !Str.getAsInteger(10, Num);
2386 }
2387 
2388 bool
2389 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2390                             const AsmToken &NextToken) const {
2391 
2392   // A list of consecutive registers: [s0,s1,s2,s3]
2393   if (Token.is(AsmToken::LBrac))
2394     return true;
2395 
2396   if (!Token.is(AsmToken::Identifier))
2397     return false;
2398 
2399   // A single register like s0 or a range of registers like s[0:1]
2400 
2401   StringRef Str = Token.getString();
2402   const RegInfo *Reg = getRegularRegInfo(Str);
2403   if (Reg) {
2404     StringRef RegName = Reg->Name;
2405     StringRef RegSuffix = Str.substr(RegName.size());
2406     if (!RegSuffix.empty()) {
2407       unsigned Num;
2408       // A single register with an index: rXX
2409       if (getRegNum(RegSuffix, Num))
2410         return true;
2411     } else {
2412       // A range of registers: r[XX:YY].
2413       if (NextToken.is(AsmToken::LBrac))
2414         return true;
2415     }
2416   }
2417 
2418   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2419 }
2420 
2421 bool
2422 AMDGPUAsmParser::isRegister()
2423 {
2424   return isRegister(getToken(), peekToken());
2425 }
2426 
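// Compute the MC register for a regular register (v, s, ttmp or a) from its
// kind, first 32-bit register index and width, diagnosing misaligned or
// out-of-range indices.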
2427 unsigned
2428 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2429                                unsigned RegNum,
2430                                unsigned RegWidth,
2431                                SMLoc Loc) {
2432 
2433   assert(isRegularReg(RegKind));
2434 
2435   unsigned AlignSize = 1;
2436   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2437     // SGPR and TTMP registers must be aligned.
2438     // Max required alignment is 4 dwords.
2439     AlignSize = std::min(RegWidth, 4u);
2440   }
2441 
2442   if (RegNum % AlignSize != 0) {
2443     Error(Loc, "invalid register alignment");
2444     return AMDGPU::NoRegister;
2445   }
2446 
2447   unsigned RegIdx = RegNum / AlignSize;
2448   int RCID = getRegClass(RegKind, RegWidth);
2449   if (RCID == -1) {
2450     Error(Loc, "invalid or unsupported register size");
2451     return AMDGPU::NoRegister;
2452   }
2453 
2454   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2455   const MCRegisterClass RC = TRI->getRegClass(RCID);
2456   if (RegIdx >= RC.getNumRegs()) {
2457     Error(Loc, "register index is out of range");
2458     return AMDGPU::NoRegister;
2459   }
2460 
2461   return RC.getRegister(RegIdx);
2462 }
2463 
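// Parse a register index range of the form "[<lo>]" or "[<lo>:<hi>]" and
// return the first index and the number of registers in the range.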
2464 bool
2465 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2466   int64_t RegLo, RegHi;
2467   if (!skipToken(AsmToken::LBrac, "missing register index"))
2468     return false;
2469 
2470   SMLoc FirstIdxLoc = getLoc();
2471   SMLoc SecondIdxLoc;
2472 
2473   if (!parseExpr(RegLo))
2474     return false;
2475 
2476   if (trySkipToken(AsmToken::Colon)) {
2477     SecondIdxLoc = getLoc();
2478     if (!parseExpr(RegHi))
2479       return false;
2480   } else {
2481     RegHi = RegLo;
2482   }
2483 
2484   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2485     return false;
2486 
2487   if (!isUInt<32>(RegLo)) {
2488     Error(FirstIdxLoc, "invalid register index");
2489     return false;
2490   }
2491 
2492   if (!isUInt<32>(RegHi)) {
2493     Error(SecondIdxLoc, "invalid register index");
2494     return false;
2495   }
2496 
2497   if (RegLo > RegHi) {
2498     Error(FirstIdxLoc, "first register index should not exceed second index");
2499     return false;
2500   }
2501 
2502   Num = static_cast<unsigned>(RegLo);
2503   Width = (RegHi - RegLo) + 1;
2504   return true;
2505 }
2506 
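// Try to parse a special register name (vcc, exec, m0, ...). The token is
// consumed only if the name is recognized; otherwise NoRegister is returned.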
2507 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2508                                           unsigned &RegNum, unsigned &RegWidth,
2509                                           SmallVectorImpl<AsmToken> &Tokens) {
2510   assert(isToken(AsmToken::Identifier));
2511   unsigned Reg = getSpecialRegForName(getTokenStr());
2512   if (Reg) {
2513     RegNum = 0;
2514     RegWidth = 1;
2515     RegKind = IS_SPECIAL;
2516     Tokens.push_back(getToken());
2517     lex(); // skip register name
2518   }
2519   return Reg;
2520 }
2521 
2522 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2523                                           unsigned &RegNum, unsigned &RegWidth,
2524                                           SmallVectorImpl<AsmToken> &Tokens) {
2525   assert(isToken(AsmToken::Identifier));
2526   StringRef RegName = getTokenStr();
2527   auto Loc = getLoc();
2528 
2529   const RegInfo *RI = getRegularRegInfo(RegName);
2530   if (!RI) {
2531     Error(Loc, "invalid register name");
2532     return AMDGPU::NoRegister;
2533   }
2534 
2535   Tokens.push_back(getToken());
2536   lex(); // skip register name
2537 
2538   RegKind = RI->Kind;
2539   StringRef RegSuffix = RegName.substr(RI->Name.size());
2540   if (!RegSuffix.empty()) {
2541     // Single 32-bit register: vXX.
2542     if (!getRegNum(RegSuffix, RegNum)) {
2543       Error(Loc, "invalid register index");
2544       return AMDGPU::NoRegister;
2545     }
2546     RegWidth = 1;
2547   } else {
2548     // Range of registers: v[XX:YY]. ":YY" is optional.
2549     if (!ParseRegRange(RegNum, RegWidth))
2550       return AMDGPU::NoRegister;
2551   }
2552 
2553   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2554 }
2555 
2556 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2557                                        unsigned &RegWidth,
2558                                        SmallVectorImpl<AsmToken> &Tokens) {
2559   unsigned Reg = AMDGPU::NoRegister;
2560   auto ListLoc = getLoc();
2561 
2562   if (!skipToken(AsmToken::LBrac,
2563                  "expected a register or a list of registers")) {
2564     return AMDGPU::NoRegister;
2565   }
2566 
2567   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2568 
2569   auto Loc = getLoc();
2570   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2571     return AMDGPU::NoRegister;
2572   if (RegWidth != 1) {
2573     Error(Loc, "expected a single 32-bit register");
2574     return AMDGPU::NoRegister;
2575   }
2576 
2577   for (; trySkipToken(AsmToken::Comma); ) {
2578     RegisterKind NextRegKind;
2579     unsigned NextReg, NextRegNum, NextRegWidth;
2580     Loc = getLoc();
2581 
2582     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2583                              NextRegNum, NextRegWidth,
2584                              Tokens)) {
2585       return AMDGPU::NoRegister;
2586     }
2587     if (NextRegWidth != 1) {
2588       Error(Loc, "expected a single 32-bit register");
2589       return AMDGPU::NoRegister;
2590     }
2591     if (NextRegKind != RegKind) {
2592       Error(Loc, "registers in a list must be of the same kind");
2593       return AMDGPU::NoRegister;
2594     }
2595     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2596       return AMDGPU::NoRegister;
2597   }
2598 
2599   if (!skipToken(AsmToken::RBrac,
2600                  "expected a comma or a closing square bracket")) {
2601     return AMDGPU::NoRegister;
2602   }
2603 
2604   if (isRegularReg(RegKind))
2605     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2606 
2607   return Reg;
2608 }
2609 
2610 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2611                                           unsigned &RegNum, unsigned &RegWidth,
2612                                           SmallVectorImpl<AsmToken> &Tokens) {
2613   auto Loc = getLoc();
2614   Reg = AMDGPU::NoRegister;
2615 
2616   if (isToken(AsmToken::Identifier)) {
2617     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2618     if (Reg == AMDGPU::NoRegister)
2619       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2620   } else {
2621     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2622   }
2623 
2624   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2625   if (Reg == AMDGPU::NoRegister) {
2626     assert(Parser.hasPendingError());
2627     return false;
2628   }
2629 
2630   if (!subtargetHasRegister(*TRI, Reg)) {
2631     if (Reg == AMDGPU::SGPR_NULL) {
2632       Error(Loc, "'null' operand is not supported on this GPU");
2633     } else {
2634       Error(Loc, "register not available on this GPU");
2635     }
2636     return false;
2637   }
2638 
2639   return true;
2640 }
2641 
2642 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2643                                           unsigned &RegNum, unsigned &RegWidth,
2644                                           bool RestoreOnFailure /*=false*/) {
2645   Reg = AMDGPU::NoRegister;
2646 
2647   SmallVector<AsmToken, 1> Tokens;
2648   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2649     if (RestoreOnFailure) {
2650       while (!Tokens.empty()) {
2651         getLexer().UnLex(Tokens.pop_back_val());
2652       }
2653     }
2654     return true;
2655   }
2656   return false;
2657 }
2658 
2659 Optional<StringRef>
2660 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2661   switch (RegKind) {
2662   case IS_VGPR:
2663     return StringRef(".amdgcn.next_free_vgpr");
2664   case IS_SGPR:
2665     return StringRef(".amdgcn.next_free_sgpr");
2666   default:
2667     return None;
2668   }
2669 }
2670 
2671 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2672   auto SymbolName = getGprCountSymbolName(RegKind);
2673   assert(SymbolName && "initializing invalid register kind");
2674   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2675   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2676 }
2677 
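// Keep the .amdgcn.next_free_{v,s}gpr symbol pointing one past the highest
// register index used so far.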
2678 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2679                                             unsigned DwordRegIndex,
2680                                             unsigned RegWidth) {
2681   // Symbols are only defined for GCN targets
2682   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2683     return true;
2684 
2685   auto SymbolName = getGprCountSymbolName(RegKind);
2686   if (!SymbolName)
2687     return true;
2688   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2689 
2690   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2691   int64_t OldCount;
2692 
2693   if (!Sym->isVariable())
2694     return !Error(getLoc(),
2695                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2696   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2697     return !Error(
2698         getLoc(),
2699         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2700 
2701   if (OldCount <= NewMax)
2702     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2703 
2704   return true;
2705 }
2706 
2707 std::unique_ptr<AMDGPUOperand>
2708 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2709   const auto &Tok = getToken();
2710   SMLoc StartLoc = Tok.getLoc();
2711   SMLoc EndLoc = Tok.getEndLoc();
2712   RegisterKind RegKind;
2713   unsigned Reg, RegNum, RegWidth;
2714 
2715   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2716     return nullptr;
2717   }
2718   if (isHsaAbiVersion3(&getSTI())) {
2719     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2720       return nullptr;
2721   } else
2722     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2723   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2724 }
2725 
2726 OperandMatchResultTy
2727 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2728   // TODO: add syntactic sugar for 1/(2*PI)
2729 
2730   assert(!isRegister());
2731   assert(!isModifier());
2732 
2733   const auto& Tok = getToken();
2734   const auto& NextTok = peekToken();
2735   bool IsReal = Tok.is(AsmToken::Real);
2736   SMLoc S = getLoc();
2737   bool Negate = false;
2738 
2739   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2740     lex();
2741     IsReal = true;
2742     Negate = true;
2743   }
2744 
2745   if (IsReal) {
2746     // Floating-point expressions are not supported.
2747     // Can only allow floating-point literals with an
2748     // optional sign.
2749 
2750     StringRef Num = getTokenStr();
2751     lex();
2752 
2753     APFloat RealVal(APFloat::IEEEdouble());
2754     auto roundMode = APFloat::rmNearestTiesToEven;
2755     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2756       return MatchOperand_ParseFail;
2757     }
2758     if (Negate)
2759       RealVal.changeSign();
2760 
2761     Operands.push_back(
2762       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2763                                AMDGPUOperand::ImmTyNone, true));
2764 
2765     return MatchOperand_Success;
2766 
2767   } else {
2768     int64_t IntVal;
2769     const MCExpr *Expr;
2770     SMLoc S = getLoc();
2771 
2772     if (HasSP3AbsModifier) {
2773       // This is a workaround for handling expressions
2774       // as arguments of the SP3 'abs' modifier, for example:
2775       //     |1.0|
2776       //     |-1|
2777       //     |1+x|
2778       // This syntax is not compatible with the syntax of standard
2779       // MC expressions (due to the trailing '|').
2780       SMLoc EndLoc;
2781       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2782         return MatchOperand_ParseFail;
2783     } else {
2784       if (Parser.parseExpression(Expr))
2785         return MatchOperand_ParseFail;
2786     }
2787 
2788     if (Expr->evaluateAsAbsolute(IntVal)) {
2789       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2790     } else {
2791       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2792     }
2793 
2794     return MatchOperand_Success;
2795   }
2796 
2797   return MatchOperand_NoMatch;
2798 }
2799 
2800 OperandMatchResultTy
2801 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2802   if (!isRegister())
2803     return MatchOperand_NoMatch;
2804 
2805   if (auto R = parseRegister()) {
2806     assert(R->isReg());
2807     Operands.push_back(std::move(R));
2808     return MatchOperand_Success;
2809   }
2810   return MatchOperand_ParseFail;
2811 }
2812 
2813 OperandMatchResultTy
2814 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2815   auto res = parseReg(Operands);
2816   if (res != MatchOperand_NoMatch) {
2817     return res;
2818   } else if (isModifier()) {
2819     return MatchOperand_NoMatch;
2820   } else {
2821     return parseImm(Operands, HasSP3AbsMod);
2822   }
2823 }
2824 
2825 bool
2826 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2827   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2828     const auto &str = Token.getString();
2829     return str == "abs" || str == "neg" || str == "sext";
2830   }
2831   return false;
2832 }
2833 
2834 bool
2835 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2836   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2837 }
2838 
2839 bool
2840 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2841   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2842 }
2843 
2844 bool
2845 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2846   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2847 }
2848 
2849 // Check if this is an operand modifier or an opcode modifier
2850 // which may look like an expression but is not. We should
2851 // avoid parsing these modifiers as expressions. Currently
2852 // recognized sequences are:
2853 //   |...|
2854 //   abs(...)
2855 //   neg(...)
2856 //   sext(...)
2857 //   -reg
2858 //   -|...|
2859 //   -abs(...)
2860 //   name:...
2861 // Note that simple opcode modifiers like 'gds' may be parsed as
2862 // expressions; this is a special case. See getExpressionAsToken.
2863 //
2864 bool
2865 AMDGPUAsmParser::isModifier() {
2866 
2867   AsmToken Tok = getToken();
2868   AsmToken NextToken[2];
2869   peekTokens(NextToken);
2870 
2871   return isOperandModifier(Tok, NextToken[0]) ||
2872          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2873          isOpcodeModifierWithVal(Tok, NextToken[0]);
2874 }
2875 
2876 // Check if the current token is an SP3 'neg' modifier.
2877 // Currently this modifier is allowed in the following contexts:
2878 //
2879 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2880 // 2. Before an 'abs' modifier: -abs(...)
2881 // 3. Before an SP3 'abs' modifier: -|...|
2882 //
2883 // In all other cases "-" is handled as a part
2884 // of an expression that follows the sign.
2885 //
2886 // Note: When "-" is followed by an integer literal,
2887 // this is interpreted as integer negation rather
2888 // than a floating-point NEG modifier applied to the literal.
2889 // Besides being counter-intuitive, such use of a floating-point
2890 // NEG modifier would have resulted in different meanings
2891 // of integer literals used with VOP1/2/C and VOP3,
2892 // for example:
2893 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2894 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2895 // Negative fp literals with a preceding "-" are
2896 // handled likewise for uniformity.
2897 //
2898 bool
2899 AMDGPUAsmParser::parseSP3NegModifier() {
2900 
2901   AsmToken NextToken[2];
2902   peekTokens(NextToken);
2903 
2904   if (isToken(AsmToken::Minus) &&
2905       (isRegister(NextToken[0], NextToken[1]) ||
2906        NextToken[0].is(AsmToken::Pipe) ||
2907        isId(NextToken[0], "abs"))) {
2908     lex();
2909     return true;
2910   }
2911 
2912   return false;
2913 }
2914 
2915 OperandMatchResultTy
2916 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2917                                               bool AllowImm) {
2918   bool Neg, SP3Neg;
2919   bool Abs, SP3Abs;
2920   SMLoc Loc;
2921 
2922   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2923   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2924     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2925     return MatchOperand_ParseFail;
2926   }
2927 
2928   SP3Neg = parseSP3NegModifier();
2929 
2930   Loc = getLoc();
2931   Neg = trySkipId("neg");
2932   if (Neg && SP3Neg) {
2933     Error(Loc, "expected register or immediate");
2934     return MatchOperand_ParseFail;
2935   }
2936   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2937     return MatchOperand_ParseFail;
2938 
2939   Abs = trySkipId("abs");
2940   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2941     return MatchOperand_ParseFail;
2942 
2943   Loc = getLoc();
2944   SP3Abs = trySkipToken(AsmToken::Pipe);
2945   if (Abs && SP3Abs) {
2946     Error(Loc, "expected register or immediate");
2947     return MatchOperand_ParseFail;
2948   }
2949 
2950   OperandMatchResultTy Res;
2951   if (AllowImm) {
2952     Res = parseRegOrImm(Operands, SP3Abs);
2953   } else {
2954     Res = parseReg(Operands);
2955   }
2956   if (Res != MatchOperand_Success) {
2957     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2958   }
2959 
2960   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2961     return MatchOperand_ParseFail;
2962   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2963     return MatchOperand_ParseFail;
2964   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2965     return MatchOperand_ParseFail;
2966 
2967   AMDGPUOperand::Modifiers Mods;
2968   Mods.Abs = Abs || SP3Abs;
2969   Mods.Neg = Neg || SP3Neg;
2970 
2971   if (Mods.hasFPModifiers()) {
2972     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2973     if (Op.isExpr()) {
2974       Error(Op.getStartLoc(), "expected an absolute expression");
2975       return MatchOperand_ParseFail;
2976     }
2977     Op.setModifiers(Mods);
2978   }
2979   return MatchOperand_Success;
2980 }
2981 
2982 OperandMatchResultTy
2983 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2984                                                bool AllowImm) {
2985   bool Sext = trySkipId("sext");
2986   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2987     return MatchOperand_ParseFail;
2988 
2989   OperandMatchResultTy Res;
2990   if (AllowImm) {
2991     Res = parseRegOrImm(Operands);
2992   } else {
2993     Res = parseReg(Operands);
2994   }
2995   if (Res != MatchOperand_Success) {
2996     return Sext? MatchOperand_ParseFail : Res;
2997   }
2998 
2999   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3000     return MatchOperand_ParseFail;
3001 
3002   AMDGPUOperand::Modifiers Mods;
3003   Mods.Sext = Sext;
3004 
3005   if (Mods.hasIntModifiers()) {
3006     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3007     if (Op.isExpr()) {
3008       Error(Op.getStartLoc(), "expected an absolute expression");
3009       return MatchOperand_ParseFail;
3010     }
3011     Op.setModifiers(Mods);
3012   }
3013 
3014   return MatchOperand_Success;
3015 }
3016 
3017 OperandMatchResultTy
3018 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3019   return parseRegOrImmWithFPInputMods(Operands, false);
3020 }
3021 
3022 OperandMatchResultTy
3023 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3024   return parseRegOrImmWithIntInputMods(Operands, false);
3025 }
3026 
3027 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3028   auto Loc = getLoc();
3029   if (trySkipId("off")) {
3030     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3031                                                 AMDGPUOperand::ImmTyOff, false));
3032     return MatchOperand_Success;
3033   }
3034 
3035   if (!isRegister())
3036     return MatchOperand_NoMatch;
3037 
3038   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3039   if (Reg) {
3040     Operands.push_back(std::move(Reg));
3041     return MatchOperand_Success;
3042   }
3043 
3044   return MatchOperand_ParseFail;
3045 
3046 }
3047 
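// Additional target-specific match checks: honour forced e32/e64/SDWA/DPP
// encodings and require dst_sel == DWORD for v_mac_f32/f16 SDWA.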
3048 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3049   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3050 
3051   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3052       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3053       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3054       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3055     return Match_InvalidOperand;
3056 
3057   if ((TSFlags & SIInstrFlags::VOP3) &&
3058       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3059       getForcedEncodingSize() != 64)
3060     return Match_PreferE32;
3061 
3062   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3063       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3064     // v_mac_f32/16 allow only dst_sel == DWORD;
3065     auto OpNum =
3066         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3067     const auto &Op = Inst.getOperand(OpNum);
3068     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3069       return Match_InvalidOperand;
3070     }
3071   }
3072 
3073   return Match_Success;
3074 }
3075 
3076 static ArrayRef<unsigned> getAllVariants() {
3077   static const unsigned Variants[] = {
3078     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3079     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3080   };
3081 
3082   return makeArrayRef(Variants);
3083 }
3084 
3085 // What asm variants we should check
3086 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3087   if (getForcedEncodingSize() == 32) {
3088     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3089     return makeArrayRef(Variants);
3090   }
3091 
3092   if (isForcedVOP3()) {
3093     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3094     return makeArrayRef(Variants);
3095   }
3096 
3097   if (isForcedSDWA()) {
3098     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3099                                         AMDGPUAsmVariants::SDWA9};
3100     return makeArrayRef(Variants);
3101   }
3102 
3103   if (isForcedDPP()) {
3104     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3105     return makeArrayRef(Variants);
3106   }
3107 
3108   return getAllVariants();
3109 }
3110 
3111 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3112   if (getForcedEncodingSize() == 32)
3113     return "e32";
3114 
3115   if (isForcedVOP3())
3116     return "e64";
3117 
3118   if (isForcedSDWA())
3119     return "sdwa";
3120 
3121   if (isForcedDPP())
3122     return "dpp";
3123 
3124   return "";
3125 }
3126 
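// Return the first implicitly read SGPR (FLAT_SCR, VCC, VCC_LO/HI or M0) of
// the instruction, or NoRegister if there is none.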
3127 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3128   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3129   const unsigned Num = Desc.getNumImplicitUses();
3130   for (unsigned i = 0; i < Num; ++i) {
3131     unsigned Reg = Desc.ImplicitUses[i];
3132     switch (Reg) {
3133     case AMDGPU::FLAT_SCR:
3134     case AMDGPU::VCC:
3135     case AMDGPU::VCC_LO:
3136     case AMDGPU::VCC_HI:
3137     case AMDGPU::M0:
3138       return Reg;
3139     default:
3140       break;
3141     }
3142   }
3143   return AMDGPU::NoRegister;
3144 }
3145 
3146 // NB: This code is correct only when used to check constant
3147 // bus limitations because GFX7 supports no f16 inline constants.
3148 // Note that there are no cases when a GFX7 opcode violates
3149 // constant bus limitations due to the use of an f16 constant.
3150 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3151                                        unsigned OpIdx) const {
3152   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3153 
3154   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3155     return false;
3156   }
3157 
3158   const MCOperand &MO = Inst.getOperand(OpIdx);
3159 
3160   int64_t Val = MO.getImm();
3161   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3162 
3163   switch (OpSize) { // expected operand size
3164   case 8:
3165     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3166   case 4:
3167     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3168   case 2: {
3169     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3170     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3171         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3172         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3173       return AMDGPU::isInlinableIntLiteral(Val);
3174 
3175     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3176         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3177         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3178       return AMDGPU::isInlinableIntLiteralV216(Val);
3179 
3180     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3181         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3182         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3183       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3184 
3185     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3186   }
3187   default:
3188     llvm_unreachable("invalid operand size");
3189   }
3190 }
3191 
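// GFX10+ generally allows two scalar (constant bus) inputs per VALU
// instruction; older GPUs and 64-bit shifts are limited to one.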
3192 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3193   if (!isGFX10Plus())
3194     return 1;
3195 
3196   switch (Opcode) {
3197   // 64-bit shift instructions can use only one scalar value input
3198   case AMDGPU::V_LSHLREV_B64_e64:
3199   case AMDGPU::V_LSHLREV_B64_gfx10:
3200   case AMDGPU::V_LSHRREV_B64_e64:
3201   case AMDGPU::V_LSHRREV_B64_gfx10:
3202   case AMDGPU::V_ASHRREV_I64_e64:
3203   case AMDGPU::V_ASHRREV_I64_gfx10:
3204   case AMDGPU::V_LSHL_B64_e64:
3205   case AMDGPU::V_LSHR_B64_e64:
3206   case AMDGPU::V_ASHR_I64_e64:
3207     return 1;
3208   default:
3209     return 2;
3210   }
3211 }
3212 
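// Return true if this operand occupies a constant bus slot, i.e. it is a
// literal, an expression, or an SGPR other than null.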
3213 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3214   const MCOperand &MO = Inst.getOperand(OpIdx);
3215   if (MO.isImm()) {
3216     return !isInlineConstant(Inst, OpIdx);
3217   } else if (MO.isReg()) {
3218     auto Reg = MO.getReg();
3219     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3220     auto PReg = mc2PseudoReg(Reg);
3221     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3222   } else {
3223     return true;
3224   }
3225 }
3226 
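// Check that the instruction does not read more scalar values over the
// constant bus than the target allows.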
3227 bool
3228 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3229                                                 const OperandVector &Operands) {
3230   const unsigned Opcode = Inst.getOpcode();
3231   const MCInstrDesc &Desc = MII.get(Opcode);
3232   unsigned LastSGPR = AMDGPU::NoRegister;
3233   unsigned ConstantBusUseCount = 0;
3234   unsigned NumLiterals = 0;
3235   unsigned LiteralSize;
3236 
3237   if (Desc.TSFlags &
3238       (SIInstrFlags::VOPC |
3239        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3240        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3241        SIInstrFlags::SDWA)) {
3242     // Check special imm operands (used by madmk, etc)
3243     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3244       ++ConstantBusUseCount;
3245     }
3246 
3247     SmallDenseSet<unsigned> SGPRsUsed;
3248     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3249     if (SGPRUsed != AMDGPU::NoRegister) {
3250       SGPRsUsed.insert(SGPRUsed);
3251       ++ConstantBusUseCount;
3252     }
3253 
3254     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3255     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3256     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3257 
3258     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3259 
3260     for (int OpIdx : OpIndices) {
3261       if (OpIdx == -1) break;
3262 
3263       const MCOperand &MO = Inst.getOperand(OpIdx);
3264       if (usesConstantBus(Inst, OpIdx)) {
3265         if (MO.isReg()) {
3266           LastSGPR = mc2PseudoReg(MO.getReg());
3267           // Pairs of registers with partial intersections like these
3268           //   s0, s[0:1]
3269           //   flat_scratch_lo, flat_scratch
3270           //   flat_scratch_lo, flat_scratch_hi
3271           // are theoretically valid but they are disabled anyway.
3272           // Note that this code mimics SIInstrInfo::verifyInstruction
3273           if (!SGPRsUsed.count(LastSGPR)) {
3274             SGPRsUsed.insert(LastSGPR);
3275             ++ConstantBusUseCount;
3276           }
3277         } else { // Expression or a literal
3278 
3279           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3280             continue; // special operand like VINTERP attr_chan
3281 
3282           // An instruction may use only one literal.
3283           // This has been validated on the previous step.
3284           // See validateVOP3Literal.
3285           // This literal may be used as more than one operand.
3286           // If all these operands are of the same size,
3287           // this literal counts as one scalar value.
3288           // Otherwise it counts as 2 scalar values.
3289           // See "GFX10 Shader Programming", section 3.6.2.3.
3290 
3291           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3292           if (Size < 4) Size = 4;
3293 
3294           if (NumLiterals == 0) {
3295             NumLiterals = 1;
3296             LiteralSize = Size;
3297           } else if (LiteralSize != Size) {
3298             NumLiterals = 2;
3299           }
3300         }
3301       }
3302     }
3303   }
3304   ConstantBusUseCount += NumLiterals;
3305 
3306   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3307     return true;
3308 
3309   SMLoc LitLoc = getLitLoc(Operands);
3310   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3311   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3312   Error(Loc, "invalid operand (violates constant bus restrictions)");
3313   return false;
3314 }
3315 
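// For instructions with an earlyclobber vdst, the destination register
// must not overlap any of the source registers.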
3316 bool
3317 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3318                                                  const OperandVector &Operands) {
3319   const unsigned Opcode = Inst.getOpcode();
3320   const MCInstrDesc &Desc = MII.get(Opcode);
3321 
3322   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3323   if (DstIdx == -1 ||
3324       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3325     return true;
3326   }
3327 
3328   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3329 
3330   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3331   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3332   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3333 
3334   assert(DstIdx != -1);
3335   const MCOperand &Dst = Inst.getOperand(DstIdx);
3336   assert(Dst.isReg());
3337   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3338 
3339   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3340 
3341   for (int SrcIdx : SrcIndices) {
3342     if (SrcIdx == -1) break;
3343     const MCOperand &Src = Inst.getOperand(SrcIdx);
3344     if (Src.isReg()) {
3345       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3346       if (isRegIntersect(DstReg, SrcReg, TRI)) {
3347         Error(getRegLoc(SrcReg, Operands),
3348           "destination must be different than all sources");
3349         return false;
3350       }
3351     }
3352   }
3353 
3354   return true;
3355 }
3356 
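// If the target has no integer clamp support, the clamp modifier of
// integer instructions must be zero.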
3357 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3358 
3359   const unsigned Opc = Inst.getOpcode();
3360   const MCInstrDesc &Desc = MII.get(Opc);
3361 
3362   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3363     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3364     assert(ClampIdx != -1);
3365     return Inst.getOperand(ClampIdx).getImm() == 0;
3366   }
3367 
3368   return true;
3369 }
3370 
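// Check that the image data size (vdata register width) matches the number
// of components implied by dmask, adjusted for tfe and packed d16.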
3371 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3372 
3373   const unsigned Opc = Inst.getOpcode();
3374   const MCInstrDesc &Desc = MII.get(Opc);
3375 
3376   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3377     return true;
3378 
3379   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3380   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3381   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3382 
3383   assert(VDataIdx != -1);
3384 
3385   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3386     return true;
3387 
3388   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3389   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3390   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3391   if (DMask == 0)
3392     DMask = 1;
3393 
3394   unsigned DataSize =
3395     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3396   if (hasPackedD16()) {
3397     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3398     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3399       DataSize = (DataSize + 1) / 2;
3400   }
3401 
3402   return (VDataSize / 4) == DataSize + TFESize;
3403 }
3404 
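// On GFX10+, check that the image address size (NSA or packed vaddr width)
// matches the number of address components implied by dim and the opcode.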
3405 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3406   const unsigned Opc = Inst.getOpcode();
3407   const MCInstrDesc &Desc = MII.get(Opc);
3408 
3409   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3410     return true;
3411 
3412   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3413 
3414   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3415       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3416   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3417   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3418   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3419 
3420   assert(VAddr0Idx != -1);
3421   assert(SrsrcIdx != -1);
3422   assert(SrsrcIdx > VAddr0Idx);
3423 
3424   if (DimIdx == -1)
3425     return true; // intersect_ray
3426 
3427   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3428   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3429   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3430   unsigned VAddrSize =
3431       IsNSA ? SrsrcIdx - VAddr0Idx
3432             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3433 
3434   unsigned AddrSize = BaseOpcode->NumExtraArgs +
3435                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
3436                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
3437                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
3438   if (!IsNSA) {
3439     if (AddrSize > 8)
3440       AddrSize = 16;
3441     else if (AddrSize > 4)
3442       AddrSize = 8;
3443   }
3444 
3445   return VAddrSize == AddrSize;
3446 }
3447 
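// Image atomics must use a dmask of 0x1, 0x3 or 0xf.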
3448 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3449 
3450   const unsigned Opc = Inst.getOpcode();
3451   const MCInstrDesc &Desc = MII.get(Opc);
3452 
3453   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3454     return true;
3455   if (!Desc.mayLoad() || !Desc.mayStore())
3456     return true; // Not atomic
3457 
3458   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3459   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3460 
3461   // This is an incomplete check because image_atomic_cmpswap
3462   // may only use 0x3 and 0xf while other atomic operations
3463   // may use 0x1 and 0x3. However, these limitations are
3464   // verified when we check that dmask matches dst size.
3465   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3466 }
3467 
3468 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3469 
3470   const unsigned Opc = Inst.getOpcode();
3471   const MCInstrDesc &Desc = MII.get(Opc);
3472 
3473   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3474     return true;
3475 
3476   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3477   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3478 
3479   // GATHER4 instructions use dmask in a different fashion compared to
3480   // other MIMG instructions. The only useful DMASK values are
3481   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3482   // (red,red,red,red) etc.) The ISA document doesn't mention
3483   // this.
3484   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3485 }
3486 
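// Opcodes marked as MSAA must be used with an MSAA dim.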
3487 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3488   const unsigned Opc = Inst.getOpcode();
3489   const MCInstrDesc &Desc = MII.get(Opc);
3490 
3491   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3492     return true;
3493 
3494   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3495   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3496       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3497 
3498   if (!BaseOpcode->MSAA)
3499     return true;
3500 
3501   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3502   assert(DimIdx != -1);
3503 
3504   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3505   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3506 
3507   return DimInfo->MSAA;
3508 }
3509 
3510 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3511 {
3512   switch (Opcode) {
3513   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3514   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3515   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3516     return true;
3517   default:
3518     return false;
3519   }
3520 }
3521 
3522 // movrels* opcodes should only allow VGPRs as src0.
3523 // This is specified in the .td description for vop1/vop3,
3524 // but sdwa is handled differently. See isSDWAOperand.
3525 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3526                                       const OperandVector &Operands) {
3527 
3528   const unsigned Opc = Inst.getOpcode();
3529   const MCInstrDesc &Desc = MII.get(Opc);
3530 
3531   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3532     return true;
3533 
3534   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3535   assert(Src0Idx != -1);
3536 
3537   SMLoc ErrLoc;
3538   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3539   if (Src0.isReg()) {
3540     auto Reg = mc2PseudoReg(Src0.getReg());
3541     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3542     if (!isSGPR(Reg, TRI))
3543       return true;
3544     ErrLoc = getRegLoc(Reg, Operands);
3545   } else {
3546     ErrLoc = getConstLoc(Operands);
3547   }
3548 
3549   Error(ErrLoc, "source operand must be a VGPR");
3550   return false;
3551 }
3552 
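// v_accvgpr_write does not accept an SGPR as src0; it must be a VGPR or an
// inline constant.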
3553 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3554                                           const OperandVector &Operands) {
3555 
3556   const unsigned Opc = Inst.getOpcode();
3557 
3558   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3559     return true;
3560 
3561   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3562   assert(Src0Idx != -1);
3563 
3564   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3565   if (!Src0.isReg())
3566     return true;
3567 
3568   auto Reg = mc2PseudoReg(Src0.getReg());
3569   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3570   if (isSGPR(Reg, TRI)) {
3571     Error(getRegLoc(Reg, Operands),
3572           "source operand must be either a VGPR or an inline constant");
3573     return false;
3574   }
3575 
3576   return true;
3577 }
3578 
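// v_div_scale_* (VOP3B) does not accept the ABS source modifier.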
3579 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3580   switch (Inst.getOpcode()) {
3581   default:
3582     return true;
3583   case V_DIV_SCALE_F32_gfx6_gfx7:
3584   case V_DIV_SCALE_F32_vi:
3585   case V_DIV_SCALE_F32_gfx10:
3586   case V_DIV_SCALE_F64_gfx6_gfx7:
3587   case V_DIV_SCALE_F64_vi:
3588   case V_DIV_SCALE_F64_gfx10:
3589     break;
3590   }
3591 
3592   // TODO: Check that src0 = src1 or src2.
3593 
3594   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3595                     AMDGPU::OpName::src1_modifiers,
3596                     AMDGPU::OpName::src2_modifiers}) {
3597     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3598             .getImm() &
3599         SISrcMods::ABS) {
3600       return false;
3601     }
3602   }
3603 
3604   return true;
3605 }
3606 
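// The d16 image modifier is not supported on SI/CI targets.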
3607 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3608 
3609   const unsigned Opc = Inst.getOpcode();
3610   const MCInstrDesc &Desc = MII.get(Opc);
3611 
3612   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3613     return true;
3614 
3615   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3616   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3617     if (isCI() || isSI())
3618       return false;
3619   }
3620 
3621   return true;
3622 }
3623 
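// The dim operand encoding must be in the range [0, 7].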
3624 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3625   const unsigned Opc = Inst.getOpcode();
3626   const MCInstrDesc &Desc = MII.get(Opc);
3627 
3628   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3629     return true;
3630 
3631   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3632   if (DimIdx < 0)
3633     return true;
3634 
3635   long Imm = Inst.getOperand(DimIdx).getImm();
3636   if (Imm < 0 || Imm >= 8)
3637     return false;
3638 
3639   return true;
3640 }
3641 
3642 static bool IsRevOpcode(const unsigned Opcode)
3643 {
3644   switch (Opcode) {
3645   case AMDGPU::V_SUBREV_F32_e32:
3646   case AMDGPU::V_SUBREV_F32_e64:
3647   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3648   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3649   case AMDGPU::V_SUBREV_F32_e32_vi:
3650   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3651   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3652   case AMDGPU::V_SUBREV_F32_e64_vi:
3653 
3654   case AMDGPU::V_SUBREV_CO_U32_e32:
3655   case AMDGPU::V_SUBREV_CO_U32_e64:
3656   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3657   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3658 
3659   case AMDGPU::V_SUBBREV_U32_e32:
3660   case AMDGPU::V_SUBBREV_U32_e64:
3661   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3662   case AMDGPU::V_SUBBREV_U32_e32_vi:
3663   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3664   case AMDGPU::V_SUBBREV_U32_e64_vi:
3665 
3666   case AMDGPU::V_SUBREV_U32_e32:
3667   case AMDGPU::V_SUBREV_U32_e64:
3668   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3669   case AMDGPU::V_SUBREV_U32_e32_vi:
3670   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3671   case AMDGPU::V_SUBREV_U32_e64_vi:
3672 
3673   case AMDGPU::V_SUBREV_F16_e32:
3674   case AMDGPU::V_SUBREV_F16_e64:
3675   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3676   case AMDGPU::V_SUBREV_F16_e32_vi:
3677   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3678   case AMDGPU::V_SUBREV_F16_e64_vi:
3679 
3680   case AMDGPU::V_SUBREV_U16_e32:
3681   case AMDGPU::V_SUBREV_U16_e64:
3682   case AMDGPU::V_SUBREV_U16_e32_vi:
3683   case AMDGPU::V_SUBREV_U16_e64_vi:
3684 
3685   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3686   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3687   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3688 
3689   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3690   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3691 
3692   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3693   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3694 
3695   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3696   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3697 
3698   case AMDGPU::V_LSHRREV_B32_e32:
3699   case AMDGPU::V_LSHRREV_B32_e64:
3700   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3701   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3702   case AMDGPU::V_LSHRREV_B32_e32_vi:
3703   case AMDGPU::V_LSHRREV_B32_e64_vi:
3704   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3705   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3706 
3707   case AMDGPU::V_ASHRREV_I32_e32:
3708   case AMDGPU::V_ASHRREV_I32_e64:
3709   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3710   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3711   case AMDGPU::V_ASHRREV_I32_e32_vi:
3712   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3713   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3714   case AMDGPU::V_ASHRREV_I32_e64_vi:
3715 
3716   case AMDGPU::V_LSHLREV_B32_e32:
3717   case AMDGPU::V_LSHLREV_B32_e64:
3718   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3719   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3720   case AMDGPU::V_LSHLREV_B32_e32_vi:
3721   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3722   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3723   case AMDGPU::V_LSHLREV_B32_e64_vi:
3724 
3725   case AMDGPU::V_LSHLREV_B16_e32:
3726   case AMDGPU::V_LSHLREV_B16_e64:
3727   case AMDGPU::V_LSHLREV_B16_e32_vi:
3728   case AMDGPU::V_LSHLREV_B16_e64_vi:
3729   case AMDGPU::V_LSHLREV_B16_gfx10:
3730 
3731   case AMDGPU::V_LSHRREV_B16_e32:
3732   case AMDGPU::V_LSHRREV_B16_e64:
3733   case AMDGPU::V_LSHRREV_B16_e32_vi:
3734   case AMDGPU::V_LSHRREV_B16_e64_vi:
3735   case AMDGPU::V_LSHRREV_B16_gfx10:
3736 
3737   case AMDGPU::V_ASHRREV_I16_e32:
3738   case AMDGPU::V_ASHRREV_I16_e64:
3739   case AMDGPU::V_ASHRREV_I16_e32_vi:
3740   case AMDGPU::V_ASHRREV_I16_e64_vi:
3741   case AMDGPU::V_ASHRREV_I16_gfx10:
3742 
3743   case AMDGPU::V_LSHLREV_B64_e64:
3744   case AMDGPU::V_LSHLREV_B64_gfx10:
3745   case AMDGPU::V_LSHLREV_B64_vi:
3746 
3747   case AMDGPU::V_LSHRREV_B64_e64:
3748   case AMDGPU::V_LSHRREV_B64_gfx10:
3749   case AMDGPU::V_LSHRREV_B64_vi:
3750 
3751   case AMDGPU::V_ASHRREV_I64_e64:
3752   case AMDGPU::V_ASHRREV_I64_gfx10:
3753   case AMDGPU::V_ASHRREV_I64_vi:
3754 
3755   case AMDGPU::V_PK_LSHLREV_B16:
3756   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3757   case AMDGPU::V_PK_LSHLREV_B16_vi:
3758 
3759   case AMDGPU::V_PK_LSHRREV_B16:
3760   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3761   case AMDGPU::V_PK_LSHRREV_B16_vi:
3762   case AMDGPU::V_PK_ASHRREV_I16:
3763   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3764   case AMDGPU::V_PK_ASHRREV_I16_vi:
3765     return true;
3766   default:
3767     return false;
3768   }
3769 }
3770 
3771 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3772 
3773   using namespace SIInstrFlags;
3774   const unsigned Opcode = Inst.getOpcode();
3775   const MCInstrDesc &Desc = MII.get(Opcode);
3776 
3777   // The lds_direct register is defined so that it can be used
3778   // only with 9-bit source operands. Ignore encodings that do not accept them.
3779   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3780   if ((Desc.TSFlags & Enc) == 0)
3781     return None;
3782 
3783   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3784     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3785     if (SrcIdx == -1)
3786       break;
3787     const auto &Src = Inst.getOperand(SrcIdx);
3788     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3789 
3790       if (isGFX90A())
3791         return StringRef("lds_direct is not supported on this GPU");
3792 
3793       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3794         return StringRef("lds_direct cannot be used with this instruction");
3795 
3796       if (SrcName != OpName::src0)
3797         return StringRef("lds_direct may be used as src0 only");
3798     }
3799   }
3800 
3801   return None;
3802 }
3803 
3804 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3805   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3806     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3807     if (Op.isFlatOffset())
3808       return Op.getStartLoc();
3809   }
3810   return getLoc();
3811 }
3812 
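// Check that the flat offset modifier is supported by the target and that
// its value fits into the signed or unsigned offset field of the encoding.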
3813 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3814                                          const OperandVector &Operands) {
3815   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3816   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3817     return true;
3818 
3819   auto Opcode = Inst.getOpcode();
3820   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3821   assert(OpNum != -1);
3822 
3823   const auto &Op = Inst.getOperand(OpNum);
3824   if (!hasFlatOffsets() && Op.getImm() != 0) {
3825     Error(getFlatOffsetLoc(Operands),
3826           "flat offset modifier is not supported on this GPU");
3827     return false;
3828   }
3829 
3830   // GLOBAL/SCRATCH instructions accept a signed offset, while for the FLAT
3831   // segment the offset must be positive (the MSB is ignored and forced to zero).
3832   if (TSFlags & (SIInstrFlags::IsFlatGlobal | SIInstrFlags::IsFlatScratch)) {
3833     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
3834     if (!isIntN(OffsetSize, Op.getImm())) {
3835       Error(getFlatOffsetLoc(Operands),
3836             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
3837       return false;
3838     }
3839   } else {
3840     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
3841     if (!isUIntN(OffsetSize, Op.getImm())) {
3842       Error(getFlatOffsetLoc(Operands),
3843             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
3844       return false;
3845     }
3846   }
3847 
3848   return true;
3849 }
3850 
3851 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3852   // Start with second operand because SMEM Offset cannot be dst or src0.
3853   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
3854     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3855     if (Op.isSMEMOffset())
3856       return Op.getStartLoc();
3857   }
3858   return getLoc();
3859 }
3860 
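// Check that the SMEM offset fits the offset field of the target's
// encoding (20-bit unsigned or 21-bit signed, depending on GPU and opcode).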
3861 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3862                                          const OperandVector &Operands) {
3863   if (isCI() || isSI())
3864     return true;
3865 
3866   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3867   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3868     return true;
3869 
3870   auto Opcode = Inst.getOpcode();
3871   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3872   if (OpNum == -1)
3873     return true;
3874 
3875   const auto &Op = Inst.getOperand(OpNum);
3876   if (!Op.isImm())
3877     return true;
3878 
3879   uint64_t Offset = Op.getImm();
3880   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3881   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3882       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3883     return true;
3884 
3885   Error(getSMEMOffsetLoc(Operands),
3886         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3887                                "expected a 21-bit signed offset");
3888 
3889   return false;
3890 }
3891 
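// SOP2/SOPC instructions may use at most one literal or expression operand.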
3892 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3893   unsigned Opcode = Inst.getOpcode();
3894   const MCInstrDesc &Desc = MII.get(Opcode);
3895   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3896     return true;
3897 
3898   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3899   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3900 
3901   const int OpIndices[] = { Src0Idx, Src1Idx };
3902 
3903   unsigned NumExprs = 0;
3904   unsigned NumLiterals = 0;
3905   uint32_t LiteralValue;
3906 
3907   for (int OpIdx : OpIndices) {
3908     if (OpIdx == -1) break;
3909 
3910     const MCOperand &MO = Inst.getOperand(OpIdx);
3911     // Exclude special imm operands (like the one used by s_set_gpr_idx_on)
3912     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3913       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3914         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3915         if (NumLiterals == 0 || LiteralValue != Value) {
3916           LiteralValue = Value;
3917           ++NumLiterals;
3918         }
3919       } else if (MO.isExpr()) {
3920         ++NumExprs;
3921       }
3922     }
3923   }
3924 
3925   return NumLiterals + NumExprs <= 1;
3926 }
3927 
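// v_permlane16/v_permlanex16 use only the two low op_sel bits.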
3928 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3929   const unsigned Opc = Inst.getOpcode();
3930   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3931       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3932     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3933     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3934 
3935     if (OpSel & ~3)
3936       return false;
3937   }
3938   return true;
3939 }
3940 
3941 // Check if VCC register matches wavefront size
3942 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3943   auto FB = getFeatureBits();
3944   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3945     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3946 }
3947 
3948 // VOP3 literal is only allowed in GFX10+ and only one can be used
3949 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
3950                                           const OperandVector &Operands) {
3951   unsigned Opcode = Inst.getOpcode();
3952   const MCInstrDesc &Desc = MII.get(Opcode);
3953   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3954     return true;
3955 
3956   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3957   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3958   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3959 
3960   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3961 
3962   unsigned NumExprs = 0;
3963   unsigned NumLiterals = 0;
3964   uint32_t LiteralValue;
3965 
3966   for (int OpIdx : OpIndices) {
3967     if (OpIdx == -1) break;
3968 
3969     const MCOperand &MO = Inst.getOperand(OpIdx);
3970     if (!MO.isImm() && !MO.isExpr())
3971       continue;
3972     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
3973       continue;
3974 
3975     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3976         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
3977       Error(getConstLoc(Operands),
3978             "inline constants are not allowed for this operand");
3979       return false;
3980     }
3981 
3982     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3983       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3984       if (NumLiterals == 0 || LiteralValue != Value) {
3985         LiteralValue = Value;
3986         ++NumLiterals;
3987       }
3988     } else if (MO.isExpr()) {
3989       ++NumExprs;
3990     }
3991   }
3992   NumLiterals += NumExprs;
3993 
3994   if (!NumLiterals)
3995     return true;
3996 
3997   if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
3998     Error(getLitLoc(Operands), "literal operands are not supported");
3999     return false;
4000   }
4001 
4002   if (NumLiterals > 1) {
4003     Error(getLitLoc(Operands), "only one literal operand is allowed");
4004     return false;
4005   }
4006 
4007   return true;
4008 }
4009 
4010 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4011 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4012                          const MCRegisterInfo *MRI) {
4013   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4014   if (OpIdx < 0)
4015     return -1;
4016 
4017   const MCOperand &Op = Inst.getOperand(OpIdx);
4018   if (!Op.isReg())
4019     return -1;
4020 
4021   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4022   auto Reg = Sub ? Sub : Op.getReg();
4023   const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4024   return AGRP32.contains(Reg) ? 1 : 0;
4025 }
4026 
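// For memory instructions, data and dst operands must be either all VGPRs
// or all AGPRs; AGPR operands are only accepted on targets with gfx90a
// instructions.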
4027 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4028   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4029   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4030                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4031                   SIInstrFlags::DS)) == 0)
4032     return true;
4033 
4034   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4035                                                       : AMDGPU::OpName::vdata;
4036 
4037   const MCRegisterInfo *MRI = getMRI();
4038   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4039   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4040 
4041   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4042     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4043     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4044       return false;
4045   }
4046 
4047   auto FB = getFeatureBits();
4048   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4049     if (DataAreg < 0 || DstAreg < 0)
4050       return true;
4051     return DstAreg == DataAreg;
4052   }
4053 
4054   return DstAreg < 1 && DataAreg < 1;
4055 }
4056 
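// On gfx90a, register tuples of VGPRs or AGPRs must start at an even
// register.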
4057 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4058   auto FB = getFeatureBits();
4059   if (!FB[AMDGPU::FeatureGFX90AInsts])
4060     return true;
4061 
4062   const MCRegisterInfo *MRI = getMRI();
4063   const MCRegisterClass &VGRP32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4064   const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4065   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4066     const MCOperand &Op = Inst.getOperand(I);
4067     if (!Op.isReg())
4068       continue;
4069 
4070     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4071     if (!Sub)
4072       continue;
4073 
4074     if (VGRP32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4075       return false;
4076     if (AGRP32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4077       return false;
4078   }
4079 
4080   return true;
4081 }
4082 
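// Validate the cache policy (cpol) bits against the instruction kind and
// the target.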
4083 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4084                                             const OperandVector &Operands,
4085                                             const SMLoc &IDLoc) {
4086   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4087                                            AMDGPU::OpName::cpol);
4088   if (CPolPos == -1)
4089     return true;
4090 
4091   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4092 
4093   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4094   if ((TSFlags & (SIInstrFlags::SMRD)) &&
4095       (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
4096     Error(IDLoc, "invalid cache policy for SMRD instruction");
4097     return false;
4098   }
4099 
4100   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4101     return true;
4102 
4103   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4104     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4105       Error(IDLoc, "instruction must use glc");
4106       return false;
4107     }
4108   } else {
4109     if (CPol & CPol::GLC) {
4110       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4111       StringRef CStr(S.getPointer());
4112       S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
4113       Error(S, "instruction must not use glc");
4114       return false;
4115     }
4116   }
4117 
4118   if (isGFX90A() && (CPol & CPol::SCC) && (TSFlags & SIInstrFlags::FPAtomic)) {
4119     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4120     StringRef CStr(S.getPointer());
4121     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4122     Error(S, "instruction must not use scc");
4123     return false;
4124   }
4125 
4126   return true;
4127 }
4128 
4129 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4130                                           const SMLoc &IDLoc,
4131                                           const OperandVector &Operands) {
4132   if (auto ErrMsg = validateLdsDirect(Inst)) {
4133     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4134     return false;
4135   }
4136   if (!validateSOPLiteral(Inst)) {
4137     Error(getLitLoc(Operands),
4138       "only one literal operand is allowed");
4139     return false;
4140   }
4141   if (!validateVOP3Literal(Inst, Operands)) {
4142     return false;
4143   }
4144   if (!validateConstantBusLimitations(Inst, Operands)) {
4145     return false;
4146   }
4147   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4148     return false;
4149   }
4150   if (!validateIntClampSupported(Inst)) {
4151     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4152       "integer clamping is not supported on this GPU");
4153     return false;
4154   }
4155   if (!validateOpSel(Inst)) {
4156     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4157       "invalid op_sel operand");
4158     return false;
4159   }
4160   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4161   if (!validateMIMGD16(Inst)) {
4162     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4163       "d16 modifier is not supported on this GPU");
4164     return false;
4165   }
4166   if (!validateMIMGDim(Inst)) {
4167     Error(IDLoc, "dim modifier is required on this GPU");
4168     return false;
4169   }
4170   if (!validateMIMGMSAA(Inst)) {
4171     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4172           "invalid dim; must be MSAA type");
4173     return false;
4174   }
4175   if (!validateMIMGDataSize(Inst)) {
4176     Error(IDLoc,
4177       "image data size does not match dmask and tfe");
4178     return false;
4179   }
4180   if (!validateMIMGAddrSize(Inst)) {
4181     Error(IDLoc,
4182       "image address size does not match dim and a16");
4183     return false;
4184   }
4185   if (!validateMIMGAtomicDMask(Inst)) {
4186     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4187       "invalid atomic image dmask");
4188     return false;
4189   }
4190   if (!validateMIMGGatherDMask(Inst)) {
4191     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4192       "invalid image_gather dmask: only one bit must be set");
4193     return false;
4194   }
4195   if (!validateMovrels(Inst, Operands)) {
4196     return false;
4197   }
4198   if (!validateFlatOffset(Inst, Operands)) {
4199     return false;
4200   }
4201   if (!validateSMEMOffset(Inst, Operands)) {
4202     return false;
4203   }
4204   if (!validateMAIAccWrite(Inst, Operands)) {
4205     return false;
4206   }
4207   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4208     return false;
4209   }
4210 
4211   if (!validateAGPRLdSt(Inst)) {
4212     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4213     ? "invalid register class: data and dst should be all VGPR or AGPR"
4214     : "invalid register class: agpr loads and stores not supported on this GPU"
4215     );
4216     return false;
4217   }
4218   if (!validateVGPRAlign(Inst)) {
4219     Error(IDLoc,
4220       "invalid register class: vgpr tuples must be 64 bit aligned");
4221     return false;
4222   }
4223 
4224   if (!validateDivScale(Inst)) {
4225     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4226     return false;
4227   }
4231 
4232   return true;
4233 }
4234 
4235 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4236                                             const FeatureBitset &FBS,
4237                                             unsigned VariantID = 0);
4238 
4239 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4240                                 const FeatureBitset &AvailableFeatures,
4241                                 unsigned VariantID);
4242 
4243 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4244                                        const FeatureBitset &FBS) {
4245   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4246 }
4247 
4248 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4249                                        const FeatureBitset &FBS,
4250                                        ArrayRef<unsigned> Variants) {
4251   for (auto Variant : Variants) {
4252     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4253       return true;
4254   }
4255 
4256   return false;
4257 }
4258 
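// Emit a diagnostic explaining why the mnemonic could not be matched:
// unsupported variant, unsupported GPU, or an unknown instruction.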
4259 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4260                                                   const SMLoc &IDLoc) {
4261   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4262 
4263   // Check if requested instruction variant is supported.
4264   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4265     return false;
4266 
4267   // This instruction is not supported.
4268   // Clear any other pending errors because they are no longer relevant.
4269   getParser().clearPendingErrors();
4270 
4271   // Requested instruction variant is not supported.
4272   // Check if any other variants are supported.
4273   StringRef VariantName = getMatchedVariantName();
4274   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4275     return Error(IDLoc,
4276                  Twine(VariantName,
4277                        " variant of this instruction is not supported"));
4278   }
4279 
4280   // Finally check if this instruction is supported on any other GPU.
4281   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4282     return Error(IDLoc, "instruction not supported on this GPU");
4283   }
4284 
4285   // Instruction not supported on any GPU. Probably a typo.
4286   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4287   return Error(IDLoc, "invalid instruction" + Suggestion);
4288 }
4289 
4290 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4291                                               OperandVector &Operands,
4292                                               MCStreamer &Out,
4293                                               uint64_t &ErrorInfo,
4294                                               bool MatchingInlineAsm) {
4295   MCInst Inst;
4296   unsigned Result = Match_Success;
4297   for (auto Variant : getMatchedVariants()) {
4298     uint64_t EI;
4299     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4300                                   Variant);
4301     // We order match statuses from least to most specific and keep the most
4302     // specific status seen so far as the result:
4303     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4304     if ((R == Match_Success) ||
4305         (R == Match_PreferE32) ||
4306         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4307         (R == Match_InvalidOperand && Result != Match_MissingFeature
4308                                    && Result != Match_PreferE32) ||
4309         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4310                                    && Result != Match_MissingFeature
4311                                    && Result != Match_PreferE32)) {
4312       Result = R;
4313       ErrorInfo = EI;
4314     }
4315     if (R == Match_Success)
4316       break;
4317   }
4318 
4319   if (Result == Match_Success) {
4320     if (!validateInstruction(Inst, IDLoc, Operands)) {
4321       return true;
4322     }
4323     Inst.setLoc(IDLoc);
4324     Out.emitInstruction(Inst, getSTI());
4325     return false;
4326   }
4327 
4328   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4329   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4330     return true;
4331   }
4332 
4333   switch (Result) {
4334   default: break;
4335   case Match_MissingFeature:
4336     // It has been verified that the specified instruction
4337     // mnemonic is valid. A match was found but it requires
4338     // features which are not supported on this GPU.
4339     return Error(IDLoc, "operands are not valid for this GPU or mode");
4340 
4341   case Match_InvalidOperand: {
4342     SMLoc ErrorLoc = IDLoc;
4343     if (ErrorInfo != ~0ULL) {
4344       if (ErrorInfo >= Operands.size()) {
4345         return Error(IDLoc, "too few operands for instruction");
4346       }
4347       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4348       if (ErrorLoc == SMLoc())
4349         ErrorLoc = IDLoc;
4350     }
4351     return Error(ErrorLoc, "invalid operand for instruction");
4352   }
4353 
4354   case Match_PreferE32:
4355     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4356                         "should be encoded as e32");
4357   case Match_MnemonicFail:
4358     llvm_unreachable("Invalid instructions should have been handled already");
4359   }
4360   llvm_unreachable("Implement any new match types added!");
4361 }
4362 
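// Parse an absolute expression and truncate the result to 32 bits.
// Returns true on failure.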
4363 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4364   int64_t Tmp = -1;
4365   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4366     return true;
4367   }
4368   if (getParser().parseAbsoluteExpression(Tmp)) {
4369     return true;
4370   }
4371   Ret = static_cast<uint32_t>(Tmp);
4372   return false;
4373 }
4374 
4375 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4376                                                uint32_t &Minor) {
4377   if (ParseAsAbsoluteExpression(Major))
4378     return TokError("invalid major version");
4379 
4380   if (!trySkipToken(AsmToken::Comma))
4381     return TokError("minor version number required, comma expected");
4382 
4383   if (ParseAsAbsoluteExpression(Minor))
4384     return TokError("invalid minor version");
4385 
4386   return false;
4387 }
4388 
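// Parse the .amdgcn_target directive and check that the specified target
// string matches the one implied by the assembler options.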
4389 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4390   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4391     return TokError("directive only supported for amdgcn architecture");
4392 
4393   std::string Target;
4394 
4395   SMLoc TargetStart = getLoc();
4396   if (getParser().parseEscapedString(Target))
4397     return true;
4398   SMRange TargetRange = SMRange(TargetStart, getLoc());
4399 
4400   std::string ExpectedTarget;
4401   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
4402   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
4403 
4404   if (Target != ExpectedTargetOS.str())
4405     return Error(TargetRange.Start, "target must match options", TargetRange);
4406 
4407   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
4408   return false;
4409 }
4410 
4411 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4412   return Error(Range.Start, "value out of range", Range);
4413 }
4414 
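// Compute the VGPR and SGPR block counts from the register usage, checking
// the counts against the target's limits.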
4415 bool AMDGPUAsmParser::calculateGPRBlocks(
4416     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4417     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4418     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4419     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4420   // TODO(scott.linder): These calculations are duplicated from
4421   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4422   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4423 
4424   unsigned NumVGPRs = NextFreeVGPR;
4425   unsigned NumSGPRs = NextFreeSGPR;
4426 
4427   if (Version.Major >= 10)
4428     NumSGPRs = 0;
4429   else {
4430     unsigned MaxAddressableNumSGPRs =
4431         IsaInfo::getAddressableNumSGPRs(&getSTI());
4432 
4433     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4434         NumSGPRs > MaxAddressableNumSGPRs)
4435       return OutOfRangeError(SGPRRange);
4436 
4437     NumSGPRs +=
4438         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4439 
4440     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4441         NumSGPRs > MaxAddressableNumSGPRs)
4442       return OutOfRangeError(SGPRRange);
4443 
4444     if (Features.test(FeatureSGPRInitBug))
4445       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4446   }
4447 
4448   VGPRBlocks =
4449       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4450   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4451 
4452   return false;
4453 }
4454 
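// Parse the .amdhsa_kernel directive and the nested .amdhsa_* directives
// up to .end_amdhsa_kernel to build the kernel descriptor.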
4455 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4456   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4457     return TokError("directive only supported for amdgcn architecture");
4458 
4459   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4460     return TokError("directive only supported for amdhsa OS");
4461 
4462   StringRef KernelName;
4463   if (getParser().parseIdentifier(KernelName))
4464     return true;
4465 
4466   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4467 
4468   StringSet<> Seen;
4469 
4470   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4471 
4472   SMRange VGPRRange;
4473   uint64_t NextFreeVGPR = 0;
4474   uint64_t AccumOffset = 0;
4475   SMRange SGPRRange;
4476   uint64_t NextFreeSGPR = 0;
4477   unsigned UserSGPRCount = 0;
4478   bool ReserveVCC = true;
4479   bool ReserveFlatScr = true;
4480   bool ReserveXNACK = hasXNACK();
4481   Optional<bool> EnableWavefrontSize32;
4482 
4483   while (true) {
4484     while (trySkipToken(AsmToken::EndOfStatement));
4485 
4486     StringRef ID;
4487     SMRange IDRange = getTok().getLocRange();
4488     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4489       return true;
4490 
4491     if (ID == ".end_amdhsa_kernel")
4492       break;
4493 
4494     if (Seen.find(ID) != Seen.end())
4495       return TokError(".amdhsa_ directives cannot be repeated");
4496     Seen.insert(ID);
4497 
4498     SMLoc ValStart = getLoc();
4499     int64_t IVal;
4500     if (getParser().parseAbsoluteExpression(IVal))
4501       return true;
4502     SMLoc ValEnd = getLoc();
4503     SMRange ValRange = SMRange(ValStart, ValEnd);
4504 
4505     if (IVal < 0)
4506       return OutOfRangeError(ValRange);
4507 
4508     uint64_t Val = IVal;
4509 
4510 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4511   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4512     return OutOfRangeError(RANGE);                                             \
4513   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4514 
4515     if (ID == ".amdhsa_group_segment_fixed_size") {
4516       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4517         return OutOfRangeError(ValRange);
4518       KD.group_segment_fixed_size = Val;
4519     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4520       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4521         return OutOfRangeError(ValRange);
4522       KD.private_segment_fixed_size = Val;
4523     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4524       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4525                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4526                        Val, ValRange);
4527       if (Val)
4528         UserSGPRCount += 4;
4529     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4530       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4531                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4532                        ValRange);
4533       if (Val)
4534         UserSGPRCount += 2;
4535     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4536       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4537                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4538                        ValRange);
4539       if (Val)
4540         UserSGPRCount += 2;
4541     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4542       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4543                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4544                        Val, ValRange);
4545       if (Val)
4546         UserSGPRCount += 2;
4547     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4548       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4549                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4550                        ValRange);
4551       if (Val)
4552         UserSGPRCount += 2;
4553     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4554       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4555                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4556                        ValRange);
4557       if (Val)
4558         UserSGPRCount += 2;
4559     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4560       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4561                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4562                        Val, ValRange);
4563       if (Val)
4564         UserSGPRCount += 1;
4565     } else if (ID == ".amdhsa_wavefront_size32") {
4566       if (IVersion.Major < 10)
4567         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4568       EnableWavefrontSize32 = Val;
4569       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4570                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4571                        Val, ValRange);
4572     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4573       PARSE_BITS_ENTRY(
4574           KD.compute_pgm_rsrc2,
4575           COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val,
4576           ValRange);
4577     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4578       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4579                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4580                        ValRange);
4581     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4582       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4583                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4584                        ValRange);
4585     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4586       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4587                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4588                        ValRange);
4589     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4590       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4591                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4592                        ValRange);
4593     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4594       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4595                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4596                        ValRange);
4597     } else if (ID == ".amdhsa_next_free_vgpr") {
4598       VGPRRange = ValRange;
4599       NextFreeVGPR = Val;
4600     } else if (ID == ".amdhsa_next_free_sgpr") {
4601       SGPRRange = ValRange;
4602       NextFreeSGPR = Val;
4603     } else if (ID == ".amdhsa_accum_offset") {
4604       if (!isGFX90A())
4605         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4606       AccumOffset = Val;
4607     } else if (ID == ".amdhsa_reserve_vcc") {
4608       if (!isUInt<1>(Val))
4609         return OutOfRangeError(ValRange);
4610       ReserveVCC = Val;
4611     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4612       if (IVersion.Major < 7)
4613         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4614       if (!isUInt<1>(Val))
4615         return OutOfRangeError(ValRange);
4616       ReserveFlatScr = Val;
4617     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4618       if (IVersion.Major < 8)
4619         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4620       if (!isUInt<1>(Val))
4621         return OutOfRangeError(ValRange);
4622       ReserveXNACK = Val;
4623     } else if (ID == ".amdhsa_float_round_mode_32") {
4624       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4625                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4626     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4627       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4628                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4629     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4630       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4631                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4632     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4633       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4634                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4635                        ValRange);
4636     } else if (ID == ".amdhsa_dx10_clamp") {
4637       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4638                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4639     } else if (ID == ".amdhsa_ieee_mode") {
4640       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4641                        Val, ValRange);
4642     } else if (ID == ".amdhsa_fp16_overflow") {
4643       if (IVersion.Major < 9)
4644         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
4645       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4646                        ValRange);
4647     } else if (ID == ".amdhsa_tg_split") {
4648       if (!isGFX90A())
4649         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4650       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
4651                        COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, ValRange);
4652     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4653       if (IVersion.Major < 10)
4654         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4655       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4656                        ValRange);
4657     } else if (ID == ".amdhsa_memory_ordered") {
4658       if (IVersion.Major < 10)
4659         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4660       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4661                        ValRange);
4662     } else if (ID == ".amdhsa_forward_progress") {
4663       if (IVersion.Major < 10)
4664         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4665       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4666                        ValRange);
4667     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4668       PARSE_BITS_ENTRY(
4669           KD.compute_pgm_rsrc2,
4670           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4671           ValRange);
4672     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4673       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4674                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4675                        Val, ValRange);
4676     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4677       PARSE_BITS_ENTRY(
4678           KD.compute_pgm_rsrc2,
4679           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4680           ValRange);
4681     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4682       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4683                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4684                        Val, ValRange);
4685     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4686       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4687                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4688                        Val, ValRange);
4689     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4690       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4691                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4692                        Val, ValRange);
4693     } else if (ID == ".amdhsa_exception_int_div_zero") {
4694       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4695                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4696                        Val, ValRange);
4697     } else {
4698       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
4699     }
4700 
4701 #undef PARSE_BITS_ENTRY
4702   }
4703 
4704   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4705     return TokError(".amdhsa_next_free_vgpr directive is required");
4706 
4707   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4708     return TokError(".amdhsa_next_free_sgpr directive is required");
4709 
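  // Convert the next-free VGPR/SGPR counts into the granulated block counts
  // encoded in compute_pgm_rsrc1.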
4710   unsigned VGPRBlocks;
4711   unsigned SGPRBlocks;
4712   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4713                          ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
4714                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4715                          SGPRBlocks))
4716     return true;
4717 
4718   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4719           VGPRBlocks))
4720     return OutOfRangeError(VGPRRange);
4721   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4722                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4723 
4724   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4725           SGPRBlocks))
4726     return OutOfRangeError(SGPRRange);
4727   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4728                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4729                   SGPRBlocks);
4730 
4731   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4732     return TokError("too many user SGPRs enabled");
4733   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4734                   UserSGPRCount);
4735 
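  // On gfx90a the accumulation VGPR offset is required and must be a multiple
  // of 4 within the allocated VGPRs.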
4736   if (isGFX90A()) {
4737     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
4738       return TokError(".amdhsa_accum_offset directive is required");
4739     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
4740       return TokError("accum_offset should be in range [4..256] in "
4741                       "increments of 4");
4742     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
4743       return TokError("accum_offset exceeds total VGPR allocation");
4744     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
4745                     (AccumOffset / 4 - 1));
4746   }
4747 
4748   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4749       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4750       ReserveFlatScr, ReserveXNACK);
4751   return false;
4752 }
4753 
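/// Parse the .hsa_code_object_version directive: a major and a minor version
/// number, which are forwarded to the target streamer.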
4754 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4755   uint32_t Major;
4756   uint32_t Minor;
4757 
4758   if (ParseDirectiveMajorMinor(Major, Minor))
4759     return true;
4760 
4761   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4762   return false;
4763 }
4764 
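/// Parse the .hsa_code_object_isa directive. With no arguments the ISA version
/// of the targeted GPU is emitted; otherwise expects major, minor, stepping,
/// vendor name and arch name.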
4765 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4766   uint32_t Major;
4767   uint32_t Minor;
4768   uint32_t Stepping;
4769   StringRef VendorName;
4770   StringRef ArchName;
4771 
4772   // If this directive has no arguments, then use the ISA version for the
4773   // targeted GPU.
4774   if (isToken(AsmToken::EndOfStatement)) {
4775     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4776     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
4777                                                       ISA.Stepping,
4778                                                       "AMD", "AMDGPU");
4779     return false;
4780   }
4781 
4782   if (ParseDirectiveMajorMinor(Major, Minor))
4783     return true;
4784 
4785   if (!trySkipToken(AsmToken::Comma))
4786     return TokError("stepping version number required, comma expected");
4787 
4788   if (ParseAsAbsoluteExpression(Stepping))
4789     return TokError("invalid stepping version");
4790 
4791   if (!trySkipToken(AsmToken::Comma))
4792     return TokError("vendor name required, comma expected");
4793 
4794   if (!parseString(VendorName, "invalid vendor name"))
4795     return true;
4796 
4797   if (!trySkipToken(AsmToken::Comma))
4798     return TokError("arch name required, comma expected");
4799 
4800   if (!parseString(ArchName, "invalid arch name"))
4801     return true;
4802 
4803   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
4804                                                     VendorName, ArchName);
4805   return false;
4806 }
4807 
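/// Parse a single key/value field of an amd_kernel_code_t block and apply
/// additional validation for wavefront size and GFX10+ only settings.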
4808 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4809                                                amd_kernel_code_t &Header) {
4810   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4811   // assembly for backwards compatibility.
4812   if (ID == "max_scratch_backing_memory_byte_size") {
4813     Parser.eatToEndOfStatement();
4814     return false;
4815   }
4816 
4817   SmallString<40> ErrStr;
4818   raw_svector_ostream Err(ErrStr);
4819   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4820     return TokError(Err.str());
4821   }
4822   Lex();
4823 
4824   if (ID == "enable_wavefront_size32") {
4825     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4826       if (!isGFX10Plus())
4827         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4828       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4829         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4830     } else {
4831       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4832         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4833     }
4834   }
4835 
4836   if (ID == "wavefront_size") {
4837     if (Header.wavefront_size == 5) {
4838       if (!isGFX10Plus())
4839         return TokError("wavefront_size=5 is only allowed on GFX10+");
4840       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4841         return TokError("wavefront_size=5 requires +WavefrontSize32");
4842     } else if (Header.wavefront_size == 6) {
4843       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4844         return TokError("wavefront_size=6 requires +WavefrontSize64");
4845     }
4846   }
4847 
4848   if (ID == "enable_wgp_mode") {
4849     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
4850         !isGFX10Plus())
4851       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4852   }
4853 
4854   if (ID == "enable_mem_ordered") {
4855     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
4856         !isGFX10Plus())
4857       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4858   }
4859 
4860   if (ID == "enable_fwd_progress") {
4861     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
4862         !isGFX10Plus())
4863       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4864   }
4865 
4866   return false;
4867 }
4868 
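/// Parse the body of an .amd_kernel_code_t directive up to
/// .end_amd_kernel_code_t and emit the resulting header.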
4869 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4870   amd_kernel_code_t Header;
4871   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4872 
4873   while (true) {
4874     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
4875     // will set the current token to EndOfStatement.
4876     while(trySkipToken(AsmToken::EndOfStatement));
4877 
4878     StringRef ID;
4879     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
4880       return true;
4881 
4882     if (ID == ".end_amd_kernel_code_t")
4883       break;
4884 
4885     if (ParseAMDKernelCodeTValue(ID, Header))
4886       return true;
4887   }
4888 
4889   getTargetStreamer().EmitAMDKernelCodeT(Header);
4890 
4891   return false;
4892 }
4893 
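/// Parse the .amdgpu_hsa_kernel directive: mark the named symbol as an HSA
/// kernel and start a new kernel scope.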
4894 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4895   StringRef KernelName;
4896   if (!parseId(KernelName, "expected symbol name"))
4897     return true;
4898 
4899   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4900                                            ELF::STT_AMDGPU_HSA_KERNEL);
4901 
4902   KernelScope.initialize(getContext());
4903   return false;
4904 }
4905 
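/// Parse the .amd_amdgpu_isa directive and verify that the quoted ISA version
/// string matches the one derived from the subtarget.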
4906 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4907   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4908     return Error(getLoc(),
4909                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4910                  "architectures");
4911   }
4912 
4913   auto ISAVersionStringFromASM = getToken().getStringContents();
4914 
4915   std::string ISAVersionStringFromSTI;
4916   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4917   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4918 
4919   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4920     return Error(getLoc(),
4921                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4922                  "arguments specified through the command line");
4923   }
4924 
4925   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4926   Lex();
4927 
4928   return false;
4929 }
4930 
4931 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4932   const char *AssemblerDirectiveBegin;
4933   const char *AssemblerDirectiveEnd;
4934   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4935       isHsaAbiVersion3(&getSTI())
4936           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4937                             HSAMD::V3::AssemblerDirectiveEnd)
4938           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4939                             HSAMD::AssemblerDirectiveEnd);
4940 
4941   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4942     return Error(getLoc(),
4943                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4944                  "not available on non-amdhsa OSes")).str());
4945   }
4946 
4947   std::string HSAMetadataString;
4948   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4949                           HSAMetadataString))
4950     return true;
4951 
4952   if (isHsaAbiVersion3(&getSTI())) {
4953     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4954       return Error(getLoc(), "invalid HSA metadata");
4955   } else {
4956     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4957       return Error(getLoc(), "invalid HSA metadata");
4958   }
4959 
4960   return false;
4961 }
4962 
4963 /// Common code to parse out a block of text (typically YAML) between start and
4964 /// end directives.
4965 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4966                                           const char *AssemblerDirectiveEnd,
4967                                           std::string &CollectString) {
4968 
4969   raw_string_ostream CollectStream(CollectString);
4970 
4971   getLexer().setSkipSpace(false);
4972 
4973   bool FoundEnd = false;
4974   while (!isToken(AsmToken::Eof)) {
4975     while (isToken(AsmToken::Space)) {
4976       CollectStream << getTokenStr();
4977       Lex();
4978     }
4979 
4980     if (trySkipId(AssemblerDirectiveEnd)) {
4981       FoundEnd = true;
4982       break;
4983     }
4984 
4985     CollectStream << Parser.parseStringToEndOfStatement()
4986                   << getContext().getAsmInfo()->getSeparatorString();
4987 
4988     Parser.eatToEndOfStatement();
4989   }
4990 
4991   getLexer().setSkipSpace(true);
4992 
4993   if (isToken(AsmToken::Eof) && !FoundEnd) {
4994     return TokError(Twine("expected directive ") +
4995                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4996   }
4997 
4998   CollectStream.flush();
4999   return false;
5000 }
5001 
5002 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5003 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5004   std::string String;
5005   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5006                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5007     return true;
5008 
5009   auto PALMetadata = getTargetStreamer().getPALMetadata();
5010   if (!PALMetadata->setFromString(String))
5011     return Error(getLoc(), "invalid PAL metadata");
5012   return false;
5013 }
5014 
5015 /// Parse the assembler directive for old linear-format PAL metadata.
5016 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5017   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5018     return Error(getLoc(),
5019                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5020                  "not available on non-amdpal OSes")).str());
5021   }
5022 
5023   auto PALMetadata = getTargetStreamer().getPALMetadata();
5024   PALMetadata->setLegacy();
5025   for (;;) {
5026     uint32_t Key, Value;
5027     if (ParseAsAbsoluteExpression(Key)) {
5028       return TokError(Twine("invalid value in ") +
5029                       Twine(PALMD::AssemblerDirective));
5030     }
5031     if (!trySkipToken(AsmToken::Comma)) {
5032       return TokError(Twine("expected an even number of values in ") +
5033                       Twine(PALMD::AssemblerDirective));
5034     }
5035     if (ParseAsAbsoluteExpression(Value)) {
5036       return TokError(Twine("invalid value in ") +
5037                       Twine(PALMD::AssemblerDirective));
5038     }
5039     PALMetadata->setRegister(Key, Value);
5040     if (!trySkipToken(AsmToken::Comma))
5041       break;
5042   }
5043   return false;
5044 }
5045 
5046 /// ParseDirectiveAMDGPULDS
5047 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
5048 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5049   if (getParser().checkForValidSection())
5050     return true;
5051 
5052   StringRef Name;
5053   SMLoc NameLoc = getLoc();
5054   if (getParser().parseIdentifier(Name))
5055     return TokError("expected identifier in directive");
5056 
5057   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5058   if (parseToken(AsmToken::Comma, "expected ','"))
5059     return true;
5060 
5061   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5062 
5063   int64_t Size;
5064   SMLoc SizeLoc = getLoc();
5065   if (getParser().parseAbsoluteExpression(Size))
5066     return true;
5067   if (Size < 0)
5068     return Error(SizeLoc, "size must be non-negative");
5069   if (Size > LocalMemorySize)
5070     return Error(SizeLoc, "size is too large");
5071 
5072   int64_t Alignment = 4;
5073   if (trySkipToken(AsmToken::Comma)) {
5074     SMLoc AlignLoc = getLoc();
5075     if (getParser().parseAbsoluteExpression(Alignment))
5076       return true;
5077     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5078       return Error(AlignLoc, "alignment must be a power of two");
5079 
5080     // Alignment larger than the size of LDS is possible in theory, as long
5081     // as the linker manages to place the symbol at address 0, but we do want
5082     // to make sure the alignment fits nicely into a 32-bit integer.
5083     if (Alignment >= 1u << 31)
5084       return Error(AlignLoc, "alignment is too large");
5085   }
5086 
5087   if (parseToken(AsmToken::EndOfStatement,
5088                  "unexpected token in '.amdgpu_lds' directive"))
5089     return true;
5090 
5091   Symbol->redefineIfPossible();
5092   if (!Symbol->isUndefined())
5093     return Error(NameLoc, "invalid symbol redefinition");
5094 
5095   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5096   return false;
5097 }
5098 
5099 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5100   StringRef IDVal = DirectiveID.getString();
5101 
5102   if (isHsaAbiVersion3(&getSTI())) {
5103     if (IDVal == ".amdgcn_target")
5104       return ParseDirectiveAMDGCNTarget();
5105 
5106     if (IDVal == ".amdhsa_kernel")
5107       return ParseDirectiveAMDHSAKernel();
5108 
5109     // TODO: Restructure/combine with PAL metadata directive.
5110     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5111       return ParseDirectiveHSAMetadata();
5112   } else {
5113     if (IDVal == ".hsa_code_object_version")
5114       return ParseDirectiveHSACodeObjectVersion();
5115 
5116     if (IDVal == ".hsa_code_object_isa")
5117       return ParseDirectiveHSACodeObjectISA();
5118 
5119     if (IDVal == ".amd_kernel_code_t")
5120       return ParseDirectiveAMDKernelCodeT();
5121 
5122     if (IDVal == ".amdgpu_hsa_kernel")
5123       return ParseDirectiveAMDGPUHsaKernel();
5124 
5125     if (IDVal == ".amd_amdgpu_isa")
5126       return ParseDirectiveISAVersion();
5127 
5128     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5129       return ParseDirectiveHSAMetadata();
5130   }
5131 
5132   if (IDVal == ".amdgpu_lds")
5133     return ParseDirectiveAMDGPULDS();
5134 
5135   if (IDVal == PALMD::AssemblerDirectiveBegin)
5136     return ParseDirectivePALMetadataBegin();
5137 
5138   if (IDVal == PALMD::AssemblerDirective)
5139     return ParseDirectivePALMetadata();
5140 
5141   return true;
5142 }
5143 
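// Check whether the given register exists and is usable on the current
// subtarget; used to reject registers that are only present on other GPU
// generations.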
5144 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5145                                            unsigned RegNo) const {
5146 
5147   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
5148        R.isValid(); ++R) {
5149     if (*R == RegNo)
5150       return isGFX9Plus();
5151   }
5152 
5153   // GFX10 has 2 more SGPRs, 104 and 105.
5154   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
5155        R.isValid(); ++R) {
5156     if (*R == RegNo)
5157       return hasSGPR104_SGPR105();
5158   }
5159 
5160   switch (RegNo) {
5161   case AMDGPU::SRC_SHARED_BASE:
5162   case AMDGPU::SRC_SHARED_LIMIT:
5163   case AMDGPU::SRC_PRIVATE_BASE:
5164   case AMDGPU::SRC_PRIVATE_LIMIT:
5165   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5166     return isGFX9Plus();
5167   case AMDGPU::TBA:
5168   case AMDGPU::TBA_LO:
5169   case AMDGPU::TBA_HI:
5170   case AMDGPU::TMA:
5171   case AMDGPU::TMA_LO:
5172   case AMDGPU::TMA_HI:
5173     return !isGFX9Plus();
5174   case AMDGPU::XNACK_MASK:
5175   case AMDGPU::XNACK_MASK_LO:
5176   case AMDGPU::XNACK_MASK_HI:
5177     return (isVI() || isGFX9()) && hasXNACK();
5178   case AMDGPU::SGPR_NULL:
5179     return isGFX10Plus();
5180   default:
5181     break;
5182   }
5183 
5184   if (isCI())
5185     return true;
5186 
5187   if (isSI() || isGFX10Plus()) {
5188     // No flat_scr on SI.
5189     // On GFX10 flat scratch is not a valid register operand and can only be
5190     // accessed with s_setreg/s_getreg.
5191     switch (RegNo) {
5192     case AMDGPU::FLAT_SCR:
5193     case AMDGPU::FLAT_SCR_LO:
5194     case AMDGPU::FLAT_SCR_HI:
5195       return false;
5196     default:
5197       return true;
5198     }
5199   }
5200 
5201   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5202   // SI/CI have.
5203   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
5204        R.isValid(); ++R) {
5205     if (*R == RegNo)
5206       return hasSGPR102_SGPR103();
5207   }
5208 
5209   return true;
5210 }
5211 
5212 OperandMatchResultTy
5213 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5214                               OperandMode Mode) {
5215   // Try to parse with a custom parser
5216   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5217 
5218   // If we successfully parsed the operand or if there was an error parsing,
5219   // we are done.
5220   //
5221   // If we are parsing after we reach EndOfStatement then this means we
5222   // are appending default values to the Operands list.  This is only done
5223   // by the custom parser, so we shouldn't continue on to the generic parsing.
5224   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5225       isToken(AsmToken::EndOfStatement))
5226     return ResTy;
5227 
5228   SMLoc RBraceLoc;
5229   SMLoc LBraceLoc = getLoc();
5230   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5231     unsigned Prefix = Operands.size();
5232 
5233     for (;;) {
5234       auto Loc = getLoc();
5235       ResTy = parseReg(Operands);
5236       if (ResTy == MatchOperand_NoMatch)
5237         Error(Loc, "expected a register");
5238       if (ResTy != MatchOperand_Success)
5239         return MatchOperand_ParseFail;
5240 
5241       RBraceLoc = getLoc();
5242       if (trySkipToken(AsmToken::RBrac))
5243         break;
5244 
5245       if (!skipToken(AsmToken::Comma,
5246                      "expected a comma or a closing square bracket")) {
5247         return MatchOperand_ParseFail;
5248       }
5249     }
5250 
5251     if (Operands.size() - Prefix > 1) {
5252       Operands.insert(Operands.begin() + Prefix,
5253                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5254       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5255     }
5256 
5257     return MatchOperand_Success;
5258   }
5259 
5260   return parseRegOrImm(Operands);
5261 }
5262 
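// Strip a forced-encoding suffix (_e64, _e32, _dpp or _sdwa) from the
// mnemonic and record which encoding was requested.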
5263 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5264   // Clear any forced encodings from the previous instruction.
5265   setForcedEncodingSize(0);
5266   setForcedDPP(false);
5267   setForcedSDWA(false);
5268 
5269   if (Name.endswith("_e64")) {
5270     setForcedEncodingSize(64);
5271     return Name.substr(0, Name.size() - 4);
5272   } else if (Name.endswith("_e32")) {
5273     setForcedEncodingSize(32);
5274     return Name.substr(0, Name.size() - 4);
5275   } else if (Name.endswith("_dpp")) {
5276     setForcedDPP(true);
5277     return Name.substr(0, Name.size() - 4);
5278   } else if (Name.endswith("_sdwa")) {
5279     setForcedSDWA(true);
5280     return Name.substr(0, Name.size() - 5);
5281   }
5282   return Name;
5283 }
5284 
5285 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5286                                        StringRef Name,
5287                                        SMLoc NameLoc, OperandVector &Operands) {
5288   // Add the instruction mnemonic
5289   Name = parseMnemonicSuffix(Name);
5290   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5291 
5292   bool IsMIMG = Name.startswith("image_");
5293 
5294   while (!trySkipToken(AsmToken::EndOfStatement)) {
5295     OperandMode Mode = OperandMode_Default;
5296     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5297       Mode = OperandMode_NSA;
5298     CPolSeen = 0;
5299     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5300 
5301     if (Res != MatchOperand_Success) {
5302       checkUnsupportedInstruction(Name, NameLoc);
5303       if (!Parser.hasPendingError()) {
5304         // FIXME: use real operand location rather than the current location.
5305         StringRef Msg =
5306           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5307                                             "not a valid operand.";
5308         Error(getLoc(), Msg);
5309       }
5310       while (!trySkipToken(AsmToken::EndOfStatement)) {
5311         lex();
5312       }
5313       return true;
5314     }
5315 
5316     // Eat the comma or space if there is one.
5317     trySkipToken(AsmToken::Comma);
5318   }
5319 
5320   return false;
5321 }
5322 
5323 //===----------------------------------------------------------------------===//
5324 // Utility functions
5325 //===----------------------------------------------------------------------===//
5326 
5327 OperandMatchResultTy
5328 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5329 
5330   if (!trySkipId(Prefix, AsmToken::Colon))
5331     return MatchOperand_NoMatch;
5332 
5333   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5334 }
5335 
5336 OperandMatchResultTy
5337 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5338                                     AMDGPUOperand::ImmTy ImmTy,
5339                                     bool (*ConvertResult)(int64_t&)) {
5340   SMLoc S = getLoc();
5341   int64_t Value = 0;
5342 
5343   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5344   if (Res != MatchOperand_Success)
5345     return Res;
5346 
5347   if (ConvertResult && !ConvertResult(Value)) {
5348     Error(S, "invalid " + StringRef(Prefix) + " value.");
5349   }
5350 
5351   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5352   return MatchOperand_Success;
5353 }
5354 
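// Parse a prefixed array of 0/1 values, e.g. <prefix>:[0,1,1,0], and pack the
// elements (at most 4) into a single immediate bitmask.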
5355 OperandMatchResultTy
5356 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5357                                              OperandVector &Operands,
5358                                              AMDGPUOperand::ImmTy ImmTy,
5359                                              bool (*ConvertResult)(int64_t&)) {
5360   SMLoc S = getLoc();
5361   if (!trySkipId(Prefix, AsmToken::Colon))
5362     return MatchOperand_NoMatch;
5363 
5364   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5365     return MatchOperand_ParseFail;
5366 
5367   unsigned Val = 0;
5368   const unsigned MaxSize = 4;
5369 
5370   // FIXME: How to verify the number of elements matches the number of src
5371   // operands?
5372   for (int I = 0; ; ++I) {
5373     int64_t Op;
5374     SMLoc Loc = getLoc();
5375     if (!parseExpr(Op))
5376       return MatchOperand_ParseFail;
5377 
5378     if (Op != 0 && Op != 1) {
5379       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5380       return MatchOperand_ParseFail;
5381     }
5382 
5383     Val |= (Op << I);
5384 
5385     if (trySkipToken(AsmToken::RBrac))
5386       break;
5387 
5388     if (I + 1 == MaxSize) {
5389       Error(getLoc(), "expected a closing square bracket");
5390       return MatchOperand_ParseFail;
5391     }
5392 
5393     if (!skipToken(AsmToken::Comma, "expected a comma"))
5394       return MatchOperand_ParseFail;
5395   }
5396 
5397   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5398   return MatchOperand_Success;
5399 }
5400 
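// Parse a named boolean modifier: "<name>" sets the bit, "no<name>" clears it.
// The result is added as an immediate operand.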
5401 OperandMatchResultTy
5402 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5403                                AMDGPUOperand::ImmTy ImmTy) {
5404   int64_t Bit;
5405   SMLoc S = getLoc();
5406 
5407   if (trySkipId(Name)) {
5408     Bit = 1;
5409   } else if (trySkipId("no", Name)) {
5410     Bit = 0;
5411   } else {
5412     return MatchOperand_NoMatch;
5413   }
5414 
5415   if (Name == "r128" && !hasMIMG_R128()) {
5416     Error(S, "r128 modifier is not supported on this GPU");
5417     return MatchOperand_ParseFail;
5418   }
5419   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5420     Error(S, "a16 modifier is not supported on this GPU");
5421     return MatchOperand_ParseFail;
5422   }
5423 
5424   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5425     ImmTy = AMDGPUOperand::ImmTyR128A16;
5426 
5427   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5428   return MatchOperand_Success;
5429 }
5430 
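// Parse a cache policy modifier (glc/slc/dlc/scc or a negated "no" form),
// validate it for the subtarget and fold it into the single CPol immediate
// operand shared by all such modifiers on the instruction.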
5431 OperandMatchResultTy
5432 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5433   unsigned CPolOn = 0;
5434   unsigned CPolOff = 0;
5435   SMLoc S = getLoc();
5436 
5437   if (trySkipId("glc"))
5438     CPolOn = AMDGPU::CPol::GLC;
5439   else if (trySkipId("noglc"))
5440     CPolOff = AMDGPU::CPol::GLC;
5441   else if (trySkipId("slc"))
5442     CPolOn = AMDGPU::CPol::SLC;
5443   else if (trySkipId("noslc"))
5444     CPolOff = AMDGPU::CPol::SLC;
5445   else if (trySkipId("dlc"))
5446     CPolOn = AMDGPU::CPol::DLC;
5447   else if (trySkipId("nodlc"))
5448     CPolOff = AMDGPU::CPol::DLC;
5449   else if (trySkipId("scc"))
5450     CPolOn = AMDGPU::CPol::SCC;
5451   else if (trySkipId("noscc"))
5452     CPolOff = AMDGPU::CPol::SCC;
5453   else
5454     return MatchOperand_NoMatch;
5455 
5456   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5457     Error(S, "dlc modifier is not supported on this GPU");
5458     return MatchOperand_ParseFail;
5459   }
5460 
5461   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5462     Error(S, "scc modifier is not supported on this GPU");
5463     return MatchOperand_ParseFail;
5464   }
5465 
5466   if (CPolSeen & (CPolOn | CPolOff)) {
5467     Error(S, "duplicate cache policy modifier");
5468     return MatchOperand_ParseFail;
5469   }
5470 
5471   CPolSeen |= (CPolOn | CPolOff);
5472 
5473   for (unsigned I = 1; I != Operands.size(); ++I) {
5474     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5475     if (Op.isCPol()) {
5476       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5477       return MatchOperand_Success;
5478     }
5479   }
5480 
5481   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5482                                               AMDGPUOperand::ImmTyCPol));
5483 
5484   return MatchOperand_Success;
5485 }
5486 
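// Append an optional immediate operand to the instruction, using the value
// parsed from the assembly if present and the given default otherwise.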
5487 static void addOptionalImmOperand(
5488   MCInst& Inst, const OperandVector& Operands,
5489   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5490   AMDGPUOperand::ImmTy ImmT,
5491   int64_t Default = 0) {
5492   auto i = OptionalIdx.find(ImmT);
5493   if (i != OptionalIdx.end()) {
5494     unsigned Idx = i->second;
5495     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5496   } else {
5497     Inst.addOperand(MCOperand::createImm(Default));
5498   }
5499 }
5500 
5501 OperandMatchResultTy
5502 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5503                                        StringRef &Value,
5504                                        SMLoc &StringLoc) {
5505   if (!trySkipId(Prefix, AsmToken::Colon))
5506     return MatchOperand_NoMatch;
5507 
5508   StringLoc = getLoc();
5509   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5510                                                   : MatchOperand_ParseFail;
5511 }
5512 
5513 //===----------------------------------------------------------------------===//
5514 // MTBUF format
5515 //===----------------------------------------------------------------------===//
5516 
5517 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5518                                   int64_t MaxVal,
5519                                   int64_t &Fmt) {
5520   int64_t Val;
5521   SMLoc Loc = getLoc();
5522 
5523   auto Res = parseIntWithPrefix(Pref, Val);
5524   if (Res == MatchOperand_ParseFail)
5525     return false;
5526   if (Res == MatchOperand_NoMatch)
5527     return true;
5528 
5529   if (Val < 0 || Val > MaxVal) {
5530     Error(Loc, Twine("out of range ", StringRef(Pref)));
5531     return false;
5532   }
5533 
5534   Fmt = Val;
5535   return true;
5536 }
5537 
5538 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5539 // values to live in a joint format operand in the MCInst encoding.
5540 OperandMatchResultTy
5541 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5542   using namespace llvm::AMDGPU::MTBUFFormat;
5543 
5544   int64_t Dfmt = DFMT_UNDEF;
5545   int64_t Nfmt = NFMT_UNDEF;
5546 
5547   // dfmt and nfmt can appear in either order, and each is optional.
5548   for (int I = 0; I < 2; ++I) {
5549     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5550       return MatchOperand_ParseFail;
5551 
5552     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5553       return MatchOperand_ParseFail;
5554     }
5555     // Skip optional comma between dfmt/nfmt
5556     // but guard against 2 commas following each other.
5557     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5558         !peekToken().is(AsmToken::Comma)) {
5559       trySkipToken(AsmToken::Comma);
5560     }
5561   }
5562 
5563   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5564     return MatchOperand_NoMatch;
5565 
5566   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5567   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5568 
5569   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5570   return MatchOperand_Success;
5571 }
5572 
5573 OperandMatchResultTy
5574 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5575   using namespace llvm::AMDGPU::MTBUFFormat;
5576 
5577   int64_t Fmt = UFMT_UNDEF;
5578 
5579   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5580     return MatchOperand_ParseFail;
5581 
5582   if (Fmt == UFMT_UNDEF)
5583     return MatchOperand_NoMatch;
5584 
5585   Format = Fmt;
5586   return MatchOperand_Success;
5587 }
5588 
5589 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5590                                     int64_t &Nfmt,
5591                                     StringRef FormatStr,
5592                                     SMLoc Loc) {
5593   using namespace llvm::AMDGPU::MTBUFFormat;
5594   int64_t Format;
5595 
5596   Format = getDfmt(FormatStr);
5597   if (Format != DFMT_UNDEF) {
5598     Dfmt = Format;
5599     return true;
5600   }
5601 
5602   Format = getNfmt(FormatStr, getSTI());
5603   if (Format != NFMT_UNDEF) {
5604     Nfmt = Format;
5605     return true;
5606   }
5607 
5608   Error(Loc, "unsupported format");
5609   return false;
5610 }
5611 
5612 OperandMatchResultTy
5613 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5614                                           SMLoc FormatLoc,
5615                                           int64_t &Format) {
5616   using namespace llvm::AMDGPU::MTBUFFormat;
5617 
5618   int64_t Dfmt = DFMT_UNDEF;
5619   int64_t Nfmt = NFMT_UNDEF;
5620   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5621     return MatchOperand_ParseFail;
5622 
5623   if (trySkipToken(AsmToken::Comma)) {
5624     StringRef Str;
5625     SMLoc Loc = getLoc();
5626     if (!parseId(Str, "expected a format string") ||
5627         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5628       return MatchOperand_ParseFail;
5629     }
5630     if (Dfmt == DFMT_UNDEF) {
5631       Error(Loc, "duplicate numeric format");
5632       return MatchOperand_ParseFail;
5633     } else if (Nfmt == NFMT_UNDEF) {
5634       Error(Loc, "duplicate data format");
5635       return MatchOperand_ParseFail;
5636     }
5637   }
5638 
5639   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5640   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5641 
5642   if (isGFX10Plus()) {
5643     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5644     if (Ufmt == UFMT_UNDEF) {
5645       Error(FormatLoc, "unsupported format");
5646       return MatchOperand_ParseFail;
5647     }
5648     Format = Ufmt;
5649   } else {
5650     Format = encodeDfmtNfmt(Dfmt, Nfmt);
5651   }
5652 
5653   return MatchOperand_Success;
5654 }
5655 
5656 OperandMatchResultTy
5657 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5658                                             SMLoc Loc,
5659                                             int64_t &Format) {
5660   using namespace llvm::AMDGPU::MTBUFFormat;
5661 
5662   auto Id = getUnifiedFormat(FormatStr);
5663   if (Id == UFMT_UNDEF)
5664     return MatchOperand_NoMatch;
5665 
5666   if (!isGFX10Plus()) {
5667     Error(Loc, "unified format is not supported on this GPU");
5668     return MatchOperand_ParseFail;
5669   }
5670 
5671   Format = Id;
5672   return MatchOperand_Success;
5673 }
5674 
5675 OperandMatchResultTy
5676 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5677   using namespace llvm::AMDGPU::MTBUFFormat;
5678   SMLoc Loc = getLoc();
5679 
5680   if (!parseExpr(Format))
5681     return MatchOperand_ParseFail;
5682   if (!isValidFormatEncoding(Format, getSTI())) {
5683     Error(Loc, "out of range format");
5684     return MatchOperand_ParseFail;
5685   }
5686 
5687   return MatchOperand_Success;
5688 }
5689 
5690 OperandMatchResultTy
5691 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5692   using namespace llvm::AMDGPU::MTBUFFormat;
5693 
5694   if (!trySkipId("format", AsmToken::Colon))
5695     return MatchOperand_NoMatch;
5696 
5697   if (trySkipToken(AsmToken::LBrac)) {
5698     StringRef FormatStr;
5699     SMLoc Loc = getLoc();
5700     if (!parseId(FormatStr, "expected a format string"))
5701       return MatchOperand_ParseFail;
5702 
5703     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5704     if (Res == MatchOperand_NoMatch)
5705       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5706     if (Res != MatchOperand_Success)
5707       return Res;
5708 
5709     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5710       return MatchOperand_ParseFail;
5711 
5712     return MatchOperand_Success;
5713   }
5714 
5715   return parseNumericFormat(Format);
5716 }
5717 
5718 OperandMatchResultTy
5719 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5720   using namespace llvm::AMDGPU::MTBUFFormat;
5721 
5722   int64_t Format = getDefaultFormatEncoding(getSTI());
5723   OperandMatchResultTy Res;
5724   SMLoc Loc = getLoc();
5725 
5726   // Parse legacy format syntax.
5727   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5728   if (Res == MatchOperand_ParseFail)
5729     return Res;
5730 
5731   bool FormatFound = (Res == MatchOperand_Success);
5732 
5733   Operands.push_back(
5734     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5735 
5736   if (FormatFound)
5737     trySkipToken(AsmToken::Comma);
5738 
5739   if (isToken(AsmToken::EndOfStatement)) {
5740     // We are expecting an soffset operand,
5741     // but let the matcher handle the error.
5742     return MatchOperand_Success;
5743   }
5744 
5745   // Parse soffset.
5746   Res = parseRegOrImm(Operands);
5747   if (Res != MatchOperand_Success)
5748     return Res;
5749 
5750   trySkipToken(AsmToken::Comma);
5751 
5752   if (!FormatFound) {
5753     Res = parseSymbolicOrNumericFormat(Format);
5754     if (Res == MatchOperand_ParseFail)
5755       return Res;
5756     if (Res == MatchOperand_Success) {
5757       auto Size = Operands.size();
5758       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5759       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5760       Op.setImm(Format);
5761     }
5762     return MatchOperand_Success;
5763   }
5764 
5765   if (isId("format") && peekToken().is(AsmToken::Colon)) {
5766     Error(getLoc(), "duplicate format");
5767     return MatchOperand_ParseFail;
5768   }
5769   return MatchOperand_Success;
5770 }
5771 
5772 //===----------------------------------------------------------------------===//
5773 // ds
5774 //===----------------------------------------------------------------------===//
5775 
5776 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5777                                     const OperandVector &Operands) {
5778   OptionalImmIndexMap OptionalIdx;
5779 
5780   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5781     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5782 
5783     // Add the register arguments
5784     if (Op.isReg()) {
5785       Op.addRegOperands(Inst, 1);
5786       continue;
5787     }
5788 
5789     // Handle optional arguments
5790     OptionalIdx[Op.getImmTy()] = i;
5791   }
5792 
5793   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
5794   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
5795   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5796 
5797   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5798 }
5799 
5800 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
5801                                 bool IsGdsHardcoded) {
5802   OptionalImmIndexMap OptionalIdx;
5803 
5804   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5805     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5806 
5807     // Add the register arguments
5808     if (Op.isReg()) {
5809       Op.addRegOperands(Inst, 1);
5810       continue;
5811     }
5812 
5813     if (Op.isToken() && Op.getToken() == "gds") {
5814       IsGdsHardcoded = true;
5815       continue;
5816     }
5817 
5818     // Handle optional arguments
5819     OptionalIdx[Op.getImmTy()] = i;
5820   }
5821 
5822   AMDGPUOperand::ImmTy OffsetType =
5823     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
5824      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
5825      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
5826                                                       AMDGPUOperand::ImmTyOffset;
5827 
5828   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
5829 
5830   if (!IsGdsHardcoded) {
5831     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5832   }
5833   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5834 }
5835 
5836 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
5837   OptionalImmIndexMap OptionalIdx;
5838 
5839   unsigned OperandIdx[4];
5840   unsigned EnMask = 0;
5841   int SrcIdx = 0;
5842 
5843   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5844     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5845 
5846     // Add the register arguments
5847     if (Op.isReg()) {
5848       assert(SrcIdx < 4);
5849       OperandIdx[SrcIdx] = Inst.size();
5850       Op.addRegOperands(Inst, 1);
5851       ++SrcIdx;
5852       continue;
5853     }
5854 
5855     if (Op.isOff()) {
5856       assert(SrcIdx < 4);
5857       OperandIdx[SrcIdx] = Inst.size();
5858       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
5859       ++SrcIdx;
5860       continue;
5861     }
5862 
5863     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
5864       Op.addImmOperands(Inst, 1);
5865       continue;
5866     }
5867 
5868     if (Op.isToken() && Op.getToken() == "done")
5869       continue;
5870 
5871     // Handle optional arguments
5872     OptionalIdx[Op.getImmTy()] = i;
5873   }
5874 
5875   assert(SrcIdx == 4);
5876 
5877   bool Compr = false;
5878   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
5879     Compr = true;
5880     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
5881     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
5882     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
5883   }
5884 
5885   for (auto i = 0; i < SrcIdx; ++i) {
5886     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
5887       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
5888     }
5889   }
5890 
5891   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
5892   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
5893 
5894   Inst.addOperand(MCOperand::createImm(EnMask));
5895 }
5896 
5897 //===----------------------------------------------------------------------===//
5898 // s_waitcnt
5899 //===----------------------------------------------------------------------===//
5900 
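// Encode a single counter value into the combined s_waitcnt immediate.
// Returns true on failure, i.e. when the value does not fit in its field and
// saturation was not requested.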
5901 static bool
5902 encodeCnt(
5903   const AMDGPU::IsaVersion ISA,
5904   int64_t &IntVal,
5905   int64_t CntVal,
5906   bool Saturate,
5907   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
5908   unsigned (*decode)(const IsaVersion &Version, unsigned))
5909 {
5910   bool Failed = false;
5911 
5912   IntVal = encode(ISA, IntVal, CntVal);
5913   if (CntVal != decode(ISA, IntVal)) {
5914     if (Saturate) {
5915       IntVal = encode(ISA, IntVal, -1);
5916     } else {
5917       Failed = true;
5918     }
5919   }
5920   return Failed;
5921 }
5922 
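// Parse one counter specification of an s_waitcnt operand, e.g. vmcnt(0) or
// lgkmcnt_sat(1), and merge it into IntVal. Counters may be separated by '&'
// or ','.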
5923 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
5924 
5925   SMLoc CntLoc = getLoc();
5926   StringRef CntName = getTokenStr();
5927 
5928   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
5929       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
5930     return false;
5931 
5932   int64_t CntVal;
5933   SMLoc ValLoc = getLoc();
5934   if (!parseExpr(CntVal))
5935     return false;
5936 
5937   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5938 
5939   bool Failed = true;
5940   bool Sat = CntName.endswith("_sat");
5941 
5942   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
5943     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
5944   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
5945     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
5946   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
5947     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
5948   } else {
5949     Error(CntLoc, "invalid counter name " + CntName);
5950     return false;
5951   }
5952 
5953   if (Failed) {
5954     Error(ValLoc, "too large value for " + CntName);
5955     return false;
5956   }
5957 
5958   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
5959     return false;
5960 
5961   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
5962     if (isToken(AsmToken::EndOfStatement)) {
5963       Error(getLoc(), "expected a counter name");
5964       return false;
5965     }
5966   }
5967 
5968   return true;
5969 }
5970 
5971 OperandMatchResultTy
5972 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
5973   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5974   int64_t Waitcnt = getWaitcntBitMask(ISA);
5975   SMLoc S = getLoc();
5976 
5977   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
5978     while (!isToken(AsmToken::EndOfStatement)) {
5979       if (!parseCnt(Waitcnt))
5980         return MatchOperand_ParseFail;
5981     }
5982   } else {
5983     if (!parseExpr(Waitcnt))
5984       return MatchOperand_ParseFail;
5985   }
5986 
5987   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
5988   return MatchOperand_Success;
5989 }
5990 
5991 bool
5992 AMDGPUOperand::isSWaitCnt() const {
5993   return isImm();
5994 }
5995 
5996 //===----------------------------------------------------------------------===//
5997 // hwreg
5998 //===----------------------------------------------------------------------===//
5999 
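// Parse the body of a hwreg(...) operand: a register name or numeric code,
// optionally followed by a bit offset and a bitfield width.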
6000 bool
6001 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6002                                 OperandInfoTy &Offset,
6003                                 OperandInfoTy &Width) {
6004   using namespace llvm::AMDGPU::Hwreg;
6005 
6006   // The register may be specified by name or using a numeric code.
6007   HwReg.Loc = getLoc();
6008   if (isToken(AsmToken::Identifier) &&
6009       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
6010     HwReg.IsSymbolic = true;
6011     lex(); // skip register name
6012   } else if (!parseExpr(HwReg.Id, "a register name")) {
6013     return false;
6014   }
6015 
6016   if (trySkipToken(AsmToken::RParen))
6017     return true;
6018 
6019   // Parse optional parameters.
6020   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6021     return false;
6022 
6023   Offset.Loc = getLoc();
6024   if (!parseExpr(Offset.Id))
6025     return false;
6026 
6027   if (!skipToken(AsmToken::Comma, "expected a comma"))
6028     return false;
6029 
6030   Width.Loc = getLoc();
6031   return parseExpr(Width.Id) &&
6032          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6033 }
6034 
6035 bool
6036 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6037                                const OperandInfoTy &Offset,
6038                                const OperandInfoTy &Width) {
6039 
6040   using namespace llvm::AMDGPU::Hwreg;
6041 
6042   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
6043     Error(HwReg.Loc,
6044           "specified hardware register is not supported on this GPU");
6045     return false;
6046   }
6047   if (!isValidHwreg(HwReg.Id)) {
6048     Error(HwReg.Loc,
6049           "invalid code of hardware register: only 6-bit values are legal");
6050     return false;
6051   }
6052   if (!isValidHwregOffset(Offset.Id)) {
6053     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6054     return false;
6055   }
6056   if (!isValidHwregWidth(Width.Id)) {
6057     Error(Width.Loc,
6058           "invalid bitfield width: only values from 1 to 32 are legal");
6059     return false;
6060   }
6061   return true;
6062 }
6063 
6064 OperandMatchResultTy
6065 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6066   using namespace llvm::AMDGPU::Hwreg;
6067 
6068   int64_t ImmVal = 0;
6069   SMLoc Loc = getLoc();
6070 
6071   if (trySkipId("hwreg", AsmToken::LParen)) {
6072     OperandInfoTy HwReg(ID_UNKNOWN_);
6073     OperandInfoTy Offset(OFFSET_DEFAULT_);
6074     OperandInfoTy Width(WIDTH_DEFAULT_);
6075     if (parseHwregBody(HwReg, Offset, Width) &&
6076         validateHwreg(HwReg, Offset, Width)) {
6077       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6078     } else {
6079       return MatchOperand_ParseFail;
6080     }
6081   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6082     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6083       Error(Loc, "invalid immediate: only 16-bit values are legal");
6084       return MatchOperand_ParseFail;
6085     }
6086   } else {
6087     return MatchOperand_ParseFail;
6088   }
6089 
6090   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6091   return MatchOperand_Success;
6092 }
6093 
6094 bool AMDGPUOperand::isHwreg() const {
6095   return isImmTy(ImmTyHwreg);
6096 }
6097 
6098 //===----------------------------------------------------------------------===//
6099 // sendmsg
6100 //===----------------------------------------------------------------------===//
6101 
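// Parse the body of a sendmsg(...) operand: a message name or numeric id,
// optionally followed by an operation and a stream id.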
6102 bool
6103 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6104                                   OperandInfoTy &Op,
6105                                   OperandInfoTy &Stream) {
6106   using namespace llvm::AMDGPU::SendMsg;
6107 
6108   Msg.Loc = getLoc();
6109   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
6110     Msg.IsSymbolic = true;
6111     lex(); // skip message name
6112   } else if (!parseExpr(Msg.Id, "a message name")) {
6113     return false;
6114   }
6115 
6116   if (trySkipToken(AsmToken::Comma)) {
6117     Op.IsDefined = true;
6118     Op.Loc = getLoc();
6119     if (isToken(AsmToken::Identifier) &&
6120         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6121       lex(); // skip operation name
6122     } else if (!parseExpr(Op.Id, "an operation name")) {
6123       return false;
6124     }
6125 
6126     if (trySkipToken(AsmToken::Comma)) {
6127       Stream.IsDefined = true;
6128       Stream.Loc = getLoc();
6129       if (!parseExpr(Stream.Id))
6130         return false;
6131     }
6132   }
6133 
6134   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6135 }
6136 
6137 bool
6138 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6139                                  const OperandInfoTy &Op,
6140                                  const OperandInfoTy &Stream) {
6141   using namespace llvm::AMDGPU::SendMsg;
6142 
6143   // Validation strictness depends on whether the message is specified
6144   // in symbolic or in numeric form. In the latter case
6145   // only the possibility of encoding is checked.
6146   bool Strict = Msg.IsSymbolic;
6147 
6148   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
6149     Error(Msg.Loc, "invalid message id");
6150     return false;
6151   }
6152   if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
6153     if (Op.IsDefined) {
6154       Error(Op.Loc, "message does not support operations");
6155     } else {
6156       Error(Msg.Loc, "missing message operation");
6157     }
6158     return false;
6159   }
6160   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6161     Error(Op.Loc, "invalid operation id");
6162     return false;
6163   }
6164   if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
6165     Error(Stream.Loc, "message operation does not support streams");
6166     return false;
6167   }
6168   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6169     Error(Stream.Loc, "invalid message stream id");
6170     return false;
6171   }
6172   return true;
6173 }
6174 
6175 OperandMatchResultTy
6176 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6177   using namespace llvm::AMDGPU::SendMsg;
6178 
6179   int64_t ImmVal = 0;
6180   SMLoc Loc = getLoc();
6181 
6182   if (trySkipId("sendmsg", AsmToken::LParen)) {
6183     OperandInfoTy Msg(ID_UNKNOWN_);
6184     OperandInfoTy Op(OP_NONE_);
6185     OperandInfoTy Stream(STREAM_ID_NONE_);
6186     if (parseSendMsgBody(Msg, Op, Stream) &&
6187         validateSendMsg(Msg, Op, Stream)) {
6188       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6189     } else {
6190       return MatchOperand_ParseFail;
6191     }
6192   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6193     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6194       Error(Loc, "invalid immediate: only 16-bit values are legal");
6195       return MatchOperand_ParseFail;
6196     }
6197   } else {
6198     return MatchOperand_ParseFail;
6199   }
6200 
6201   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6202   return MatchOperand_Success;
6203 }
6204 
6205 bool AMDGPUOperand::isSendMsg() const {
6206   return isImmTy(ImmTySendMsg);
6207 }
6208 
6209 //===----------------------------------------------------------------------===//
6210 // v_interp
6211 //===----------------------------------------------------------------------===//
6212 
6213 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6214   StringRef Str;
6215   SMLoc S = getLoc();
6216 
6217   if (!parseId(Str))
6218     return MatchOperand_NoMatch;
6219 
6220   int Slot = StringSwitch<int>(Str)
6221     .Case("p10", 0)
6222     .Case("p20", 1)
6223     .Case("p0", 2)
6224     .Default(-1);
6225 
6226   if (Slot == -1) {
6227     Error(S, "invalid interpolation slot");
6228     return MatchOperand_ParseFail;
6229   }
6230 
6231   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6232                                               AMDGPUOperand::ImmTyInterpSlot));
6233   return MatchOperand_Success;
6234 }
6235 
6236 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6237   StringRef Str;
6238   SMLoc S = getLoc();
6239 
6240   if (!parseId(Str))
6241     return MatchOperand_NoMatch;
6242 
6243   if (!Str.startswith("attr")) {
6244     Error(S, "invalid interpolation attribute");
6245     return MatchOperand_ParseFail;
6246   }
6247 
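  // The attribute has the form attr<N>.<chan>, e.g. "attr3.x": the last two
  // characters name the channel and the digits between "attr" and the dot
  // give the attribute number (0..63).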
6248   StringRef Chan = Str.take_back(2);
6249   int AttrChan = StringSwitch<int>(Chan)
6250     .Case(".x", 0)
6251     .Case(".y", 1)
6252     .Case(".z", 2)
6253     .Case(".w", 3)
6254     .Default(-1);
6255   if (AttrChan == -1) {
6256     Error(S, "invalid or missing interpolation attribute channel");
6257     return MatchOperand_ParseFail;
6258   }
6259 
6260   Str = Str.drop_back(2).drop_front(4);
6261 
6262   uint8_t Attr;
6263   if (Str.getAsInteger(10, Attr)) {
6264     Error(S, "invalid or missing interpolation attribute number");
6265     return MatchOperand_ParseFail;
6266   }
6267 
6268   if (Attr > 63) {
6269     Error(S, "out of bounds interpolation attribute number");
6270     return MatchOperand_ParseFail;
6271   }
6272 
6273   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6274 
6275   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6276                                               AMDGPUOperand::ImmTyInterpAttr));
6277   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6278                                               AMDGPUOperand::ImmTyAttrChan));
6279   return MatchOperand_Success;
6280 }
6281 
6282 //===----------------------------------------------------------------------===//
6283 // exp
6284 //===----------------------------------------------------------------------===//
6285 
6286 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6287   using namespace llvm::AMDGPU::Exp;
6288 
6289   StringRef Str;
6290   SMLoc S = getLoc();
6291 
6292   if (!parseId(Str))
6293     return MatchOperand_NoMatch;
6294 
6295   unsigned Id = getTgtId(Str);
6296   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6297     Error(S, (Id == ET_INVALID) ?
6298                 "invalid exp target" :
6299                 "exp target is not supported on this GPU");
6300     return MatchOperand_ParseFail;
6301   }
6302 
6303   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6304                                               AMDGPUOperand::ImmTyExpTgt));
6305   return MatchOperand_Success;
6306 }
6307 
6308 //===----------------------------------------------------------------------===//
6309 // parser helpers
6310 //===----------------------------------------------------------------------===//
6311 
6312 bool
6313 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6314   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6315 }
6316 
6317 bool
6318 AMDGPUAsmParser::isId(const StringRef Id) const {
6319   return isId(getToken(), Id);
6320 }
6321 
6322 bool
6323 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6324   return getTokenKind() == Kind;
6325 }
6326 
6327 bool
6328 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6329   if (isId(Id)) {
6330     lex();
6331     return true;
6332   }
6333   return false;
6334 }
6335 
6336 bool
6337 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6338   if (isToken(AsmToken::Identifier)) {
6339     StringRef Tok = getTokenStr();
6340     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6341       lex();
6342       return true;
6343     }
6344   }
6345   return false;
6346 }
6347 
6348 bool
6349 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6350   if (isId(Id) && peekToken().is(Kind)) {
6351     lex();
6352     lex();
6353     return true;
6354   }
6355   return false;
6356 }
6357 
6358 bool
6359 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6360   if (isToken(Kind)) {
6361     lex();
6362     return true;
6363   }
6364   return false;
6365 }
6366 
6367 bool
6368 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6369                            const StringRef ErrMsg) {
6370   if (!trySkipToken(Kind)) {
6371     Error(getLoc(), ErrMsg);
6372     return false;
6373   }
6374   return true;
6375 }
6376 
6377 bool
6378 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6379   SMLoc S = getLoc();
6380 
6381   const MCExpr *Expr;
6382   if (Parser.parseExpression(Expr))
6383     return false;
6384 
6385   if (Expr->evaluateAsAbsolute(Imm))
6386     return true;
6387 
6388   if (Expected.empty()) {
6389     Error(S, "expected absolute expression");
6390   } else {
6391     Error(S, Twine("expected ", Expected) +
6392              Twine(" or an absolute expression"));
6393   }
6394   return false;
6395 }
6396 
6397 bool
6398 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6399   SMLoc S = getLoc();
6400 
6401   const MCExpr *Expr;
6402   if (Parser.parseExpression(Expr))
6403     return false;
6404 
6405   int64_t IntVal;
6406   if (Expr->evaluateAsAbsolute(IntVal)) {
6407     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6408   } else {
6409     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6410   }
6411   return true;
6412 }
6413 
6414 bool
6415 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6416   if (isToken(AsmToken::String)) {
6417     Val = getToken().getStringContents();
6418     lex();
6419     return true;
6420   } else {
6421     Error(getLoc(), ErrMsg);
6422     return false;
6423   }
6424 }
6425 
6426 bool
6427 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6428   if (isToken(AsmToken::Identifier)) {
6429     Val = getTokenStr();
6430     lex();
6431     return true;
6432   } else {
6433     if (!ErrMsg.empty())
6434       Error(getLoc(), ErrMsg);
6435     return false;
6436   }
6437 }
6438 
6439 AsmToken
6440 AMDGPUAsmParser::getToken() const {
6441   return Parser.getTok();
6442 }
6443 
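// Return the next token without consuming it. At the end of a statement the
// current token is returned instead, so lookahead never crosses a statement
// boundary.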
6444 AsmToken
6445 AMDGPUAsmParser::peekToken() {
6446   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6447 }
6448 
6449 void
6450 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6451   auto TokCount = getLexer().peekTokens(Tokens);
6452 
6453   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6454     Tokens[Idx] = AsmToken(AsmToken::Error, "");
6455 }
6456 
6457 AsmToken::TokenKind
6458 AMDGPUAsmParser::getTokenKind() const {
6459   return getLexer().getKind();
6460 }
6461 
6462 SMLoc
6463 AMDGPUAsmParser::getLoc() const {
6464   return getToken().getLoc();
6465 }
6466 
6467 StringRef
6468 AMDGPUAsmParser::getTokenStr() const {
6469   return getToken().getString();
6470 }
6471 
6472 void
6473 AMDGPUAsmParser::lex() {
6474   Parser.Lex();
6475 }
6476 
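// Scan the parsed operands from last to first and return the location of the
// most recent operand satisfying Test; fall back to the mnemonic's location
// (Operands[0]) so that diagnostics always have something to point at.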
6477 SMLoc
6478 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
6479                                const OperandVector &Operands) const {
6480   for (unsigned i = Operands.size() - 1; i > 0; --i) {
6481     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6482     if (Test(Op))
6483       return Op.getStartLoc();
6484   }
6485   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
6486 }
6487 
6488 SMLoc
6489 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
6490                            const OperandVector &Operands) const {
6491   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
6492   return getOperandLoc(Test, Operands);
6493 }
6494 
6495 SMLoc
6496 AMDGPUAsmParser::getRegLoc(unsigned Reg,
6497                            const OperandVector &Operands) const {
6498   auto Test = [=](const AMDGPUOperand& Op) {
6499     return Op.isRegKind() && Op.getReg() == Reg;
6500   };
6501   return getOperandLoc(Test, Operands);
6502 }
6503 
6504 SMLoc
6505 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
6506   auto Test = [](const AMDGPUOperand& Op) {
6507     return Op.isImmKindLiteral() || Op.isExpr();
6508   };
6509   return getOperandLoc(Test, Operands);
6510 }
6511 
6512 SMLoc
6513 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
6514   auto Test = [](const AMDGPUOperand& Op) {
6515     return Op.isImmKindConst();
6516   };
6517   return getOperandLoc(Test, Operands);
6518 }
6519 
6520 //===----------------------------------------------------------------------===//
6521 // swizzle
6522 //===----------------------------------------------------------------------===//
6523 
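// Helper for the swizzle bitmask-perm encoding: pack the AND/OR/XOR lane
// masks into the BITMASK_PERM fields. Each lane then reads from lane
// ((id & AndMask) | OrMask) ^ XorMask, which is why the broadcast, swap and
// reverse macros below can all be lowered to this single encoding.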
6524 LLVM_READNONE
6525 static unsigned
6526 encodeBitmaskPerm(const unsigned AndMask,
6527                   const unsigned OrMask,
6528                   const unsigned XorMask) {
6529   using namespace llvm::AMDGPU::Swizzle;
6530 
6531   return BITMASK_PERM_ENC |
6532          (AndMask << BITMASK_AND_SHIFT) |
6533          (OrMask  << BITMASK_OR_SHIFT)  |
6534          (XorMask << BITMASK_XOR_SHIFT);
6535 }
6536 
6537 bool
6538 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
6539                                      const unsigned MinVal,
6540                                      const unsigned MaxVal,
6541                                      const StringRef ErrMsg,
6542                                      SMLoc &Loc) {
6543   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6544     return false;
6545   }
6546   Loc = getLoc();
6547   if (!parseExpr(Op)) {
6548     return false;
6549   }
6550   if (Op < MinVal || Op > MaxVal) {
6551     Error(Loc, ErrMsg);
6552     return false;
6553   }
6554 
6555   return true;
6556 }
6557 
6558 bool
6559 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6560                                       const unsigned MinVal,
6561                                       const unsigned MaxVal,
6562                                       const StringRef ErrMsg) {
6563   SMLoc Loc;
6564   for (unsigned i = 0; i < OpNum; ++i) {
6565     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
6566       return false;
6567   }
6568 
6569   return true;
6570 }
6571 
6572 bool
6573 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6574   using namespace llvm::AMDGPU::Swizzle;
6575 
6576   int64_t Lane[LANE_NUM];
6577   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6578                            "expected a 2-bit lane id")) {
6579     Imm = QUAD_PERM_ENC;
6580     for (unsigned I = 0; I < LANE_NUM; ++I) {
6581       Imm |= Lane[I] << (LANE_SHIFT * I);
6582     }
6583     return true;
6584   }
6585   return false;
6586 }
6587 
6588 bool
6589 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6590   using namespace llvm::AMDGPU::Swizzle;
6591 
6592   SMLoc Loc;
6593   int64_t GroupSize;
6594   int64_t LaneIdx;
6595 
6596   if (!parseSwizzleOperand(GroupSize,
6597                            2, 32,
6598                            "group size must be in the interval [2,32]",
6599                            Loc)) {
6600     return false;
6601   }
6602   if (!isPowerOf2_64(GroupSize)) {
6603     Error(Loc, "group size must be a power of two");
6604     return false;
6605   }
6606   if (parseSwizzleOperand(LaneIdx,
6607                           0, GroupSize - 1,
6608                           "lane id must be in the interval [0,group size - 1]",
6609                           Loc)) {
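    // GroupSize is a power of two, so BITMASK_MAX - GroupSize + 1 is a mask
    // with the low log2(GroupSize) bits cleared: ANDing keeps the group base
    // and ORing in LaneIdx makes every lane of a group read its LaneIdx-th
    // lane.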
6610     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6611     return true;
6612   }
6613   return false;
6614 }
6615 
6616 bool
6617 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
6618   using namespace llvm::AMDGPU::Swizzle;
6619 
6620   SMLoc Loc;
6621   int64_t GroupSize;
6622 
6623   if (!parseSwizzleOperand(GroupSize,
6624                            2, 32,
6625                            "group size must be in the interval [2,32]",
6626                            Loc)) {
6627     return false;
6628   }
6629   if (!isPowerOf2_64(GroupSize)) {
6630     Error(Loc, "group size must be a power of two");
6631     return false;
6632   }
6633 
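  // XOR-ing the lane id with GroupSize - 1 reverses the lane order within
  // each group of GroupSize lanes.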
6634   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6635   return true;
6636 }
6637 
6638 bool
6639 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6640   using namespace llvm::AMDGPU::Swizzle;
6641 
6642   SMLoc Loc;
6643   int64_t GroupSize;
6644 
6645   if (!parseSwizzleOperand(GroupSize,
6646                            1, 16,
6647                            "group size must be in the interval [1,16]",
6648                            Loc)) {
6649     return false;
6650   }
6651   if (!isPowerOf2_64(GroupSize)) {
6652     Error(Loc, "group size must be a power of two");
6653     return false;
6654   }
6655 
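  // XOR-ing the lane id with GroupSize swaps each group of GroupSize lanes
  // with the neighbouring group.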
6656   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6657   return true;
6658 }
6659 
6660 bool
6661 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
6662   using namespace llvm::AMDGPU::Swizzle;
6663 
6664   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6665     return false;
6666   }
6667 
6668   StringRef Ctl;
6669   SMLoc StrLoc = getLoc();
6670   if (!parseString(Ctl)) {
6671     return false;
6672   }
6673   if (Ctl.size() != BITMASK_WIDTH) {
6674     Error(StrLoc, "expected a 5-character mask");
6675     return false;
6676   }
6677 
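  // Each character of the control string describes one bit of the lane id,
  // MSB first: '0' forces the bit to 0, '1' forces it to 1, 'p' preserves it
  // and 'i' inverts it; this maps directly onto the and/or/xor masks.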
6678   unsigned AndMask = 0;
6679   unsigned OrMask = 0;
6680   unsigned XorMask = 0;
6681 
6682   for (size_t i = 0; i < Ctl.size(); ++i) {
6683     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
6684     switch(Ctl[i]) {
6685     default:
6686       Error(StrLoc, "invalid mask");
6687       return false;
6688     case '0':
6689       break;
6690     case '1':
6691       OrMask |= Mask;
6692       break;
6693     case 'p':
6694       AndMask |= Mask;
6695       break;
6696     case 'i':
6697       AndMask |= Mask;
6698       XorMask |= Mask;
6699       break;
6700     }
6701   }
6702 
6703   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
6704   return true;
6705 }
6706 
6707 bool
6708 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6709 
6710   SMLoc OffsetLoc = getLoc();
6711 
6712   if (!parseExpr(Imm, "a swizzle macro")) {
6713     return false;
6714   }
6715   if (!isUInt<16>(Imm)) {
6716     Error(OffsetLoc, "expected a 16-bit offset");
6717     return false;
6718   }
6719   return true;
6720 }
6721 
6722 bool
6723 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6724   using namespace llvm::AMDGPU::Swizzle;
6725 
6726   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
6727 
6728     SMLoc ModeLoc = getLoc();
6729     bool Ok = false;
6730 
6731     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6732       Ok = parseSwizzleQuadPerm(Imm);
6733     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6734       Ok = parseSwizzleBitmaskPerm(Imm);
6735     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6736       Ok = parseSwizzleBroadcast(Imm);
6737     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6738       Ok = parseSwizzleSwap(Imm);
6739     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6740       Ok = parseSwizzleReverse(Imm);
6741     } else {
6742       Error(ModeLoc, "expected a swizzle mode");
6743     }
6744 
6745     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
6746   }
6747 
6748   return false;
6749 }
6750 
6751 OperandMatchResultTy
6752 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6753   SMLoc S = getLoc();
6754   int64_t Imm = 0;
6755 
6756   if (trySkipId("offset")) {
6757 
6758     bool Ok = false;
6759     if (skipToken(AsmToken::Colon, "expected a colon")) {
6760       if (trySkipId("swizzle")) {
6761         Ok = parseSwizzleMacro(Imm);
6762       } else {
6763         Ok = parseSwizzleOffset(Imm);
6764       }
6765     }
6766 
6767     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
6768 
6769     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
6770   } else {
6771     // Swizzle "offset" operand is optional.
6772     // If it is omitted, try parsing other optional operands.
6773     return parseOptionalOpr(Operands);
6774   }
6775 }
6776 
6777 bool
6778 AMDGPUOperand::isSwizzle() const {
6779   return isImmTy(ImmTySwizzle);
6780 }
6781 
6782 //===----------------------------------------------------------------------===//
6783 // VGPR Index Mode
6784 //===----------------------------------------------------------------------===//
6785 
6786 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
6787 
6788   using namespace llvm::AMDGPU::VGPRIndexMode;
6789 
6790   if (trySkipToken(AsmToken::RParen)) {
6791     return OFF;
6792   }
6793 
6794   int64_t Imm = 0;
6795 
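  // gpr_idx(...) takes a comma-separated list of index modes. Each mode
  // contributes one bit (1 << ModeId) to the immediate; duplicate and
  // unknown mode names are rejected below.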
6796   while (true) {
6797     unsigned Mode = 0;
6798     SMLoc S = getLoc();
6799 
6800     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
6801       if (trySkipId(IdSymbolic[ModeId])) {
6802         Mode = 1 << ModeId;
6803         break;
6804       }
6805     }
6806 
6807     if (Mode == 0) {
6808       Error(S, (Imm == 0)?
6809                "expected a VGPR index mode or a closing parenthesis" :
6810                "expected a VGPR index mode");
6811       return UNDEF;
6812     }
6813 
6814     if (Imm & Mode) {
6815       Error(S, "duplicate VGPR index mode");
6816       return UNDEF;
6817     }
6818     Imm |= Mode;
6819 
6820     if (trySkipToken(AsmToken::RParen))
6821       break;
6822     if (!skipToken(AsmToken::Comma,
6823                    "expected a comma or a closing parenthesis"))
6824       return UNDEF;
6825   }
6826 
6827   return Imm;
6828 }
6829 
6830 OperandMatchResultTy
6831 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
6832 
6833   using namespace llvm::AMDGPU::VGPRIndexMode;
6834 
6835   int64_t Imm = 0;
6836   SMLoc S = getLoc();
6837 
6838   if (trySkipId("gpr_idx", AsmToken::LParen)) {
6839     Imm = parseGPRIdxMacro();
6840     if (Imm == UNDEF)
6841       return MatchOperand_ParseFail;
6842   } else {
6843     if (getParser().parseAbsoluteExpression(Imm))
6844       return MatchOperand_ParseFail;
6845     if (Imm < 0 || !isUInt<4>(Imm)) {
6846       Error(S, "invalid immediate: only 4-bit values are legal");
6847       return MatchOperand_ParseFail;
6848     }
6849   }
6850 
6851   Operands.push_back(
6852       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
6853   return MatchOperand_Success;
6854 }
6855 
6856 bool AMDGPUOperand::isGPRIdxMode() const {
6857   return isImmTy(ImmTyGprIdxMode);
6858 }
6859 
6860 //===----------------------------------------------------------------------===//
6861 // sopp branch targets
6862 //===----------------------------------------------------------------------===//
6863 
6864 OperandMatchResultTy
6865 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
6866 
6867   // Make sure we are not parsing something
6868   // that looks like a label or an expression but is not.
6869   // This will improve error messages.
6870   if (isRegister() || isModifier())
6871     return MatchOperand_NoMatch;
6872 
6873   if (!parseExpr(Operands))
6874     return MatchOperand_ParseFail;
6875 
6876   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
6877   assert(Opr.isImm() || Opr.isExpr());
6878   SMLoc Loc = Opr.getStartLoc();
6879 
6880   // Currently we do not support arbitrary expressions as branch targets.
6881   // Only labels and absolute expressions are accepted.
6882   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
6883     Error(Loc, "expected an absolute expression or a label");
6884   } else if (Opr.isImm() && !Opr.isS16Imm()) {
6885     Error(Loc, "expected a 16-bit signed jump offset");
6886   }
6887 
6888   return MatchOperand_Success;
6889 }
6890 
6891 //===----------------------------------------------------------------------===//
6892 // Boolean holding registers
6893 //===----------------------------------------------------------------------===//
6894 
6895 OperandMatchResultTy
6896 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
6897   return parseReg(Operands);
6898 }
6899 
6900 //===----------------------------------------------------------------------===//
6901 // mubuf
6902 //===----------------------------------------------------------------------===//
6903 
6904 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
6905   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
6906 }
6907 
6908 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol_GLC1() const {
6909   return AMDGPUOperand::CreateImm(this, CPol::GLC, SMLoc(),
6910                                   AMDGPUOperand::ImmTyCPol);
6911 }
6912 
6913 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
6914                                    const OperandVector &Operands,
6915                                    bool IsAtomic,
6916                                    bool IsLds) {
6917   bool IsLdsOpcode = IsLds;
6918   bool HasLdsModifier = false;
6919   OptionalImmIndexMap OptionalIdx;
6920   unsigned FirstOperandIdx = 1;
6921   bool IsAtomicReturn = false;
6922 
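  // For atomics the 'glc' cache-policy bit selects the returning variant.
  // Scan the parsed operands for a cache policy; if glc is not set, switch
  // to the no-return opcode before the operands are converted below.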
6923   if (IsAtomic) {
6924     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
6925       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6926       if (!Op.isCPol())
6927         continue;
6928       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
6929       break;
6930     }
6931 
6932     if (!IsAtomicReturn) {
6933       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
6934       if (NewOpc != -1)
6935         Inst.setOpcode(NewOpc);
6936     }
6937 
6938     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
6939                       SIInstrFlags::IsAtomicRet;
6940   }
6941 
6942   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
6943     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6944 
6945     // Add the register arguments
6946     if (Op.isReg()) {
6947       Op.addRegOperands(Inst, 1);
6948       // Insert a tied src for the atomic return dst.
6949       // This cannot be postponed, as subsequent calls to
6950       // addImmOperands rely on the correct number of MC operands.
6951       if (IsAtomicReturn && i == FirstOperandIdx)
6952         Op.addRegOperands(Inst, 1);
6953       continue;
6954     }
6955 
6956     // Handle the case where soffset is an immediate
6957     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6958       Op.addImmOperands(Inst, 1);
6959       continue;
6960     }
6961 
6962     HasLdsModifier |= Op.isLDS();
6963 
6964     // Handle tokens like 'offen' which are sometimes hard-coded into the
6965     // asm string.  There are no MCInst operands for these.
6966     if (Op.isToken()) {
6967       continue;
6968     }
6969     assert(Op.isImm());
6970 
6971     // Handle optional arguments
6972     OptionalIdx[Op.getImmTy()] = i;
6973   }
6974 
6975   // This is a workaround for an llvm quirk which may result in an
6976   // incorrect instruction selection. Lds and non-lds versions of
6977   // MUBUF instructions are identical except that lds versions
6978   // have a mandatory 'lds' modifier. However, this modifier follows
6979   // the optional modifiers, and the llvm asm matcher regards this 'lds'
6980   // modifier as an optional one. As a result, an lds version
6981   // of an opcode may be selected even if it has no 'lds' modifier.
6982   if (IsLdsOpcode && !HasLdsModifier) {
6983     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
6984     if (NoLdsOpcode != -1) { // Got lds version - correct it.
6985       Inst.setOpcode(NoLdsOpcode);
6986       IsLdsOpcode = false;
6987     }
6988   }
6989 
6990   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
6991   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
6992 
6993   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
6994     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6995   }
6996   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
6997 }
6998 
6999 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7000   OptionalImmIndexMap OptionalIdx;
7001 
7002   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7003     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7004 
7005     // Add the register arguments
7006     if (Op.isReg()) {
7007       Op.addRegOperands(Inst, 1);
7008       continue;
7009     }
7010 
7011     // Handle the case where soffset is an immediate
7012     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7013       Op.addImmOperands(Inst, 1);
7014       continue;
7015     }
7016 
7017     // Handle tokens like 'offen' which are sometimes hard-coded into the
7018     // asm string.  There are no MCInst operands for these.
7019     if (Op.isToken()) {
7020       continue;
7021     }
7022     assert(Op.isImm());
7023 
7024     // Handle optional arguments
7025     OptionalIdx[Op.getImmTy()] = i;
7026   }
7027 
7028   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7029                         AMDGPUOperand::ImmTyOffset);
7030   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7031   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7032   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7033   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7034 }
7035 
7036 //===----------------------------------------------------------------------===//
7037 // mimg
7038 //===----------------------------------------------------------------------===//
7039 
7040 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7041                               bool IsAtomic) {
7042   unsigned I = 1;
7043   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7044   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7045     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7046   }
7047 
7048   if (IsAtomic) {
7049     // Add src, same as dst
7050     assert(Desc.getNumDefs() == 1);
7051     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7052   }
7053 
7054   OptionalImmIndexMap OptionalIdx;
7055 
7056   for (unsigned E = Operands.size(); I != E; ++I) {
7057     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7058 
7059     // Add the register arguments
7060     if (Op.isReg()) {
7061       Op.addRegOperands(Inst, 1);
7062     } else if (Op.isImmModifier()) {
7063       OptionalIdx[Op.getImmTy()] = I;
7064     } else if (!Op.isToken()) {
7065       llvm_unreachable("unexpected operand type");
7066     }
7067   }
7068 
7069   bool IsGFX10Plus = isGFX10Plus();
7070 
7071   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7072   if (IsGFX10Plus)
7073     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7074   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7075   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7076   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7077   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7078     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7079   if (IsGFX10Plus)
7080     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7081   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7082   if (!IsGFX10Plus)
7083     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7084   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7085 }
7086 
7087 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7088   cvtMIMG(Inst, Operands, true);
7089 }
7090 
7091 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7092   OptionalImmIndexMap OptionalIdx;
7093   bool IsAtomicReturn = false;
7094 
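  // As with MUBUF atomics above, the 'glc' cache-policy bit selects the
  // returning variant; without it, switch to the no-return opcode.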
7095   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7096     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7097     if (!Op.isCPol())
7098       continue;
7099     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7100     break;
7101   }
7102 
7103   if (!IsAtomicReturn) {
7104     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7105     if (NewOpc != -1)
7106       Inst.setOpcode(NewOpc);
7107   }
7108 
7109   IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7110                     SIInstrFlags::IsAtomicRet;
7111 
7112   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7113     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7114 
7115     // Add the register arguments
7116     if (Op.isReg()) {
7117       Op.addRegOperands(Inst, 1);
7118       if (IsAtomicReturn && i == 1)
7119         Op.addRegOperands(Inst, 1);
7120       continue;
7121     }
7122 
7123     // Handle the case where soffset is an immediate
7124     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7125       Op.addImmOperands(Inst, 1);
7126       continue;
7127     }
7128 
7129     // Handle tokens like 'offen' which are sometimes hard-coded into the
7130     // asm string.  There are no MCInst operands for these.
7131     if (Op.isToken()) {
7132       continue;
7133     }
7134     assert(Op.isImm());
7135 
7136     // Handle optional arguments
7137     OptionalIdx[Op.getImmTy()] = i;
7138   }
7139 
7140   if ((int)Inst.getNumOperands() <=
7141       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7142     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7143   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7144 }
7145 
7146 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7147                                       const OperandVector &Operands) {
7148   for (unsigned I = 1; I < Operands.size(); ++I) {
7149     auto &Operand = (AMDGPUOperand &)*Operands[I];
7150     if (Operand.isReg())
7151       Operand.addRegOperands(Inst, 1);
7152   }
7153 
7154   Inst.addOperand(MCOperand::createImm(1)); // a16
7155 }
7156 
7157 //===----------------------------------------------------------------------===//
7158 // smrd
7159 //===----------------------------------------------------------------------===//
7160 
7161 bool AMDGPUOperand::isSMRDOffset8() const {
7162   return isImm() && isUInt<8>(getImm());
7163 }
7164 
7165 bool AMDGPUOperand::isSMEMOffset() const {
7166   return isImm(); // Offset range is checked later by validator.
7167 }
7168 
7169 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7170   // 32-bit literals are only supported on CI, and we only want to use them
7171   // when the offset does not fit in 8 bits.
7172   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7173 }
7174 
7175 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7176   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7177 }
7178 
7179 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7180   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7181 }
7182 
7183 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7184   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7185 }
7186 
7187 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7188   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7189 }
7190 
7191 //===----------------------------------------------------------------------===//
7192 // vop3
7193 //===----------------------------------------------------------------------===//
7194 
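// Map the asm-level omod values onto the output-modifier operand encoding:
// mul:1 -> 0 (no modifier), mul:2 -> 1, mul:4 -> 2, div:1 -> 0 and
// div:2 -> 3. Any other value is rejected so the matcher can diagnose it.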
7195 static bool ConvertOmodMul(int64_t &Mul) {
7196   if (Mul != 1 && Mul != 2 && Mul != 4)
7197     return false;
7198 
7199   Mul >>= 1;
7200   return true;
7201 }
7202 
7203 static bool ConvertOmodDiv(int64_t &Div) {
7204   if (Div == 1) {
7205     Div = 0;
7206     return true;
7207   }
7208 
7209   if (Div == 2) {
7210     Div = 3;
7211     return true;
7212   }
7213 
7214   return false;
7215 }
7216 
7217 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7218 // This is intentional and ensures compatibility with sp3.
7219 // See bug 35397 for details.
7220 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7221   if (BoundCtrl == 0 || BoundCtrl == 1) {
7222     BoundCtrl = 1;
7223     return true;
7224   }
7225   return false;
7226 }
7227 
7228 // Note: the order in this table matches the order of operands in AsmString.
7229 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7230   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7231   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7232   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7233   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7234   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7235   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7236   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7237   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7238   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
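  // Cache policy modifiers (glc/slc/dlc/scc) have no common textual prefix;
  // they are dispatched to parseCPol (see parseOptionalOpr), hence the empty
  // name.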
7239   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7240   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7241   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7242   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7243   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7244   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7245   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7246   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7247   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7248   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7249   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7250   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7251   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7252   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7253   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7254   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7255   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7256   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7257   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
7258   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7259   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7260   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7261   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7262   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7263   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7264   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7265   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7266   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7267   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7268   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7269   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7270   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7271 };
7272 
7273 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7274 
7275   OperandMatchResultTy res = parseOptionalOpr(Operands);
7276 
7277   // This is a hack to enable hardcoded mandatory operands which follow
7278   // optional operands.
7279   //
7280   // Current design assumes that all operands after the first optional operand
7281   // are also optional. However, the implementation of some instructions violates
7282   // this rule (see e.g. flat/global atomics, which have a hardcoded 'glc' operand).
7283   //
7284   // To alleviate this problem, we have to (implicitly) parse extra operands
7285   // to make sure the autogenerated parser of custom operands never hits hardcoded
7286   // mandatory operands.
7287 
7288   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7289     if (res != MatchOperand_Success ||
7290         isToken(AsmToken::EndOfStatement))
7291       break;
7292 
7293     trySkipToken(AsmToken::Comma);
7294     res = parseOptionalOpr(Operands);
7295   }
7296 
7297   return res;
7298 }
7299 
7300 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7301   OperandMatchResultTy res;
7302   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7303     // try to parse any optional operand here
7304     if (Op.IsBit) {
7305       res = parseNamedBit(Op.Name, Operands, Op.Type);
7306     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7307       res = parseOModOperand(Operands);
7308     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7309                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7310                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7311       res = parseSDWASel(Operands, Op.Name, Op.Type);
7312     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7313       res = parseSDWADstUnused(Operands);
7314     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7315                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7316                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7317                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7318       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7319                                         Op.ConvertResult);
7320     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7321       res = parseDim(Operands);
7322     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7323       res = parseCPol(Operands);
7324     } else {
7325       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7326     }
7327     if (res != MatchOperand_NoMatch) {
7328       return res;
7329     }
7330   }
7331   return MatchOperand_NoMatch;
7332 }
7333 
7334 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7335   StringRef Name = getTokenStr();
7336   if (Name == "mul") {
7337     return parseIntWithPrefix("mul", Operands,
7338                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7339   }
7340 
7341   if (Name == "div") {
7342     return parseIntWithPrefix("div", Operands,
7343                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7344   }
7345 
7346   return MatchOperand_NoMatch;
7347 }
7348 
7349 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
7350   cvtVOP3P(Inst, Operands);
7351 
7352   int Opc = Inst.getOpcode();
7353 
7354   int SrcNum;
7355   const int Ops[] = { AMDGPU::OpName::src0,
7356                       AMDGPU::OpName::src1,
7357                       AMDGPU::OpName::src2 };
7358   for (SrcNum = 0;
7359        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
7360        ++SrcNum);
7361   assert(SrcNum > 0);
7362 
7363   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7364   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7365 
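  // The op_sel bit immediately after the last source operand refers to the
  // destination. There is no separate destination modifier operand, so it is
  // folded into src0_modifiers as DST_OP_SEL.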
7366   if ((OpSel & (1 << SrcNum)) != 0) {
7367     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7368     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7369     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
7370   }
7371 }
7372 
7373 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
7374       // 1. This operand is an input-modifiers operand
7375   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
7376       // 2. This is not the last operand
7377       && Desc.NumOperands > (OpNum + 1)
7378       // 3. The next operand is a register class
7379       && Desc.OpInfo[OpNum + 1].RegClass != -1
7380       // 4. The next register is not tied to any other operand
7381       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7382 }
7383 
7384 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7385 {
7386   OptionalImmIndexMap OptionalIdx;
7387   unsigned Opc = Inst.getOpcode();
7388 
7389   unsigned I = 1;
7390   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7391   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7392     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7393   }
7394 
7395   for (unsigned E = Operands.size(); I != E; ++I) {
7396     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7397     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7398       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7399     } else if (Op.isInterpSlot() ||
7400                Op.isInterpAttr() ||
7401                Op.isAttrChan()) {
7402       Inst.addOperand(MCOperand::createImm(Op.getImm()));
7403     } else if (Op.isImmModifier()) {
7404       OptionalIdx[Op.getImmTy()] = I;
7405     } else {
7406       llvm_unreachable("unhandled operand type");
7407     }
7408   }
7409 
7410   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
7411     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
7412   }
7413 
7414   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7415     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7416   }
7417 
7418   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7419     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7420   }
7421 }
7422 
7423 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
7424                               OptionalImmIndexMap &OptionalIdx) {
7425   unsigned Opc = Inst.getOpcode();
7426 
7427   unsigned I = 1;
7428   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7429   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7430     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7431   }
7432 
7433   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
7434     // This instruction has src modifiers
7435     for (unsigned E = Operands.size(); I != E; ++I) {
7436       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7437       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7438         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7439       } else if (Op.isImmModifier()) {
7440         OptionalIdx[Op.getImmTy()] = I;
7441       } else if (Op.isRegOrImm()) {
7442         Op.addRegOrImmOperands(Inst, 1);
7443       } else {
7444         llvm_unreachable("unhandled operand type");
7445       }
7446     }
7447   } else {
7448     // No src modifiers
7449     for (unsigned E = Operands.size(); I != E; ++I) {
7450       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7451       if (Op.isMod()) {
7452         OptionalIdx[Op.getImmTy()] = I;
7453       } else {
7454         Op.addRegOrImmOperands(Inst, 1);
7455       }
7456     }
7457   }
7458 
7459   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7460     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7461   }
7462 
7463   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7464     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7465   }
7466 
7467   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
7468   // they have a src2 register operand that is tied to the dst operand.
7469   // We don't allow modifiers for this operand in the assembler, so
7470   // src2_modifiers should be 0.
7471   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
7472       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
7473       Opc == AMDGPU::V_MAC_F32_e64_vi ||
7474       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
7475       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
7476       Opc == AMDGPU::V_MAC_F16_e64_vi ||
7477       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
7478       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
7479       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
7480       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
7481       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
7482     auto it = Inst.begin();
7483     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
7484     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
7485     ++it;
7486     // Copy the operand to ensure it's not invalidated when Inst grows.
7487     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
7488   }
7489 }
7490 
7491 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
7492   OptionalImmIndexMap OptionalIdx;
7493   cvtVOP3(Inst, Operands, OptionalIdx);
7494 }
7495 
7496 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
7497                                const OperandVector &Operands) {
7498   OptionalImmIndexMap OptIdx;
7499   const int Opc = Inst.getOpcode();
7500   const MCInstrDesc &Desc = MII.get(Opc);
7501 
7502   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
7503 
7504   cvtVOP3(Inst, Operands, OptIdx);
7505 
7506   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
7507     assert(!IsPacked);
7508     Inst.addOperand(Inst.getOperand(0));
7509   }
7510 
7511   // FIXME: This is messy. Parse the modifiers as if this were a normal VOP3
7512   // instruction, and then figure out where to actually put the modifiers.
7513 
7514   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
7515 
7516   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
7517   if (OpSelHiIdx != -1) {
7518     int DefaultVal = IsPacked ? -1 : 0;
7519     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
7520                           DefaultVal);
7521   }
7522 
7523   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
7524   if (NegLoIdx != -1) {
7525     assert(IsPacked);
7526     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
7527     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
7528   }
7529 
7530   const int Ops[] = { AMDGPU::OpName::src0,
7531                       AMDGPU::OpName::src1,
7532                       AMDGPU::OpName::src2 };
7533   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
7534                          AMDGPU::OpName::src1_modifiers,
7535                          AMDGPU::OpName::src2_modifiers };
7536 
7537   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7538 
7539   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7540   unsigned OpSelHi = 0;
7541   unsigned NegLo = 0;
7542   unsigned NegHi = 0;
7543 
7544   if (OpSelHiIdx != -1) {
7545     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
7546   }
7547 
7548   if (NegLoIdx != -1) {
7549     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
7550     NegLo = Inst.getOperand(NegLoIdx).getImm();
7551     NegHi = Inst.getOperand(NegHiIdx).getImm();
7552   }
7553 
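  // Distribute the op_sel / op_sel_hi / neg_lo / neg_hi bit vectors parsed
  // above into the per-source *_modifiers operands expected by the encoder.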
7554   for (int J = 0; J < 3; ++J) {
7555     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
7556     if (OpIdx == -1)
7557       break;
7558 
7559     uint32_t ModVal = 0;
7560 
7561     if ((OpSel & (1 << J)) != 0)
7562       ModVal |= SISrcMods::OP_SEL_0;
7563 
7564     if ((OpSelHi & (1 << J)) != 0)
7565       ModVal |= SISrcMods::OP_SEL_1;
7566 
7567     if ((NegLo & (1 << J)) != 0)
7568       ModVal |= SISrcMods::NEG;
7569 
7570     if ((NegHi & (1 << J)) != 0)
7571       ModVal |= SISrcMods::NEG_HI;
7572 
7573     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
7574 
7575     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
7576   }
7577 }
7578 
7579 //===----------------------------------------------------------------------===//
7580 // dpp
7581 //===----------------------------------------------------------------------===//
7582 
7583 bool AMDGPUOperand::isDPP8() const {
7584   return isImmTy(ImmTyDPP8);
7585 }
7586 
7587 bool AMDGPUOperand::isDPPCtrl() const {
7588   using namespace AMDGPU::DPP;
7589 
7590   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
7591   if (result) {
7592     int64_t Imm = getImm();
7593     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
7594            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
7595            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
7596            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
7597            (Imm == DppCtrl::WAVE_SHL1) ||
7598            (Imm == DppCtrl::WAVE_ROL1) ||
7599            (Imm == DppCtrl::WAVE_SHR1) ||
7600            (Imm == DppCtrl::WAVE_ROR1) ||
7601            (Imm == DppCtrl::ROW_MIRROR) ||
7602            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
7603            (Imm == DppCtrl::BCAST15) ||
7604            (Imm == DppCtrl::BCAST31) ||
7605            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
7606            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
7607   }
7608   return false;
7609 }
7610 
7611 //===----------------------------------------------------------------------===//
7612 // mAI
7613 //===----------------------------------------------------------------------===//
7614 
7615 bool AMDGPUOperand::isBLGP() const {
7616   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
7617 }
7618 
7619 bool AMDGPUOperand::isCBSZ() const {
7620   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
7621 }
7622 
7623 bool AMDGPUOperand::isABID() const {
7624   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
7625 }
7626 
7627 bool AMDGPUOperand::isS16Imm() const {
7628   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
7629 }
7630 
7631 bool AMDGPUOperand::isU16Imm() const {
7632   return isImm() && isUInt<16>(getImm());
7633 }
7634 
7635 //===----------------------------------------------------------------------===//
7636 // dim
7637 //===----------------------------------------------------------------------===//
7638 
7639 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
7640   // We want to allow "dim:1D" etc.,
7641   // but the initial 1 is tokenized as an integer.
7642   std::string Token;
7643   if (isToken(AsmToken::Integer)) {
7644     SMLoc Loc = getToken().getEndLoc();
7645     Token = std::string(getTokenStr());
7646     lex();
7647     if (getLoc() != Loc)
7648       return false;
7649   }
7650 
7651   StringRef Suffix;
7652   if (!parseId(Suffix))
7653     return false;
7654   Token += Suffix;
7655 
7656   StringRef DimId = Token;
7657   if (DimId.startswith("SQ_RSRC_IMG_"))
7658     DimId = DimId.drop_front(12);
7659 
7660   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
7661   if (!DimInfo)
7662     return false;
7663 
7664   Encoding = DimInfo->Encoding;
7665   return true;
7666 }
7667 
7668 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
7669   if (!isGFX10Plus())
7670     return MatchOperand_NoMatch;
7671 
7672   SMLoc S = getLoc();
7673 
7674   if (!trySkipId("dim", AsmToken::Colon))
7675     return MatchOperand_NoMatch;
7676 
7677   unsigned Encoding;
7678   SMLoc Loc = getLoc();
7679   if (!parseDimId(Encoding)) {
7680     Error(Loc, "invalid dim value");
7681     return MatchOperand_ParseFail;
7682   }
7683 
7684   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
7685                                               AMDGPUOperand::ImmTyDim));
7686   return MatchOperand_Success;
7687 }
7688 
7689 //===----------------------------------------------------------------------===//
7690 // dpp
7691 //===----------------------------------------------------------------------===//
7692 
7693 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
7694   SMLoc S = getLoc();
7695 
7696   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
7697     return MatchOperand_NoMatch;
7698 
7699   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
7700 
7701   int64_t Sels[8];
7702 
7703   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7704     return MatchOperand_ParseFail;
7705 
7706   for (size_t i = 0; i < 8; ++i) {
7707     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7708       return MatchOperand_ParseFail;
7709 
7710     SMLoc Loc = getLoc();
7711     if (getParser().parseAbsoluteExpression(Sels[i]))
7712       return MatchOperand_ParseFail;
7713     if (Sels[i] < 0 || Sels[i] > 7) {
7714       Error(Loc, "expected a 3-bit value");
7715       return MatchOperand_ParseFail;
7716     }
7717   }
7718 
7719   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7720     return MatchOperand_ParseFail;
7721 
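  // Pack the eight 3-bit lane selectors into a single immediate, with the
  // selector for lane 0 in the least significant bits.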
7722   unsigned DPP8 = 0;
7723   for (size_t i = 0; i < 8; ++i)
7724     DPP8 |= (Sels[i] << (i * 3));
7725 
7726   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
7727   return MatchOperand_Success;
7728 }
7729 
7730 bool
7731 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
7732                                     const OperandVector &Operands) {
7733   if (Ctrl == "row_newbcast")
7734       return isGFX90A();
7735 
7736   // DPP64 is supported for row_newbcast only.
7737   const MCRegisterInfo *MRI = getMRI();
7738   if (Operands.size() > 2 && Operands[1]->isReg() &&
7739       MRI->getSubReg(Operands[1]->getReg(), AMDGPU::sub1))
7740     return false;
7741 
7742   if (Ctrl == "row_share" ||
7743       Ctrl == "row_xmask")
7744     return isGFX10Plus();
7745 
7746   if (Ctrl == "wave_shl" ||
7747       Ctrl == "wave_shr" ||
7748       Ctrl == "wave_rol" ||
7749       Ctrl == "wave_ror" ||
7750       Ctrl == "row_bcast")
7751     return isVI() || isGFX9();
7752 
7753   return Ctrl == "row_mirror" ||
7754          Ctrl == "row_half_mirror" ||
7755          Ctrl == "quad_perm" ||
7756          Ctrl == "row_shl" ||
7757          Ctrl == "row_shr" ||
7758          Ctrl == "row_ror";
7759 }
7760 
7761 int64_t
7762 AMDGPUAsmParser::parseDPPCtrlPerm() {
7763   // quad_perm:[%d,%d,%d,%d]
7764 
7765   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7766     return -1;
7767 
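  // Pack the four 2-bit lane selectors into an 8-bit value, with the
  // selector for lane 0 in the least significant bits.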
7768   int64_t Val = 0;
7769   for (int i = 0; i < 4; ++i) {
7770     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7771       return -1;
7772 
7773     int64_t Temp;
7774     SMLoc Loc = getLoc();
7775     if (getParser().parseAbsoluteExpression(Temp))
7776       return -1;
7777     if (Temp < 0 || Temp > 3) {
7778       Error(Loc, "expected a 2-bit value");
7779       return -1;
7780     }
7781 
7782     Val += (Temp << i * 2);
7783   }
7784 
7785   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7786     return -1;
7787 
7788   return Val;
7789 }
7790 
7791 int64_t
7792 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
7793   using namespace AMDGPU::DPP;
7794 
7795   // sel:%d
7796 
7797   int64_t Val;
7798   SMLoc Loc = getLoc();
7799 
7800   if (getParser().parseAbsoluteExpression(Val))
7801     return -1;
7802 
7803   struct DppCtrlCheck {
7804     int64_t Ctrl;
7805     int Lo;
7806     int Hi;
7807   };
7808 
7809   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
7810     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
7811     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
7812     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
7813     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
7814     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
7815     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
7816     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
7817     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
7818     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
7819     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
7820     .Default({-1, 0, 0});
7821 
7822   bool Valid;
7823   if (Check.Ctrl == -1) {
7824     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
7825     Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
7826   } else {
7827     Valid = Check.Lo <= Val && Val <= Check.Hi;
7828     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
7829   }
7830 
7831   if (!Valid) {
7832     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
7833     return -1;
7834   }
7835 
7836   return Val;
7837 }
7838 
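// Parse a dpp_ctrl operand. row_mirror and row_half_mirror take no argument;
// every other control is followed by a colon and either a quad_perm selector
// list or an integer value.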
7839 OperandMatchResultTy
7840 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
7841   using namespace AMDGPU::DPP;
7842 
7843   if (!isToken(AsmToken::Identifier) ||
7844       !isSupportedDPPCtrl(getTokenStr(), Operands))
7845     return MatchOperand_NoMatch;
7846 
7847   SMLoc S = getLoc();
7848   int64_t Val = -1;
7849   StringRef Ctrl;
7850 
7851   parseId(Ctrl);
7852 
7853   if (Ctrl == "row_mirror") {
7854     Val = DppCtrl::ROW_MIRROR;
7855   } else if (Ctrl == "row_half_mirror") {
7856     Val = DppCtrl::ROW_HALF_MIRROR;
7857   } else {
7858     if (skipToken(AsmToken::Colon, "expected a colon")) {
7859       if (Ctrl == "quad_perm") {
7860         Val = parseDPPCtrlPerm();
7861       } else {
7862         Val = parseDPPCtrlSel(Ctrl);
7863       }
7864     }
7865   }
7866 
7867   if (Val == -1)
7868     return MatchOperand_ParseFail;
7869 
7870   Operands.push_back(
7871     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
7872   return MatchOperand_Success;
7873 }
7874 
7875 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
7876   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
7877 }
7878 
7879 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
7880   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
7881 }
7882 
7883 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
7884   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
7885 }
7886 
7887 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
7888   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
7889 }
7890 
7891 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
7892   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
7893 }
7894 
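// Convert parsed DPP operands into MCInst operands. For DPP8 the fi value is
// collected separately and appended last; for regular DPP the optional
// row_mask, bank_mask, bound_ctrl and fi operands are filled in with their
// defaults when not written explicitly.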
7895 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
7896   OptionalImmIndexMap OptionalIdx;
7897 
7898   unsigned I = 1;
7899   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7900   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7901     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7902   }
7903 
7904   int Fi = 0;
7905   for (unsigned E = Operands.size(); I != E; ++I) {
7906     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
7907                                             MCOI::TIED_TO);
7908     if (TiedTo != -1) {
7909       assert((unsigned)TiedTo < Inst.getNumOperands());
7910       // handle tied old or src2 for MAC instructions
7911       Inst.addOperand(Inst.getOperand(TiedTo));
7912     }
7913     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7914     // Add the register arguments
7915     if (Op.isReg() && validateVccOperand(Op.getReg())) {
7916       // VOP2b (v_add_u32, v_sub_u32 ...) DPP uses the "vcc" token.
7917       // Skip it.
7918       continue;
7919     }
7920 
7921     if (IsDPP8) {
7922       if (Op.isDPP8()) {
7923         Op.addImmOperands(Inst, 1);
7924       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7925         Op.addRegWithFPInputModsOperands(Inst, 2);
7926       } else if (Op.isFI()) {
7927         Fi = Op.getImm();
7928       } else if (Op.isReg()) {
7929         Op.addRegOperands(Inst, 1);
7930       } else {
7931         llvm_unreachable("Invalid operand type");
7932       }
7933     } else {
7934       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7935         Op.addRegWithFPInputModsOperands(Inst, 2);
7936       } else if (Op.isDPPCtrl()) {
7937         Op.addImmOperands(Inst, 1);
7938       } else if (Op.isImm()) {
7939         // Handle optional arguments
7940         OptionalIdx[Op.getImmTy()] = I;
7941       } else {
7942         llvm_unreachable("Invalid operand type");
7943       }
7944     }
7945   }
7946 
7947   if (IsDPP8) {
7948     using namespace llvm::AMDGPU::DPP;
7949     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
7950   } else {
7951     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
7952     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
7953     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
7954     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
7955       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
7956     }
7957   }
7958 }
7959 
7960 //===----------------------------------------------------------------------===//
7961 // sdwa
7962 //===----------------------------------------------------------------------===//
7963 
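// Parse an SDWA selector operand of the form <Prefix>:<sel>, e.g.
// dst_sel:BYTE_0 or src0_sel:WORD_1, and record it as an immediate operand
// of the given type.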
7964 OperandMatchResultTy
7965 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
7966                               AMDGPUOperand::ImmTy Type) {
7967   using namespace llvm::AMDGPU::SDWA;
7968 
7969   SMLoc S = getLoc();
7970   StringRef Value;
7971   OperandMatchResultTy res;
7972 
7973   SMLoc StringLoc;
7974   res = parseStringWithPrefix(Prefix, Value, StringLoc);
7975   if (res != MatchOperand_Success) {
7976     return res;
7977   }
7978 
7979   int64_t Int;
7980   Int = StringSwitch<int64_t>(Value)
7981         .Case("BYTE_0", SdwaSel::BYTE_0)
7982         .Case("BYTE_1", SdwaSel::BYTE_1)
7983         .Case("BYTE_2", SdwaSel::BYTE_2)
7984         .Case("BYTE_3", SdwaSel::BYTE_3)
7985         .Case("WORD_0", SdwaSel::WORD_0)
7986         .Case("WORD_1", SdwaSel::WORD_1)
7987         .Case("DWORD", SdwaSel::DWORD)
7988         .Default(0xffffffff);
7989 
7990   if (Int == 0xffffffff) {
7991     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
7992     return MatchOperand_ParseFail;
7993   }
7994 
7995   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
7996   return MatchOperand_Success;
7997 }
7998 
7999 OperandMatchResultTy
8000 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8001   using namespace llvm::AMDGPU::SDWA;
8002 
8003   SMLoc S = getLoc();
8004   StringRef Value;
8005   OperandMatchResultTy res;
8006 
8007   SMLoc StringLoc;
8008   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8009   if (res != MatchOperand_Success) {
8010     return res;
8011   }
8012 
8013   int64_t Int;
8014   Int = StringSwitch<int64_t>(Value)
8015         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8016         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8017         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8018         .Default(0xffffffff);
8019 
8020   if (Int == 0xffffffff) {
8021     Error(StringLoc, "invalid dst_unused value");
8022     return MatchOperand_ParseFail;
8023   }
8024 
8025   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8026   return MatchOperand_Success;
8027 }
8028 
8029 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8030   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8031 }
8032 
8033 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8034   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8035 }
8036 
8037 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8038   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8039 }
8040 
8041 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8042   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8043 }
8044 
8045 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8046   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8047 }
8048 
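// Convert parsed SDWA operands into MCInst operands. SkipDstVcc and SkipSrcVcc
// request that an explicit "vcc" written as the VOP2b destination or carry
// source be skipped rather than encoded; optional clamp, omod, sel and
// dst_unused operands are filled in with defaults based on BasicInstType.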
8049 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8050                               uint64_t BasicInstType,
8051                               bool SkipDstVcc,
8052                               bool SkipSrcVcc) {
8053   using namespace llvm::AMDGPU::SDWA;
8054 
8055   OptionalImmIndexMap OptionalIdx;
8056   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8057   bool SkippedVcc = false;
8058 
8059   unsigned I = 1;
8060   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8061   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8062     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8063   }
8064 
8065   for (unsigned E = Operands.size(); I != E; ++I) {
8066     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8067     if (SkipVcc && !SkippedVcc && Op.isReg() &&
8068         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
8069       // VOP2b (v_add_u32, v_sub_u32 ...) SDWA uses the "vcc" token as dst.
8070       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
8071       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
8072       // Skip VCC only if we didn't skip it on previous iteration.
8073       // Note that src0 and src1 occupy 2 slots each because of modifiers.
8074       if (BasicInstType == SIInstrFlags::VOP2 &&
8075           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8076            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8077         SkippedVcc = true;
8078         continue;
8079       } else if (BasicInstType == SIInstrFlags::VOPC &&
8080                  Inst.getNumOperands() == 0) {
8081         SkippedVcc = true;
8082         continue;
8083       }
8084     }
8085     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8086       Op.addRegOrImmWithInputModsOperands(Inst, 2);
8087     } else if (Op.isImm()) {
8088       // Handle optional arguments
8089       OptionalIdx[Op.getImmTy()] = I;
8090     } else {
8091       llvm_unreachable("Invalid operand type");
8092     }
8093     SkippedVcc = false;
8094   }
8095 
8096   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
8097       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
8098       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
8099     // v_nop_sdwa_vi/gfx9/gfx10 have no optional sdwa arguments
8100     switch (BasicInstType) {
8101     case SIInstrFlags::VOP1:
8102       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8103       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8104         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8105       }
8106       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8107       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8108       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8109       break;
8110 
8111     case SIInstrFlags::VOP2:
8112       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8113       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8114         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8115       }
8116       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8117       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8118       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8119       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8120       break;
8121 
8122     case SIInstrFlags::VOPC:
8123       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8124         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8125       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8126       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8127       break;
8128 
8129     default:
8130       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8131     }
8132   }
8133 
8134   // Special case for v_mac_{f16, f32}:
8135   // it has a src2 register operand that is tied to the dst operand.
8136   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8137       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
8138     auto it = Inst.begin();
8139     std::advance(
8140       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8141     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8142   }
8143 }
8144 
8145 //===----------------------------------------------------------------------===//
8146 // mAI
8147 //===----------------------------------------------------------------------===//
8148 
8149 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8150   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8151 }
8152 
8153 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8154   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8155 }
8156 
8157 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8158   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8159 }
8160 
8161 /// Force static initialization.
8162 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8163   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8164   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8165 }
8166 
8167 #define GET_REGISTER_MATCHER
8168 #define GET_MATCHER_IMPLEMENTATION
8169 #define GET_MNEMONIC_SPELL_CHECKER
8170 #define GET_MNEMONIC_CHECKER
8171 #include "AMDGPUGenAsmMatcher.inc"
8172 
8173 // This function should be defined after the auto-generated include so that we
8174 // have the MatchClassKind enum defined.
8175 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8176                                                      unsigned Kind) {
8177   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
8178   // But MatchInstructionImpl() expects a token and fails to validate the
8179   // operand. This method checks whether we were given an immediate operand
8180   // where the matcher expects the corresponding token.
8181   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8182   switch (Kind) {
8183   case MCK_addr64:
8184     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8185   case MCK_gds:
8186     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8187   case MCK_lds:
8188     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8189   case MCK_idxen:
8190     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8191   case MCK_offen:
8192     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8193   case MCK_SSrcB32:
8194     // When operands have expression values, they will return true for isToken,
8195     // because it is not possible to distinguish between a token and an
8196     // expression at parse time. MatchInstructionImpl() will always try to
8197     // match an operand as a token, when isToken returns true, and when the
8198     // name of the expression is not a valid token, the match will fail,
8199     // so we need to handle it here.
8200     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8201   case MCK_SSrcF32:
8202     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8203   case MCK_SoppBrTarget:
8204     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8205   case MCK_VReg32OrOff:
8206     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8207   case MCK_InterpSlot:
8208     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
8209   case MCK_Attr:
8210     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
8211   case MCK_AttrChan:
8212     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
8213   case MCK_ImmSMEMOffset:
8214     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
8215   case MCK_SReg_64:
8216   case MCK_SReg_64_XEXEC:
8217     // Null is defined as a 32-bit register but
8218     // it should also be enabled with 64-bit operands.
8219     // The following code enables it for SReg_64 operands
8220     // used as source and destination. Remaining source
8221     // operands are handled in isInlinableImm.
8222     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
8223   default:
8224     return Match_InvalidOperand;
8225   }
8226 }
8227 
8228 //===----------------------------------------------------------------------===//
8229 // endpgm
8230 //===----------------------------------------------------------------------===//
8231 
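// Parse the optional immediate operand of the endpgm instruction; it defaults
// to 0 when omitted and must fit in 16 bits.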
8232 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
8233   SMLoc S = getLoc();
8234   int64_t Imm = 0;
8235 
8236   if (!parseExpr(Imm)) {
8237     // The operand is optional; if not present, default to 0.
8238     Imm = 0;
8239   }
8240 
8241   if (!isUInt<16>(Imm)) {
8242     Error(S, "expected a 16-bit value");
8243     return MatchOperand_ParseFail;
8244   }
8245 
8246   Operands.push_back(
8247       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
8248   return MatchOperand_Success;
8249 }
8250 
8251 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
8252