1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/MC/MCAsmInfo.h"
24 #include "llvm/MC/MCContext.h"
25 #include "llvm/MC/MCExpr.h"
26 #include "llvm/MC/MCInst.h"
27 #include "llvm/MC/MCParser/MCAsmParser.h"
28 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
29 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
30 #include "llvm/MC/MCSymbol.h"
31 #include "llvm/Support/AMDGPUMetadata.h"
32 #include "llvm/Support/AMDHSAKernelDescriptor.h"
33 #include "llvm/Support/Casting.h"
34 #include "llvm/Support/MachineValueType.h"
35 #include "llvm/Support/TargetParser.h"
36 #include "llvm/Support/TargetRegistry.h"
37 
38 using namespace llvm;
39 using namespace llvm::AMDGPU;
40 using namespace llvm::amdhsa;
41 
42 namespace {
43 
44 class AMDGPUAsmParser;
45 
// Broad classification of a parsed register: vector, scalar, accumulator,
// trap-temp, or a special/architectural register. IS_UNKNOWN marks a failed
// or not-yet-resolved classification.
enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
47 
48 //===----------------------------------------------------------------------===//
49 // Operand
50 //===----------------------------------------------------------------------===//
51 
/// A single parsed AMDGPU assembly operand.
///
/// This is a tagged union: \c Kind selects which member of the anonymous
/// union below is active (Tok / Imm / Reg / Expr). The many isXXX()
/// predicates are the match criteria invoked by the (tablegen-generated)
/// asm matcher, and the addXXXOperands() methods convert a matched operand
/// into MCOperands on an MCInst. Method names and signatures are therefore
/// part of the matcher interface and must not change.
class AMDGPUOperand : public MCParsedAsmOperand {
  // Discriminator for the anonymous union below.
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  // Back-pointer to the parser that created this operand; used by the
  // out-of-line predicates (isInlinableImm etc.) to query subtarget features.
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
    : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  /// Source modifiers (abs/neg for FP, sext for integer) attached to a
  /// register or immediate operand. FP and int modifiers are mutually
  /// exclusive on a single operand.
  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    // Encode the FP modifiers into the SISrcMods bitfield used by the
    // instruction encoding.
    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    // Encode the integer (sext) modifier into the SISrcMods bitfield.
    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    // Encode whichever modifier family is present; asserts they are not mixed.
    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  // Fine-grained classification of immediate operands; identifies which
  // named instruction field (offset, dmask, dpp ctrl, ...) an immediate
  // belongs to. Keep in sync with printImmTy() below.
  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

  // How an immediate will be emitted: as a literal constant in the
  // instruction stream, or as an inline constant. None = not yet decided.
  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  // Non-owning view of the token text (points into the parser's buffer).
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    // mutable so the const add*Operands() paths can record how the
    // immediate was ultimately encoded (literal vs inline constant).
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  // Active member is selected by Kind; see KindTy above.
  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret is a token, then we treat the symbol name as the token.
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  // The setImmKind*() methods are const but write through the mutable
  // Imm.Kind field; they record encoding decisions made during matching.
  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  // NOTE(review): capitalized 'Is' is inconsistent with isImmKindConst()
  // below; renaming would break external callers, so it is left as-is.
  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  // A "plain" register for the matcher: register kind with no abs/neg/sext.
  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  // True for any VGPR register class of any supported width.
  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  // One-line immediate-type predicates used by the generated matcher.
  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  // Offsets additionally enforce the bit-width of the instruction field.
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  //===--------------------------------------------------------------------===//
  // SCSrc* / SSrc*: scalar-register (or inline/literal constant) source
  // operand predicates, by element type and width.
  //===--------------------------------------------------------------------===//

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  // The following llvm_unreachable predicates are declared for matcher
  // completeness but are not expected to be invoked at runtime; presumably
  // these operand classes are matched through other predicates — the
  // unreachable acts as a guard if that ever changes.
  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrcF32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrcB32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcB32();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  //===--------------------------------------------------------------------===//
  // VCSrc* / VSrc*: VALU source operands (VGPR or SGPR via VS_* classes),
  // optionally allowing literal constants for the VSrc* forms.
  //===--------------------------------------------------------------------===//

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVCSrcV2FP32() const {
    return isVCSrcF64();
  }

  bool isVSrcV2FP32() const {
    return isVSrcF64() || isLiteralImm(MVT::v2f32);
  }

  bool isVCSrcV2INT32() const {
    return isVCSrcB64();
  }

  bool isVSrcV2INT32() const {
    return isVSrcB64() || isLiteralImm(MVT::v2i32);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  //===--------------------------------------------------------------------===//
  // VISrc*: VGPR-only (or inline constant) source operands, by register
  // width and element type.
  //===--------------------------------------------------------------------===//

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512B32();
  }

  bool isVISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024B32();
  }

  //===--------------------------------------------------------------------===//
  // AISrc*: accumulator-register (AGPR, or inline constant) source operands,
  // by register width and element type. Mirrors the VISrc* family above.
  //===--------------------------------------------------------------------===//

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isVISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128F16() || isVISrc_128B32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  // KImm: literal constant carried in the instruction's immediate field.
  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  // AMDGPU assembly has no memory operands in the MC sense.
  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  // SOPP branch targets may be either a resolved immediate or a relocatable
  // expression (e.g. a label reference).
  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  // Treat a symbol-reference expression as a token by exposing the symbol
  // name; see the comment in isToken() above. cast<> asserts if the
  // expression is not an MCSymbolRefExpr.
  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  // Modifiers live on registers and on plain (ImmTyNone) immediates only.
  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  // Emits the modifiers as their own immediate operand first, then the
  // underlying register/immediate (with modifiers NOT re-applied to the
  // immediate value, hence ApplyModifiers = false).
  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  // Debug helper: human-readable name for each ImmTy. Must cover every
  // enumerator (no default case, so the compiler flags omissions).
  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }

  // Debug dump of the operand, dispatching on the active union member.
  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  //===--------------------------------------------------------------------===//
  // Factory methods: the only places the union members are initialized, so
  // each fully initializes its variant's fields.
  //===--------------------------------------------------------------------===//

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  // NOTE(review): HasExplicitEncodingSize is currently unused here;
  // presumably kept for call-site compatibility — confirm before removing.
  // The token stores a non-owning view of Str; the underlying buffer must
  // outlive the operand.
  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};
1106 
1107 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1108   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1109   return OS;
1110 }
1111 
1112 //===----------------------------------------------------------------------===//
1113 // AsmParser
1114 //===----------------------------------------------------------------------===//
1115 
1116 // Holds info related to the current kernel, e.g. count of SGPRs used.
1117 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1118 // .amdgpu_hsa_kernel or at EOF.
1119 class KernelScopeInfo {
1120   int SgprIndexUnusedMin = -1;
1121   int VgprIndexUnusedMin = -1;
1122   MCContext *Ctx = nullptr;
1123 
1124   void usesSgprAt(int i) {
1125     if (i >= SgprIndexUnusedMin) {
1126       SgprIndexUnusedMin = ++i;
1127       if (Ctx) {
1128         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1129         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1130       }
1131     }
1132   }
1133 
1134   void usesVgprAt(int i) {
1135     if (i >= VgprIndexUnusedMin) {
1136       VgprIndexUnusedMin = ++i;
1137       if (Ctx) {
1138         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1139         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
1140       }
1141     }
1142   }
1143 
1144 public:
1145   KernelScopeInfo() = default;
1146 
1147   void initialize(MCContext &Context) {
1148     Ctx = &Context;
1149     usesSgprAt(SgprIndexUnusedMin = -1);
1150     usesVgprAt(VgprIndexUnusedMin = -1);
1151   }
1152 
1153   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
1154     switch (RegKind) {
1155       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
1156       case IS_AGPR: // fall through
1157       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1158       default: break;
1159     }
1160   }
1161 };
1162 
1163 class AMDGPUAsmParser : public MCTargetAsmParser {
1164   MCAsmParser &Parser;
1165 
1166   // Number of extra operands parsed after the first optional operand.
1167   // This may be necessary to skip hardcoded mandatory operands.
1168   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1169 
1170   unsigned ForcedEncodingSize = 0;
1171   bool ForcedDPP = false;
1172   bool ForcedSDWA = false;
1173   KernelScopeInfo KernelScope;
1174   unsigned CPolSeen;
1175 
1176   /// @name Auto-generated Match Functions
1177   /// {
1178 
1179 #define GET_ASSEMBLER_HEADER
1180 #include "AMDGPUGenAsmMatcher.inc"
1181 
1182   /// }
1183 
1184 private:
1185   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1186   bool OutOfRangeError(SMRange Range);
1187   /// Calculate VGPR/SGPR blocks required for given target, reserved
1188   /// registers, and user-specified NextFreeXGPR values.
1189   ///
1190   /// \param Features [in] Target features, used for bug corrections.
1191   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1192   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1193   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1194   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1195   /// descriptor field, if valid.
1196   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1197   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1198   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1199   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1200   /// \param VGPRBlocks [out] Result VGPR block count.
1201   /// \param SGPRBlocks [out] Result SGPR block count.
1202   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1203                           bool FlatScrUsed, bool XNACKUsed,
1204                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1205                           SMRange VGPRRange, unsigned NextFreeSGPR,
1206                           SMRange SGPRRange, unsigned &VGPRBlocks,
1207                           unsigned &SGPRBlocks);
1208   bool ParseDirectiveAMDGCNTarget();
1209   bool ParseDirectiveAMDHSAKernel();
1210   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1211   bool ParseDirectiveHSACodeObjectVersion();
1212   bool ParseDirectiveHSACodeObjectISA();
1213   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1214   bool ParseDirectiveAMDKernelCodeT();
1215   // TODO: Possibly make subtargetHasRegister const.
1216   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1217   bool ParseDirectiveAMDGPUHsaKernel();
1218 
1219   bool ParseDirectiveISAVersion();
1220   bool ParseDirectiveHSAMetadata();
1221   bool ParseDirectivePALMetadataBegin();
1222   bool ParseDirectivePALMetadata();
1223   bool ParseDirectiveAMDGPULDS();
1224 
1225   /// Common code to parse out a block of text (typically YAML) between start and
1226   /// end directives.
1227   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1228                            const char *AssemblerDirectiveEnd,
1229                            std::string &CollectString);
1230 
1231   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1232                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1233   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1234                            unsigned &RegNum, unsigned &RegWidth,
1235                            bool RestoreOnFailure = false);
1236   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1237                            unsigned &RegNum, unsigned &RegWidth,
1238                            SmallVectorImpl<AsmToken> &Tokens);
1239   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1240                            unsigned &RegWidth,
1241                            SmallVectorImpl<AsmToken> &Tokens);
1242   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1243                            unsigned &RegWidth,
1244                            SmallVectorImpl<AsmToken> &Tokens);
1245   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1246                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1247   bool ParseRegRange(unsigned& Num, unsigned& Width);
1248   unsigned getRegularReg(RegisterKind RegKind,
1249                          unsigned RegNum,
1250                          unsigned RegWidth,
1251                          SMLoc Loc);
1252 
1253   bool isRegister();
1254   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1255   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1256   void initializeGprCountSymbol(RegisterKind RegKind);
1257   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1258                              unsigned RegWidth);
1259   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1260                     bool IsAtomic, bool IsLds = false);
1261   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1262                  bool IsGdsHardcoded);
1263 
1264 public:
1265   enum AMDGPUMatchResultTy {
1266     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1267   };
1268   enum OperandMode {
1269     OperandMode_Default,
1270     OperandMode_NSA,
1271   };
1272 
1273   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1274 
1275   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1276                const MCInstrInfo &MII,
1277                const MCTargetOptions &Options)
1278       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1279     MCAsmParserExtension::Initialize(Parser);
1280 
1281     if (getFeatureBits().none()) {
1282       // Set default features.
1283       copySTI().ToggleFeature("southern-islands");
1284     }
1285 
1286     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1287 
1288     {
1289       // TODO: make those pre-defined variables read-only.
1290       // Currently there is none suitable machinery in the core llvm-mc for this.
1291       // MCSymbol::isRedefinable is intended for another purpose, and
1292       // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
1293       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1294       MCContext &Ctx = getContext();
1295       if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
1296         MCSymbol *Sym =
1297             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1298         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1299         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1300         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1301         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1302         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1303       } else {
1304         MCSymbol *Sym =
1305             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1306         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1307         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1308         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1309         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1310         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1311       }
1312       if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
1313         initializeGprCountSymbol(IS_VGPR);
1314         initializeGprCountSymbol(IS_SGPR);
1315       } else
1316         KernelScope.initialize(getContext());
1317     }
1318   }
1319 
1320   bool hasMIMG_R128() const {
1321     return AMDGPU::hasMIMG_R128(getSTI());
1322   }
1323 
1324   bool hasPackedD16() const {
1325     return AMDGPU::hasPackedD16(getSTI());
1326   }
1327 
1328   bool hasGFX10A16() const {
1329     return AMDGPU::hasGFX10A16(getSTI());
1330   }
1331 
1332   bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1333 
1334   bool isSI() const {
1335     return AMDGPU::isSI(getSTI());
1336   }
1337 
1338   bool isCI() const {
1339     return AMDGPU::isCI(getSTI());
1340   }
1341 
1342   bool isVI() const {
1343     return AMDGPU::isVI(getSTI());
1344   }
1345 
1346   bool isGFX9() const {
1347     return AMDGPU::isGFX9(getSTI());
1348   }
1349 
1350   bool isGFX90A() const {
1351     return AMDGPU::isGFX90A(getSTI());
1352   }
1353 
1354   bool isGFX9Plus() const {
1355     return AMDGPU::isGFX9Plus(getSTI());
1356   }
1357 
1358   bool isGFX10() const {
1359     return AMDGPU::isGFX10(getSTI());
1360   }
1361 
1362   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1363 
1364   bool isGFX10_BEncoding() const {
1365     return AMDGPU::isGFX10_BEncoding(getSTI());
1366   }
1367 
1368   bool hasInv2PiInlineImm() const {
1369     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1370   }
1371 
1372   bool hasFlatOffsets() const {
1373     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1374   }
1375 
1376   bool hasSGPR102_SGPR103() const {
1377     return !isVI() && !isGFX9();
1378   }
1379 
1380   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1381 
1382   bool hasIntClamp() const {
1383     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1384   }
1385 
1386   AMDGPUTargetStreamer &getTargetStreamer() {
1387     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1388     return static_cast<AMDGPUTargetStreamer &>(TS);
1389   }
1390 
1391   const MCRegisterInfo *getMRI() const {
1392     // We need this const_cast because for some reason getContext() is not const
1393     // in MCAsmParser.
1394     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1395   }
1396 
1397   const MCInstrInfo *getMII() const {
1398     return &MII;
1399   }
1400 
1401   const FeatureBitset &getFeatureBits() const {
1402     return getSTI().getFeatureBits();
1403   }
1404 
1405   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1406   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1407   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1408 
1409   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1410   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1411   bool isForcedDPP() const { return ForcedDPP; }
1412   bool isForcedSDWA() const { return ForcedSDWA; }
1413   ArrayRef<unsigned> getMatchedVariants() const;
1414   StringRef getMatchedVariantName() const;
1415 
1416   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1417   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1418                      bool RestoreOnFailure);
1419   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1420   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1421                                         SMLoc &EndLoc) override;
1422   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1423   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1424                                       unsigned Kind) override;
1425   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1426                                OperandVector &Operands, MCStreamer &Out,
1427                                uint64_t &ErrorInfo,
1428                                bool MatchingInlineAsm) override;
1429   bool ParseDirective(AsmToken DirectiveID) override;
1430   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1431                                     OperandMode Mode = OperandMode_Default);
1432   StringRef parseMnemonicSuffix(StringRef Name);
1433   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1434                         SMLoc NameLoc, OperandVector &Operands) override;
1435   //bool ProcessInstruction(MCInst &Inst);
1436 
1437   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1438 
1439   OperandMatchResultTy
1440   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1441                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1442                      bool (*ConvertResult)(int64_t &) = nullptr);
1443 
1444   OperandMatchResultTy
1445   parseOperandArrayWithPrefix(const char *Prefix,
1446                               OperandVector &Operands,
1447                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1448                               bool (*ConvertResult)(int64_t&) = nullptr);
1449 
1450   OperandMatchResultTy
1451   parseNamedBit(StringRef Name, OperandVector &Operands,
1452                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1453   OperandMatchResultTy parseCPol(OperandVector &Operands);
1454   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1455                                              StringRef &Value,
1456                                              SMLoc &StringLoc);
1457 
1458   bool isModifier();
1459   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1460   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1461   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1462   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1463   bool parseSP3NegModifier();
1464   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1465   OperandMatchResultTy parseReg(OperandVector &Operands);
1466   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1467   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1468   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1469   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1470   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1471   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1472   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1473   OperandMatchResultTy parseUfmt(int64_t &Format);
1474   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1475   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1476   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1477   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1478   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1479   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1480   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1481 
1482   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1483   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1484   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1485   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1486 
1487   bool parseCnt(int64_t &IntVal);
1488   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1489   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1490 
1491 private:
1492   struct OperandInfoTy {
1493     SMLoc Loc;
1494     int64_t Id;
1495     bool IsSymbolic = false;
1496     bool IsDefined = false;
1497 
1498     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1499   };
1500 
1501   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1502   bool validateSendMsg(const OperandInfoTy &Msg,
1503                        const OperandInfoTy &Op,
1504                        const OperandInfoTy &Stream);
1505 
1506   bool parseHwregBody(OperandInfoTy &HwReg,
1507                       OperandInfoTy &Offset,
1508                       OperandInfoTy &Width);
1509   bool validateHwreg(const OperandInfoTy &HwReg,
1510                      const OperandInfoTy &Offset,
1511                      const OperandInfoTy &Width);
1512 
1513   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1514   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1515 
1516   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1517                       const OperandVector &Operands) const;
1518   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1519   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1520   SMLoc getLitLoc(const OperandVector &Operands) const;
1521   SMLoc getConstLoc(const OperandVector &Operands) const;
1522 
1523   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1524   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1525   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1526   bool validateSOPLiteral(const MCInst &Inst) const;
1527   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1528   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1529   bool validateIntClampSupported(const MCInst &Inst);
1530   bool validateMIMGAtomicDMask(const MCInst &Inst);
1531   bool validateMIMGGatherDMask(const MCInst &Inst);
1532   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1533   bool validateMIMGDataSize(const MCInst &Inst);
1534   bool validateMIMGAddrSize(const MCInst &Inst);
1535   bool validateMIMGD16(const MCInst &Inst);
1536   bool validateMIMGDim(const MCInst &Inst);
1537   bool validateMIMGMSAA(const MCInst &Inst);
1538   bool validateOpSel(const MCInst &Inst);
1539   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1540   bool validateVccOperand(unsigned Reg) const;
1541   bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
1542   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1543   bool validateAGPRLdSt(const MCInst &Inst) const;
1544   bool validateVGPRAlign(const MCInst &Inst) const;
1545   bool validateDivScale(const MCInst &Inst);
1546   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1547                              const SMLoc &IDLoc);
1548   Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1549   unsigned getConstantBusLimit(unsigned Opcode) const;
1550   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1551   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1552   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1553 
1554   bool isSupportedMnemo(StringRef Mnemo,
1555                         const FeatureBitset &FBS);
1556   bool isSupportedMnemo(StringRef Mnemo,
1557                         const FeatureBitset &FBS,
1558                         ArrayRef<unsigned> Variants);
1559   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1560 
1561   bool isId(const StringRef Id) const;
1562   bool isId(const AsmToken &Token, const StringRef Id) const;
1563   bool isToken(const AsmToken::TokenKind Kind) const;
1564   bool trySkipId(const StringRef Id);
1565   bool trySkipId(const StringRef Pref, const StringRef Id);
1566   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1567   bool trySkipToken(const AsmToken::TokenKind Kind);
1568   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1569   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1570   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1571 
1572   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1573   AsmToken::TokenKind getTokenKind() const;
1574   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1575   bool parseExpr(OperandVector &Operands);
1576   StringRef getTokenStr() const;
1577   AsmToken peekToken();
1578   AsmToken getToken() const;
1579   SMLoc getLoc() const;
1580   void lex();
1581 
1582 public:
1583   void onBeginOfFile() override;
1584 
1585   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1586   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1587 
1588   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1589   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1590   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1591   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1592   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1593   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1594 
1595   bool parseSwizzleOperand(int64_t &Op,
1596                            const unsigned MinVal,
1597                            const unsigned MaxVal,
1598                            const StringRef ErrMsg,
1599                            SMLoc &Loc);
1600   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1601                             const unsigned MinVal,
1602                             const unsigned MaxVal,
1603                             const StringRef ErrMsg);
1604   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1605   bool parseSwizzleOffset(int64_t &Imm);
1606   bool parseSwizzleMacro(int64_t &Imm);
1607   bool parseSwizzleQuadPerm(int64_t &Imm);
1608   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1609   bool parseSwizzleBroadcast(int64_t &Imm);
1610   bool parseSwizzleSwap(int64_t &Imm);
1611   bool parseSwizzleReverse(int64_t &Imm);
1612 
1613   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1614   int64_t parseGPRIdxMacro();
1615 
1616   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1617   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1618   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1619   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1620 
1621   AMDGPUOperand::Ptr defaultCPol() const;
1622 
1623   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1624   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1625   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1626   AMDGPUOperand::Ptr defaultFlatOffset() const;
1627 
1628   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1629 
1630   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1631                OptionalImmIndexMap &OptionalIdx);
1632   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1633   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1634   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1635   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1636                 OptionalImmIndexMap &OptionalIdx);
1637 
1638   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1639 
1640   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1641                bool IsAtomic = false);
1642   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1643   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1644 
1645   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1646 
1647   bool parseDimId(unsigned &Encoding);
1648   OperandMatchResultTy parseDim(OperandVector &Operands);
1649   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1650   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1651   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1652   int64_t parseDPPCtrlSel(StringRef Ctrl);
1653   int64_t parseDPPCtrlPerm();
1654   AMDGPUOperand::Ptr defaultRowMask() const;
1655   AMDGPUOperand::Ptr defaultBankMask() const;
1656   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1657   AMDGPUOperand::Ptr defaultFI() const;
1658   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1659   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1660 
1661   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1662                                     AMDGPUOperand::ImmTy Type);
1663   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1664   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1665   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1666   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1667   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1668   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1669   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1670                uint64_t BasicInstType,
1671                bool SkipDstVcc = false,
1672                bool SkipSrcVcc = false);
1673 
1674   AMDGPUOperand::Ptr defaultBLGP() const;
1675   AMDGPUOperand::Ptr defaultCBSZ() const;
1676   AMDGPUOperand::Ptr defaultABID() const;
1677 
1678   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1679   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1680 };
1681 
// Table entry describing one optional instruction operand/modifier that the
// parser may accept after the mandatory operands.
struct OptionalOperand {
  // Textual name of the modifier as written in assembly source.
  const char *Name;
  // Immediate operand kind produced when this modifier is parsed.
  AMDGPUOperand::ImmTy Type;
  // True if the modifier is a single-bit flag (present/absent).
  bool IsBit;
  // Optional hook to convert or validate the parsed value; may be null.
  bool (*ConvertResult)(int64_t&);
};
1688 
1689 } // end anonymous namespace
1690 
1691 // May be called with integer type with equivalent bitwidth.
1692 static const fltSemantics *getFltSemantics(unsigned Size) {
1693   switch (Size) {
1694   case 4:
1695     return &APFloat::IEEEsingle();
1696   case 8:
1697     return &APFloat::IEEEdouble();
1698   case 2:
1699     return &APFloat::IEEEhalf();
1700   default:
1701     llvm_unreachable("unsupported fp type");
1702   }
1703 }
1704 
1705 static const fltSemantics *getFltSemantics(MVT VT) {
1706   return getFltSemantics(VT.getSizeInBits() / 8);
1707 }
1708 
// Map an AMDGPU machine-operand type to the IEEE float semantics used when
// interpreting its literal value. Aborts on operand types with no FP
// interpretation.
static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  // 32-bit operand types (including 2x32 vector forms) use single precision.
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
    return &APFloat::IEEEsingle();
  // 64-bit operand types use double precision.
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    return &APFloat::IEEEdouble();
  // 16-bit operand types (including 2x16 vector forms) use half precision.
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}
1745 
1746 //===----------------------------------------------------------------------===//
1747 // Operand
1748 //===----------------------------------------------------------------------===//
1749 
/// Convert FPLiteral in place to the semantics of VT and report whether the
/// value survives the conversion. Precision loss is tolerated; only overflow
/// or underflow causes rejection.
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to single precision
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision lost but not overflow or underflow
  // NOTE(review): the check also requires Lost to be set, so an
  // overflow/underflow reported without Lost would be accepted — confirm this
  // matches the intent stated above.
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}
1767 
1768 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1769   return isUIntN(Size, Val) || isIntN(Size, Val);
1770 }
1771 
1772 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1773   if (VT.getScalarType() == MVT::i16) {
1774     // FP immediate values are broken.
1775     return isInlinableIntLiteral(Val);
1776   }
1777 
1778   // f16/v2f16 operands work correctly for all values.
1779   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1780 }
1781 
/// Return true if this operand can be encoded as an inline constant for an
/// operand of machine type \p type. Handles named inline values, FP literal
/// tokens, and integer literal tokens separately.
bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    // For narrower operands, the literal must first convert losslessly
    // (overflow/underflow-free) to the operand's FP type.
    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return isInlineableLiteralOp16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        type, AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  // The literal must fit in the operand width as signed or unsigned.
  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return isInlineableLiteralOp16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      type, AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}
1844 
/// Return true if this operand can be encoded as a (non-inline) literal
/// constant for an operand of machine type \p type.
bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set low 64-bits of literal to zeroes but we accept this literals
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslesly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
                     (type == MVT::v2i16)? MVT::i16 :
                     (type == MVT::v2f32)? MVT::f32 : type;

  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
}
1892 
1893 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1894   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1895 }
1896 
1897 bool AMDGPUOperand::isVRegWithInputMods() const {
1898   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1899          // GFX90A allows DPP on 64-bit operands.
1900          (isRegClass(AMDGPU::VReg_64RegClassID) &&
1901           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1902 }
1903 
1904 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1905   if (AsmParser->isVI())
1906     return isVReg32();
1907   else if (AsmParser->isGFX9Plus())
1908     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1909   else
1910     return false;
1911 }
1912 
// Convenience wrappers checking SDWA operand validity for each element type.

bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}
1928 
1929 bool AMDGPUOperand::isBoolReg() const {
1930   auto FB = AsmParser->getFeatureBits();
1931   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1932                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
1933 }
1934 
1935 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1936 {
1937   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1938   assert(Size == 2 || Size == 4 || Size == 8);
1939 
1940   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1941 
1942   if (Imm.Mods.Abs) {
1943     Val &= ~FpSignMask;
1944   }
1945   if (Imm.Mods.Neg) {
1946     Val ^= FpSignMask;
1947   }
1948 
1949   return Val;
1950 }
1951 
1952 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1953   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1954                              Inst.getNumOperands())) {
1955     addLiteralImmOperand(Inst, Imm.Val,
1956                          ApplyModifiers &
1957                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1958   } else {
1959     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1960     Inst.addOperand(MCOperand::createImm(Imm.Val));
1961     setImmKindNone();
1962   }
1963 }
1964 
/// Encode \p Val as the next operand of \p Inst, choosing between an inline
/// constant encoding and a literal encoding based on the operand type.
/// If \p ApplyModifiers is set, fold this operand's abs/neg FP modifiers into
/// the value first. Also records the resulting immediate kind (const/literal).
void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    // FP literal tokens are stored as 64-bit doubles; apply modifiers at
    // that width. Integer tokens use the operand's own size.
    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;

  if (Imm.IsFPImm) { // We got fp literal token
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
      // Prefer the inline-constant encoding when the value qualifies.
      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        setImmKindConst();
        return;
      }

      // Non-inlineable
      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check if low 32 bits are zeros
        if (Literal.getLoBits(32) != 0) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
          "Can't encode literal as exact 64-bit floating-point operand. "
          "Low 32-bits will be set to zero");
        }

        // Only the high 32 bits are encoded; hardware pads the low half
        // with zeroes.
        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
        setImmKindLiteral();
        return;
      }

      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them. This case should be checked earlier
      // in predicate methods (isLiteralImm())
      llvm_unreachable("fp literal in 64-bit integer instruction.");

    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
    case AMDGPU::OPERAND_REG_IMM_V2INT16:
    case AMDGPU::OPERAND_REG_IMM_V2FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
    case AMDGPU::OPERAND_REG_IMM_V2FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
    case AMDGPU::OPERAND_REG_IMM_V2INT32: {
      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert literal to single precision
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision lost but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()

      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      Inst.addOperand(MCOperand::createImm(ImmVal));
      setImmKindLiteral();
      return;
    }
    default:
      llvm_unreachable("invalid operand size");
    }

    return;
  }

  // We got int literal token.
  // Only sign extend inline immediates.
  switch (OpTy) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
    // Inline constant if the value both fits in 32 bits and qualifies;
    // otherwise encode the low 32 bits as a literal.
    if (isSafeTruncation(Val, 32) &&
        AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      setImmKindConst();
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
    setImmKindLiteral();
    return;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      setImmKindConst();
      return;
    }

    // Non-inlineable 64-bit literals keep only the low 32 bits.
    Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
    setImmKindLiteral();
    return;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
    if (isSafeTruncation(Val, 16) &&
        AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      setImmKindConst();
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffff));
    setImmKindLiteral();
    return;

  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
    // Packed inline-only operand types: the value must already be a valid
    // 16-bit inline constant (guaranteed by the operand predicates).
    assert(isSafeTruncation(Val, 16));
    assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                        AsmParser->hasInv2PiInlineImm()));

    Inst.addOperand(MCOperand::createImm(Val));
    return;
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}
2130 
/// Append this operand to \p Inst as a KImm (mandatory literal) of the given
/// bit width. FP tokens are converted to the target FP format; integer tokens
/// are truncated to Bitwidth bits. N is unused.
template <unsigned Bitwidth>
void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
  APInt Literal(64, Imm.Val);
  setImmKindNone();

  if (!Imm.IsFPImm) {
    // We got int literal token.
    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
    return;
  }

  bool Lost;
  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
  FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
                    APFloat::rmNearestTiesToEven, &Lost);
  Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
}
2148 
/// Append this operand's register to \p Inst, translated to the
/// subtarget-specific MC register number. N is unused.
void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
}
2152 
2153 static bool isInlineValue(unsigned Reg) {
2154   switch (Reg) {
2155   case AMDGPU::SRC_SHARED_BASE:
2156   case AMDGPU::SRC_SHARED_LIMIT:
2157   case AMDGPU::SRC_PRIVATE_BASE:
2158   case AMDGPU::SRC_PRIVATE_LIMIT:
2159   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2160     return true;
2161   case AMDGPU::SRC_VCCZ:
2162   case AMDGPU::SRC_EXECZ:
2163   case AMDGPU::SRC_SCC:
2164     return true;
2165   case AMDGPU::SGPR_NULL:
2166     return true;
2167   default:
2168     return false;
2169   }
2170 }
2171 
/// Return true if this operand is a register that encodes as an inline value.
bool AMDGPUOperand::isInlineValue() const {
  return isRegKind() && ::isInlineValue(getReg());
}
2175 
2176 //===----------------------------------------------------------------------===//
2177 // AsmParser
2178 //===----------------------------------------------------------------------===//
2179 
2180 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2181   if (Is == IS_VGPR) {
2182     switch (RegWidth) {
2183       default: return -1;
2184       case 1: return AMDGPU::VGPR_32RegClassID;
2185       case 2: return AMDGPU::VReg_64RegClassID;
2186       case 3: return AMDGPU::VReg_96RegClassID;
2187       case 4: return AMDGPU::VReg_128RegClassID;
2188       case 5: return AMDGPU::VReg_160RegClassID;
2189       case 6: return AMDGPU::VReg_192RegClassID;
2190       case 8: return AMDGPU::VReg_256RegClassID;
2191       case 16: return AMDGPU::VReg_512RegClassID;
2192       case 32: return AMDGPU::VReg_1024RegClassID;
2193     }
2194   } else if (Is == IS_TTMP) {
2195     switch (RegWidth) {
2196       default: return -1;
2197       case 1: return AMDGPU::TTMP_32RegClassID;
2198       case 2: return AMDGPU::TTMP_64RegClassID;
2199       case 4: return AMDGPU::TTMP_128RegClassID;
2200       case 8: return AMDGPU::TTMP_256RegClassID;
2201       case 16: return AMDGPU::TTMP_512RegClassID;
2202     }
2203   } else if (Is == IS_SGPR) {
2204     switch (RegWidth) {
2205       default: return -1;
2206       case 1: return AMDGPU::SGPR_32RegClassID;
2207       case 2: return AMDGPU::SGPR_64RegClassID;
2208       case 3: return AMDGPU::SGPR_96RegClassID;
2209       case 4: return AMDGPU::SGPR_128RegClassID;
2210       case 5: return AMDGPU::SGPR_160RegClassID;
2211       case 6: return AMDGPU::SGPR_192RegClassID;
2212       case 8: return AMDGPU::SGPR_256RegClassID;
2213       case 16: return AMDGPU::SGPR_512RegClassID;
2214     }
2215   } else if (Is == IS_AGPR) {
2216     switch (RegWidth) {
2217       default: return -1;
2218       case 1: return AMDGPU::AGPR_32RegClassID;
2219       case 2: return AMDGPU::AReg_64RegClassID;
2220       case 3: return AMDGPU::AReg_96RegClassID;
2221       case 4: return AMDGPU::AReg_128RegClassID;
2222       case 5: return AMDGPU::AReg_160RegClassID;
2223       case 6: return AMDGPU::AReg_192RegClassID;
2224       case 8: return AMDGPU::AReg_256RegClassID;
2225       case 16: return AMDGPU::AReg_512RegClassID;
2226       case 32: return AMDGPU::AReg_1024RegClassID;
2227     }
2228   }
2229   return -1;
2230 }
2231 
/// Look up a special (non-numbered) register by its assembly name, accepting
/// both plain and "src_"-prefixed spellings where applicable. Returns
/// AMDGPU::NoRegister if the name is not a special register.
static unsigned getSpecialRegForName(StringRef RegName) {
  return StringSwitch<unsigned>(RegName)
    .Case("exec", AMDGPU::EXEC)
    .Case("vcc", AMDGPU::VCC)
    .Case("flat_scratch", AMDGPU::FLAT_SCR)
    .Case("xnack_mask", AMDGPU::XNACK_MASK)
    .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("lds_direct", AMDGPU::LDS_DIRECT)
    .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
    .Case("m0", AMDGPU::M0)
    .Case("vccz", AMDGPU::SRC_VCCZ)
    .Case("src_vccz", AMDGPU::SRC_VCCZ)
    .Case("execz", AMDGPU::SRC_EXECZ)
    .Case("src_execz", AMDGPU::SRC_EXECZ)
    .Case("scc", AMDGPU::SRC_SCC)
    .Case("src_scc", AMDGPU::SRC_SCC)
    .Case("tba", AMDGPU::TBA)
    .Case("tma", AMDGPU::TMA)
    .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
    .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
    .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
    .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
    .Case("vcc_lo", AMDGPU::VCC_LO)
    .Case("vcc_hi", AMDGPU::VCC_HI)
    .Case("exec_lo", AMDGPU::EXEC_LO)
    .Case("exec_hi", AMDGPU::EXEC_HI)
    .Case("tma_lo", AMDGPU::TMA_LO)
    .Case("tma_hi", AMDGPU::TMA_HI)
    .Case("tba_lo", AMDGPU::TBA_LO)
    .Case("tba_hi", AMDGPU::TBA_HI)
    .Case("pc", AMDGPU::PC_REG)
    .Case("null", AMDGPU::SGPR_NULL)
    .Default(AMDGPU::NoRegister);
}
2275 
/// Parse a register operand, returning true on failure (MCTargetAsmParser
/// convention). On success fills in the register number and source range.
/// NOTE(review): RestoreOnFailure is currently unused in this overload —
/// token restoration is handled by the ParseAMDGPURegister overloads; confirm.
bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc, bool RestoreOnFailure) {
  auto R = parseRegister();
  if (!R) return true;
  assert(R->isReg());
  RegNo = R->getReg();
  StartLoc = R->getStartLoc();
  EndLoc = R->getEndLoc();
  return false;
}
2286 
/// Non-speculative variant required by the MCTargetAsmParser interface.
bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc) {
  return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
}
2291 
2292 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2293                                                        SMLoc &StartLoc,
2294                                                        SMLoc &EndLoc) {
2295   bool Result =
2296       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2297   bool PendingErrors = getParser().hasPendingError();
2298   getParser().clearPendingErrors();
2299   if (PendingErrors)
2300     return MatchOperand_ParseFail;
2301   if (Result)
2302     return MatchOperand_NoMatch;
2303   return MatchOperand_Success;
2304 }
2305 
2306 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2307                                             RegisterKind RegKind, unsigned Reg1,
2308                                             SMLoc Loc) {
2309   switch (RegKind) {
2310   case IS_SPECIAL:
2311     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2312       Reg = AMDGPU::EXEC;
2313       RegWidth = 2;
2314       return true;
2315     }
2316     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2317       Reg = AMDGPU::FLAT_SCR;
2318       RegWidth = 2;
2319       return true;
2320     }
2321     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2322       Reg = AMDGPU::XNACK_MASK;
2323       RegWidth = 2;
2324       return true;
2325     }
2326     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2327       Reg = AMDGPU::VCC;
2328       RegWidth = 2;
2329       return true;
2330     }
2331     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2332       Reg = AMDGPU::TBA;
2333       RegWidth = 2;
2334       return true;
2335     }
2336     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2337       Reg = AMDGPU::TMA;
2338       RegWidth = 2;
2339       return true;
2340     }
2341     Error(Loc, "register does not fit in the list");
2342     return false;
2343   case IS_VGPR:
2344   case IS_SGPR:
2345   case IS_AGPR:
2346   case IS_TTMP:
2347     if (Reg1 != Reg + RegWidth) {
2348       Error(Loc, "registers in a list must have consecutive indices");
2349       return false;
2350     }
2351     RegWidth++;
2352     return true;
2353   default:
2354     llvm_unreachable("unexpected register kind");
2355   }
2356 }
2357 
/// Name prefix and kind of a regular (numbered) register family.
struct RegInfo {
  StringLiteral Name;
  RegisterKind Kind;
};
2362 
// Regular register name prefixes. Order matters: getRegularRegInfo does
// prefix matching, so "acc" must be listed before "a" to win the match.
static constexpr RegInfo RegularRegisters[] = {
  {{"v"},    IS_VGPR},
  {{"s"},    IS_SGPR},
  {{"ttmp"}, IS_TTMP},
  {{"acc"},  IS_AGPR},
  {{"a"},    IS_AGPR},
};
2370 
2371 static bool isRegularReg(RegisterKind Kind) {
2372   return Kind == IS_VGPR ||
2373          Kind == IS_SGPR ||
2374          Kind == IS_TTMP ||
2375          Kind == IS_AGPR;
2376 }
2377 
/// Find the regular register family whose name prefixes \p Str, or nullptr.
/// The first matching entry of RegularRegisters wins, so table order matters.
static const RegInfo* getRegularRegInfo(StringRef Str) {
  for (const RegInfo &Reg : RegularRegisters)
    if (Str.startswith(Reg.Name))
      return &Reg;
  return nullptr;
}
2384 
/// Parse \p Str as a decimal register index into \p Num; returns true on
/// success (getAsInteger returns true on error, hence the negation).
static bool getRegNum(StringRef Str, unsigned& Num) {
  return !Str.getAsInteger(10, Num);
}
2388 
2389 bool
2390 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2391                             const AsmToken &NextToken) const {
2392 
2393   // A list of consecutive registers: [s0,s1,s2,s3]
2394   if (Token.is(AsmToken::LBrac))
2395     return true;
2396 
2397   if (!Token.is(AsmToken::Identifier))
2398     return false;
2399 
2400   // A single register like s0 or a range of registers like s[0:1]
2401 
2402   StringRef Str = Token.getString();
2403   const RegInfo *Reg = getRegularRegInfo(Str);
2404   if (Reg) {
2405     StringRef RegName = Reg->Name;
2406     StringRef RegSuffix = Str.substr(RegName.size());
2407     if (!RegSuffix.empty()) {
2408       unsigned Num;
2409       // A single register with an index: rXX
2410       if (getRegNum(RegSuffix, Num))
2411         return true;
2412     } else {
2413       // A range of registers: r[XX:YY].
2414       if (NextToken.is(AsmToken::LBrac))
2415         return true;
2416     }
2417   }
2418 
2419   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2420 }
2421 
/// Lookahead check on the current token stream (no tokens consumed).
bool
AMDGPUAsmParser::isRegister()
{
  return isRegister(getToken(), peekToken());
}
2427 
2428 unsigned
2429 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2430                                unsigned RegNum,
2431                                unsigned RegWidth,
2432                                SMLoc Loc) {
2433 
2434   assert(isRegularReg(RegKind));
2435 
2436   unsigned AlignSize = 1;
2437   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2438     // SGPR and TTMP registers must be aligned.
2439     // Max required alignment is 4 dwords.
2440     AlignSize = std::min(RegWidth, 4u);
2441   }
2442 
2443   if (RegNum % AlignSize != 0) {
2444     Error(Loc, "invalid register alignment");
2445     return AMDGPU::NoRegister;
2446   }
2447 
2448   unsigned RegIdx = RegNum / AlignSize;
2449   int RCID = getRegClass(RegKind, RegWidth);
2450   if (RCID == -1) {
2451     Error(Loc, "invalid or unsupported register size");
2452     return AMDGPU::NoRegister;
2453   }
2454 
2455   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2456   const MCRegisterClass RC = TRI->getRegClass(RCID);
2457   if (RegIdx >= RC.getNumRegs()) {
2458     Error(Loc, "register index is out of range");
2459     return AMDGPU::NoRegister;
2460   }
2461 
2462   return RC.getRegister(RegIdx);
2463 }
2464 
/// Parse a register index range "[lo]" or "[lo:hi]", returning the first
/// index in \p Num and the tuple width in \p Width. Emits diagnostics and
/// returns false on malformed input.
bool
AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
  int64_t RegLo, RegHi;
  if (!skipToken(AsmToken::LBrac, "missing register index"))
    return false;

  SMLoc FirstIdxLoc = getLoc();
  SMLoc SecondIdxLoc;

  if (!parseExpr(RegLo))
    return false;

  // The ":hi" part is optional; a bare "[lo]" denotes a single register.
  if (trySkipToken(AsmToken::Colon)) {
    SecondIdxLoc = getLoc();
    if (!parseExpr(RegHi))
      return false;
  } else {
    RegHi = RegLo;
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return false;

  if (!isUInt<32>(RegLo)) {
    Error(FirstIdxLoc, "invalid register index");
    return false;
  }

  if (!isUInt<32>(RegHi)) {
    Error(SecondIdxLoc, "invalid register index");
    return false;
  }

  if (RegLo > RegHi) {
    Error(FirstIdxLoc, "first register index should not exceed second index");
    return false;
  }

  Num = static_cast<unsigned>(RegLo);
  Width = (RegHi - RegLo) + 1;
  return true;
}
2507 
/// Try to parse the current identifier as a special register. On success the
/// token is consumed (and recorded in \p Tokens for possible restoration) and
/// the register is returned; otherwise returns AMDGPU::NoRegister and
/// consumes nothing.
unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          SmallVectorImpl<AsmToken> &Tokens) {
  assert(isToken(AsmToken::Identifier));
  unsigned Reg = getSpecialRegForName(getTokenStr());
  if (Reg) {
    RegNum = 0;
    RegWidth = 1;
    RegKind = IS_SPECIAL;
    Tokens.push_back(getToken());
    lex(); // skip register name
  }
  return Reg;
}
2522 
/// Parse a regular register reference: either a single indexed register
/// ("v12") or a range ("v[8:11]"). Consumed tokens are recorded in \p Tokens.
/// Returns AMDGPU::NoRegister (with a diagnostic) on failure.
unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          SmallVectorImpl<AsmToken> &Tokens) {
  assert(isToken(AsmToken::Identifier));
  StringRef RegName = getTokenStr();
  auto Loc = getLoc();

  const RegInfo *RI = getRegularRegInfo(RegName);
  if (!RI) {
    Error(Loc, "invalid register name");
    return AMDGPU::NoRegister;
  }

  Tokens.push_back(getToken());
  lex(); // skip register name

  RegKind = RI->Kind;
  StringRef RegSuffix = RegName.substr(RI->Name.size());
  if (!RegSuffix.empty()) {
    // Single 32-bit register: vXX.
    if (!getRegNum(RegSuffix, RegNum)) {
      Error(Loc, "invalid register index");
      return AMDGPU::NoRegister;
    }
    RegWidth = 1;
  } else {
    // Range of registers: v[XX:YY]. ":YY" is optional.
    if (!ParseRegRange(RegNum, RegWidth))
      return AMDGPU::NoRegister;
  }

  return getRegularReg(RegKind, RegNum, RegWidth, Loc);
}
2556 
/// Parse a bracketed list of single 32-bit registers, e.g. [s0,s1,s2,s3].
/// All elements must be 32-bit registers of the same kind with consecutive
/// indices; the list denotes the corresponding register tuple. Returns
/// AMDGPU::NoRegister (with a diagnostic) on failure.
unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                                       unsigned &RegWidth,
                                       SmallVectorImpl<AsmToken> &Tokens) {
  unsigned Reg = AMDGPU::NoRegister;
  auto ListLoc = getLoc();

  if (!skipToken(AsmToken::LBrac,
                 "expected a register or a list of registers")) {
    return AMDGPU::NoRegister;
  }

  // List of consecutive registers, e.g.: [s0,s1,s2,s3]

  auto Loc = getLoc();
  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
    return AMDGPU::NoRegister;
  if (RegWidth != 1) {
    Error(Loc, "expected a single 32-bit register");
    return AMDGPU::NoRegister;
  }

  // Each further element must match the first in kind and extend the
  // consecutive run by one.
  for (; trySkipToken(AsmToken::Comma); ) {
    RegisterKind NextRegKind;
    unsigned NextReg, NextRegNum, NextRegWidth;
    Loc = getLoc();

    if (!ParseAMDGPURegister(NextRegKind, NextReg,
                             NextRegNum, NextRegWidth,
                             Tokens)) {
      return AMDGPU::NoRegister;
    }
    if (NextRegWidth != 1) {
      Error(Loc, "expected a single 32-bit register");
      return AMDGPU::NoRegister;
    }
    if (NextRegKind != RegKind) {
      Error(Loc, "registers in a list must be of the same kind");
      return AMDGPU::NoRegister;
    }
    if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
      return AMDGPU::NoRegister;
  }

  if (!skipToken(AsmToken::RBrac,
                 "expected a comma or a closing square bracket")) {
    return AMDGPU::NoRegister;
  }

  // Re-resolve the accumulated (kind, first index, width) triple into the
  // tuple register, applying alignment and range checks.
  if (isRegularReg(RegKind))
    Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);

  return Reg;
}
2610 
/// Parse any register form (special, regular, or list), then verify the
/// result is available on the current subtarget. Returns true on success;
/// on failure a diagnostic is pending and false is returned.
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          SmallVectorImpl<AsmToken> &Tokens) {
  auto Loc = getLoc();
  Reg = AMDGPU::NoRegister;

  if (isToken(AsmToken::Identifier)) {
    // Special registers are tried first; their names never collide with a
    // regular register spelling that would parse successfully.
    Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
    if (Reg == AMDGPU::NoRegister)
      Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
  } else {
    Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
  }

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (Reg == AMDGPU::NoRegister) {
    assert(Parser.hasPendingError());
    return false;
  }

  if (!subtargetHasRegister(*TRI, Reg)) {
    if (Reg == AMDGPU::SGPR_NULL) {
      Error(Loc, "'null' operand is not supported on this GPU");
    } else {
      Error(Loc, "register not available on this GPU");
    }
    return false;
  }

  return true;
}
2642 
2643 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2644                                           unsigned &RegNum, unsigned &RegWidth,
2645                                           bool RestoreOnFailure /*=false*/) {
2646   Reg = AMDGPU::NoRegister;
2647 
2648   SmallVector<AsmToken, 1> Tokens;
2649   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2650     if (RestoreOnFailure) {
2651       while (!Tokens.empty()) {
2652         getLexer().UnLex(Tokens.pop_back_val());
2653       }
2654     }
2655     return true;
2656   }
2657   return false;
2658 }
2659 
2660 Optional<StringRef>
2661 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2662   switch (RegKind) {
2663   case IS_VGPR:
2664     return StringRef(".amdgcn.next_free_vgpr");
2665   case IS_SGPR:
2666     return StringRef(".amdgcn.next_free_sgpr");
2667   default:
2668     return None;
2669   }
2670 }
2671 
// Create (or reuse) the ".amdgcn.next_free_{v,s}gpr" tracking symbol for
// \p RegKind and reset its value to zero.
void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
  auto SymbolName = getGprCountSymbolName(RegKind);
  assert(SymbolName && "initializing invalid register kind");
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
  Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
}
2678 
2679 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2680                                             unsigned DwordRegIndex,
2681                                             unsigned RegWidth) {
2682   // Symbols are only defined for GCN targets
2683   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2684     return true;
2685 
2686   auto SymbolName = getGprCountSymbolName(RegKind);
2687   if (!SymbolName)
2688     return true;
2689   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2690 
2691   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2692   int64_t OldCount;
2693 
2694   if (!Sym->isVariable())
2695     return !Error(getLoc(),
2696                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2697   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2698     return !Error(
2699         getLoc(),
2700         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2701 
2702   if (OldCount <= NewMax)
2703     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2704 
2705   return true;
2706 }
2707 
2708 std::unique_ptr<AMDGPUOperand>
2709 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2710   const auto &Tok = getToken();
2711   SMLoc StartLoc = Tok.getLoc();
2712   SMLoc EndLoc = Tok.getEndLoc();
2713   RegisterKind RegKind;
2714   unsigned Reg, RegNum, RegWidth;
2715 
2716   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2717     return nullptr;
2718   }
2719   if (isHsaAbiVersion3Or4(&getSTI())) {
2720     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2721       return nullptr;
2722   } else
2723     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2724   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2725 }
2726 
2727 OperandMatchResultTy
2728 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2729   // TODO: add syntactic sugar for 1/(2*PI)
2730 
2731   assert(!isRegister());
2732   assert(!isModifier());
2733 
2734   const auto& Tok = getToken();
2735   const auto& NextTok = peekToken();
2736   bool IsReal = Tok.is(AsmToken::Real);
2737   SMLoc S = getLoc();
2738   bool Negate = false;
2739 
2740   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2741     lex();
2742     IsReal = true;
2743     Negate = true;
2744   }
2745 
2746   if (IsReal) {
2747     // Floating-point expressions are not supported.
2748     // Can only allow floating-point literals with an
2749     // optional sign.
2750 
2751     StringRef Num = getTokenStr();
2752     lex();
2753 
2754     APFloat RealVal(APFloat::IEEEdouble());
2755     auto roundMode = APFloat::rmNearestTiesToEven;
2756     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2757       return MatchOperand_ParseFail;
2758     }
2759     if (Negate)
2760       RealVal.changeSign();
2761 
2762     Operands.push_back(
2763       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2764                                AMDGPUOperand::ImmTyNone, true));
2765 
2766     return MatchOperand_Success;
2767 
2768   } else {
2769     int64_t IntVal;
2770     const MCExpr *Expr;
2771     SMLoc S = getLoc();
2772 
2773     if (HasSP3AbsModifier) {
2774       // This is a workaround for handling expressions
2775       // as arguments of SP3 'abs' modifier, for example:
2776       //     |1.0|
2777       //     |-1|
2778       //     |1+x|
2779       // This syntax is not compatible with syntax of standard
2780       // MC expressions (due to the trailing '|').
2781       SMLoc EndLoc;
2782       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2783         return MatchOperand_ParseFail;
2784     } else {
2785       if (Parser.parseExpression(Expr))
2786         return MatchOperand_ParseFail;
2787     }
2788 
2789     if (Expr->evaluateAsAbsolute(IntVal)) {
2790       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2791     } else {
2792       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2793     }
2794 
2795     return MatchOperand_Success;
2796   }
2797 
2798   return MatchOperand_NoMatch;
2799 }
2800 
2801 OperandMatchResultTy
2802 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2803   if (!isRegister())
2804     return MatchOperand_NoMatch;
2805 
2806   if (auto R = parseRegister()) {
2807     assert(R->isReg());
2808     Operands.push_back(std::move(R));
2809     return MatchOperand_Success;
2810   }
2811   return MatchOperand_ParseFail;
2812 }
2813 
2814 OperandMatchResultTy
2815 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2816   auto res = parseReg(Operands);
2817   if (res != MatchOperand_NoMatch) {
2818     return res;
2819   } else if (isModifier()) {
2820     return MatchOperand_NoMatch;
2821   } else {
2822     return parseImm(Operands, HasSP3AbsMod);
2823   }
2824 }
2825 
2826 bool
2827 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2828   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2829     const auto &str = Token.getString();
2830     return str == "abs" || str == "neg" || str == "sext";
2831   }
2832   return false;
2833 }
2834 
2835 bool
2836 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2837   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2838 }
2839 
2840 bool
2841 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2842   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2843 }
2844 
2845 bool
2846 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2847   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2848 }
2849 
2850 // Check if this is an operand modifier or an opcode modifier
2851 // which may look like an expression but it is not. We should
2852 // avoid parsing these modifiers as expressions. Currently
2853 // recognized sequences are:
2854 //   |...|
2855 //   abs(...)
2856 //   neg(...)
2857 //   sext(...)
2858 //   -reg
2859 //   -|...|
2860 //   -abs(...)
2861 //   name:...
2862 // Note that simple opcode modifiers like 'gds' may be parsed as
2863 // expressions; this is a special case. See getExpressionAsToken.
2864 //
2865 bool
2866 AMDGPUAsmParser::isModifier() {
2867 
2868   AsmToken Tok = getToken();
2869   AsmToken NextToken[2];
2870   peekTokens(NextToken);
2871 
2872   return isOperandModifier(Tok, NextToken[0]) ||
2873          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2874          isOpcodeModifierWithVal(Tok, NextToken[0]);
2875 }
2876 
2877 // Check if the current token is an SP3 'neg' modifier.
2878 // Currently this modifier is allowed in the following context:
2879 //
2880 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2881 // 2. Before an 'abs' modifier: -abs(...)
2882 // 3. Before an SP3 'abs' modifier: -|...|
2883 //
2884 // In all other cases "-" is handled as a part
2885 // of an expression that follows the sign.
2886 //
2887 // Note: When "-" is followed by an integer literal,
2888 // this is interpreted as integer negation rather
2889 // than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of a floating-point
2891 // NEG modifier would have resulted in different meaning
2892 // of integer literals used with VOP1/2/C and VOP3,
2893 // for example:
2894 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2895 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2896 // Negative fp literals with preceding "-" are
// handled likewise for uniformity.
2898 //
2899 bool
2900 AMDGPUAsmParser::parseSP3NegModifier() {
2901 
2902   AsmToken NextToken[2];
2903   peekTokens(NextToken);
2904 
2905   if (isToken(AsmToken::Minus) &&
2906       (isRegister(NextToken[0], NextToken[1]) ||
2907        NextToken[0].is(AsmToken::Pipe) ||
2908        isId(NextToken[0], "abs"))) {
2909     lex();
2910     return true;
2911   }
2912 
2913   return false;
2914 }
2915 
// Parse an operand together with its optional FP input modifiers:
//   neg(x), -x (SP3), abs(x), |x| (SP3), and legal combinations thereof.
// On success the modifier flags are attached to the just-parsed operand.
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
    return MatchOperand_ParseFail;
  }

  SP3Neg = parseSP3NegModifier();

  Loc = getLoc();
  Neg = trySkipId("neg");
  // SP3 '-' and named 'neg(...)' cannot be combined.
  if (Neg && SP3Neg) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return MatchOperand_ParseFail;

  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return MatchOperand_ParseFail;

  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  // Named 'abs(...)' and SP3 '|...|' cannot be combined.
  if (Abs && SP3Abs) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    // SP3Abs is propagated because the trailing '|' must terminate
    // the immediate expression (see parseImm).
    Res = parseRegOrImm(Operands, SP3Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    // Once any modifier token has been consumed, failure is fatal.
    return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
  }

  // Consume the closing delimiter of each modifier that was opened.
  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
    return MatchOperand_ParseFail;
  if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;
  if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;

  AMDGPUOperand::Modifiers Mods;
  Mods.Abs = Abs || SP3Abs;
  Mods.Neg = Neg || SP3Neg;

  if (Mods.hasFPModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    // Modifiers cannot be applied to non-absolute (relocatable) expressions.
    if (Op.isExpr()) {
      Error(Op.getStartLoc(), "expected an absolute expression");
      return MatchOperand_ParseFail;
    }
    Op.setModifiers(Mods);
  }
  return MatchOperand_Success;
}
2982 
2983 OperandMatchResultTy
2984 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2985                                                bool AllowImm) {
2986   bool Sext = trySkipId("sext");
2987   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2988     return MatchOperand_ParseFail;
2989 
2990   OperandMatchResultTy Res;
2991   if (AllowImm) {
2992     Res = parseRegOrImm(Operands);
2993   } else {
2994     Res = parseReg(Operands);
2995   }
2996   if (Res != MatchOperand_Success) {
2997     return Sext? MatchOperand_ParseFail : Res;
2998   }
2999 
3000   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3001     return MatchOperand_ParseFail;
3002 
3003   AMDGPUOperand::Modifiers Mods;
3004   Mods.Sext = Sext;
3005 
3006   if (Mods.hasIntModifiers()) {
3007     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3008     if (Op.isExpr()) {
3009       Error(Op.getStartLoc(), "expected an absolute expression");
3010       return MatchOperand_ParseFail;
3011     }
3012     Op.setModifiers(Mods);
3013   }
3014 
3015   return MatchOperand_Success;
3016 }
3017 
// Register-only variant of parseRegOrImmWithFPInputMods (immediates rejected).
OperandMatchResultTy
AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
  return parseRegOrImmWithFPInputMods(Operands, false);
}
3022 
// Register-only variant of parseRegOrImmWithIntInputMods (immediates rejected).
OperandMatchResultTy
AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
  return parseRegOrImmWithIntInputMods(Operands, false);
}
3027 
3028 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3029   auto Loc = getLoc();
3030   if (trySkipId("off")) {
3031     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3032                                                 AMDGPUOperand::ImmTyOff, false));
3033     return MatchOperand_Success;
3034   }
3035 
3036   if (!isRegister())
3037     return MatchOperand_NoMatch;
3038 
3039   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3040   if (Reg) {
3041     Operands.push_back(std::move(Reg));
3042     return MatchOperand_Success;
3043   }
3044 
3045   return MatchOperand_ParseFail;
3046 
3047 }
3048 
3049 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3050   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3051 
3052   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3053       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3054       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3055       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3056     return Match_InvalidOperand;
3057 
3058   if ((TSFlags & SIInstrFlags::VOP3) &&
3059       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3060       getForcedEncodingSize() != 64)
3061     return Match_PreferE32;
3062 
3063   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3064       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3065     // v_mac_f32/16 allow only dst_sel == DWORD;
3066     auto OpNum =
3067         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3068     const auto &Op = Inst.getOperand(OpNum);
3069     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3070       return Match_InvalidOperand;
3071     }
3072   }
3073 
3074   return Match_Success;
3075 }
3076 
// Return the full set of assembler variants; used when no particular
// encoding has been forced by an instruction suffix.
static ArrayRef<unsigned> getAllVariants() {
  static const unsigned Variants[] = {
    AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
    AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
  };

  return makeArrayRef(Variants);
}
3085 
3086 // What asm variants we should check
ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
  // A forced encoding narrows the candidate set to a single variant
  // (SDWA also covers SDWA9); otherwise all variants are considered.
  // The arrays are static so the returned ArrayRef never dangles.
  if (getForcedEncodingSize() == 32) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
    return makeArrayRef(Variants);
  }

  if (isForcedVOP3()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
    return makeArrayRef(Variants);
  }

  if (isForcedSDWA()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
                                        AMDGPUAsmVariants::SDWA9};
    return makeArrayRef(Variants);
  }

  if (isForcedDPP()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
    return makeArrayRef(Variants);
  }

  return getAllVariants();
}
3111 
// Map the forced encoding, if any, to its mnemonic suffix; empty string
// when no encoding has been forced.
StringRef AMDGPUAsmParser::getMatchedVariantName() const {
  if (getForcedEncodingSize() == 32)
    return "e32";

  if (isForcedVOP3())
    return "e64";

  if (isForcedSDWA())
    return "sdwa";

  if (isForcedDPP())
    return "dpp";

  return "";
}
3127 
3128 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3129   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3130   const unsigned Num = Desc.getNumImplicitUses();
3131   for (unsigned i = 0; i < Num; ++i) {
3132     unsigned Reg = Desc.ImplicitUses[i];
3133     switch (Reg) {
3134     case AMDGPU::FLAT_SCR:
3135     case AMDGPU::VCC:
3136     case AMDGPU::VCC_LO:
3137     case AMDGPU::VCC_HI:
3138     case AMDGPU::M0:
3139       return Reg;
3140     default:
3141       break;
3142     }
3143   }
3144   return AMDGPU::NoRegister;
3145 }
3146 
3147 // NB: This code is correct only when used to check constant
3148 // bus limitations because GFX7 support no f16 inline constants.
3149 // Note that there are no cases when a GFX7 opcode violates
3150 // constant bus limitations due to the use of an f16 constant.
// Return true if operand \p OpIdx of \p Inst can be encoded as an inline
// constant (and hence does not consume the constant bus / a literal slot).
bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
                                       unsigned OpIdx) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());

  // Only SI source operands may hold inline constants.
  if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
    return false;
  }

  const MCOperand &MO = Inst.getOperand(OpIdx);

  int64_t Val = MO.getImm();
  auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);

  switch (OpSize) { // expected operand size
  case 8:
    return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
  case 4:
    return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
  case 2: {
    // For 16-bit operands the rule depends on the operand type:
    // scalar int, packed int, packed fp, or scalar fp (the fallthrough).
    const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
    if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
      return AMDGPU::isInlinableIntLiteral(Val);

    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
        OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
      return AMDGPU::isInlinableIntLiteralV216(Val);

    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
        OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
      return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());

    return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}
3192 
// Maximum number of scalar values an instruction may read over the
// constant bus: one before GFX10; on GFX10+ generally two, except for
// the 64-bit shifts listed below.
unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
  if (!isGFX10Plus())
    return 1;

  switch (Opcode) {
  // 64-bit shift instructions can use only one scalar value input
  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_LSHL_B64_e64:
  case AMDGPU::V_LSHR_B64_e64:
  case AMDGPU::V_ASHR_I64_e64:
    return 1;
  default:
    return 2;
  }
}
3213 
3214 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3215   const MCOperand &MO = Inst.getOperand(OpIdx);
3216   if (MO.isImm()) {
3217     return !isInlineConstant(Inst, OpIdx);
3218   } else if (MO.isReg()) {
3219     auto Reg = MO.getReg();
3220     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3221     auto PReg = mc2PseudoReg(Reg);
3222     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3223   } else {
3224     return true;
3225   }
3226 }
3227 
// Verify the instruction does not read more scalar values over the
// constant bus than the target allows (see getConstantBusLimit).
// Counted: the special 'imm' operand, implicit SGPR reads, each distinct
// explicit SGPR, and literal values (1 or 2 slots depending on size mix).
bool
AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
                                                const OperandVector &Operands) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  unsigned LastSGPR = AMDGPU::NoRegister;
  unsigned ConstantBusUseCount = 0;
  unsigned NumLiterals = 0;
  unsigned LiteralSize;

  if (Desc.TSFlags &
      (SIInstrFlags::VOPC |
       SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
       SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
       SIInstrFlags::SDWA)) {
    // Check special imm operands (used by madmk, etc)
    if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
      ++ConstantBusUseCount;
    }

    // Each distinct SGPR counts once no matter how many operands read it.
    SmallDenseSet<unsigned> SGPRsUsed;
    unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
    if (SGPRUsed != AMDGPU::NoRegister) {
      SGPRsUsed.insert(SGPRUsed);
      ++ConstantBusUseCount;
    }

    const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
    const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
    const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

    for (int OpIdx : OpIndices) {
      if (OpIdx == -1) break;

      const MCOperand &MO = Inst.getOperand(OpIdx);
      if (usesConstantBus(Inst, OpIdx)) {
        if (MO.isReg()) {
          LastSGPR = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersections like these
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but they are disabled anyway.
          // Note that this code mimics SIInstrInfo::verifyInstruction
          if (!SGPRsUsed.count(LastSGPR)) {
            SGPRsUsed.insert(LastSGPR);
            ++ConstantBusUseCount;
          }
        } else { // Expression or a literal

          if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
            continue; // special operand like VINTERP attr_chan

          // An instruction may use only one literal.
          // This has been validated on the previous step.
          // See validateVOP3Literal.
          // This literal may be used as more than one operand.
          // If all these operands are of the same size,
          // this literal counts as one scalar value.
          // Otherwise it counts as 2 scalar values.
          // See "GFX10 Shader Programming", section 3.6.2.3.

          unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
          if (Size < 4) Size = 4;

          if (NumLiterals == 0) {
            NumLiterals = 1;
            LiteralSize = Size;
          } else if (LiteralSize != Size) {
            NumLiterals = 2;
          }
        }
      }
    }
  }
  ConstantBusUseCount += NumLiterals;

  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
    return true;

  // Point the diagnostic at whichever offending operand appears later.
  SMLoc LitLoc = getLitLoc(Operands);
  SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
  SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
  Error(Loc, "invalid operand (violates constant bus restrictions)");
  return false;
}
3316 
// For instructions whose vdst operand carries the EARLY_CLOBBER
// constraint, reject any source register that overlaps the destination.
bool
AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
                                                 const OperandVector &Operands) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);

  const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
  if (DstIdx == -1 ||
      Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
    return true;
  }

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();

  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

  assert(DstIdx != -1);
  const MCOperand &Dst = Inst.getOperand(DstIdx);
  assert(Dst.isReg());
  const unsigned DstReg = mc2PseudoReg(Dst.getReg());

  const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };

  // A -1 index means there are no further source operands.
  for (int SrcIdx : SrcIndices) {
    if (SrcIdx == -1) break;
    const MCOperand &Src = Inst.getOperand(SrcIdx);
    if (Src.isReg()) {
      const unsigned SrcReg = mc2PseudoReg(Src.getReg());
      if (isRegIntersect(DstReg, SrcReg, TRI)) {
        Error(getRegLoc(SrcReg, Operands),
          "destination must be different than all sources");
        return false;
      }
    }
  }

  return true;
}
3357 
3358 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3359 
3360   const unsigned Opc = Inst.getOpcode();
3361   const MCInstrDesc &Desc = MII.get(Opc);
3362 
3363   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3364     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3365     assert(ClampIdx != -1);
3366     return Inst.getOperand(ClampIdx).getImm() == 0;
3367   }
3368 
3369   return true;
3370 }
3371 
// Check that the vdata register width of a MIMG instruction matches the
// number of enabled dmask channels, plus one dword for TFE, halved for
// packed D16.
bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
    return true;

  int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);

  assert(VDataIdx != -1);

  if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
    return true;

  unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
  // TFE set adds one extra dword for the out-of-range flag.
  unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
  if (DMask == 0)
    DMask = 1;

  // Gather4 returns four channels regardless of dmask.
  unsigned DataSize =
    (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
  if (hasPackedD16()) {
    int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
    // With packed D16 two 16-bit channels share one dword (round up).
    if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
      DataSize = (DataSize + 1) / 2;
  }

  return (VDataSize / 4) == DataSize + TFESize;
}
3405 
// Check that the address operand width of a GFX10+ MIMG instruction
// matches the count required by its dimension, A16 and G16 settings.
bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
    return true;

  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);

  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
  int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
  int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
  int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);

  assert(VAddr0Idx != -1);
  assert(SrsrcIdx != -1);
  assert(SrsrcIdx > VAddr0Idx);

  if (DimIdx == -1)
    return true; // intersect_ray

  unsigned Dim = Inst.getOperand(DimIdx).getImm();
  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
  // NSA encoding: each address component is a separate operand between
  // vaddr0 and srsrc; otherwise vaddr0 is a single register tuple.
  bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
  unsigned VAddrSize =
      IsNSA ? SrsrcIdx - VAddr0Idx
            : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
  bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());

  unsigned AddrSize =
      AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());

  // Non-NSA tuples are rounded up to the next power-of-two register width.
  if (!IsNSA) {
    if (AddrSize > 8)
      AddrSize = 16;
    else if (AddrSize > 4)
      AddrSize = 8;
  }

  return VAddrSize == AddrSize;
}
3449 
3450 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3451 
3452   const unsigned Opc = Inst.getOpcode();
3453   const MCInstrDesc &Desc = MII.get(Opc);
3454 
3455   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3456     return true;
3457   if (!Desc.mayLoad() || !Desc.mayStore())
3458     return true; // Not atomic
3459 
3460   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3461   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3462 
3463   // This is an incomplete check because image_atomic_cmpswap
3464   // may only use 0x3 and 0xf while other atomic operations
3465   // may use 0x1 and 0x3. However these limitations are
3466   // verified when we check that dmask matches dst size.
3467   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3468 }
3469 
3470 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3471 
3472   const unsigned Opc = Inst.getOpcode();
3473   const MCInstrDesc &Desc = MII.get(Opc);
3474 
3475   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3476     return true;
3477 
3478   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3479   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3480 
3481   // GATHER4 instructions use dmask in a different fashion compared to
3482   // other MIMG instructions. The only useful DMASK values are
3483   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3484   // (red,red,red,red) etc.) The ISA document doesn't mention
3485   // this.
3486   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3487 }
3488 
3489 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3490   const unsigned Opc = Inst.getOpcode();
3491   const MCInstrDesc &Desc = MII.get(Opc);
3492 
3493   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3494     return true;
3495 
3496   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3497   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3498       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3499 
3500   if (!BaseOpcode->MSAA)
3501     return true;
3502 
3503   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3504   assert(DimIdx != -1);
3505 
3506   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3507   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3508 
3509   return DimInfo->MSAA;
3510 }
3511 
3512 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3513 {
3514   switch (Opcode) {
3515   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3516   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3517   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3518     return true;
3519   default:
3520     return false;
3521   }
3522 }
3523 
3524 // movrels* opcodes should only allow VGPRS as src0.
3525 // This is specified in .td description for vop1/vop3,
3526 // but sdwa is handled differently. See isSDWAOperand.
3527 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3528                                       const OperandVector &Operands) {
3529 
3530   const unsigned Opc = Inst.getOpcode();
3531   const MCInstrDesc &Desc = MII.get(Opc);
3532 
3533   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3534     return true;
3535 
3536   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3537   assert(Src0Idx != -1);
3538 
3539   SMLoc ErrLoc;
3540   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3541   if (Src0.isReg()) {
3542     auto Reg = mc2PseudoReg(Src0.getReg());
3543     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3544     if (!isSGPR(Reg, TRI))
3545       return true;
3546     ErrLoc = getRegLoc(Reg, Operands);
3547   } else {
3548     ErrLoc = getConstLoc(Operands);
3549   }
3550 
3551   Error(ErrLoc, "source operand must be a VGPR");
3552   return false;
3553 }
3554 
3555 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3556                                           const OperandVector &Operands) {
3557 
3558   const unsigned Opc = Inst.getOpcode();
3559 
3560   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3561     return true;
3562 
3563   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3564   assert(Src0Idx != -1);
3565 
3566   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3567   if (!Src0.isReg())
3568     return true;
3569 
3570   auto Reg = mc2PseudoReg(Src0.getReg());
3571   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3572   if (isSGPR(Reg, TRI)) {
3573     Error(getRegLoc(Reg, Operands),
3574           "source operand must be either a VGPR or an inline constant");
3575     return false;
3576   }
3577 
3578   return true;
3579 }
3580 
3581 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3582   switch (Inst.getOpcode()) {
3583   default:
3584     return true;
3585   case V_DIV_SCALE_F32_gfx6_gfx7:
3586   case V_DIV_SCALE_F32_vi:
3587   case V_DIV_SCALE_F32_gfx10:
3588   case V_DIV_SCALE_F64_gfx6_gfx7:
3589   case V_DIV_SCALE_F64_vi:
3590   case V_DIV_SCALE_F64_gfx10:
3591     break;
3592   }
3593 
3594   // TODO: Check that src0 = src1 or src2.
3595 
3596   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3597                     AMDGPU::OpName::src2_modifiers,
3598                     AMDGPU::OpName::src2_modifiers}) {
3599     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3600             .getImm() &
3601         SISrcMods::ABS) {
3602       return false;
3603     }
3604   }
3605 
3606   return true;
3607 }
3608 
3609 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3610 
3611   const unsigned Opc = Inst.getOpcode();
3612   const MCInstrDesc &Desc = MII.get(Opc);
3613 
3614   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3615     return true;
3616 
3617   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3618   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3619     if (isCI() || isSI())
3620       return false;
3621   }
3622 
3623   return true;
3624 }
3625 
3626 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3627   const unsigned Opc = Inst.getOpcode();
3628   const MCInstrDesc &Desc = MII.get(Opc);
3629 
3630   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3631     return true;
3632 
3633   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3634   if (DimIdx < 0)
3635     return true;
3636 
3637   long Imm = Inst.getOperand(DimIdx).getImm();
3638   if (Imm < 0 || Imm >= 8)
3639     return false;
3640 
3641   return true;
3642 }
3643 
// Returns true for "rev" opcode variants (v_subrev*, v_subbrev*,
// v_lshlrev*, v_lshrrev*, v_ashrrev* and their packed forms), i.e. the
// opcodes that take their source operands in reversed order relative to
// the non-rev form. Used by validateLdsDirect() to reject lds_direct with
// these instructions.
static bool IsRevOpcode(const unsigned Opcode)
{
  switch (Opcode) {
  case AMDGPU::V_SUBREV_F32_e32:
  case AMDGPU::V_SUBREV_F32_e64:
  case AMDGPU::V_SUBREV_F32_e32_gfx10:
  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e32_vi:
  case AMDGPU::V_SUBREV_F32_e64_gfx10:
  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32:
  case AMDGPU::V_SUBREV_CO_U32_e64:
  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:

  case AMDGPU::V_SUBBREV_U32_e32:
  case AMDGPU::V_SUBBREV_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e32_vi:
  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_U32_e32_vi:
  case AMDGPU::V_SUBREV_U32_e64_gfx9:
  case AMDGPU::V_SUBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32_gfx10:
  case AMDGPU::V_SUBREV_F16_e32_vi:
  case AMDGPU::V_SUBREV_F16_e64_gfx10:
  case AMDGPU::V_SUBREV_F16_e64_vi:

  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32_vi:
  case AMDGPU::V_SUBREV_U16_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:

  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:

  case AMDGPU::V_LSHRREV_B32_e32:
  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e32_vi:
  case AMDGPU::V_LSHRREV_B32_e64_vi:
  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
  case AMDGPU::V_LSHRREV_B32_e64_gfx10:

  case AMDGPU::V_ASHRREV_I32_e32:
  case AMDGPU::V_ASHRREV_I32_e64:
  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e32_vi:
  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e64_vi:

  case AMDGPU::V_LSHLREV_B32_e32:
  case AMDGPU::V_LSHLREV_B32_e64:
  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e32_vi:
  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e64_vi:

  case AMDGPU::V_LSHLREV_B16_e32:
  case AMDGPU::V_LSHLREV_B16_e64:
  case AMDGPU::V_LSHLREV_B16_e32_vi:
  case AMDGPU::V_LSHLREV_B16_e64_vi:
  case AMDGPU::V_LSHLREV_B16_gfx10:

  case AMDGPU::V_LSHRREV_B16_e32:
  case AMDGPU::V_LSHRREV_B16_e64:
  case AMDGPU::V_LSHRREV_B16_e32_vi:
  case AMDGPU::V_LSHRREV_B16_e64_vi:
  case AMDGPU::V_LSHRREV_B16_gfx10:

  case AMDGPU::V_ASHRREV_I16_e32:
  case AMDGPU::V_ASHRREV_I16_e64:
  case AMDGPU::V_ASHRREV_I16_e32_vi:
  case AMDGPU::V_ASHRREV_I16_e64_vi:
  case AMDGPU::V_ASHRREV_I16_gfx10:

  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_vi:

  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_vi:

  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_vi:

  case AMDGPU::V_PK_LSHLREV_B16:
  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
  case AMDGPU::V_PK_LSHLREV_B16_vi:

  case AMDGPU::V_PK_LSHRREV_B16:
  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
  case AMDGPU::V_PK_LSHRREV_B16_vi:
  case AMDGPU::V_PK_ASHRREV_I16:
  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
  case AMDGPU::V_PK_ASHRREV_I16_vi:
    return true;
  default:
    return false;
  }
}
3772 
// Validate any use of the lds_direct special register as a source operand.
// Returns an error message when the use is invalid, None otherwise.
Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {

  using namespace SIInstrFlags;
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);

  // lds_direct register is defined so that it can be used
  // with 9-bit operands only. Ignore encodings which do not accept these.
  const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
  if ((Desc.TSFlags & Enc) == 0)
    return None;

  // Scan src0..src2 in order; a missing source index ends the scan.
  for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
    auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
    if (SrcIdx == -1)
      break;
    const auto &Src = Inst.getOperand(SrcIdx);
    if (Src.isReg() && Src.getReg() == LDS_DIRECT) {

      if (isGFX90A())
        return StringRef("lds_direct is not supported on this GPU");

      // Rejected for reversed-operand opcodes and for SDWA encodings.
      if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
        return StringRef("lds_direct cannot be used with this instruction");

      if (SrcName != OpName::src0)
        return StringRef("lds_direct may be used as src0 only");
    }
  }

  return None;
}
3805 
3806 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3807   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3808     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3809     if (Op.isFlatOffset())
3810       return Op.getStartLoc();
3811   }
3812   return getLoc();
3813 }
3814 
3815 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3816                                          const OperandVector &Operands) {
3817   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3818   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3819     return true;
3820 
3821   auto Opcode = Inst.getOpcode();
3822   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3823   assert(OpNum != -1);
3824 
3825   const auto &Op = Inst.getOperand(OpNum);
3826   if (!hasFlatOffsets() && Op.getImm() != 0) {
3827     Error(getFlatOffsetLoc(Operands),
3828           "flat offset modifier is not supported on this GPU");
3829     return false;
3830   }
3831 
3832   // For FLAT segment the offset must be positive;
3833   // MSB is ignored and forced to zero.
3834   if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
3835     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
3836     if (!isIntN(OffsetSize, Op.getImm())) {
3837       Error(getFlatOffsetLoc(Operands),
3838             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
3839       return false;
3840     }
3841   } else {
3842     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
3843     if (!isUIntN(OffsetSize, Op.getImm())) {
3844       Error(getFlatOffsetLoc(Operands),
3845             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
3846       return false;
3847     }
3848   }
3849 
3850   return true;
3851 }
3852 
3853 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3854   // Start with second operand because SMEM Offset cannot be dst or src0.
3855   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
3856     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3857     if (Op.isSMEMOffset())
3858       return Op.getStartLoc();
3859   }
3860   return getLoc();
3861 }
3862 
3863 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3864                                          const OperandVector &Operands) {
3865   if (isCI() || isSI())
3866     return true;
3867 
3868   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3869   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3870     return true;
3871 
3872   auto Opcode = Inst.getOpcode();
3873   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3874   if (OpNum == -1)
3875     return true;
3876 
3877   const auto &Op = Inst.getOperand(OpNum);
3878   if (!Op.isImm())
3879     return true;
3880 
3881   uint64_t Offset = Op.getImm();
3882   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3883   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3884       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3885     return true;
3886 
3887   Error(getSMEMOffsetLoc(Operands),
3888         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3889                                "expected a 21-bit signed offset");
3890 
3891   return false;
3892 }
3893 
3894 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3895   unsigned Opcode = Inst.getOpcode();
3896   const MCInstrDesc &Desc = MII.get(Opcode);
3897   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3898     return true;
3899 
3900   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3901   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3902 
3903   const int OpIndices[] = { Src0Idx, Src1Idx };
3904 
3905   unsigned NumExprs = 0;
3906   unsigned NumLiterals = 0;
3907   uint32_t LiteralValue;
3908 
3909   for (int OpIdx : OpIndices) {
3910     if (OpIdx == -1) break;
3911 
3912     const MCOperand &MO = Inst.getOperand(OpIdx);
3913     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3914     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3915       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3916         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3917         if (NumLiterals == 0 || LiteralValue != Value) {
3918           LiteralValue = Value;
3919           ++NumLiterals;
3920         }
3921       } else if (MO.isExpr()) {
3922         ++NumExprs;
3923       }
3924     }
3925   }
3926 
3927   return NumLiterals + NumExprs <= 1;
3928 }
3929 
3930 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3931   const unsigned Opc = Inst.getOpcode();
3932   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3933       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3934     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3935     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3936 
3937     if (OpSel & ~3)
3938       return false;
3939   }
3940   return true;
3941 }
3942 
3943 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
3944                                   const OperandVector &Operands) {
3945   const unsigned Opc = Inst.getOpcode();
3946   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
3947   if (DppCtrlIdx < 0)
3948     return true;
3949   unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
3950 
3951   if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
3952     // DPP64 is supported for row_newbcast only.
3953     int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3954     if (Src0Idx >= 0 &&
3955         getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
3956       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
3957       Error(S, "64 bit dpp only supports row_newbcast");
3958       return false;
3959     }
3960   }
3961 
3962   return true;
3963 }
3964 
3965 // Check if VCC register matches wavefront size
3966 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3967   auto FB = getFeatureBits();
3968   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3969     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3970 }
3971 
// VOP3 literal is only allowed in GFX10+ and only one can be used.
// Also rejects any immediate/expression in the MFMA src2 slot on
// subtargets with the MFMA inline-literal bug.
bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
                                          const OperandVector &Operands) {
  unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
    return true;

  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

  const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

  unsigned NumExprs = 0;
  unsigned NumLiterals = 0;
  // Only read after NumLiterals > 0, so leaving it uninitialized is safe.
  uint32_t LiteralValue;

  for (int OpIdx : OpIndices) {
    if (OpIdx == -1) break;

    const MCOperand &MO = Inst.getOperand(OpIdx);
    if (!MO.isImm() && !MO.isExpr())
      continue;
    if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
      continue;

    // MFMA src2 may not take an immediate at all on affected subtargets.
    if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
        getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
      Error(getConstLoc(Operands),
            "inline constants are not allowed for this operand");
      return false;
    }

    // Repeated uses of the same literal value count as one literal.
    if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
      uint32_t Value = static_cast<uint32_t>(MO.getImm());
      if (NumLiterals == 0 || LiteralValue != Value) {
        LiteralValue = Value;
        ++NumLiterals;
      }
    } else if (MO.isExpr()) {
      ++NumExprs;
    }
  }
  NumLiterals += NumExprs;

  if (!NumLiterals)
    return true;

  if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
    Error(getLitLoc(Operands), "literal operands are not supported");
    return false;
  }

  if (NumLiterals > 1) {
    Error(getLitLoc(Operands), "only one literal operand is allowed");
    return false;
  }

  return true;
}
4033 
4034 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4035 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4036                          const MCRegisterInfo *MRI) {
4037   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4038   if (OpIdx < 0)
4039     return -1;
4040 
4041   const MCOperand &Op = Inst.getOperand(OpIdx);
4042   if (!Op.isReg())
4043     return -1;
4044 
4045   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4046   auto Reg = Sub ? Sub : Op.getReg();
4047   const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4048   return AGRP32.contains(Reg) ? 1 : 0;
4049 }
4050 
// Check AGPR/VGPR register-class consistency of the dst and data operands
// of memory instructions. IsAGPROperand yields -1 (no register), 0 (VGPR)
// or 1 (AGPR) for each operand.
bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
  // Only memory instruction encodings carry the operands checked here.
  if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
                  SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
                  SIInstrFlags::DS)) == 0)
    return true;

  // DS instructions name their data operand data0; the rest use vdata.
  uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
                                                      : AMDGPU::OpName::vdata;

  const MCRegisterInfo *MRI = getMRI();
  int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
  int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);

  if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
    // Both DS data operands, when present, must agree on AGPR vs VGPR.
    int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
    if (Data2Areg >= 0 && Data2Areg != DataAreg)
      return false;
  }

  auto FB = getFeatureBits();
  if (FB[AMDGPU::FeatureGFX90AInsts]) {
    // gfx90a: dst and data must be uniformly VGPR or uniformly AGPR
    // (missing operands are not constrained).
    if (DataAreg < 0 || DstAreg < 0)
      return true;
    return DstAreg == DataAreg;
  }

  // Other subtargets: no AGPR dst or data allowed (-1 and 0 both pass).
  return DstAreg < 1 && DataAreg < 1;
}
4080 
4081 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4082   auto FB = getFeatureBits();
4083   if (!FB[AMDGPU::FeatureGFX90AInsts])
4084     return true;
4085 
4086   const MCRegisterInfo *MRI = getMRI();
4087   const MCRegisterClass &VGRP32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4088   const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4089   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4090     const MCOperand &Op = Inst.getOperand(I);
4091     if (!Op.isReg())
4092       continue;
4093 
4094     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4095     if (!Sub)
4096       continue;
4097 
4098     if (VGRP32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4099       return false;
4100     if (AGRP32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4101       return false;
4102   }
4103 
4104   return true;
4105 }
4106 
// Validate the cache-policy (cpol) operand bits against the instruction
// kind and subtarget. Reports an error and returns false on violation.
bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
                                            const OperandVector &Operands,
                                            const SMLoc &IDLoc) {
  int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
                                           AMDGPU::OpName::cpol);
  if (CPolPos == -1)
    return true;

  unsigned CPol = Inst.getOperand(CPolPos).getImm();

  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
  // SMRD instructions accept only the GLC and DLC policy bits.
  if ((TSFlags & (SIInstrFlags::SMRD)) &&
      (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
    Error(IDLoc, "invalid cache policy for SMRD instruction");
    return false;
  }

  if (isGFX90A() && (CPol & CPol::SCC)) {
    // Narrow the reported location to the "scc" token within the cpol
    // operand's source text.
    SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
    StringRef CStr(S.getPointer());
    S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
    Error(S, "scc is not supported on this GPU");
    return false;
  }

  // The remaining checks apply to atomic instructions only.
  if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
    return true;

  if (TSFlags & SIInstrFlags::IsAtomicRet) {
    // Returning atomics (except MIMG) must set glc.
    if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
      Error(IDLoc, "instruction must use glc");
      return false;
    }
  } else {
    // Non-returning atomics must not set glc.
    if (CPol & CPol::GLC) {
      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
      StringRef CStr(S.getPointer());
      S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
      Error(S, "instruction must not use glc");
      return false;
    }
  }

  return true;
}
4152 
4153 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4154                                           const SMLoc &IDLoc,
4155                                           const OperandVector &Operands) {
4156   if (auto ErrMsg = validateLdsDirect(Inst)) {
4157     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4158     return false;
4159   }
4160   if (!validateSOPLiteral(Inst)) {
4161     Error(getLitLoc(Operands),
4162       "only one literal operand is allowed");
4163     return false;
4164   }
4165   if (!validateVOP3Literal(Inst, Operands)) {
4166     return false;
4167   }
4168   if (!validateConstantBusLimitations(Inst, Operands)) {
4169     return false;
4170   }
4171   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4172     return false;
4173   }
4174   if (!validateIntClampSupported(Inst)) {
4175     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4176       "integer clamping is not supported on this GPU");
4177     return false;
4178   }
4179   if (!validateOpSel(Inst)) {
4180     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4181       "invalid op_sel operand");
4182     return false;
4183   }
4184   if (!validateDPP(Inst, Operands)) {
4185     return false;
4186   }
4187   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4188   if (!validateMIMGD16(Inst)) {
4189     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4190       "d16 modifier is not supported on this GPU");
4191     return false;
4192   }
4193   if (!validateMIMGDim(Inst)) {
4194     Error(IDLoc, "dim modifier is required on this GPU");
4195     return false;
4196   }
4197   if (!validateMIMGMSAA(Inst)) {
4198     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4199           "invalid dim; must be MSAA type");
4200     return false;
4201   }
4202   if (!validateMIMGDataSize(Inst)) {
4203     Error(IDLoc,
4204       "image data size does not match dmask and tfe");
4205     return false;
4206   }
4207   if (!validateMIMGAddrSize(Inst)) {
4208     Error(IDLoc,
4209       "image address size does not match dim and a16");
4210     return false;
4211   }
4212   if (!validateMIMGAtomicDMask(Inst)) {
4213     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4214       "invalid atomic image dmask");
4215     return false;
4216   }
4217   if (!validateMIMGGatherDMask(Inst)) {
4218     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4219       "invalid image_gather dmask: only one bit must be set");
4220     return false;
4221   }
4222   if (!validateMovrels(Inst, Operands)) {
4223     return false;
4224   }
4225   if (!validateFlatOffset(Inst, Operands)) {
4226     return false;
4227   }
4228   if (!validateSMEMOffset(Inst, Operands)) {
4229     return false;
4230   }
4231   if (!validateMAIAccWrite(Inst, Operands)) {
4232     return false;
4233   }
4234   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4235     return false;
4236   }
4237 
4238   if (!validateAGPRLdSt(Inst)) {
4239     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4240     ? "invalid register class: data and dst should be all VGPR or AGPR"
4241     : "invalid register class: agpr loads and stores not supported on this GPU"
4242     );
4243     return false;
4244   }
4245   if (!validateVGPRAlign(Inst)) {
4246     Error(IDLoc,
4247       "invalid register class: vgpr tuples must be 64 bit aligned");
4248     return false;
4249   }
4250 
4251   if (!validateDivScale(Inst)) {
4252     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4253     return false;
4254   }
4255   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4256     return false;
4257   }
4258 
4259   return true;
4260 }
4261 
4262 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4263                                             const FeatureBitset &FBS,
4264                                             unsigned VariantID = 0);
4265 
4266 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4267                                 const FeatureBitset &AvailableFeatures,
4268                                 unsigned VariantID);
4269 
// Check whether the mnemonic is supported by any assembler variant.
bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
                                       const FeatureBitset &FBS) {
  return isSupportedMnemo(Mnemo, FBS, getAllVariants());
}
4274 
4275 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4276                                        const FeatureBitset &FBS,
4277                                        ArrayRef<unsigned> Variants) {
4278   for (auto Variant : Variants) {
4279     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4280       return true;
4281   }
4282 
4283   return false;
4284 }
4285 
// Diagnose a mnemonic that failed to match: distinguish "wrong variant",
// "not supported on this GPU" and "invalid instruction" (with a spelling
// suggestion). Returns false if the mnemonic is in fact supported.
bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
                                                  const SMLoc &IDLoc) {
  FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());

  // Check if requested instruction variant is supported.
  if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
    return false;

  // This instruction is not supported.
  // Clear any other pending errors because they are no longer relevant.
  getParser().clearPendingErrors();

  // Requested instruction variant is not supported.
  // Check if any other variants are supported.
  StringRef VariantName = getMatchedVariantName();
  if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
    return Error(IDLoc,
                 Twine(VariantName,
                       " variant of this instruction is not supported"));
  }

  // Finally check if this instruction is supported on any other GPU.
  if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
    return Error(IDLoc, "instruction not supported on this GPU");
  }

  // Instruction not supported on any GPU. Probably a typo.
  std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
  return Error(IDLoc, "invalid instruction" + Suggestion);
}
4316 
// Try to match the parsed operands against every eligible instruction
// variant, validate the winning MCInst and emit it. Returns true on error
// (MC parser convention).
bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  MCInst Inst;
  unsigned Result = Match_Success;
  for (auto Variant : getMatchedVariants()) {
    uint64_t EI;
    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
                                  Variant);
    // We order match statuses from least to most specific. We use most specific
    // status as resulting
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
    if ((R == Match_Success) ||
        (R == Match_PreferE32) ||
        (R == Match_MissingFeature && Result != Match_PreferE32) ||
        (R == Match_InvalidOperand && Result != Match_MissingFeature
                                   && Result != Match_PreferE32) ||
        (R == Match_MnemonicFail   && Result != Match_InvalidOperand
                                   && Result != Match_MissingFeature
                                   && Result != Match_PreferE32)) {
      Result = R;
      ErrorInfo = EI;
    }
    if (R == Match_Success)
      break;
  }

  if (Result == Match_Success) {
    // Run target-specific semantic checks before emitting.
    if (!validateInstruction(Inst, IDLoc, Operands)) {
      return true;
    }
    Inst.setLoc(IDLoc);
    Out.emitInstruction(Inst, getSTI());
    return false;
  }

  // No variant matched: report the most specific failure collected above.
  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
  if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
    return true;
  }

  switch (Result) {
  default: break;
  case Match_MissingFeature:
    // It has been verified that the specified instruction
    // mnemonic is valid. A match was found but it requires
    // features which are not supported on this GPU.
    return Error(IDLoc, "operands are not valid for this GPU or mode");

  case Match_InvalidOperand: {
    // ErrorInfo indexes the offending operand when available.
    SMLoc ErrorLoc = IDLoc;
    if (ErrorInfo != ~0ULL) {
      if (ErrorInfo >= Operands.size()) {
        return Error(IDLoc, "too few operands for instruction");
      }
      ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
      if (ErrorLoc == SMLoc())
        ErrorLoc = IDLoc;
    }
    return Error(ErrorLoc, "invalid operand for instruction");
  }

  case Match_PreferE32:
    return Error(IDLoc, "internal error: instruction without _e64 suffix "
                        "should be encoded as e32");
  case Match_MnemonicFail:
    llvm_unreachable("Invalid instructions should have been handled already");
  }
  llvm_unreachable("Implement any new match types added!");
}
4389 
4390 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4391   int64_t Tmp = -1;
4392   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4393     return true;
4394   }
4395   if (getParser().parseAbsoluteExpression(Tmp)) {
4396     return true;
4397   }
4398   Ret = static_cast<uint32_t>(Tmp);
4399   return false;
4400 }
4401 
// Parse a "major, minor" version pair shared by several legacy HSA
// directives. On success, stores both numbers and returns false; on
// failure emits a diagnostic at the current token and returns true.
bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
                                               uint32_t &Minor) {
  // Major version number.
  if (ParseAsAbsoluteExpression(Major))
    return TokError("invalid major version");

  // The two numbers must be separated by a comma.
  if (!trySkipToken(AsmToken::Comma))
    return TokError("minor version number required, comma expected");

  // Minor version number.
  if (ParseAsAbsoluteExpression(Minor))
    return TokError("invalid minor version");

  return false;
}
4415 
4416 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4417   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4418     return TokError("directive only supported for amdgcn architecture");
4419 
4420   std::string TargetIDDirective;
4421   SMLoc TargetStart = getTok().getLoc();
4422   if (getParser().parseEscapedString(TargetIDDirective))
4423     return true;
4424 
4425   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4426   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4427     return getParser().Error(TargetRange.Start,
4428         (Twine(".amdgcn_target directive's target id ") +
4429          Twine(TargetIDDirective) +
4430          Twine(" does not match the specified target id ") +
4431          Twine(getTargetStreamer().getTargetID()->toString())).str());
4432 
4433   return false;
4434 }
4435 
// Emit a generic "value out of range" diagnostic covering \p Range.
// Always returns true so callers can write 'return OutOfRangeError(R);'.
bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
  return Error(Range.Start, "value out of range", Range);
}
4439 
// Convert raw next-free VGPR/SGPR counts from .amdhsa_ directives into the
// granulated block counts encoded in COMPUTE_PGM_RSRC1.
//
// \p VGPRRange / \p SGPRRange locate the originating directives so
// out-of-range diagnostics point at the right operand. Returns true (with
// a diagnostic emitted) on failure, false on success with \p VGPRBlocks
// and \p SGPRBlocks filled in.
bool AMDGPUAsmParser::calculateGPRBlocks(
    const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
    bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
    SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
    unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
  // TODO(scott.linder): These calculations are duplicated from
  // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
  IsaVersion Version = getIsaVersion(getSTI().getCPU());

  unsigned NumVGPRs = NextFreeVGPR;
  unsigned NumSGPRs = NextFreeSGPR;

  if (Version.Major >= 10)
    // GFX10+ does not encode an SGPR count here.
    NumSGPRs = 0;
  else {
    unsigned MaxAddressableNumSGPRs =
        IsaInfo::getAddressableNumSGPRs(&getSTI());

    // GFX8+ without the SGPR-init bug: the addressable limit applies
    // BEFORE the extra SGPRs (VCC/FLAT_SCRATCH/XNACK) are added.
    if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
        NumSGPRs > MaxAddressableNumSGPRs)
      return OutOfRangeError(SGPRRange);

    NumSGPRs +=
        IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);

    // GFX7 and earlier, or targets with the SGPR-init bug: the limit
    // applies AFTER the extra SGPRs are included.
    if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
        NumSGPRs > MaxAddressableNumSGPRs)
      return OutOfRangeError(SGPRRange);

    // The init-bug workaround forces a fixed SGPR allocation.
    if (Features.test(FeatureSGPRInitBug))
      NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
  }

  VGPRBlocks =
      IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
  SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);

  return false;
}
4479 
// Parse a complete ".amdhsa_kernel <name> ... .end_amdhsa_kernel" block and
// emit the resulting kernel descriptor through the target streamer.
//
// Each inner ".amdhsa_*" directive may appear at most once. Most map
// directly to bit-fields of the kernel descriptor; register-count
// directives are accumulated and converted to granulated block counts at
// the end via calculateGPRBlocks(). Returns true on any parse or
// validation error.
bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
    return TokError("directive only supported for amdgcn architecture");

  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
    return TokError("directive only supported for amdhsa OS");

  StringRef KernelName;
  if (getParser().parseIdentifier(KernelName))
    return true;

  // Start from the target's default descriptor; directives overwrite fields.
  kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());

  // Directives already seen; repeats are rejected below.
  StringSet<> Seen;

  IsaVersion IVersion = getIsaVersion(getSTI().getCPU());

  // State accumulated across directives and validated after the loop.
  SMRange VGPRRange;
  uint64_t NextFreeVGPR = 0;
  uint64_t AccumOffset = 0;
  SMRange SGPRRange;
  uint64_t NextFreeSGPR = 0;
  unsigned UserSGPRCount = 0;
  bool ReserveVCC = true;
  bool ReserveFlatScr = true;
  Optional<bool> EnableWavefrontSize32;

  // Consume one ".amdhsa_* <value>" per iteration until the end marker.
  while (true) {
    while (trySkipToken(AsmToken::EndOfStatement));

    StringRef ID;
    SMRange IDRange = getTok().getLocRange();
    if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
      return true;

    if (ID == ".end_amdhsa_kernel")
      break;

    if (Seen.find(ID) != Seen.end())
      return TokError(".amdhsa_ directives cannot be repeated");
    Seen.insert(ID);

    SMLoc ValStart = getLoc();
    int64_t IVal;
    if (getParser().parseAbsoluteExpression(IVal))
      return true;
    SMLoc ValEnd = getLoc();
    SMRange ValRange = SMRange(ValStart, ValEnd);

    // All .amdhsa_ values are unsigned.
    if (IVal < 0)
      return OutOfRangeError(ValRange);

    uint64_t Val = IVal;

// Range-check VALUE against the bit-width of descriptor field ENTRY, then
// store it into FIELD. Expands to a 'return' on overflow, so it must only
// be used directly inside this parsing loop.
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
  if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
    return OutOfRangeError(RANGE);                                             \
  AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);

    if (ID == ".amdhsa_group_segment_fixed_size") {
      if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.group_segment_fixed_size = Val;
    } else if (ID == ".amdhsa_private_segment_fixed_size") {
      if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.private_segment_fixed_size = Val;
    } else if (ID == ".amdhsa_kernarg_size") {
      if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.kernarg_size = Val;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
      // User SGPR directives also track how many user SGPRs are consumed;
      // the total is validated against the descriptor field width below.
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
                       Val, ValRange);
      if (Val)
        UserSGPRCount += 4;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
                       ValRange);
      if (Val)
        UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
                       ValRange);
      if (Val)
        UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
                       Val, ValRange);
      if (Val)
        UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
                       ValRange);
      if (Val)
        UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
                       ValRange);
      if (Val)
        UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
                       Val, ValRange);
      if (Val)
        UserSGPRCount += 1;
    } else if (ID == ".amdhsa_wavefront_size32") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      // Remember the wave size: it affects VGPR block granularity in
      // calculateGPRBlocks() below.
      EnableWavefrontSize32 = Val;
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
                       Val, ValRange);
    } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val,
          ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
                       ValRange);
    } else if (ID == ".amdhsa_next_free_vgpr") {
      // Deferred: converted to a block count after the loop.
      VGPRRange = ValRange;
      NextFreeVGPR = Val;
    } else if (ID == ".amdhsa_next_free_sgpr") {
      SGPRRange = ValRange;
      NextFreeSGPR = Val;
    } else if (ID == ".amdhsa_accum_offset") {
      if (!isGFX90A())
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
      AccumOffset = Val;
    } else if (ID == ".amdhsa_reserve_vcc") {
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveVCC = Val;
    } else if (ID == ".amdhsa_reserve_flat_scratch") {
      if (IVersion.Major < 7)
        return Error(IDRange.Start, "directive requires gfx7+", IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveFlatScr = Val;
    } else if (ID == ".amdhsa_reserve_xnack_mask") {
      if (IVersion.Major < 8)
        return Error(IDRange.Start, "directive requires gfx8+", IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      // This directive is only accepted when it agrees with the configured
      // target id; the XNACK setting itself comes from the target.
      if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
        return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
                                 IDRange);
    } else if (ID == ".amdhsa_float_round_mode_32") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
    } else if (ID == ".amdhsa_float_round_mode_16_64") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_32") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
                       ValRange);
    } else if (ID == ".amdhsa_dx10_clamp") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
    } else if (ID == ".amdhsa_ieee_mode") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
                       Val, ValRange);
    } else if (ID == ".amdhsa_fp16_overflow") {
      if (IVersion.Major < 9)
        return Error(IDRange.Start, "directive requires gfx9+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
                       ValRange);
    } else if (ID == ".amdhsa_tg_split") {
      if (!isGFX90A())
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
                       ValRange);
    } else if (ID == ".amdhsa_workgroup_processor_mode") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
                       ValRange);
    } else if (ID == ".amdhsa_memory_ordered") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
                       ValRange);
    } else if (ID == ".amdhsa_forward_progress") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
                       ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
          ValRange);
    } else if (ID == ".amdhsa_exception_fp_denorm_src") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
          ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_int_div_zero") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
                       Val, ValRange);
    } else {
      return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
    }

#undef PARSE_BITS_ENTRY
  }

  // The two register-count directives are mandatory.
  if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
    return TokError(".amdhsa_next_free_vgpr directive is required");

  if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
    return TokError(".amdhsa_next_free_sgpr directive is required");

  // Convert raw register counts to granulated block counts and validate
  // that they fit in their descriptor fields.
  unsigned VGPRBlocks;
  unsigned SGPRBlocks;
  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
                         getTargetStreamer().getTargetID()->isXnackOnOrAny(),
                         EnableWavefrontSize32, NextFreeVGPR,
                         VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
                         SGPRBlocks))
    return true;

  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
          VGPRBlocks))
    return OutOfRangeError(VGPRRange);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);

  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
          SGPRBlocks))
    return OutOfRangeError(SGPRRange);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
                  SGPRBlocks);

  if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
    return TokError("too many user SGPRs enabled");
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
                  UserSGPRCount);

  // gfx90a requires and validates .amdhsa_accum_offset; the field encodes
  // (offset / 4) - 1.
  if (isGFX90A()) {
    if (Seen.find(".amdhsa_accum_offset") == Seen.end())
      return TokError(".amdhsa_accum_offset directive is required");
    if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
      return TokError("accum_offset should be in range [4..256] in "
                      "increments of 4");
    if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
      return TokError("accum_offset exceeds total VGPR allocation");
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
                    (AccumOffset / 4 - 1));
  }

  getTargetStreamer().EmitAmdhsaKernelDescriptor(
      getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
      ReserveFlatScr);
  return false;
}
4784 
4785 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4786   uint32_t Major;
4787   uint32_t Minor;
4788 
4789   if (ParseDirectiveMajorMinor(Major, Minor))
4790     return true;
4791 
4792   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4793   return false;
4794 }
4795 
4796 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4797   uint32_t Major;
4798   uint32_t Minor;
4799   uint32_t Stepping;
4800   StringRef VendorName;
4801   StringRef ArchName;
4802 
4803   // If this directive has no arguments, then use the ISA version for the
4804   // targeted GPU.
4805   if (isToken(AsmToken::EndOfStatement)) {
4806     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4807     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
4808                                                         ISA.Stepping,
4809                                                         "AMD", "AMDGPU");
4810     return false;
4811   }
4812 
4813   if (ParseDirectiveMajorMinor(Major, Minor))
4814     return true;
4815 
4816   if (!trySkipToken(AsmToken::Comma))
4817     return TokError("stepping version number required, comma expected");
4818 
4819   if (ParseAsAbsoluteExpression(Stepping))
4820     return TokError("invalid stepping version");
4821 
4822   if (!trySkipToken(AsmToken::Comma))
4823     return TokError("vendor name required, comma expected");
4824 
4825   if (!parseString(VendorName, "invalid vendor name"))
4826     return true;
4827 
4828   if (!trySkipToken(AsmToken::Comma))
4829     return TokError("arch name required, comma expected");
4830 
4831   if (!parseString(ArchName, "invalid arch name"))
4832     return true;
4833 
4834   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
4835                                                       VendorName, ArchName);
4836   return false;
4837 }
4838 
4839 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4840                                                amd_kernel_code_t &Header) {
4841   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4842   // assembly for backwards compatibility.
4843   if (ID == "max_scratch_backing_memory_byte_size") {
4844     Parser.eatToEndOfStatement();
4845     return false;
4846   }
4847 
4848   SmallString<40> ErrStr;
4849   raw_svector_ostream Err(ErrStr);
4850   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4851     return TokError(Err.str());
4852   }
4853   Lex();
4854 
4855   if (ID == "enable_wavefront_size32") {
4856     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4857       if (!isGFX10Plus())
4858         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4859       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4860         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4861     } else {
4862       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4863         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4864     }
4865   }
4866 
4867   if (ID == "wavefront_size") {
4868     if (Header.wavefront_size == 5) {
4869       if (!isGFX10Plus())
4870         return TokError("wavefront_size=5 is only allowed on GFX10+");
4871       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4872         return TokError("wavefront_size=5 requires +WavefrontSize32");
4873     } else if (Header.wavefront_size == 6) {
4874       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4875         return TokError("wavefront_size=6 requires +WavefrontSize64");
4876     }
4877   }
4878 
4879   if (ID == "enable_wgp_mode") {
4880     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
4881         !isGFX10Plus())
4882       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4883   }
4884 
4885   if (ID == "enable_mem_ordered") {
4886     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
4887         !isGFX10Plus())
4888       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4889   }
4890 
4891   if (ID == "enable_fwd_progress") {
4892     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
4893         !isGFX10Plus())
4894       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4895   }
4896 
4897   return false;
4898 }
4899 
4900 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4901   amd_kernel_code_t Header;
4902   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4903 
4904   while (true) {
4905     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
4906     // will set the current token to EndOfStatement.
4907     while(trySkipToken(AsmToken::EndOfStatement));
4908 
4909     StringRef ID;
4910     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
4911       return true;
4912 
4913     if (ID == ".end_amd_kernel_code_t")
4914       break;
4915 
4916     if (ParseAMDKernelCodeTValue(ID, Header))
4917       return true;
4918   }
4919 
4920   getTargetStreamer().EmitAMDKernelCodeT(Header);
4921 
4922   return false;
4923 }
4924 
4925 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4926   StringRef KernelName;
4927   if (!parseId(KernelName, "expected symbol name"))
4928     return true;
4929 
4930   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4931                                            ELF::STT_AMDGPU_HSA_KERNEL);
4932 
4933   KernelScope.initialize(getContext());
4934   return false;
4935 }
4936 
// Parse the legacy ".amd_amdgpu_isa" directive. Its string operand must
// match the target id the assembler was configured with.
bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
    return Error(getLoc(),
                 ".amd_amdgpu_isa directive is not available on non-amdgcn "
                 "architectures");
  }

  // NOTE(review): reads the current token's string contents without first
  // checking that it is a string token — presumably guaranteed by the
  // lexer/caller; verify.
  auto TargetIDDirective = getLexer().getTok().getStringContents();
  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
    return Error(getParser().getTok().getLoc(), "target id must match options");

  getTargetStreamer().EmitISAVersion();
  // Consume the string token only after validation succeeded.
  Lex();

  return false;
}
4953 
4954 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4955   const char *AssemblerDirectiveBegin;
4956   const char *AssemblerDirectiveEnd;
4957   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4958       isHsaAbiVersion3Or4(&getSTI())
4959           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4960                             HSAMD::V3::AssemblerDirectiveEnd)
4961           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4962                             HSAMD::AssemblerDirectiveEnd);
4963 
4964   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4965     return Error(getLoc(),
4966                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4967                  "not available on non-amdhsa OSes")).str());
4968   }
4969 
4970   std::string HSAMetadataString;
4971   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4972                           HSAMetadataString))
4973     return true;
4974 
4975   if (isHsaAbiVersion3Or4(&getSTI())) {
4976     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4977       return Error(getLoc(), "invalid HSA metadata");
4978   } else {
4979     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4980       return Error(getLoc(), "invalid HSA metadata");
4981   }
4982 
4983   return false;
4984 }
4985 
/// Common code to parse out a block of text (typically YAML) between start and
/// end directives.
///
/// Appends everything between the current position and
/// \p AssemblerDirectiveEnd into \p CollectString, preserving leading
/// whitespace and line structure. Returns true if EOF is reached before
/// the end directive is found.
bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
                                          const char *AssemblerDirectiveEnd,
                                          std::string &CollectString) {

  raw_string_ostream CollectStream(CollectString);

  // Whitespace is significant inside the collected block, so stop the
  // lexer from discarding it while we scan.
  getLexer().setSkipSpace(false);

  bool FoundEnd = false;
  while (!isToken(AsmToken::Eof)) {
    // Copy leading whitespace through verbatim.
    while (isToken(AsmToken::Space)) {
      CollectStream << getTokenStr();
      Lex();
    }

    // Stop (consuming the token) when the end directive is reached.
    if (trySkipId(AssemblerDirectiveEnd)) {
      FoundEnd = true;
      break;
    }

    // Collect the rest of the line, re-inserting a statement separator so
    // the collected text keeps its line structure.
    CollectStream << Parser.parseStringToEndOfStatement()
                  << getContext().getAsmInfo()->getSeparatorString();

    Parser.eatToEndOfStatement();
  }

  // Restore normal whitespace skipping before returning.
  getLexer().setSkipSpace(true);

  if (isToken(AsmToken::Eof) && !FoundEnd) {
    return TokError(Twine("expected directive ") +
                    Twine(AssemblerDirectiveEnd) + Twine(" not found"));
  }

  CollectStream.flush();
  return false;
}
5024 
5025 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5026 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5027   std::string String;
5028   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5029                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5030     return true;
5031 
5032   auto PALMetadata = getTargetStreamer().getPALMetadata();
5033   if (!PALMetadata->setFromString(String))
5034     return Error(getLoc(), "invalid PAL metadata");
5035   return false;
5036 }
5037 
5038 /// Parse the assembler directive for old linear-format PAL metadata.
5039 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5040   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5041     return Error(getLoc(),
5042                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5043                  "not available on non-amdpal OSes")).str());
5044   }
5045 
5046   auto PALMetadata = getTargetStreamer().getPALMetadata();
5047   PALMetadata->setLegacy();
5048   for (;;) {
5049     uint32_t Key, Value;
5050     if (ParseAsAbsoluteExpression(Key)) {
5051       return TokError(Twine("invalid value in ") +
5052                       Twine(PALMD::AssemblerDirective));
5053     }
5054     if (!trySkipToken(AsmToken::Comma)) {
5055       return TokError(Twine("expected an even number of values in ") +
5056                       Twine(PALMD::AssemblerDirective));
5057     }
5058     if (ParseAsAbsoluteExpression(Value)) {
5059       return TokError(Twine("invalid value in ") +
5060                       Twine(PALMD::AssemblerDirective));
5061     }
5062     PALMetadata->setRegister(Key, Value);
5063     if (!trySkipToken(AsmToken::Comma))
5064       break;
5065   }
5066   return false;
5067 }
5068 
/// ParseDirectiveAMDGPULDS
///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
///
/// Defines a named LDS allocation of the given size (bounded by the
/// target's LDS capacity) with an optional power-of-two alignment
/// (default 4), then emits it through the target streamer.
bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
  if (getParser().checkForValidSection())
    return true;

  StringRef Name;
  SMLoc NameLoc = getLoc();
  if (getParser().parseIdentifier(Name))
    return TokError("expected identifier in directive");

  MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
  if (parseToken(AsmToken::Comma, "expected ','"))
    return true;

  unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());

  // Size operand: must be non-negative and fit in LDS.
  int64_t Size;
  SMLoc SizeLoc = getLoc();
  if (getParser().parseAbsoluteExpression(Size))
    return true;
  if (Size < 0)
    return Error(SizeLoc, "size must be non-negative");
  if (Size > LocalMemorySize)
    return Error(SizeLoc, "size is too large");

  // Optional alignment operand; defaults to 4 bytes.
  int64_t Alignment = 4;
  if (trySkipToken(AsmToken::Comma)) {
    SMLoc AlignLoc = getLoc();
    if (getParser().parseAbsoluteExpression(Alignment))
      return true;
    if (Alignment < 0 || !isPowerOf2_64(Alignment))
      return Error(AlignLoc, "alignment must be a power of two");

    // Alignment larger than the size of LDS is possible in theory, as long
    // as the linker manages to place to symbol at address 0, but we do want
    // to make sure the alignment fits nicely into a 32-bit integer.
    if (Alignment >= 1u << 31)
      return Error(AlignLoc, "alignment is too large");
  }

  if (parseToken(AsmToken::EndOfStatement,
                 "unexpected token in '.amdgpu_lds' directive"))
    return true;

  // Permit redefinition only while the symbol is still undefined.
  Symbol->redefineIfPossible();
  if (!Symbol->isUndefined())
    return Error(NameLoc, "invalid symbol redefinition");

  getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
  return false;
}
5121 
5122 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5123   StringRef IDVal = DirectiveID.getString();
5124 
5125   if (isHsaAbiVersion3Or4(&getSTI())) {
5126     if (IDVal == ".amdhsa_kernel")
5127      return ParseDirectiveAMDHSAKernel();
5128 
5129     // TODO: Restructure/combine with PAL metadata directive.
5130     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5131       return ParseDirectiveHSAMetadata();
5132   } else {
5133     if (IDVal == ".hsa_code_object_version")
5134       return ParseDirectiveHSACodeObjectVersion();
5135 
5136     if (IDVal == ".hsa_code_object_isa")
5137       return ParseDirectiveHSACodeObjectISA();
5138 
5139     if (IDVal == ".amd_kernel_code_t")
5140       return ParseDirectiveAMDKernelCodeT();
5141 
5142     if (IDVal == ".amdgpu_hsa_kernel")
5143       return ParseDirectiveAMDGPUHsaKernel();
5144 
5145     if (IDVal == ".amd_amdgpu_isa")
5146       return ParseDirectiveISAVersion();
5147 
5148     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5149       return ParseDirectiveHSAMetadata();
5150   }
5151 
5152   if (IDVal == ".amdgcn_target")
5153     return ParseDirectiveAMDGCNTarget();
5154 
5155   if (IDVal == ".amdgpu_lds")
5156     return ParseDirectiveAMDGPULDS();
5157 
5158   if (IDVal == PALMD::AssemblerDirectiveBegin)
5159     return ParseDirectivePALMetadataBegin();
5160 
5161   if (IDVal == PALMD::AssemblerDirective)
5162     return ParseDirectivePALMetadata();
5163 
5164   return true;
5165 }
5166 
5167 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5168                                            unsigned RegNo) {
5169 
5170   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
5171        R.isValid(); ++R) {
5172     if (*R == RegNo)
5173       return isGFX9Plus();
5174   }
5175 
5176   // GFX10 has 2 more SGPRs 104 and 105.
5177   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
5178        R.isValid(); ++R) {
5179     if (*R == RegNo)
5180       return hasSGPR104_SGPR105();
5181   }
5182 
5183   switch (RegNo) {
5184   case AMDGPU::SRC_SHARED_BASE:
5185   case AMDGPU::SRC_SHARED_LIMIT:
5186   case AMDGPU::SRC_PRIVATE_BASE:
5187   case AMDGPU::SRC_PRIVATE_LIMIT:
5188   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5189     return isGFX9Plus();
5190   case AMDGPU::TBA:
5191   case AMDGPU::TBA_LO:
5192   case AMDGPU::TBA_HI:
5193   case AMDGPU::TMA:
5194   case AMDGPU::TMA_LO:
5195   case AMDGPU::TMA_HI:
5196     return !isGFX9Plus();
5197   case AMDGPU::XNACK_MASK:
5198   case AMDGPU::XNACK_MASK_LO:
5199   case AMDGPU::XNACK_MASK_HI:
5200     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5201   case AMDGPU::SGPR_NULL:
5202     return isGFX10Plus();
5203   default:
5204     break;
5205   }
5206 
5207   if (isCI())
5208     return true;
5209 
5210   if (isSI() || isGFX10Plus()) {
5211     // No flat_scr on SI.
5212     // On GFX10 flat scratch is not a valid register operand and can only be
5213     // accessed with s_setreg/s_getreg.
5214     switch (RegNo) {
5215     case AMDGPU::FLAT_SCR:
5216     case AMDGPU::FLAT_SCR_LO:
5217     case AMDGPU::FLAT_SCR_HI:
5218       return false;
5219     default:
5220       return true;
5221     }
5222   }
5223 
5224   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5225   // SI/CI have.
5226   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
5227        R.isValid(); ++R) {
5228     if (*R == RegNo)
5229       return hasSGPR102_SGPR103();
5230   }
5231 
5232   return true;
5233 }
5234 
5235 OperandMatchResultTy
5236 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5237                               OperandMode Mode) {
5238   // Try to parse with a custom parser
5239   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5240 
5241   // If we successfully parsed the operand or if there as an error parsing,
5242   // we are done.
5243   //
5244   // If we are parsing after we reach EndOfStatement then this means we
5245   // are appending default values to the Operands list.  This is only done
5246   // by custom parser, so we shouldn't continue on to the generic parsing.
5247   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5248       isToken(AsmToken::EndOfStatement))
5249     return ResTy;
5250 
5251   SMLoc RBraceLoc;
5252   SMLoc LBraceLoc = getLoc();
5253   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5254     unsigned Prefix = Operands.size();
5255 
5256     for (;;) {
5257       auto Loc = getLoc();
5258       ResTy = parseReg(Operands);
5259       if (ResTy == MatchOperand_NoMatch)
5260         Error(Loc, "expected a register");
5261       if (ResTy != MatchOperand_Success)
5262         return MatchOperand_ParseFail;
5263 
5264       RBraceLoc = getLoc();
5265       if (trySkipToken(AsmToken::RBrac))
5266         break;
5267 
5268       if (!skipToken(AsmToken::Comma,
5269                      "expected a comma or a closing square bracket")) {
5270         return MatchOperand_ParseFail;
5271       }
5272     }
5273 
5274     if (Operands.size() - Prefix > 1) {
5275       Operands.insert(Operands.begin() + Prefix,
5276                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5277       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5278     }
5279 
5280     return MatchOperand_Success;
5281   }
5282 
5283   return parseRegOrImm(Operands);
5284 }
5285 
5286 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5287   // Clear any forced encodings from the previous instruction.
5288   setForcedEncodingSize(0);
5289   setForcedDPP(false);
5290   setForcedSDWA(false);
5291 
5292   if (Name.endswith("_e64")) {
5293     setForcedEncodingSize(64);
5294     return Name.substr(0, Name.size() - 4);
5295   } else if (Name.endswith("_e32")) {
5296     setForcedEncodingSize(32);
5297     return Name.substr(0, Name.size() - 4);
5298   } else if (Name.endswith("_dpp")) {
5299     setForcedDPP(true);
5300     return Name.substr(0, Name.size() - 4);
5301   } else if (Name.endswith("_sdwa")) {
5302     setForcedSDWA(true);
5303     return Name.substr(0, Name.size() - 5);
5304   }
5305   return Name;
5306 }
5307 
5308 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5309                                        StringRef Name,
5310                                        SMLoc NameLoc, OperandVector &Operands) {
5311   // Add the instruction mnemonic
5312   Name = parseMnemonicSuffix(Name);
5313   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5314 
5315   bool IsMIMG = Name.startswith("image_");
5316 
5317   while (!trySkipToken(AsmToken::EndOfStatement)) {
5318     OperandMode Mode = OperandMode_Default;
5319     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5320       Mode = OperandMode_NSA;
5321     CPolSeen = 0;
5322     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5323 
5324     if (Res != MatchOperand_Success) {
5325       checkUnsupportedInstruction(Name, NameLoc);
5326       if (!Parser.hasPendingError()) {
5327         // FIXME: use real operand location rather than the current location.
5328         StringRef Msg =
5329           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5330                                             "not a valid operand.";
5331         Error(getLoc(), Msg);
5332       }
5333       while (!trySkipToken(AsmToken::EndOfStatement)) {
5334         lex();
5335       }
5336       return true;
5337     }
5338 
5339     // Eat the comma or space if there is one.
5340     trySkipToken(AsmToken::Comma);
5341   }
5342 
5343   return false;
5344 }
5345 
5346 //===----------------------------------------------------------------------===//
5347 // Utility functions
5348 //===----------------------------------------------------------------------===//
5349 
5350 OperandMatchResultTy
5351 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5352 
5353   if (!trySkipId(Prefix, AsmToken::Colon))
5354     return MatchOperand_NoMatch;
5355 
5356   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5357 }
5358 
5359 OperandMatchResultTy
5360 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5361                                     AMDGPUOperand::ImmTy ImmTy,
5362                                     bool (*ConvertResult)(int64_t&)) {
5363   SMLoc S = getLoc();
5364   int64_t Value = 0;
5365 
5366   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5367   if (Res != MatchOperand_Success)
5368     return Res;
5369 
5370   if (ConvertResult && !ConvertResult(Value)) {
5371     Error(S, "invalid " + StringRef(Prefix) + " value.");
5372   }
5373 
5374   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5375   return MatchOperand_Success;
5376 }
5377 
5378 OperandMatchResultTy
5379 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5380                                              OperandVector &Operands,
5381                                              AMDGPUOperand::ImmTy ImmTy,
5382                                              bool (*ConvertResult)(int64_t&)) {
5383   SMLoc S = getLoc();
5384   if (!trySkipId(Prefix, AsmToken::Colon))
5385     return MatchOperand_NoMatch;
5386 
5387   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5388     return MatchOperand_ParseFail;
5389 
5390   unsigned Val = 0;
5391   const unsigned MaxSize = 4;
5392 
5393   // FIXME: How to verify the number of elements matches the number of src
5394   // operands?
5395   for (int I = 0; ; ++I) {
5396     int64_t Op;
5397     SMLoc Loc = getLoc();
5398     if (!parseExpr(Op))
5399       return MatchOperand_ParseFail;
5400 
5401     if (Op != 0 && Op != 1) {
5402       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5403       return MatchOperand_ParseFail;
5404     }
5405 
5406     Val |= (Op << I);
5407 
5408     if (trySkipToken(AsmToken::RBrac))
5409       break;
5410 
5411     if (I + 1 == MaxSize) {
5412       Error(getLoc(), "expected a closing square bracket");
5413       return MatchOperand_ParseFail;
5414     }
5415 
5416     if (!skipToken(AsmToken::Comma, "expected a comma"))
5417       return MatchOperand_ParseFail;
5418   }
5419 
5420   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5421   return MatchOperand_Success;
5422 }
5423 
5424 OperandMatchResultTy
5425 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5426                                AMDGPUOperand::ImmTy ImmTy) {
5427   int64_t Bit;
5428   SMLoc S = getLoc();
5429 
5430   if (trySkipId(Name)) {
5431     Bit = 1;
5432   } else if (trySkipId("no", Name)) {
5433     Bit = 0;
5434   } else {
5435     return MatchOperand_NoMatch;
5436   }
5437 
5438   if (Name == "r128" && !hasMIMG_R128()) {
5439     Error(S, "r128 modifier is not supported on this GPU");
5440     return MatchOperand_ParseFail;
5441   }
5442   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5443     Error(S, "a16 modifier is not supported on this GPU");
5444     return MatchOperand_ParseFail;
5445   }
5446 
5447   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5448     ImmTy = AMDGPUOperand::ImmTyR128A16;
5449 
5450   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5451   return MatchOperand_Success;
5452 }
5453 
5454 OperandMatchResultTy
5455 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5456   unsigned CPolOn = 0;
5457   unsigned CPolOff = 0;
5458   SMLoc S = getLoc();
5459 
5460   if (trySkipId("glc"))
5461     CPolOn = AMDGPU::CPol::GLC;
5462   else if (trySkipId("noglc"))
5463     CPolOff = AMDGPU::CPol::GLC;
5464   else if (trySkipId("slc"))
5465     CPolOn = AMDGPU::CPol::SLC;
5466   else if (trySkipId("noslc"))
5467     CPolOff = AMDGPU::CPol::SLC;
5468   else if (trySkipId("dlc"))
5469     CPolOn = AMDGPU::CPol::DLC;
5470   else if (trySkipId("nodlc"))
5471     CPolOff = AMDGPU::CPol::DLC;
5472   else if (trySkipId("scc"))
5473     CPolOn = AMDGPU::CPol::SCC;
5474   else if (trySkipId("noscc"))
5475     CPolOff = AMDGPU::CPol::SCC;
5476   else
5477     return MatchOperand_NoMatch;
5478 
5479   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5480     Error(S, "dlc modifier is not supported on this GPU");
5481     return MatchOperand_ParseFail;
5482   }
5483 
5484   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5485     Error(S, "scc modifier is not supported on this GPU");
5486     return MatchOperand_ParseFail;
5487   }
5488 
5489   if (CPolSeen & (CPolOn | CPolOff)) {
5490     Error(S, "duplicate cache policy modifier");
5491     return MatchOperand_ParseFail;
5492   }
5493 
5494   CPolSeen |= (CPolOn | CPolOff);
5495 
5496   for (unsigned I = 1; I != Operands.size(); ++I) {
5497     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5498     if (Op.isCPol()) {
5499       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5500       return MatchOperand_Success;
5501     }
5502   }
5503 
5504   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5505                                               AMDGPUOperand::ImmTyCPol));
5506 
5507   return MatchOperand_Success;
5508 }
5509 
5510 static void addOptionalImmOperand(
5511   MCInst& Inst, const OperandVector& Operands,
5512   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5513   AMDGPUOperand::ImmTy ImmT,
5514   int64_t Default = 0) {
5515   auto i = OptionalIdx.find(ImmT);
5516   if (i != OptionalIdx.end()) {
5517     unsigned Idx = i->second;
5518     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5519   } else {
5520     Inst.addOperand(MCOperand::createImm(Default));
5521   }
5522 }
5523 
5524 OperandMatchResultTy
5525 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5526                                        StringRef &Value,
5527                                        SMLoc &StringLoc) {
5528   if (!trySkipId(Prefix, AsmToken::Colon))
5529     return MatchOperand_NoMatch;
5530 
5531   StringLoc = getLoc();
5532   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5533                                                   : MatchOperand_ParseFail;
5534 }
5535 
5536 //===----------------------------------------------------------------------===//
5537 // MTBUF format
5538 //===----------------------------------------------------------------------===//
5539 
5540 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5541                                   int64_t MaxVal,
5542                                   int64_t &Fmt) {
5543   int64_t Val;
5544   SMLoc Loc = getLoc();
5545 
5546   auto Res = parseIntWithPrefix(Pref, Val);
5547   if (Res == MatchOperand_ParseFail)
5548     return false;
5549   if (Res == MatchOperand_NoMatch)
5550     return true;
5551 
5552   if (Val < 0 || Val > MaxVal) {
5553     Error(Loc, Twine("out of range ", StringRef(Pref)));
5554     return false;
5555   }
5556 
5557   Fmt = Val;
5558   return true;
5559 }
5560 
5561 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5562 // values to live in a joint format operand in the MCInst encoding.
OperandMatchResultTy
AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Dfmt = DFMT_UNDEF;
  int64_t Nfmt = NFMT_UNDEF;

  // dfmt and nfmt can appear in either order, and each is optional.
  for (int I = 0; I < 2; ++I) {
    // tryParseFmt leaves the value UNDEF when the keyword is absent and
    // returns false only on a hard parse error.
    if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
      return MatchOperand_ParseFail;

    if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
      return MatchOperand_ParseFail;
    }
    // Skip optional comma between dfmt/nfmt
    // but guard against 2 commas following each other.
    if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
        !peekToken().is(AsmToken::Comma)) {
      trySkipToken(AsmToken::Comma);
    }
  }

  // Neither keyword appeared: this is not a dfmt/nfmt operand at all.
  if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
    return MatchOperand_NoMatch;

  // An omitted half takes its default value.
  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;

  Format = encodeDfmtNfmt(Dfmt, Nfmt);
  return MatchOperand_Success;
}
5595 
5596 OperandMatchResultTy
5597 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5598   using namespace llvm::AMDGPU::MTBUFFormat;
5599 
5600   int64_t Fmt = UFMT_UNDEF;
5601 
5602   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5603     return MatchOperand_ParseFail;
5604 
5605   if (Fmt == UFMT_UNDEF)
5606     return MatchOperand_NoMatch;
5607 
5608   Format = Fmt;
5609   return MatchOperand_Success;
5610 }
5611 
5612 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5613                                     int64_t &Nfmt,
5614                                     StringRef FormatStr,
5615                                     SMLoc Loc) {
5616   using namespace llvm::AMDGPU::MTBUFFormat;
5617   int64_t Format;
5618 
5619   Format = getDfmt(FormatStr);
5620   if (Format != DFMT_UNDEF) {
5621     Dfmt = Format;
5622     return true;
5623   }
5624 
5625   Format = getNfmt(FormatStr, getSTI());
5626   if (Format != NFMT_UNDEF) {
5627     Nfmt = Format;
5628     return true;
5629   }
5630 
5631   Error(Loc, "unsupported format");
5632   return false;
5633 }
5634 
OperandMatchResultTy
AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
                                          SMLoc FormatLoc,
                                          int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  // Handles "format:[<fmt>]" or "format:[<fmt>,<fmt>]" where each <fmt> is a
  // symbolic dfmt or nfmt name; the two halves may appear in either order.
  int64_t Dfmt = DFMT_UNDEF;
  int64_t Nfmt = NFMT_UNDEF;
  if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
    return MatchOperand_ParseFail;

  if (trySkipToken(AsmToken::Comma)) {
    StringRef Str;
    SMLoc Loc = getLoc();
    if (!parseId(Str, "expected a format string") ||
        !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
      return MatchOperand_ParseFail;
    }
    // After two names, one must have been a dfmt and the other an nfmt; a
    // still-UNDEF half means the same kind of format was given twice.
    if (Dfmt == DFMT_UNDEF) {
      Error(Loc, "duplicate numeric format");
      return MatchOperand_ParseFail;
    } else if (Nfmt == NFMT_UNDEF) {
      Error(Loc, "duplicate data format");
      return MatchOperand_ParseFail;
    }
  }

  // An omitted half takes its default value.
  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;

  if (isGFX10Plus()) {
    // GFX10+ encodes a unified format; reject dfmt/nfmt combinations that
    // have no unified equivalent.
    auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
    if (Ufmt == UFMT_UNDEF) {
      Error(FormatLoc, "unsupported format");
      return MatchOperand_ParseFail;
    }
    Format = Ufmt;
  } else {
    Format = encodeDfmtNfmt(Dfmt, Nfmt);
  }

  return MatchOperand_Success;
}
5678 
5679 OperandMatchResultTy
5680 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5681                                             SMLoc Loc,
5682                                             int64_t &Format) {
5683   using namespace llvm::AMDGPU::MTBUFFormat;
5684 
5685   auto Id = getUnifiedFormat(FormatStr);
5686   if (Id == UFMT_UNDEF)
5687     return MatchOperand_NoMatch;
5688 
5689   if (!isGFX10Plus()) {
5690     Error(Loc, "unified format is not supported on this GPU");
5691     return MatchOperand_ParseFail;
5692   }
5693 
5694   Format = Id;
5695   return MatchOperand_Success;
5696 }
5697 
5698 OperandMatchResultTy
5699 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5700   using namespace llvm::AMDGPU::MTBUFFormat;
5701   SMLoc Loc = getLoc();
5702 
5703   if (!parseExpr(Format))
5704     return MatchOperand_ParseFail;
5705   if (!isValidFormatEncoding(Format, getSTI())) {
5706     Error(Loc, "out of range format");
5707     return MatchOperand_ParseFail;
5708   }
5709 
5710   return MatchOperand_Success;
5711 }
5712 
5713 OperandMatchResultTy
5714 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5715   using namespace llvm::AMDGPU::MTBUFFormat;
5716 
5717   if (!trySkipId("format", AsmToken::Colon))
5718     return MatchOperand_NoMatch;
5719 
5720   if (trySkipToken(AsmToken::LBrac)) {
5721     StringRef FormatStr;
5722     SMLoc Loc = getLoc();
5723     if (!parseId(FormatStr, "expected a format string"))
5724       return MatchOperand_ParseFail;
5725 
5726     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5727     if (Res == MatchOperand_NoMatch)
5728       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5729     if (Res != MatchOperand_Success)
5730       return Res;
5731 
5732     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5733       return MatchOperand_ParseFail;
5734 
5735     return MatchOperand_Success;
5736   }
5737 
5738   return parseNumericFormat(Format);
5739 }
5740 
OperandMatchResultTy
AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  // Parses the MTBUF format operand together with the following soffset
  // operand; the format may appear either before or after soffset.
  int64_t Format = getDefaultFormatEncoding(getSTI());
  OperandMatchResultTy Res;
  SMLoc Loc = getLoc();

  // Parse legacy format syntax.
  Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
  if (Res == MatchOperand_ParseFail)
    return Res;

  bool FormatFound = (Res == MatchOperand_Success);

  // Push the format operand now (default-valued if not seen yet); it is
  // patched in place below if the format turns out to follow soffset.
  Operands.push_back(
    AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));

  if (FormatFound)
    trySkipToken(AsmToken::Comma);

  if (isToken(AsmToken::EndOfStatement)) {
    // We are expecting an soffset operand,
    // but let matcher handle the error.
    return MatchOperand_Success;
  }

  // Parse soffset.
  Res = parseRegOrImm(Operands);
  if (Res != MatchOperand_Success)
    return Res;

  trySkipToken(AsmToken::Comma);

  if (!FormatFound) {
    Res = parseSymbolicOrNumericFormat(Format);
    if (Res == MatchOperand_ParseFail)
      return Res;
    if (Res == MatchOperand_Success) {
      // Update the placeholder format operand pushed above; it sits just
      // before the soffset operand parsed a moment ago.
      auto Size = Operands.size();
      AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
      assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
      Op.setImm(Format);
    }
    return MatchOperand_Success;
  }

  // A second "format:" after soffset when one was already parsed is an error.
  if (isId("format") && peekToken().is(AsmToken::Colon)) {
    Error(getLoc(), "duplicate format");
    return MatchOperand_ParseFail;
  }
  return MatchOperand_Success;
}
5794 
5795 //===----------------------------------------------------------------------===//
5796 // ds
5797 //===----------------------------------------------------------------------===//
5798 
5799 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5800                                     const OperandVector &Operands) {
5801   OptionalImmIndexMap OptionalIdx;
5802 
5803   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5804     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5805 
5806     // Add the register arguments
5807     if (Op.isReg()) {
5808       Op.addRegOperands(Inst, 1);
5809       continue;
5810     }
5811 
5812     // Handle optional arguments
5813     OptionalIdx[Op.getImmTy()] = i;
5814   }
5815 
5816   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
5817   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
5818   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5819 
5820   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5821 }
5822 
5823 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
5824                                 bool IsGdsHardcoded) {
5825   OptionalImmIndexMap OptionalIdx;
5826 
5827   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5828     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5829 
5830     // Add the register arguments
5831     if (Op.isReg()) {
5832       Op.addRegOperands(Inst, 1);
5833       continue;
5834     }
5835 
5836     if (Op.isToken() && Op.getToken() == "gds") {
5837       IsGdsHardcoded = true;
5838       continue;
5839     }
5840 
5841     // Handle optional arguments
5842     OptionalIdx[Op.getImmTy()] = i;
5843   }
5844 
5845   AMDGPUOperand::ImmTy OffsetType =
5846     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
5847      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
5848      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
5849                                                       AMDGPUOperand::ImmTyOffset;
5850 
5851   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
5852 
5853   if (!IsGdsHardcoded) {
5854     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5855   }
5856   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5857 }
5858 
void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
  // Convert parsed EXP operands, computing the 'en' (enable) mask from which
  // of the four sources are registers vs 'off'.
  OptionalImmIndexMap OptionalIdx;

  // MCInst operand index of each of the four source operands.
  unsigned OperandIdx[4];
  unsigned EnMask = 0;
  int SrcIdx = 0;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Op.addRegOperands(Inst, 1);
      ++SrcIdx;
      continue;
    }

    // An 'off' source becomes a NoRegister placeholder.
    if (Op.isOff()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
      ++SrcIdx;
      continue;
    }

    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // The 'done' token adds no MCInst operand here.
    if (Op.isToken() && Op.getToken() == "done")
      continue;

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  assert(SrcIdx == 4);

  bool Compr = false;
  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
    // Compressed export: src1 takes src2's value and the upper two source
    // slots are cleared.
    Compr = true;
    Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
    Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
    Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
  }

  // Two enable bits per used source when compressed, one bit otherwise.
  for (auto i = 0; i < SrcIdx; ++i) {
    if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
      EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);

  Inst.addOperand(MCOperand::createImm(EnMask));
}
5919 
5920 //===----------------------------------------------------------------------===//
5921 // s_waitcnt
5922 //===----------------------------------------------------------------------===//
5923 
5924 static bool
5925 encodeCnt(
5926   const AMDGPU::IsaVersion ISA,
5927   int64_t &IntVal,
5928   int64_t CntVal,
5929   bool Saturate,
5930   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
5931   unsigned (*decode)(const IsaVersion &Version, unsigned))
5932 {
5933   bool Failed = false;
5934 
5935   IntVal = encode(ISA, IntVal, CntVal);
5936   if (CntVal != decode(ISA, IntVal)) {
5937     if (Saturate) {
5938       IntVal = encode(ISA, IntVal, -1);
5939     } else {
5940       Failed = true;
5941     }
5942   }
5943   return Failed;
5944 }
5945 
5946 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
5947 
5948   SMLoc CntLoc = getLoc();
5949   StringRef CntName = getTokenStr();
5950 
5951   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
5952       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
5953     return false;
5954 
5955   int64_t CntVal;
5956   SMLoc ValLoc = getLoc();
5957   if (!parseExpr(CntVal))
5958     return false;
5959 
5960   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5961 
5962   bool Failed = true;
5963   bool Sat = CntName.endswith("_sat");
5964 
5965   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
5966     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
5967   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
5968     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
5969   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
5970     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
5971   } else {
5972     Error(CntLoc, "invalid counter name " + CntName);
5973     return false;
5974   }
5975 
5976   if (Failed) {
5977     Error(ValLoc, "too large value for " + CntName);
5978     return false;
5979   }
5980 
5981   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
5982     return false;
5983 
5984   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
5985     if (isToken(AsmToken::EndOfStatement)) {
5986       Error(getLoc(), "expected a counter name");
5987       return false;
5988     }
5989   }
5990 
5991   return true;
5992 }
5993 
5994 OperandMatchResultTy
5995 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
5996   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5997   int64_t Waitcnt = getWaitcntBitMask(ISA);
5998   SMLoc S = getLoc();
5999 
6000   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6001     while (!isToken(AsmToken::EndOfStatement)) {
6002       if (!parseCnt(Waitcnt))
6003         return MatchOperand_ParseFail;
6004     }
6005   } else {
6006     if (!parseExpr(Waitcnt))
6007       return MatchOperand_ParseFail;
6008   }
6009 
6010   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6011   return MatchOperand_Success;
6012 }
6013 
bool
AMDGPUOperand::isSWaitCnt() const {
  // Any immediate operand is acceptable as an s_waitcnt operand.
  return isImm();
}
6018 
6019 //===----------------------------------------------------------------------===//
6020 // hwreg
6021 //===----------------------------------------------------------------------===//
6022 
bool
AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
                                OperandInfoTy &Offset,
                                OperandInfoTy &Width) {
  // Parses the body of "hwreg(<reg>[, <offset>, <width>])" up to and
  // including the closing parenthesis. Returns false after reporting an
  // error. Values are range-checked separately by validateHwreg.
  using namespace llvm::AMDGPU::Hwreg;

  // The register may be specified by name or using a numeric code
  HwReg.Loc = getLoc();
  if (isToken(AsmToken::Identifier) &&
      (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
    HwReg.IsSymbolic = true;
    lex(); // skip register name
  } else if (!parseExpr(HwReg.Id, "a register name")) {
    return false;
  }

  // Short form: "hwreg(<reg>)" with default offset/width.
  if (trySkipToken(AsmToken::RParen))
    return true;

  // parse optional params
  if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
    return false;

  Offset.Loc = getLoc();
  if (!parseExpr(Offset.Id))
    return false;

  if (!skipToken(AsmToken::Comma, "expected a comma"))
    return false;

  // Offset and width must be given together; width is followed by ')'.
  Width.Loc = getLoc();
  return parseExpr(Width.Id) &&
         skipToken(AsmToken::RParen, "expected a closing parenthesis");
}
6057 
6058 bool
6059 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6060                                const OperandInfoTy &Offset,
6061                                const OperandInfoTy &Width) {
6062 
6063   using namespace llvm::AMDGPU::Hwreg;
6064 
6065   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
6066     Error(HwReg.Loc,
6067           "specified hardware register is not supported on this GPU");
6068     return false;
6069   }
6070   if (!isValidHwreg(HwReg.Id)) {
6071     Error(HwReg.Loc,
6072           "invalid code of hardware register: only 6-bit values are legal");
6073     return false;
6074   }
6075   if (!isValidHwregOffset(Offset.Id)) {
6076     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6077     return false;
6078   }
6079   if (!isValidHwregWidth(Width.Id)) {
6080     Error(Width.Loc,
6081           "invalid bitfield width: only values from 1 to 32 are legal");
6082     return false;
6083   }
6084   return true;
6085 }
6086 
// Parse a hwreg operand: either the symbolic macro form
// "hwreg(<name-or-code>[, offset, width])" or a raw 16-bit immediate.
// On success the encoded value is pushed onto Operands.
OperandMatchResultTy
AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
  using namespace llvm::AMDGPU::Hwreg;

  int64_t ImmVal = 0;
  SMLoc Loc = getLoc();

  if (trySkipId("hwreg", AsmToken::LParen)) {
    // Symbolic macro form.
    OperandInfoTy HwReg(ID_UNKNOWN_);
    OperandInfoTy Offset(OFFSET_DEFAULT_);
    OperandInfoTy Width(WIDTH_DEFAULT_);
    if (parseHwregBody(HwReg, Offset, Width) &&
        validateHwreg(HwReg, Offset, Width)) {
      ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
    } else {
      return MatchOperand_ParseFail;
    }
  } else if (parseExpr(ImmVal, "a hwreg macro")) {
    // Raw numeric form: must be a non-negative 16-bit value.
    if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
      Error(Loc, "invalid immediate: only 16-bit values are legal");
      return MatchOperand_ParseFail;
    }
  } else {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
  return MatchOperand_Success;
}
6116 
// True if this operand is a parsed hwreg immediate.
bool AMDGPUOperand::isHwreg() const {
  return isImmTy(ImmTyHwreg);
}
6120 
6121 //===----------------------------------------------------------------------===//
6122 // sendmsg
6123 //===----------------------------------------------------------------------===//
6124 
// Parse the interior of a sendmsg(...) specifier:
//   <msg>[, <op>[, <stream>]] )
// The message and operation may be given by name or as expressions;
// the stream id is always an expression. IsDefined is set on components
// that actually appear so the validator can distinguish "absent" from
// a default value.
bool
AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
                                  OperandInfoTy &Op,
                                  OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;

  Msg.Loc = getLoc();
  if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
    Msg.IsSymbolic = true;
    lex(); // skip message name
  } else if (!parseExpr(Msg.Id, "a message name")) {
    return false;
  }

  if (trySkipToken(AsmToken::Comma)) {
    Op.IsDefined = true;
    Op.Loc = getLoc();
    // Operation name lookup depends on the message id parsed above.
    if (isToken(AsmToken::Identifier) &&
        (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
      lex(); // skip operation name
    } else if (!parseExpr(Op.Id, "an operation name")) {
      return false;
    }

    if (trySkipToken(AsmToken::Comma)) {
      Stream.IsDefined = true;
      Stream.Loc = getLoc();
      if (!parseExpr(Stream.Id))
        return false;
    }
  }

  return skipToken(AsmToken::RParen, "expected a closing parenthesis");
}
6159 
// Validate the components of a parsed sendmsg(...) specifier.
bool
AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
                                 const OperandInfoTy &Op,
                                 const OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;

  // Validation strictness depends on whether message is specified
  // in a symbolc or in a numeric form. In the latter case
  // only encoding possibility is checked.
  bool Strict = Msg.IsSymbolic;

  if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
    Error(Msg.Loc, "invalid message id");
    return false;
  }
  // In strict mode, presence of an operation must match the message's
  // requirements exactly.
  if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
    if (Op.IsDefined) {
      Error(Op.Loc, "message does not support operations");
    } else {
      Error(Msg.Loc, "missing message operation");
    }
    return false;
  }
  if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
    Error(Op.Loc, "invalid operation id");
    return false;
  }
  // A stream id may only be given when the message/operation allows it.
  if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
    Error(Stream.Loc, "message operation does not support streams");
    return false;
  }
  if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
    Error(Stream.Loc, "invalid message stream id");
    return false;
  }
  return true;
}
6197 
// Parse a sendmsg operand: either the symbolic macro form
// "sendmsg(<msg>[, <op>[, <stream>]])" or a raw 16-bit immediate.
// On success the encoded value is pushed onto Operands.
OperandMatchResultTy
AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SendMsg;

  int64_t ImmVal = 0;
  SMLoc Loc = getLoc();

  if (trySkipId("sendmsg", AsmToken::LParen)) {
    // Symbolic macro form.
    OperandInfoTy Msg(ID_UNKNOWN_);
    OperandInfoTy Op(OP_NONE_);
    OperandInfoTy Stream(STREAM_ID_NONE_);
    if (parseSendMsgBody(Msg, Op, Stream) &&
        validateSendMsg(Msg, Op, Stream)) {
      ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
    } else {
      return MatchOperand_ParseFail;
    }
  } else if (parseExpr(ImmVal, "a sendmsg macro")) {
    // Raw numeric form: must be a non-negative 16-bit value.
    if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
      Error(Loc, "invalid immediate: only 16-bit values are legal");
      return MatchOperand_ParseFail;
    }
  } else {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
  return MatchOperand_Success;
}
6227 
// True if this operand is a parsed sendmsg immediate.
bool AMDGPUOperand::isSendMsg() const {
  return isImmTy(ImmTySendMsg);
}
6231 
6232 //===----------------------------------------------------------------------===//
6233 // v_interp
6234 //===----------------------------------------------------------------------===//
6235 
6236 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6237   StringRef Str;
6238   SMLoc S = getLoc();
6239 
6240   if (!parseId(Str))
6241     return MatchOperand_NoMatch;
6242 
6243   int Slot = StringSwitch<int>(Str)
6244     .Case("p10", 0)
6245     .Case("p20", 1)
6246     .Case("p0", 2)
6247     .Default(-1);
6248 
6249   if (Slot == -1) {
6250     Error(S, "invalid interpolation slot");
6251     return MatchOperand_ParseFail;
6252   }
6253 
6254   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6255                                               AMDGPUOperand::ImmTyInterpSlot));
6256   return MatchOperand_Success;
6257 }
6258 
// Parse a v_interp attribute operand of the form "attr<N>.<chan>"
// (e.g. "attr31.x"). Pushes two operands: the attribute number and the
// channel selector.
OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
  StringRef Str;
  SMLoc S = getLoc();

  if (!parseId(Str))
    return MatchOperand_NoMatch;

  if (!Str.startswith("attr")) {
    Error(S, "invalid interpolation attribute");
    return MatchOperand_ParseFail;
  }

  // The trailing two characters select the channel: .x/.y/.z/.w -> 0..3.
  StringRef Chan = Str.take_back(2);
  int AttrChan = StringSwitch<int>(Chan)
    .Case(".x", 0)
    .Case(".y", 1)
    .Case(".z", 2)
    .Case(".w", 3)
    .Default(-1);
  if (AttrChan == -1) {
    Error(S, "invalid or missing interpolation attribute channel");
    return MatchOperand_ParseFail;
  }

  // Strip the channel suffix and the "attr" prefix, leaving the number.
  Str = Str.drop_back(2).drop_front(4);

  uint8_t Attr;
  if (Str.getAsInteger(10, Attr)) {
    Error(S, "invalid or missing interpolation attribute number");
    return MatchOperand_ParseFail;
  }

  // Attribute numbers are limited to 6 bits.
  if (Attr > 63) {
    Error(S, "out of bounds interpolation attribute number");
    return MatchOperand_ParseFail;
  }

  // Point the channel operand's location at the ".<chan>" suffix.
  SMLoc SChan = SMLoc::getFromPointer(Chan.data());

  Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
                                              AMDGPUOperand::ImmTyInterpAttr));
  Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
                                              AMDGPUOperand::ImmTyAttrChan));
  return MatchOperand_Success;
}
6304 
6305 //===----------------------------------------------------------------------===//
6306 // exp
6307 //===----------------------------------------------------------------------===//
6308 
6309 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6310   using namespace llvm::AMDGPU::Exp;
6311 
6312   StringRef Str;
6313   SMLoc S = getLoc();
6314 
6315   if (!parseId(Str))
6316     return MatchOperand_NoMatch;
6317 
6318   unsigned Id = getTgtId(Str);
6319   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6320     Error(S, (Id == ET_INVALID) ?
6321                 "invalid exp target" :
6322                 "exp target is not supported on this GPU");
6323     return MatchOperand_ParseFail;
6324   }
6325 
6326   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6327                                               AMDGPUOperand::ImmTyExpTgt));
6328   return MatchOperand_Success;
6329 }
6330 
6331 //===----------------------------------------------------------------------===//
6332 // parser helpers
6333 //===----------------------------------------------------------------------===//
6334 
6335 bool
6336 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6337   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6338 }
6339 
6340 bool
6341 AMDGPUAsmParser::isId(const StringRef Id) const {
6342   return isId(getToken(), Id);
6343 }
6344 
6345 bool
6346 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6347   return getTokenKind() == Kind;
6348 }
6349 
6350 bool
6351 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6352   if (isId(Id)) {
6353     lex();
6354     return true;
6355   }
6356   return false;
6357 }
6358 
6359 bool
6360 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6361   if (isToken(AsmToken::Identifier)) {
6362     StringRef Tok = getTokenStr();
6363     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6364       lex();
6365       return true;
6366     }
6367   }
6368   return false;
6369 }
6370 
6371 bool
6372 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6373   if (isId(Id) && peekToken().is(Kind)) {
6374     lex();
6375     lex();
6376     return true;
6377   }
6378   return false;
6379 }
6380 
6381 bool
6382 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6383   if (isToken(Kind)) {
6384     lex();
6385     return true;
6386   }
6387   return false;
6388 }
6389 
6390 bool
6391 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6392                            const StringRef ErrMsg) {
6393   if (!trySkipToken(Kind)) {
6394     Error(getLoc(), ErrMsg);
6395     return false;
6396   }
6397   return true;
6398 }
6399 
6400 bool
6401 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6402   SMLoc S = getLoc();
6403 
6404   const MCExpr *Expr;
6405   if (Parser.parseExpression(Expr))
6406     return false;
6407 
6408   if (Expr->evaluateAsAbsolute(Imm))
6409     return true;
6410 
6411   if (Expected.empty()) {
6412     Error(S, "expected absolute expression");
6413   } else {
6414     Error(S, Twine("expected ", Expected) +
6415              Twine(" or an absolute expression"));
6416   }
6417   return false;
6418 }
6419 
6420 bool
6421 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6422   SMLoc S = getLoc();
6423 
6424   const MCExpr *Expr;
6425   if (Parser.parseExpression(Expr))
6426     return false;
6427 
6428   int64_t IntVal;
6429   if (Expr->evaluateAsAbsolute(IntVal)) {
6430     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6431   } else {
6432     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6433   }
6434   return true;
6435 }
6436 
6437 bool
6438 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6439   if (isToken(AsmToken::String)) {
6440     Val = getToken().getStringContents();
6441     lex();
6442     return true;
6443   } else {
6444     Error(getLoc(), ErrMsg);
6445     return false;
6446   }
6447 }
6448 
6449 bool
6450 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6451   if (isToken(AsmToken::Identifier)) {
6452     Val = getTokenStr();
6453     lex();
6454     return true;
6455   } else {
6456     if (!ErrMsg.empty())
6457       Error(getLoc(), ErrMsg);
6458     return false;
6459   }
6460 }
6461 
// Return the token currently under the lexer cursor.
AsmToken
AMDGPUAsmParser::getToken() const {
  return Parser.getTok();
}
6466 
6467 AsmToken
6468 AMDGPUAsmParser::peekToken() {
6469   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6470 }
6471 
6472 void
6473 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6474   auto TokCount = getLexer().peekTokens(Tokens);
6475 
6476   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6477     Tokens[Idx] = AsmToken(AsmToken::Error, "");
6478 }
6479 
// Return the kind of the current token.
AsmToken::TokenKind
AMDGPUAsmParser::getTokenKind() const {
  return getLexer().getKind();
}
6484 
// Return the source location of the current token.
SMLoc
AMDGPUAsmParser::getLoc() const {
  return getToken().getLoc();
}
6489 
// Return the spelling of the current token.
StringRef
AMDGPUAsmParser::getTokenStr() const {
  return getToken().getString();
}
6494 
// Advance the lexer to the next token.
void
AMDGPUAsmParser::lex() {
  Parser.Lex();
}
6499 
6500 SMLoc
6501 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
6502                                const OperandVector &Operands) const {
6503   for (unsigned i = Operands.size() - 1; i > 0; --i) {
6504     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6505     if (Test(Op))
6506       return Op.getStartLoc();
6507   }
6508   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
6509 }
6510 
6511 SMLoc
6512 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
6513                            const OperandVector &Operands) const {
6514   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
6515   return getOperandLoc(Test, Operands);
6516 }
6517 
6518 SMLoc
6519 AMDGPUAsmParser::getRegLoc(unsigned Reg,
6520                            const OperandVector &Operands) const {
6521   auto Test = [=](const AMDGPUOperand& Op) {
6522     return Op.isRegKind() && Op.getReg() == Reg;
6523   };
6524   return getOperandLoc(Test, Operands);
6525 }
6526 
6527 SMLoc
6528 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
6529   auto Test = [](const AMDGPUOperand& Op) {
6530     return Op.IsImmKindLiteral() || Op.isExpr();
6531   };
6532   return getOperandLoc(Test, Operands);
6533 }
6534 
6535 SMLoc
6536 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
6537   auto Test = [](const AMDGPUOperand& Op) {
6538     return Op.isImmKindConst();
6539   };
6540   return getOperandLoc(Test, Operands);
6541 }
6542 
6543 //===----------------------------------------------------------------------===//
6544 // swizzle
6545 //===----------------------------------------------------------------------===//
6546 
6547 LLVM_READNONE
6548 static unsigned
6549 encodeBitmaskPerm(const unsigned AndMask,
6550                   const unsigned OrMask,
6551                   const unsigned XorMask) {
6552   using namespace llvm::AMDGPU::Swizzle;
6553 
6554   return BITMASK_PERM_ENC |
6555          (AndMask << BITMASK_AND_SHIFT) |
6556          (OrMask  << BITMASK_OR_SHIFT)  |
6557          (XorMask << BITMASK_XOR_SHIFT);
6558 }
6559 
6560 bool
6561 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
6562                                      const unsigned MinVal,
6563                                      const unsigned MaxVal,
6564                                      const StringRef ErrMsg,
6565                                      SMLoc &Loc) {
6566   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6567     return false;
6568   }
6569   Loc = getLoc();
6570   if (!parseExpr(Op)) {
6571     return false;
6572   }
6573   if (Op < MinVal || Op > MaxVal) {
6574     Error(Loc, ErrMsg);
6575     return false;
6576   }
6577 
6578   return true;
6579 }
6580 
6581 bool
6582 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6583                                       const unsigned MinVal,
6584                                       const unsigned MaxVal,
6585                                       const StringRef ErrMsg) {
6586   SMLoc Loc;
6587   for (unsigned i = 0; i < OpNum; ++i) {
6588     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
6589       return false;
6590   }
6591 
6592   return true;
6593 }
6594 
6595 bool
6596 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6597   using namespace llvm::AMDGPU::Swizzle;
6598 
6599   int64_t Lane[LANE_NUM];
6600   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6601                            "expected a 2-bit lane id")) {
6602     Imm = QUAD_PERM_ENC;
6603     for (unsigned I = 0; I < LANE_NUM; ++I) {
6604       Imm |= Lane[I] << (LANE_SHIFT * I);
6605     }
6606     return true;
6607   }
6608   return false;
6609 }
6610 
6611 bool
6612 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6613   using namespace llvm::AMDGPU::Swizzle;
6614 
6615   SMLoc Loc;
6616   int64_t GroupSize;
6617   int64_t LaneIdx;
6618 
6619   if (!parseSwizzleOperand(GroupSize,
6620                            2, 32,
6621                            "group size must be in the interval [2,32]",
6622                            Loc)) {
6623     return false;
6624   }
6625   if (!isPowerOf2_64(GroupSize)) {
6626     Error(Loc, "group size must be a power of two");
6627     return false;
6628   }
6629   if (parseSwizzleOperand(LaneIdx,
6630                           0, GroupSize - 1,
6631                           "lane id must be in the interval [0,group size - 1]",
6632                           Loc)) {
6633     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6634     return true;
6635   }
6636   return false;
6637 }
6638 
6639 bool
6640 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
6641   using namespace llvm::AMDGPU::Swizzle;
6642 
6643   SMLoc Loc;
6644   int64_t GroupSize;
6645 
6646   if (!parseSwizzleOperand(GroupSize,
6647                            2, 32,
6648                            "group size must be in the interval [2,32]",
6649                            Loc)) {
6650     return false;
6651   }
6652   if (!isPowerOf2_64(GroupSize)) {
6653     Error(Loc, "group size must be a power of two");
6654     return false;
6655   }
6656 
6657   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6658   return true;
6659 }
6660 
6661 bool
6662 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6663   using namespace llvm::AMDGPU::Swizzle;
6664 
6665   SMLoc Loc;
6666   int64_t GroupSize;
6667 
6668   if (!parseSwizzleOperand(GroupSize,
6669                            1, 16,
6670                            "group size must be in the interval [1,16]",
6671                            Loc)) {
6672     return false;
6673   }
6674   if (!isPowerOf2_64(GroupSize)) {
6675     Error(Loc, "group size must be a power of two");
6676     return false;
6677   }
6678 
6679   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6680   return true;
6681 }
6682 
// Parse the tail of swizzle(BITMASK_PERM, "mask") where the mask is a
// 5-character string over {0,1,p,i}:
//   '0' -> bit forced to 0, '1' -> bit forced to 1,
//   'p' -> bit preserved,   'i' -> bit inverted.
// The string is read MSB-first and folded into AND/OR/XOR masks.
bool
AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!skipToken(AsmToken::Comma, "expected a comma")) {
    return false;
  }

  StringRef Ctl;
  SMLoc StrLoc = getLoc();
  if (!parseString(Ctl)) {
    return false;
  }
  if (Ctl.size() != BITMASK_WIDTH) {
    Error(StrLoc, "expected a 5-character mask");
    return false;
  }

  unsigned AndMask = 0;
  unsigned OrMask = 0;
  unsigned XorMask = 0;

  for (size_t i = 0; i < Ctl.size(); ++i) {
    // The first character of the string controls the most significant bit.
    unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
    switch(Ctl[i]) {
    default:
      Error(StrLoc, "invalid mask");
      return false;
    case '0':
      break;
    case '1':
      OrMask |= Mask;
      break;
    case 'p':
      AndMask |= Mask;
      break;
    case 'i':
      AndMask |= Mask;
      XorMask |= Mask;
      break;
    }
  }

  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
  return true;
}
6729 
6730 bool
6731 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6732 
6733   SMLoc OffsetLoc = getLoc();
6734 
6735   if (!parseExpr(Imm, "a swizzle macro")) {
6736     return false;
6737   }
6738   if (!isUInt<16>(Imm)) {
6739     Error(OffsetLoc, "expected a 16-bit offset");
6740     return false;
6741   }
6742   return true;
6743 }
6744 
6745 bool
6746 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6747   using namespace llvm::AMDGPU::Swizzle;
6748 
6749   if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
6750 
6751     SMLoc ModeLoc = getLoc();
6752     bool Ok = false;
6753 
6754     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6755       Ok = parseSwizzleQuadPerm(Imm);
6756     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6757       Ok = parseSwizzleBitmaskPerm(Imm);
6758     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6759       Ok = parseSwizzleBroadcast(Imm);
6760     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6761       Ok = parseSwizzleSwap(Imm);
6762     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6763       Ok = parseSwizzleReverse(Imm);
6764     } else {
6765       Error(ModeLoc, "expected a swizzle mode");
6766     }
6767 
6768     return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
6769   }
6770 
6771   return false;
6772 }
6773 
6774 OperandMatchResultTy
6775 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6776   SMLoc S = getLoc();
6777   int64_t Imm = 0;
6778 
6779   if (trySkipId("offset")) {
6780 
6781     bool Ok = false;
6782     if (skipToken(AsmToken::Colon, "expected a colon")) {
6783       if (trySkipId("swizzle")) {
6784         Ok = parseSwizzleMacro(Imm);
6785       } else {
6786         Ok = parseSwizzleOffset(Imm);
6787       }
6788     }
6789 
6790     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
6791 
6792     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
6793   } else {
6794     // Swizzle "offset" operand is optional.
6795     // If it is omitted, try parsing other optional operands.
6796     return parseOptionalOpr(Operands);
6797   }
6798 }
6799 
// True if this operand is a parsed swizzle immediate.
bool
AMDGPUOperand::isSwizzle() const {
  return isImmTy(ImmTySwizzle);
}
6804 
6805 //===----------------------------------------------------------------------===//
6806 // VGPR Index Mode
6807 //===----------------------------------------------------------------------===//
6808 
// Parse the interior of a gpr_idx(...) macro: a comma-separated list of
// distinct VGPR index mode names, or an empty list meaning OFF.
// Returns the OR of the mode bits, or UNDEF after emitting a diagnostic.
int64_t AMDGPUAsmParser::parseGPRIdxMacro() {

  using namespace llvm::AMDGPU::VGPRIndexMode;

  // gpr_idx() with no modes means "off".
  if (trySkipToken(AsmToken::RParen)) {
    return OFF;
  }

  int64_t Imm = 0;

  while (true) {
    unsigned Mode = 0;
    SMLoc S = getLoc();

    // Try each known mode name; ModeId doubles as the bit position.
    for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
      if (trySkipId(IdSymbolic[ModeId])) {
        Mode = 1 << ModeId;
        break;
      }
    }

    if (Mode == 0) {
      // Mention the closing parenthesis only before any mode was seen.
      Error(S, (Imm == 0)?
               "expected a VGPR index mode or a closing parenthesis" :
               "expected a VGPR index mode");
      return UNDEF;
    }

    // Each mode may appear at most once.
    if (Imm & Mode) {
      Error(S, "duplicate VGPR index mode");
      return UNDEF;
    }
    Imm |= Mode;

    if (trySkipToken(AsmToken::RParen))
      break;
    if (!skipToken(AsmToken::Comma,
                   "expected a comma or a closing parenthesis"))
      return UNDEF;
  }

  return Imm;
}
6852 
6853 OperandMatchResultTy
6854 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
6855 
6856   using namespace llvm::AMDGPU::VGPRIndexMode;
6857 
6858   int64_t Imm = 0;
6859   SMLoc S = getLoc();
6860 
6861   if (trySkipId("gpr_idx", AsmToken::LParen)) {
6862     Imm = parseGPRIdxMacro();
6863     if (Imm == UNDEF)
6864       return MatchOperand_ParseFail;
6865   } else {
6866     if (getParser().parseAbsoluteExpression(Imm))
6867       return MatchOperand_ParseFail;
6868     if (Imm < 0 || !isUInt<4>(Imm)) {
6869       Error(S, "invalid immediate: only 4-bit values are legal");
6870       return MatchOperand_ParseFail;
6871     }
6872   }
6873 
6874   Operands.push_back(
6875       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
6876   return MatchOperand_Success;
6877 }
6878 
// True if this operand is a parsed VGPR index mode immediate.
bool AMDGPUOperand::isGPRIdxMode() const {
  return isImmTy(ImmTyGprIdxMode);
}
6882 
6883 //===----------------------------------------------------------------------===//
6884 // sopp branch targets
6885 //===----------------------------------------------------------------------===//
6886 
6887 OperandMatchResultTy
6888 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
6889 
6890   // Make sure we are not parsing something
6891   // that looks like a label or an expression but is not.
6892   // This will improve error messages.
6893   if (isRegister() || isModifier())
6894     return MatchOperand_NoMatch;
6895 
6896   if (!parseExpr(Operands))
6897     return MatchOperand_ParseFail;
6898 
6899   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
6900   assert(Opr.isImm() || Opr.isExpr());
6901   SMLoc Loc = Opr.getStartLoc();
6902 
6903   // Currently we do not support arbitrary expressions as branch targets.
6904   // Only labels and absolute expressions are accepted.
6905   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
6906     Error(Loc, "expected an absolute expression or a label");
6907   } else if (Opr.isImm() && !Opr.isS16Imm()) {
6908     Error(Loc, "expected a 16-bit signed jump offset");
6909   }
6910 
6911   return MatchOperand_Success;
6912 }
6913 
6914 //===----------------------------------------------------------------------===//
6915 // Boolean holding registers
6916 //===----------------------------------------------------------------------===//
6917 
// Boolean-holding register operands are parsed like ordinary registers.
OperandMatchResultTy
AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
  return parseReg(Operands);
}
6922 
6923 //===----------------------------------------------------------------------===//
6924 // mubuf
6925 //===----------------------------------------------------------------------===//
6926 
// Default cache-policy operand: all policy bits clear.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
}
6930 
// Convert parsed MUBUF operands into an MCInst. Handles:
//  - selecting the no-return opcode for atomics without GLC,
//  - inserting the tied dst/src operand for returning atomics,
//  - collecting optional modifiers and appending them in encoding order,
//  - working around lds/non-lds opcode selection (see comment below).
void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
                                   const OperandVector &Operands,
                                   bool IsAtomic,
                                   bool IsLds) {
  bool IsLdsOpcode = IsLds;
  bool HasLdsModifier = false;
  OptionalImmIndexMap OptionalIdx;
  unsigned FirstOperandIdx = 1;
  bool IsAtomicReturn = false;

  if (IsAtomic) {
    // An atomic returns its result iff the GLC bit is set in the cache
    // policy operand.
    for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
      if (!Op.isCPol())
        continue;
      IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
      break;
    }

    // Without GLC, switch to the no-return flavor of the opcode if one
    // exists.
    if (!IsAtomicReturn) {
      int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
      if (NewOpc != -1)
        Inst.setOpcode(NewOpc);
    }

    // Re-derive the flag from the (possibly updated) opcode.
    IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
                      SIInstrFlags::IsAtomicRet;
  }

  for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      // Insert a tied src for atomic return dst.
      // This cannot be postponed as subsequent calls to
      // addImmOperands rely on correct number of MC operands.
      if (IsAtomicReturn && i == FirstOperandIdx)
        Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    HasLdsModifier |= Op.isLDS();

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string.  There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  // This is a workaround for an llvm quirk which may result in an
  // incorrect instruction selection. Lds and non-lds versions of
  // MUBUF instructions are identical except that lds versions
  // have mandatory 'lds' modifier. However this modifier follows
  // optional modifiers and llvm asm matcher regards this 'lds'
  // modifier as an optional one. As a result, an lds version
  // of opcode may be selected even if it has no 'lds' modifier.
  if (IsLdsOpcode && !HasLdsModifier) {
    int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
    if (NoLdsOpcode != -1) { // Got lds version - correct it.
      Inst.setOpcode(NoLdsOpcode);
      IsLdsOpcode = false;
    }
  }

  // Append optional modifiers in encoding order, using defaults for any
  // that were not written in the source.
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);

  if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  }
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
}
7016 
// Convert parsed MTBUF operands into an MCInst: registers and the plain
// soffset immediate are appended in order, then the optional modifiers
// are added in encoding order.
void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string.  There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  // Append optional modifiers in encoding order.
  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
}
7053 
7054 //===----------------------------------------------------------------------===//
7055 // mimg
7056 //===----------------------------------------------------------------------===//
7057 
// Convert parsed MIMG operands into an MCInst. For atomics the single
// destination is re-added as a tied source. Optional modifiers are
// appended in encoding order, with GFX10+-only fields gated on the
// subtarget generation.
void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
                              bool IsAtomic) {
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  // Destination registers come first.
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (IsAtomic) {
    // Add src, same as dst
    assert(Desc.getNumDefs() == 1);
    ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
  }

  OptionalImmIndexMap OptionalIdx;

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else if (!Op.isToken()) {
      llvm_unreachable("unexpected operand type");
    }
  }

  bool IsGFX10Plus = isGFX10Plus();

  // Append optional modifiers in encoding order.
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
  if (IsGFX10Plus)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
  if (IsGFX10Plus)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
  // tfe is only present on some opcodes.
  if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
  if (!IsGFX10Plus)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
}
7104 
// MIMG atomics convert like ordinary MIMG instructions, except the single
// def register is additionally emitted as the tied data source.
void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
  cvtMIMG(Inst, Operands, true);
}
7108 
// Convert parsed operands of an SMEM atomic. Whether the atomic returns its
// result is driven by the GLC bit of the cache-policy operand; non-returning
// forms are re-targeted to the no-return opcode variant when one exists.
void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  bool IsAtomicReturn = false;

  // The GLC bit in the cpol operand selects the returning form.
  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (!Op.isCPol())
      continue;
    IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
    break;
  }

  if (!IsAtomicReturn) {
    // Switch to the non-returning opcode variant when one is defined.
    int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
    if (NewOpc != -1)
      Inst.setOpcode(NewOpc);
  }

  // Re-derive the returning property from the (possibly replaced) opcode.
  IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
                    SIInstrFlags::IsAtomicRet;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      // For returning atomics the first register is emitted twice: once as
      // the destination and once as the tied data source.
      if (IsAtomicReturn && i == 1)
        Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string.  There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  // Only add the default offset when the offset slot has not already been
  // filled (e.g. by the immediate-soffset path above).
  if ((int)Inst.getNumOperands() <=
      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
}
7163 
7164 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7165                                       const OperandVector &Operands) {
7166   for (unsigned I = 1; I < Operands.size(); ++I) {
7167     auto &Operand = (AMDGPUOperand &)*Operands[I];
7168     if (Operand.isReg())
7169       Operand.addRegOperands(Inst, 1);
7170   }
7171 
7172   Inst.addOperand(MCOperand::createImm(1)); // a16
7173 }
7174 
7175 //===----------------------------------------------------------------------===//
7176 // smrd
7177 //===----------------------------------------------------------------------===//
7178 
7179 bool AMDGPUOperand::isSMRDOffset8() const {
7180   return isImm() && isUInt<8>(getImm());
7181 }
7182 
// Any immediate is accepted as an SMEM offset here; the actual range check
// is performed later by the instruction validator.
bool AMDGPUOperand::isSMEMOffset() const {
  return isImm(); // Offset range is checked later by validator.
}
7186 
7187 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7188   // 32-bit literals are only supported on CI and we only want to use them
7189   // when the offset is > 8-bits.
7190   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7191 }
7192 
// Default (zero) operand used when no SMRD offset8 was written in the source.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}
7196 
// Default (zero) operand used when no SMEM offset was written in the source.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}
7200 
// Default (zero) operand used when no SMRD literal offset was written.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}
7204 
// Default (zero) operand used when no FLAT offset was written in the source.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}
7208 
7209 //===----------------------------------------------------------------------===//
7210 // vop3
7211 //===----------------------------------------------------------------------===//
7212 
// Convert an omod "mul:N" value into its encoded form. Only multipliers of
// 1, 2 and 4 are valid; the encoding is N >> 1 (i.e. 0, 1, 2).
static bool ConvertOmodMul(int64_t &Mul) {
  switch (Mul) {
  case 1:
  case 2:
  case 4:
    Mul >>= 1;
    return true;
  default:
    return false;
  }
}
7220 
// Convert an omod "div:N" value into its encoded form:
// div:1 encodes as 0, div:2 encodes as 3; anything else is invalid.
static bool ConvertOmodDiv(int64_t &Div) {
  switch (Div) {
  case 1:
    Div = 0;
    return true;
  case 2:
    Div = 3;
    return true;
  default:
    return false;
  }
}
7234 
// Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
// This is intentional and ensures compatibility with sp3.
// See bug 35397 for details.
static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
  if (BoundCtrl != 0 && BoundCtrl != 1)
    return false;
  BoundCtrl = 1;
  return true;
}
7245 
7246 // Note: the order in this table matches the order of operands in AsmString.
7247 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7248   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7249   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7250   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7251   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7252   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7253   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7254   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7255   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7256   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7257   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7258   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7259   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7260   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7261   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7262   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7263   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7264   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7265   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7266   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7267   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7268   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7269   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7270   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7271   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7272   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7273   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7274   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7275   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
7276   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7277   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7278   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7279   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7280   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7281   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7282   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7283   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7284   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7285   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7286   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7287   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7288   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7289 };
7290 
// Called once at the start of assembling a file: sets up the target ID and,
// for HSA ABI V3/V4, emits the .amdgcn_target directive.
void AMDGPUAsmParser::onBeginOfFile() {
  // Nothing to do without a target streamer; r600 does not use these
  // directives either.
  if (!getParser().getStreamer().getTargetStreamer() ||
      getSTI().getTargetTriple().getArch() == Triple::r600)
    return;

  // Lazily initialize the target ID from the subtarget's feature string.
  if (!getTargetStreamer().getTargetID())
    getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());

  if (isHsaAbiVersion3Or4(&getSTI()))
    getTargetStreamer().EmitDirectiveAMDGCNTarget();
}
7302 
7303 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7304 
7305   OperandMatchResultTy res = parseOptionalOpr(Operands);
7306 
7307   // This is a hack to enable hardcoded mandatory operands which follow
7308   // optional operands.
7309   //
7310   // Current design assumes that all operands after the first optional operand
7311   // are also optional. However implementation of some instructions violates
7312   // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
7313   //
7314   // To alleviate this problem, we have to (implicitly) parse extra operands
7315   // to make sure autogenerated parser of custom operands never hit hardcoded
7316   // mandatory operands.
7317 
7318   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7319     if (res != MatchOperand_Success ||
7320         isToken(AsmToken::EndOfStatement))
7321       break;
7322 
7323     trySkipToken(AsmToken::Comma);
7324     res = parseOptionalOpr(Operands);
7325   }
7326 
7327   return res;
7328 }
7329 
7330 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7331   OperandMatchResultTy res;
7332   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7333     // try to parse any optional operand here
7334     if (Op.IsBit) {
7335       res = parseNamedBit(Op.Name, Operands, Op.Type);
7336     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7337       res = parseOModOperand(Operands);
7338     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7339                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7340                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7341       res = parseSDWASel(Operands, Op.Name, Op.Type);
7342     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7343       res = parseSDWADstUnused(Operands);
7344     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7345                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7346                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7347                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7348       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7349                                         Op.ConvertResult);
7350     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7351       res = parseDim(Operands);
7352     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7353       res = parseCPol(Operands);
7354     } else {
7355       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7356     }
7357     if (res != MatchOperand_NoMatch) {
7358       return res;
7359     }
7360   }
7361   return MatchOperand_NoMatch;
7362 }
7363 
7364 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7365   StringRef Name = getTokenStr();
7366   if (Name == "mul") {
7367     return parseIntWithPrefix("mul", Operands,
7368                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7369   }
7370 
7371   if (Name == "div") {
7372     return parseIntWithPrefix("div", Operands,
7373                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7374   }
7375 
7376   return MatchOperand_NoMatch;
7377 }
7378 
// Convert a VOP3 instruction with a destination op_sel bit: after normal
// VOP3P conversion, fold the op_sel bit that follows the last source into
// src0_modifiers as DST_OP_SEL.
void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
  cvtVOP3P(Inst, Operands);

  int Opc = Inst.getOpcode();

  // Count how many source operands (src0..src2) this opcode defines.
  int SrcNum;
  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  for (SrcNum = 0;
       SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
       ++SrcNum);
  assert(SrcNum > 0);

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

  // The bit just past the last source is the destination selector; it is
  // encoded as DST_OP_SEL in src0_modifiers.
  if ((OpSel & (1 << SrcNum)) != 0) {
    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
    uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
    Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
  }
}
7402 
7403 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
7404       // 1. This operand is input modifiers
7405   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
7406       // 2. This is not last operand
7407       && Desc.NumOperands > (OpNum + 1)
7408       // 3. Next operand is register class
7409       && Desc.OpInfo[OpNum + 1].RegClass != -1
7410       // 4. Next register is not tied to any other operand
7411       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7412 }
7413 
// Convert parsed operands of VOP3 interpolation instructions. Interp
// slot/attr/chan operands become plain immediates, sources may carry FP
// input modifiers, and trailing optional modifiers (high/clamp/omod) are
// appended only when the opcode defines them.
void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
{
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  // Operands[0] is the mnemonic token; emit the defs first.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      // Emits two MCInst operands: the modifier and the source itself.
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isInterpSlot() ||
               Op.isInterpAttr() ||
               Op.isAttrChan()) {
      Inst.addOperand(MCOperand::createImm(Op.getImm()));
    } else if (Op.isImmModifier()) {
      // Remember where each optional modifier was parsed.
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }
}
7452 
// Core VOP3 operand conversion. Emits defs, then sources (with or without
// source modifiers depending on the opcode), records where optional
// modifiers were parsed (into OptionalIdx, which the caller may reuse), and
// appends clamp/omod when the opcode defines them.
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                              OptionalImmIndexMap &OptionalIdx) {
  unsigned Opc = Inst.getOpcode();

  // Operands[0] is the mnemonic token; emit the defs first.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
    // This instruction has src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        // Emits two MCInst operands: the modifier and the source itself.
        Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
      } else if (Op.isImmModifier()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else if (Op.isRegOrImm()) {
        Op.addRegOrImmOperands(Inst, 1);
      } else {
        llvm_unreachable("unhandled operand type");
      }
    }
  } else {
    // No src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (Op.isMod()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        Op.addRegOrImmOperands(Inst, 1);
      }
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }

  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // it has src2 register operand that is tied to dst operand
  // we don't allow modifiers for this operand in assembler so src2_modifiers
  // should be 0.
  if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
      Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
      Opc == AMDGPU::V_MAC_F32_e64_vi ||
      Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
      Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
      Opc == AMDGPU::V_MAC_F16_e64_vi ||
      Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
      Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
      Opc == AMDGPU::V_FMAC_F32_e64_vi ||
      Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
      Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
    // Insert a zero src2_modifiers immediately before where src2 belongs,
    // then the tied src2 (same register as the dst).
    auto it = Inst.begin();
    std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
    ++it;
    // Copy the operand to ensure it's not invalidated when Inst grows.
    Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
  }
}
7520 
// cvtVOP3 entry point for callers that do not need the optional-operand
// index map after conversion.
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  cvtVOP3(Inst, Operands, OptionalIdx);
}
7525 
// VOP3P-specific conversion: appends the packed-math modifier operands
// (op_sel, op_sel_hi, neg_lo, neg_hi), then redistributes their per-source
// bits into the corresponding src*_modifiers operands.
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                               OptionalImmIndexMap &OptIdx) {
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
    // The opcode reads its previous destination: duplicate operand 0 as the
    // tied vdst_in input.
    assert(!IsPacked);
    Inst.addOperand(Inst.getOperand(0));
  }

  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
  // instruction, and then figure out where to actually put the modifiers

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  if (OpSelIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
  }

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    // Packed instructions default op_sel_hi to all-ones (-1).
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
  }

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  unsigned OpSel = 0;
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  // Read back the immediate values just appended above (if present).
  if (OpSelIdx != -1)
    OpSel = Inst.getOperand(OpSelIdx).getImm();

  if (OpSelHiIdx != -1)
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();

  if (NegLoIdx != -1) {
    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
    NegLo = Inst.getOperand(NegLoIdx).getImm();
    NegHi = Inst.getOperand(NegHiIdx).getImm();
  }

  // Fold bit J of each packed modifier into srcJ_modifiers.
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    uint32_t ModVal = 0;

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}
7607 
// VOP3P conversion entry point: convert as a VOP3 first, then fold the
// packed-math modifiers into the source modifier operands.
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptIdx;
  cvtVOP3(Inst, Operands, OptIdx);
  cvtVOP3P(Inst, Operands, OptIdx);
}
7613 
7614 //===----------------------------------------------------------------------===//
7615 // dpp
7616 //===----------------------------------------------------------------------===//
7617 
// True when this operand is a parsed dpp8:[...] lane-selector immediate.
bool AMDGPUOperand::isDPP8() const {
  return isImmTy(ImmTyDPP8);
}
7621 
// True when this operand is a valid dpp_ctrl immediate: a 9-bit value of
// type ImmTyDppCtrl that falls into one of the defined control ranges or
// matches one of the discrete control encodings.
bool AMDGPUOperand::isDPPCtrl() const {
  using namespace AMDGPU::DPP;

  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
  if (result) {
    int64_t Imm = getImm();
    return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
           (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
           (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
           (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
           (Imm == DppCtrl::WAVE_SHL1) ||
           (Imm == DppCtrl::WAVE_ROL1) ||
           (Imm == DppCtrl::WAVE_SHR1) ||
           (Imm == DppCtrl::WAVE_ROR1) ||
           (Imm == DppCtrl::ROW_MIRROR) ||
           (Imm == DppCtrl::ROW_HALF_MIRROR) ||
           (Imm == DppCtrl::BCAST15) ||
           (Imm == DppCtrl::BCAST31) ||
           (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
           (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
  }
  return false;
}
7645 
7646 //===----------------------------------------------------------------------===//
7647 // mAI
7648 //===----------------------------------------------------------------------===//
7649 
// MFMA blgp modifier: a 3-bit immediate.
bool AMDGPUOperand::isBLGP() const {
  return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
}
7653 
// MFMA cbsz modifier: a 3-bit immediate.
bool AMDGPUOperand::isCBSZ() const {
  return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
}
7657 
// MFMA abid modifier: a 4-bit immediate.
bool AMDGPUOperand::isABID() const {
  return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
}
7661 
// A 16-bit immediate accepted in either signed or unsigned range.
bool AMDGPUOperand::isS16Imm() const {
  return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
}
7665 
// A 16-bit unsigned immediate.
bool AMDGPUOperand::isU16Imm() const {
  return isImm() && isUInt<16>(getImm());
}
7669 
7670 //===----------------------------------------------------------------------===//
7671 // dim
7672 //===----------------------------------------------------------------------===//
7673 
// Parse the value part of a "dim:<value>" modifier into its MIMG encoding.
// Returns false if the token stream does not form a known dim identifier.
bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
  // We want to allow "dim:1D" etc.,
  // but the initial 1 is tokenized as an integer.
  std::string Token;
  if (isToken(AsmToken::Integer)) {
    SMLoc Loc = getToken().getEndLoc();
    Token = std::string(getTokenStr());
    lex();
    // The integer and the following identifier must be adjacent (no gap);
    // otherwise this is not a single dim value.
    if (getLoc() != Loc)
      return false;
  }

  StringRef Suffix;
  if (!parseId(Suffix))
    return false;
  Token += Suffix;

  StringRef DimId = Token;
  // Also accept the "SQ_RSRC_IMG_<dim>" spelling by stripping the prefix.
  if (DimId.startswith("SQ_RSRC_IMG_"))
    DimId = DimId.drop_front(12);

  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
  if (!DimInfo)
    return false;

  Encoding = DimInfo->Encoding;
  return true;
}
7702 
// Parse an optional "dim:<value>" MIMG modifier (GFX10+ only) and push it
// as an ImmTyDim operand.
OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
  if (!isGFX10Plus())
    return MatchOperand_NoMatch;

  SMLoc S = getLoc();

  if (!trySkipId("dim", AsmToken::Colon))
    return MatchOperand_NoMatch;

  // After "dim:" a valid dim identifier is mandatory.
  unsigned Encoding;
  SMLoc Loc = getLoc();
  if (!parseDimId(Encoding)) {
    Error(Loc, "invalid dim value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
                                              AMDGPUOperand::ImmTyDim));
  return MatchOperand_Success;
}
7723 
7724 //===----------------------------------------------------------------------===//
7725 // dpp
7726 //===----------------------------------------------------------------------===//
7727 
// Parse a "dpp8:[s0,...,s7]" lane-selector list (GFX10+ only) and pack the
// eight 3-bit selectors into a single ImmTyDPP8 immediate.
OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
  SMLoc S = getLoc();

  if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
    return MatchOperand_NoMatch;

  // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]

  int64_t Sels[8];

  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
    return MatchOperand_ParseFail;

  for (size_t i = 0; i < 8; ++i) {
    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
      return MatchOperand_ParseFail;

    // Each selector is an absolute expression in the range [0, 7].
    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Sels[i]))
      return MatchOperand_ParseFail;
    if (0 > Sels[i] || 7 < Sels[i]) {
      Error(Loc, "expected a 3-bit value");
      return MatchOperand_ParseFail;
    }
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return MatchOperand_ParseFail;

  // Pack selector i into bits [3*i+2 : 3*i].
  unsigned DPP8 = 0;
  for (size_t i = 0; i < 8; ++i)
    DPP8 |= (Sels[i] << (i * 3));

  Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
  return MatchOperand_Success;
}
7764 
7765 bool
7766 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
7767                                     const OperandVector &Operands) {
7768   if (Ctrl == "row_newbcast")
7769     return isGFX90A();
7770 
7771   if (Ctrl == "row_share" ||
7772       Ctrl == "row_xmask")
7773     return isGFX10Plus();
7774 
7775   if (Ctrl == "wave_shl" ||
7776       Ctrl == "wave_shr" ||
7777       Ctrl == "wave_rol" ||
7778       Ctrl == "wave_ror" ||
7779       Ctrl == "row_bcast")
7780     return isVI() || isGFX9();
7781 
7782   return Ctrl == "row_mirror" ||
7783          Ctrl == "row_half_mirror" ||
7784          Ctrl == "quad_perm" ||
7785          Ctrl == "row_shl" ||
7786          Ctrl == "row_shr" ||
7787          Ctrl == "row_ror";
7788 }
7789 
// Parse the "[a,b,c,d]" list of a quad_perm dpp control. Each element is a
// 2-bit lane selector; the packed value is returned, or -1 on parse failure
// (an error has already been emitted).
int64_t
AMDGPUAsmParser::parseDPPCtrlPerm() {
  // quad_perm:[%d,%d,%d,%d]

  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
    return -1;

  int64_t Val = 0;
  for (int i = 0; i < 4; ++i) {
    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
      return -1;

    int64_t Temp;
    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Temp))
      return -1;
    if (Temp < 0 || Temp > 3) {
      Error(Loc, "expected a 2-bit value");
      return -1;
    }

    // Pack selector i into bits [2*i+1 : 2*i].
    Val += (Temp << i * 2);
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return -1;

  return Val;
}
7819 
// Parse the numeric argument of a "ctrl:%d" dpp control and fold it into
// the dpp_ctrl encoding. Returns -1 on failure (an error has been emitted).
int64_t
AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
  using namespace AMDGPU::DPP;

  // sel:%d

  int64_t Val;
  SMLoc Loc = getLoc();

  if (getParser().parseAbsoluteExpression(Val))
    return -1;

  // Per-control base encoding and inclusive range of valid argument values.
  struct DppCtrlCheck {
    int64_t Ctrl;
    int Lo;
    int Hi;
  };

  DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
    .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
    .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
    .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
    .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
    .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
    .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
    .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
    .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
    .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
    .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
    .Default({-1, 0, 0});

  bool Valid;
  if (Check.Ctrl == -1) {
    // Not in the table: only "row_bcast" with 15 or 31 remains valid.
    Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
    Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
  } else {
    // Range-check the argument, then fold it into the base encoding.
    Valid = Check.Lo <= Val && Val <= Check.Hi;
    Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
  }

  if (!Valid) {
    Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
    return -1;
  }

  return Val;
}
7867 
// Parse an optional dpp control operand ("row_mirror", "quad_perm:[...]",
// "row_shl:%d", ...) and push it as an ImmTyDppCtrl immediate.
OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  // Bail out quietly unless the next token names a control supported on the
  // current subtarget.
  if (!isToken(AsmToken::Identifier) ||
      !isSupportedDPPCtrl(getTokenStr(), Operands))
    return MatchOperand_NoMatch;

  SMLoc S = getLoc();
  int64_t Val = -1;
  StringRef Ctrl;

  parseId(Ctrl);

  if (Ctrl == "row_mirror") {
    Val = DppCtrl::ROW_MIRROR;
  } else if (Ctrl == "row_half_mirror") {
    Val = DppCtrl::ROW_HALF_MIRROR;
  } else {
    // All remaining controls take a ":<arg>" suffix.
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (Ctrl == "quad_perm") {
        Val = parseDPPCtrlPerm();
      } else {
        Val = parseDPPCtrlSel(Ctrl);
      }
    }
  }

  // The helpers return -1 after emitting a diagnostic.
  if (Val == -1)
    return MatchOperand_ParseFail;

  Operands.push_back(
    AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}
7903 
// Default dpp row_mask (0xf = all rows enabled) when none was written.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}
7907 
// Default (zero) immediate for s_endpgm when no operand was written.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
}
7911 
// Default dpp bank_mask (0xf = all banks enabled) when none was written.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}
7915 
7916 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
7917   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
7918 }
7919 
7920 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
7921   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
7922 }
7923 
// Convert the parsed operand list into an MCInst for a DPP or DPP8
// instruction: explicit defs first, then sources (with FP input
// modifiers), the dpp ctrl / dpp8 selector, and finally the optional
// control immediates with their defaults.
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  // Operands[0] is the instruction mnemonic token; the explicit defs
  // follow it immediately.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    // If the next MCInst slot is tied to an earlier operand, duplicate
    // that operand into the slot before adding the parsed one.
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
      // Skip it.
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, 1);
      } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isFI()) {
        // fi is not appended here; it is emitted once, after the loop.
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments: remember where each optional
        // immediate was parsed so defaults can be filled in below.
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }

  if (IsDPP8) {
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
  } else {
    // Append optional immediates in encoding order, defaulting the
    // masks to 0xf (all rows/banks enabled) when not written.
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
    // fi only exists on opcodes that encode it.
    if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
    }
  }
}
7988 
7989 //===----------------------------------------------------------------------===//
7990 // sdwa
7991 //===----------------------------------------------------------------------===//
7992 
7993 OperandMatchResultTy
7994 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
7995                               AMDGPUOperand::ImmTy Type) {
7996   using namespace llvm::AMDGPU::SDWA;
7997 
7998   SMLoc S = getLoc();
7999   StringRef Value;
8000   OperandMatchResultTy res;
8001 
8002   SMLoc StringLoc;
8003   res = parseStringWithPrefix(Prefix, Value, StringLoc);
8004   if (res != MatchOperand_Success) {
8005     return res;
8006   }
8007 
8008   int64_t Int;
8009   Int = StringSwitch<int64_t>(Value)
8010         .Case("BYTE_0", SdwaSel::BYTE_0)
8011         .Case("BYTE_1", SdwaSel::BYTE_1)
8012         .Case("BYTE_2", SdwaSel::BYTE_2)
8013         .Case("BYTE_3", SdwaSel::BYTE_3)
8014         .Case("WORD_0", SdwaSel::WORD_0)
8015         .Case("WORD_1", SdwaSel::WORD_1)
8016         .Case("DWORD", SdwaSel::DWORD)
8017         .Default(0xffffffff);
8018 
8019   if (Int == 0xffffffff) {
8020     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8021     return MatchOperand_ParseFail;
8022   }
8023 
8024   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8025   return MatchOperand_Success;
8026 }
8027 
8028 OperandMatchResultTy
8029 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8030   using namespace llvm::AMDGPU::SDWA;
8031 
8032   SMLoc S = getLoc();
8033   StringRef Value;
8034   OperandMatchResultTy res;
8035 
8036   SMLoc StringLoc;
8037   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8038   if (res != MatchOperand_Success) {
8039     return res;
8040   }
8041 
8042   int64_t Int;
8043   Int = StringSwitch<int64_t>(Value)
8044         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8045         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8046         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8047         .Default(0xffffffff);
8048 
8049   if (Int == 0xffffffff) {
8050     Error(StringLoc, "invalid dst_unused value");
8051     return MatchOperand_ParseFail;
8052   }
8053 
8054   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8055   return MatchOperand_Success;
8056 }
8057 
// SDWA conversion for VOP1 instructions.
void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}
8061 
// SDWA conversion for VOP2 instructions.
void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}
8065 
// SDWA conversion for VOP2b instructions: skip the textual "vcc"
// tokens in both dst and src positions (SkipDstVcc/SkipSrcVcc).
void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
}
8069 
// SDWA conversion for VOP2e instructions: skip only a "vcc" source
// token (SkipSrcVcc), not one in the dst position.
void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}
8073 
// SDWA conversion for VOPC instructions. The leading "vcc" dst token is
// skipped only on VI (SkipDstVcc = isVI()) — presumably later targets
// encode an explicit sdst; confirm against the instruction definitions.
void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}
8077 
// Convert the parsed operand list into an MCInst for an SDWA
// (sub-dword addressing) instruction. BasicInstType selects the
// VOP1/VOP2/VOPC layout; SkipDstVcc/SkipSrcVcc drop textual "vcc"
// tokens that appear in the asm string but are not encoded as
// explicit operands.
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  // Operands[0] is the instruction mnemonic token; the explicit defs
  // follow it immediately.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments: remember where each optional
      // immediate was parsed so defaults can be filled in below.
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi/gfx9/gfx10 have no optional sdwa arguments.
    // Append the remaining optional immediates in encoding order,
    // using DWORD selectors and UNUSED_PRESERVE as defaults.
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      // VOPC has no dst_sel/dst_unused since its result goes to vcc.
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // special case v_mac_{f16, f32}:
  // it has src2 register operand that is tied to dst operand
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
    auto it = Inst.begin();
    std::advance(
      it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}
8173 
8174 //===----------------------------------------------------------------------===//
8175 // mAI
8176 //===----------------------------------------------------------------------===//
8177 
8178 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8179   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8180 }
8181 
8182 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8183   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8184 }
8185 
8186 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8187   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8188 }
8189 
8190 /// Force static initialization.
8191 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8192   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8193   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8194 }
8195 
8196 #define GET_REGISTER_MATCHER
8197 #define GET_MATCHER_IMPLEMENTATION
8198 #define GET_MNEMONIC_SPELL_CHECKER
8199 #define GET_MNEMONIC_CHECKER
8200 #include "AMDGPUGenAsmMatcher.inc"
8201 
// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined.
8204 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8205                                                      unsigned Kind) {
8206   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
8207   // But MatchInstructionImpl() expects to meet token and fails to validate
8208   // operand. This method checks if we are given immediate operand but expect to
8209   // get corresponding token.
8210   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8211   switch (Kind) {
8212   case MCK_addr64:
8213     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8214   case MCK_gds:
8215     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8216   case MCK_lds:
8217     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8218   case MCK_idxen:
8219     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8220   case MCK_offen:
8221     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8222   case MCK_SSrcB32:
8223     // When operands have expression values, they will return true for isToken,
8224     // because it is not possible to distinguish between a token and an
8225     // expression at parse time. MatchInstructionImpl() will always try to
8226     // match an operand as a token, when isToken returns true, and when the
8227     // name of the expression is not a valid token, the match will fail,
8228     // so we need to handle it here.
8229     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8230   case MCK_SSrcF32:
8231     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8232   case MCK_SoppBrTarget:
8233     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8234   case MCK_VReg32OrOff:
8235     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8236   case MCK_InterpSlot:
8237     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
8238   case MCK_Attr:
8239     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
8240   case MCK_AttrChan:
8241     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
8242   case MCK_ImmSMEMOffset:
8243     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
8244   case MCK_SReg_64:
8245   case MCK_SReg_64_XEXEC:
8246     // Null is defined as a 32-bit register but
8247     // it should also be enabled with 64-bit operands.
8248     // The following code enables it for SReg_64 operands
8249     // used as source and destination. Remaining source
8250     // operands are handled in isInlinableImm.
8251     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
8252   default:
8253     return Match_InvalidOperand;
8254   }
8255 }
8256 
8257 //===----------------------------------------------------------------------===//
8258 // endpgm
8259 //===----------------------------------------------------------------------===//
8260 
8261 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
8262   SMLoc S = getLoc();
8263   int64_t Imm = 0;
8264 
8265   if (!parseExpr(Imm)) {
8266     // The operand is optional, if not present default to 0
8267     Imm = 0;
8268   }
8269 
8270   if (!isUInt<16>(Imm)) {
8271     Error(S, "expected a 16-bit value");
8272     return MatchOperand_ParseFail;
8273   }
8274 
8275   Operands.push_back(
8276       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
8277   return MatchOperand_Success;
8278 }
8279 
8280 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
8281