1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/MC/MCAsmInfo.h"
24 #include "llvm/MC/MCContext.h"
25 #include "llvm/MC/MCExpr.h"
26 #include "llvm/MC/MCInst.h"
27 #include "llvm/MC/MCParser/MCAsmParser.h"
28 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
29 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
30 #include "llvm/MC/MCSymbol.h"
31 #include "llvm/Support/AMDGPUMetadata.h"
32 #include "llvm/Support/AMDHSAKernelDescriptor.h"
33 #include "llvm/Support/Casting.h"
34 #include "llvm/Support/MachineValueType.h"
35 #include "llvm/Support/TargetParser.h"
36 #include "llvm/Support/TargetRegistry.h"
37 
38 using namespace llvm;
39 using namespace llvm::AMDGPU;
40 using namespace llvm::amdhsa;
41 
42 namespace {
43 
44 class AMDGPUAsmParser;
45 
46 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
47 
48 //===----------------------------------------------------------------------===//
49 // Operand
50 //===----------------------------------------------------------------------===//
51 
52 class AMDGPUOperand : public MCParsedAsmOperand {
53   enum KindTy {
54     Token,
55     Immediate,
56     Register,
57     Expression
58   } Kind;
59 
60   SMLoc StartLoc, EndLoc;
61   const AMDGPUAsmParser *AsmParser;
62 
63 public:
64   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
65     : Kind(Kind_), AsmParser(AsmParser_) {}
66 
67   using Ptr = std::unique_ptr<AMDGPUOperand>;
68 
69   struct Modifiers {
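    // Source operand modifiers. Abs/Neg are floating-point modifiers and Sext
    // is an integer modifier; the two kinds are mutually exclusive on a single
    // operand (see getModifiersOperand()). In assembly they appear as, e.g.,
    // "-|v0|" (neg+abs) or "sext(v0)".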
70     bool Abs = false;
71     bool Neg = false;
72     bool Sext = false;
73 
74     bool hasFPModifiers() const { return Abs || Neg; }
75     bool hasIntModifiers() const { return Sext; }
76     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
77 
78     int64_t getFPModifiersOperand() const {
79       int64_t Operand = 0;
80       Operand |= Abs ? SISrcMods::ABS : 0u;
81       Operand |= Neg ? SISrcMods::NEG : 0u;
82       return Operand;
83     }
84 
85     int64_t getIntModifiersOperand() const {
86       int64_t Operand = 0;
87       Operand |= Sext ? SISrcMods::SEXT : 0u;
88       return Operand;
89     }
90 
91     int64_t getModifiersOperand() const {
92       assert(!(hasFPModifiers() && hasIntModifiers())
93            && "fp and int modifiers should not be used simultaneously");
94       if (hasFPModifiers()) {
95         return getFPModifiersOperand();
96       } else if (hasIntModifiers()) {
97         return getIntModifiersOperand();
98       } else {
99         return 0;
100       }
101     }
102 
103     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
104   };
105 
106   enum ImmTy {
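    // Identifies which named/optional instruction operand an immediate stands
    // for; tested via isImmTy() and printed by printImmTy() below.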
107     ImmTyNone,
108     ImmTyGDS,
109     ImmTyLDS,
110     ImmTyOffen,
111     ImmTyIdxen,
112     ImmTyAddr64,
113     ImmTyOffset,
114     ImmTyInstOffset,
115     ImmTyOffset0,
116     ImmTyOffset1,
117     ImmTyCPol,
118     ImmTySWZ,
119     ImmTyTFE,
120     ImmTyD16,
121     ImmTyClampSI,
122     ImmTyOModSI,
123     ImmTyDPP8,
124     ImmTyDppCtrl,
125     ImmTyDppRowMask,
126     ImmTyDppBankMask,
127     ImmTyDppBoundCtrl,
128     ImmTyDppFi,
129     ImmTySdwaDstSel,
130     ImmTySdwaSrc0Sel,
131     ImmTySdwaSrc1Sel,
132     ImmTySdwaDstUnused,
133     ImmTyDMask,
134     ImmTyDim,
135     ImmTyUNorm,
136     ImmTyDA,
137     ImmTyR128A16,
138     ImmTyA16,
139     ImmTyLWE,
140     ImmTyExpTgt,
141     ImmTyExpCompr,
142     ImmTyExpVM,
143     ImmTyFORMAT,
144     ImmTyHwreg,
145     ImmTyOff,
146     ImmTySendMsg,
147     ImmTyInterpSlot,
148     ImmTyInterpAttr,
149     ImmTyAttrChan,
150     ImmTyOpSel,
151     ImmTyOpSelHi,
152     ImmTyNegLo,
153     ImmTyNegHi,
154     ImmTySwizzle,
155     ImmTyGprIdxMode,
156     ImmTyHigh,
157     ImmTyBLGP,
158     ImmTyCBSZ,
159     ImmTyABID,
160     ImmTyEndpgm,
161   };
162 
163   enum ImmKindTy {
164     ImmKindTyNone,
165     ImmKindTyLiteral,
166     ImmKindTyConst,
167   };
168 
169 private:
170   struct TokOp {
171     const char *Data;
172     unsigned Length;
173   };
174 
175   struct ImmOp {
176     int64_t Val;
177     ImmTy Type;
178     bool IsFPImm;
179     mutable ImmKindTy Kind;
180     Modifiers Mods;
181   };
182 
183   struct RegOp {
184     unsigned RegNo;
185     Modifiers Mods;
186   };
187 
188   union {
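    // Operand payload; the active member is selected by Kind.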
189     TokOp Tok;
190     ImmOp Imm;
191     RegOp Reg;
192     const MCExpr *Expr;
193   };
194 
195 public:
196   bool isToken() const override {
197     if (Kind == Token)
198       return true;
199 
200     // When parsing operands, we can't always tell if something was meant to be
201     // a token, like 'gds', or an expression that references a global variable.
202     // In this case, we assume the string is an expression, and if we need to
203     // interpret it as a token, then we treat the symbol name as the token.
204     return isSymbolRefExpr();
205   }
206 
207   bool isSymbolRefExpr() const {
208     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
209   }
210 
211   bool isImm() const override {
212     return Kind == Immediate;
213   }
214 
215   void setImmKindNone() const {
216     assert(isImm());
217     Imm.Kind = ImmKindTyNone;
218   }
219 
220   void setImmKindLiteral() const {
221     assert(isImm());
222     Imm.Kind = ImmKindTyLiteral;
223   }
224 
225   void setImmKindConst() const {
226     assert(isImm());
227     Imm.Kind = ImmKindTyConst;
228   }
229 
230   bool IsImmKindLiteral() const {
231     return isImm() && Imm.Kind == ImmKindTyLiteral;
232   }
233 
234   bool isImmKindConst() const {
235     return isImm() && Imm.Kind == ImmKindTyConst;
236   }
237 
238   bool isInlinableImm(MVT type) const;
239   bool isLiteralImm(MVT type) const;
240 
241   bool isRegKind() const {
242     return Kind == Register;
243   }
244 
245   bool isReg() const override {
246     return isRegKind() && !hasModifiers();
247   }
248 
249   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
250     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
251   }
252 
253   bool isRegOrImmWithInt16InputMods() const {
254     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
255   }
256 
257   bool isRegOrImmWithInt32InputMods() const {
258     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
259   }
260 
261   bool isRegOrImmWithInt64InputMods() const {
262     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
263   }
264 
265   bool isRegOrImmWithFP16InputMods() const {
266     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
267   }
268 
269   bool isRegOrImmWithFP32InputMods() const {
270     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
271   }
272 
273   bool isRegOrImmWithFP64InputMods() const {
274     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
275   }
276 
277   bool isVReg() const {
278     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
279            isRegClass(AMDGPU::VReg_64RegClassID) ||
280            isRegClass(AMDGPU::VReg_96RegClassID) ||
281            isRegClass(AMDGPU::VReg_128RegClassID) ||
282            isRegClass(AMDGPU::VReg_160RegClassID) ||
283            isRegClass(AMDGPU::VReg_192RegClassID) ||
284            isRegClass(AMDGPU::VReg_256RegClassID) ||
285            isRegClass(AMDGPU::VReg_512RegClassID) ||
286            isRegClass(AMDGPU::VReg_1024RegClassID);
287   }
288 
289   bool isVReg32() const {
290     return isRegClass(AMDGPU::VGPR_32RegClassID);
291   }
292 
293   bool isVReg32OrOff() const {
294     return isOff() || isVReg32();
295   }
296 
297   bool isNull() const {
298     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
299   }
300 
301   bool isVRegWithInputMods() const;
302 
303   bool isSDWAOperand(MVT type) const;
304   bool isSDWAFP16Operand() const;
305   bool isSDWAFP32Operand() const;
306   bool isSDWAInt16Operand() const;
307   bool isSDWAInt32Operand() const;
308 
309   bool isImmTy(ImmTy ImmT) const {
310     return isImm() && Imm.Type == ImmT;
311   }
312 
313   bool isImmModifier() const {
314     return isImm() && Imm.Type != ImmTyNone;
315   }
316 
317   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
318   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
319   bool isDMask() const { return isImmTy(ImmTyDMask); }
320   bool isDim() const { return isImmTy(ImmTyDim); }
321   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
322   bool isDA() const { return isImmTy(ImmTyDA); }
323   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
324   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
325   bool isLWE() const { return isImmTy(ImmTyLWE); }
326   bool isOff() const { return isImmTy(ImmTyOff); }
327   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
328   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
329   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
330   bool isOffen() const { return isImmTy(ImmTyOffen); }
331   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
332   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
333   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
334   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
335   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
336 
337   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
338   bool isGDS() const { return isImmTy(ImmTyGDS); }
339   bool isLDS() const { return isImmTy(ImmTyLDS); }
340   bool isCPol() const { return isImmTy(ImmTyCPol); }
341   bool isSWZ() const { return isImmTy(ImmTySWZ); }
342   bool isTFE() const { return isImmTy(ImmTyTFE); }
343   bool isD16() const { return isImmTy(ImmTyD16); }
344   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
345   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
346   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
347   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
348   bool isFI() const { return isImmTy(ImmTyDppFi); }
349   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
350   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
351   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
352   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
353   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
354   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
355   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
356   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
357   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
358   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
359   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
360   bool isHigh() const { return isImmTy(ImmTyHigh); }
361 
362   bool isMod() const {
363     return isClampSI() || isOModSI();
364   }
365 
366   bool isRegOrImm() const {
367     return isReg() || isImm();
368   }
369 
370   bool isRegClass(unsigned RCID) const;
371 
372   bool isInlineValue() const;
373 
374   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
375     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
376   }
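
  // Naming convention for the source operand predicates below:
  //   isSCSrc* - SGPR or inline constant, no modifiers
  //   isSSrc*  - isSCSrc* plus literals (and expressions for some widths)
  //   isVCSrc* - VGPR/SGPR or inline constant, no modifiers
  //   isVSrc*  - isVCSrc* plus literals (and expressions for some widths)
  //   isVISrc* - VGPR (or VGPR tuple) or inline constant
  //   isAISrc* - AGPR (or AGPR tuple) or inline constant
  // The suffix gives the expected operand type (B32, F16, V2B16, ...).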
377 
378   bool isSCSrcB16() const {
379     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
380   }
381 
382   bool isSCSrcV2B16() const {
383     return isSCSrcB16();
384   }
385 
386   bool isSCSrcB32() const {
387     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
388   }
389 
390   bool isSCSrcB64() const {
391     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
392   }
393 
394   bool isBoolReg() const;
395 
396   bool isSCSrcF16() const {
397     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
398   }
399 
400   bool isSCSrcV2F16() const {
401     return isSCSrcF16();
402   }
403 
404   bool isSCSrcF32() const {
405     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
406   }
407 
408   bool isSCSrcF64() const {
409     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
410   }
411 
412   bool isSSrcB32() const {
413     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
414   }
415 
416   bool isSSrcB16() const {
417     return isSCSrcB16() || isLiteralImm(MVT::i16);
418   }
419 
420   bool isSSrcV2B16() const {
421     llvm_unreachable("cannot happen");
422     return isSSrcB16();
423   }
424 
425   bool isSSrcB64() const {
426     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
427     // See isVSrcB64().
428     return isSCSrcB64() || isLiteralImm(MVT::i64);
429   }
430 
431   bool isSSrcF32() const {
432     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
433   }
434 
435   bool isSSrcF64() const {
436     return isSCSrcB64() || isLiteralImm(MVT::f64);
437   }
438 
439   bool isSSrcF16() const {
440     return isSCSrcB16() || isLiteralImm(MVT::f16);
441   }
442 
443   bool isSSrcV2F16() const {
444     llvm_unreachable("cannot happen");
445     return isSSrcF16();
446   }
447 
448   bool isSSrcV2FP32() const {
449     llvm_unreachable("cannot happen");
450     return isSSrcF32();
451   }
452 
453   bool isSCSrcV2FP32() const {
454     llvm_unreachable("cannot happen");
455     return isSCSrcF32();
456   }
457 
458   bool isSSrcV2INT32() const {
459     llvm_unreachable("cannot happen");
460     return isSSrcB32();
461   }
462 
463   bool isSCSrcV2INT32() const {
464     llvm_unreachable("cannot happen");
465     return isSCSrcB32();
466   }
467 
468   bool isSSrcOrLdsB32() const {
469     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
470            isLiteralImm(MVT::i32) || isExpr();
471   }
472 
473   bool isVCSrcB32() const {
474     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
475   }
476 
477   bool isVCSrcB64() const {
478     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
479   }
480 
481   bool isVCSrcB16() const {
482     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
483   }
484 
485   bool isVCSrcV2B16() const {
486     return isVCSrcB16();
487   }
488 
489   bool isVCSrcF32() const {
490     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
491   }
492 
493   bool isVCSrcF64() const {
494     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
495   }
496 
497   bool isVCSrcF16() const {
498     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
499   }
500 
501   bool isVCSrcV2F16() const {
502     return isVCSrcF16();
503   }
504 
505   bool isVSrcB32() const {
506     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
507   }
508 
509   bool isVSrcB64() const {
510     return isVCSrcF64() || isLiteralImm(MVT::i64);
511   }
512 
513   bool isVSrcB16() const {
514     return isVCSrcB16() || isLiteralImm(MVT::i16);
515   }
516 
517   bool isVSrcV2B16() const {
518     return isVSrcB16() || isLiteralImm(MVT::v2i16);
519   }
520 
521   bool isVCSrcV2FP32() const {
522     return isVCSrcF64();
523   }
524 
525   bool isVSrcV2FP32() const {
526     return isVSrcF64() || isLiteralImm(MVT::v2f32);
527   }
528 
529   bool isVCSrcV2INT32() const {
530     return isVCSrcB64();
531   }
532 
533   bool isVSrcV2INT32() const {
534     return isVSrcB64() || isLiteralImm(MVT::v2i32);
535   }
536 
537   bool isVSrcF32() const {
538     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
539   }
540 
541   bool isVSrcF64() const {
542     return isVCSrcF64() || isLiteralImm(MVT::f64);
543   }
544 
545   bool isVSrcF16() const {
546     return isVCSrcF16() || isLiteralImm(MVT::f16);
547   }
548 
549   bool isVSrcV2F16() const {
550     return isVSrcF16() || isLiteralImm(MVT::v2f16);
551   }
552 
553   bool isVISrcB32() const {
554     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
555   }
556 
557   bool isVISrcB16() const {
558     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
559   }
560 
561   bool isVISrcV2B16() const {
562     return isVISrcB16();
563   }
564 
565   bool isVISrcF32() const {
566     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
567   }
568 
569   bool isVISrcF16() const {
570     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
571   }
572 
573   bool isVISrcV2F16() const {
574     return isVISrcF16() || isVISrcB32();
575   }
576 
577   bool isVISrc_64B64() const {
578     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
579   }
580 
581   bool isVISrc_64F64() const {
582     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
583   }
584 
585   bool isVISrc_64V2FP32() const {
586     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
587   }
588 
589   bool isVISrc_64V2INT32() const {
590     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
591   }
592 
593   bool isVISrc_256B64() const {
594     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
595   }
596 
597   bool isVISrc_256F64() const {
598     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
599   }
600 
601   bool isVISrc_128B16() const {
602     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
603   }
604 
605   bool isVISrc_128V2B16() const {
606     return isVISrc_128B16();
607   }
608 
609   bool isVISrc_128B32() const {
610     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
611   }
612 
613   bool isVISrc_128F32() const {
614     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
615   }
616 
617   bool isVISrc_256V2FP32() const {
618     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
619   }
620 
621   bool isVISrc_256V2INT32() const {
622     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
623   }
624 
625   bool isVISrc_512B32() const {
626     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
627   }
628 
629   bool isVISrc_512B16() const {
630     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
631   }
632 
633   bool isVISrc_512V2B16() const {
634     return isVISrc_512B16();
635   }
636 
637   bool isVISrc_512F32() const {
638     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
639   }
640 
641   bool isVISrc_512F16() const {
642     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
643   }
644 
645   bool isVISrc_512V2F16() const {
646     return isVISrc_512F16() || isVISrc_512B32();
647   }
648 
649   bool isVISrc_1024B32() const {
650     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
651   }
652 
653   bool isVISrc_1024B16() const {
654     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
655   }
656 
657   bool isVISrc_1024V2B16() const {
658     return isVISrc_1024B16();
659   }
660 
661   bool isVISrc_1024F32() const {
662     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
663   }
664 
665   bool isVISrc_1024F16() const {
666     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
667   }
668 
669   bool isVISrc_1024V2F16() const {
670     return isVISrc_1024F16() || isVISrc_1024B32();
671   }
672 
673   bool isAISrcB32() const {
674     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
675   }
676 
677   bool isAISrcB16() const {
678     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
679   }
680 
681   bool isAISrcV2B16() const {
682     return isAISrcB16();
683   }
684 
685   bool isAISrcF32() const {
686     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
687   }
688 
689   bool isAISrcF16() const {
690     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
691   }
692 
693   bool isAISrcV2F16() const {
694     return isAISrcF16() || isAISrcB32();
695   }
696 
697   bool isAISrc_64B64() const {
698     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
699   }
700 
701   bool isAISrc_64F64() const {
702     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
703   }
704 
705   bool isAISrc_128B32() const {
706     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
707   }
708 
709   bool isAISrc_128B16() const {
710     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
711   }
712 
713   bool isAISrc_128V2B16() const {
714     return isAISrc_128B16();
715   }
716 
717   bool isAISrc_128F32() const {
718     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
719   }
720 
721   bool isAISrc_128F16() const {
722     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
723   }
724 
725   bool isAISrc_128V2F16() const {
726     return isAISrc_128F16() || isAISrc_128B32();
727   }
728 
729   bool isVISrc_128F16() const {
730     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
731   }
732 
733   bool isVISrc_128V2F16() const {
734     return isVISrc_128F16() || isVISrc_128B32();
735   }
736 
737   bool isAISrc_256B64() const {
738     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
739   }
740 
741   bool isAISrc_256F64() const {
742     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
743   }
744 
745   bool isAISrc_512B32() const {
746     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
747   }
748 
749   bool isAISrc_512B16() const {
750     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
751   }
752 
753   bool isAISrc_512V2B16() const {
754     return isAISrc_512B16();
755   }
756 
757   bool isAISrc_512F32() const {
758     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
759   }
760 
761   bool isAISrc_512F16() const {
762     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
763   }
764 
765   bool isAISrc_512V2F16() const {
766     return isAISrc_512F16() || isAISrc_512B32();
767   }
768 
769   bool isAISrc_1024B32() const {
770     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
771   }
772 
773   bool isAISrc_1024B16() const {
774     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
775   }
776 
777   bool isAISrc_1024V2B16() const {
778     return isAISrc_1024B16();
779   }
780 
781   bool isAISrc_1024F32() const {
782     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
783   }
784 
785   bool isAISrc_1024F16() const {
786     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
787   }
788 
789   bool isAISrc_1024V2F16() const {
790     return isAISrc_1024F16() || isAISrc_1024B32();
791   }
792 
793   bool isKImmFP32() const {
794     return isLiteralImm(MVT::f32);
795   }
796 
797   bool isKImmFP16() const {
798     return isLiteralImm(MVT::f16);
799   }
800 
801   bool isMem() const override {
802     return false;
803   }
804 
805   bool isExpr() const {
806     return Kind == Expression;
807   }
808 
809   bool isSoppBrTarget() const {
810     return isExpr() || isImm();
811   }
812 
813   bool isSWaitCnt() const;
814   bool isHwreg() const;
815   bool isSendMsg() const;
816   bool isSwizzle() const;
817   bool isSMRDOffset8() const;
818   bool isSMEMOffset() const;
819   bool isSMRDLiteralOffset() const;
820   bool isDPP8() const;
821   bool isDPPCtrl() const;
822   bool isBLGP() const;
823   bool isCBSZ() const;
824   bool isABID() const;
825   bool isGPRIdxMode() const;
826   bool isS16Imm() const;
827   bool isU16Imm() const;
828   bool isEndpgm() const;
829 
830   StringRef getExpressionAsToken() const {
831     assert(isExpr());
832     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
833     return S->getSymbol().getName();
834   }
835 
836   StringRef getToken() const {
837     assert(isToken());
838 
839     if (Kind == Expression)
840       return getExpressionAsToken();
841 
842     return StringRef(Tok.Data, Tok.Length);
843   }
844 
845   int64_t getImm() const {
846     assert(isImm());
847     return Imm.Val;
848   }
849 
850   void setImm(int64_t Val) {
851     assert(isImm());
852     Imm.Val = Val;
853   }
854 
855   ImmTy getImmTy() const {
856     assert(isImm());
857     return Imm.Type;
858   }
859 
860   unsigned getReg() const override {
861     assert(isRegKind());
862     return Reg.RegNo;
863   }
864 
865   SMLoc getStartLoc() const override {
866     return StartLoc;
867   }
868 
869   SMLoc getEndLoc() const override {
870     return EndLoc;
871   }
872 
873   SMRange getLocRange() const {
874     return SMRange(StartLoc, EndLoc);
875   }
876 
877   Modifiers getModifiers() const {
878     assert(isRegKind() || isImmTy(ImmTyNone));
879     return isRegKind() ? Reg.Mods : Imm.Mods;
880   }
881 
882   void setModifiers(Modifiers Mods) {
883     assert(isRegKind() || isImmTy(ImmTyNone));
884     if (isRegKind())
885       Reg.Mods = Mods;
886     else
887       Imm.Mods = Mods;
888   }
889 
890   bool hasModifiers() const {
891     return getModifiers().hasModifiers();
892   }
893 
894   bool hasFPModifiers() const {
895     return getModifiers().hasFPModifiers();
896   }
897 
898   bool hasIntModifiers() const {
899     return getModifiers().hasIntModifiers();
900   }
901 
902   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
903 
904   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
905 
906   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
907 
908   template <unsigned Bitwidth>
909   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
910 
911   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
912     addKImmFPOperands<16>(Inst, N);
913   }
914 
915   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
916     addKImmFPOperands<32>(Inst, N);
917   }
918 
919   void addRegOperands(MCInst &Inst, unsigned N) const;
920 
921   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
922     addRegOperands(Inst, N);
923   }
924 
925   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
926     if (isRegKind())
927       addRegOperands(Inst, N);
928     else if (isExpr())
929       Inst.addOperand(MCOperand::createExpr(Expr));
930     else
931       addImmOperands(Inst, N);
932   }
933 
934   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
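    // The modifier bits are emitted as a separate immediate operand that
    // precedes the source register/immediate itself.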
935     Modifiers Mods = getModifiers();
936     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
937     if (isRegKind()) {
938       addRegOperands(Inst, N);
939     } else {
940       addImmOperands(Inst, N, false);
941     }
942   }
943 
944   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
945     assert(!hasIntModifiers());
946     addRegOrImmWithInputModsOperands(Inst, N);
947   }
948 
949   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
950     assert(!hasFPModifiers());
951     addRegOrImmWithInputModsOperands(Inst, N);
952   }
953 
954   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
955     Modifiers Mods = getModifiers();
956     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
957     assert(isRegKind());
958     addRegOperands(Inst, N);
959   }
960 
961   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
962     assert(!hasIntModifiers());
963     addRegWithInputModsOperands(Inst, N);
964   }
965 
966   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
967     assert(!hasFPModifiers());
968     addRegWithInputModsOperands(Inst, N);
969   }
970 
971   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
972     if (isImm())
973       addImmOperands(Inst, N);
974     else {
975       assert(isExpr());
976       Inst.addOperand(MCOperand::createExpr(Expr));
977     }
978   }
979 
980   static void printImmTy(raw_ostream& OS, ImmTy Type) {
981     switch (Type) {
982     case ImmTyNone: OS << "None"; break;
983     case ImmTyGDS: OS << "GDS"; break;
984     case ImmTyLDS: OS << "LDS"; break;
985     case ImmTyOffen: OS << "Offen"; break;
986     case ImmTyIdxen: OS << "Idxen"; break;
987     case ImmTyAddr64: OS << "Addr64"; break;
988     case ImmTyOffset: OS << "Offset"; break;
989     case ImmTyInstOffset: OS << "InstOffset"; break;
990     case ImmTyOffset0: OS << "Offset0"; break;
991     case ImmTyOffset1: OS << "Offset1"; break;
992     case ImmTyCPol: OS << "CPol"; break;
993     case ImmTySWZ: OS << "SWZ"; break;
994     case ImmTyTFE: OS << "TFE"; break;
995     case ImmTyD16: OS << "D16"; break;
996     case ImmTyFORMAT: OS << "FORMAT"; break;
997     case ImmTyClampSI: OS << "ClampSI"; break;
998     case ImmTyOModSI: OS << "OModSI"; break;
999     case ImmTyDPP8: OS << "DPP8"; break;
1000     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1001     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1002     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1003     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1004     case ImmTyDppFi: OS << "FI"; break;
1005     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1006     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1007     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1008     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1009     case ImmTyDMask: OS << "DMask"; break;
1010     case ImmTyDim: OS << "Dim"; break;
1011     case ImmTyUNorm: OS << "UNorm"; break;
1012     case ImmTyDA: OS << "DA"; break;
1013     case ImmTyR128A16: OS << "R128A16"; break;
1014     case ImmTyA16: OS << "A16"; break;
1015     case ImmTyLWE: OS << "LWE"; break;
1016     case ImmTyOff: OS << "Off"; break;
1017     case ImmTyExpTgt: OS << "ExpTgt"; break;
1018     case ImmTyExpCompr: OS << "ExpCompr"; break;
1019     case ImmTyExpVM: OS << "ExpVM"; break;
1020     case ImmTyHwreg: OS << "Hwreg"; break;
1021     case ImmTySendMsg: OS << "SendMsg"; break;
1022     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1023     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1024     case ImmTyAttrChan: OS << "AttrChan"; break;
1025     case ImmTyOpSel: OS << "OpSel"; break;
1026     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1027     case ImmTyNegLo: OS << "NegLo"; break;
1028     case ImmTyNegHi: OS << "NegHi"; break;
1029     case ImmTySwizzle: OS << "Swizzle"; break;
1030     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1031     case ImmTyHigh: OS << "High"; break;
1032     case ImmTyBLGP: OS << "BLGP"; break;
1033     case ImmTyCBSZ: OS << "CBSZ"; break;
1034     case ImmTyABID: OS << "ABID"; break;
1035     case ImmTyEndpgm: OS << "Endpgm"; break;
1036     }
1037   }
1038 
1039   void print(raw_ostream &OS) const override {
1040     switch (Kind) {
1041     case Register:
1042       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1043       break;
1044     case Immediate:
1045       OS << '<' << getImm();
1046       if (getImmTy() != ImmTyNone) {
1047         OS << " type: "; printImmTy(OS, getImmTy());
1048       }
1049       OS << " mods: " << Imm.Mods << '>';
1050       break;
1051     case Token:
1052       OS << '\'' << getToken() << '\'';
1053       break;
1054     case Expression:
1055       OS << "<expr " << *Expr << '>';
1056       break;
1057     }
1058   }
1059 
1060   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1061                                       int64_t Val, SMLoc Loc,
1062                                       ImmTy Type = ImmTyNone,
1063                                       bool IsFPImm = false) {
1064     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1065     Op->Imm.Val = Val;
1066     Op->Imm.IsFPImm = IsFPImm;
1067     Op->Imm.Kind = ImmKindTyNone;
1068     Op->Imm.Type = Type;
1069     Op->Imm.Mods = Modifiers();
1070     Op->StartLoc = Loc;
1071     Op->EndLoc = Loc;
1072     return Op;
1073   }
1074 
1075   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1076                                         StringRef Str, SMLoc Loc,
1077                                         bool HasExplicitEncodingSize = true) {
1078     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1079     Res->Tok.Data = Str.data();
1080     Res->Tok.Length = Str.size();
1081     Res->StartLoc = Loc;
1082     Res->EndLoc = Loc;
1083     return Res;
1084   }
1085 
1086   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1087                                       unsigned RegNo, SMLoc S,
1088                                       SMLoc E) {
1089     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1090     Op->Reg.RegNo = RegNo;
1091     Op->Reg.Mods = Modifiers();
1092     Op->StartLoc = S;
1093     Op->EndLoc = E;
1094     return Op;
1095   }
1096 
1097   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1098                                        const class MCExpr *Expr, SMLoc S) {
1099     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1100     Op->Expr = Expr;
1101     Op->StartLoc = S;
1102     Op->EndLoc = S;
1103     return Op;
1104   }
1105 };
1106 
1107 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1108   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1109   return OS;
1110 }
1111 
1112 //===----------------------------------------------------------------------===//
1113 // AsmParser
1114 //===----------------------------------------------------------------------===//
1115 
1116 // Holds info related to the current kernel, e.g. the count of SGPRs used.
1117 // A kernel scope begins at an .amdgpu_hsa_kernel directive and ends at the
1118 // next .amdgpu_hsa_kernel directive or at EOF.
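// Register usage is published through the .kernel.sgpr_count and
// .kernel.vgpr_count symbols (see usesSgprAt()/usesVgprAt() below).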
1119 class KernelScopeInfo {
1120   int SgprIndexUnusedMin = -1;
1121   int VgprIndexUnusedMin = -1;
1122   MCContext *Ctx = nullptr;
1123 
1124   void usesSgprAt(int i) {
1125     if (i >= SgprIndexUnusedMin) {
1126       SgprIndexUnusedMin = ++i;
1127       if (Ctx) {
1128         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1129         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1130       }
1131     }
1132   }
1133 
1134   void usesVgprAt(int i) {
1135     if (i >= VgprIndexUnusedMin) {
1136       VgprIndexUnusedMin = ++i;
1137       if (Ctx) {
1138         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1139         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
1140       }
1141     }
1142   }
1143 
1144 public:
1145   KernelScopeInfo() = default;
1146 
1147   void initialize(MCContext &Context) {
1148     Ctx = &Context;
1149     usesSgprAt(SgprIndexUnusedMin = -1);
1150     usesVgprAt(VgprIndexUnusedMin = -1);
1151   }
1152 
1153   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
1154     switch (RegKind) {
1155       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
1156       case IS_AGPR: // fall through
1157       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1158       default: break;
1159     }
1160   }
1161 };
1162 
1163 class AMDGPUAsmParser : public MCTargetAsmParser {
1164   MCAsmParser &Parser;
1165 
1166   // Maximum number of extra operands parsed after the first optional operand.
1167   // This lookahead may be necessary to skip hardcoded mandatory operands.
1168   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1169 
1170   unsigned ForcedEncodingSize = 0;
1171   bool ForcedDPP = false;
1172   bool ForcedSDWA = false;
1173   KernelScopeInfo KernelScope;
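  // Cache-policy (CPol) modifier bits already seen on the instruction being
  // parsed (see parseCPol()).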
1174   unsigned CPolSeen;
1175 
1176   /// @name Auto-generated Match Functions
1177   /// {
1178 
1179 #define GET_ASSEMBLER_HEADER
1180 #include "AMDGPUGenAsmMatcher.inc"
1181 
1182   /// }
1183 
1184 private:
1185   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1186   bool OutOfRangeError(SMRange Range);
1187   /// Calculate the VGPR/SGPR blocks required for the given target, reserved
1188   /// registers, and user-specified NextFreeXGPR values.
1189   ///
1190   /// \param Features [in] Target features, used for bug corrections.
1191   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1192   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1193   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1194   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1195   /// descriptor field, if valid.
1196   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1197   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1198   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1199   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1200   /// \param VGPRBlocks [out] Result VGPR block count.
1201   /// \param SGPRBlocks [out] Result SGPR block count.
1202   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1203                           bool FlatScrUsed, bool XNACKUsed,
1204                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1205                           SMRange VGPRRange, unsigned NextFreeSGPR,
1206                           SMRange SGPRRange, unsigned &VGPRBlocks,
1207                           unsigned &SGPRBlocks);
1208   bool ParseDirectiveAMDGCNTarget();
1209   bool ParseDirectiveAMDHSAKernel();
1210   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1211   bool ParseDirectiveHSACodeObjectVersion();
1212   bool ParseDirectiveHSACodeObjectISA();
1213   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1214   bool ParseDirectiveAMDKernelCodeT();
1215   // TODO: Possibly make subtargetHasRegister const.
1216   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1217   bool ParseDirectiveAMDGPUHsaKernel();
1218 
1219   bool ParseDirectiveISAVersion();
1220   bool ParseDirectiveHSAMetadata();
1221   bool ParseDirectivePALMetadataBegin();
1222   bool ParseDirectivePALMetadata();
1223   bool ParseDirectiveAMDGPULDS();
1224 
1225   /// Common code to parse out a block of text (typically YAML) between start and
1226   /// end directives.
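  /// Used, for example, by the HSA metadata and PAL metadata directives.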
1227   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1228                            const char *AssemblerDirectiveEnd,
1229                            std::string &CollectString);
1230 
1231   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1232                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1233   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1234                            unsigned &RegNum, unsigned &RegWidth,
1235                            bool RestoreOnFailure = false);
1236   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1237                            unsigned &RegNum, unsigned &RegWidth,
1238                            SmallVectorImpl<AsmToken> &Tokens);
1239   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1240                            unsigned &RegWidth,
1241                            SmallVectorImpl<AsmToken> &Tokens);
1242   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1243                            unsigned &RegWidth,
1244                            SmallVectorImpl<AsmToken> &Tokens);
1245   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1246                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1247   bool ParseRegRange(unsigned& Num, unsigned& Width);
1248   unsigned getRegularReg(RegisterKind RegKind,
1249                          unsigned RegNum,
1250                          unsigned RegWidth,
1251                          SMLoc Loc);
1252 
1253   bool isRegister();
1254   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1255   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1256   void initializeGprCountSymbol(RegisterKind RegKind);
1257   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1258                              unsigned RegWidth);
1259   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1260                     bool IsAtomic, bool IsLds = false);
1261   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1262                  bool IsGdsHardcoded);
1263 
1264 public:
1265   enum AMDGPUMatchResultTy {
1266     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1267   };
1268   enum OperandMode {
1269     OperandMode_Default,
1270     OperandMode_NSA,
1271   };
1272 
1273   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1274 
1275   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1276                const MCInstrInfo &MII,
1277                const MCTargetOptions &Options)
1278       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1279     MCAsmParserExtension::Initialize(Parser);
1280 
1281     if (getFeatureBits().none()) {
1282       // Set default features.
1283       copySTI().ToggleFeature("southern-islands");
1284     }
1285 
1286     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1287 
1288     {
1289       // TODO: make these pre-defined variables read-only.
1290       // Currently there is no suitable machinery in the core llvm-mc for this.
1291       // MCSymbol::isRedefinable is intended for another purpose, and
1292       // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
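      // These symbols can be referenced from assembly source, e.g. in
      // conditional assembly such as: .if .option.machine_version_major >= 8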
1293       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1294       MCContext &Ctx = getContext();
1295       if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
1296         MCSymbol *Sym =
1297             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1298         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1299         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1300         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1301         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1302         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1303       } else {
1304         MCSymbol *Sym =
1305             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1306         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1307         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1308         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1309         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1310         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1311       }
1312       if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
1313         initializeGprCountSymbol(IS_VGPR);
1314         initializeGprCountSymbol(IS_SGPR);
1315       } else
1316         KernelScope.initialize(getContext());
1317     }
1318   }
1319 
1320   bool hasMIMG_R128() const {
1321     return AMDGPU::hasMIMG_R128(getSTI());
1322   }
1323 
1324   bool hasPackedD16() const {
1325     return AMDGPU::hasPackedD16(getSTI());
1326   }
1327 
1328   bool hasGFX10A16() const {
1329     return AMDGPU::hasGFX10A16(getSTI());
1330   }
1331 
1332   bool isSI() const {
1333     return AMDGPU::isSI(getSTI());
1334   }
1335 
1336   bool isCI() const {
1337     return AMDGPU::isCI(getSTI());
1338   }
1339 
1340   bool isVI() const {
1341     return AMDGPU::isVI(getSTI());
1342   }
1343 
1344   bool isGFX9() const {
1345     return AMDGPU::isGFX9(getSTI());
1346   }
1347 
1348   bool isGFX90A() const {
1349     return AMDGPU::isGFX90A(getSTI());
1350   }
1351 
1352   bool isGFX9Plus() const {
1353     return AMDGPU::isGFX9Plus(getSTI());
1354   }
1355 
1356   bool isGFX10() const {
1357     return AMDGPU::isGFX10(getSTI());
1358   }
1359 
1360   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1361 
1362   bool isGFX10_BEncoding() const {
1363     return AMDGPU::isGFX10_BEncoding(getSTI());
1364   }
1365 
1366   bool hasInv2PiInlineImm() const {
1367     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1368   }
1369 
1370   bool hasFlatOffsets() const {
1371     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1372   }
1373 
1374   bool hasSGPR102_SGPR103() const {
1375     return !isVI() && !isGFX9();
1376   }
1377 
1378   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1379 
1380   bool hasIntClamp() const {
1381     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1382   }
1383 
1384   AMDGPUTargetStreamer &getTargetStreamer() {
1385     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1386     return static_cast<AMDGPUTargetStreamer &>(TS);
1387   }
1388 
1389   const MCRegisterInfo *getMRI() const {
1390     // We need this const_cast because for some reason getContext() is not const
1391     // in MCAsmParser.
1392     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1393   }
1394 
1395   const MCInstrInfo *getMII() const {
1396     return &MII;
1397   }
1398 
1399   const FeatureBitset &getFeatureBits() const {
1400     return getSTI().getFeatureBits();
1401   }
1402 
1403   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1404   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1405   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1406 
1407   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1408   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1409   bool isForcedDPP() const { return ForcedDPP; }
1410   bool isForcedSDWA() const { return ForcedSDWA; }
1411   ArrayRef<unsigned> getMatchedVariants() const;
1412   StringRef getMatchedVariantName() const;
1413 
1414   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1415   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1416                      bool RestoreOnFailure);
1417   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1418   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1419                                         SMLoc &EndLoc) override;
1420   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1421   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1422                                       unsigned Kind) override;
1423   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1424                                OperandVector &Operands, MCStreamer &Out,
1425                                uint64_t &ErrorInfo,
1426                                bool MatchingInlineAsm) override;
1427   bool ParseDirective(AsmToken DirectiveID) override;
1428   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1429                                     OperandMode Mode = OperandMode_Default);
1430   StringRef parseMnemonicSuffix(StringRef Name);
1431   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1432                         SMLoc NameLoc, OperandVector &Operands) override;
1433   //bool ProcessInstruction(MCInst &Inst);
1434 
1435   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1436 
1437   OperandMatchResultTy
1438   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1439                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1440                      bool (*ConvertResult)(int64_t &) = nullptr);
1441 
1442   OperandMatchResultTy
1443   parseOperandArrayWithPrefix(const char *Prefix,
1444                               OperandVector &Operands,
1445                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1446                               bool (*ConvertResult)(int64_t&) = nullptr);
1447 
1448   OperandMatchResultTy
1449   parseNamedBit(StringRef Name, OperandVector &Operands,
1450                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1451   OperandMatchResultTy parseCPol(OperandVector &Operands);
1452   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1453                                              StringRef &Value,
1454                                              SMLoc &StringLoc);
1455 
1456   bool isModifier();
1457   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1458   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1459   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1460   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1461   bool parseSP3NegModifier();
1462   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1463   OperandMatchResultTy parseReg(OperandVector &Operands);
1464   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1465   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1466   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1467   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1468   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1469   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1470   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1471   OperandMatchResultTy parseUfmt(int64_t &Format);
1472   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1473   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1474   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1475   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1476   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1477   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1478   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1479 
1480   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1481   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1482   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1483   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1484 
1485   bool parseCnt(int64_t &IntVal);
1486   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1487   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1488 
1489 private:
1490   struct OperandInfoTy {
1491     SMLoc Loc;
1492     int64_t Id;
1493     bool IsSymbolic = false;
1494     bool IsDefined = false;
1495 
1496     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1497   };
1498 
1499   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1500   bool validateSendMsg(const OperandInfoTy &Msg,
1501                        const OperandInfoTy &Op,
1502                        const OperandInfoTy &Stream);
1503 
1504   bool parseHwregBody(OperandInfoTy &HwReg,
1505                       OperandInfoTy &Offset,
1506                       OperandInfoTy &Width);
1507   bool validateHwreg(const OperandInfoTy &HwReg,
1508                      const OperandInfoTy &Offset,
1509                      const OperandInfoTy &Width);
1510 
1511   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1512   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1513 
1514   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1515                       const OperandVector &Operands) const;
1516   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1517   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1518   SMLoc getLitLoc(const OperandVector &Operands) const;
1519   SMLoc getConstLoc(const OperandVector &Operands) const;
1520 
1521   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1522   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1523   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1524   bool validateSOPLiteral(const MCInst &Inst) const;
1525   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1526   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1527   bool validateIntClampSupported(const MCInst &Inst);
1528   bool validateMIMGAtomicDMask(const MCInst &Inst);
1529   bool validateMIMGGatherDMask(const MCInst &Inst);
1530   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1531   bool validateMIMGDataSize(const MCInst &Inst);
1532   bool validateMIMGAddrSize(const MCInst &Inst);
1533   bool validateMIMGD16(const MCInst &Inst);
1534   bool validateMIMGDim(const MCInst &Inst);
1535   bool validateMIMGMSAA(const MCInst &Inst);
1536   bool validateOpSel(const MCInst &Inst);
1537   bool validateVccOperand(unsigned Reg) const;
1538   bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
1539   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1540   bool validateAGPRLdSt(const MCInst &Inst) const;
1541   bool validateVGPRAlign(const MCInst &Inst) const;
1542   bool validateDivScale(const MCInst &Inst);
1543   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1544                              const SMLoc &IDLoc);
1545   Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1546   unsigned getConstantBusLimit(unsigned Opcode) const;
1547   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1548   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1549   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1550 
1551   bool isSupportedMnemo(StringRef Mnemo,
1552                         const FeatureBitset &FBS);
1553   bool isSupportedMnemo(StringRef Mnemo,
1554                         const FeatureBitset &FBS,
1555                         ArrayRef<unsigned> Variants);
1556   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1557 
1558   bool isId(const StringRef Id) const;
1559   bool isId(const AsmToken &Token, const StringRef Id) const;
1560   bool isToken(const AsmToken::TokenKind Kind) const;
1561   bool trySkipId(const StringRef Id);
1562   bool trySkipId(const StringRef Pref, const StringRef Id);
1563   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1564   bool trySkipToken(const AsmToken::TokenKind Kind);
1565   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1566   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1567   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1568 
1569   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1570   AsmToken::TokenKind getTokenKind() const;
1571   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1572   bool parseExpr(OperandVector &Operands);
1573   StringRef getTokenStr() const;
1574   AsmToken peekToken();
1575   AsmToken getToken() const;
1576   SMLoc getLoc() const;
1577   void lex();
1578 
1579 public:
1580   void onBeginOfFile() override;
1581 
1582   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1583   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1584 
1585   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1586   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1587   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1588   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1589   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1590   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1591 
1592   bool parseSwizzleOperand(int64_t &Op,
1593                            const unsigned MinVal,
1594                            const unsigned MaxVal,
1595                            const StringRef ErrMsg,
1596                            SMLoc &Loc);
1597   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1598                             const unsigned MinVal,
1599                             const unsigned MaxVal,
1600                             const StringRef ErrMsg);
1601   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1602   bool parseSwizzleOffset(int64_t &Imm);
1603   bool parseSwizzleMacro(int64_t &Imm);
1604   bool parseSwizzleQuadPerm(int64_t &Imm);
1605   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1606   bool parseSwizzleBroadcast(int64_t &Imm);
1607   bool parseSwizzleSwap(int64_t &Imm);
1608   bool parseSwizzleReverse(int64_t &Imm);
1609 
1610   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1611   int64_t parseGPRIdxMacro();
1612 
1613   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1614   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1615   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1616   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1617 
1618   AMDGPUOperand::Ptr defaultCPol() const;
1619 
1620   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1621   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1622   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1623   AMDGPUOperand::Ptr defaultFlatOffset() const;
1624 
1625   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1626 
1627   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1628                OptionalImmIndexMap &OptionalIdx);
1629   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1630   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1631   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1632   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1633                 OptionalImmIndexMap &OptionalIdx);
1634 
1635   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1636 
1637   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1638                bool IsAtomic = false);
1639   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1640   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1641 
1642   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1643 
1644   bool parseDimId(unsigned &Encoding);
1645   OperandMatchResultTy parseDim(OperandVector &Operands);
1646   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1647   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1648   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1649   int64_t parseDPPCtrlSel(StringRef Ctrl);
1650   int64_t parseDPPCtrlPerm();
1651   AMDGPUOperand::Ptr defaultRowMask() const;
1652   AMDGPUOperand::Ptr defaultBankMask() const;
1653   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1654   AMDGPUOperand::Ptr defaultFI() const;
1655   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1656   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1657 
1658   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1659                                     AMDGPUOperand::ImmTy Type);
1660   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1661   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1662   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1663   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1664   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1665   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1666   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1667                uint64_t BasicInstType,
1668                bool SkipDstVcc = false,
1669                bool SkipSrcVcc = false);
1670 
1671   AMDGPUOperand::Ptr defaultBLGP() const;
1672   AMDGPUOperand::Ptr defaultCBSZ() const;
1673   AMDGPUOperand::Ptr defaultABID() const;
1674 
1675   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1676   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1677 };
1678 
1679 struct OptionalOperand {
1680   const char *Name;
1681   AMDGPUOperand::ImmTy Type;
1682   bool IsBit;
1683   bool (*ConvertResult)(int64_t&);
1684 };
1685 
1686 } // end anonymous namespace
1687 
1688 // May be called with integer type with equivalent bitwidth.
1689 static const fltSemantics *getFltSemantics(unsigned Size) {
1690   switch (Size) {
1691   case 4:
1692     return &APFloat::IEEEsingle();
1693   case 8:
1694     return &APFloat::IEEEdouble();
1695   case 2:
1696     return &APFloat::IEEEhalf();
1697   default:
1698     llvm_unreachable("unsupported fp type");
1699   }
1700 }
1701 
1702 static const fltSemantics *getFltSemantics(MVT VT) {
1703   return getFltSemantics(VT.getSizeInBits() / 8);
1704 }
1705 
1706 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1707   switch (OperandType) {
1708   case AMDGPU::OPERAND_REG_IMM_INT32:
1709   case AMDGPU::OPERAND_REG_IMM_FP32:
1710   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1711   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1712   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1713   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1714   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1715   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1716   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1717   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1718     return &APFloat::IEEEsingle();
1719   case AMDGPU::OPERAND_REG_IMM_INT64:
1720   case AMDGPU::OPERAND_REG_IMM_FP64:
1721   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1722   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1723   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1724     return &APFloat::IEEEdouble();
1725   case AMDGPU::OPERAND_REG_IMM_INT16:
1726   case AMDGPU::OPERAND_REG_IMM_FP16:
1727   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1728   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1729   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1730   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1731   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1732   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1733   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1734   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1735   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1736   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1737     return &APFloat::IEEEhalf();
1738   default:
1739     llvm_unreachable("unsupported fp type");
1740   }
1741 }
1742 
1743 //===----------------------------------------------------------------------===//
1744 // Operand
1745 //===----------------------------------------------------------------------===//
1746 
1747 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1748   bool Lost;
1749 
1750   // Convert the literal to the operand's floating-point type
1751   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1752                                                APFloat::rmNearestTiesToEven,
1753                                                &Lost);
1754   // We allow precision loss but not overflow or underflow
1755   if (Status != APFloat::opOK &&
1756       Lost &&
1757       ((Status & APFloat::opOverflow)  != 0 ||
1758        (Status & APFloat::opUnderflow) != 0)) {
1759     return false;
1760   }
1761 
1762   return true;
1763 }
1764 
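// Return true if Val fits in Size bits as either an unsigned or a signed
// value. Illustrative examples: isSafeTruncation(0xFFFF, 16) and
// isSafeTruncation(-1, 16) hold, while isSafeTruncation(0x10000, 16) does not.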
1765 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1766   return isUIntN(Size, Val) || isIntN(Size, Val);
1767 }
1768 
1769 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1770   if (VT.getScalarType() == MVT::i16) {
1771     // FP inline immediates are broken for i16 operands; accept only integer values.
1772     return isInlinableIntLiteral(Val);
1773   }
1774 
1775   // f16/v2f16 operands work correctly for all values.
1776   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1777 }
1778 
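// Check whether this immediate can be encoded as a hardware inline constant.
// Illustrative values (informal): small integers in the range -16..64 and a
// handful of FP constants such as 0.5, -0.5, 1.0, 2.0 and 4.0 (plus 1/(2*pi)
// on subtargets that support it).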
1779 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1780 
1781   // This is a hack to enable named inline values like
1782   // shared_base with both 32-bit and 64-bit operands.
1783   // Note that these values are defined as
1784   // 32-bit operands only.
1785   if (isInlineValue()) {
1786     return true;
1787   }
1788 
1789   if (!isImmTy(ImmTyNone)) {
1790     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1791     return false;
1792   }
1793   // TODO: We should avoid using host floats here. It would be better to
1794   // check the float bit values, which is what a few other places do.
1795   // We've had bot failures before due to weird NaN support on MIPS hosts.
1796 
1797   APInt Literal(64, Imm.Val);
1798 
1799   if (Imm.IsFPImm) { // We got fp literal token
1800     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1801       return AMDGPU::isInlinableLiteral64(Imm.Val,
1802                                           AsmParser->hasInv2PiInlineImm());
1803     }
1804 
1805     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1806     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1807       return false;
1808 
1809     if (type.getScalarSizeInBits() == 16) {
1810       return isInlineableLiteralOp16(
1811         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1812         type, AsmParser->hasInv2PiInlineImm());
1813     }
1814 
1815     // Check if single precision literal is inlinable
1816     return AMDGPU::isInlinableLiteral32(
1817       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1818       AsmParser->hasInv2PiInlineImm());
1819   }
1820 
1821   // We got int literal token.
1822   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1823     return AMDGPU::isInlinableLiteral64(Imm.Val,
1824                                         AsmParser->hasInv2PiInlineImm());
1825   }
1826 
1827   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1828     return false;
1829   }
1830 
1831   if (type.getScalarSizeInBits() == 16) {
1832     return isInlineableLiteralOp16(
1833       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1834       type, AsmParser->hasInv2PiInlineImm());
1835   }
1836 
1837   return AMDGPU::isInlinableLiteral32(
1838     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1839     AsmParser->hasInv2PiInlineImm());
1840 }
1841 
1842 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1843   // Check that this immediate can be added as a literal
1844   if (!isImmTy(ImmTyNone)) {
1845     return false;
1846   }
1847 
1848   if (!Imm.IsFPImm) {
1849     // We got int literal token.
1850 
1851     if (type == MVT::f64 && hasFPModifiers()) {
1852       // FP modifiers cannot be applied to int literals while preserving the
1853       // same semantics for VOP1/2/C and VOP3 because of integer truncation.
1854       // To avoid ambiguity, disable these cases.
1855       return false;
1856     }
1857 
1858     unsigned Size = type.getSizeInBits();
1859     if (Size == 64)
1860       Size = 32;
1861 
1862     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1863     // types.
1864     return isSafeTruncation(Imm.Val, Size);
1865   }
1866 
1867   // We got fp literal token
1868   if (type == MVT::f64) { // Expected 64-bit fp operand
1869     // The low 32 bits of the literal would be set to zeroes, but we accept such literals
1870     return true;
1871   }
1872 
1873   if (type == MVT::i64) { // Expected 64-bit int operand
1874     // We don't allow fp literals in 64-bit integer instructions. It is
1875     // unclear how we should encode them.
1876     return false;
1877   }
1878 
1879   // We allow fp literals with f16x2 operands assuming that the specified
1880   // literal goes into the lower half and the upper half is zero. We also
1881   // require that the literal can be losslessly converted to f16.
1882   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1883                      (type == MVT::v2i16)? MVT::i16 :
1884                      (type == MVT::v2f32)? MVT::f32 : type;
1885 
1886   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1887   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1888 }
1889 
1890 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1891   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1892 }
1893 
1894 bool AMDGPUOperand::isVRegWithInputMods() const {
1895   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1896          // GFX90A allows DPP on 64-bit operands.
1897          (isRegClass(AMDGPU::VReg_64RegClassID) &&
1898           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1899 }
1900 
1901 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1902   if (AsmParser->isVI())
1903     return isVReg32();
1904   else if (AsmParser->isGFX9Plus())
1905     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1906   else
1907     return false;
1908 }
1909 
1910 bool AMDGPUOperand::isSDWAFP16Operand() const {
1911   return isSDWAOperand(MVT::f16);
1912 }
1913 
1914 bool AMDGPUOperand::isSDWAFP32Operand() const {
1915   return isSDWAOperand(MVT::f32);
1916 }
1917 
1918 bool AMDGPUOperand::isSDWAInt16Operand() const {
1919   return isSDWAOperand(MVT::i16);
1920 }
1921 
1922 bool AMDGPUOperand::isSDWAInt32Operand() const {
1923   return isSDWAOperand(MVT::i32);
1924 }
1925 
1926 bool AMDGPUOperand::isBoolReg() const {
1927   auto FB = AsmParser->getFeatureBits();
1928   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1929                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
1930 }
1931 
1932 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1933 {
1934   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1935   assert(Size == 2 || Size == 4 || Size == 8);
1936 
1937   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
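  // For example (illustrative), with Size == 4 the mask is 0x80000000:
  // 'abs' clears the sign bit below and 'neg' toggles it.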
1938 
1939   if (Imm.Mods.Abs) {
1940     Val &= ~FpSignMask;
1941   }
1942   if (Imm.Mods.Neg) {
1943     Val ^= FpSignMask;
1944   }
1945 
1946   return Val;
1947 }
1948 
1949 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1950   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1951                              Inst.getNumOperands())) {
1952     addLiteralImmOperand(Inst, Imm.Val,
1953                          ApplyModifiers &
1954                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1955   } else {
1956     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1957     Inst.addOperand(MCOperand::createImm(Imm.Val));
1958     setImmKindNone();
1959   }
1960 }
1961 
1962 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1963   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1964   auto OpNum = Inst.getNumOperands();
1965   // Check that this operand accepts literals
1966   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1967 
1968   if (ApplyModifiers) {
1969     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1970     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1971     Val = applyInputFPModifiers(Val, Size);
1972   }
1973 
1974   APInt Literal(64, Val);
1975   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1976 
1977   if (Imm.IsFPImm) { // We got fp literal token
1978     switch (OpTy) {
1979     case AMDGPU::OPERAND_REG_IMM_INT64:
1980     case AMDGPU::OPERAND_REG_IMM_FP64:
1981     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1982     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1983     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1984       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1985                                        AsmParser->hasInv2PiInlineImm())) {
1986         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1987         setImmKindConst();
1988         return;
1989       }
1990 
1991       // Non-inlineable
1992       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1993         // For fp operands we check if low 32 bits are zeros
1994         if (Literal.getLoBits(32) != 0) {
1995           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1996           "Can't encode literal as exact 64-bit floating-point operand. "
1997           "Low 32-bits will be set to zero");
1998         }
1999 
2000         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2001         setImmKindLiteral();
2002         return;
2003       }
2004 
2005       // We don't allow fp literals in 64-bit integer instructions. It is
2006       // unclear how we should encode them. This case should be checked earlier
2007       // in predicate methods (isLiteralImm())
2008       llvm_unreachable("fp literal in 64-bit integer instruction.");
2009 
2010     case AMDGPU::OPERAND_REG_IMM_INT32:
2011     case AMDGPU::OPERAND_REG_IMM_FP32:
2012     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2013     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2014     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2015     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2016     case AMDGPU::OPERAND_REG_IMM_INT16:
2017     case AMDGPU::OPERAND_REG_IMM_FP16:
2018     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2019     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2020     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2021     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2022     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2023     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2024     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2025     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2026     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2027     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2028     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2029     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2030     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2031     case AMDGPU::OPERAND_REG_IMM_V2INT32: {
2032       bool lost;
2033       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2034       // Convert the literal to the operand's floating-point type
2035       FPLiteral.convert(*getOpFltSemantics(OpTy),
2036                         APFloat::rmNearestTiesToEven, &lost);
2037       // We allow precision loss but not overflow or underflow. This should be
2038       // checked earlier in isLiteralImm().
2039 
2040       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2041       Inst.addOperand(MCOperand::createImm(ImmVal));
2042       setImmKindLiteral();
2043       return;
2044     }
2045     default:
2046       llvm_unreachable("invalid operand size");
2047     }
2048 
2049     return;
2050   }
2051 
2052   // We got int literal token.
2053   // Only sign extend inline immediates.
2054   switch (OpTy) {
2055   case AMDGPU::OPERAND_REG_IMM_INT32:
2056   case AMDGPU::OPERAND_REG_IMM_FP32:
2057   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2058   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2059   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2060   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2061   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2062   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2063   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2064   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2065   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2066   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2067     if (isSafeTruncation(Val, 32) &&
2068         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2069                                      AsmParser->hasInv2PiInlineImm())) {
2070       Inst.addOperand(MCOperand::createImm(Val));
2071       setImmKindConst();
2072       return;
2073     }
2074 
2075     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2076     setImmKindLiteral();
2077     return;
2078 
2079   case AMDGPU::OPERAND_REG_IMM_INT64:
2080   case AMDGPU::OPERAND_REG_IMM_FP64:
2081   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2082   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2083   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2084     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2085       Inst.addOperand(MCOperand::createImm(Val));
2086       setImmKindConst();
2087       return;
2088     }
2089 
2090     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2091     setImmKindLiteral();
2092     return;
2093 
2094   case AMDGPU::OPERAND_REG_IMM_INT16:
2095   case AMDGPU::OPERAND_REG_IMM_FP16:
2096   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2097   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2098   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2099   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2100     if (isSafeTruncation(Val, 16) &&
2101         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2102                                      AsmParser->hasInv2PiInlineImm())) {
2103       Inst.addOperand(MCOperand::createImm(Val));
2104       setImmKindConst();
2105       return;
2106     }
2107 
2108     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2109     setImmKindLiteral();
2110     return;
2111 
2112   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2113   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2114   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2115   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2116     assert(isSafeTruncation(Val, 16));
2117     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2118                                         AsmParser->hasInv2PiInlineImm()));
2119 
2120     Inst.addOperand(MCOperand::createImm(Val));
2121     return;
2122   }
2123   default:
2124     llvm_unreachable("invalid operand size");
2125   }
2126 }
2127 
2128 template <unsigned Bitwidth>
2129 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2130   APInt Literal(64, Imm.Val);
2131   setImmKindNone();
2132 
2133   if (!Imm.IsFPImm) {
2134     // We got int literal token.
2135     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2136     return;
2137   }
2138 
2139   bool Lost;
2140   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2141   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2142                     APFloat::rmNearestTiesToEven, &Lost);
2143   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2144 }
2145 
2146 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2147   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2148 }
2149 
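// Registers that this parser treats as named inline values, e.g.
// src_shared_base, src_scc and null.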
2150 static bool isInlineValue(unsigned Reg) {
2151   switch (Reg) {
2152   case AMDGPU::SRC_SHARED_BASE:
2153   case AMDGPU::SRC_SHARED_LIMIT:
2154   case AMDGPU::SRC_PRIVATE_BASE:
2155   case AMDGPU::SRC_PRIVATE_LIMIT:
2156   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2157     return true;
2158   case AMDGPU::SRC_VCCZ:
2159   case AMDGPU::SRC_EXECZ:
2160   case AMDGPU::SRC_SCC:
2161     return true;
2162   case AMDGPU::SGPR_NULL:
2163     return true;
2164   default:
2165     return false;
2166   }
2167 }
2168 
2169 bool AMDGPUOperand::isInlineValue() const {
2170   return isRegKind() && ::isInlineValue(getReg());
2171 }
2172 
2173 //===----------------------------------------------------------------------===//
2174 // AsmParser
2175 //===----------------------------------------------------------------------===//
2176 
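// Map a register kind and a width in 32-bit dwords to a register class ID,
// e.g. (IS_VGPR, 2) -> VReg_64. Returns -1 for unsupported widths.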
2177 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2178   if (Is == IS_VGPR) {
2179     switch (RegWidth) {
2180       default: return -1;
2181       case 1: return AMDGPU::VGPR_32RegClassID;
2182       case 2: return AMDGPU::VReg_64RegClassID;
2183       case 3: return AMDGPU::VReg_96RegClassID;
2184       case 4: return AMDGPU::VReg_128RegClassID;
2185       case 5: return AMDGPU::VReg_160RegClassID;
2186       case 6: return AMDGPU::VReg_192RegClassID;
2187       case 8: return AMDGPU::VReg_256RegClassID;
2188       case 16: return AMDGPU::VReg_512RegClassID;
2189       case 32: return AMDGPU::VReg_1024RegClassID;
2190     }
2191   } else if (Is == IS_TTMP) {
2192     switch (RegWidth) {
2193       default: return -1;
2194       case 1: return AMDGPU::TTMP_32RegClassID;
2195       case 2: return AMDGPU::TTMP_64RegClassID;
2196       case 4: return AMDGPU::TTMP_128RegClassID;
2197       case 8: return AMDGPU::TTMP_256RegClassID;
2198       case 16: return AMDGPU::TTMP_512RegClassID;
2199     }
2200   } else if (Is == IS_SGPR) {
2201     switch (RegWidth) {
2202       default: return -1;
2203       case 1: return AMDGPU::SGPR_32RegClassID;
2204       case 2: return AMDGPU::SGPR_64RegClassID;
2205       case 3: return AMDGPU::SGPR_96RegClassID;
2206       case 4: return AMDGPU::SGPR_128RegClassID;
2207       case 5: return AMDGPU::SGPR_160RegClassID;
2208       case 6: return AMDGPU::SGPR_192RegClassID;
2209       case 8: return AMDGPU::SGPR_256RegClassID;
2210       case 16: return AMDGPU::SGPR_512RegClassID;
2211     }
2212   } else if (Is == IS_AGPR) {
2213     switch (RegWidth) {
2214       default: return -1;
2215       case 1: return AMDGPU::AGPR_32RegClassID;
2216       case 2: return AMDGPU::AReg_64RegClassID;
2217       case 3: return AMDGPU::AReg_96RegClassID;
2218       case 4: return AMDGPU::AReg_128RegClassID;
2219       case 5: return AMDGPU::AReg_160RegClassID;
2220       case 6: return AMDGPU::AReg_192RegClassID;
2221       case 8: return AMDGPU::AReg_256RegClassID;
2222       case 16: return AMDGPU::AReg_512RegClassID;
2223       case 32: return AMDGPU::AReg_1024RegClassID;
2224     }
2225   }
2226   return -1;
2227 }
2228 
2229 static unsigned getSpecialRegForName(StringRef RegName) {
2230   return StringSwitch<unsigned>(RegName)
2231     .Case("exec", AMDGPU::EXEC)
2232     .Case("vcc", AMDGPU::VCC)
2233     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2234     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2235     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2236     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2237     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2238     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2239     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2240     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2241     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2242     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2243     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2244     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2245     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2246     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2247     .Case("m0", AMDGPU::M0)
2248     .Case("vccz", AMDGPU::SRC_VCCZ)
2249     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2250     .Case("execz", AMDGPU::SRC_EXECZ)
2251     .Case("src_execz", AMDGPU::SRC_EXECZ)
2252     .Case("scc", AMDGPU::SRC_SCC)
2253     .Case("src_scc", AMDGPU::SRC_SCC)
2254     .Case("tba", AMDGPU::TBA)
2255     .Case("tma", AMDGPU::TMA)
2256     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2257     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2258     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2259     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2260     .Case("vcc_lo", AMDGPU::VCC_LO)
2261     .Case("vcc_hi", AMDGPU::VCC_HI)
2262     .Case("exec_lo", AMDGPU::EXEC_LO)
2263     .Case("exec_hi", AMDGPU::EXEC_HI)
2264     .Case("tma_lo", AMDGPU::TMA_LO)
2265     .Case("tma_hi", AMDGPU::TMA_HI)
2266     .Case("tba_lo", AMDGPU::TBA_LO)
2267     .Case("tba_hi", AMDGPU::TBA_HI)
2268     .Case("pc", AMDGPU::PC_REG)
2269     .Case("null", AMDGPU::SGPR_NULL)
2270     .Default(AMDGPU::NoRegister);
2271 }
2272 
2273 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2274                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2275   auto R = parseRegister();
2276   if (!R) return true;
2277   assert(R->isReg());
2278   RegNo = R->getReg();
2279   StartLoc = R->getStartLoc();
2280   EndLoc = R->getEndLoc();
2281   return false;
2282 }
2283 
2284 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2285                                     SMLoc &EndLoc) {
2286   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2287 }
2288 
2289 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2290                                                        SMLoc &StartLoc,
2291                                                        SMLoc &EndLoc) {
2292   bool Result =
2293       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2294   bool PendingErrors = getParser().hasPendingError();
2295   getParser().clearPendingErrors();
2296   if (PendingErrors)
2297     return MatchOperand_ParseFail;
2298   if (Result)
2299     return MatchOperand_NoMatch;
2300   return MatchOperand_Success;
2301 }
2302 
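// Append the next register of a bracketed register list to the range
// accumulated so far. For example (illustrative): [exec_lo, exec_hi] folds
// into the 64-bit 'exec' pair, and [v0, v1, v2] grows a VGPR range to
// width 3.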
2303 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2304                                             RegisterKind RegKind, unsigned Reg1,
2305                                             SMLoc Loc) {
2306   switch (RegKind) {
2307   case IS_SPECIAL:
2308     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2309       Reg = AMDGPU::EXEC;
2310       RegWidth = 2;
2311       return true;
2312     }
2313     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2314       Reg = AMDGPU::FLAT_SCR;
2315       RegWidth = 2;
2316       return true;
2317     }
2318     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2319       Reg = AMDGPU::XNACK_MASK;
2320       RegWidth = 2;
2321       return true;
2322     }
2323     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2324       Reg = AMDGPU::VCC;
2325       RegWidth = 2;
2326       return true;
2327     }
2328     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2329       Reg = AMDGPU::TBA;
2330       RegWidth = 2;
2331       return true;
2332     }
2333     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2334       Reg = AMDGPU::TMA;
2335       RegWidth = 2;
2336       return true;
2337     }
2338     Error(Loc, "register does not fit in the list");
2339     return false;
2340   case IS_VGPR:
2341   case IS_SGPR:
2342   case IS_AGPR:
2343   case IS_TTMP:
2344     if (Reg1 != Reg + RegWidth) {
2345       Error(Loc, "registers in a list must have consecutive indices");
2346       return false;
2347     }
2348     RegWidth++;
2349     return true;
2350   default:
2351     llvm_unreachable("unexpected register kind");
2352   }
2353 }
2354 
2355 struct RegInfo {
2356   StringLiteral Name;
2357   RegisterKind Kind;
2358 };
2359 
2360 static constexpr RegInfo RegularRegisters[] = {
2361   {{"v"},    IS_VGPR},
2362   {{"s"},    IS_SGPR},
2363   {{"ttmp"}, IS_TTMP},
2364   {{"acc"},  IS_AGPR},
2365   {{"a"},    IS_AGPR},
2366 };
2367 
2368 static bool isRegularReg(RegisterKind Kind) {
2369   return Kind == IS_VGPR ||
2370          Kind == IS_SGPR ||
2371          Kind == IS_TTMP ||
2372          Kind == IS_AGPR;
2373 }
2374 
2375 static const RegInfo* getRegularRegInfo(StringRef Str) {
2376   for (const RegInfo &Reg : RegularRegisters)
2377     if (Str.startswith(Reg.Name))
2378       return &Reg;
2379   return nullptr;
2380 }
2381 
2382 static bool getRegNum(StringRef Str, unsigned& Num) {
2383   return !Str.getAsInteger(10, Num);
2384 }
2385 
2386 bool
2387 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2388                             const AsmToken &NextToken) const {
2389 
2390   // A list of consecutive registers: [s0,s1,s2,s3]
2391   if (Token.is(AsmToken::LBrac))
2392     return true;
2393 
2394   if (!Token.is(AsmToken::Identifier))
2395     return false;
2396 
2397   // A single register like s0 or a range of registers like s[0:1]
2398 
2399   StringRef Str = Token.getString();
2400   const RegInfo *Reg = getRegularRegInfo(Str);
2401   if (Reg) {
2402     StringRef RegName = Reg->Name;
2403     StringRef RegSuffix = Str.substr(RegName.size());
2404     if (!RegSuffix.empty()) {
2405       unsigned Num;
2406       // A single register with an index: rXX
2407       if (getRegNum(RegSuffix, Num))
2408         return true;
2409     } else {
2410       // A range of registers: r[XX:YY].
2411       if (NextToken.is(AsmToken::LBrac))
2412         return true;
2413     }
2414   }
2415 
2416   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2417 }
2418 
2419 bool
2420 AMDGPUAsmParser::isRegister()
2421 {
2422   return isRegister(getToken(), peekToken());
2423 }
2424 
2425 unsigned
2426 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2427                                unsigned RegNum,
2428                                unsigned RegWidth,
2429                                SMLoc Loc) {
2430 
2431   assert(isRegularReg(RegKind));
2432 
2433   unsigned AlignSize = 1;
2434   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2435     // SGPR and TTMP registers must be aligned.
2436     // Max required alignment is 4 dwords.
2437     AlignSize = std::min(RegWidth, 4u);
2438   }
2439 
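  // For example (illustrative): s[1:2] is rejected here because a
  // 2-dword SGPR range must start at an even register index.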
2440   if (RegNum % AlignSize != 0) {
2441     Error(Loc, "invalid register alignment");
2442     return AMDGPU::NoRegister;
2443   }
2444 
2445   unsigned RegIdx = RegNum / AlignSize;
2446   int RCID = getRegClass(RegKind, RegWidth);
2447   if (RCID == -1) {
2448     Error(Loc, "invalid or unsupported register size");
2449     return AMDGPU::NoRegister;
2450   }
2451 
2452   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2453   const MCRegisterClass RC = TRI->getRegClass(RCID);
2454   if (RegIdx >= RC.getNumRegs()) {
2455     Error(Loc, "register index is out of range");
2456     return AMDGPU::NoRegister;
2457   }
2458 
2459   return RC.getRegister(RegIdx);
2460 }
2461 
2462 bool
2463 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2464   int64_t RegLo, RegHi;
2465   if (!skipToken(AsmToken::LBrac, "missing register index"))
2466     return false;
2467 
2468   SMLoc FirstIdxLoc = getLoc();
2469   SMLoc SecondIdxLoc;
2470 
2471   if (!parseExpr(RegLo))
2472     return false;
2473 
2474   if (trySkipToken(AsmToken::Colon)) {
2475     SecondIdxLoc = getLoc();
2476     if (!parseExpr(RegHi))
2477       return false;
2478   } else {
2479     RegHi = RegLo;
2480   }
2481 
2482   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2483     return false;
2484 
2485   if (!isUInt<32>(RegLo)) {
2486     Error(FirstIdxLoc, "invalid register index");
2487     return false;
2488   }
2489 
2490   if (!isUInt<32>(RegHi)) {
2491     Error(SecondIdxLoc, "invalid register index");
2492     return false;
2493   }
2494 
2495   if (RegLo > RegHi) {
2496     Error(FirstIdxLoc, "first register index should not exceed second index");
2497     return false;
2498   }
2499 
2500   Num = static_cast<unsigned>(RegLo);
2501   Width = (RegHi - RegLo) + 1;
2502   return true;
2503 }
2504 
2505 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2506                                           unsigned &RegNum, unsigned &RegWidth,
2507                                           SmallVectorImpl<AsmToken> &Tokens) {
2508   assert(isToken(AsmToken::Identifier));
2509   unsigned Reg = getSpecialRegForName(getTokenStr());
2510   if (Reg) {
2511     RegNum = 0;
2512     RegWidth = 1;
2513     RegKind = IS_SPECIAL;
2514     Tokens.push_back(getToken());
2515     lex(); // skip register name
2516   }
2517   return Reg;
2518 }
2519 
2520 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2521                                           unsigned &RegNum, unsigned &RegWidth,
2522                                           SmallVectorImpl<AsmToken> &Tokens) {
2523   assert(isToken(AsmToken::Identifier));
2524   StringRef RegName = getTokenStr();
2525   auto Loc = getLoc();
2526 
2527   const RegInfo *RI = getRegularRegInfo(RegName);
2528   if (!RI) {
2529     Error(Loc, "invalid register name");
2530     return AMDGPU::NoRegister;
2531   }
2532 
2533   Tokens.push_back(getToken());
2534   lex(); // skip register name
2535 
2536   RegKind = RI->Kind;
2537   StringRef RegSuffix = RegName.substr(RI->Name.size());
2538   if (!RegSuffix.empty()) {
2539     // Single 32-bit register: vXX.
2540     if (!getRegNum(RegSuffix, RegNum)) {
2541       Error(Loc, "invalid register index");
2542       return AMDGPU::NoRegister;
2543     }
2544     RegWidth = 1;
2545   } else {
2546     // Range of registers: v[XX:YY]. ":YY" is optional.
2547     if (!ParseRegRange(RegNum, RegWidth))
2548       return AMDGPU::NoRegister;
2549   }
2550 
2551   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2552 }
2553 
2554 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2555                                        unsigned &RegWidth,
2556                                        SmallVectorImpl<AsmToken> &Tokens) {
2557   unsigned Reg = AMDGPU::NoRegister;
2558   auto ListLoc = getLoc();
2559 
2560   if (!skipToken(AsmToken::LBrac,
2561                  "expected a register or a list of registers")) {
2562     return AMDGPU::NoRegister;
2563   }
2564 
2565   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2566 
2567   auto Loc = getLoc();
2568   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2569     return AMDGPU::NoRegister;
2570   if (RegWidth != 1) {
2571     Error(Loc, "expected a single 32-bit register");
2572     return AMDGPU::NoRegister;
2573   }
2574 
2575   for (; trySkipToken(AsmToken::Comma); ) {
2576     RegisterKind NextRegKind;
2577     unsigned NextReg, NextRegNum, NextRegWidth;
2578     Loc = getLoc();
2579 
2580     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2581                              NextRegNum, NextRegWidth,
2582                              Tokens)) {
2583       return AMDGPU::NoRegister;
2584     }
2585     if (NextRegWidth != 1) {
2586       Error(Loc, "expected a single 32-bit register");
2587       return AMDGPU::NoRegister;
2588     }
2589     if (NextRegKind != RegKind) {
2590       Error(Loc, "registers in a list must be of the same kind");
2591       return AMDGPU::NoRegister;
2592     }
2593     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2594       return AMDGPU::NoRegister;
2595   }
2596 
2597   if (!skipToken(AsmToken::RBrac,
2598                  "expected a comma or a closing square bracket")) {
2599     return AMDGPU::NoRegister;
2600   }
2601 
2602   if (isRegularReg(RegKind))
2603     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2604 
2605   return Reg;
2606 }
2607 
2608 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2609                                           unsigned &RegNum, unsigned &RegWidth,
2610                                           SmallVectorImpl<AsmToken> &Tokens) {
2611   auto Loc = getLoc();
2612   Reg = AMDGPU::NoRegister;
2613 
2614   if (isToken(AsmToken::Identifier)) {
2615     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2616     if (Reg == AMDGPU::NoRegister)
2617       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2618   } else {
2619     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2620   }
2621 
2622   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2623   if (Reg == AMDGPU::NoRegister) {
2624     assert(Parser.hasPendingError());
2625     return false;
2626   }
2627 
2628   if (!subtargetHasRegister(*TRI, Reg)) {
2629     if (Reg == AMDGPU::SGPR_NULL) {
2630       Error(Loc, "'null' operand is not supported on this GPU");
2631     } else {
2632       Error(Loc, "register not available on this GPU");
2633     }
2634     return false;
2635   }
2636 
2637   return true;
2638 }
2639 
2640 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2641                                           unsigned &RegNum, unsigned &RegWidth,
2642                                           bool RestoreOnFailure /*=false*/) {
2643   Reg = AMDGPU::NoRegister;
2644 
2645   SmallVector<AsmToken, 1> Tokens;
2646   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2647     if (RestoreOnFailure) {
2648       while (!Tokens.empty()) {
2649         getLexer().UnLex(Tokens.pop_back_val());
2650       }
2651     }
2652     return true;
2653   }
2654   return false;
2655 }
2656 
2657 Optional<StringRef>
2658 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2659   switch (RegKind) {
2660   case IS_VGPR:
2661     return StringRef(".amdgcn.next_free_vgpr");
2662   case IS_SGPR:
2663     return StringRef(".amdgcn.next_free_sgpr");
2664   default:
2665     return None;
2666   }
2667 }
2668 
2669 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2670   auto SymbolName = getGprCountSymbolName(RegKind);
2671   assert(SymbolName && "initializing invalid register kind");
2672   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2673   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2674 }
2675 
2676 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2677                                             unsigned DwordRegIndex,
2678                                             unsigned RegWidth) {
2679   // Symbols are only defined for GCN targets
2680   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2681     return true;
2682 
2683   auto SymbolName = getGprCountSymbolName(RegKind);
2684   if (!SymbolName)
2685     return true;
2686   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2687 
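  // For example (illustrative): after parsing v[8:11], NewMax below is 11
  // and .amdgcn.next_free_vgpr is raised to at least 12.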
2688   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2689   int64_t OldCount;
2690 
2691   if (!Sym->isVariable())
2692     return !Error(getLoc(),
2693                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2694   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2695     return !Error(
2696         getLoc(),
2697         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2698 
2699   if (OldCount <= NewMax)
2700     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2701 
2702   return true;
2703 }
2704 
2705 std::unique_ptr<AMDGPUOperand>
2706 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2707   const auto &Tok = getToken();
2708   SMLoc StartLoc = Tok.getLoc();
2709   SMLoc EndLoc = Tok.getEndLoc();
2710   RegisterKind RegKind;
2711   unsigned Reg, RegNum, RegWidth;
2712 
2713   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2714     return nullptr;
2715   }
2716   if (isHsaAbiVersion3Or4(&getSTI())) {
2717     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2718       return nullptr;
2719   } else
2720     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2721   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2722 }
2723 
2724 OperandMatchResultTy
2725 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2726   // TODO: add syntactic sugar for 1/(2*PI)
2727 
2728   assert(!isRegister());
2729   assert(!isModifier());
2730 
2731   const auto& Tok = getToken();
2732   const auto& NextTok = peekToken();
2733   bool IsReal = Tok.is(AsmToken::Real);
2734   SMLoc S = getLoc();
2735   bool Negate = false;
2736 
2737   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2738     lex();
2739     IsReal = true;
2740     Negate = true;
2741   }
2742 
2743   if (IsReal) {
2744     // Floating-point expressions are not supported.
2745     // Can only allow floating-point literals with an
2746     // optional sign.
2747 
2748     StringRef Num = getTokenStr();
2749     lex();
2750 
2751     APFloat RealVal(APFloat::IEEEdouble());
2752     auto roundMode = APFloat::rmNearestTiesToEven;
2753     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2754       return MatchOperand_ParseFail;
2755     }
2756     if (Negate)
2757       RealVal.changeSign();
2758 
2759     Operands.push_back(
2760       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2761                                AMDGPUOperand::ImmTyNone, true));
2762 
2763     return MatchOperand_Success;
2764 
2765   } else {
2766     int64_t IntVal;
2767     const MCExpr *Expr;
2768     SMLoc S = getLoc();
2769 
2770     if (HasSP3AbsModifier) {
2771       // This is a workaround for handling expressions
2772       // as arguments of SP3 'abs' modifier, for example:
2773       //     |1.0|
2774       //     |-1|
2775       //     |1+x|
2776       // This syntax is not compatible with syntax of standard
2777       // MC expressions (due to the trailing '|').
2778       SMLoc EndLoc;
2779       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2780         return MatchOperand_ParseFail;
2781     } else {
2782       if (Parser.parseExpression(Expr))
2783         return MatchOperand_ParseFail;
2784     }
2785 
2786     if (Expr->evaluateAsAbsolute(IntVal)) {
2787       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2788     } else {
2789       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2790     }
2791 
2792     return MatchOperand_Success;
2793   }
2794 
2795   return MatchOperand_NoMatch;
2796 }
2797 
2798 OperandMatchResultTy
2799 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2800   if (!isRegister())
2801     return MatchOperand_NoMatch;
2802 
2803   if (auto R = parseRegister()) {
2804     assert(R->isReg());
2805     Operands.push_back(std::move(R));
2806     return MatchOperand_Success;
2807   }
2808   return MatchOperand_ParseFail;
2809 }
2810 
2811 OperandMatchResultTy
2812 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2813   auto res = parseReg(Operands);
2814   if (res != MatchOperand_NoMatch) {
2815     return res;
2816   } else if (isModifier()) {
2817     return MatchOperand_NoMatch;
2818   } else {
2819     return parseImm(Operands, HasSP3AbsMod);
2820   }
2821 }
2822 
2823 bool
2824 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2825   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2826     const auto &str = Token.getString();
2827     return str == "abs" || str == "neg" || str == "sext";
2828   }
2829   return false;
2830 }
2831 
2832 bool
2833 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2834   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2835 }
2836 
2837 bool
2838 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2839   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2840 }
2841 
2842 bool
2843 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2844   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2845 }
2846 
2847 // Check if this is an operand modifier or an opcode modifier
2848 // which may look like an expression but is not. We should
2849 // avoid parsing these modifiers as expressions. Currently
2850 // recognized sequences are:
2851 //   |...|
2852 //   abs(...)
2853 //   neg(...)
2854 //   sext(...)
2855 //   -reg
2856 //   -|...|
2857 //   -abs(...)
2858 //   name:...
2859 // Note that simple opcode modifiers like 'gds' may be parsed as
2860 // expressions; this is a special case. See getExpressionAsToken.
2861 //
2862 bool
2863 AMDGPUAsmParser::isModifier() {
2864 
2865   AsmToken Tok = getToken();
2866   AsmToken NextToken[2];
2867   peekTokens(NextToken);
2868 
2869   return isOperandModifier(Tok, NextToken[0]) ||
2870          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2871          isOpcodeModifierWithVal(Tok, NextToken[0]);
2872 }
2873 
2874 // Check if the current token is an SP3 'neg' modifier.
2875 // Currently this modifier is allowed in the following contexts:
2876 //
2877 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2878 // 2. Before an 'abs' modifier: -abs(...)
2879 // 3. Before an SP3 'abs' modifier: -|...|
2880 //
2881 // In all other cases "-" is handled as a part
2882 // of an expression that follows the sign.
2883 //
2884 // Note: When "-" is followed by an integer literal N,
2885 // this is interpreted as integer negation rather
2886 // than a floating-point NEG modifier applied to N.
2887 // Besides being counter-intuitive, such use of the floating-point
2888 // NEG modifier would have resulted in different meanings
2889 // of integer literals used with VOP1/2/C and VOP3,
2890 // for example:
2891 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2892 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2893 // Negative fp literals with a preceding "-" are
2894 // handled likewise for uniformity.
2895 //
2896 bool
2897 AMDGPUAsmParser::parseSP3NegModifier() {
2898 
2899   AsmToken NextToken[2];
2900   peekTokens(NextToken);
2901 
2902   if (isToken(AsmToken::Minus) &&
2903       (isRegister(NextToken[0], NextToken[1]) ||
2904        NextToken[0].is(AsmToken::Pipe) ||
2905        isId(NextToken[0], "abs"))) {
2906     lex();
2907     return true;
2908   }
2909 
2910   return false;
2911 }
2912 
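// Parse a register or an immediate with optional floating-point input
// modifiers. Accepted spellings include (illustrative): v0, -v0, |v0|,
// abs(v0), -|v0| and neg(abs(v0)). Mixing the SP3 and named forms of the
// same modifier, e.g. '--1' or 'abs(|v0|)', is diagnosed below.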
2913 OperandMatchResultTy
2914 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2915                                               bool AllowImm) {
2916   bool Neg, SP3Neg;
2917   bool Abs, SP3Abs;
2918   SMLoc Loc;
2919 
2920   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2921   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2922     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2923     return MatchOperand_ParseFail;
2924   }
2925 
2926   SP3Neg = parseSP3NegModifier();
2927 
2928   Loc = getLoc();
2929   Neg = trySkipId("neg");
2930   if (Neg && SP3Neg) {
2931     Error(Loc, "expected register or immediate");
2932     return MatchOperand_ParseFail;
2933   }
2934   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2935     return MatchOperand_ParseFail;
2936 
2937   Abs = trySkipId("abs");
2938   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2939     return MatchOperand_ParseFail;
2940 
2941   Loc = getLoc();
2942   SP3Abs = trySkipToken(AsmToken::Pipe);
2943   if (Abs && SP3Abs) {
2944     Error(Loc, "expected register or immediate");
2945     return MatchOperand_ParseFail;
2946   }
2947 
2948   OperandMatchResultTy Res;
2949   if (AllowImm) {
2950     Res = parseRegOrImm(Operands, SP3Abs);
2951   } else {
2952     Res = parseReg(Operands);
2953   }
2954   if (Res != MatchOperand_Success) {
2955     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2956   }
2957 
2958   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2959     return MatchOperand_ParseFail;
2960   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2961     return MatchOperand_ParseFail;
2962   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2963     return MatchOperand_ParseFail;
2964 
2965   AMDGPUOperand::Modifiers Mods;
2966   Mods.Abs = Abs || SP3Abs;
2967   Mods.Neg = Neg || SP3Neg;
2968 
2969   if (Mods.hasFPModifiers()) {
2970     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2971     if (Op.isExpr()) {
2972       Error(Op.getStartLoc(), "expected an absolute expression");
2973       return MatchOperand_ParseFail;
2974     }
2975     Op.setModifiers(Mods);
2976   }
2977   return MatchOperand_Success;
2978 }
2979 
2980 OperandMatchResultTy
2981 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2982                                                bool AllowImm) {
2983   bool Sext = trySkipId("sext");
2984   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2985     return MatchOperand_ParseFail;
2986 
2987   OperandMatchResultTy Res;
2988   if (AllowImm) {
2989     Res = parseRegOrImm(Operands);
2990   } else {
2991     Res = parseReg(Operands);
2992   }
2993   if (Res != MatchOperand_Success) {
2994     return Sext? MatchOperand_ParseFail : Res;
2995   }
2996 
2997   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2998     return MatchOperand_ParseFail;
2999 
3000   AMDGPUOperand::Modifiers Mods;
3001   Mods.Sext = Sext;
3002 
3003   if (Mods.hasIntModifiers()) {
3004     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3005     if (Op.isExpr()) {
3006       Error(Op.getStartLoc(), "expected an absolute expression");
3007       return MatchOperand_ParseFail;
3008     }
3009     Op.setModifiers(Mods);
3010   }
3011 
3012   return MatchOperand_Success;
3013 }
3014 
3015 OperandMatchResultTy
3016 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3017   return parseRegOrImmWithFPInputMods(Operands, false);
3018 }
3019 
3020 OperandMatchResultTy
3021 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3022   return parseRegOrImmWithIntInputMods(Operands, false);
3023 }
3024 
3025 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3026   auto Loc = getLoc();
3027   if (trySkipId("off")) {
3028     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3029                                                 AMDGPUOperand::ImmTyOff, false));
3030     return MatchOperand_Success;
3031   }
3032 
3033   if (!isRegister())
3034     return MatchOperand_NoMatch;
3035 
3036   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3037   if (Reg) {
3038     Operands.push_back(std::move(Reg));
3039     return MatchOperand_Success;
3040   }
3041 
3042   return MatchOperand_ParseFail;
3043 
3044 }
3045 
3046 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3047   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3048 
3049   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3050       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3051       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3052       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3053     return Match_InvalidOperand;
3054 
3055   if ((TSFlags & SIInstrFlags::VOP3) &&
3056       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3057       getForcedEncodingSize() != 64)
3058     return Match_PreferE32;
3059 
3060   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3061       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3062     // v_mac_f32/16 allow only dst_sel == DWORD;
3063     auto OpNum =
3064         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3065     const auto &Op = Inst.getOperand(OpNum);
3066     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3067       return Match_InvalidOperand;
3068     }
3069   }
3070 
3071   return Match_Success;
3072 }
3073 
3074 static ArrayRef<unsigned> getAllVariants() {
3075   static const unsigned Variants[] = {
3076     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3077     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3078   };
3079 
3080   return makeArrayRef(Variants);
3081 }
3082 
3083 // What asm variants we should check
3084 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3085   if (getForcedEncodingSize() == 32) {
3086     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3087     return makeArrayRef(Variants);
3088   }
3089 
3090   if (isForcedVOP3()) {
3091     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3092     return makeArrayRef(Variants);
3093   }
3094 
3095   if (isForcedSDWA()) {
3096     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3097                                         AMDGPUAsmVariants::SDWA9};
3098     return makeArrayRef(Variants);
3099   }
3100 
3101   if (isForcedDPP()) {
3102     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3103     return makeArrayRef(Variants);
3104   }
3105 
3106   return getAllVariants();
3107 }
3108 
3109 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3110   if (getForcedEncodingSize() == 32)
3111     return "e32";
3112 
3113   if (isForcedVOP3())
3114     return "e64";
3115 
3116   if (isForcedSDWA())
3117     return "sdwa";
3118 
3119   if (isForcedDPP())
3120     return "dpp";
3121 
3122   return "";
3123 }
3124 
3125 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3126   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3127   const unsigned Num = Desc.getNumImplicitUses();
3128   for (unsigned i = 0; i < Num; ++i) {
3129     unsigned Reg = Desc.ImplicitUses[i];
3130     switch (Reg) {
3131     case AMDGPU::FLAT_SCR:
3132     case AMDGPU::VCC:
3133     case AMDGPU::VCC_LO:
3134     case AMDGPU::VCC_HI:
3135     case AMDGPU::M0:
3136       return Reg;
3137     default:
3138       break;
3139     }
3140   }
3141   return AMDGPU::NoRegister;
3142 }
3143 
3144 // NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
3146 // Note that there are no cases when a GFX7 opcode violates
3147 // constant bus limitations due to the use of an f16 constant.
3148 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3149                                        unsigned OpIdx) const {
3150   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3151 
3152   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3153     return false;
3154   }
3155 
3156   const MCOperand &MO = Inst.getOperand(OpIdx);
3157 
3158   int64_t Val = MO.getImm();
3159   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3160 
3161   switch (OpSize) { // expected operand size
3162   case 8:
3163     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3164   case 4:
3165     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3166   case 2: {
3167     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3168     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3169         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3170         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3171       return AMDGPU::isInlinableIntLiteral(Val);
3172 
3173     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3174         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3175         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3176       return AMDGPU::isInlinableIntLiteralV216(Val);
3177 
3178     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3179         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3180         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3181       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3182 
3183     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3184   }
3185   default:
3186     llvm_unreachable("invalid operand size");
3187   }
3188 }
3189 
3190 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3191   if (!isGFX10Plus())
3192     return 1;
3193 
3194   switch (Opcode) {
3195   // 64-bit shift instructions can use only one scalar value input
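  // (for example, "v_lshlrev_b64 v[0:1], s0, v[2:3]" already uses its only
  // scalar slot for s0)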
3196   case AMDGPU::V_LSHLREV_B64_e64:
3197   case AMDGPU::V_LSHLREV_B64_gfx10:
3198   case AMDGPU::V_LSHRREV_B64_e64:
3199   case AMDGPU::V_LSHRREV_B64_gfx10:
3200   case AMDGPU::V_ASHRREV_I64_e64:
3201   case AMDGPU::V_ASHRREV_I64_gfx10:
3202   case AMDGPU::V_LSHL_B64_e64:
3203   case AMDGPU::V_LSHR_B64_e64:
3204   case AMDGPU::V_ASHR_I64_e64:
3205     return 1;
3206   default:
3207     return 2;
3208   }
3209 }
3210 
3211 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3212   const MCOperand &MO = Inst.getOperand(OpIdx);
3213   if (MO.isImm()) {
3214     return !isInlineConstant(Inst, OpIdx);
3215   } else if (MO.isReg()) {
3216     auto Reg = MO.getReg();
3217     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3218     auto PReg = mc2PseudoReg(Reg);
3219     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3220   } else {
3221     return true;
3222   }
3223 }
3224 
3225 bool
3226 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3227                                                 const OperandVector &Operands) {
3228   const unsigned Opcode = Inst.getOpcode();
3229   const MCInstrDesc &Desc = MII.get(Opcode);
3230   unsigned LastSGPR = AMDGPU::NoRegister;
3231   unsigned ConstantBusUseCount = 0;
3232   unsigned NumLiterals = 0;
3233   unsigned LiteralSize;
3234 
3235   if (Desc.TSFlags &
3236       (SIInstrFlags::VOPC |
3237        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3238        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3239        SIInstrFlags::SDWA)) {
3240     // Check special imm operands (used by madmk, etc)
3241     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3242       ++ConstantBusUseCount;
3243     }
3244 
3245     SmallDenseSet<unsigned> SGPRsUsed;
3246     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3247     if (SGPRUsed != AMDGPU::NoRegister) {
3248       SGPRsUsed.insert(SGPRUsed);
3249       ++ConstantBusUseCount;
3250     }
3251 
3252     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3253     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3254     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3255 
3256     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3257 
3258     for (int OpIdx : OpIndices) {
3259       if (OpIdx == -1) break;
3260 
3261       const MCOperand &MO = Inst.getOperand(OpIdx);
3262       if (usesConstantBus(Inst, OpIdx)) {
3263         if (MO.isReg()) {
3264           LastSGPR = mc2PseudoReg(MO.getReg());
          // Pairs of registers with partial intersections like these
3266           //   s0, s[0:1]
3267           //   flat_scratch_lo, flat_scratch
3268           //   flat_scratch_lo, flat_scratch_hi
3269           // are theoretically valid but they are disabled anyway.
3270           // Note that this code mimics SIInstrInfo::verifyInstruction
3271           if (!SGPRsUsed.count(LastSGPR)) {
3272             SGPRsUsed.insert(LastSGPR);
3273             ++ConstantBusUseCount;
3274           }
3275         } else { // Expression or a literal
3276 
3277           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3278             continue; // special operand like VINTERP attr_chan
3279 
3280           // An instruction may use only one literal.
3281           // This has been validated on the previous step.
3282           // See validateVOP3Literal.
3283           // This literal may be used as more than one operand.
3284           // If all these operands are of the same size,
3285           // this literal counts as one scalar value.
3286           // Otherwise it counts as 2 scalar values.
3287           // See "GFX10 Shader Programming", section 3.6.2.3.
3288 
3289           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3290           if (Size < 4) Size = 4;
3291 
3292           if (NumLiterals == 0) {
3293             NumLiterals = 1;
3294             LiteralSize = Size;
3295           } else if (LiteralSize != Size) {
3296             NumLiterals = 2;
3297           }
3298         }
3299       }
3300     }
3301   }
3302   ConstantBusUseCount += NumLiterals;
3303 
3304   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3305     return true;
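  // The limit has been exceeded; for example, on pre-GFX10 targets
  // "v_add_f32_e64 v0, s0, s1" reads two different SGPRs while only one
  // scalar value may be read.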
3306 
3307   SMLoc LitLoc = getLitLoc(Operands);
3308   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3309   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3310   Error(Loc, "invalid operand (violates constant bus restrictions)");
3311   return false;
3312 }
3313 
3314 bool
3315 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3316                                                  const OperandVector &Operands) {
3317   const unsigned Opcode = Inst.getOpcode();
3318   const MCInstrDesc &Desc = MII.get(Opcode);
3319 
3320   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3321   if (DstIdx == -1 ||
3322       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3323     return true;
3324   }
3325 
3326   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3327 
3328   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3329   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3330   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3331 
3332   assert(DstIdx != -1);
3333   const MCOperand &Dst = Inst.getOperand(DstIdx);
3334   assert(Dst.isReg());
3335   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3336 
3337   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3338 
3339   for (int SrcIdx : SrcIndices) {
3340     if (SrcIdx == -1) break;
3341     const MCOperand &Src = Inst.getOperand(SrcIdx);
3342     if (Src.isReg()) {
3343       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3344       if (isRegIntersect(DstReg, SrcReg, TRI)) {
3345         Error(getRegLoc(SrcReg, Operands),
3346           "destination must be different than all sources");
3347         return false;
3348       }
3349     }
3350   }
3351 
3352   return true;
3353 }
3354 
3355 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3356 
3357   const unsigned Opc = Inst.getOpcode();
3358   const MCInstrDesc &Desc = MII.get(Opc);
3359 
3360   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3361     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3362     assert(ClampIdx != -1);
3363     return Inst.getOperand(ClampIdx).getImm() == 0;
3364   }
3365 
3366   return true;
3367 }
3368 
3369 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3370 
3371   const unsigned Opc = Inst.getOpcode();
3372   const MCInstrDesc &Desc = MII.get(Opc);
3373 
3374   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3375     return true;
3376 
3377   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3378   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3379   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3380 
3381   assert(VDataIdx != -1);
3382 
3383   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3384     return true;
3385 
3386   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3387   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3388   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3389   if (DMask == 0)
3390     DMask = 1;
3391 
3392   unsigned DataSize =
3393     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3394   if (hasPackedD16()) {
3395     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3396     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3397       DataSize = (DataSize + 1) / 2;
3398   }
3399 
3400   return (VDataSize / 4) == DataSize + TFESize;
3401 }
3402 
3403 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3404   const unsigned Opc = Inst.getOpcode();
3405   const MCInstrDesc &Desc = MII.get(Opc);
3406 
3407   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3408     return true;
3409 
3410   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3411 
3412   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3413       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3414   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3415   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3416   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3417 
3418   assert(VAddr0Idx != -1);
3419   assert(SrsrcIdx != -1);
3420   assert(SrsrcIdx > VAddr0Idx);
3421 
3422   if (DimIdx == -1)
3423     return true; // intersect_ray
3424 
3425   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3426   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3427   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3428   unsigned VAddrSize =
3429       IsNSA ? SrsrcIdx - VAddr0Idx
3430             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3431 
3432   unsigned AddrSize = BaseOpcode->NumExtraArgs +
3433                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
3434                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
3435                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
3436   if (!IsNSA) {
3437     if (AddrSize > 8)
3438       AddrSize = 16;
3439     else if (AddrSize > 4)
3440       AddrSize = 8;
3441   }
3442 
3443   return VAddrSize == AddrSize;
3444 }
3445 
3446 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3447 
3448   const unsigned Opc = Inst.getOpcode();
3449   const MCInstrDesc &Desc = MII.get(Opc);
3450 
3451   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3452     return true;
3453   if (!Desc.mayLoad() || !Desc.mayStore())
3454     return true; // Not atomic
3455 
3456   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3457   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3458 
3459   // This is an incomplete check because image_atomic_cmpswap
3460   // may only use 0x3 and 0xf while other atomic operations
3461   // may use 0x1 and 0x3. However these limitations are
3462   // verified when we check that dmask matches dst size.
3463   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3464 }
3465 
3466 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3467 
3468   const unsigned Opc = Inst.getOpcode();
3469   const MCInstrDesc &Desc = MII.get(Opc);
3470 
3471   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3472     return true;
3473 
3474   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3475   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3476 
3477   // GATHER4 instructions use dmask in a different fashion compared to
3478   // other MIMG instructions. The only useful DMASK values are
3479   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3480   // (red,red,red,red) etc.) The ISA document doesn't mention
3481   // this.
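  // For example, dmask:0x2 returns (green,green,green,green); a multi-bit
  // value such as dmask:0x3 is rejected below.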
3482   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3483 }
3484 
3485 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3486   const unsigned Opc = Inst.getOpcode();
3487   const MCInstrDesc &Desc = MII.get(Opc);
3488 
3489   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3490     return true;
3491 
3492   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3493   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3494       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3495 
3496   if (!BaseOpcode->MSAA)
3497     return true;
3498 
3499   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3500   assert(DimIdx != -1);
3501 
3502   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3503   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3504 
3505   return DimInfo->MSAA;
3506 }
3507 
static bool IsMovrelsSDWAOpcode(const unsigned Opcode) {
3510   switch (Opcode) {
3511   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3512   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3513   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3514     return true;
3515   default:
3516     return false;
3517   }
3518 }
3519 
// movrels* opcodes should only allow VGPRs as src0.
3521 // This is specified in .td description for vop1/vop3,
3522 // but sdwa is handled differently. See isSDWAOperand.
3523 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3524                                       const OperandVector &Operands) {
3525 
3526   const unsigned Opc = Inst.getOpcode();
3527   const MCInstrDesc &Desc = MII.get(Opc);
3528 
3529   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3530     return true;
3531 
3532   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3533   assert(Src0Idx != -1);
3534 
3535   SMLoc ErrLoc;
3536   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3537   if (Src0.isReg()) {
3538     auto Reg = mc2PseudoReg(Src0.getReg());
3539     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3540     if (!isSGPR(Reg, TRI))
3541       return true;
3542     ErrLoc = getRegLoc(Reg, Operands);
3543   } else {
3544     ErrLoc = getConstLoc(Operands);
3545   }
3546 
3547   Error(ErrLoc, "source operand must be a VGPR");
3548   return false;
3549 }
3550 
3551 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3552                                           const OperandVector &Operands) {
3553 
3554   const unsigned Opc = Inst.getOpcode();
3555 
3556   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3557     return true;
3558 
3559   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3560   assert(Src0Idx != -1);
3561 
3562   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3563   if (!Src0.isReg())
3564     return true;
3565 
3566   auto Reg = mc2PseudoReg(Src0.getReg());
3567   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3568   if (isSGPR(Reg, TRI)) {
3569     Error(getRegLoc(Reg, Operands),
3570           "source operand must be either a VGPR or an inline constant");
3571     return false;
3572   }
3573 
3574   return true;
3575 }
3576 
3577 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3578   switch (Inst.getOpcode()) {
3579   default:
3580     return true;
3581   case V_DIV_SCALE_F32_gfx6_gfx7:
3582   case V_DIV_SCALE_F32_vi:
3583   case V_DIV_SCALE_F32_gfx10:
3584   case V_DIV_SCALE_F64_gfx6_gfx7:
3585   case V_DIV_SCALE_F64_vi:
3586   case V_DIV_SCALE_F64_gfx10:
3587     break;
3588   }
3589 
3590   // TODO: Check that src0 = src1 or src2.
3591 
  for (auto Name : {AMDGPU::OpName::src0_modifiers,
                    AMDGPU::OpName::src1_modifiers,
                    AMDGPU::OpName::src2_modifiers}) {
3595     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3596             .getImm() &
3597         SISrcMods::ABS) {
3598       return false;
3599     }
3600   }
3601 
3602   return true;
3603 }
3604 
3605 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3606 
3607   const unsigned Opc = Inst.getOpcode();
3608   const MCInstrDesc &Desc = MII.get(Opc);
3609 
3610   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3611     return true;
3612 
3613   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3614   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3615     if (isCI() || isSI())
3616       return false;
3617   }
3618 
3619   return true;
3620 }
3621 
3622 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3623   const unsigned Opc = Inst.getOpcode();
3624   const MCInstrDesc &Desc = MII.get(Opc);
3625 
3626   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3627     return true;
3628 
3629   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3630   if (DimIdx < 0)
3631     return true;
3632 
3633   long Imm = Inst.getOperand(DimIdx).getImm();
3634   if (Imm < 0 || Imm >= 8)
3635     return false;
3636 
3637   return true;
3638 }
3639 
static bool IsRevOpcode(const unsigned Opcode) {
3642   switch (Opcode) {
3643   case AMDGPU::V_SUBREV_F32_e32:
3644   case AMDGPU::V_SUBREV_F32_e64:
3645   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3646   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3647   case AMDGPU::V_SUBREV_F32_e32_vi:
3648   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3649   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3650   case AMDGPU::V_SUBREV_F32_e64_vi:
3651 
3652   case AMDGPU::V_SUBREV_CO_U32_e32:
3653   case AMDGPU::V_SUBREV_CO_U32_e64:
3654   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3655   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3656 
3657   case AMDGPU::V_SUBBREV_U32_e32:
3658   case AMDGPU::V_SUBBREV_U32_e64:
3659   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3660   case AMDGPU::V_SUBBREV_U32_e32_vi:
3661   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3662   case AMDGPU::V_SUBBREV_U32_e64_vi:
3663 
3664   case AMDGPU::V_SUBREV_U32_e32:
3665   case AMDGPU::V_SUBREV_U32_e64:
3666   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3667   case AMDGPU::V_SUBREV_U32_e32_vi:
3668   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3669   case AMDGPU::V_SUBREV_U32_e64_vi:
3670 
3671   case AMDGPU::V_SUBREV_F16_e32:
3672   case AMDGPU::V_SUBREV_F16_e64:
3673   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3674   case AMDGPU::V_SUBREV_F16_e32_vi:
3675   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3676   case AMDGPU::V_SUBREV_F16_e64_vi:
3677 
3678   case AMDGPU::V_SUBREV_U16_e32:
3679   case AMDGPU::V_SUBREV_U16_e64:
3680   case AMDGPU::V_SUBREV_U16_e32_vi:
3681   case AMDGPU::V_SUBREV_U16_e64_vi:
3682 
3683   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3684   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3685   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3686 
3687   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3688   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3689 
3690   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3691   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3692 
3693   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3694   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3695 
3696   case AMDGPU::V_LSHRREV_B32_e32:
3697   case AMDGPU::V_LSHRREV_B32_e64:
3698   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3699   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3700   case AMDGPU::V_LSHRREV_B32_e32_vi:
3701   case AMDGPU::V_LSHRREV_B32_e64_vi:
3702   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3703   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3704 
3705   case AMDGPU::V_ASHRREV_I32_e32:
3706   case AMDGPU::V_ASHRREV_I32_e64:
3707   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3708   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3709   case AMDGPU::V_ASHRREV_I32_e32_vi:
3710   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3711   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3712   case AMDGPU::V_ASHRREV_I32_e64_vi:
3713 
3714   case AMDGPU::V_LSHLREV_B32_e32:
3715   case AMDGPU::V_LSHLREV_B32_e64:
3716   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3717   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3718   case AMDGPU::V_LSHLREV_B32_e32_vi:
3719   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3720   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3721   case AMDGPU::V_LSHLREV_B32_e64_vi:
3722 
3723   case AMDGPU::V_LSHLREV_B16_e32:
3724   case AMDGPU::V_LSHLREV_B16_e64:
3725   case AMDGPU::V_LSHLREV_B16_e32_vi:
3726   case AMDGPU::V_LSHLREV_B16_e64_vi:
3727   case AMDGPU::V_LSHLREV_B16_gfx10:
3728 
3729   case AMDGPU::V_LSHRREV_B16_e32:
3730   case AMDGPU::V_LSHRREV_B16_e64:
3731   case AMDGPU::V_LSHRREV_B16_e32_vi:
3732   case AMDGPU::V_LSHRREV_B16_e64_vi:
3733   case AMDGPU::V_LSHRREV_B16_gfx10:
3734 
3735   case AMDGPU::V_ASHRREV_I16_e32:
3736   case AMDGPU::V_ASHRREV_I16_e64:
3737   case AMDGPU::V_ASHRREV_I16_e32_vi:
3738   case AMDGPU::V_ASHRREV_I16_e64_vi:
3739   case AMDGPU::V_ASHRREV_I16_gfx10:
3740 
3741   case AMDGPU::V_LSHLREV_B64_e64:
3742   case AMDGPU::V_LSHLREV_B64_gfx10:
3743   case AMDGPU::V_LSHLREV_B64_vi:
3744 
3745   case AMDGPU::V_LSHRREV_B64_e64:
3746   case AMDGPU::V_LSHRREV_B64_gfx10:
3747   case AMDGPU::V_LSHRREV_B64_vi:
3748 
3749   case AMDGPU::V_ASHRREV_I64_e64:
3750   case AMDGPU::V_ASHRREV_I64_gfx10:
3751   case AMDGPU::V_ASHRREV_I64_vi:
3752 
3753   case AMDGPU::V_PK_LSHLREV_B16:
3754   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3755   case AMDGPU::V_PK_LSHLREV_B16_vi:
3756 
3757   case AMDGPU::V_PK_LSHRREV_B16:
3758   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3759   case AMDGPU::V_PK_LSHRREV_B16_vi:
3760   case AMDGPU::V_PK_ASHRREV_I16:
3761   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3762   case AMDGPU::V_PK_ASHRREV_I16_vi:
3763     return true;
3764   default:
3765     return false;
3766   }
3767 }
3768 
3769 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3770 
3771   using namespace SIInstrFlags;
3772   const unsigned Opcode = Inst.getOpcode();
3773   const MCInstrDesc &Desc = MII.get(Opcode);
3774 
3775   // lds_direct register is defined so that it can be used
3776   // with 9-bit operands only. Ignore encodings which do not accept these.
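// For example, "v_mov_b32 v1, lds_direct" is a typical use on targets that
// support it; the checks below reject lds_direct in src1/src2, with *rev
// opcodes, and in SDWA forms.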
3777   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3778   if ((Desc.TSFlags & Enc) == 0)
3779     return None;
3780 
3781   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3782     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3783     if (SrcIdx == -1)
3784       break;
3785     const auto &Src = Inst.getOperand(SrcIdx);
3786     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3787 
3788       if (isGFX90A())
3789         return StringRef("lds_direct is not supported on this GPU");
3790 
3791       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3792         return StringRef("lds_direct cannot be used with this instruction");
3793 
3794       if (SrcName != OpName::src0)
3795         return StringRef("lds_direct may be used as src0 only");
3796     }
3797   }
3798 
3799   return None;
3800 }
3801 
3802 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3803   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3804     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3805     if (Op.isFlatOffset())
3806       return Op.getStartLoc();
3807   }
3808   return getLoc();
3809 }
3810 
3811 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3812                                          const OperandVector &Operands) {
3813   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3814   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3815     return true;
3816 
3817   auto Opcode = Inst.getOpcode();
3818   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3819   assert(OpNum != -1);
3820 
3821   const auto &Op = Inst.getOperand(OpNum);
3822   if (!hasFlatOffsets() && Op.getImm() != 0) {
3823     Error(getFlatOffsetLoc(Operands),
3824           "flat offset modifier is not supported on this GPU");
3825     return false;
3826   }
3827 
3828   // For FLAT segment the offset must be positive;
3829   // MSB is ignored and forced to zero.
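  // For example, a negative offset is representable for global and scratch
  // instructions (signed) but not for plain flat instructions (unsigned).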
3830   if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
3831     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
3832     if (!isIntN(OffsetSize, Op.getImm())) {
3833       Error(getFlatOffsetLoc(Operands),
3834             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
3835       return false;
3836     }
3837   } else {
3838     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
3839     if (!isUIntN(OffsetSize, Op.getImm())) {
3840       Error(getFlatOffsetLoc(Operands),
3841             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
3842       return false;
3843     }
3844   }
3845 
3846   return true;
3847 }
3848 
3849 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3850   // Start with second operand because SMEM Offset cannot be dst or src0.
3851   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
3852     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3853     if (Op.isSMEMOffset())
3854       return Op.getStartLoc();
3855   }
3856   return getLoc();
3857 }
3858 
3859 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3860                                          const OperandVector &Operands) {
3861   if (isCI() || isSI())
3862     return true;
3863 
3864   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3865   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3866     return true;
3867 
3868   auto Opcode = Inst.getOpcode();
3869   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3870   if (OpNum == -1)
3871     return true;
3872 
3873   const auto &Op = Inst.getOperand(OpNum);
3874   if (!Op.isImm())
3875     return true;
3876 
3877   uint64_t Offset = Op.getImm();
3878   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3879   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3880       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3881     return true;
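  // For example, a negative offset is encodable on gfx9+ non-buffer loads
  // (21-bit signed) but not on VI, which only has a 20-bit unsigned offset.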
3882 
3883   Error(getSMEMOffsetLoc(Operands),
3884         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3885                                "expected a 21-bit signed offset");
3886 
3887   return false;
3888 }
3889 
3890 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3891   unsigned Opcode = Inst.getOpcode();
3892   const MCInstrDesc &Desc = MII.get(Opcode);
3893   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3894     return true;
3895 
3896   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3897   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3898 
3899   const int OpIndices[] = { Src0Idx, Src1Idx };
3900 
3901   unsigned NumExprs = 0;
3902   unsigned NumLiterals = 0;
3903   uint32_t LiteralValue;
3904 
3905   for (int OpIdx : OpIndices) {
3906     if (OpIdx == -1) break;
3907 
3908     const MCOperand &MO = Inst.getOperand(OpIdx);
3909     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3910     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3911       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3912         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3913         if (NumLiterals == 0 || LiteralValue != Value) {
3914           LiteralValue = Value;
3915           ++NumLiterals;
3916         }
3917       } else if (MO.isExpr()) {
3918         ++NumExprs;
3919       }
3920     }
3921   }
3922 
3923   return NumLiterals + NumExprs <= 1;
3924 }
3925 
3926 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3927   const unsigned Opc = Inst.getOpcode();
3928   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3929       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3930     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3931     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3932 
3933     if (OpSel & ~3)
3934       return false;
3935   }
3936   return true;
3937 }
3938 
3939 // Check if VCC register matches wavefront size
3940 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3941   auto FB = getFeatureBits();
3942   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3943     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3944 }
3945 
3946 // VOP3 literal is only allowed in GFX10+ and only one can be used
3947 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
3948                                           const OperandVector &Operands) {
3949   unsigned Opcode = Inst.getOpcode();
3950   const MCInstrDesc &Desc = MII.get(Opcode);
3951   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3952     return true;
3953 
3954   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3955   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3956   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3957 
3958   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3959 
3960   unsigned NumExprs = 0;
3961   unsigned NumLiterals = 0;
3962   uint32_t LiteralValue;
3963 
3964   for (int OpIdx : OpIndices) {
3965     if (OpIdx == -1) break;
3966 
3967     const MCOperand &MO = Inst.getOperand(OpIdx);
3968     if (!MO.isImm() && !MO.isExpr())
3969       continue;
3970     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
3971       continue;
3972 
3973     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3974         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
3975       Error(getConstLoc(Operands),
3976             "inline constants are not allowed for this operand");
3977       return false;
3978     }
3979 
3980     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3981       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3982       if (NumLiterals == 0 || LiteralValue != Value) {
3983         LiteralValue = Value;
3984         ++NumLiterals;
3985       }
3986     } else if (MO.isExpr()) {
3987       ++NumExprs;
3988     }
3989   }
3990   NumLiterals += NumExprs;
3991 
3992   if (!NumLiterals)
3993     return true;
3994 
3995   if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
3996     Error(getLitLoc(Operands), "literal operands are not supported");
3997     return false;
3998   }
3999 
4000   if (NumLiterals > 1) {
4001     Error(getLitLoc(Operands), "only one literal operand is allowed");
4002     return false;
4003   }
4004 
4005   return true;
4006 }
4007 
4008 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4009 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4010                          const MCRegisterInfo *MRI) {
4011   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4012   if (OpIdx < 0)
4013     return -1;
4014 
4015   const MCOperand &Op = Inst.getOperand(OpIdx);
4016   if (!Op.isReg())
4017     return -1;
4018 
4019   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4020   auto Reg = Sub ? Sub : Op.getReg();
4021   const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4022   return AGRP32.contains(Reg) ? 1 : 0;
4023 }
4024 
4025 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4026   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4027   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4028                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4029                   SIInstrFlags::DS)) == 0)
4030     return true;
4031 
4032   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4033                                                       : AMDGPU::OpName::vdata;
4034 
4035   const MCRegisterInfo *MRI = getMRI();
4036   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4037   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4038 
4039   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4040     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4041     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4042       return false;
4043   }
4044 
4045   auto FB = getFeatureBits();
4046   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4047     if (DataAreg < 0 || DstAreg < 0)
4048       return true;
4049     return DstAreg == DataAreg;
4050   }
4051 
4052   return DstAreg < 1 && DataAreg < 1;
4053 }
4054 
4055 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4056   auto FB = getFeatureBits();
4057   if (!FB[AMDGPU::FeatureGFX90AInsts])
4058     return true;
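  // On gfx90a, multi-register VGPR/AGPR operands must start at an even
  // register; for example, a 64-bit operand in v[1:2] is rejected while
  // v[2:3] is accepted.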
4059 
4060   const MCRegisterInfo *MRI = getMRI();
4061   const MCRegisterClass &VGRP32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4062   const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4063   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4064     const MCOperand &Op = Inst.getOperand(I);
4065     if (!Op.isReg())
4066       continue;
4067 
4068     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4069     if (!Sub)
4070       continue;
4071 
4072     if (VGRP32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4073       return false;
4074     if (AGRP32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4075       return false;
4076   }
4077 
4078   return true;
4079 }
4080 
4081 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4082                                             const OperandVector &Operands,
4083                                             const SMLoc &IDLoc) {
4084   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4085                                            AMDGPU::OpName::cpol);
4086   if (CPolPos == -1)
4087     return true;
4088 
4089   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4090 
4091   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4092   if ((TSFlags & (SIInstrFlags::SMRD)) &&
4093       (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
4094     Error(IDLoc, "invalid cache policy for SMRD instruction");
4095     return false;
4096   }
4097 
4098   if (isGFX90A() && (CPol & CPol::SCC)) {
4099     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4100     StringRef CStr(S.getPointer());
4101     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4102     Error(S, "scc is not supported on this GPU");
4103     return false;
4104   }
4105 
4106   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4107     return true;
4108 
4109   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4110     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4111       Error(IDLoc, "instruction must use glc");
4112       return false;
4113     }
4114   } else {
4115     if (CPol & CPol::GLC) {
4116       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4117       StringRef CStr(S.getPointer());
4118       S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
4119       Error(S, "instruction must not use glc");
4120       return false;
4121     }
4122   }
4123 
4124   return true;
4125 }
4126 
4127 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4128                                           const SMLoc &IDLoc,
4129                                           const OperandVector &Operands) {
4130   if (auto ErrMsg = validateLdsDirect(Inst)) {
4131     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4132     return false;
4133   }
4134   if (!validateSOPLiteral(Inst)) {
4135     Error(getLitLoc(Operands),
4136       "only one literal operand is allowed");
4137     return false;
4138   }
4139   if (!validateVOP3Literal(Inst, Operands)) {
4140     return false;
4141   }
4142   if (!validateConstantBusLimitations(Inst, Operands)) {
4143     return false;
4144   }
4145   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4146     return false;
4147   }
4148   if (!validateIntClampSupported(Inst)) {
4149     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4150       "integer clamping is not supported on this GPU");
4151     return false;
4152   }
4153   if (!validateOpSel(Inst)) {
4154     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4155       "invalid op_sel operand");
4156     return false;
4157   }
  // For MUBUF/MTBUF, d16 is a part of the opcode, so there is nothing to validate.
4159   if (!validateMIMGD16(Inst)) {
4160     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4161       "d16 modifier is not supported on this GPU");
4162     return false;
4163   }
4164   if (!validateMIMGDim(Inst)) {
4165     Error(IDLoc, "dim modifier is required on this GPU");
4166     return false;
4167   }
4168   if (!validateMIMGMSAA(Inst)) {
4169     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4170           "invalid dim; must be MSAA type");
4171     return false;
4172   }
4173   if (!validateMIMGDataSize(Inst)) {
4174     Error(IDLoc,
4175       "image data size does not match dmask and tfe");
4176     return false;
4177   }
4178   if (!validateMIMGAddrSize(Inst)) {
4179     Error(IDLoc,
4180       "image address size does not match dim and a16");
4181     return false;
4182   }
4183   if (!validateMIMGAtomicDMask(Inst)) {
4184     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4185       "invalid atomic image dmask");
4186     return false;
4187   }
4188   if (!validateMIMGGatherDMask(Inst)) {
4189     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4190       "invalid image_gather dmask: only one bit must be set");
4191     return false;
4192   }
4193   if (!validateMovrels(Inst, Operands)) {
4194     return false;
4195   }
4196   if (!validateFlatOffset(Inst, Operands)) {
4197     return false;
4198   }
4199   if (!validateSMEMOffset(Inst, Operands)) {
4200     return false;
4201   }
4202   if (!validateMAIAccWrite(Inst, Operands)) {
4203     return false;
4204   }
4205   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4206     return false;
4207   }
4208 
4209   if (!validateAGPRLdSt(Inst)) {
4210     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4211     ? "invalid register class: data and dst should be all VGPR or AGPR"
4212     : "invalid register class: agpr loads and stores not supported on this GPU"
4213     );
4214     return false;
4215   }
4216   if (!validateVGPRAlign(Inst)) {
4217     Error(IDLoc,
4218       "invalid register class: vgpr tuples must be 64 bit aligned");
4219     return false;
4220   }
4221 
4222   if (!validateDivScale(Inst)) {
4223     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4224     return false;
4225   }
4229 
4230   return true;
4231 }
4232 
4233 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4234                                             const FeatureBitset &FBS,
4235                                             unsigned VariantID = 0);
4236 
4237 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4238                                 const FeatureBitset &AvailableFeatures,
4239                                 unsigned VariantID);
4240 
4241 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4242                                        const FeatureBitset &FBS) {
4243   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4244 }
4245 
4246 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4247                                        const FeatureBitset &FBS,
4248                                        ArrayRef<unsigned> Variants) {
4249   for (auto Variant : Variants) {
4250     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4251       return true;
4252   }
4253 
4254   return false;
4255 }
4256 
4257 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4258                                                   const SMLoc &IDLoc) {
4259   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4260 
4261   // Check if requested instruction variant is supported.
4262   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4263     return false;
4264 
4265   // This instruction is not supported.
4266   // Clear any other pending errors because they are no longer relevant.
4267   getParser().clearPendingErrors();
4268 
4269   // Requested instruction variant is not supported.
4270   // Check if any other variants are supported.
4271   StringRef VariantName = getMatchedVariantName();
4272   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4273     return Error(IDLoc,
4274                  Twine(VariantName,
4275                        " variant of this instruction is not supported"));
4276   }
4277 
4278   // Finally check if this instruction is supported on any other GPU.
4279   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4280     return Error(IDLoc, "instruction not supported on this GPU");
4281   }
4282 
4283   // Instruction not supported on any GPU. Probably a typo.
4284   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4285   return Error(IDLoc, "invalid instruction" + Suggestion);
4286 }
4287 
4288 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4289                                               OperandVector &Operands,
4290                                               MCStreamer &Out,
4291                                               uint64_t &ErrorInfo,
4292                                               bool MatchingInlineAsm) {
4293   MCInst Inst;
4294   unsigned Result = Match_Success;
4295   for (auto Variant : getMatchedVariants()) {
4296     uint64_t EI;
4297     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4298                                   Variant);
    // We order match statuses from least to most specific and use the most
    // specific status found as the result:
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4302     if ((R == Match_Success) ||
4303         (R == Match_PreferE32) ||
4304         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4305         (R == Match_InvalidOperand && Result != Match_MissingFeature
4306                                    && Result != Match_PreferE32) ||
4307         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4308                                    && Result != Match_MissingFeature
4309                                    && Result != Match_PreferE32)) {
4310       Result = R;
4311       ErrorInfo = EI;
4312     }
4313     if (R == Match_Success)
4314       break;
4315   }
4316 
4317   if (Result == Match_Success) {
4318     if (!validateInstruction(Inst, IDLoc, Operands)) {
4319       return true;
4320     }
4321     Inst.setLoc(IDLoc);
4322     Out.emitInstruction(Inst, getSTI());
4323     return false;
4324   }
4325 
4326   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4327   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4328     return true;
4329   }
4330 
4331   switch (Result) {
4332   default: break;
4333   case Match_MissingFeature:
4334     // It has been verified that the specified instruction
4335     // mnemonic is valid. A match was found but it requires
4336     // features which are not supported on this GPU.
4337     return Error(IDLoc, "operands are not valid for this GPU or mode");
4338 
4339   case Match_InvalidOperand: {
4340     SMLoc ErrorLoc = IDLoc;
4341     if (ErrorInfo != ~0ULL) {
4342       if (ErrorInfo >= Operands.size()) {
4343         return Error(IDLoc, "too few operands for instruction");
4344       }
4345       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4346       if (ErrorLoc == SMLoc())
4347         ErrorLoc = IDLoc;
4348     }
4349     return Error(ErrorLoc, "invalid operand for instruction");
4350   }
4351 
4352   case Match_PreferE32:
4353     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4354                         "should be encoded as e32");
4355   case Match_MnemonicFail:
4356     llvm_unreachable("Invalid instructions should have been handled already");
4357   }
4358   llvm_unreachable("Implement any new match types added!");
4359 }
4360 
4361 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4362   int64_t Tmp = -1;
4363   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4364     return true;
4365   }
4366   if (getParser().parseAbsoluteExpression(Tmp)) {
4367     return true;
4368   }
4369   Ret = static_cast<uint32_t>(Tmp);
4370   return false;
4371 }
4372 
4373 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4374                                                uint32_t &Minor) {
4375   if (ParseAsAbsoluteExpression(Major))
4376     return TokError("invalid major version");
4377 
4378   if (!trySkipToken(AsmToken::Comma))
4379     return TokError("minor version number required, comma expected");
4380 
4381   if (ParseAsAbsoluteExpression(Minor))
4382     return TokError("invalid minor version");
4383 
4384   return false;
4385 }
4386 
4387 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4388   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4389     return TokError("directive only supported for amdgcn architecture");
4390 
4391   std::string TargetIDDirective;
4392   SMLoc TargetStart = getTok().getLoc();
4393   if (getParser().parseEscapedString(TargetIDDirective))
4394     return true;
4395 
4396   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4397   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4398     return getParser().Error(TargetRange.Start,
4399         (Twine(".amdgcn_target directive's target id ") +
4400          Twine(TargetIDDirective) +
4401          Twine(" does not match the specified target id ") +
4402          Twine(getTargetStreamer().getTargetID()->toString())).str());
4403 
4404   return false;
4405 }
4406 
4407 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4408   return Error(Range.Start, "value out of range", Range);
4409 }
4410 
4411 bool AMDGPUAsmParser::calculateGPRBlocks(
4412     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4413     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4414     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4415     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4416   // TODO(scott.linder): These calculations are duplicated from
4417   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4418   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4419 
4420   unsigned NumVGPRs = NextFreeVGPR;
4421   unsigned NumSGPRs = NextFreeSGPR;
4422 
4423   if (Version.Major >= 10)
4424     NumSGPRs = 0;
4425   else {
4426     unsigned MaxAddressableNumSGPRs =
4427         IsaInfo::getAddressableNumSGPRs(&getSTI());
4428 
4429     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4430         NumSGPRs > MaxAddressableNumSGPRs)
4431       return OutOfRangeError(SGPRRange);
4432 
4433     NumSGPRs +=
4434         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4435 
4436     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4437         NumSGPRs > MaxAddressableNumSGPRs)
4438       return OutOfRangeError(SGPRRange);
4439 
4440     if (Features.test(FeatureSGPRInitBug))
4441       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4442   }
4443 
4444   VGPRBlocks =
4445       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4446   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4447 
4448   return false;
4449 }
4450 
4451 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4452   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4453     return TokError("directive only supported for amdgcn architecture");
4454 
4455   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4456     return TokError("directive only supported for amdhsa OS");
4457 
4458   StringRef KernelName;
4459   if (getParser().parseIdentifier(KernelName))
4460     return true;
4461 
4462   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4463 
4464   StringSet<> Seen;
4465 
4466   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4467 
4468   SMRange VGPRRange;
4469   uint64_t NextFreeVGPR = 0;
4470   uint64_t AccumOffset = 0;
4471   SMRange SGPRRange;
4472   uint64_t NextFreeSGPR = 0;
4473   unsigned UserSGPRCount = 0;
4474   bool ReserveVCC = true;
4475   bool ReserveFlatScr = true;
4476   Optional<bool> EnableWavefrontSize32;
4477 
4478   while (true) {
4479     while (trySkipToken(AsmToken::EndOfStatement));
4480 
4481     StringRef ID;
4482     SMRange IDRange = getTok().getLocRange();
4483     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4484       return true;
4485 
4486     if (ID == ".end_amdhsa_kernel")
4487       break;
4488 
4489     if (Seen.find(ID) != Seen.end())
4490       return TokError(".amdhsa_ directives cannot be repeated");
4491     Seen.insert(ID);
4492 
4493     SMLoc ValStart = getLoc();
4494     int64_t IVal;
4495     if (getParser().parseAbsoluteExpression(IVal))
4496       return true;
4497     SMLoc ValEnd = getLoc();
4498     SMRange ValRange = SMRange(ValStart, ValEnd);
4499 
4500     if (IVal < 0)
4501       return OutOfRangeError(ValRange);
4502 
4503     uint64_t Val = IVal;
4504 
4505 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4506   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4507     return OutOfRangeError(RANGE);                                             \
4508   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4509 
4510     if (ID == ".amdhsa_group_segment_fixed_size") {
4511       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4512         return OutOfRangeError(ValRange);
4513       KD.group_segment_fixed_size = Val;
4514     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4515       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4516         return OutOfRangeError(ValRange);
4517       KD.private_segment_fixed_size = Val;
4518     } else if (ID == ".amdhsa_kernarg_size") {
4519       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4520         return OutOfRangeError(ValRange);
4521       KD.kernarg_size = Val;
4522     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4523       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4524                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4525                        Val, ValRange);
4526       if (Val)
4527         UserSGPRCount += 4;
4528     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4529       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4530                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4531                        ValRange);
4532       if (Val)
4533         UserSGPRCount += 2;
4534     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4535       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4536                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4537                        ValRange);
4538       if (Val)
4539         UserSGPRCount += 2;
4540     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4541       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4542                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4543                        Val, ValRange);
4544       if (Val)
4545         UserSGPRCount += 2;
4546     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4547       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4548                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4549                        ValRange);
4550       if (Val)
4551         UserSGPRCount += 2;
4552     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4553       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4554                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4555                        ValRange);
4556       if (Val)
4557         UserSGPRCount += 2;
4558     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4559       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4560                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4561                        Val, ValRange);
4562       if (Val)
4563         UserSGPRCount += 1;
4564     } else if (ID == ".amdhsa_wavefront_size32") {
4565       if (IVersion.Major < 10)
4566         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4567       EnableWavefrontSize32 = Val;
4568       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4569                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4570                        Val, ValRange);
4571     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4572       PARSE_BITS_ENTRY(
4573           KD.compute_pgm_rsrc2,
4574           COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val,
4575           ValRange);
4576     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4577       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4578                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4579                        ValRange);
4580     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4581       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4582                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4583                        ValRange);
4584     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4585       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4586                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4587                        ValRange);
4588     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4589       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4590                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4591                        ValRange);
4592     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4593       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4594                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4595                        ValRange);
4596     } else if (ID == ".amdhsa_next_free_vgpr") {
4597       VGPRRange = ValRange;
4598       NextFreeVGPR = Val;
4599     } else if (ID == ".amdhsa_next_free_sgpr") {
4600       SGPRRange = ValRange;
4601       NextFreeSGPR = Val;
4602     } else if (ID == ".amdhsa_accum_offset") {
4603       if (!isGFX90A())
4604         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4605       AccumOffset = Val;
4606     } else if (ID == ".amdhsa_reserve_vcc") {
4607       if (!isUInt<1>(Val))
4608         return OutOfRangeError(ValRange);
4609       ReserveVCC = Val;
4610     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4611       if (IVersion.Major < 7)
4612         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4613       if (!isUInt<1>(Val))
4614         return OutOfRangeError(ValRange);
4615       ReserveFlatScr = Val;
4616     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4617       if (IVersion.Major < 8)
4618         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4619       if (!isUInt<1>(Val))
4620         return OutOfRangeError(ValRange);
4621       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
4622         return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
4623                                  IDRange);
4624     } else if (ID == ".amdhsa_float_round_mode_32") {
4625       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4626                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4627     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4628       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4629                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4630     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4631       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4632                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4633     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4634       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4635                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4636                        ValRange);
4637     } else if (ID == ".amdhsa_dx10_clamp") {
4638       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4639                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4640     } else if (ID == ".amdhsa_ieee_mode") {
4641       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4642                        Val, ValRange);
4643     } else if (ID == ".amdhsa_fp16_overflow") {
4644       if (IVersion.Major < 9)
4645         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
4646       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4647                        ValRange);
4648     } else if (ID == ".amdhsa_tg_split") {
4649       if (!isGFX90A())
4650         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4651       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
4652                        ValRange);
4653     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4654       if (IVersion.Major < 10)
4655         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4656       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4657                        ValRange);
4658     } else if (ID == ".amdhsa_memory_ordered") {
4659       if (IVersion.Major < 10)
4660         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4661       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4662                        ValRange);
4663     } else if (ID == ".amdhsa_forward_progress") {
4664       if (IVersion.Major < 10)
4665         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4666       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4667                        ValRange);
4668     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4669       PARSE_BITS_ENTRY(
4670           KD.compute_pgm_rsrc2,
4671           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4672           ValRange);
4673     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4674       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4675                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4676                        Val, ValRange);
4677     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4678       PARSE_BITS_ENTRY(
4679           KD.compute_pgm_rsrc2,
4680           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4681           ValRange);
4682     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4683       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4684                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4685                        Val, ValRange);
4686     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4687       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4688                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4689                        Val, ValRange);
4690     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4691       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4692                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4693                        Val, ValRange);
4694     } else if (ID == ".amdhsa_exception_int_div_zero") {
4695       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4696                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4697                        Val, ValRange);
4698     } else {
4699       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
4700     }
4701 
4702 #undef PARSE_BITS_ENTRY
4703   }
4704 
4705   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4706     return TokError(".amdhsa_next_free_vgpr directive is required");
4707 
4708   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4709     return TokError(".amdhsa_next_free_sgpr directive is required");
4710 
4711   unsigned VGPRBlocks;
4712   unsigned SGPRBlocks;
4713   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4714                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
4715                          EnableWavefrontSize32, NextFreeVGPR,
4716                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4717                          SGPRBlocks))
4718     return true;
4719 
4720   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4721           VGPRBlocks))
4722     return OutOfRangeError(VGPRRange);
4723   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4724                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4725 
4726   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4727           SGPRBlocks))
4728     return OutOfRangeError(SGPRRange);
4729   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4730                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4731                   SGPRBlocks);
4732 
4733   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4734     return TokError("too many user SGPRs enabled");
4735   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4736                   UserSGPRCount);
4737 
4738   if (isGFX90A()) {
4739     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
4740       return TokError(".amdhsa_accum_offset directive is required");
4741     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
4742       return TokError("accum_offset should be in range [4..256] in "
4743                       "increments of 4");
4744     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
4745       return TokError("accum_offset exceeds total VGPR allocation");
4746     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
4747                     (AccumOffset / 4 - 1));
4748   }
4749 
4750   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4751       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4752       ReserveFlatScr);
4753   return false;
4754 }
4755 
4756 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4757   uint32_t Major;
4758   uint32_t Minor;
4759 
4760   if (ParseDirectiveMajorMinor(Major, Minor))
4761     return true;
4762 
4763   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4764   return false;
4765 }
4766 
4767 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4768   uint32_t Major;
4769   uint32_t Minor;
4770   uint32_t Stepping;
4771   StringRef VendorName;
4772   StringRef ArchName;
4773 
4774   // If this directive has no arguments, then use the ISA version for the
4775   // targeted GPU.
4776   if (isToken(AsmToken::EndOfStatement)) {
4777     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4778     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
4779                                                         ISA.Stepping,
4780                                                         "AMD", "AMDGPU");
4781     return false;
4782   }
4783 
4784   if (ParseDirectiveMajorMinor(Major, Minor))
4785     return true;
4786 
4787   if (!trySkipToken(AsmToken::Comma))
4788     return TokError("stepping version number required, comma expected");
4789 
4790   if (ParseAsAbsoluteExpression(Stepping))
4791     return TokError("invalid stepping version");
4792 
4793   if (!trySkipToken(AsmToken::Comma))
4794     return TokError("vendor name required, comma expected");
4795 
4796   if (!parseString(VendorName, "invalid vendor name"))
4797     return true;
4798 
4799   if (!trySkipToken(AsmToken::Comma))
4800     return TokError("arch name required, comma expected");
4801 
4802   if (!parseString(ArchName, "invalid arch name"))
4803     return true;
4804 
4805   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
4806                                                       VendorName, ArchName);
4807   return false;
4808 }
4809 
4810 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4811                                                amd_kernel_code_t &Header) {
4812   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4813   // assembly for backwards compatibility.
4814   if (ID == "max_scratch_backing_memory_byte_size") {
4815     Parser.eatToEndOfStatement();
4816     return false;
4817   }
4818 
4819   SmallString<40> ErrStr;
4820   raw_svector_ostream Err(ErrStr);
4821   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4822     return TokError(Err.str());
4823   }
4824   Lex();
4825 
4826   if (ID == "enable_wavefront_size32") {
4827     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4828       if (!isGFX10Plus())
4829         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4830       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4831         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4832     } else {
4833       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4834         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4835     }
4836   }
4837 
4838   if (ID == "wavefront_size") {
4839     if (Header.wavefront_size == 5) {
4840       if (!isGFX10Plus())
4841         return TokError("wavefront_size=5 is only allowed on GFX10+");
4842       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4843         return TokError("wavefront_size=5 requires +WavefrontSize32");
4844     } else if (Header.wavefront_size == 6) {
4845       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4846         return TokError("wavefront_size=6 requires +WavefrontSize64");
4847     }
4848   }
4849 
4850   if (ID == "enable_wgp_mode") {
4851     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
4852         !isGFX10Plus())
4853       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4854   }
4855 
4856   if (ID == "enable_mem_ordered") {
4857     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
4858         !isGFX10Plus())
4859       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4860   }
4861 
4862   if (ID == "enable_fwd_progress") {
4863     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
4864         !isGFX10Plus())
4865       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4866   }
4867 
4868   return false;
4869 }
4870 
4871 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4872   amd_kernel_code_t Header;
4873   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4874 
4875   while (true) {
4876     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
4877     // will set the current token to EndOfStatement.
4878     while(trySkipToken(AsmToken::EndOfStatement));
4879 
4880     StringRef ID;
4881     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
4882       return true;
4883 
4884     if (ID == ".end_amd_kernel_code_t")
4885       break;
4886 
4887     if (ParseAMDKernelCodeTValue(ID, Header))
4888       return true;
4889   }
4890 
4891   getTargetStreamer().EmitAMDKernelCodeT(Header);
4892 
4893   return false;
4894 }
4895 
4896 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4897   StringRef KernelName;
4898   if (!parseId(KernelName, "expected symbol name"))
4899     return true;
4900 
4901   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4902                                            ELF::STT_AMDGPU_HSA_KERNEL);
4903 
4904   KernelScope.initialize(getContext());
4905   return false;
4906 }
4907 
4908 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4909   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4910     return Error(getLoc(),
4911                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4912                  "architectures");
4913   }
4914 
4915   auto TargetIDDirective = getLexer().getTok().getStringContents();
4916   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4917     return Error(getParser().getTok().getLoc(), "target id must match options");
4918 
4919   getTargetStreamer().EmitISAVersion();
4920   Lex();
4921 
4922   return false;
4923 }
4924 
4925 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4926   const char *AssemblerDirectiveBegin;
4927   const char *AssemblerDirectiveEnd;
4928   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4929       isHsaAbiVersion3Or4(&getSTI())
4930           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4931                             HSAMD::V3::AssemblerDirectiveEnd)
4932           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4933                             HSAMD::AssemblerDirectiveEnd);
4934 
4935   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4936     return Error(getLoc(),
4937                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4938                  "not available on non-amdhsa OSes")).str());
4939   }
4940 
4941   std::string HSAMetadataString;
4942   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4943                           HSAMetadataString))
4944     return true;
4945 
4946   if (isHsaAbiVersion3Or4(&getSTI())) {
4947     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4948       return Error(getLoc(), "invalid HSA metadata");
4949   } else {
4950     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4951       return Error(getLoc(), "invalid HSA metadata");
4952   }
4953 
4954   return false;
4955 }
4956 
4957 /// Common code to parse out a block of text (typically YAML) between start and
4958 /// end directives.
4959 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4960                                           const char *AssemblerDirectiveEnd,
4961                                           std::string &CollectString) {
4962 
4963   raw_string_ostream CollectStream(CollectString);
4964 
4965   getLexer().setSkipSpace(false);
4966 
4967   bool FoundEnd = false;
4968   while (!isToken(AsmToken::Eof)) {
4969     while (isToken(AsmToken::Space)) {
4970       CollectStream << getTokenStr();
4971       Lex();
4972     }
4973 
4974     if (trySkipId(AssemblerDirectiveEnd)) {
4975       FoundEnd = true;
4976       break;
4977     }
4978 
4979     CollectStream << Parser.parseStringToEndOfStatement()
4980                   << getContext().getAsmInfo()->getSeparatorString();
4981 
4982     Parser.eatToEndOfStatement();
4983   }
4984 
4985   getLexer().setSkipSpace(true);
4986 
4987   if (isToken(AsmToken::Eof) && !FoundEnd) {
4988     return TokError(Twine("expected directive ") +
4989                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4990   }
4991 
4992   CollectStream.flush();
4993   return false;
4994 }
4995 
4996 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4997 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4998   std::string String;
4999   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5000                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5001     return true;
5002 
5003   auto PALMetadata = getTargetStreamer().getPALMetadata();
5004   if (!PALMetadata->setFromString(String))
5005     return Error(getLoc(), "invalid PAL metadata");
5006   return false;
5007 }
5008 
5009 /// Parse the assembler directive for old linear-format PAL metadata.
5010 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5011   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5012     return Error(getLoc(),
5013                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5014                  "not available on non-amdpal OSes")).str());
5015   }
5016 
5017   auto PALMetadata = getTargetStreamer().getPALMetadata();
5018   PALMetadata->setLegacy();
5019   for (;;) {
5020     uint32_t Key, Value;
5021     if (ParseAsAbsoluteExpression(Key)) {
5022       return TokError(Twine("invalid value in ") +
5023                       Twine(PALMD::AssemblerDirective));
5024     }
5025     if (!trySkipToken(AsmToken::Comma)) {
5026       return TokError(Twine("expected an even number of values in ") +
5027                       Twine(PALMD::AssemblerDirective));
5028     }
5029     if (ParseAsAbsoluteExpression(Value)) {
5030       return TokError(Twine("invalid value in ") +
5031                       Twine(PALMD::AssemblerDirective));
5032     }
5033     PALMetadata->setRegister(Key, Value);
5034     if (!trySkipToken(AsmToken::Comma))
5035       break;
5036   }
5037   return false;
5038 }
5039 
5040 /// ParseDirectiveAMDGPULDS
5041 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
5042 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5043   if (getParser().checkForValidSection())
5044     return true;
5045 
5046   StringRef Name;
5047   SMLoc NameLoc = getLoc();
5048   if (getParser().parseIdentifier(Name))
5049     return TokError("expected identifier in directive");
5050 
5051   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5052   if (parseToken(AsmToken::Comma, "expected ','"))
5053     return true;
5054 
5055   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5056 
5057   int64_t Size;
5058   SMLoc SizeLoc = getLoc();
5059   if (getParser().parseAbsoluteExpression(Size))
5060     return true;
5061   if (Size < 0)
5062     return Error(SizeLoc, "size must be non-negative");
5063   if (Size > LocalMemorySize)
5064     return Error(SizeLoc, "size is too large");
5065 
5066   int64_t Alignment = 4;
5067   if (trySkipToken(AsmToken::Comma)) {
5068     SMLoc AlignLoc = getLoc();
5069     if (getParser().parseAbsoluteExpression(Alignment))
5070       return true;
5071     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5072       return Error(AlignLoc, "alignment must be a power of two");
5073 
5074     // Alignment larger than the size of LDS is possible in theory, as long
5075     // as the linker manages to place the symbol at address 0, but we do want
5076     // to make sure the alignment fits nicely into a 32-bit integer.
5077     if (Alignment >= 1u << 31)
5078       return Error(AlignLoc, "alignment is too large");
5079   }
5080 
5081   if (parseToken(AsmToken::EndOfStatement,
5082                  "unexpected token in '.amdgpu_lds' directive"))
5083     return true;
5084 
5085   Symbol->redefineIfPossible();
5086   if (!Symbol->isUndefined())
5087     return Error(NameLoc, "invalid symbol redefinition");
5088 
5089   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5090   return false;
5091 }
5092 
5093 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5094   StringRef IDVal = DirectiveID.getString();
5095 
5096   if (isHsaAbiVersion3Or4(&getSTI())) {
5097     if (IDVal == ".amdhsa_kernel")
5098      return ParseDirectiveAMDHSAKernel();
5099 
5100     // TODO: Restructure/combine with PAL metadata directive.
5101     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5102       return ParseDirectiveHSAMetadata();
5103   } else {
5104     if (IDVal == ".hsa_code_object_version")
5105       return ParseDirectiveHSACodeObjectVersion();
5106 
5107     if (IDVal == ".hsa_code_object_isa")
5108       return ParseDirectiveHSACodeObjectISA();
5109 
5110     if (IDVal == ".amd_kernel_code_t")
5111       return ParseDirectiveAMDKernelCodeT();
5112 
5113     if (IDVal == ".amdgpu_hsa_kernel")
5114       return ParseDirectiveAMDGPUHsaKernel();
5115 
5116     if (IDVal == ".amd_amdgpu_isa")
5117       return ParseDirectiveISAVersion();
5118 
5119     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5120       return ParseDirectiveHSAMetadata();
5121   }
5122 
5123   if (IDVal == ".amdgcn_target")
5124     return ParseDirectiveAMDGCNTarget();
5125 
5126   if (IDVal == ".amdgpu_lds")
5127     return ParseDirectiveAMDGPULDS();
5128 
5129   if (IDVal == PALMD::AssemblerDirectiveBegin)
5130     return ParseDirectivePALMetadataBegin();
5131 
5132   if (IDVal == PALMD::AssemblerDirective)
5133     return ParseDirectivePALMetadata();
5134 
5135   return true;
5136 }
5137 
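// Check whether RegNo names a register that is actually available on the
// current subtarget (e.g. ttmp12-ttmp15 and null only exist on newer
// generations, and flat_scratch is not a valid operand on SI or GFX10+).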
5138 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5139                                            unsigned RegNo) {
5140 
5141   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
5142        R.isValid(); ++R) {
5143     if (*R == RegNo)
5144       return isGFX9Plus();
5145   }
5146 
5147   // GFX10 has 2 more SGPRs: 104 and 105.
5148   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
5149        R.isValid(); ++R) {
5150     if (*R == RegNo)
5151       return hasSGPR104_SGPR105();
5152   }
5153 
5154   switch (RegNo) {
5155   case AMDGPU::SRC_SHARED_BASE:
5156   case AMDGPU::SRC_SHARED_LIMIT:
5157   case AMDGPU::SRC_PRIVATE_BASE:
5158   case AMDGPU::SRC_PRIVATE_LIMIT:
5159   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5160     return isGFX9Plus();
5161   case AMDGPU::TBA:
5162   case AMDGPU::TBA_LO:
5163   case AMDGPU::TBA_HI:
5164   case AMDGPU::TMA:
5165   case AMDGPU::TMA_LO:
5166   case AMDGPU::TMA_HI:
5167     return !isGFX9Plus();
5168   case AMDGPU::XNACK_MASK:
5169   case AMDGPU::XNACK_MASK_LO:
5170   case AMDGPU::XNACK_MASK_HI:
5171     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5172   case AMDGPU::SGPR_NULL:
5173     return isGFX10Plus();
5174   default:
5175     break;
5176   }
5177 
5178   if (isCI())
5179     return true;
5180 
5181   if (isSI() || isGFX10Plus()) {
5182     // No flat_scr on SI.
5183     // On GFX10 flat scratch is not a valid register operand and can only be
5184     // accessed with s_setreg/s_getreg.
5185     switch (RegNo) {
5186     case AMDGPU::FLAT_SCR:
5187     case AMDGPU::FLAT_SCR_LO:
5188     case AMDGPU::FLAT_SCR_HI:
5189       return false;
5190     default:
5191       return true;
5192     }
5193   }
5194 
5195   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 extra
5196   // SGPRs that SI/CI have.
5197   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
5198        R.isValid(); ++R) {
5199     if (*R == RegNo)
5200       return hasSGPR102_SGPR103();
5201   }
5202 
5203   return true;
5204 }
5205 
5206 OperandMatchResultTy
5207 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5208                               OperandMode Mode) {
5209   // Try to parse with a custom parser
5210   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5211 
5212   // If we successfully parsed the operand or if there was an error parsing,
5213   // we are done.
5214   //
5215   // If we are parsing after we reach EndOfStatement then this means we
5216   // are appending default values to the Operands list.  This is only done
5217   // by the custom parser, so we shouldn't continue on to the generic parsing.
5218   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5219       isToken(AsmToken::EndOfStatement))
5220     return ResTy;
5221 
5222   SMLoc RBraceLoc;
5223   SMLoc LBraceLoc = getLoc();
5224   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5225     unsigned Prefix = Operands.size();
5226 
5227     for (;;) {
5228       auto Loc = getLoc();
5229       ResTy = parseReg(Operands);
5230       if (ResTy == MatchOperand_NoMatch)
5231         Error(Loc, "expected a register");
5232       if (ResTy != MatchOperand_Success)
5233         return MatchOperand_ParseFail;
5234 
5235       RBraceLoc = getLoc();
5236       if (trySkipToken(AsmToken::RBrac))
5237         break;
5238 
5239       if (!skipToken(AsmToken::Comma,
5240                      "expected a comma or a closing square bracket")) {
5241         return MatchOperand_ParseFail;
5242       }
5243     }
5244 
5245     if (Operands.size() - Prefix > 1) {
5246       Operands.insert(Operands.begin() + Prefix,
5247                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5248       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5249     }
5250 
5251     return MatchOperand_Success;
5252   }
5253 
5254   return parseRegOrImm(Operands);
5255 }
5256 
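// Strip a trailing encoding suffix (_e64, _e32, _dpp or _sdwa) from the
// mnemonic and record the corresponding forced encoding for matching.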
5257 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5258   // Clear any forced encodings from the previous instruction.
5259   setForcedEncodingSize(0);
5260   setForcedDPP(false);
5261   setForcedSDWA(false);
5262 
5263   if (Name.endswith("_e64")) {
5264     setForcedEncodingSize(64);
5265     return Name.substr(0, Name.size() - 4);
5266   } else if (Name.endswith("_e32")) {
5267     setForcedEncodingSize(32);
5268     return Name.substr(0, Name.size() - 4);
5269   } else if (Name.endswith("_dpp")) {
5270     setForcedDPP(true);
5271     return Name.substr(0, Name.size() - 4);
5272   } else if (Name.endswith("_sdwa")) {
5273     setForcedSDWA(true);
5274     return Name.substr(0, Name.size() - 5);
5275   }
5276   return Name;
5277 }
5278 
5279 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5280                                        StringRef Name,
5281                                        SMLoc NameLoc, OperandVector &Operands) {
5282   // Add the instruction mnemonic
5283   Name = parseMnemonicSuffix(Name);
5284   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5285 
5286   bool IsMIMG = Name.startswith("image_");
5287 
5288   while (!trySkipToken(AsmToken::EndOfStatement)) {
5289     OperandMode Mode = OperandMode_Default;
5290     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5291       Mode = OperandMode_NSA;
5292     CPolSeen = 0;
5293     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5294 
5295     if (Res != MatchOperand_Success) {
5296       checkUnsupportedInstruction(Name, NameLoc);
5297       if (!Parser.hasPendingError()) {
5298         // FIXME: use real operand location rather than the current location.
5299         StringRef Msg =
5300           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5301                                             "not a valid operand.";
5302         Error(getLoc(), Msg);
5303       }
5304       while (!trySkipToken(AsmToken::EndOfStatement)) {
5305         lex();
5306       }
5307       return true;
5308     }
5309 
5310     // Eat the comma if there is one.
5311     trySkipToken(AsmToken::Comma);
5312   }
5313 
5314   return false;
5315 }
5316 
5317 //===----------------------------------------------------------------------===//
5318 // Utility functions
5319 //===----------------------------------------------------------------------===//
5320 
5321 OperandMatchResultTy
5322 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5323 
5324   if (!trySkipId(Prefix, AsmToken::Colon))
5325     return MatchOperand_NoMatch;
5326 
5327   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5328 }
5329 
5330 OperandMatchResultTy
5331 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5332                                     AMDGPUOperand::ImmTy ImmTy,
5333                                     bool (*ConvertResult)(int64_t&)) {
5334   SMLoc S = getLoc();
5335   int64_t Value = 0;
5336 
5337   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5338   if (Res != MatchOperand_Success)
5339     return Res;
5340 
5341   if (ConvertResult && !ConvertResult(Value)) {
5342     Error(S, "invalid " + StringRef(Prefix) + " value.");
5343   }
5344 
5345   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5346   return MatchOperand_Success;
5347 }
5348 
5349 OperandMatchResultTy
5350 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5351                                              OperandVector &Operands,
5352                                              AMDGPUOperand::ImmTy ImmTy,
5353                                              bool (*ConvertResult)(int64_t&)) {
5354   SMLoc S = getLoc();
5355   if (!trySkipId(Prefix, AsmToken::Colon))
5356     return MatchOperand_NoMatch;
5357 
5358   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5359     return MatchOperand_ParseFail;
5360 
5361   unsigned Val = 0;
5362   const unsigned MaxSize = 4;
5363 
5364   // FIXME: How to verify the number of elements matches the number of src
5365   // operands?
5366   for (int I = 0; ; ++I) {
5367     int64_t Op;
5368     SMLoc Loc = getLoc();
5369     if (!parseExpr(Op))
5370       return MatchOperand_ParseFail;
5371 
5372     if (Op != 0 && Op != 1) {
5373       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5374       return MatchOperand_ParseFail;
5375     }
5376 
5377     Val |= (Op << I);
5378 
5379     if (trySkipToken(AsmToken::RBrac))
5380       break;
5381 
5382     if (I + 1 == MaxSize) {
5383       Error(getLoc(), "expected a closing square bracket");
5384       return MatchOperand_ParseFail;
5385     }
5386 
5387     if (!skipToken(AsmToken::Comma, "expected a comma"))
5388       return MatchOperand_ParseFail;
5389   }
5390 
5391   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5392   return MatchOperand_Success;
5393 }
5394 
5395 OperandMatchResultTy
5396 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5397                                AMDGPUOperand::ImmTy ImmTy) {
5398   int64_t Bit;
5399   SMLoc S = getLoc();
5400 
5401   if (trySkipId(Name)) {
5402     Bit = 1;
5403   } else if (trySkipId("no", Name)) {
5404     Bit = 0;
5405   } else {
5406     return MatchOperand_NoMatch;
5407   }
5408 
5409   if (Name == "r128" && !hasMIMG_R128()) {
5410     Error(S, "r128 modifier is not supported on this GPU");
5411     return MatchOperand_ParseFail;
5412   }
5413   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5414     Error(S, "a16 modifier is not supported on this GPU");
5415     return MatchOperand_ParseFail;
5416   }
5417 
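  // GFX9 encodes a16 in the combined r128/a16 operand, so switch to the
  // shared operand type there.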
5418   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5419     ImmTy = AMDGPUOperand::ImmTyR128A16;
5420 
5421   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5422   return MatchOperand_Success;
5423 }
5424 
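// Parse a cache policy modifier (glc, slc, dlc or scc, or its no* negation).
// Repeated modifiers are merged into a single CPol immediate operand.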
5425 OperandMatchResultTy
5426 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5427   unsigned CPolOn = 0;
5428   unsigned CPolOff = 0;
5429   SMLoc S = getLoc();
5430 
5431   if (trySkipId("glc"))
5432     CPolOn = AMDGPU::CPol::GLC;
5433   else if (trySkipId("noglc"))
5434     CPolOff = AMDGPU::CPol::GLC;
5435   else if (trySkipId("slc"))
5436     CPolOn = AMDGPU::CPol::SLC;
5437   else if (trySkipId("noslc"))
5438     CPolOff = AMDGPU::CPol::SLC;
5439   else if (trySkipId("dlc"))
5440     CPolOn = AMDGPU::CPol::DLC;
5441   else if (trySkipId("nodlc"))
5442     CPolOff = AMDGPU::CPol::DLC;
5443   else if (trySkipId("scc"))
5444     CPolOn = AMDGPU::CPol::SCC;
5445   else if (trySkipId("noscc"))
5446     CPolOff = AMDGPU::CPol::SCC;
5447   else
5448     return MatchOperand_NoMatch;
5449 
5450   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5451     Error(S, "dlc modifier is not supported on this GPU");
5452     return MatchOperand_ParseFail;
5453   }
5454 
5455   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5456     Error(S, "scc modifier is not supported on this GPU");
5457     return MatchOperand_ParseFail;
5458   }
5459 
5460   if (CPolSeen & (CPolOn | CPolOff)) {
5461     Error(S, "duplicate cache policy modifier");
5462     return MatchOperand_ParseFail;
5463   }
5464 
5465   CPolSeen |= (CPolOn | CPolOff);
5466 
5467   for (unsigned I = 1; I != Operands.size(); ++I) {
5468     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5469     if (Op.isCPol()) {
5470       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5471       return MatchOperand_Success;
5472     }
5473   }
5474 
5475   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5476                                               AMDGPUOperand::ImmTyCPol));
5477 
5478   return MatchOperand_Success;
5479 }
5480 
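// Add an optional immediate operand to Inst: use the parsed value recorded
// in OptionalIdx if present, otherwise fall back to Default.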
5481 static void addOptionalImmOperand(
5482   MCInst& Inst, const OperandVector& Operands,
5483   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5484   AMDGPUOperand::ImmTy ImmT,
5485   int64_t Default = 0) {
5486   auto i = OptionalIdx.find(ImmT);
5487   if (i != OptionalIdx.end()) {
5488     unsigned Idx = i->second;
5489     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5490   } else {
5491     Inst.addOperand(MCOperand::createImm(Default));
5492   }
5493 }
5494 
5495 OperandMatchResultTy
5496 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5497                                        StringRef &Value,
5498                                        SMLoc &StringLoc) {
5499   if (!trySkipId(Prefix, AsmToken::Colon))
5500     return MatchOperand_NoMatch;
5501 
5502   StringLoc = getLoc();
5503   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5504                                                   : MatchOperand_ParseFail;
5505 }
5506 
5507 //===----------------------------------------------------------------------===//
5508 // MTBUF format
5509 //===----------------------------------------------------------------------===//
5510 
5511 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5512                                   int64_t MaxVal,
5513                                   int64_t &Fmt) {
5514   int64_t Val;
5515   SMLoc Loc = getLoc();
5516 
5517   auto Res = parseIntWithPrefix(Pref, Val);
5518   if (Res == MatchOperand_ParseFail)
5519     return false;
5520   if (Res == MatchOperand_NoMatch)
5521     return true;
5522 
5523   if (Val < 0 || Val > MaxVal) {
5524     Error(Loc, Twine("out of range ", StringRef(Pref)));
5525     return false;
5526   }
5527 
5528   Fmt = Val;
5529   return true;
5530 }
5531 
5532 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5533 // values to live in a joint format operand in the MCInst encoding.
5534 OperandMatchResultTy
5535 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5536   using namespace llvm::AMDGPU::MTBUFFormat;
5537 
5538   int64_t Dfmt = DFMT_UNDEF;
5539   int64_t Nfmt = NFMT_UNDEF;
5540 
5541   // dfmt and nfmt can appear in either order, and each is optional.
5542   for (int I = 0; I < 2; ++I) {
5543     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5544       return MatchOperand_ParseFail;
5545 
5546     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5547       return MatchOperand_ParseFail;
5548     }
5549     // Skip optional comma between dfmt/nfmt
5550     // but guard against 2 commas following each other.
5551     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5552         !peekToken().is(AsmToken::Comma)) {
5553       trySkipToken(AsmToken::Comma);
5554     }
5555   }
5556 
5557   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5558     return MatchOperand_NoMatch;
5559 
5560   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5561   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5562 
5563   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5564   return MatchOperand_Success;
5565 }
5566 
5567 OperandMatchResultTy
5568 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5569   using namespace llvm::AMDGPU::MTBUFFormat;
5570 
5571   int64_t Fmt = UFMT_UNDEF;
5572 
5573   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5574     return MatchOperand_ParseFail;
5575 
5576   if (Fmt == UFMT_UNDEF)
5577     return MatchOperand_NoMatch;
5578 
5579   Format = Fmt;
5580   return MatchOperand_Success;
5581 }
5582 
5583 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5584                                     int64_t &Nfmt,
5585                                     StringRef FormatStr,
5586                                     SMLoc Loc) {
5587   using namespace llvm::AMDGPU::MTBUFFormat;
5588   int64_t Format;
5589 
5590   Format = getDfmt(FormatStr);
5591   if (Format != DFMT_UNDEF) {
5592     Dfmt = Format;
5593     return true;
5594   }
5595 
5596   Format = getNfmt(FormatStr, getSTI());
5597   if (Format != NFMT_UNDEF) {
5598     Nfmt = Format;
5599     return true;
5600   }
5601 
5602   Error(Loc, "unsupported format");
5603   return false;
5604 }
5605 
5606 OperandMatchResultTy
5607 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5608                                           SMLoc FormatLoc,
5609                                           int64_t &Format) {
5610   using namespace llvm::AMDGPU::MTBUFFormat;
5611 
5612   int64_t Dfmt = DFMT_UNDEF;
5613   int64_t Nfmt = NFMT_UNDEF;
5614   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5615     return MatchOperand_ParseFail;
5616 
5617   if (trySkipToken(AsmToken::Comma)) {
5618     StringRef Str;
5619     SMLoc Loc = getLoc();
5620     if (!parseId(Str, "expected a format string") ||
5621         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5622       return MatchOperand_ParseFail;
5623     }
5624     if (Dfmt == DFMT_UNDEF) {
5625       Error(Loc, "duplicate numeric format");
5626       return MatchOperand_ParseFail;
5627     } else if (Nfmt == NFMT_UNDEF) {
5628       Error(Loc, "duplicate data format");
5629       return MatchOperand_ParseFail;
5630     }
5631   }
5632 
5633   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5634   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5635 
5636   if (isGFX10Plus()) {
5637     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5638     if (Ufmt == UFMT_UNDEF) {
5639       Error(FormatLoc, "unsupported format");
5640       return MatchOperand_ParseFail;
5641     }
5642     Format = Ufmt;
5643   } else {
5644     Format = encodeDfmtNfmt(Dfmt, Nfmt);
5645   }
5646 
5647   return MatchOperand_Success;
5648 }
5649 
5650 OperandMatchResultTy
5651 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5652                                             SMLoc Loc,
5653                                             int64_t &Format) {
5654   using namespace llvm::AMDGPU::MTBUFFormat;
5655 
5656   auto Id = getUnifiedFormat(FormatStr);
5657   if (Id == UFMT_UNDEF)
5658     return MatchOperand_NoMatch;
5659 
5660   if (!isGFX10Plus()) {
5661     Error(Loc, "unified format is not supported on this GPU");
5662     return MatchOperand_ParseFail;
5663   }
5664 
5665   Format = Id;
5666   return MatchOperand_Success;
5667 }
5668 
5669 OperandMatchResultTy
5670 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5671   using namespace llvm::AMDGPU::MTBUFFormat;
5672   SMLoc Loc = getLoc();
5673 
5674   if (!parseExpr(Format))
5675     return MatchOperand_ParseFail;
5676   if (!isValidFormatEncoding(Format, getSTI())) {
5677     Error(Loc, "out of range format");
5678     return MatchOperand_ParseFail;
5679   }
5680 
5681   return MatchOperand_Success;
5682 }
5683 
5684 OperandMatchResultTy
5685 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5686   using namespace llvm::AMDGPU::MTBUFFormat;
5687 
5688   if (!trySkipId("format", AsmToken::Colon))
5689     return MatchOperand_NoMatch;
5690 
5691   if (trySkipToken(AsmToken::LBrac)) {
5692     StringRef FormatStr;
5693     SMLoc Loc = getLoc();
5694     if (!parseId(FormatStr, "expected a format string"))
5695       return MatchOperand_ParseFail;
5696 
5697     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5698     if (Res == MatchOperand_NoMatch)
5699       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5700     if (Res != MatchOperand_Success)
5701       return Res;
5702 
5703     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5704       return MatchOperand_ParseFail;
5705 
5706     return MatchOperand_Success;
5707   }
5708 
5709   return parseNumericFormat(Format);
5710 }
5711 
5712 OperandMatchResultTy
5713 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5714   using namespace llvm::AMDGPU::MTBUFFormat;
5715 
5716   int64_t Format = getDefaultFormatEncoding(getSTI());
5717   OperandMatchResultTy Res;
5718   SMLoc Loc = getLoc();
5719 
5720   // Parse legacy format syntax.
5721   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5722   if (Res == MatchOperand_ParseFail)
5723     return Res;
5724 
5725   bool FormatFound = (Res == MatchOperand_Success);
5726 
5727   Operands.push_back(
5728     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5729 
5730   if (FormatFound)
5731     trySkipToken(AsmToken::Comma);
5732 
5733   if (isToken(AsmToken::EndOfStatement)) {
5734     // We are expecting an soffset operand,
5735     // but let the matcher handle the error.
5736     return MatchOperand_Success;
5737   }
5738 
5739   // Parse soffset.
5740   Res = parseRegOrImm(Operands);
5741   if (Res != MatchOperand_Success)
5742     return Res;
5743 
5744   trySkipToken(AsmToken::Comma);
5745 
5746   if (!FormatFound) {
5747     Res = parseSymbolicOrNumericFormat(Format);
5748     if (Res == MatchOperand_ParseFail)
5749       return Res;
5750     if (Res == MatchOperand_Success) {
5751       auto Size = Operands.size();
5752       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5753       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5754       Op.setImm(Format);
5755     }
5756     return MatchOperand_Success;
5757   }
5758 
5759   if (isId("format") && peekToken().is(AsmToken::Colon)) {
5760     Error(getLoc(), "duplicate format");
5761     return MatchOperand_ParseFail;
5762   }
5763   return MatchOperand_Success;
5764 }
5765 
5766 //===----------------------------------------------------------------------===//
5767 // ds
5768 //===----------------------------------------------------------------------===//
5769 
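// Convert parsed operands for DS instructions with two separate offset fields
// (offset0/offset1, e.g. ds_write2*) into an MCInst.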
5770 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5771                                     const OperandVector &Operands) {
5772   OptionalImmIndexMap OptionalIdx;
5773 
5774   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5775     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5776 
5777     // Add the register arguments
5778     if (Op.isReg()) {
5779       Op.addRegOperands(Inst, 1);
5780       continue;
5781     }
5782 
5783     // Handle optional arguments
5784     OptionalIdx[Op.getImmTy()] = i;
5785   }
5786 
5787   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
5788   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
5789   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5790 
5791   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5792 }
5793 
5794 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
5795                                 bool IsGdsHardcoded) {
5796   OptionalImmIndexMap OptionalIdx;
5797 
5798   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5799     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5800 
5801     // Add the register arguments
5802     if (Op.isReg()) {
5803       Op.addRegOperands(Inst, 1);
5804       continue;
5805     }
5806 
5807     if (Op.isToken() && Op.getToken() == "gds") {
5808       IsGdsHardcoded = true;
5809       continue;
5810     }
5811 
5812     // Handle optional arguments
5813     OptionalIdx[Op.getImmTy()] = i;
5814   }
5815 
5816   AMDGPUOperand::ImmTy OffsetType =
5817     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
5818      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
5819      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
5820                                                       AMDGPUOperand::ImmTyOffset;
5821 
5822   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
5823 
5824   if (!IsGdsHardcoded) {
5825     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5826   }
5827   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5828 }
5829 
5830 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
5831   OptionalImmIndexMap OptionalIdx;
5832 
5833   unsigned OperandIdx[4];
5834   unsigned EnMask = 0;
5835   int SrcIdx = 0;
5836 
5837   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5838     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5839 
5840     // Add the register arguments
5841     if (Op.isReg()) {
5842       assert(SrcIdx < 4);
5843       OperandIdx[SrcIdx] = Inst.size();
5844       Op.addRegOperands(Inst, 1);
5845       ++SrcIdx;
5846       continue;
5847     }
5848 
5849     if (Op.isOff()) {
5850       assert(SrcIdx < 4);
5851       OperandIdx[SrcIdx] = Inst.size();
5852       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
5853       ++SrcIdx;
5854       continue;
5855     }
5856 
5857     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
5858       Op.addImmOperands(Inst, 1);
5859       continue;
5860     }
5861 
5862     if (Op.isToken() && Op.getToken() == "done")
5863       continue;
5864 
5865     // Handle optional arguments
5866     OptionalIdx[Op.getImmTy()] = i;
5867   }
5868 
5869   assert(SrcIdx == 4);
5870 
5871   bool Compr = false;
5872   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
5873     Compr = true;
5874     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
5875     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
5876     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
5877   }
5878 
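  // Build the enable mask: one bit per used source, or two bits per used
  // register pair when the compressed format is used.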
5879   for (auto i = 0; i < SrcIdx; ++i) {
5880     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
5881       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
5882     }
5883   }
5884 
5885   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
5886   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
5887 
5888   Inst.addOperand(MCOperand::createImm(EnMask));
5889 }
5890 
5891 //===----------------------------------------------------------------------===//
5892 // s_waitcnt
5893 //===----------------------------------------------------------------------===//
5894 
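// Encode a single counter value into the combined waitcnt immediate.
// Returns true on failure, i.e. when the value does not fit and the
// saturating (_sat) form was not requested.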
5895 static bool
5896 encodeCnt(
5897   const AMDGPU::IsaVersion ISA,
5898   int64_t &IntVal,
5899   int64_t CntVal,
5900   bool Saturate,
5901   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
5902   unsigned (*decode)(const IsaVersion &Version, unsigned))
5903 {
5904   bool Failed = false;
5905 
5906   IntVal = encode(ISA, IntVal, CntVal);
5907   if (CntVal != decode(ISA, IntVal)) {
5908     if (Saturate) {
5909       IntVal = encode(ISA, IntVal, -1);
5910     } else {
5911       Failed = true;
5912     }
5913   }
5914   return Failed;
5915 }
5916 
5917 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
5918 
5919   SMLoc CntLoc = getLoc();
5920   StringRef CntName = getTokenStr();
5921 
5922   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
5923       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
5924     return false;
5925 
5926   int64_t CntVal;
5927   SMLoc ValLoc = getLoc();
5928   if (!parseExpr(CntVal))
5929     return false;
5930 
5931   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5932 
5933   bool Failed = true;
5934   bool Sat = CntName.endswith("_sat");
5935 
5936   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
5937     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
5938   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
5939     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
5940   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
5941     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
5942   } else {
5943     Error(CntLoc, "invalid counter name " + CntName);
5944     return false;
5945   }
5946 
5947   if (Failed) {
5948     Error(ValLoc, "too large value for " + CntName);
5949     return false;
5950   }
5951 
5952   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
5953     return false;
5954 
5955   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
5956     if (isToken(AsmToken::EndOfStatement)) {
5957       Error(getLoc(), "expected a counter name");
5958       return false;
5959     }
5960   }
5961 
5962   return true;
5963 }
5964 
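// An s_waitcnt operand is either a list of named counters such as
// vmcnt(0) expcnt(0) lgkmcnt(0), or a single absolute expression.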
5965 OperandMatchResultTy
5966 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
5967   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5968   int64_t Waitcnt = getWaitcntBitMask(ISA);
5969   SMLoc S = getLoc();
5970 
5971   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
5972     while (!isToken(AsmToken::EndOfStatement)) {
5973       if (!parseCnt(Waitcnt))
5974         return MatchOperand_ParseFail;
5975     }
5976   } else {
5977     if (!parseExpr(Waitcnt))
5978       return MatchOperand_ParseFail;
5979   }
5980 
5981   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
5982   return MatchOperand_Success;
5983 }
5984 
5985 bool
5986 AMDGPUOperand::isSWaitCnt() const {
5987   return isImm();
5988 }
5989 
5990 //===----------------------------------------------------------------------===//
5991 // hwreg
5992 //===----------------------------------------------------------------------===//
5993 
5994 bool
5995 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
5996                                 OperandInfoTy &Offset,
5997                                 OperandInfoTy &Width) {
5998   using namespace llvm::AMDGPU::Hwreg;
5999 
6000   // The register may be specified by name or using a numeric code
6001   HwReg.Loc = getLoc();
6002   if (isToken(AsmToken::Identifier) &&
6003       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
6004     HwReg.IsSymbolic = true;
6005     lex(); // skip register name
6006   } else if (!parseExpr(HwReg.Id, "a register name")) {
6007     return false;
6008   }
6009 
6010   if (trySkipToken(AsmToken::RParen))
6011     return true;
6012 
6013   // Parse the optional offset and width parameters.
6014   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6015     return false;
6016 
6017   Offset.Loc = getLoc();
6018   if (!parseExpr(Offset.Id))
6019     return false;
6020 
6021   if (!skipToken(AsmToken::Comma, "expected a comma"))
6022     return false;
6023 
6024   Width.Loc = getLoc();
6025   return parseExpr(Width.Id) &&
6026          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6027 }
6028 
6029 bool
6030 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6031                                const OperandInfoTy &Offset,
6032                                const OperandInfoTy &Width) {
6033 
6034   using namespace llvm::AMDGPU::Hwreg;
6035 
6036   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
6037     Error(HwReg.Loc,
6038           "specified hardware register is not supported on this GPU");
6039     return false;
6040   }
6041   if (!isValidHwreg(HwReg.Id)) {
6042     Error(HwReg.Loc,
6043           "invalid code of hardware register: only 6-bit values are legal");
6044     return false;
6045   }
6046   if (!isValidHwregOffset(Offset.Id)) {
6047     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6048     return false;
6049   }
6050   if (!isValidHwregWidth(Width.Id)) {
6051     Error(Width.Loc,
6052           "invalid bitfield width: only values from 1 to 32 are legal");
6053     return false;
6054   }
6055   return true;
6056 }
6057 
6058 OperandMatchResultTy
6059 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6060   using namespace llvm::AMDGPU::Hwreg;
6061 
6062   int64_t ImmVal = 0;
6063   SMLoc Loc = getLoc();
6064 
6065   if (trySkipId("hwreg", AsmToken::LParen)) {
6066     OperandInfoTy HwReg(ID_UNKNOWN_);
6067     OperandInfoTy Offset(OFFSET_DEFAULT_);
6068     OperandInfoTy Width(WIDTH_DEFAULT_);
6069     if (parseHwregBody(HwReg, Offset, Width) &&
6070         validateHwreg(HwReg, Offset, Width)) {
6071       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6072     } else {
6073       return MatchOperand_ParseFail;
6074     }
6075   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6076     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6077       Error(Loc, "invalid immediate: only 16-bit values are legal");
6078       return MatchOperand_ParseFail;
6079     }
6080   } else {
6081     return MatchOperand_ParseFail;
6082   }
6083 
6084   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6085   return MatchOperand_Success;
6086 }
6087 
6088 bool AMDGPUOperand::isHwreg() const {
6089   return isImmTy(ImmTyHwreg);
6090 }
6091 
6092 //===----------------------------------------------------------------------===//
6093 // sendmsg
6094 //===----------------------------------------------------------------------===//
6095 
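// Parse the body of a sendmsg(...) operand: a message name or id, optionally
// followed by an operation and a stream id.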
6096 bool
6097 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6098                                   OperandInfoTy &Op,
6099                                   OperandInfoTy &Stream) {
6100   using namespace llvm::AMDGPU::SendMsg;
6101 
6102   Msg.Loc = getLoc();
6103   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
6104     Msg.IsSymbolic = true;
6105     lex(); // skip message name
6106   } else if (!parseExpr(Msg.Id, "a message name")) {
6107     return false;
6108   }
6109 
6110   if (trySkipToken(AsmToken::Comma)) {
6111     Op.IsDefined = true;
6112     Op.Loc = getLoc();
6113     if (isToken(AsmToken::Identifier) &&
6114         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6115       lex(); // skip operation name
6116     } else if (!parseExpr(Op.Id, "an operation name")) {
6117       return false;
6118     }
6119 
6120     if (trySkipToken(AsmToken::Comma)) {
6121       Stream.IsDefined = true;
6122       Stream.Loc = getLoc();
6123       if (!parseExpr(Stream.Id))
6124         return false;
6125     }
6126   }
6127 
6128   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6129 }
6130 
6131 bool
6132 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6133                                  const OperandInfoTy &Op,
6134                                  const OperandInfoTy &Stream) {
6135   using namespace llvm::AMDGPU::SendMsg;
6136 
6137   // Validation strictness depends on whether the message is specified
6138   // in a symbolic or in a numeric form. In the latter case
6139   // only the encoding possibility is checked.
6140   bool Strict = Msg.IsSymbolic;
6141 
6142   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
6143     Error(Msg.Loc, "invalid message id");
6144     return false;
6145   }
6146   if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
6147     if (Op.IsDefined) {
6148       Error(Op.Loc, "message does not support operations");
6149     } else {
6150       Error(Msg.Loc, "missing message operation");
6151     }
6152     return false;
6153   }
6154   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6155     Error(Op.Loc, "invalid operation id");
6156     return false;
6157   }
6158   if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
6159     Error(Stream.Loc, "message operation does not support streams");
6160     return false;
6161   }
6162   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6163     Error(Stream.Loc, "invalid message stream id");
6164     return false;
6165   }
6166   return true;
6167 }
6168 
6169 OperandMatchResultTy
6170 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6171   using namespace llvm::AMDGPU::SendMsg;
6172 
6173   int64_t ImmVal = 0;
6174   SMLoc Loc = getLoc();
6175 
6176   if (trySkipId("sendmsg", AsmToken::LParen)) {
6177     OperandInfoTy Msg(ID_UNKNOWN_);
6178     OperandInfoTy Op(OP_NONE_);
6179     OperandInfoTy Stream(STREAM_ID_NONE_);
6180     if (parseSendMsgBody(Msg, Op, Stream) &&
6181         validateSendMsg(Msg, Op, Stream)) {
6182       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6183     } else {
6184       return MatchOperand_ParseFail;
6185     }
6186   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6187     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6188       Error(Loc, "invalid immediate: only 16-bit values are legal");
6189       return MatchOperand_ParseFail;
6190     }
6191   } else {
6192     return MatchOperand_ParseFail;
6193   }
6194 
6195   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6196   return MatchOperand_Success;
6197 }
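// For example (illustrative), "sendmsg(MSG_GS, GS_OP_EMIT, 0)" is parsed symbolically
// and packed by encodeMsg(); a raw 16-bit immediate is accepted in place of the macro,
// as the fallback path above shows.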
6198 
6199 bool AMDGPUOperand::isSendMsg() const {
6200   return isImmTy(ImmTySendMsg);
6201 }
6202 
6203 //===----------------------------------------------------------------------===//
6204 // v_interp
6205 //===----------------------------------------------------------------------===//
6206 
6207 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6208   StringRef Str;
6209   SMLoc S = getLoc();
6210 
6211   if (!parseId(Str))
6212     return MatchOperand_NoMatch;
6213 
6214   int Slot = StringSwitch<int>(Str)
6215     .Case("p10", 0)
6216     .Case("p20", 1)
6217     .Case("p0", 2)
6218     .Default(-1);
6219 
6220   if (Slot == -1) {
6221     Error(S, "invalid interpolation slot");
6222     return MatchOperand_ParseFail;
6223   }
6224 
6225   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6226                                               AMDGPUOperand::ImmTyInterpSlot));
6227   return MatchOperand_Success;
6228 }
6229 
6230 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6231   StringRef Str;
6232   SMLoc S = getLoc();
6233 
6234   if (!parseId(Str))
6235     return MatchOperand_NoMatch;
6236 
6237   if (!Str.startswith("attr")) {
6238     Error(S, "invalid interpolation attribute");
6239     return MatchOperand_ParseFail;
6240   }
6241 
6242   StringRef Chan = Str.take_back(2);
6243   int AttrChan = StringSwitch<int>(Chan)
6244     .Case(".x", 0)
6245     .Case(".y", 1)
6246     .Case(".z", 2)
6247     .Case(".w", 3)
6248     .Default(-1);
6249   if (AttrChan == -1) {
6250     Error(S, "invalid or missing interpolation attribute channel");
6251     return MatchOperand_ParseFail;
6252   }
6253 
6254   Str = Str.drop_back(2).drop_front(4);
6255 
6256   uint8_t Attr;
6257   if (Str.getAsInteger(10, Attr)) {
6258     Error(S, "invalid or missing interpolation attribute number");
6259     return MatchOperand_ParseFail;
6260   }
6261 
6262   if (Attr > 63) {
6263     Error(S, "out of bounds interpolation attribute number");
6264     return MatchOperand_ParseFail;
6265   }
6266 
6267   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6268 
6269   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6270                                               AMDGPUOperand::ImmTyInterpAttr));
6271   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6272                                               AMDGPUOperand::ImmTyAttrChan));
6273   return MatchOperand_Success;
6274 }
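// For example (illustrative), "attr0.w" yields an ImmTyInterpAttr operand of 0 and an
// ImmTyAttrChan operand of 3; an attribute number above 63 is rejected as out of bounds.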
6275 
6276 //===----------------------------------------------------------------------===//
6277 // exp
6278 //===----------------------------------------------------------------------===//
6279 
6280 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6281   using namespace llvm::AMDGPU::Exp;
6282 
6283   StringRef Str;
6284   SMLoc S = getLoc();
6285 
6286   if (!parseId(Str))
6287     return MatchOperand_NoMatch;
6288 
6289   unsigned Id = getTgtId(Str);
6290   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6291     Error(S, (Id == ET_INVALID) ?
6292                 "invalid exp target" :
6293                 "exp target is not supported on this GPU");
6294     return MatchOperand_ParseFail;
6295   }
6296 
6297   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6298                                               AMDGPUOperand::ImmTyExpTgt));
6299   return MatchOperand_Success;
6300 }
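// For example (illustrative), targets such as "mrt0", "pos0" or "param31" are mapped
// to their hardware ids by getTgtId(); ids valid only on newer GPUs are rejected by
// isSupportedTgtId() above.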
6301 
6302 //===----------------------------------------------------------------------===//
6303 // parser helpers
6304 //===----------------------------------------------------------------------===//
6305 
6306 bool
6307 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6308   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6309 }
6310 
6311 bool
6312 AMDGPUAsmParser::isId(const StringRef Id) const {
6313   return isId(getToken(), Id);
6314 }
6315 
6316 bool
6317 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6318   return getTokenKind() == Kind;
6319 }
6320 
6321 bool
6322 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6323   if (isId(Id)) {
6324     lex();
6325     return true;
6326   }
6327   return false;
6328 }
6329 
6330 bool
6331 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6332   if (isToken(AsmToken::Identifier)) {
6333     StringRef Tok = getTokenStr();
6334     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6335       lex();
6336       return true;
6337     }
6338   }
6339   return false;
6340 }
6341 
6342 bool
6343 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6344   if (isId(Id) && peekToken().is(Kind)) {
6345     lex();
6346     lex();
6347     return true;
6348   }
6349   return false;
6350 }
6351 
6352 bool
6353 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6354   if (isToken(Kind)) {
6355     lex();
6356     return true;
6357   }
6358   return false;
6359 }
6360 
6361 bool
6362 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6363                            const StringRef ErrMsg) {
6364   if (!trySkipToken(Kind)) {
6365     Error(getLoc(), ErrMsg);
6366     return false;
6367   }
6368   return true;
6369 }
6370 
6371 bool
6372 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6373   SMLoc S = getLoc();
6374 
6375   const MCExpr *Expr;
6376   if (Parser.parseExpression(Expr))
6377     return false;
6378 
6379   if (Expr->evaluateAsAbsolute(Imm))
6380     return true;
6381 
6382   if (Expected.empty()) {
6383     Error(S, "expected absolute expression");
6384   } else {
6385     Error(S, Twine("expected ", Expected) +
6386              Twine(" or an absolute expression"));
6387   }
6388   return false;
6389 }
6390 
6391 bool
6392 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6393   SMLoc S = getLoc();
6394 
6395   const MCExpr *Expr;
6396   if (Parser.parseExpression(Expr))
6397     return false;
6398 
6399   int64_t IntVal;
6400   if (Expr->evaluateAsAbsolute(IntVal)) {
6401     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6402   } else {
6403     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6404   }
6405   return true;
6406 }
6407 
6408 bool
6409 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6410   if (isToken(AsmToken::String)) {
6411     Val = getToken().getStringContents();
6412     lex();
6413     return true;
6414   } else {
6415     Error(getLoc(), ErrMsg);
6416     return false;
6417   }
6418 }
6419 
6420 bool
6421 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6422   if (isToken(AsmToken::Identifier)) {
6423     Val = getTokenStr();
6424     lex();
6425     return true;
6426   } else {
6427     if (!ErrMsg.empty())
6428       Error(getLoc(), ErrMsg);
6429     return false;
6430   }
6431 }
6432 
6433 AsmToken
6434 AMDGPUAsmParser::getToken() const {
6435   return Parser.getTok();
6436 }
6437 
6438 AsmToken
6439 AMDGPUAsmParser::peekToken() {
6440   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6441 }
6442 
6443 void
6444 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6445   auto TokCount = getLexer().peekTokens(Tokens);
6446 
6447   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6448     Tokens[Idx] = AsmToken(AsmToken::Error, "");
6449 }
6450 
6451 AsmToken::TokenKind
6452 AMDGPUAsmParser::getTokenKind() const {
6453   return getLexer().getKind();
6454 }
6455 
6456 SMLoc
6457 AMDGPUAsmParser::getLoc() const {
6458   return getToken().getLoc();
6459 }
6460 
6461 StringRef
6462 AMDGPUAsmParser::getTokenStr() const {
6463   return getToken().getString();
6464 }
6465 
6466 void
6467 AMDGPUAsmParser::lex() {
6468   Parser.Lex();
6469 }
6470 
6471 SMLoc
6472 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
6473                                const OperandVector &Operands) const {
6474   for (unsigned i = Operands.size() - 1; i > 0; --i) {
6475     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6476     if (Test(Op))
6477       return Op.getStartLoc();
6478   }
6479   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
6480 }
6481 
6482 SMLoc
6483 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
6484                            const OperandVector &Operands) const {
6485   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
6486   return getOperandLoc(Test, Operands);
6487 }
6488 
6489 SMLoc
6490 AMDGPUAsmParser::getRegLoc(unsigned Reg,
6491                            const OperandVector &Operands) const {
6492   auto Test = [=](const AMDGPUOperand& Op) {
6493     return Op.isRegKind() && Op.getReg() == Reg;
6494   };
6495   return getOperandLoc(Test, Operands);
6496 }
6497 
6498 SMLoc
6499 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
6500   auto Test = [](const AMDGPUOperand& Op) {
6501     return Op.isImmKindLiteral() || Op.isExpr();
6502   };
6503   return getOperandLoc(Test, Operands);
6504 }
6505 
6506 SMLoc
6507 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
6508   auto Test = [](const AMDGPUOperand& Op) {
6509     return Op.isImmKindConst();
6510   };
6511   return getOperandLoc(Test, Operands);
6512 }
6513 
6514 //===----------------------------------------------------------------------===//
6515 // swizzle
6516 //===----------------------------------------------------------------------===//
6517 
6518 LLVM_READNONE
6519 static unsigned
6520 encodeBitmaskPerm(const unsigned AndMask,
6521                   const unsigned OrMask,
6522                   const unsigned XorMask) {
6523   using namespace llvm::AMDGPU::Swizzle;
6524 
6525   return BITMASK_PERM_ENC |
6526          (AndMask << BITMASK_AND_SHIFT) |
6527          (OrMask  << BITMASK_OR_SHIFT)  |
6528          (XorMask << BITMASK_XOR_SHIFT);
6529 }
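// A minimal sketch of the BITMASK_PERM semantics assumed here: each lane reads from
// lane ((lane_id & AndMask) | OrMask) ^ XorMask, so the helpers below only need to
// compute suitable masks.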
6530 
6531 bool
6532 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
6533                                      const unsigned MinVal,
6534                                      const unsigned MaxVal,
6535                                      const StringRef ErrMsg,
6536                                      SMLoc &Loc) {
6537   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6538     return false;
6539   }
6540   Loc = getLoc();
6541   if (!parseExpr(Op)) {
6542     return false;
6543   }
6544   if (Op < MinVal || Op > MaxVal) {
6545     Error(Loc, ErrMsg);
6546     return false;
6547   }
6548 
6549   return true;
6550 }
6551 
6552 bool
6553 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6554                                       const unsigned MinVal,
6555                                       const unsigned MaxVal,
6556                                       const StringRef ErrMsg) {
6557   SMLoc Loc;
6558   for (unsigned i = 0; i < OpNum; ++i) {
6559     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
6560       return false;
6561   }
6562 
6563   return true;
6564 }
6565 
6566 bool
6567 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6568   using namespace llvm::AMDGPU::Swizzle;
6569 
6570   int64_t Lane[LANE_NUM];
6571   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6572                            "expected a 2-bit lane id")) {
6573     Imm = QUAD_PERM_ENC;
6574     for (unsigned I = 0; I < LANE_NUM; ++I) {
6575       Imm |= Lane[I] << (LANE_SHIFT * I);
6576     }
6577     return true;
6578   }
6579   return false;
6580 }
6581 
6582 bool
6583 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6584   using namespace llvm::AMDGPU::Swizzle;
6585 
6586   SMLoc Loc;
6587   int64_t GroupSize;
6588   int64_t LaneIdx;
6589 
6590   if (!parseSwizzleOperand(GroupSize,
6591                            2, 32,
6592                            "group size must be in the interval [2,32]",
6593                            Loc)) {
6594     return false;
6595   }
6596   if (!isPowerOf2_64(GroupSize)) {
6597     Error(Loc, "group size must be a power of two");
6598     return false;
6599   }
6600   if (parseSwizzleOperand(LaneIdx,
6601                           0, GroupSize - 1,
6602                           "lane id must be in the interval [0,group size - 1]",
6603                           Loc)) {
6604     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6605     return true;
6606   }
6607   return false;
6608 }
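// For example (illustrative), "swizzle(BROADCAST,4,1)" keeps the group base via
// AndMask = 0b11100 and ORs in lane 1, so every lane in a group of 4 reads lane 1
// of its group.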
6609 
6610 bool
6611 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
6612   using namespace llvm::AMDGPU::Swizzle;
6613 
6614   SMLoc Loc;
6615   int64_t GroupSize;
6616 
6617   if (!parseSwizzleOperand(GroupSize,
6618                            2, 32,
6619                            "group size must be in the interval [2,32]",
6620                            Loc)) {
6621     return false;
6622   }
6623   if (!isPowerOf2_64(GroupSize)) {
6624     Error(Loc, "group size must be a power of two");
6625     return false;
6626   }
6627 
6628   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6629   return true;
6630 }
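// For example (illustrative), "swizzle(REVERSE,8)" yields XorMask = 7, which reverses
// the lane order within each group of 8.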
6631 
6632 bool
6633 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6634   using namespace llvm::AMDGPU::Swizzle;
6635 
6636   SMLoc Loc;
6637   int64_t GroupSize;
6638 
6639   if (!parseSwizzleOperand(GroupSize,
6640                            1, 16,
6641                            "group size must be in the interval [1,16]",
6642                            Loc)) {
6643     return false;
6644   }
6645   if (!isPowerOf2_64(GroupSize)) {
6646     Error(Loc, "group size must be a power of two");
6647     return false;
6648   }
6649 
6650   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6651   return true;
6652 }
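// For example (illustrative), "swizzle(SWAP,4)" yields XorMask = 4, which swaps
// neighboring groups of 4 lanes.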
6653 
6654 bool
6655 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
6656   using namespace llvm::AMDGPU::Swizzle;
6657 
6658   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6659     return false;
6660   }
6661 
6662   StringRef Ctl;
6663   SMLoc StrLoc = getLoc();
6664   if (!parseString(Ctl)) {
6665     return false;
6666   }
6667   if (Ctl.size() != BITMASK_WIDTH) {
6668     Error(StrLoc, "expected a 5-character mask");
6669     return false;
6670   }
6671 
6672   unsigned AndMask = 0;
6673   unsigned OrMask = 0;
6674   unsigned XorMask = 0;
6675 
6676   for (size_t i = 0; i < Ctl.size(); ++i) {
6677     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
6678     switch(Ctl[i]) {
6679     default:
6680       Error(StrLoc, "invalid mask");
6681       return false;
6682     case '0':
6683       break;
6684     case '1':
6685       OrMask |= Mask;
6686       break;
6687     case 'p':
6688       AndMask |= Mask;
6689       break;
6690     case 'i':
6691       AndMask |= Mask;
6692       XorMask |= Mask;
6693       break;
6694     }
6695   }
6696 
6697   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
6698   return true;
6699 }
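// The mask string is read from the most significant lane-id bit to the least
// significant one: '0' forces the bit to 0, '1' forces it to 1, 'p' preserves it
// and 'i' inverts it. For example (illustrative), swizzle(BITMASK_PERM, "01pi0")
// produces AndMask = 0b00110, OrMask = 0b01000 and XorMask = 0b00010.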
6700 
6701 bool
6702 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6703 
6704   SMLoc OffsetLoc = getLoc();
6705 
6706   if (!parseExpr(Imm, "a swizzle macro")) {
6707     return false;
6708   }
6709   if (!isUInt<16>(Imm)) {
6710     Error(OffsetLoc, "expected a 16-bit offset");
6711     return false;
6712   }
6713   return true;
6714 }
6715 
6716 bool
6717 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6718   using namespace llvm::AMDGPU::Swizzle;
6719 
6720   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
6721 
6722     SMLoc ModeLoc = getLoc();
6723     bool Ok = false;
6724 
6725     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6726       Ok = parseSwizzleQuadPerm(Imm);
6727     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6728       Ok = parseSwizzleBitmaskPerm(Imm);
6729     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6730       Ok = parseSwizzleBroadcast(Imm);
6731     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6732       Ok = parseSwizzleSwap(Imm);
6733     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6734       Ok = parseSwizzleReverse(Imm);
6735     } else {
6736       Error(ModeLoc, "expected a swizzle mode");
6737     }
6738 
6739     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
6740   }
6741 
6742   return false;
6743 }
6744 
6745 OperandMatchResultTy
6746 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6747   SMLoc S = getLoc();
6748   int64_t Imm = 0;
6749 
6750   if (trySkipId("offset")) {
6751 
6752     bool Ok = false;
6753     if (skipToken(AsmToken::Colon, "expected a colon")) {
6754       if (trySkipId("swizzle")) {
6755         Ok = parseSwizzleMacro(Imm);
6756       } else {
6757         Ok = parseSwizzleOffset(Imm);
6758       }
6759     }
6760 
6761     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
6762 
6763     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
6764   } else {
6765     // Swizzle "offset" operand is optional.
6766     // If it is omitted, try parsing other optional operands.
6767     return parseOptionalOpr(Operands);
6768   }
6769 }
6770 
6771 bool
6772 AMDGPUOperand::isSwizzle() const {
6773   return isImmTy(ImmTySwizzle);
6774 }
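// Typical usage (illustrative): ds_swizzle_b32 v5, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
// or a raw 16-bit pattern such as offset:0x8055.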
6775 
6776 //===----------------------------------------------------------------------===//
6777 // VGPR Index Mode
6778 //===----------------------------------------------------------------------===//
6779 
6780 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
6781 
6782   using namespace llvm::AMDGPU::VGPRIndexMode;
6783 
6784   if (trySkipToken(AsmToken::RParen)) {
6785     return OFF;
6786   }
6787 
6788   int64_t Imm = 0;
6789 
6790   while (true) {
6791     unsigned Mode = 0;
6792     SMLoc S = getLoc();
6793 
6794     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
6795       if (trySkipId(IdSymbolic[ModeId])) {
6796         Mode = 1 << ModeId;
6797         break;
6798       }
6799     }
6800 
6801     if (Mode == 0) {
6802       Error(S, (Imm == 0)?
6803                "expected a VGPR index mode or a closing parenthesis" :
6804                "expected a VGPR index mode");
6805       return UNDEF;
6806     }
6807 
6808     if (Imm & Mode) {
6809       Error(S, "duplicate VGPR index mode");
6810       return UNDEF;
6811     }
6812     Imm |= Mode;
6813 
6814     if (trySkipToken(AsmToken::RParen))
6815       break;
6816     if (!skipToken(AsmToken::Comma,
6817                    "expected a comma or a closing parenthesis"))
6818       return UNDEF;
6819   }
6820 
6821   return Imm;
6822 }
6823 
6824 OperandMatchResultTy
6825 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
6826 
6827   using namespace llvm::AMDGPU::VGPRIndexMode;
6828 
6829   int64_t Imm = 0;
6830   SMLoc S = getLoc();
6831 
6832   if (trySkipId("gpr_idx", AsmToken::LParen)) {
6833     Imm = parseGPRIdxMacro();
6834     if (Imm == UNDEF)
6835       return MatchOperand_ParseFail;
6836   } else {
6837     if (getParser().parseAbsoluteExpression(Imm))
6838       return MatchOperand_ParseFail;
6839     if (Imm < 0 || !isUInt<4>(Imm)) {
6840       Error(S, "invalid immediate: only 4-bit values are legal");
6841       return MatchOperand_ParseFail;
6842     }
6843   }
6844 
6845   Operands.push_back(
6846       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
6847   return MatchOperand_Success;
6848 }
6849 
6850 bool AMDGPUOperand::isGPRIdxMode() const {
6851   return isImmTy(ImmTyGprIdxMode);
6852 }
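// Typical usage (illustrative): s_set_gpr_idx_on s0, gpr_idx(SRC0,DST), where the macro
// ORs together one bit per listed mode; a raw 4-bit immediate is accepted as well.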
6853 
6854 //===----------------------------------------------------------------------===//
6855 // sopp branch targets
6856 //===----------------------------------------------------------------------===//
6857 
6858 OperandMatchResultTy
6859 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
6860 
6861   // Make sure we are not parsing something
6862   // that looks like a label or an expression but is not.
6863   // This will improve error messages.
6864   if (isRegister() || isModifier())
6865     return MatchOperand_NoMatch;
6866 
6867   if (!parseExpr(Operands))
6868     return MatchOperand_ParseFail;
6869 
6870   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
6871   assert(Opr.isImm() || Opr.isExpr());
6872   SMLoc Loc = Opr.getStartLoc();
6873 
6874   // Currently we do not support arbitrary expressions as branch targets.
6875   // Only labels and absolute expressions are accepted.
6876   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
6877     Error(Loc, "expected an absolute expression or a label");
6878   } else if (Opr.isImm() && !Opr.isS16Imm()) {
6879     Error(Loc, "expected a 16-bit signed jump offset");
6880   }
6881 
6882   return MatchOperand_Success;
6883 }
6884 
6885 //===----------------------------------------------------------------------===//
6886 // Boolean holding registers
6887 //===----------------------------------------------------------------------===//
6888 
6889 OperandMatchResultTy
6890 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
6891   return parseReg(Operands);
6892 }
6893 
6894 //===----------------------------------------------------------------------===//
6895 // mubuf
6896 //===----------------------------------------------------------------------===//
6897 
6898 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
6899   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
6900 }
6901 
6902 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
6903                                    const OperandVector &Operands,
6904                                    bool IsAtomic,
6905                                    bool IsLds) {
6906   bool IsLdsOpcode = IsLds;
6907   bool HasLdsModifier = false;
6908   OptionalImmIndexMap OptionalIdx;
6909   unsigned FirstOperandIdx = 1;
6910   bool IsAtomicReturn = false;
6911 
6912   if (IsAtomic) {
6913     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
6914       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6915       if (!Op.isCPol())
6916         continue;
6917       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
6918       break;
6919     }
6920 
6921     if (!IsAtomicReturn) {
6922       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
6923       if (NewOpc != -1)
6924         Inst.setOpcode(NewOpc);
6925     }
6926 
6927     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
6928                       SIInstrFlags::IsAtomicRet;
6929   }
6930 
6931   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
6932     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6933 
6934     // Add the register arguments
6935     if (Op.isReg()) {
6936       Op.addRegOperands(Inst, 1);
6937       // Insert a tied src for atomic return dst.
6938       // This cannot be postponed as subsequent calls to
6939       // addImmOperands rely on the correct number of MC operands.
6940       if (IsAtomicReturn && i == FirstOperandIdx)
6941         Op.addRegOperands(Inst, 1);
6942       continue;
6943     }
6944 
6945     // Handle the case where soffset is an immediate
6946     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6947       Op.addImmOperands(Inst, 1);
6948       continue;
6949     }
6950 
6951     HasLdsModifier |= Op.isLDS();
6952 
6953     // Handle tokens like 'offen' which are sometimes hard-coded into the
6954     // asm string.  There are no MCInst operands for these.
6955     if (Op.isToken()) {
6956       continue;
6957     }
6958     assert(Op.isImm());
6959 
6960     // Handle optional arguments
6961     OptionalIdx[Op.getImmTy()] = i;
6962   }
6963 
6964   // This is a workaround for an llvm quirk which may result in an
6965   // incorrect instruction selection. Lds and non-lds versions of
6966   // MUBUF instructions are identical except that lds versions
6967   // have a mandatory 'lds' modifier. However, this modifier follows
6968   // optional modifiers, and the llvm asm matcher regards this 'lds'
6969   // modifier as an optional one. As a result, an lds version of
6970   // an opcode may be selected even if it has no 'lds' modifier.
6971   if (IsLdsOpcode && !HasLdsModifier) {
6972     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
6973     if (NoLdsOpcode != -1) { // Got lds version - correct it.
6974       Inst.setOpcode(NoLdsOpcode);
6975       IsLdsOpcode = false;
6976     }
6977   }
6978 
6979   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
6980   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
6981 
6982   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
6983     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6984   }
6985   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
6986 }
6987 
6988 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
6989   OptionalImmIndexMap OptionalIdx;
6990 
6991   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6992     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6993 
6994     // Add the register arguments
6995     if (Op.isReg()) {
6996       Op.addRegOperands(Inst, 1);
6997       continue;
6998     }
6999 
7000     // Handle the case where soffset is an immediate
7001     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7002       Op.addImmOperands(Inst, 1);
7003       continue;
7004     }
7005 
7006     // Handle tokens like 'offen' which are sometimes hard-coded into the
7007     // asm string.  There are no MCInst operands for these.
7008     if (Op.isToken()) {
7009       continue;
7010     }
7011     assert(Op.isImm());
7012 
7013     // Handle optional arguments
7014     OptionalIdx[Op.getImmTy()] = i;
7015   }
7016 
7017   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7018                         AMDGPUOperand::ImmTyOffset);
7019   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7020   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7021   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7022   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7023 }
7024 
7025 //===----------------------------------------------------------------------===//
7026 // mimg
7027 //===----------------------------------------------------------------------===//
7028 
7029 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7030                               bool IsAtomic) {
7031   unsigned I = 1;
7032   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7033   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7034     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7035   }
7036 
7037   if (IsAtomic) {
7038     // Add src, same as dst
7039     assert(Desc.getNumDefs() == 1);
7040     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7041   }
7042 
7043   OptionalImmIndexMap OptionalIdx;
7044 
7045   for (unsigned E = Operands.size(); I != E; ++I) {
7046     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7047 
7048     // Add the register arguments
7049     if (Op.isReg()) {
7050       Op.addRegOperands(Inst, 1);
7051     } else if (Op.isImmModifier()) {
7052       OptionalIdx[Op.getImmTy()] = I;
7053     } else if (!Op.isToken()) {
7054       llvm_unreachable("unexpected operand type");
7055     }
7056   }
7057 
7058   bool IsGFX10Plus = isGFX10Plus();
7059 
7060   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7061   if (IsGFX10Plus)
7062     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7063   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7064   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7065   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7066   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7067     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7068   if (IsGFX10Plus)
7069     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7070   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7071   if (!IsGFX10Plus)
7072     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7073   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7074 }
7075 
7076 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7077   cvtMIMG(Inst, Operands, true);
7078 }
7079 
7080 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7081   OptionalImmIndexMap OptionalIdx;
7082   bool IsAtomicReturn = false;
7083 
7084   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7085     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7086     if (!Op.isCPol())
7087       continue;
7088     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7089     break;
7090   }
7091 
7092   if (!IsAtomicReturn) {
7093     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7094     if (NewOpc != -1)
7095       Inst.setOpcode(NewOpc);
7096   }
7097 
7098   IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7099                     SIInstrFlags::IsAtomicRet;
7100 
7101   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7102     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7103 
7104     // Add the register arguments
7105     if (Op.isReg()) {
7106       Op.addRegOperands(Inst, 1);
7107       if (IsAtomicReturn && i == 1)
7108         Op.addRegOperands(Inst, 1);
7109       continue;
7110     }
7111 
7112     // Handle the case where soffset is an immediate
7113     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7114       Op.addImmOperands(Inst, 1);
7115       continue;
7116     }
7117 
7118     // Handle tokens like 'offen' which are sometimes hard-coded into the
7119     // asm string.  There are no MCInst operands for these.
7120     if (Op.isToken()) {
7121       continue;
7122     }
7123     assert(Op.isImm());
7124 
7125     // Handle optional arguments
7126     OptionalIdx[Op.getImmTy()] = i;
7127   }
7128 
7129   if ((int)Inst.getNumOperands() <=
7130       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7131     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7132   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7133 }
7134 
7135 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7136                                       const OperandVector &Operands) {
7137   for (unsigned I = 1; I < Operands.size(); ++I) {
7138     auto &Operand = (AMDGPUOperand &)*Operands[I];
7139     if (Operand.isReg())
7140       Operand.addRegOperands(Inst, 1);
7141   }
7142 
7143   Inst.addOperand(MCOperand::createImm(1)); // a16
7144 }
7145 
7146 //===----------------------------------------------------------------------===//
7147 // smrd
7148 //===----------------------------------------------------------------------===//
7149 
7150 bool AMDGPUOperand::isSMRDOffset8() const {
7151   return isImm() && isUInt<8>(getImm());
7152 }
7153 
7154 bool AMDGPUOperand::isSMEMOffset() const {
7155   return isImm(); // Offset range is checked later by validator.
7156 }
7157 
7158 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7159   // 32-bit literals are only supported on CI, and we only want to use them
7160   // when the offset does not fit in 8 bits.
7161   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7162 }
7163 
7164 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7165   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7166 }
7167 
7168 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7169   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7170 }
7171 
7172 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7173   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7174 }
7175 
7176 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7177   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7178 }
7179 
7180 //===----------------------------------------------------------------------===//
7181 // vop3
7182 //===----------------------------------------------------------------------===//
7183 
7184 static bool ConvertOmodMul(int64_t &Mul) {
7185   if (Mul != 1 && Mul != 2 && Mul != 4)
7186     return false;
7187 
7188   Mul >>= 1;
7189   return true;
7190 }
7191 
7192 static bool ConvertOmodDiv(int64_t &Div) {
7193   if (Div == 1) {
7194     Div = 0;
7195     return true;
7196   }
7197 
7198   if (Div == 2) {
7199     Div = 3;
7200     return true;
7201   }
7202 
7203   return false;
7204 }
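// Together these map the omod syntax onto the 2-bit output-modifier field
// (illustrative): mul:2 -> 1, mul:4 -> 2, div:2 -> 3, and mul:1/div:1 -> 0 (no omod).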
7205 
7206 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7207 // This is intentional and ensures compatibility with sp3.
7208 // See bug 35397 for details.
7209 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7210   if (BoundCtrl == 0 || BoundCtrl == 1) {
7211     BoundCtrl = 1;
7212     return true;
7213   }
7214   return false;
7215 }
7216 
7217 // Note: the order in this table matches the order of operands in AsmString.
7218 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7219   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7220   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7221   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7222   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7223   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7224   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7225   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7226   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7227   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7228   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7229   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7230   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7231   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7232   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7233   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7234   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7235   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7236   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7237   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7238   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7239   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7240   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7241   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7242   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7243   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7244   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7245   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7246   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
7247   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7248   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7249   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7250   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7251   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7252   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7253   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7254   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7255   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7256   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7257   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7258   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7259   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7260 };
7261 
7262 void AMDGPUAsmParser::onBeginOfFile() {
7263   if (!getParser().getStreamer().getTargetStreamer() ||
7264       getSTI().getTargetTriple().getArch() == Triple::r600)
7265     return;
7266 
7267   if (!getTargetStreamer().getTargetID())
7268     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7269 
7270   if (isHsaAbiVersion3Or4(&getSTI()))
7271     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7272 }
7273 
7274 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7275 
7276   OperandMatchResultTy res = parseOptionalOpr(Operands);
7277 
7278   // This is a hack to enable hardcoded mandatory operands which follow
7279   // optional operands.
7280   //
7281   // The current design assumes that all operands after the first optional
7282   // operand are also optional. However, the implementation of some
7283   // instructions violates this rule (see e.g. flat/global atomics, which
7284   // have hardcoded 'glc' operands).
7285   //
7286   // To alleviate this problem, we have to (implicitly) parse extra operands so that
7287   // the autogenerated parser of custom operands never hits hardcoded mandatory operands.
7288 
7289   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7290     if (res != MatchOperand_Success ||
7291         isToken(AsmToken::EndOfStatement))
7292       break;
7293 
7294     trySkipToken(AsmToken::Comma);
7295     res = parseOptionalOpr(Operands);
7296   }
7297 
7298   return res;
7299 }
7300 
7301 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7302   OperandMatchResultTy res;
7303   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7304     // try to parse any optional operand here
7305     if (Op.IsBit) {
7306       res = parseNamedBit(Op.Name, Operands, Op.Type);
7307     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7308       res = parseOModOperand(Operands);
7309     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7310                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7311                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7312       res = parseSDWASel(Operands, Op.Name, Op.Type);
7313     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7314       res = parseSDWADstUnused(Operands);
7315     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7316                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7317                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7318                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7319       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7320                                         Op.ConvertResult);
7321     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7322       res = parseDim(Operands);
7323     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7324       res = parseCPol(Operands);
7325     } else {
7326       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7327     }
7328     if (res != MatchOperand_NoMatch) {
7329       return res;
7330     }
7331   }
7332   return MatchOperand_NoMatch;
7333 }
7334 
7335 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7336   StringRef Name = getTokenStr();
7337   if (Name == "mul") {
7338     return parseIntWithPrefix("mul", Operands,
7339                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7340   }
7341 
7342   if (Name == "div") {
7343     return parseIntWithPrefix("div", Operands,
7344                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7345   }
7346 
7347   return MatchOperand_NoMatch;
7348 }
7349 
7350 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
7351   cvtVOP3P(Inst, Operands);
7352 
7353   int Opc = Inst.getOpcode();
7354 
7355   int SrcNum;
7356   const int Ops[] = { AMDGPU::OpName::src0,
7357                       AMDGPU::OpName::src1,
7358                       AMDGPU::OpName::src2 };
7359   for (SrcNum = 0;
7360        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
7361        ++SrcNum);
7362   assert(SrcNum > 0);
7363 
7364   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7365   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7366 
7367   if ((OpSel & (1 << SrcNum)) != 0) {
7368     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7369     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7370     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
7371   }
7372 }
7373 
7374 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
7375       // 1. This operand is an input modifiers operand
7376   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
7377       // 2. This is not the last operand
7378       && Desc.NumOperands > (OpNum + 1)
7379       // 3. The next operand has a register class
7380       && Desc.OpInfo[OpNum + 1].RegClass != -1
7381       // 4. The next register is not tied to any other operand
7382       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7383 }
7384 
7385 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7386 {
7387   OptionalImmIndexMap OptionalIdx;
7388   unsigned Opc = Inst.getOpcode();
7389 
7390   unsigned I = 1;
7391   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7392   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7393     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7394   }
7395 
7396   for (unsigned E = Operands.size(); I != E; ++I) {
7397     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7398     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7399       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7400     } else if (Op.isInterpSlot() ||
7401                Op.isInterpAttr() ||
7402                Op.isAttrChan()) {
7403       Inst.addOperand(MCOperand::createImm(Op.getImm()));
7404     } else if (Op.isImmModifier()) {
7405       OptionalIdx[Op.getImmTy()] = I;
7406     } else {
7407       llvm_unreachable("unhandled operand type");
7408     }
7409   }
7410 
7411   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
7412     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
7413   }
7414 
7415   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7416     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7417   }
7418 
7419   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7420     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7421   }
7422 }
7423 
7424 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
7425                               OptionalImmIndexMap &OptionalIdx) {
7426   unsigned Opc = Inst.getOpcode();
7427 
7428   unsigned I = 1;
7429   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7430   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7431     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7432   }
7433 
7434   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
7435     // This instruction has src modifiers
7436     for (unsigned E = Operands.size(); I != E; ++I) {
7437       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7438       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7439         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7440       } else if (Op.isImmModifier()) {
7441         OptionalIdx[Op.getImmTy()] = I;
7442       } else if (Op.isRegOrImm()) {
7443         Op.addRegOrImmOperands(Inst, 1);
7444       } else {
7445         llvm_unreachable("unhandled operand type");
7446       }
7447     }
7448   } else {
7449     // No src modifiers
7450     for (unsigned E = Operands.size(); I != E; ++I) {
7451       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7452       if (Op.isMod()) {
7453         OptionalIdx[Op.getImmTy()] = I;
7454       } else {
7455         Op.addRegOrImmOperands(Inst, 1);
7456       }
7457     }
7458   }
7459 
7460   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7461     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7462   }
7463 
7464   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7465     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7466   }
7467 
7468   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
7469   // they have a src2 register operand that is tied to the dst operand.
7470   // We don't allow modifiers for this operand in the assembler, so
7471   // src2_modifiers should be 0.
7472   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
7473       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
7474       Opc == AMDGPU::V_MAC_F32_e64_vi ||
7475       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
7476       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
7477       Opc == AMDGPU::V_MAC_F16_e64_vi ||
7478       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
7479       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
7480       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
7481       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
7482       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
7483     auto it = Inst.begin();
7484     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
7485     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
7486     ++it;
7487     // Copy the operand to ensure it's not invalidated when Inst grows.
7488     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
7489   }
7490 }
7491 
7492 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
7493   OptionalImmIndexMap OptionalIdx;
7494   cvtVOP3(Inst, Operands, OptionalIdx);
7495 }
7496 
7497 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
7498                                OptionalImmIndexMap &OptIdx) {
7499   const int Opc = Inst.getOpcode();
7500   const MCInstrDesc &Desc = MII.get(Opc);
7501 
7502   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
7503 
7504   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
7505     assert(!IsPacked);
7506     Inst.addOperand(Inst.getOperand(0));
7507   }
7508 
7509   // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
7510   // instruction, and then figure out where to actually put the modifiers.
7511 
7512   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7513   if (OpSelIdx != -1) {
7514     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
7515   }
7516 
7517   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
7518   if (OpSelHiIdx != -1) {
7519     int DefaultVal = IsPacked ? -1 : 0;
7520     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
7521                           DefaultVal);
7522   }
7523 
7524   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
7525   if (NegLoIdx != -1) {
7526     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
7527     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
7528   }
7529 
7530   const int Ops[] = { AMDGPU::OpName::src0,
7531                       AMDGPU::OpName::src1,
7532                       AMDGPU::OpName::src2 };
7533   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
7534                          AMDGPU::OpName::src1_modifiers,
7535                          AMDGPU::OpName::src2_modifiers };
7536 
7537   unsigned OpSel = 0;
7538   unsigned OpSelHi = 0;
7539   unsigned NegLo = 0;
7540   unsigned NegHi = 0;
7541 
7542   if (OpSelIdx != -1)
7543     OpSel = Inst.getOperand(OpSelIdx).getImm();
7544 
7545   if (OpSelHiIdx != -1)
7546     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
7547 
7548   if (NegLoIdx != -1) {
7549     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
7550     NegLo = Inst.getOperand(NegLoIdx).getImm();
7551     NegHi = Inst.getOperand(NegHiIdx).getImm();
7552   }
7553 
7554   for (int J = 0; J < 3; ++J) {
7555     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
7556     if (OpIdx == -1)
7557       break;
7558 
7559     uint32_t ModVal = 0;
7560 
7561     if ((OpSel & (1 << J)) != 0)
7562       ModVal |= SISrcMods::OP_SEL_0;
7563 
7564     if ((OpSelHi & (1 << J)) != 0)
7565       ModVal |= SISrcMods::OP_SEL_1;
7566 
7567     if ((NegLo & (1 << J)) != 0)
7568       ModVal |= SISrcMods::NEG;
7569 
7570     if ((NegHi & (1 << J)) != 0)
7571       ModVal |= SISrcMods::NEG_HI;
7572 
7573     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
7574 
7575     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
7576   }
7577 }
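// For example (illustrative), "v_pk_add_f16 v0, v1, v2 op_sel:[1,0]" has its op_sel
// bits folded into the per-source *_modifiers operands (SISrcMods::OP_SEL_0 on src0)
// by the loop above.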
7578 
7579 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
7580   OptionalImmIndexMap OptIdx;
7581   cvtVOP3(Inst, Operands, OptIdx);
7582   cvtVOP3P(Inst, Operands, OptIdx);
7583 }
7584 
7585 //===----------------------------------------------------------------------===//
7586 // dpp
7587 //===----------------------------------------------------------------------===//
7588 
7589 bool AMDGPUOperand::isDPP8() const {
7590   return isImmTy(ImmTyDPP8);
7591 }
7592 
7593 bool AMDGPUOperand::isDPPCtrl() const {
7594   using namespace AMDGPU::DPP;
7595 
7596   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
7597   if (result) {
7598     int64_t Imm = getImm();
7599     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
7600            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
7601            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
7602            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
7603            (Imm == DppCtrl::WAVE_SHL1) ||
7604            (Imm == DppCtrl::WAVE_ROL1) ||
7605            (Imm == DppCtrl::WAVE_SHR1) ||
7606            (Imm == DppCtrl::WAVE_ROR1) ||
7607            (Imm == DppCtrl::ROW_MIRROR) ||
7608            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
7609            (Imm == DppCtrl::BCAST15) ||
7610            (Imm == DppCtrl::BCAST31) ||
7611            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
7612            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
7613   }
7614   return false;
7615 }
7616 
7617 //===----------------------------------------------------------------------===//
7618 // mAI
7619 //===----------------------------------------------------------------------===//
7620 
7621 bool AMDGPUOperand::isBLGP() const {
7622   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
7623 }
7624 
7625 bool AMDGPUOperand::isCBSZ() const {
7626   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
7627 }
7628 
7629 bool AMDGPUOperand::isABID() const {
7630   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
7631 }
7632 
7633 bool AMDGPUOperand::isS16Imm() const {
7634   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
7635 }
7636 
7637 bool AMDGPUOperand::isU16Imm() const {
7638   return isImm() && isUInt<16>(getImm());
7639 }
7640 
7641 //===----------------------------------------------------------------------===//
7642 // dim
7643 //===----------------------------------------------------------------------===//
7644 
7645 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
7646   // We want to allow "dim:1D" etc.,
7647   // but the initial 1 is tokenized as an integer.
7648   std::string Token;
7649   if (isToken(AsmToken::Integer)) {
7650     SMLoc Loc = getToken().getEndLoc();
7651     Token = std::string(getTokenStr());
7652     lex();
7653     if (getLoc() != Loc)
7654       return false;
7655   }
7656 
7657   StringRef Suffix;
7658   if (!parseId(Suffix))
7659     return false;
7660   Token += Suffix;
7661 
7662   StringRef DimId = Token;
7663   if (DimId.startswith("SQ_RSRC_IMG_"))
7664     DimId = DimId.drop_front(12);
7665 
7666   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
7667   if (!DimInfo)
7668     return false;
7669 
7670   Encoding = DimInfo->Encoding;
7671   return true;
7672 }
7673 
7674 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
7675   if (!isGFX10Plus())
7676     return MatchOperand_NoMatch;
7677 
7678   SMLoc S = getLoc();
7679 
7680   if (!trySkipId("dim", AsmToken::Colon))
7681     return MatchOperand_NoMatch;
7682 
7683   unsigned Encoding;
7684   SMLoc Loc = getLoc();
7685   if (!parseDimId(Encoding)) {
7686     Error(Loc, "invalid dim value");
7687     return MatchOperand_ParseFail;
7688   }
7689 
7690   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
7691                                               AMDGPUOperand::ImmTyDim));
7692   return MatchOperand_Success;
7693 }
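// For example (illustrative), both "dim:2D" and "dim:SQ_RSRC_IMG_2D" resolve to the
// same MIMG dim encoding via getMIMGDimInfoByAsmSuffix().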
7694 
7695 //===----------------------------------------------------------------------===//
7696 // dpp
7697 //===----------------------------------------------------------------------===//
7698 
7699 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
7700   SMLoc S = getLoc();
7701 
7702   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
7703     return MatchOperand_NoMatch;
7704 
7705   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
7706 
7707   int64_t Sels[8];
7708 
7709   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7710     return MatchOperand_ParseFail;
7711 
7712   for (size_t i = 0; i < 8; ++i) {
7713     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7714       return MatchOperand_ParseFail;
7715 
7716     SMLoc Loc = getLoc();
7717     if (getParser().parseAbsoluteExpression(Sels[i]))
7718       return MatchOperand_ParseFail;
7719     if (0 > Sels[i] || 7 < Sels[i]) {
7720       Error(Loc, "expected a 3-bit value");
7721       return MatchOperand_ParseFail;
7722     }
7723   }
7724 
7725   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7726     return MatchOperand_ParseFail;
7727 
7728   unsigned DPP8 = 0;
7729   for (size_t i = 0; i < 8; ++i)
7730     DPP8 |= (Sels[i] << (i * 3));
7731 
7732   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
7733   return MatchOperand_Success;
7734 }
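// For example (illustrative), "dpp8:[7,6,5,4,3,2,1,0]" packs eight 3-bit lane
// selectors into a 24-bit immediate, with selector 0 in the low bits.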
7735 
7736 bool
7737 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
7738                                     const OperandVector &Operands) {
7739   if (Ctrl == "row_newbcast")
7740     return isGFX90A();
7741 
7742   // DPP64 is supported for row_newbcast only.
7743   const MCRegisterInfo *MRI = getMRI();
7744   if (Operands.size() > 2 && Operands[1]->isReg() &&
7745       MRI->getSubReg(Operands[1]->getReg(), AMDGPU::sub1))
7746     return false;
7747 
7748   if (Ctrl == "row_share" ||
7749       Ctrl == "row_xmask")
7750     return isGFX10Plus();
7751 
7752   if (Ctrl == "wave_shl" ||
7753       Ctrl == "wave_shr" ||
7754       Ctrl == "wave_rol" ||
7755       Ctrl == "wave_ror" ||
7756       Ctrl == "row_bcast")
7757     return isVI() || isGFX9();
7758 
7759   return Ctrl == "row_mirror" ||
7760          Ctrl == "row_half_mirror" ||
7761          Ctrl == "quad_perm" ||
7762          Ctrl == "row_shl" ||
7763          Ctrl == "row_shr" ||
7764          Ctrl == "row_ror";
7765 }
7766 
7767 int64_t
7768 AMDGPUAsmParser::parseDPPCtrlPerm() {
7769   // quad_perm:[%d,%d,%d,%d]
7770 
7771   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7772     return -1;
7773 
7774   int64_t Val = 0;
7775   for (int i = 0; i < 4; ++i) {
7776     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7777       return -1;
7778 
7779     int64_t Temp;
7780     SMLoc Loc = getLoc();
7781     if (getParser().parseAbsoluteExpression(Temp))
7782       return -1;
7783     if (Temp < 0 || Temp > 3) {
7784       Error(Loc, "expected a 2-bit value");
7785       return -1;
7786     }
7787 
7788     Val += (Temp << i * 2);
7789   }
7790 
7791   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7792     return -1;
7793 
7794   return Val;
7795 }
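// For example (illustrative), "quad_perm:[0,1,2,3]" is the identity permutation;
// each entry selects a source lane within a group of 4, packed 2 bits per entry
// with entry 0 in the low bits.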
7796 
7797 int64_t
7798 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
7799   using namespace AMDGPU::DPP;
7800 
7801   // sel:%d
7802 
7803   int64_t Val;
7804   SMLoc Loc = getLoc();
7805 
7806   if (getParser().parseAbsoluteExpression(Val))
7807     return -1;
7808 
7809   struct DppCtrlCheck {
7810     int64_t Ctrl;
7811     int Lo;
7812     int Hi;
7813   };
7814 
7815   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
7816     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
7817     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
7818     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
7819     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
7820     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
7821     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
7822     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
7823     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
7824     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
7825     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
7826     .Default({-1, 0, 0});
7827 
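  // For controls whose range is a single value (Lo == Hi, e.g. wave_shl:1)
  // the fixed base encoding is used as-is; otherwise the parsed value is
  // OR'ed into the base encoding (e.g. row_shl:<n> becomes ROW_SHL0 | n).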
  bool Valid;
  if (Check.Ctrl == -1) {
    Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
    Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
  } else {
    Valid = Check.Lo <= Val && Val <= Check.Hi;
    Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
  }

  if (!Valid) {
    Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
    return -1;
  }

  return Val;
}

OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  if (!isToken(AsmToken::Identifier) ||
      !isSupportedDPPCtrl(getTokenStr(), Operands))
    return MatchOperand_NoMatch;

  SMLoc S = getLoc();
  int64_t Val = -1;
  StringRef Ctrl;

  parseId(Ctrl);

  if (Ctrl == "row_mirror") {
    Val = DppCtrl::ROW_MIRROR;
  } else if (Ctrl == "row_half_mirror") {
    Val = DppCtrl::ROW_HALF_MIRROR;
  } else {
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (Ctrl == "quad_perm") {
        Val = parseDPPCtrlPerm();
      } else {
        Val = parseDPPCtrlSel(Ctrl);
      }
    }
  }

  if (Val == -1)
    return MatchOperand_ParseFail;

  Operands.push_back(
    AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}

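// Defaults used when an optional modifier is omitted: the DPP row_mask and
// bank_mask default to 0xf (all rows/banks enabled); bound_ctrl, fi, and the
// s_endpgm immediate default to 0.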
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
}

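// Convert the parsed operands of a DPP (or DPP8) instruction into MCInst
// operands: copy the defs, expand sources with FP input modifiers, and collect
// optional immediates. For DPP8 only the fi flag is encoded; for classic DPP,
// defaults are appended for any omitted row_mask, bank_mask, bound_ctrl, or
// fi modifier.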
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b instructions (v_add_u32, v_sub_u32, ...) in DPP form use the
      // "vcc" token. Skip it.
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, 1);
      } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isFI()) {
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }

  if (IsDPP8) {
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
  } else {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
    if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
    }
  }
}

//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//

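// Parse an SDWA select of the form "<prefix>:<value>", e.g. dst_sel:WORD_1 or
// src0_sel:BYTE_0, and record it as an immediate operand of the given type.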
OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  SMLoc StringLoc;
  res = parseStringWithPrefix(Prefix, Value, StringLoc);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);

  if (Int == 0xffffffff) {
    Error(StringLoc, "invalid " + Twine(Prefix) + " value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}

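// Parse dst_unused:UNUSED_PAD, dst_unused:UNUSED_SEXT, or
// dst_unused:UNUSED_PRESERVE, which selects how the destination bits not
// written by the SDWA operation are filled.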
OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  SMLoc StringLoc;
  res = parseStringWithPrefix("dst_unused", Value, StringLoc);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);

  if (Int == 0xffffffff) {
    Error(StringLoc, "invalid dst_unused value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}

void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
}

void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}

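// Common SDWA operand conversion. BasicInstType (VOP1/VOP2/VOPC) selects which
// optional operands (clamp, omod, dst_sel, dst_unused, src0_sel, src1_sel) are
// appended with default values; SkipDstVcc and SkipSrcVcc drop the explicit
// "vcc" tokens that VOP2b/VOP2e forms carry in the assembly text but that are
// not explicit MCInst operands.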
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b instructions (v_add_u32, v_sub_u32, ...) in SDWA form use the
      // "vcc" token as dst. Skip it if it is the 2nd operand
      // (e.g. v_add_i32_sdwa v1, vcc, v2, v3) or the 4th operand
      // (v_addc_u32_sdwa v1, vcc, v2, v3, vcc).
      // Skip VCC only if we did not skip it on the previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // V_NOP in SDWA form (vi/gfx9/gfx10) has no optional sdwa arguments.
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case for v_mac_{f16, f32}: it has a src2 register operand that
  // is tied to the dst operand.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
    auto it = Inst.begin();
    std::advance(
      it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

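// Default values for the MAI (MFMA) blgp, cbsz, and abid modifiers; each
// defaults to 0 when not written explicitly.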
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function must be defined after the auto-generated include so that the
// MatchClassKind enum is defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to see a token and fails to validate
  // the operand. This method handles the case where we were given an
  // immediate operand but the matcher expects the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but
    // it should also be enabled with 64-bit operands.
    // The following code enables it for SReg_64 operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

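// Parse the optional 16-bit immediate of s_endpgm; when it is omitted the
// operand defaults to 0.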
OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
