1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/MC/MCAsmInfo.h"
24 #include "llvm/MC/MCContext.h"
25 #include "llvm/MC/MCExpr.h"
26 #include "llvm/MC/MCInst.h"
27 #include "llvm/MC/MCParser/MCAsmParser.h"
28 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
29 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
30 #include "llvm/MC/MCSymbol.h"
31 #include "llvm/Support/AMDGPUMetadata.h"
32 #include "llvm/Support/AMDHSAKernelDescriptor.h"
33 #include "llvm/Support/Casting.h"
34 #include "llvm/Support/MachineValueType.h"
35 #include "llvm/Support/TargetParser.h"
36 #include "llvm/Support/TargetRegistry.h"
37 
38 using namespace llvm;
39 using namespace llvm::AMDGPU;
40 using namespace llvm::amdhsa;
41 
42 namespace {
43 
44 class AMDGPUAsmParser;
45 
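// Register categories recognized while parsing register operands.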
46 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
47 
48 //===----------------------------------------------------------------------===//
49 // Operand
50 //===----------------------------------------------------------------------===//
51 
52 class AMDGPUOperand : public MCParsedAsmOperand {
53   enum KindTy {
54     Token,
55     Immediate,
56     Register,
57     Expression
58   } Kind;
59 
60   SMLoc StartLoc, EndLoc;
61   const AMDGPUAsmParser *AsmParser;
62 
63 public:
64   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
65     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
66 
67   using Ptr = std::unique_ptr<AMDGPUOperand>;
68 
69   struct Modifiers {
70     bool Abs = false;
71     bool Neg = false;
72     bool Sext = false;
73 
74     bool hasFPModifiers() const { return Abs || Neg; }
75     bool hasIntModifiers() const { return Sext; }
76     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
77 
78     int64_t getFPModifiersOperand() const {
79       int64_t Operand = 0;
80       Operand |= Abs ? SISrcMods::ABS : 0u;
81       Operand |= Neg ? SISrcMods::NEG : 0u;
82       return Operand;
83     }
84 
85     int64_t getIntModifiersOperand() const {
86       int64_t Operand = 0;
87       Operand |= Sext ? SISrcMods::SEXT : 0u;
88       return Operand;
89     }
90 
91     int64_t getModifiersOperand() const {
92       assert(!(hasFPModifiers() && hasIntModifiers())
93            && "fp and int modifiers should not be used simultaneously");
94       if (hasFPModifiers()) {
95         return getFPModifiersOperand();
96       } else if (hasIntModifiers()) {
97         return getIntModifiersOperand();
98       } else {
99         return 0;
100       }
101     }
102 
103     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
104   };
105 
106   enum ImmTy {
107     ImmTyNone,
108     ImmTyGDS,
109     ImmTyLDS,
110     ImmTyOffen,
111     ImmTyIdxen,
112     ImmTyAddr64,
113     ImmTyOffset,
114     ImmTyInstOffset,
115     ImmTyOffset0,
116     ImmTyOffset1,
117     ImmTyCPol,
118     ImmTySWZ,
119     ImmTyTFE,
120     ImmTyD16,
121     ImmTyClampSI,
122     ImmTyOModSI,
123     ImmTyDPP8,
124     ImmTyDppCtrl,
125     ImmTyDppRowMask,
126     ImmTyDppBankMask,
127     ImmTyDppBoundCtrl,
128     ImmTyDppFi,
129     ImmTySdwaDstSel,
130     ImmTySdwaSrc0Sel,
131     ImmTySdwaSrc1Sel,
132     ImmTySdwaDstUnused,
133     ImmTyDMask,
134     ImmTyDim,
135     ImmTyUNorm,
136     ImmTyDA,
137     ImmTyR128A16,
138     ImmTyA16,
139     ImmTyLWE,
140     ImmTyExpTgt,
141     ImmTyExpCompr,
142     ImmTyExpVM,
143     ImmTyFORMAT,
144     ImmTyHwreg,
145     ImmTyOff,
146     ImmTySendMsg,
147     ImmTyInterpSlot,
148     ImmTyInterpAttr,
149     ImmTyAttrChan,
150     ImmTyOpSel,
151     ImmTyOpSelHi,
152     ImmTyNegLo,
153     ImmTyNegHi,
154     ImmTySwizzle,
155     ImmTyGprIdxMode,
156     ImmTyHigh,
157     ImmTyBLGP,
158     ImmTyCBSZ,
159     ImmTyABID,
160     ImmTyEndpgm,
161   };
162 
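  // How a parsed immediate is expected to be encoded: not yet classified,
  // as a literal constant, or as an inline constant.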
163   enum ImmKindTy {
164     ImmKindTyNone,
165     ImmKindTyLiteral,
166     ImmKindTyConst,
167   };
168 
169 private:
170   struct TokOp {
171     const char *Data;
172     unsigned Length;
173   };
174 
175   struct ImmOp {
176     int64_t Val;
177     ImmTy Type;
178     bool IsFPImm;
179     mutable ImmKindTy Kind;
180     Modifiers Mods;
181   };
182 
183   struct RegOp {
184     unsigned RegNo;
185     Modifiers Mods;
186   };
187 
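  // Operand payload; only the member matching Kind is valid.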
188   union {
189     TokOp Tok;
190     ImmOp Imm;
191     RegOp Reg;
192     const MCExpr *Expr;
193   };
194 
195 public:
196   bool isToken() const override {
197     if (Kind == Token)
198       return true;
199 
200     // When parsing operands, we can't always tell if something was meant to be
201     // a token, like 'gds', or an expression that references a global variable.
202     // In this case, we assume the string is an expression, and if we need to
203     // interpret it as a token, then we treat the symbol name as the token.
204     return isSymbolRefExpr();
205   }
206 
207   bool isSymbolRefExpr() const {
208     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
209   }
210 
211   bool isImm() const override {
212     return Kind == Immediate;
213   }
214 
215   void setImmKindNone() const {
216     assert(isImm());
217     Imm.Kind = ImmKindTyNone;
218   }
219 
220   void setImmKindLiteral() const {
221     assert(isImm());
222     Imm.Kind = ImmKindTyLiteral;
223   }
224 
225   void setImmKindConst() const {
226     assert(isImm());
227     Imm.Kind = ImmKindTyConst;
228   }
229 
230   bool IsImmKindLiteral() const {
231     return isImm() && Imm.Kind == ImmKindTyLiteral;
232   }
233 
234   bool isImmKindConst() const {
235     return isImm() && Imm.Kind == ImmKindTyConst;
236   }
237 
238   bool isInlinableImm(MVT type) const;
239   bool isLiteralImm(MVT type) const;
240 
241   bool isRegKind() const {
242     return Kind == Register;
243   }
244 
245   bool isReg() const override {
246     return isRegKind() && !hasModifiers();
247   }
248 
249   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
250     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
251   }
252 
253   bool isRegOrImmWithInt16InputMods() const {
254     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
255   }
256 
257   bool isRegOrImmWithInt32InputMods() const {
258     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
259   }
260 
261   bool isRegOrImmWithInt64InputMods() const {
262     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
263   }
264 
265   bool isRegOrImmWithFP16InputMods() const {
266     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
267   }
268 
269   bool isRegOrImmWithFP32InputMods() const {
270     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
271   }
272 
273   bool isRegOrImmWithFP64InputMods() const {
274     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
275   }
276 
277   bool isVReg() const {
278     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
279            isRegClass(AMDGPU::VReg_64RegClassID) ||
280            isRegClass(AMDGPU::VReg_96RegClassID) ||
281            isRegClass(AMDGPU::VReg_128RegClassID) ||
282            isRegClass(AMDGPU::VReg_160RegClassID) ||
283            isRegClass(AMDGPU::VReg_192RegClassID) ||
284            isRegClass(AMDGPU::VReg_256RegClassID) ||
285            isRegClass(AMDGPU::VReg_512RegClassID) ||
286            isRegClass(AMDGPU::VReg_1024RegClassID);
287   }
288 
289   bool isVReg32() const {
290     return isRegClass(AMDGPU::VGPR_32RegClassID);
291   }
292 
293   bool isVReg32OrOff() const {
294     return isOff() || isVReg32();
295   }
296 
297   bool isNull() const {
298     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
299   }
300 
301   bool isVRegWithInputMods() const;
302 
303   bool isSDWAOperand(MVT type) const;
304   bool isSDWAFP16Operand() const;
305   bool isSDWAFP32Operand() const;
306   bool isSDWAInt16Operand() const;
307   bool isSDWAInt32Operand() const;
308 
309   bool isImmTy(ImmTy ImmT) const {
310     return isImm() && Imm.Type == ImmT;
311   }
312 
313   bool isImmModifier() const {
314     return isImm() && Imm.Type != ImmTyNone;
315   }
316 
317   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
318   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
319   bool isDMask() const { return isImmTy(ImmTyDMask); }
320   bool isDim() const { return isImmTy(ImmTyDim); }
321   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
322   bool isDA() const { return isImmTy(ImmTyDA); }
323   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
324   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
325   bool isLWE() const { return isImmTy(ImmTyLWE); }
326   bool isOff() const { return isImmTy(ImmTyOff); }
327   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
328   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
329   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
330   bool isOffen() const { return isImmTy(ImmTyOffen); }
331   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
332   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
333   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
334   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
335   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
336 
337   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
338   bool isGDS() const { return isImmTy(ImmTyGDS); }
339   bool isLDS() const { return isImmTy(ImmTyLDS); }
340   bool isCPol() const { return isImmTy(ImmTyCPol); }
341   bool isSWZ() const { return isImmTy(ImmTySWZ); }
342   bool isTFE() const { return isImmTy(ImmTyTFE); }
343   bool isD16() const { return isImmTy(ImmTyD16); }
344   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
345   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
346   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
347   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
348   bool isFI() const { return isImmTy(ImmTyDppFi); }
349   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
350   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
351   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
352   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
353   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
354   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
355   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
356   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
357   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
358   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
359   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
360   bool isHigh() const { return isImmTy(ImmTyHigh); }
361 
362   bool isMod() const {
363     return isClampSI() || isOModSI();
364   }
365 
366   bool isRegOrImm() const {
367     return isReg() || isImm();
368   }
369 
370   bool isRegClass(unsigned RCID) const;
371 
372   bool isInlineValue() const;
373 
374   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
375     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
376   }
377 
378   bool isSCSrcB16() const {
379     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
380   }
381 
382   bool isSCSrcV2B16() const {
383     return isSCSrcB16();
384   }
385 
386   bool isSCSrcB32() const {
387     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
388   }
389 
390   bool isSCSrcB64() const {
391     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
392   }
393 
394   bool isBoolReg() const;
395 
396   bool isSCSrcF16() const {
397     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
398   }
399 
400   bool isSCSrcV2F16() const {
401     return isSCSrcF16();
402   }
403 
404   bool isSCSrcF32() const {
405     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
406   }
407 
408   bool isSCSrcF64() const {
409     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
410   }
411 
412   bool isSSrcB32() const {
413     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
414   }
415 
416   bool isSSrcB16() const {
417     return isSCSrcB16() || isLiteralImm(MVT::i16);
418   }
419 
420   bool isSSrcV2B16() const {
421     llvm_unreachable("cannot happen");
422     return isSSrcB16();
423   }
424 
425   bool isSSrcB64() const {
426     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
427     // See isVSrc64().
428     return isSCSrcB64() || isLiteralImm(MVT::i64);
429   }
430 
431   bool isSSrcF32() const {
432     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
433   }
434 
435   bool isSSrcF64() const {
436     return isSCSrcB64() || isLiteralImm(MVT::f64);
437   }
438 
439   bool isSSrcF16() const {
440     return isSCSrcB16() || isLiteralImm(MVT::f16);
441   }
442 
443   bool isSSrcV2F16() const {
444     llvm_unreachable("cannot happen");
445     return isSSrcF16();
446   }
447 
448   bool isSSrcV2FP32() const {
449     llvm_unreachable("cannot happen");
450     return isSSrcF32();
451   }
452 
453   bool isSCSrcV2FP32() const {
454     llvm_unreachable("cannot happen");
455     return isSCSrcF32();
456   }
457 
458   bool isSSrcV2INT32() const {
459     llvm_unreachable("cannot happen");
460     return isSSrcB32();
461   }
462 
463   bool isSCSrcV2INT32() const {
464     llvm_unreachable("cannot happen");
465     return isSCSrcB32();
466   }
467 
468   bool isSSrcOrLdsB32() const {
469     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
470            isLiteralImm(MVT::i32) || isExpr();
471   }
472 
473   bool isVCSrcB32() const {
474     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
475   }
476 
477   bool isVCSrcB64() const {
478     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
479   }
480 
481   bool isVCSrcB16() const {
482     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
483   }
484 
485   bool isVCSrcV2B16() const {
486     return isVCSrcB16();
487   }
488 
489   bool isVCSrcF32() const {
490     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
491   }
492 
493   bool isVCSrcF64() const {
494     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
495   }
496 
497   bool isVCSrcF16() const {
498     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
499   }
500 
501   bool isVCSrcV2F16() const {
502     return isVCSrcF16();
503   }
504 
505   bool isVSrcB32() const {
506     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
507   }
508 
509   bool isVSrcB64() const {
510     return isVCSrcF64() || isLiteralImm(MVT::i64);
511   }
512 
513   bool isVSrcB16() const {
514     return isVCSrcB16() || isLiteralImm(MVT::i16);
515   }
516 
517   bool isVSrcV2B16() const {
518     return isVSrcB16() || isLiteralImm(MVT::v2i16);
519   }
520 
521   bool isVCSrcV2FP32() const {
522     return isVCSrcF64();
523   }
524 
525   bool isVSrcV2FP32() const {
526     return isVSrcF64() || isLiteralImm(MVT::v2f32);
527   }
528 
529   bool isVCSrcV2INT32() const {
530     return isVCSrcB64();
531   }
532 
533   bool isVSrcV2INT32() const {
534     return isVSrcB64() || isLiteralImm(MVT::v2i32);
535   }
536 
537   bool isVSrcF32() const {
538     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
539   }
540 
541   bool isVSrcF64() const {
542     return isVCSrcF64() || isLiteralImm(MVT::f64);
543   }
544 
545   bool isVSrcF16() const {
546     return isVCSrcF16() || isLiteralImm(MVT::f16);
547   }
548 
549   bool isVSrcV2F16() const {
550     return isVSrcF16() || isLiteralImm(MVT::v2f16);
551   }
552 
553   bool isVISrcB32() const {
554     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
555   }
556 
557   bool isVISrcB16() const {
558     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
559   }
560 
561   bool isVISrcV2B16() const {
562     return isVISrcB16();
563   }
564 
565   bool isVISrcF32() const {
566     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
567   }
568 
569   bool isVISrcF16() const {
570     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
571   }
572 
573   bool isVISrcV2F16() const {
574     return isVISrcF16() || isVISrcB32();
575   }
576 
577   bool isVISrc_64B64() const {
578     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
579   }
580 
581   bool isVISrc_64F64() const {
582     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
583   }
584 
585   bool isVISrc_64V2FP32() const {
586     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
587   }
588 
589   bool isVISrc_64V2INT32() const {
590     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
591   }
592 
593   bool isVISrc_256B64() const {
594     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
595   }
596 
597   bool isVISrc_256F64() const {
598     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
599   }
600 
601   bool isVISrc_128B16() const {
602     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
603   }
604 
605   bool isVISrc_128V2B16() const {
606     return isVISrc_128B16();
607   }
608 
609   bool isVISrc_128B32() const {
610     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
611   }
612 
613   bool isVISrc_128F32() const {
614     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
615   }
616 
617   bool isVISrc_256V2FP32() const {
618     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
619   }
620 
621   bool isVISrc_256V2INT32() const {
622     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
623   }
624 
625   bool isVISrc_512B32() const {
626     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
627   }
628 
629   bool isVISrc_512B16() const {
630     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
631   }
632 
633   bool isVISrc_512V2B16() const {
634     return isVISrc_512B16();
635   }
636 
637   bool isVISrc_512F32() const {
638     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
639   }
640 
641   bool isVISrc_512F16() const {
642     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
643   }
644 
645   bool isVISrc_512V2F16() const {
646     return isVISrc_512F16() || isVISrc_512B32();
647   }
648 
649   bool isVISrc_1024B32() const {
650     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
651   }
652 
653   bool isVISrc_1024B16() const {
654     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
655   }
656 
657   bool isVISrc_1024V2B16() const {
658     return isVISrc_1024B16();
659   }
660 
661   bool isVISrc_1024F32() const {
662     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
663   }
664 
665   bool isVISrc_1024F16() const {
666     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
667   }
668 
669   bool isVISrc_1024V2F16() const {
670     return isVISrc_1024F16() || isVISrc_1024B32();
671   }
672 
673   bool isAISrcB32() const {
674     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
675   }
676 
677   bool isAISrcB16() const {
678     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
679   }
680 
681   bool isAISrcV2B16() const {
682     return isAISrcB16();
683   }
684 
685   bool isAISrcF32() const {
686     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
687   }
688 
689   bool isAISrcF16() const {
690     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
691   }
692 
693   bool isAISrcV2F16() const {
694     return isAISrcF16() || isAISrcB32();
695   }
696 
697   bool isAISrc_64B64() const {
698     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
699   }
700 
701   bool isAISrc_64F64() const {
702     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
703   }
704 
705   bool isAISrc_128B32() const {
706     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
707   }
708 
709   bool isAISrc_128B16() const {
710     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
711   }
712 
713   bool isAISrc_128V2B16() const {
714     return isAISrc_128B16();
715   }
716 
717   bool isAISrc_128F32() const {
718     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
719   }
720 
721   bool isAISrc_128F16() const {
722     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
723   }
724 
725   bool isAISrc_128V2F16() const {
726     return isAISrc_128F16() || isAISrc_128B32();
727   }
728 
729   bool isVISrc_128F16() const {
730     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
731   }
732 
733   bool isVISrc_128V2F16() const {
734     return isVISrc_128F16() || isVISrc_128B32();
735   }
736 
737   bool isAISrc_256B64() const {
738     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
739   }
740 
741   bool isAISrc_256F64() const {
742     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
743   }
744 
745   bool isAISrc_512B32() const {
746     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
747   }
748 
749   bool isAISrc_512B16() const {
750     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
751   }
752 
753   bool isAISrc_512V2B16() const {
754     return isAISrc_512B16();
755   }
756 
757   bool isAISrc_512F32() const {
758     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
759   }
760 
761   bool isAISrc_512F16() const {
762     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
763   }
764 
765   bool isAISrc_512V2F16() const {
766     return isAISrc_512F16() || isAISrc_512B32();
767   }
768 
769   bool isAISrc_1024B32() const {
770     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
771   }
772 
773   bool isAISrc_1024B16() const {
774     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
775   }
776 
777   bool isAISrc_1024V2B16() const {
778     return isAISrc_1024B16();
779   }
780 
781   bool isAISrc_1024F32() const {
782     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
783   }
784 
785   bool isAISrc_1024F16() const {
786     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
787   }
788 
789   bool isAISrc_1024V2F16() const {
790     return isAISrc_1024F16() || isAISrc_1024B32();
791   }
792 
793   bool isKImmFP32() const {
794     return isLiteralImm(MVT::f32);
795   }
796 
797   bool isKImmFP16() const {
798     return isLiteralImm(MVT::f16);
799   }
800 
801   bool isMem() const override {
802     return false;
803   }
804 
805   bool isExpr() const {
806     return Kind == Expression;
807   }
808 
809   bool isSoppBrTarget() const {
810     return isExpr() || isImm();
811   }
812 
813   bool isSWaitCnt() const;
814   bool isHwreg() const;
815   bool isSendMsg() const;
816   bool isSwizzle() const;
817   bool isSMRDOffset8() const;
818   bool isSMEMOffset() const;
819   bool isSMRDLiteralOffset() const;
820   bool isDPP8() const;
821   bool isDPPCtrl() const;
822   bool isBLGP() const;
823   bool isCBSZ() const;
824   bool isABID() const;
825   bool isGPRIdxMode() const;
826   bool isS16Imm() const;
827   bool isU16Imm() const;
828   bool isEndpgm() const;
829 
830   StringRef getExpressionAsToken() const {
831     assert(isExpr());
832     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
833     return S->getSymbol().getName();
834   }
835 
836   StringRef getToken() const {
837     assert(isToken());
838 
839     if (Kind == Expression)
840       return getExpressionAsToken();
841 
842     return StringRef(Tok.Data, Tok.Length);
843   }
844 
845   int64_t getImm() const {
846     assert(isImm());
847     return Imm.Val;
848   }
849 
850   void setImm(int64_t Val) {
851     assert(isImm());
852     Imm.Val = Val;
853   }
854 
855   ImmTy getImmTy() const {
856     assert(isImm());
857     return Imm.Type;
858   }
859 
860   unsigned getReg() const override {
861     assert(isRegKind());
862     return Reg.RegNo;
863   }
864 
865   SMLoc getStartLoc() const override {
866     return StartLoc;
867   }
868 
869   SMLoc getEndLoc() const override {
870     return EndLoc;
871   }
872 
873   SMRange getLocRange() const {
874     return SMRange(StartLoc, EndLoc);
875   }
876 
877   Modifiers getModifiers() const {
878     assert(isRegKind() || isImmTy(ImmTyNone));
879     return isRegKind() ? Reg.Mods : Imm.Mods;
880   }
881 
882   void setModifiers(Modifiers Mods) {
883     assert(isRegKind() || isImmTy(ImmTyNone));
884     if (isRegKind())
885       Reg.Mods = Mods;
886     else
887       Imm.Mods = Mods;
888   }
889 
890   bool hasModifiers() const {
891     return getModifiers().hasModifiers();
892   }
893 
894   bool hasFPModifiers() const {
895     return getModifiers().hasFPModifiers();
896   }
897 
898   bool hasIntModifiers() const {
899     return getModifiers().hasIntModifiers();
900   }
901 
902   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
903 
904   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
905 
906   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
907 
908   template <unsigned Bitwidth>
909   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
910 
911   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
912     addKImmFPOperands<16>(Inst, N);
913   }
914 
915   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
916     addKImmFPOperands<32>(Inst, N);
917   }
918 
919   void addRegOperands(MCInst &Inst, unsigned N) const;
920 
921   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
922     addRegOperands(Inst, N);
923   }
924 
925   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
926     if (isRegKind())
927       addRegOperands(Inst, N);
928     else if (isExpr())
929       Inst.addOperand(MCOperand::createExpr(Expr));
930     else
931       addImmOperands(Inst, N);
932   }
933 
934   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
935     Modifiers Mods = getModifiers();
936     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
937     if (isRegKind()) {
938       addRegOperands(Inst, N);
939     } else {
940       addImmOperands(Inst, N, false);
941     }
942   }
943 
944   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
945     assert(!hasIntModifiers());
946     addRegOrImmWithInputModsOperands(Inst, N);
947   }
948 
949   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
950     assert(!hasFPModifiers());
951     addRegOrImmWithInputModsOperands(Inst, N);
952   }
953 
954   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
955     Modifiers Mods = getModifiers();
956     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
957     assert(isRegKind());
958     addRegOperands(Inst, N);
959   }
960 
961   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
962     assert(!hasIntModifiers());
963     addRegWithInputModsOperands(Inst, N);
964   }
965 
966   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
967     assert(!hasFPModifiers());
968     addRegWithInputModsOperands(Inst, N);
969   }
970 
971   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
972     if (isImm())
973       addImmOperands(Inst, N);
974     else {
975       assert(isExpr());
976       Inst.addOperand(MCOperand::createExpr(Expr));
977     }
978   }
979 
980   static void printImmTy(raw_ostream& OS, ImmTy Type) {
981     switch (Type) {
982     case ImmTyNone: OS << "None"; break;
983     case ImmTyGDS: OS << "GDS"; break;
984     case ImmTyLDS: OS << "LDS"; break;
985     case ImmTyOffen: OS << "Offen"; break;
986     case ImmTyIdxen: OS << "Idxen"; break;
987     case ImmTyAddr64: OS << "Addr64"; break;
988     case ImmTyOffset: OS << "Offset"; break;
989     case ImmTyInstOffset: OS << "InstOffset"; break;
990     case ImmTyOffset0: OS << "Offset0"; break;
991     case ImmTyOffset1: OS << "Offset1"; break;
992     case ImmTyCPol: OS << "CPol"; break;
993     case ImmTySWZ: OS << "SWZ"; break;
994     case ImmTyTFE: OS << "TFE"; break;
995     case ImmTyD16: OS << "D16"; break;
996     case ImmTyFORMAT: OS << "FORMAT"; break;
997     case ImmTyClampSI: OS << "ClampSI"; break;
998     case ImmTyOModSI: OS << "OModSI"; break;
999     case ImmTyDPP8: OS << "DPP8"; break;
1000     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1001     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1002     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1003     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1004     case ImmTyDppFi: OS << "FI"; break;
1005     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1006     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1007     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1008     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1009     case ImmTyDMask: OS << "DMask"; break;
1010     case ImmTyDim: OS << "Dim"; break;
1011     case ImmTyUNorm: OS << "UNorm"; break;
1012     case ImmTyDA: OS << "DA"; break;
1013     case ImmTyR128A16: OS << "R128A16"; break;
1014     case ImmTyA16: OS << "A16"; break;
1015     case ImmTyLWE: OS << "LWE"; break;
1016     case ImmTyOff: OS << "Off"; break;
1017     case ImmTyExpTgt: OS << "ExpTgt"; break;
1018     case ImmTyExpCompr: OS << "ExpCompr"; break;
1019     case ImmTyExpVM: OS << "ExpVM"; break;
1020     case ImmTyHwreg: OS << "Hwreg"; break;
1021     case ImmTySendMsg: OS << "SendMsg"; break;
1022     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1023     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1024     case ImmTyAttrChan: OS << "AttrChan"; break;
1025     case ImmTyOpSel: OS << "OpSel"; break;
1026     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1027     case ImmTyNegLo: OS << "NegLo"; break;
1028     case ImmTyNegHi: OS << "NegHi"; break;
1029     case ImmTySwizzle: OS << "Swizzle"; break;
1030     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1031     case ImmTyHigh: OS << "High"; break;
1032     case ImmTyBLGP: OS << "BLGP"; break;
1033     case ImmTyCBSZ: OS << "CBSZ"; break;
1034     case ImmTyABID: OS << "ABID"; break;
1035     case ImmTyEndpgm: OS << "Endpgm"; break;
1036     }
1037   }
1038 
1039   void print(raw_ostream &OS) const override {
1040     switch (Kind) {
1041     case Register:
1042       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1043       break;
1044     case Immediate:
1045       OS << '<' << getImm();
1046       if (getImmTy() != ImmTyNone) {
1047         OS << " type: "; printImmTy(OS, getImmTy());
1048       }
1049       OS << " mods: " << Imm.Mods << '>';
1050       break;
1051     case Token:
1052       OS << '\'' << getToken() << '\'';
1053       break;
1054     case Expression:
1055       OS << "<expr " << *Expr << '>';
1056       break;
1057     }
1058   }
1059 
1060   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1061                                       int64_t Val, SMLoc Loc,
1062                                       ImmTy Type = ImmTyNone,
1063                                       bool IsFPImm = false) {
1064     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1065     Op->Imm.Val = Val;
1066     Op->Imm.IsFPImm = IsFPImm;
1067     Op->Imm.Kind = ImmKindTyNone;
1068     Op->Imm.Type = Type;
1069     Op->Imm.Mods = Modifiers();
1070     Op->StartLoc = Loc;
1071     Op->EndLoc = Loc;
1072     return Op;
1073   }
1074 
1075   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1076                                         StringRef Str, SMLoc Loc,
1077                                         bool HasExplicitEncodingSize = true) {
1078     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1079     Res->Tok.Data = Str.data();
1080     Res->Tok.Length = Str.size();
1081     Res->StartLoc = Loc;
1082     Res->EndLoc = Loc;
1083     return Res;
1084   }
1085 
1086   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1087                                       unsigned RegNo, SMLoc S,
1088                                       SMLoc E) {
1089     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1090     Op->Reg.RegNo = RegNo;
1091     Op->Reg.Mods = Modifiers();
1092     Op->StartLoc = S;
1093     Op->EndLoc = E;
1094     return Op;
1095   }
1096 
1097   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1098                                        const class MCExpr *Expr, SMLoc S) {
1099     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1100     Op->Expr = Expr;
1101     Op->StartLoc = S;
1102     Op->EndLoc = S;
1103     return Op;
1104   }
1105 };
1106 
1107 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1108   OS << "abs:" << Mods.Abs << " neg:" << Mods.Neg << " sext:" << Mods.Sext;
1109   return OS;
1110 }
1111 
1112 //===----------------------------------------------------------------------===//
1113 // AsmParser
1114 //===----------------------------------------------------------------------===//
1115 
1116 // Holds info related to the current kernel, e.g. count of SGPRs used.
1117 // Kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the next
1118 // .amdgpu_hsa_kernel directive or at EOF.
1119 class KernelScopeInfo {
1120   int SgprIndexUnusedMin = -1;
1121   int VgprIndexUnusedMin = -1;
1122   MCContext *Ctx = nullptr;
1123 
1124   void usesSgprAt(int i) {
1125     if (i >= SgprIndexUnusedMin) {
1126       SgprIndexUnusedMin = ++i;
1127       if (Ctx) {
1128         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1129         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1130       }
1131     }
1132   }
1133 
1134   void usesVgprAt(int i) {
1135     if (i >= VgprIndexUnusedMin) {
1136       VgprIndexUnusedMin = ++i;
1137       if (Ctx) {
1138         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1139         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
1140       }
1141     }
1142   }
1143 
1144 public:
1145   KernelScopeInfo() = default;
1146 
1147   void initialize(MCContext &Context) {
1148     Ctx = &Context;
1149     usesSgprAt(SgprIndexUnusedMin = -1);
1150     usesVgprAt(VgprIndexUnusedMin = -1);
1151   }
1152 
1153   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
1154     switch (RegKind) {
1155       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
1156       case IS_AGPR: // fall through
1157       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1158       default: break;
1159     }
1160   }
1161 };
1162 
1163 class AMDGPUAsmParser : public MCTargetAsmParser {
1164   MCAsmParser &Parser;
1165 
1166   // Maximum number of extra operands parsed after the first optional operand.
1167   // This may be necessary to skip hardcoded mandatory operands.
1168   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1169 
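  // Encoding variant forced by an explicit mnemonic suffix
  // (e.g. _e32, _e64, _dpp, _sdwa).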
1170   unsigned ForcedEncodingSize = 0;
1171   bool ForcedDPP = false;
1172   bool ForcedSDWA = false;
1173   KernelScopeInfo KernelScope;
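  // Cache policy modifier bits already parsed for the current instruction;
  // used to detect duplicate modifiers.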
1174   unsigned CPolSeen;
1175 
1176   /// @name Auto-generated Match Functions
1177   /// {
1178 
1179 #define GET_ASSEMBLER_HEADER
1180 #include "AMDGPUGenAsmMatcher.inc"
1181 
1182   /// }
1183 
1184 private:
1185   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1186   bool OutOfRangeError(SMRange Range);
1187   /// Calculate the VGPR/SGPR blocks required for the given target, reserved
1188   /// registers, and user-specified NextFreeXGPR values.
1189   ///
1190   /// \param Features [in] Target features, used for bug corrections.
1191   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1192   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1193   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1194   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1195   /// descriptor field, if valid.
1196   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1197   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1198   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1199   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1200   /// \param VGPRBlocks [out] Result VGPR block count.
1201   /// \param SGPRBlocks [out] Result SGPR block count.
1202   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1203                           bool FlatScrUsed, bool XNACKUsed,
1204                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1205                           SMRange VGPRRange, unsigned NextFreeSGPR,
1206                           SMRange SGPRRange, unsigned &VGPRBlocks,
1207                           unsigned &SGPRBlocks);
1208   bool ParseDirectiveAMDGCNTarget();
1209   bool ParseDirectiveAMDHSAKernel();
1210   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1211   bool ParseDirectiveHSACodeObjectVersion();
1212   bool ParseDirectiveHSACodeObjectISA();
1213   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1214   bool ParseDirectiveAMDKernelCodeT();
1215   // TODO: Possibly make subtargetHasRegister const.
1216   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1217   bool ParseDirectiveAMDGPUHsaKernel();
1218 
1219   bool ParseDirectiveISAVersion();
1220   bool ParseDirectiveHSAMetadata();
1221   bool ParseDirectivePALMetadataBegin();
1222   bool ParseDirectivePALMetadata();
1223   bool ParseDirectiveAMDGPULDS();
1224 
1225   /// Common code to parse out a block of text (typically YAML) between start and
1226   /// end directives.
1227   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1228                            const char *AssemblerDirectiveEnd,
1229                            std::string &CollectString);
1230 
1231   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1232                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1233   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1234                            unsigned &RegNum, unsigned &RegWidth,
1235                            bool RestoreOnFailure = false);
1236   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1237                            unsigned &RegNum, unsigned &RegWidth,
1238                            SmallVectorImpl<AsmToken> &Tokens);
1239   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1240                            unsigned &RegWidth,
1241                            SmallVectorImpl<AsmToken> &Tokens);
1242   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1243                            unsigned &RegWidth,
1244                            SmallVectorImpl<AsmToken> &Tokens);
1245   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1246                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1247   bool ParseRegRange(unsigned& Num, unsigned& Width);
1248   unsigned getRegularReg(RegisterKind RegKind,
1249                          unsigned RegNum,
1250                          unsigned RegWidth,
1251                          SMLoc Loc);
1252 
1253   bool isRegister();
1254   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1255   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1256   void initializeGprCountSymbol(RegisterKind RegKind);
1257   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1258                              unsigned RegWidth);
1259   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1260                     bool IsAtomic, bool IsLds = false);
1261   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1262                  bool IsGdsHardcoded);
1263 
1264 public:
1265   enum AMDGPUMatchResultTy {
1266     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1267   };
1268   enum OperandMode {
1269     OperandMode_Default,
1270     OperandMode_NSA,
1271   };
1272 
1273   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1274 
1275   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1276                const MCInstrInfo &MII,
1277                const MCTargetOptions &Options)
1278       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1279     MCAsmParserExtension::Initialize(Parser);
1280 
1281     if (getFeatureBits().none()) {
1282       // Set default features.
1283       copySTI().ToggleFeature("southern-islands");
1284     }
1285 
1286     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1287 
1288     {
1289       // TODO: make those pre-defined variables read-only.
1290       // Currently there is no suitable machinery in the core llvm-mc for this.
1291       // MCSymbol::isRedefinable is intended for another purpose, and
1292       // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1293       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1294       MCContext &Ctx = getContext();
1295       if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
1296         MCSymbol *Sym =
1297             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1298         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1299         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1300         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1301         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1302         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1303       } else {
1304         MCSymbol *Sym =
1305             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1306         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1307         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1308         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1309         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1310         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1311       }
1312       if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
1313         initializeGprCountSymbol(IS_VGPR);
1314         initializeGprCountSymbol(IS_SGPR);
1315       } else
1316         KernelScope.initialize(getContext());
1317     }
1318   }
1319 
1320   bool hasMIMG_R128() const {
1321     return AMDGPU::hasMIMG_R128(getSTI());
1322   }
1323 
1324   bool hasPackedD16() const {
1325     return AMDGPU::hasPackedD16(getSTI());
1326   }
1327 
1328   bool hasGFX10A16() const {
1329     return AMDGPU::hasGFX10A16(getSTI());
1330   }
1331 
1332   bool isSI() const {
1333     return AMDGPU::isSI(getSTI());
1334   }
1335 
1336   bool isCI() const {
1337     return AMDGPU::isCI(getSTI());
1338   }
1339 
1340   bool isVI() const {
1341     return AMDGPU::isVI(getSTI());
1342   }
1343 
1344   bool isGFX9() const {
1345     return AMDGPU::isGFX9(getSTI());
1346   }
1347 
1348   bool isGFX90A() const {
1349     return AMDGPU::isGFX90A(getSTI());
1350   }
1351 
1352   bool isGFX9Plus() const {
1353     return AMDGPU::isGFX9Plus(getSTI());
1354   }
1355 
1356   bool isGFX10() const {
1357     return AMDGPU::isGFX10(getSTI());
1358   }
1359 
1360   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1361 
1362   bool isGFX10_BEncoding() const {
1363     return AMDGPU::isGFX10_BEncoding(getSTI());
1364   }
1365 
1366   bool hasInv2PiInlineImm() const {
1367     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1368   }
1369 
1370   bool hasFlatOffsets() const {
1371     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1372   }
1373 
1374   bool hasSGPR102_SGPR103() const {
1375     return !isVI() && !isGFX9();
1376   }
1377 
1378   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1379 
1380   bool hasIntClamp() const {
1381     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1382   }
1383 
1384   AMDGPUTargetStreamer &getTargetStreamer() {
1385     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1386     return static_cast<AMDGPUTargetStreamer &>(TS);
1387   }
1388 
1389   const MCRegisterInfo *getMRI() const {
1390     // We need this const_cast because for some reason getContext() is not const
1391     // in MCAsmParser.
1392     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1393   }
1394 
1395   const MCInstrInfo *getMII() const {
1396     return &MII;
1397   }
1398 
1399   const FeatureBitset &getFeatureBits() const {
1400     return getSTI().getFeatureBits();
1401   }
1402 
1403   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1404   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1405   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1406 
1407   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1408   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1409   bool isForcedDPP() const { return ForcedDPP; }
1410   bool isForcedSDWA() const { return ForcedSDWA; }
1411   ArrayRef<unsigned> getMatchedVariants() const;
1412   StringRef getMatchedVariantName() const;
1413 
1414   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1415   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1416                      bool RestoreOnFailure);
1417   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1418   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1419                                         SMLoc &EndLoc) override;
1420   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1421   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1422                                       unsigned Kind) override;
1423   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1424                                OperandVector &Operands, MCStreamer &Out,
1425                                uint64_t &ErrorInfo,
1426                                bool MatchingInlineAsm) override;
1427   bool ParseDirective(AsmToken DirectiveID) override;
1428   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1429                                     OperandMode Mode = OperandMode_Default);
1430   StringRef parseMnemonicSuffix(StringRef Name);
1431   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1432                         SMLoc NameLoc, OperandVector &Operands) override;
1433   //bool ProcessInstruction(MCInst &Inst);
1434 
1435   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1436 
1437   OperandMatchResultTy
1438   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1439                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1440                      bool (*ConvertResult)(int64_t &) = nullptr);
1441 
1442   OperandMatchResultTy
1443   parseOperandArrayWithPrefix(const char *Prefix,
1444                               OperandVector &Operands,
1445                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1446                               bool (*ConvertResult)(int64_t&) = nullptr);
1447 
1448   OperandMatchResultTy
1449   parseNamedBit(StringRef Name, OperandVector &Operands,
1450                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1451   OperandMatchResultTy parseCPol(OperandVector &Operands);
1452   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1453                                              StringRef &Value,
1454                                              SMLoc &StringLoc);
1455 
1456   bool isModifier();
1457   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1458   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1459   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1460   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1461   bool parseSP3NegModifier();
1462   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1463   OperandMatchResultTy parseReg(OperandVector &Operands);
1464   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1465   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1466   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1467   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1468   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1469   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1470   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1471   OperandMatchResultTy parseUfmt(int64_t &Format);
1472   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1473   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1474   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1475   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1476   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1477   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1478   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1479 
1480   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1481   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1482   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1483   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1484 
1485   bool parseCnt(int64_t &IntVal);
1486   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1487   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1488 
1489 private:
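  // Describes a single symbolic or numeric field of a structured operand
  // such as hwreg() or sendmsg().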
1490   struct OperandInfoTy {
1491     SMLoc Loc;
1492     int64_t Id;
1493     bool IsSymbolic = false;
1494     bool IsDefined = false;
1495 
1496     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1497   };
1498 
1499   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1500   bool validateSendMsg(const OperandInfoTy &Msg,
1501                        const OperandInfoTy &Op,
1502                        const OperandInfoTy &Stream);
1503 
1504   bool parseHwregBody(OperandInfoTy &HwReg,
1505                       OperandInfoTy &Offset,
1506                       OperandInfoTy &Width);
1507   bool validateHwreg(const OperandInfoTy &HwReg,
1508                      const OperandInfoTy &Offset,
1509                      const OperandInfoTy &Width);
1510 
1511   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1512   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1513 
1514   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1515                       const OperandVector &Operands) const;
1516   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1517   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1518   SMLoc getLitLoc(const OperandVector &Operands) const;
1519   SMLoc getConstLoc(const OperandVector &Operands) const;
1520 
1521   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1522   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1523   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1524   bool validateSOPLiteral(const MCInst &Inst) const;
1525   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1526   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1527   bool validateIntClampSupported(const MCInst &Inst);
1528   bool validateMIMGAtomicDMask(const MCInst &Inst);
1529   bool validateMIMGGatherDMask(const MCInst &Inst);
1530   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1531   bool validateMIMGDataSize(const MCInst &Inst);
1532   bool validateMIMGAddrSize(const MCInst &Inst);
1533   bool validateMIMGD16(const MCInst &Inst);
1534   bool validateMIMGDim(const MCInst &Inst);
1535   bool validateMIMGMSAA(const MCInst &Inst);
1536   bool validateOpSel(const MCInst &Inst);
1537   bool validateVccOperand(unsigned Reg) const;
1538   bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
1539   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1540   bool validateAGPRLdSt(const MCInst &Inst) const;
1541   bool validateVGPRAlign(const MCInst &Inst) const;
1542   bool validateDivScale(const MCInst &Inst);
1543   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1544                              const SMLoc &IDLoc);
1545   Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1546   unsigned getConstantBusLimit(unsigned Opcode) const;
1547   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1548   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1549   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1550 
1551   bool isSupportedMnemo(StringRef Mnemo,
1552                         const FeatureBitset &FBS);
1553   bool isSupportedMnemo(StringRef Mnemo,
1554                         const FeatureBitset &FBS,
1555                         ArrayRef<unsigned> Variants);
1556   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1557 
1558   bool isId(const StringRef Id) const;
1559   bool isId(const AsmToken &Token, const StringRef Id) const;
1560   bool isToken(const AsmToken::TokenKind Kind) const;
1561   bool trySkipId(const StringRef Id);
1562   bool trySkipId(const StringRef Pref, const StringRef Id);
1563   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1564   bool trySkipToken(const AsmToken::TokenKind Kind);
1565   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1566   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1567   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1568 
1569   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1570   AsmToken::TokenKind getTokenKind() const;
1571   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1572   bool parseExpr(OperandVector &Operands);
1573   StringRef getTokenStr() const;
1574   AsmToken peekToken();
1575   AsmToken getToken() const;
1576   SMLoc getLoc() const;
1577   void lex();
1578 
1579 public:
1580   void onBeginOfFile() override;
1581 
1582   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1583   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1584 
1585   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1586   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1587   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1588   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1589   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1590   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1591 
1592   bool parseSwizzleOperand(int64_t &Op,
1593                            const unsigned MinVal,
1594                            const unsigned MaxVal,
1595                            const StringRef ErrMsg,
1596                            SMLoc &Loc);
1597   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1598                             const unsigned MinVal,
1599                             const unsigned MaxVal,
1600                             const StringRef ErrMsg);
1601   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1602   bool parseSwizzleOffset(int64_t &Imm);
1603   bool parseSwizzleMacro(int64_t &Imm);
1604   bool parseSwizzleQuadPerm(int64_t &Imm);
1605   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1606   bool parseSwizzleBroadcast(int64_t &Imm);
1607   bool parseSwizzleSwap(int64_t &Imm);
1608   bool parseSwizzleReverse(int64_t &Imm);
1609 
1610   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1611   int64_t parseGPRIdxMacro();
1612 
1613   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1614   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1615   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1616   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1617 
1618   AMDGPUOperand::Ptr defaultCPol() const;
1619 
1620   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1621   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1622   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1623   AMDGPUOperand::Ptr defaultFlatOffset() const;
1624 
1625   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1626 
1627   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1628                OptionalImmIndexMap &OptionalIdx);
1629   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1630   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1631   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1632   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1633                 OptionalImmIndexMap &OptionalIdx);
1634 
1635   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1636 
1637   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1638                bool IsAtomic = false);
1639   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1640   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1641 
1642   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1643 
1644   bool parseDimId(unsigned &Encoding);
1645   OperandMatchResultTy parseDim(OperandVector &Operands);
1646   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1647   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1648   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1649   int64_t parseDPPCtrlSel(StringRef Ctrl);
1650   int64_t parseDPPCtrlPerm();
1651   AMDGPUOperand::Ptr defaultRowMask() const;
1652   AMDGPUOperand::Ptr defaultBankMask() const;
1653   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1654   AMDGPUOperand::Ptr defaultFI() const;
1655   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1656   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1657 
1658   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1659                                     AMDGPUOperand::ImmTy Type);
1660   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1661   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1662   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1663   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1664   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1665   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1666   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1667                uint64_t BasicInstType,
1668                bool SkipDstVcc = false,
1669                bool SkipSrcVcc = false);
1670 
1671   AMDGPUOperand::Ptr defaultBLGP() const;
1672   AMDGPUOperand::Ptr defaultCBSZ() const;
1673   AMDGPUOperand::Ptr defaultABID() const;
1674 
1675   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1676   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1677 };
1678 
1679 struct OptionalOperand {
1680   const char *Name;
1681   AMDGPUOperand::ImmTy Type;
1682   bool IsBit;
1683   bool (*ConvertResult)(int64_t&);
1684 };
1685 
1686 } // end anonymous namespace
1687 
1688 // May be called with an integer type of equivalent bitwidth.
1689 static const fltSemantics *getFltSemantics(unsigned Size) {
1690   switch (Size) {
1691   case 4:
1692     return &APFloat::IEEEsingle();
1693   case 8:
1694     return &APFloat::IEEEdouble();
1695   case 2:
1696     return &APFloat::IEEEhalf();
1697   default:
1698     llvm_unreachable("unsupported fp type");
1699   }
1700 }
1701 
1702 static const fltSemantics *getFltSemantics(MVT VT) {
1703   return getFltSemantics(VT.getSizeInBits() / 8);
1704 }
1705 
1706 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1707   switch (OperandType) {
1708   case AMDGPU::OPERAND_REG_IMM_INT32:
1709   case AMDGPU::OPERAND_REG_IMM_FP32:
1710   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1711   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1712   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1713   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1714   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1715   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1716   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1717   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1718     return &APFloat::IEEEsingle();
1719   case AMDGPU::OPERAND_REG_IMM_INT64:
1720   case AMDGPU::OPERAND_REG_IMM_FP64:
1721   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1722   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1723   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1724     return &APFloat::IEEEdouble();
1725   case AMDGPU::OPERAND_REG_IMM_INT16:
1726   case AMDGPU::OPERAND_REG_IMM_FP16:
1727   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1728   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1729   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1730   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1731   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1732   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1733   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1734   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1735   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1736   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1737     return &APFloat::IEEEhalf();
1738   default:
1739     llvm_unreachable("unsupported fp type");
1740   }
1741 }
1742 
1743 //===----------------------------------------------------------------------===//
1744 // Operand
1745 //===----------------------------------------------------------------------===//
1746 
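// Checks whether FPLiteral can be converted to the given type without
// overflow or underflow; precision loss alone is tolerated. For example,
// with an f16 target: 1.0 converts exactly and is accepted; 0.1 is merely
// inexact and is also accepted; 1.0e10 overflows to infinity and is rejected.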
1747 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1748   bool Lost;
1749 
1750   // Convert the literal to the semantics of the target type.
1751   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1752                                                APFloat::rmNearestTiesToEven,
1753                                                &Lost);
1754   // We allow precision loss but not overflow or underflow
1755   if (Status != APFloat::opOK &&
1756       Lost &&
1757       ((Status & APFloat::opOverflow)  != 0 ||
1758        (Status & APFloat::opUnderflow) != 0)) {
1759     return false;
1760   }
1761 
1762   return true;
1763 }
1764 
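// Returns true if Val fits in Size bits when read as either an unsigned or a
// signed value. For example, with Size == 16 both 0xFFFF and -1 are safe,
// while 0x1FFFF is not.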
1765 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1766   return isUIntN(Size, Val) || isIntN(Size, Val);
1767 }
1768 
1769 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1770   if (VT.getScalarType() == MVT::i16) {
1771     // FP immediate values are broken.
1772     return isInlinableIntLiteral(Val);
1773   }
1774 
1775   // f16/v2f16 operands work correctly for all values.
1776   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1777 }
1778 
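// For reference (the authoritative checks live in AMDGPUBaseInfo): hardware
// inline constants cover small integers in [-16, 64] and a handful of FP
// values such as 0.0, +-0.5, +-1.0, +-2.0, +-4.0 and, on targets with the
// corresponding feature, 1/(2*pi).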
1779 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1780 
1781   // This is a hack to enable named inline values like
1782   // shared_base with both 32-bit and 64-bit operands.
1783   // Note that these values are defined as
1784   // 32-bit operands only.
1785   if (isInlineValue()) {
1786     return true;
1787   }
1788 
1789   if (!isImmTy(ImmTyNone)) {
1790     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1791     return false;
1792   }
1793   // TODO: We should avoid using host float here. It would be better to
1794   // check the float bit values which is what a few other places do.
1795   // We've had bot failures before due to weird NaN support on mips hosts.
1796 
1797   APInt Literal(64, Imm.Val);
1798 
1799   if (Imm.IsFPImm) { // We got fp literal token
1800     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1801       return AMDGPU::isInlinableLiteral64(Imm.Val,
1802                                           AsmParser->hasInv2PiInlineImm());
1803     }
1804 
1805     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1806     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1807       return false;
1808 
1809     if (type.getScalarSizeInBits() == 16) {
1810       return isInlineableLiteralOp16(
1811         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1812         type, AsmParser->hasInv2PiInlineImm());
1813     }
1814 
1815     // Check if single precision literal is inlinable
1816     return AMDGPU::isInlinableLiteral32(
1817       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1818       AsmParser->hasInv2PiInlineImm());
1819   }
1820 
1821   // We got int literal token.
1822   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1823     return AMDGPU::isInlinableLiteral64(Imm.Val,
1824                                         AsmParser->hasInv2PiInlineImm());
1825   }
1826 
1827   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1828     return false;
1829   }
1830 
1831   if (type.getScalarSizeInBits() == 16) {
1832     return isInlineableLiteralOp16(
1833       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1834       type, AsmParser->hasInv2PiInlineImm());
1835   }
1836 
1837   return AMDGPU::isInlinableLiteral32(
1838     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1839     AsmParser->hasInv2PiInlineImm());
1840 }
1841 
1842 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1843   // Check that this immediate can be added as literal
1844   if (!isImmTy(ImmTyNone)) {
1845     return false;
1846   }
1847 
1848   if (!Imm.IsFPImm) {
1849     // We got int literal token.
1850 
1851     if (type == MVT::f64 && hasFPModifiers()) {
1852       // FP modifiers cannot be applied to int literals while preserving the
1853       // same semantics for VOP1/2/C and VOP3 because of integer truncation.
1854       // To avoid ambiguity, reject these cases.
1855       return false;
1856     }
1857 
1858     unsigned Size = type.getSizeInBits();
1859     if (Size == 64)
1860       Size = 32;
1861 
1862     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1863     // types.
1864     return isSafeTruncation(Imm.Val, Size);
1865   }
1866 
1867   // We got fp literal token
1868   if (type == MVT::f64) { // Expected 64-bit fp operand
1869     // The low 32 bits of the literal will be set to zeroes, but we accept such literals
1870     return true;
1871   }
1872 
1873   if (type == MVT::i64) { // Expected 64-bit int operand
1874     // We don't allow fp literals in 64-bit integer instructions. It is
1875     // unclear how we should encode them.
1876     return false;
1877   }
1878 
1879   // We allow fp literals with f16x2 operands assuming that the specified
1880   // literal goes into the lower half and the upper half is zero. We also
1881   // require that the literal may be losslessly converted to f16.
1882   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1883                      (type == MVT::v2i16)? MVT::i16 :
1884                      (type == MVT::v2f32)? MVT::f32 : type;
1885 
1886   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1887   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1888 }
1889 
1890 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1891   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1892 }
1893 
1894 bool AMDGPUOperand::isVRegWithInputMods() const {
1895   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1896          // GFX90A allows DPP on 64-bit operands.
1897          (isRegClass(AMDGPU::VReg_64RegClassID) &&
1898           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1899 }
1900 
1901 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1902   if (AsmParser->isVI())
1903     return isVReg32();
1904   else if (AsmParser->isGFX9Plus())
1905     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1906   else
1907     return false;
1908 }
1909 
1910 bool AMDGPUOperand::isSDWAFP16Operand() const {
1911   return isSDWAOperand(MVT::f16);
1912 }
1913 
1914 bool AMDGPUOperand::isSDWAFP32Operand() const {
1915   return isSDWAOperand(MVT::f32);
1916 }
1917 
1918 bool AMDGPUOperand::isSDWAInt16Operand() const {
1919   return isSDWAOperand(MVT::i16);
1920 }
1921 
1922 bool AMDGPUOperand::isSDWAInt32Operand() const {
1923   return isSDWAOperand(MVT::i32);
1924 }
1925 
1926 bool AMDGPUOperand::isBoolReg() const {
1927   return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1928          (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1929 }
1930 
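// Applies the parsed abs/neg modifiers directly to the literal's sign bit.
// For example, for a 4-byte operand FpSignMask is 0x80000000: abs clears the
// bit and neg flips it.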
1931 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1932 {
1933   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1934   assert(Size == 2 || Size == 4 || Size == 8);
1935 
1936   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1937 
1938   if (Imm.Mods.Abs) {
1939     Val &= ~FpSignMask;
1940   }
1941   if (Imm.Mods.Neg) {
1942     Val ^= FpSignMask;
1943   }
1944 
1945   return Val;
1946 }
1947 
1948 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1949   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1950                              Inst.getNumOperands())) {
1951     addLiteralImmOperand(Inst, Imm.Val,
1952                          ApplyModifiers &&
1953                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1954   } else {
1955     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1956     Inst.addOperand(MCOperand::createImm(Imm.Val));
1957     setImmKindNone();
1958   }
1959 }
1960 
1961 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1962   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1963   auto OpNum = Inst.getNumOperands();
1964   // Check that this operand accepts literals
1965   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1966 
1967   if (ApplyModifiers) {
1968     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1969     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1970     Val = applyInputFPModifiers(Val, Size);
1971   }
1972 
1973   APInt Literal(64, Val);
1974   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1975 
1976   if (Imm.IsFPImm) { // We got fp literal token
1977     switch (OpTy) {
1978     case AMDGPU::OPERAND_REG_IMM_INT64:
1979     case AMDGPU::OPERAND_REG_IMM_FP64:
1980     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1981     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1982     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1983       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1984                                        AsmParser->hasInv2PiInlineImm())) {
1985         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1986         setImmKindConst();
1987         return;
1988       }
1989 
1990       // Non-inlineable
1991       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1992         // For fp operands we check if low 32 bits are zeros
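        // For example, the f64 literal 1/3 (0x3FD5555555555555) cannot be
        // encoded exactly: only its high 32 bits (0x3FD55555) are emitted
        // below, so the hardware sees the value 0x3FD5555500000000.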
1993         if (Literal.getLoBits(32) != 0) {
1994           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1995           "Can't encode literal as exact 64-bit floating-point operand. "
1996           "Low 32-bits will be set to zero");
1997         }
1998 
1999         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2000         setImmKindLiteral();
2001         return;
2002       }
2003 
2004       // We don't allow fp literals in 64-bit integer instructions. It is
2005       // unclear how we should encode them. This case should be checked earlier
2006       // in predicate methods (isLiteralImm())
2007       llvm_unreachable("fp literal in 64-bit integer instruction.");
2008 
2009     case AMDGPU::OPERAND_REG_IMM_INT32:
2010     case AMDGPU::OPERAND_REG_IMM_FP32:
2011     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2012     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2013     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2014     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2015     case AMDGPU::OPERAND_REG_IMM_INT16:
2016     case AMDGPU::OPERAND_REG_IMM_FP16:
2017     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2018     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2019     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2020     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2021     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2022     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2023     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2024     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2025     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2026     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2027     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2028     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2029     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2030     case AMDGPU::OPERAND_REG_IMM_V2INT32: {
2031       bool lost;
2032       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2033       // Convert the literal to the operand's floating-point semantics.
2034       FPLiteral.convert(*getOpFltSemantics(OpTy),
2035                         APFloat::rmNearestTiesToEven, &lost);
2036       // We allow precision loss but not overflow or underflow. This should be
2037       // checked earlier in isLiteralImm()
2038 
2039       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2040       Inst.addOperand(MCOperand::createImm(ImmVal));
2041       setImmKindLiteral();
2042       return;
2043     }
2044     default:
2045       llvm_unreachable("invalid operand size");
2046     }
2047 
2048     return;
2049   }
2050 
2051   // We got int literal token.
2052   // Only sign extend inline immediates.
2053   switch (OpTy) {
2054   case AMDGPU::OPERAND_REG_IMM_INT32:
2055   case AMDGPU::OPERAND_REG_IMM_FP32:
2056   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2057   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2058   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2059   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2060   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2061   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2062   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2063   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2064   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2065   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2066     if (isSafeTruncation(Val, 32) &&
2067         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2068                                      AsmParser->hasInv2PiInlineImm())) {
2069       Inst.addOperand(MCOperand::createImm(Val));
2070       setImmKindConst();
2071       return;
2072     }
2073 
2074     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2075     setImmKindLiteral();
2076     return;
2077 
2078   case AMDGPU::OPERAND_REG_IMM_INT64:
2079   case AMDGPU::OPERAND_REG_IMM_FP64:
2080   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2081   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2082   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2083     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2084       Inst.addOperand(MCOperand::createImm(Val));
2085       setImmKindConst();
2086       return;
2087     }
2088 
2089     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2090     setImmKindLiteral();
2091     return;
2092 
2093   case AMDGPU::OPERAND_REG_IMM_INT16:
2094   case AMDGPU::OPERAND_REG_IMM_FP16:
2095   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2096   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2097   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2098   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2099     if (isSafeTruncation(Val, 16) &&
2100         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2101                                      AsmParser->hasInv2PiInlineImm())) {
2102       Inst.addOperand(MCOperand::createImm(Val));
2103       setImmKindConst();
2104       return;
2105     }
2106 
2107     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2108     setImmKindLiteral();
2109     return;
2110 
2111   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2112   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2113   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2114   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2115     assert(isSafeTruncation(Val, 16));
2116     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2117                                         AsmParser->hasInv2PiInlineImm()));
2118 
2119     Inst.addOperand(MCOperand::createImm(Val));
2120     return;
2121   }
2122   default:
2123     llvm_unreachable("invalid operand size");
2124   }
2125 }
2126 
2127 template <unsigned Bitwidth>
2128 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2129   APInt Literal(64, Imm.Val);
2130   setImmKindNone();
2131 
2132   if (!Imm.IsFPImm) {
2133     // We got int literal token.
2134     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2135     return;
2136   }
2137 
2138   bool Lost;
2139   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2140   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2141                     APFloat::rmNearestTiesToEven, &Lost);
2142   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2143 }
2144 
2145 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2146   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2147 }
2148 
2149 static bool isInlineValue(unsigned Reg) {
2150   switch (Reg) {
2151   case AMDGPU::SRC_SHARED_BASE:
2152   case AMDGPU::SRC_SHARED_LIMIT:
2153   case AMDGPU::SRC_PRIVATE_BASE:
2154   case AMDGPU::SRC_PRIVATE_LIMIT:
2155   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2156     return true;
2157   case AMDGPU::SRC_VCCZ:
2158   case AMDGPU::SRC_EXECZ:
2159   case AMDGPU::SRC_SCC:
2160     return true;
2161   case AMDGPU::SGPR_NULL:
2162     return true;
2163   default:
2164     return false;
2165   }
2166 }
2167 
2168 bool AMDGPUOperand::isInlineValue() const {
2169   return isRegKind() && ::isInlineValue(getReg());
2170 }
2171 
2172 //===----------------------------------------------------------------------===//
2173 // AsmParser
2174 //===----------------------------------------------------------------------===//
2175 
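// Maps a register kind and width (in 32-bit registers) to the matching
// register class ID, e.g. (IS_VGPR, 2) -> VReg_64 and (IS_SGPR, 4) ->
// SGPR_128. Returns -1 for unsupported combinations.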
2176 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2177   if (Is == IS_VGPR) {
2178     switch (RegWidth) {
2179       default: return -1;
2180       case 1: return AMDGPU::VGPR_32RegClassID;
2181       case 2: return AMDGPU::VReg_64RegClassID;
2182       case 3: return AMDGPU::VReg_96RegClassID;
2183       case 4: return AMDGPU::VReg_128RegClassID;
2184       case 5: return AMDGPU::VReg_160RegClassID;
2185       case 6: return AMDGPU::VReg_192RegClassID;
2186       case 8: return AMDGPU::VReg_256RegClassID;
2187       case 16: return AMDGPU::VReg_512RegClassID;
2188       case 32: return AMDGPU::VReg_1024RegClassID;
2189     }
2190   } else if (Is == IS_TTMP) {
2191     switch (RegWidth) {
2192       default: return -1;
2193       case 1: return AMDGPU::TTMP_32RegClassID;
2194       case 2: return AMDGPU::TTMP_64RegClassID;
2195       case 4: return AMDGPU::TTMP_128RegClassID;
2196       case 8: return AMDGPU::TTMP_256RegClassID;
2197       case 16: return AMDGPU::TTMP_512RegClassID;
2198     }
2199   } else if (Is == IS_SGPR) {
2200     switch (RegWidth) {
2201       default: return -1;
2202       case 1: return AMDGPU::SGPR_32RegClassID;
2203       case 2: return AMDGPU::SGPR_64RegClassID;
2204       case 3: return AMDGPU::SGPR_96RegClassID;
2205       case 4: return AMDGPU::SGPR_128RegClassID;
2206       case 5: return AMDGPU::SGPR_160RegClassID;
2207       case 6: return AMDGPU::SGPR_192RegClassID;
2208       case 8: return AMDGPU::SGPR_256RegClassID;
2209       case 16: return AMDGPU::SGPR_512RegClassID;
2210     }
2211   } else if (Is == IS_AGPR) {
2212     switch (RegWidth) {
2213       default: return -1;
2214       case 1: return AMDGPU::AGPR_32RegClassID;
2215       case 2: return AMDGPU::AReg_64RegClassID;
2216       case 3: return AMDGPU::AReg_96RegClassID;
2217       case 4: return AMDGPU::AReg_128RegClassID;
2218       case 5: return AMDGPU::AReg_160RegClassID;
2219       case 6: return AMDGPU::AReg_192RegClassID;
2220       case 8: return AMDGPU::AReg_256RegClassID;
2221       case 16: return AMDGPU::AReg_512RegClassID;
2222       case 32: return AMDGPU::AReg_1024RegClassID;
2223     }
2224   }
2225   return -1;
2226 }
2227 
2228 static unsigned getSpecialRegForName(StringRef RegName) {
2229   return StringSwitch<unsigned>(RegName)
2230     .Case("exec", AMDGPU::EXEC)
2231     .Case("vcc", AMDGPU::VCC)
2232     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2233     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2234     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2235     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2236     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2237     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2238     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2239     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2240     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2241     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2242     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2243     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2244     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2245     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2246     .Case("m0", AMDGPU::M0)
2247     .Case("vccz", AMDGPU::SRC_VCCZ)
2248     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2249     .Case("execz", AMDGPU::SRC_EXECZ)
2250     .Case("src_execz", AMDGPU::SRC_EXECZ)
2251     .Case("scc", AMDGPU::SRC_SCC)
2252     .Case("src_scc", AMDGPU::SRC_SCC)
2253     .Case("tba", AMDGPU::TBA)
2254     .Case("tma", AMDGPU::TMA)
2255     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2256     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2257     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2258     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2259     .Case("vcc_lo", AMDGPU::VCC_LO)
2260     .Case("vcc_hi", AMDGPU::VCC_HI)
2261     .Case("exec_lo", AMDGPU::EXEC_LO)
2262     .Case("exec_hi", AMDGPU::EXEC_HI)
2263     .Case("tma_lo", AMDGPU::TMA_LO)
2264     .Case("tma_hi", AMDGPU::TMA_HI)
2265     .Case("tba_lo", AMDGPU::TBA_LO)
2266     .Case("tba_hi", AMDGPU::TBA_HI)
2267     .Case("pc", AMDGPU::PC_REG)
2268     .Case("null", AMDGPU::SGPR_NULL)
2269     .Default(AMDGPU::NoRegister);
2270 }
2271 
2272 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2273                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2274   auto R = parseRegister();
2275   if (!R) return true;
2276   assert(R->isReg());
2277   RegNo = R->getReg();
2278   StartLoc = R->getStartLoc();
2279   EndLoc = R->getEndLoc();
2280   return false;
2281 }
2282 
2283 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2284                                     SMLoc &EndLoc) {
2285   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2286 }
2287 
2288 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2289                                                        SMLoc &StartLoc,
2290                                                        SMLoc &EndLoc) {
2291   bool Result =
2292       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2293   bool PendingErrors = getParser().hasPendingError();
2294   getParser().clearPendingErrors();
2295   if (PendingErrors)
2296     return MatchOperand_ParseFail;
2297   if (Result)
2298     return MatchOperand_NoMatch;
2299   return MatchOperand_Success;
2300 }
2301 
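// Folds the next register of a bracketed list into the register accumulated
// so far. For example, [exec_lo, exec_hi] becomes exec, and [s0, s1, s2] is
// grown into an s[0:2] triple one element at a time.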
2302 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2303                                             RegisterKind RegKind, unsigned Reg1,
2304                                             SMLoc Loc) {
2305   switch (RegKind) {
2306   case IS_SPECIAL:
2307     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2308       Reg = AMDGPU::EXEC;
2309       RegWidth = 2;
2310       return true;
2311     }
2312     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2313       Reg = AMDGPU::FLAT_SCR;
2314       RegWidth = 2;
2315       return true;
2316     }
2317     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2318       Reg = AMDGPU::XNACK_MASK;
2319       RegWidth = 2;
2320       return true;
2321     }
2322     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2323       Reg = AMDGPU::VCC;
2324       RegWidth = 2;
2325       return true;
2326     }
2327     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2328       Reg = AMDGPU::TBA;
2329       RegWidth = 2;
2330       return true;
2331     }
2332     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2333       Reg = AMDGPU::TMA;
2334       RegWidth = 2;
2335       return true;
2336     }
2337     Error(Loc, "register does not fit in the list");
2338     return false;
2339   case IS_VGPR:
2340   case IS_SGPR:
2341   case IS_AGPR:
2342   case IS_TTMP:
2343     if (Reg1 != Reg + RegWidth) {
2344       Error(Loc, "registers in a list must have consecutive indices");
2345       return false;
2346     }
2347     RegWidth++;
2348     return true;
2349   default:
2350     llvm_unreachable("unexpected register kind");
2351   }
2352 }
2353 
2354 struct RegInfo {
2355   StringLiteral Name;
2356   RegisterKind Kind;
2357 };
2358 
2359 static constexpr RegInfo RegularRegisters[] = {
2360   {{"v"},    IS_VGPR},
2361   {{"s"},    IS_SGPR},
2362   {{"ttmp"}, IS_TTMP},
2363   {{"acc"},  IS_AGPR},
2364   {{"a"},    IS_AGPR},
2365 };
2366 
2367 static bool isRegularReg(RegisterKind Kind) {
2368   return Kind == IS_VGPR ||
2369          Kind == IS_SGPR ||
2370          Kind == IS_TTMP ||
2371          Kind == IS_AGPR;
2372 }
2373 
2374 static const RegInfo* getRegularRegInfo(StringRef Str) {
2375   for (const RegInfo &Reg : RegularRegisters)
2376     if (Str.startswith(Reg.Name))
2377       return &Reg;
2378   return nullptr;
2379 }
2380 
2381 static bool getRegNum(StringRef Str, unsigned& Num) {
2382   return !Str.getAsInteger(10, Num);
2383 }
2384 
2385 bool
2386 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2387                             const AsmToken &NextToken) const {
2388 
2389   // A list of consecutive registers: [s0,s1,s2,s3]
2390   if (Token.is(AsmToken::LBrac))
2391     return true;
2392 
2393   if (!Token.is(AsmToken::Identifier))
2394     return false;
2395 
2396   // A single register like s0 or a range of registers like s[0:1]
2397 
2398   StringRef Str = Token.getString();
2399   const RegInfo *Reg = getRegularRegInfo(Str);
2400   if (Reg) {
2401     StringRef RegName = Reg->Name;
2402     StringRef RegSuffix = Str.substr(RegName.size());
2403     if (!RegSuffix.empty()) {
2404       unsigned Num;
2405       // A single register with an index: rXX
2406       if (getRegNum(RegSuffix, Num))
2407         return true;
2408     } else {
2409       // A range of registers: r[XX:YY].
2410       if (NextToken.is(AsmToken::LBrac))
2411         return true;
2412     }
2413   }
2414 
2415   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2416 }
2417 
2418 bool
2419 AMDGPUAsmParser::isRegister()
2420 {
2421   return isRegister(getToken(), peekToken());
2422 }
2423 
2424 unsigned
2425 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2426                                unsigned RegNum,
2427                                unsigned RegWidth,
2428                                SMLoc Loc) {
2429 
2430   assert(isRegularReg(RegKind));
2431 
2432   unsigned AlignSize = 1;
2433   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2434     // SGPR and TTMP registers must be aligned.
2435     // Max required alignment is 4 dwords.
2436     AlignSize = std::min(RegWidth, 4u);
2437   }
2438 
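  // For example, s[4:7] is accepted while s[2:5] is rejected here, because a
  // 4-dword SGPR tuple must start at an index that is a multiple of 4. VGPR
  // and AGPR tuples have no alignment requirement at this point.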
2439   if (RegNum % AlignSize != 0) {
2440     Error(Loc, "invalid register alignment");
2441     return AMDGPU::NoRegister;
2442   }
2443 
2444   unsigned RegIdx = RegNum / AlignSize;
2445   int RCID = getRegClass(RegKind, RegWidth);
2446   if (RCID == -1) {
2447     Error(Loc, "invalid or unsupported register size");
2448     return AMDGPU::NoRegister;
2449   }
2450 
2451   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2452   const MCRegisterClass RC = TRI->getRegClass(RCID);
2453   if (RegIdx >= RC.getNumRegs()) {
2454     Error(Loc, "register index is out of range");
2455     return AMDGPU::NoRegister;
2456   }
2457 
2458   return RC.getRegister(RegIdx);
2459 }
2460 
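// Parses the bracketed index part of a register range. For example, for
// v[8:11] this yields Num = 8 and Width = 4; a single index such as v[5]
// yields Width = 1.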
2461 bool
2462 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2463   int64_t RegLo, RegHi;
2464   if (!skipToken(AsmToken::LBrac, "missing register index"))
2465     return false;
2466 
2467   SMLoc FirstIdxLoc = getLoc();
2468   SMLoc SecondIdxLoc;
2469 
2470   if (!parseExpr(RegLo))
2471     return false;
2472 
2473   if (trySkipToken(AsmToken::Colon)) {
2474     SecondIdxLoc = getLoc();
2475     if (!parseExpr(RegHi))
2476       return false;
2477   } else {
2478     RegHi = RegLo;
2479   }
2480 
2481   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2482     return false;
2483 
2484   if (!isUInt<32>(RegLo)) {
2485     Error(FirstIdxLoc, "invalid register index");
2486     return false;
2487   }
2488 
2489   if (!isUInt<32>(RegHi)) {
2490     Error(SecondIdxLoc, "invalid register index");
2491     return false;
2492   }
2493 
2494   if (RegLo > RegHi) {
2495     Error(FirstIdxLoc, "first register index should not exceed second index");
2496     return false;
2497   }
2498 
2499   Num = static_cast<unsigned>(RegLo);
2500   Width = (RegHi - RegLo) + 1;
2501   return true;
2502 }
2503 
2504 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2505                                           unsigned &RegNum, unsigned &RegWidth,
2506                                           SmallVectorImpl<AsmToken> &Tokens) {
2507   assert(isToken(AsmToken::Identifier));
2508   unsigned Reg = getSpecialRegForName(getTokenStr());
2509   if (Reg) {
2510     RegNum = 0;
2511     RegWidth = 1;
2512     RegKind = IS_SPECIAL;
2513     Tokens.push_back(getToken());
2514     lex(); // skip register name
2515   }
2516   return Reg;
2517 }
2518 
2519 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2520                                           unsigned &RegNum, unsigned &RegWidth,
2521                                           SmallVectorImpl<AsmToken> &Tokens) {
2522   assert(isToken(AsmToken::Identifier));
2523   StringRef RegName = getTokenStr();
2524   auto Loc = getLoc();
2525 
2526   const RegInfo *RI = getRegularRegInfo(RegName);
2527   if (!RI) {
2528     Error(Loc, "invalid register name");
2529     return AMDGPU::NoRegister;
2530   }
2531 
2532   Tokens.push_back(getToken());
2533   lex(); // skip register name
2534 
2535   RegKind = RI->Kind;
2536   StringRef RegSuffix = RegName.substr(RI->Name.size());
2537   if (!RegSuffix.empty()) {
2538     // Single 32-bit register: vXX.
2539     if (!getRegNum(RegSuffix, RegNum)) {
2540       Error(Loc, "invalid register index");
2541       return AMDGPU::NoRegister;
2542     }
2543     RegWidth = 1;
2544   } else {
2545     // Range of registers: v[XX:YY]. ":YY" is optional.
2546     if (!ParseRegRange(RegNum, RegWidth))
2547       return AMDGPU::NoRegister;
2548   }
2549 
2550   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2551 }
2552 
2553 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2554                                        unsigned &RegWidth,
2555                                        SmallVectorImpl<AsmToken> &Tokens) {
2556   unsigned Reg = AMDGPU::NoRegister;
2557   auto ListLoc = getLoc();
2558 
2559   if (!skipToken(AsmToken::LBrac,
2560                  "expected a register or a list of registers")) {
2561     return AMDGPU::NoRegister;
2562   }
2563 
2564   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2565 
2566   auto Loc = getLoc();
2567   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2568     return AMDGPU::NoRegister;
2569   if (RegWidth != 1) {
2570     Error(Loc, "expected a single 32-bit register");
2571     return AMDGPU::NoRegister;
2572   }
2573 
2574   for (; trySkipToken(AsmToken::Comma); ) {
2575     RegisterKind NextRegKind;
2576     unsigned NextReg, NextRegNum, NextRegWidth;
2577     Loc = getLoc();
2578 
2579     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2580                              NextRegNum, NextRegWidth,
2581                              Tokens)) {
2582       return AMDGPU::NoRegister;
2583     }
2584     if (NextRegWidth != 1) {
2585       Error(Loc, "expected a single 32-bit register");
2586       return AMDGPU::NoRegister;
2587     }
2588     if (NextRegKind != RegKind) {
2589       Error(Loc, "registers in a list must be of the same kind");
2590       return AMDGPU::NoRegister;
2591     }
2592     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2593       return AMDGPU::NoRegister;
2594   }
2595 
2596   if (!skipToken(AsmToken::RBrac,
2597                  "expected a comma or a closing square bracket")) {
2598     return AMDGPU::NoRegister;
2599   }
2600 
2601   if (isRegularReg(RegKind))
2602     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2603 
2604   return Reg;
2605 }
2606 
2607 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2608                                           unsigned &RegNum, unsigned &RegWidth,
2609                                           SmallVectorImpl<AsmToken> &Tokens) {
2610   auto Loc = getLoc();
2611   Reg = AMDGPU::NoRegister;
2612 
2613   if (isToken(AsmToken::Identifier)) {
2614     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2615     if (Reg == AMDGPU::NoRegister)
2616       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2617   } else {
2618     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2619   }
2620 
2621   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2622   if (Reg == AMDGPU::NoRegister) {
2623     assert(Parser.hasPendingError());
2624     return false;
2625   }
2626 
2627   if (!subtargetHasRegister(*TRI, Reg)) {
2628     if (Reg == AMDGPU::SGPR_NULL) {
2629       Error(Loc, "'null' operand is not supported on this GPU");
2630     } else {
2631       Error(Loc, "register not available on this GPU");
2632     }
2633     return false;
2634   }
2635 
2636   return true;
2637 }
2638 
2639 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2640                                           unsigned &RegNum, unsigned &RegWidth,
2641                                           bool RestoreOnFailure /*=false*/) {
2642   Reg = AMDGPU::NoRegister;
2643 
2644   SmallVector<AsmToken, 1> Tokens;
2645   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2646     if (RestoreOnFailure) {
2647       while (!Tokens.empty()) {
2648         getLexer().UnLex(Tokens.pop_back_val());
2649       }
2650     }
2651     return true;
2652   }
2653   return false;
2654 }
2655 
2656 Optional<StringRef>
2657 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2658   switch (RegKind) {
2659   case IS_VGPR:
2660     return StringRef(".amdgcn.next_free_vgpr");
2661   case IS_SGPR:
2662     return StringRef(".amdgcn.next_free_sgpr");
2663   default:
2664     return None;
2665   }
2666 }
2667 
2668 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2669   auto SymbolName = getGprCountSymbolName(RegKind);
2670   assert(SymbolName && "initializing invalid register kind");
2671   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2672   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2673 }
2674 
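// Raises the .amdgcn.next_free_{v,s}gpr symbol if the referenced register
// range extends past its current value. For example, after parsing v[6:7] the
// .amdgcn.next_free_vgpr symbol is set to at least 8.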
2675 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2676                                             unsigned DwordRegIndex,
2677                                             unsigned RegWidth) {
2678   // Symbols are only defined for GCN targets
2679   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2680     return true;
2681 
2682   auto SymbolName = getGprCountSymbolName(RegKind);
2683   if (!SymbolName)
2684     return true;
2685   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2686 
2687   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2688   int64_t OldCount;
2689 
2690   if (!Sym->isVariable())
2691     return !Error(getLoc(),
2692                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2693   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2694     return !Error(
2695         getLoc(),
2696         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2697 
2698   if (OldCount <= NewMax)
2699     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2700 
2701   return true;
2702 }
2703 
2704 std::unique_ptr<AMDGPUOperand>
2705 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2706   const auto &Tok = getToken();
2707   SMLoc StartLoc = Tok.getLoc();
2708   SMLoc EndLoc = Tok.getEndLoc();
2709   RegisterKind RegKind;
2710   unsigned Reg, RegNum, RegWidth;
2711 
2712   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2713     return nullptr;
2714   }
2715   if (isHsaAbiVersion3Or4(&getSTI())) {
2716     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2717       return nullptr;
2718   } else
2719     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2720   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2721 }
2722 
2723 OperandMatchResultTy
2724 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2725   // TODO: add syntactic sugar for 1/(2*PI)
2726 
2727   assert(!isRegister());
2728   assert(!isModifier());
2729 
2730   const auto& Tok = getToken();
2731   const auto& NextTok = peekToken();
2732   bool IsReal = Tok.is(AsmToken::Real);
2733   SMLoc S = getLoc();
2734   bool Negate = false;
2735 
2736   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2737     lex();
2738     IsReal = true;
2739     Negate = true;
2740   }
2741 
2742   if (IsReal) {
2743     // Floating-point expressions are not supported.
2744     // Can only allow floating-point literals with an
2745     // optional sign.
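    // For example, "1.5" is kept here as the 64-bit IEEE double bit pattern
    // 0x3FF8000000000000; conversion to the operand's actual format is
    // deferred to addLiteralImmOperand / addKImmFPOperands.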
2746 
2747     StringRef Num = getTokenStr();
2748     lex();
2749 
2750     APFloat RealVal(APFloat::IEEEdouble());
2751     auto roundMode = APFloat::rmNearestTiesToEven;
2752     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2753       return MatchOperand_ParseFail;
2754     }
2755     if (Negate)
2756       RealVal.changeSign();
2757 
2758     Operands.push_back(
2759       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2760                                AMDGPUOperand::ImmTyNone, true));
2761 
2762     return MatchOperand_Success;
2763 
2764   } else {
2765     int64_t IntVal;
2766     const MCExpr *Expr;
2767     SMLoc S = getLoc();
2768 
2769     if (HasSP3AbsModifier) {
2770       // This is a workaround for handling expressions
2771       // as arguments of SP3 'abs' modifier, for example:
2772       //     |1.0|
2773       //     |-1|
2774       //     |1+x|
2775       // This syntax is not compatible with syntax of standard
2776       // MC expressions (due to the trailing '|').
2777       SMLoc EndLoc;
2778       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2779         return MatchOperand_ParseFail;
2780     } else {
2781       if (Parser.parseExpression(Expr))
2782         return MatchOperand_ParseFail;
2783     }
2784 
2785     if (Expr->evaluateAsAbsolute(IntVal)) {
2786       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2787     } else {
2788       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2789     }
2790 
2791     return MatchOperand_Success;
2792   }
2793 
2794   return MatchOperand_NoMatch;
2795 }
2796 
2797 OperandMatchResultTy
2798 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2799   if (!isRegister())
2800     return MatchOperand_NoMatch;
2801 
2802   if (auto R = parseRegister()) {
2803     assert(R->isReg());
2804     Operands.push_back(std::move(R));
2805     return MatchOperand_Success;
2806   }
2807   return MatchOperand_ParseFail;
2808 }
2809 
2810 OperandMatchResultTy
2811 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2812   auto res = parseReg(Operands);
2813   if (res != MatchOperand_NoMatch) {
2814     return res;
2815   } else if (isModifier()) {
2816     return MatchOperand_NoMatch;
2817   } else {
2818     return parseImm(Operands, HasSP3AbsMod);
2819   }
2820 }
2821 
2822 bool
2823 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2824   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2825     const auto &str = Token.getString();
2826     return str == "abs" || str == "neg" || str == "sext";
2827   }
2828   return false;
2829 }
2830 
2831 bool
2832 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2833   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2834 }
2835 
2836 bool
2837 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2838   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2839 }
2840 
2841 bool
2842 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2843   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2844 }
2845 
2846 // Check if this is an operand modifier or an opcode modifier
2847 // that may look like an expression but is not one. We should
2848 // avoid parsing these modifiers as expressions. Currently
2849 // recognized sequences are:
2850 //   |...|
2851 //   abs(...)
2852 //   neg(...)
2853 //   sext(...)
2854 //   -reg
2855 //   -|...|
2856 //   -abs(...)
2857 //   name:...
2858 // Note that simple opcode modifiers like 'gds' may be parsed as
2859 // expressions; this is a special case. See getExpressionAsToken.
2860 //
2861 bool
2862 AMDGPUAsmParser::isModifier() {
2863 
2864   AsmToken Tok = getToken();
2865   AsmToken NextToken[2];
2866   peekTokens(NextToken);
2867 
2868   return isOperandModifier(Tok, NextToken[0]) ||
2869          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2870          isOpcodeModifierWithVal(Tok, NextToken[0]);
2871 }
2872 
2873 // Check if the current token is an SP3 'neg' modifier.
2874 // Currently this modifier is allowed in the following context:
2875 //
2876 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2877 // 2. Before an 'abs' modifier: -abs(...)
2878 // 3. Before an SP3 'abs' modifier: -|...|
2879 //
2880 // In all other cases "-" is handled as a part
2881 // of an expression that follows the sign.
2882 //
2883 // Note: When "-" is followed by an integer literal N,
2884 // it is interpreted as integer negation rather
2885 // than a floating-point NEG modifier applied to N.
2886 // Besides being counter-intuitive, such use of a floating-point
2887 // NEG modifier would have resulted in different meanings
2888 // of integer literals used with VOP1/2/C and VOP3,
2889 // for example:
2890 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2891 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2892 // Negative fp literals with a preceding "-" are
2893 // handled likewise for uniformity.
2894 //
2895 bool
2896 AMDGPUAsmParser::parseSP3NegModifier() {
2897 
2898   AsmToken NextToken[2];
2899   peekTokens(NextToken);
2900 
2901   if (isToken(AsmToken::Minus) &&
2902       (isRegister(NextToken[0], NextToken[1]) ||
2903        NextToken[0].is(AsmToken::Pipe) ||
2904        isId(NextToken[0], "abs"))) {
2905     lex();
2906     return true;
2907   }
2908 
2909   return false;
2910 }
2911 
2912 OperandMatchResultTy
2913 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2914                                               bool AllowImm) {
2915   bool Neg, SP3Neg;
2916   bool Abs, SP3Abs;
2917   SMLoc Loc;
2918 
2919   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2920   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2921     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2922     return MatchOperand_ParseFail;
2923   }
2924 
2925   SP3Neg = parseSP3NegModifier();
2926 
2927   Loc = getLoc();
2928   Neg = trySkipId("neg");
2929   if (Neg && SP3Neg) {
2930     Error(Loc, "expected register or immediate");
2931     return MatchOperand_ParseFail;
2932   }
2933   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2934     return MatchOperand_ParseFail;
2935 
2936   Abs = trySkipId("abs");
2937   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2938     return MatchOperand_ParseFail;
2939 
2940   Loc = getLoc();
2941   SP3Abs = trySkipToken(AsmToken::Pipe);
2942   if (Abs && SP3Abs) {
2943     Error(Loc, "expected register or immediate");
2944     return MatchOperand_ParseFail;
2945   }
2946 
2947   OperandMatchResultTy Res;
2948   if (AllowImm) {
2949     Res = parseRegOrImm(Operands, SP3Abs);
2950   } else {
2951     Res = parseReg(Operands);
2952   }
2953   if (Res != MatchOperand_Success) {
2954     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2955   }
2956 
2957   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2958     return MatchOperand_ParseFail;
2959   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2960     return MatchOperand_ParseFail;
2961   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2962     return MatchOperand_ParseFail;
2963 
2964   AMDGPUOperand::Modifiers Mods;
2965   Mods.Abs = Abs || SP3Abs;
2966   Mods.Neg = Neg || SP3Neg;
2967 
2968   if (Mods.hasFPModifiers()) {
2969     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2970     if (Op.isExpr()) {
2971       Error(Op.getStartLoc(), "expected an absolute expression");
2972       return MatchOperand_ParseFail;
2973     }
2974     Op.setModifiers(Mods);
2975   }
2976   return MatchOperand_Success;
2977 }
2978 
2979 OperandMatchResultTy
2980 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2981                                                bool AllowImm) {
2982   bool Sext = trySkipId("sext");
2983   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2984     return MatchOperand_ParseFail;
2985 
2986   OperandMatchResultTy Res;
2987   if (AllowImm) {
2988     Res = parseRegOrImm(Operands);
2989   } else {
2990     Res = parseReg(Operands);
2991   }
2992   if (Res != MatchOperand_Success) {
2993     return Sext? MatchOperand_ParseFail : Res;
2994   }
2995 
2996   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2997     return MatchOperand_ParseFail;
2998 
2999   AMDGPUOperand::Modifiers Mods;
3000   Mods.Sext = Sext;
3001 
3002   if (Mods.hasIntModifiers()) {
3003     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3004     if (Op.isExpr()) {
3005       Error(Op.getStartLoc(), "expected an absolute expression");
3006       return MatchOperand_ParseFail;
3007     }
3008     Op.setModifiers(Mods);
3009   }
3010 
3011   return MatchOperand_Success;
3012 }
3013 
3014 OperandMatchResultTy
3015 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3016   return parseRegOrImmWithFPInputMods(Operands, false);
3017 }
3018 
3019 OperandMatchResultTy
3020 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3021   return parseRegOrImmWithIntInputMods(Operands, false);
3022 }
3023 
3024 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3025   auto Loc = getLoc();
3026   if (trySkipId("off")) {
3027     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3028                                                 AMDGPUOperand::ImmTyOff, false));
3029     return MatchOperand_Success;
3030   }
3031 
3032   if (!isRegister())
3033     return MatchOperand_NoMatch;
3034 
3035   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3036   if (Reg) {
3037     Operands.push_back(std::move(Reg));
3038     return MatchOperand_Success;
3039   }
3040 
3041   return MatchOperand_ParseFail;
3042 
3043 }
3044 
3045 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3046   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3047 
3048   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3049       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3050       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3051       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3052     return Match_InvalidOperand;
3053 
3054   if ((TSFlags & SIInstrFlags::VOP3) &&
3055       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3056       getForcedEncodingSize() != 64)
3057     return Match_PreferE32;
3058 
3059   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3060       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3061     // v_mac_f32/16 allow only dst_sel == DWORD;
3062     auto OpNum =
3063         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3064     const auto &Op = Inst.getOperand(OpNum);
3065     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3066       return Match_InvalidOperand;
3067     }
3068   }
3069 
3070   return Match_Success;
3071 }
3072 
3073 static ArrayRef<unsigned> getAllVariants() {
3074   static const unsigned Variants[] = {
3075     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3076     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3077   };
3078 
3079   return makeArrayRef(Variants);
3080 }
3081 
3082 // What asm variants we should check
3083 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3084   if (getForcedEncodingSize() == 32) {
3085     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3086     return makeArrayRef(Variants);
3087   }
3088 
3089   if (isForcedVOP3()) {
3090     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3091     return makeArrayRef(Variants);
3092   }
3093 
3094   if (isForcedSDWA()) {
3095     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3096                                         AMDGPUAsmVariants::SDWA9};
3097     return makeArrayRef(Variants);
3098   }
3099 
3100   if (isForcedDPP()) {
3101     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3102     return makeArrayRef(Variants);
3103   }
3104 
3105   return getAllVariants();
3106 }
3107 
3108 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3109   if (getForcedEncodingSize() == 32)
3110     return "e32";
3111 
3112   if (isForcedVOP3())
3113     return "e64";
3114 
3115   if (isForcedSDWA())
3116     return "sdwa";
3117 
3118   if (isForcedDPP())
3119     return "dpp";
3120 
3121   return "";
3122 }
3123 
3124 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3125   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3126   const unsigned Num = Desc.getNumImplicitUses();
3127   for (unsigned i = 0; i < Num; ++i) {
3128     unsigned Reg = Desc.ImplicitUses[i];
3129     switch (Reg) {
3130     case AMDGPU::FLAT_SCR:
3131     case AMDGPU::VCC:
3132     case AMDGPU::VCC_LO:
3133     case AMDGPU::VCC_HI:
3134     case AMDGPU::M0:
3135       return Reg;
3136     default:
3137       break;
3138     }
3139   }
3140   return AMDGPU::NoRegister;
3141 }
3142 
// NB: This code is correct only when used to check constant
// bus limitations because GFX7 does not support f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
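// For example, a 32-bit source encoding the value 1.0 (0x3f800000) is an
// inline constant, while an arbitrary literal such as 0x12345678 is not and
// therefore occupies a constant bus / literal slot.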
3147 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3148                                        unsigned OpIdx) const {
3149   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3150 
3151   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3152     return false;
3153   }
3154 
3155   const MCOperand &MO = Inst.getOperand(OpIdx);
3156 
3157   int64_t Val = MO.getImm();
3158   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3159 
3160   switch (OpSize) { // expected operand size
3161   case 8:
3162     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3163   case 4:
3164     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3165   case 2: {
3166     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3167     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3168         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3169         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3170       return AMDGPU::isInlinableIntLiteral(Val);
3171 
3172     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3173         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3174         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3175       return AMDGPU::isInlinableIntLiteralV216(Val);
3176 
3177     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3178         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3179         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3180       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3181 
3182     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3183   }
3184   default:
3185     llvm_unreachable("invalid operand size");
3186   }
3187 }
3188 
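// Maximum number of scalar (constant bus) sources an instruction may use:
// one on pre-GFX10 targets, and on GFX10+ two for most VOP instructions but
// only one for 64-bit shifts.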
3189 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3190   if (!isGFX10Plus())
3191     return 1;
3192 
3193   switch (Opcode) {
3194   // 64-bit shift instructions can use only one scalar value input
3195   case AMDGPU::V_LSHLREV_B64_e64:
3196   case AMDGPU::V_LSHLREV_B64_gfx10:
3197   case AMDGPU::V_LSHRREV_B64_e64:
3198   case AMDGPU::V_LSHRREV_B64_gfx10:
3199   case AMDGPU::V_ASHRREV_I64_e64:
3200   case AMDGPU::V_ASHRREV_I64_gfx10:
3201   case AMDGPU::V_LSHL_B64_e64:
3202   case AMDGPU::V_LSHR_B64_e64:
3203   case AMDGPU::V_ASHR_I64_e64:
3204     return 1;
3205   default:
3206     return 2;
3207   }
3208 }
3209 
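// An operand occupies a constant bus slot if it is a literal (a non-inline
// immediate), an expression, or an SGPR other than null.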
3210 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3211   const MCOperand &MO = Inst.getOperand(OpIdx);
3212   if (MO.isImm()) {
3213     return !isInlineConstant(Inst, OpIdx);
3214   } else if (MO.isReg()) {
3215     auto Reg = MO.getReg();
3216     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3217     auto PReg = mc2PseudoReg(Reg);
3218     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3219   } else {
3220     return true;
3221   }
3222 }
3223 
3224 bool
3225 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3226                                                 const OperandVector &Operands) {
3227   const unsigned Opcode = Inst.getOpcode();
3228   const MCInstrDesc &Desc = MII.get(Opcode);
3229   unsigned LastSGPR = AMDGPU::NoRegister;
3230   unsigned ConstantBusUseCount = 0;
3231   unsigned NumLiterals = 0;
3232   unsigned LiteralSize;
3233 
3234   if (Desc.TSFlags &
3235       (SIInstrFlags::VOPC |
3236        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3237        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3238        SIInstrFlags::SDWA)) {
3239     // Check special imm operands (used by madmk, etc)
3240     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3241       ++ConstantBusUseCount;
3242     }
3243 
3244     SmallDenseSet<unsigned> SGPRsUsed;
3245     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3246     if (SGPRUsed != AMDGPU::NoRegister) {
3247       SGPRsUsed.insert(SGPRUsed);
3248       ++ConstantBusUseCount;
3249     }
3250 
3251     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3252     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3253     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3254 
3255     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3256 
3257     for (int OpIdx : OpIndices) {
3258       if (OpIdx == -1) break;
3259 
3260       const MCOperand &MO = Inst.getOperand(OpIdx);
3261       if (usesConstantBus(Inst, OpIdx)) {
3262         if (MO.isReg()) {
3263           LastSGPR = mc2PseudoReg(MO.getReg());
          // Pairs of registers with partial intersections such as
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but they are disabled anyway.
          // Note that this code mimics SIInstrInfo::verifyInstruction.
3270           if (!SGPRsUsed.count(LastSGPR)) {
3271             SGPRsUsed.insert(LastSGPR);
3272             ++ConstantBusUseCount;
3273           }
3274         } else { // Expression or a literal
3275 
3276           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3277             continue; // special operand like VINTERP attr_chan
3278 
3279           // An instruction may use only one literal.
3280           // This has been validated on the previous step.
3281           // See validateVOP3Literal.
3282           // This literal may be used as more than one operand.
3283           // If all these operands are of the same size,
3284           // this literal counts as one scalar value.
3285           // Otherwise it counts as 2 scalar values.
3286           // See "GFX10 Shader Programming", section 3.6.2.3.
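          // For example (hypothetical GFX10 source): a literal shared by two
          // 32-bit operands counts as one scalar value, while the same
          // literal shared by a 16-bit and a 64-bit operand counts as two.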
3287 
3288           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3289           if (Size < 4) Size = 4;
3290 
3291           if (NumLiterals == 0) {
3292             NumLiterals = 1;
3293             LiteralSize = Size;
3294           } else if (LiteralSize != Size) {
3295             NumLiterals = 2;
3296           }
3297         }
3298       }
3299     }
3300   }
3301   ConstantBusUseCount += NumLiterals;
3302 
3303   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3304     return true;
3305 
3306   SMLoc LitLoc = getLitLoc(Operands);
3307   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3308   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3309   Error(Loc, "invalid operand (violates constant bus restrictions)");
3310   return false;
3311 }
3312 
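// For instructions whose vdst carries an EARLY_CLOBBER constraint, the
// destination register must not overlap any of the source registers.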
3313 bool
3314 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3315                                                  const OperandVector &Operands) {
3316   const unsigned Opcode = Inst.getOpcode();
3317   const MCInstrDesc &Desc = MII.get(Opcode);
3318 
3319   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3320   if (DstIdx == -1 ||
3321       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3322     return true;
3323   }
3324 
3325   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3326 
3327   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3328   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3329   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3330 
3331   assert(DstIdx != -1);
3332   const MCOperand &Dst = Inst.getOperand(DstIdx);
3333   assert(Dst.isReg());
3334   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3335 
3336   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3337 
3338   for (int SrcIdx : SrcIndices) {
3339     if (SrcIdx == -1) break;
3340     const MCOperand &Src = Inst.getOperand(SrcIdx);
3341     if (Src.isReg()) {
3342       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3343       if (isRegIntersect(DstReg, SrcReg, TRI)) {
3344         Error(getRegLoc(SrcReg, Operands),
3345           "destination must be different than all sources");
3346         return false;
3347       }
3348     }
3349   }
3350 
3351   return true;
3352 }
3353 
3354 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3355 
3356   const unsigned Opc = Inst.getOpcode();
3357   const MCInstrDesc &Desc = MII.get(Opc);
3358 
3359   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3360     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3361     assert(ClampIdx != -1);
3362     return Inst.getOperand(ClampIdx).getImm() == 0;
3363   }
3364 
3365   return true;
3366 }
3367 
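// Check that the size of the vdata operand matches the number of channels
// enabled by dmask (always 4 for gather4), plus one dword for tfe, with
// packed d16 halving the dword count (rounded up).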
3368 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3369 
3370   const unsigned Opc = Inst.getOpcode();
3371   const MCInstrDesc &Desc = MII.get(Opc);
3372 
3373   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3374     return true;
3375 
3376   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3377   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3378   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3379 
3380   assert(VDataIdx != -1);
3381 
3382   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3383     return true;
3384 
3385   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3386   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3387   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3388   if (DMask == 0)
3389     DMask = 1;
3390 
3391   unsigned DataSize =
3392     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3393   if (hasPackedD16()) {
3394     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3395     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3396       DataSize = (DataSize + 1) / 2;
3397   }
3398 
3399   return (VDataSize / 4) == DataSize + TFESize;
3400 }
3401 
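// On GFX10+, check that the number of address registers matches what the
// dim, gradients, coordinates and lod/clamp/mip arguments require (with
// non-NSA encodings rounding the address size up to 8 or 16 dwords).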
3402 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3403   const unsigned Opc = Inst.getOpcode();
3404   const MCInstrDesc &Desc = MII.get(Opc);
3405 
3406   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3407     return true;
3408 
3409   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3410 
3411   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3412       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3413   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3414   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3415   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3416 
3417   assert(VAddr0Idx != -1);
3418   assert(SrsrcIdx != -1);
3419   assert(SrsrcIdx > VAddr0Idx);
3420 
3421   if (DimIdx == -1)
3422     return true; // intersect_ray
3423 
3424   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3425   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3426   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3427   unsigned VAddrSize =
3428       IsNSA ? SrsrcIdx - VAddr0Idx
3429             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3430 
3431   unsigned AddrSize = BaseOpcode->NumExtraArgs +
3432                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
3433                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
3434                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
3435   if (!IsNSA) {
3436     if (AddrSize > 8)
3437       AddrSize = 16;
3438     else if (AddrSize > 4)
3439       AddrSize = 8;
3440   }
3441 
3442   return VAddrSize == AddrSize;
3443 }
3444 
3445 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3446 
3447   const unsigned Opc = Inst.getOpcode();
3448   const MCInstrDesc &Desc = MII.get(Opc);
3449 
3450   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3451     return true;
3452   if (!Desc.mayLoad() || !Desc.mayStore())
3453     return true; // Not atomic
3454 
3455   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3456   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3457 
3458   // This is an incomplete check because image_atomic_cmpswap
3459   // may only use 0x3 and 0xf while other atomic operations
3460   // may use 0x1 and 0x3. However these limitations are
3461   // verified when we check that dmask matches dst size.
3462   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3463 }
3464 
3465 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3466 
3467   const unsigned Opc = Inst.getOpcode();
3468   const MCInstrDesc &Desc = MII.get(Opc);
3469 
3470   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3471     return true;
3472 
3473   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3474   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3475 
3476   // GATHER4 instructions use dmask in a different fashion compared to
3477   // other MIMG instructions. The only useful DMASK values are
3478   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3479   // (red,red,red,red) etc.) The ISA document doesn't mention
3480   // this.
3481   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3482 }
3483 
3484 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3485   const unsigned Opc = Inst.getOpcode();
3486   const MCInstrDesc &Desc = MII.get(Opc);
3487 
3488   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3489     return true;
3490 
3491   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3492   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3493       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3494 
3495   if (!BaseOpcode->MSAA)
3496     return true;
3497 
3498   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3499   assert(DimIdx != -1);
3500 
3501   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3502   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3503 
3504   return DimInfo->MSAA;
3505 }
3506 
3507 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3508 {
3509   switch (Opcode) {
3510   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3511   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3512   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3513     return true;
3514   default:
3515     return false;
3516   }
3517 }
3518 
// movrels* opcodes should only allow VGPRs as src0.
// This is specified in the .td description for vop1/vop3,
// but sdwa is handled differently. See isSDWAOperand.
3522 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3523                                       const OperandVector &Operands) {
3524 
3525   const unsigned Opc = Inst.getOpcode();
3526   const MCInstrDesc &Desc = MII.get(Opc);
3527 
3528   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3529     return true;
3530 
3531   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3532   assert(Src0Idx != -1);
3533 
3534   SMLoc ErrLoc;
3535   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3536   if (Src0.isReg()) {
3537     auto Reg = mc2PseudoReg(Src0.getReg());
3538     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3539     if (!isSGPR(Reg, TRI))
3540       return true;
3541     ErrLoc = getRegLoc(Reg, Operands);
3542   } else {
3543     ErrLoc = getConstLoc(Operands);
3544   }
3545 
3546   Error(ErrLoc, "source operand must be a VGPR");
3547   return false;
3548 }
3549 
3550 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3551                                           const OperandVector &Operands) {
3552 
3553   const unsigned Opc = Inst.getOpcode();
3554 
3555   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3556     return true;
3557 
3558   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3559   assert(Src0Idx != -1);
3560 
3561   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3562   if (!Src0.isReg())
3563     return true;
3564 
3565   auto Reg = mc2PseudoReg(Src0.getReg());
3566   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3567   if (isSGPR(Reg, TRI)) {
3568     Error(getRegLoc(Reg, Operands),
3569           "source operand must be either a VGPR or an inline constant");
3570     return false;
3571   }
3572 
3573   return true;
3574 }
3575 
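// v_div_scale_f32/f64 do not accept the ABS source modifier; reject any
// source operand whose modifiers have ABS set.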
3576 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3577   switch (Inst.getOpcode()) {
3578   default:
3579     return true;
3580   case V_DIV_SCALE_F32_gfx6_gfx7:
3581   case V_DIV_SCALE_F32_vi:
3582   case V_DIV_SCALE_F32_gfx10:
3583   case V_DIV_SCALE_F64_gfx6_gfx7:
3584   case V_DIV_SCALE_F64_vi:
3585   case V_DIV_SCALE_F64_gfx10:
3586     break;
3587   }
3588 
3589   // TODO: Check that src0 = src1 or src2.
3590 
  for (auto Name : {AMDGPU::OpName::src0_modifiers,
                    AMDGPU::OpName::src1_modifiers,
                    AMDGPU::OpName::src2_modifiers}) {
3594     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3595             .getImm() &
3596         SISrcMods::ABS) {
3597       return false;
3598     }
3599   }
3600 
3601   return true;
3602 }
3603 
3604 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3605 
3606   const unsigned Opc = Inst.getOpcode();
3607   const MCInstrDesc &Desc = MII.get(Opc);
3608 
3609   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3610     return true;
3611 
3612   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3613   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3614     if (isCI() || isSI())
3615       return false;
3616   }
3617 
3618   return true;
3619 }
3620 
3621 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3622   const unsigned Opc = Inst.getOpcode();
3623   const MCInstrDesc &Desc = MII.get(Opc);
3624 
3625   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3626     return true;
3627 
3628   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3629   if (DimIdx < 0)
3630     return true;
3631 
3632   long Imm = Inst.getOperand(DimIdx).getImm();
3633   if (Imm < 0 || Imm >= 8)
3634     return false;
3635 
3636   return true;
3637 }
3638 
3639 static bool IsRevOpcode(const unsigned Opcode)
3640 {
3641   switch (Opcode) {
3642   case AMDGPU::V_SUBREV_F32_e32:
3643   case AMDGPU::V_SUBREV_F32_e64:
3644   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3645   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3646   case AMDGPU::V_SUBREV_F32_e32_vi:
3647   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3648   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3649   case AMDGPU::V_SUBREV_F32_e64_vi:
3650 
3651   case AMDGPU::V_SUBREV_CO_U32_e32:
3652   case AMDGPU::V_SUBREV_CO_U32_e64:
3653   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3654   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3655 
3656   case AMDGPU::V_SUBBREV_U32_e32:
3657   case AMDGPU::V_SUBBREV_U32_e64:
3658   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3659   case AMDGPU::V_SUBBREV_U32_e32_vi:
3660   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3661   case AMDGPU::V_SUBBREV_U32_e64_vi:
3662 
3663   case AMDGPU::V_SUBREV_U32_e32:
3664   case AMDGPU::V_SUBREV_U32_e64:
3665   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3666   case AMDGPU::V_SUBREV_U32_e32_vi:
3667   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3668   case AMDGPU::V_SUBREV_U32_e64_vi:
3669 
3670   case AMDGPU::V_SUBREV_F16_e32:
3671   case AMDGPU::V_SUBREV_F16_e64:
3672   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3673   case AMDGPU::V_SUBREV_F16_e32_vi:
3674   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3675   case AMDGPU::V_SUBREV_F16_e64_vi:
3676 
3677   case AMDGPU::V_SUBREV_U16_e32:
3678   case AMDGPU::V_SUBREV_U16_e64:
3679   case AMDGPU::V_SUBREV_U16_e32_vi:
3680   case AMDGPU::V_SUBREV_U16_e64_vi:
3681 
3682   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3683   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3684   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3685 
3686   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3687   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3688 
3689   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3690   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3691 
3692   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3693   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3694 
3695   case AMDGPU::V_LSHRREV_B32_e32:
3696   case AMDGPU::V_LSHRREV_B32_e64:
3697   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3698   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3699   case AMDGPU::V_LSHRREV_B32_e32_vi:
3700   case AMDGPU::V_LSHRREV_B32_e64_vi:
3701   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3702   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3703 
3704   case AMDGPU::V_ASHRREV_I32_e32:
3705   case AMDGPU::V_ASHRREV_I32_e64:
3706   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3707   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3708   case AMDGPU::V_ASHRREV_I32_e32_vi:
3709   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3710   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3711   case AMDGPU::V_ASHRREV_I32_e64_vi:
3712 
3713   case AMDGPU::V_LSHLREV_B32_e32:
3714   case AMDGPU::V_LSHLREV_B32_e64:
3715   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3716   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3717   case AMDGPU::V_LSHLREV_B32_e32_vi:
3718   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3719   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3720   case AMDGPU::V_LSHLREV_B32_e64_vi:
3721 
3722   case AMDGPU::V_LSHLREV_B16_e32:
3723   case AMDGPU::V_LSHLREV_B16_e64:
3724   case AMDGPU::V_LSHLREV_B16_e32_vi:
3725   case AMDGPU::V_LSHLREV_B16_e64_vi:
3726   case AMDGPU::V_LSHLREV_B16_gfx10:
3727 
3728   case AMDGPU::V_LSHRREV_B16_e32:
3729   case AMDGPU::V_LSHRREV_B16_e64:
3730   case AMDGPU::V_LSHRREV_B16_e32_vi:
3731   case AMDGPU::V_LSHRREV_B16_e64_vi:
3732   case AMDGPU::V_LSHRREV_B16_gfx10:
3733 
3734   case AMDGPU::V_ASHRREV_I16_e32:
3735   case AMDGPU::V_ASHRREV_I16_e64:
3736   case AMDGPU::V_ASHRREV_I16_e32_vi:
3737   case AMDGPU::V_ASHRREV_I16_e64_vi:
3738   case AMDGPU::V_ASHRREV_I16_gfx10:
3739 
3740   case AMDGPU::V_LSHLREV_B64_e64:
3741   case AMDGPU::V_LSHLREV_B64_gfx10:
3742   case AMDGPU::V_LSHLREV_B64_vi:
3743 
3744   case AMDGPU::V_LSHRREV_B64_e64:
3745   case AMDGPU::V_LSHRREV_B64_gfx10:
3746   case AMDGPU::V_LSHRREV_B64_vi:
3747 
3748   case AMDGPU::V_ASHRREV_I64_e64:
3749   case AMDGPU::V_ASHRREV_I64_gfx10:
3750   case AMDGPU::V_ASHRREV_I64_vi:
3751 
3752   case AMDGPU::V_PK_LSHLREV_B16:
3753   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3754   case AMDGPU::V_PK_LSHLREV_B16_vi:
3755 
3756   case AMDGPU::V_PK_LSHRREV_B16:
3757   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3758   case AMDGPU::V_PK_LSHRREV_B16_vi:
3759   case AMDGPU::V_PK_ASHRREV_I16:
3760   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3761   case AMDGPU::V_PK_ASHRREV_I16_vi:
3762     return true;
3763   default:
3764     return false;
3765   }
3766 }
3767 
3768 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3769 
3770   using namespace SIInstrFlags;
3771   const unsigned Opcode = Inst.getOpcode();
3772   const MCInstrDesc &Desc = MII.get(Opcode);
3773 
3774   // lds_direct register is defined so that it can be used
3775   // with 9-bit operands only. Ignore encodings which do not accept these.
3776   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3777   if ((Desc.TSFlags & Enc) == 0)
3778     return None;
3779 
3780   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3781     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3782     if (SrcIdx == -1)
3783       break;
3784     const auto &Src = Inst.getOperand(SrcIdx);
3785     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3786 
3787       if (isGFX90A())
3788         return StringRef("lds_direct is not supported on this GPU");
3789 
3790       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3791         return StringRef("lds_direct cannot be used with this instruction");
3792 
3793       if (SrcName != OpName::src0)
3794         return StringRef("lds_direct may be used as src0 only");
3795     }
3796   }
3797 
3798   return None;
3799 }
3800 
3801 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3802   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3803     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3804     if (Op.isFlatOffset())
3805       return Op.getStartLoc();
3806   }
3807   return getLoc();
3808 }
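// Validate the offset modifier of FLAT instructions: targets without flat
// offsets only accept 0, global/scratch segments take a signed offset, and
// plain flat takes an unsigned offset.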
3809 
3810 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3811                                          const OperandVector &Operands) {
3812   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3813   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3814     return true;
3815 
3816   auto Opcode = Inst.getOpcode();
3817   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3818   assert(OpNum != -1);
3819 
3820   const auto &Op = Inst.getOperand(OpNum);
3821   if (!hasFlatOffsets() && Op.getImm() != 0) {
3822     Error(getFlatOffsetLoc(Operands),
3823           "flat offset modifier is not supported on this GPU");
3824     return false;
3825   }
3826 
  // For the FLAT segment the offset must be positive;
  // the MSB is ignored and forced to zero.
3829   if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
3830     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
3831     if (!isIntN(OffsetSize, Op.getImm())) {
3832       Error(getFlatOffsetLoc(Operands),
3833             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
3834       return false;
3835     }
3836   } else {
3837     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
3838     if (!isUIntN(OffsetSize, Op.getImm())) {
3839       Error(getFlatOffsetLoc(Operands),
3840             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
3841       return false;
3842     }
3843   }
3844 
3845   return true;
3846 }
3847 
3848 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3849   // Start with second operand because SMEM Offset cannot be dst or src0.
3850   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
3851     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3852     if (Op.isSMEMOffset())
3853       return Op.getStartLoc();
3854   }
3855   return getLoc();
3856 }
3857 
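// Range-check the immediate offset of SMEM instructions: buffer and VI
// forms take a 20-bit unsigned offset, other GFX9+ forms a 21-bit signed
// offset. CI/SI offsets are not checked here.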
3858 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3859                                          const OperandVector &Operands) {
3860   if (isCI() || isSI())
3861     return true;
3862 
3863   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3864   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3865     return true;
3866 
3867   auto Opcode = Inst.getOpcode();
3868   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3869   if (OpNum == -1)
3870     return true;
3871 
3872   const auto &Op = Inst.getOperand(OpNum);
3873   if (!Op.isImm())
3874     return true;
3875 
3876   uint64_t Offset = Op.getImm();
3877   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3878   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3879       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3880     return true;
3881 
3882   Error(getSMEMOffsetLoc(Operands),
3883         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3884                                "expected a 21-bit signed offset");
3885 
3886   return false;
3887 }
3888 
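// SOP2/SOPC instructions may use at most one unique literal (or expression)
// across their source operands.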
3889 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3890   unsigned Opcode = Inst.getOpcode();
3891   const MCInstrDesc &Desc = MII.get(Opcode);
3892   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3893     return true;
3894 
3895   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3896   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3897 
3898   const int OpIndices[] = { Src0Idx, Src1Idx };
3899 
3900   unsigned NumExprs = 0;
3901   unsigned NumLiterals = 0;
3902   uint32_t LiteralValue;
3903 
3904   for (int OpIdx : OpIndices) {
3905     if (OpIdx == -1) break;
3906 
3907     const MCOperand &MO = Inst.getOperand(OpIdx);
    // Exclude special imm operands (like those used by s_set_gpr_idx_on)
3909     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3910       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3911         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3912         if (NumLiterals == 0 || LiteralValue != Value) {
3913           LiteralValue = Value;
3914           ++NumLiterals;
3915         }
3916       } else if (MO.isExpr()) {
3917         ++NumExprs;
3918       }
3919     }
3920   }
3921 
3922   return NumLiterals + NumExprs <= 1;
3923 }
3924 
3925 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3926   const unsigned Opc = Inst.getOpcode();
3927   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3928       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3929     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3930     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3931 
3932     if (OpSel & ~3)
3933       return false;
3934   }
3935   return true;
3936 }
3937 
3938 // Check if VCC register matches wavefront size
3939 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3940   auto FB = getFeatureBits();
3941   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3942     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3943 }
3944 
3945 // VOP3 literal is only allowed in GFX10+ and only one can be used
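// For example (hypothetical GFX10 source), reusing the same literal value
// for two sources is accepted, while two distinct literal values are
// rejected with "only one literal operand is allowed".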
3946 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
3947                                           const OperandVector &Operands) {
3948   unsigned Opcode = Inst.getOpcode();
3949   const MCInstrDesc &Desc = MII.get(Opcode);
3950   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3951     return true;
3952 
3953   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3954   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3955   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3956 
3957   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3958 
3959   unsigned NumExprs = 0;
3960   unsigned NumLiterals = 0;
3961   uint32_t LiteralValue;
3962 
3963   for (int OpIdx : OpIndices) {
3964     if (OpIdx == -1) break;
3965 
3966     const MCOperand &MO = Inst.getOperand(OpIdx);
3967     if (!MO.isImm() && !MO.isExpr())
3968       continue;
3969     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
3970       continue;
3971 
3972     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3973         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
3974       Error(getConstLoc(Operands),
3975             "inline constants are not allowed for this operand");
3976       return false;
3977     }
3978 
3979     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3980       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3981       if (NumLiterals == 0 || LiteralValue != Value) {
3982         LiteralValue = Value;
3983         ++NumLiterals;
3984       }
3985     } else if (MO.isExpr()) {
3986       ++NumExprs;
3987     }
3988   }
3989   NumLiterals += NumExprs;
3990 
3991   if (!NumLiterals)
3992     return true;
3993 
3994   if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
3995     Error(getLitLoc(Operands), "literal operands are not supported");
3996     return false;
3997   }
3998 
3999   if (NumLiterals > 1) {
4000     Error(getLitLoc(Operands), "only one literal operand is allowed");
4001     return false;
4002   }
4003 
4004   return true;
4005 }
4006 
4007 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4008 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4009                          const MCRegisterInfo *MRI) {
4010   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4011   if (OpIdx < 0)
4012     return -1;
4013 
4014   const MCOperand &Op = Inst.getOperand(OpIdx);
4015   if (!Op.isReg())
4016     return -1;
4017 
4018   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4019   auto Reg = Sub ? Sub : Op.getReg();
4020   const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4021   return AGRP32.contains(Reg) ? 1 : 0;
4022 }
4023 
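// For FLAT/MUBUF/MTBUF/MIMG/DS memory instructions, check the register class
// of the data/dst operands: on gfx90a they must be either all VGPRs or all
// AGPRs; on other targets AGPR data and dst operands are not allowed.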
4024 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4025   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4026   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4027                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4028                   SIInstrFlags::DS)) == 0)
4029     return true;
4030 
4031   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4032                                                       : AMDGPU::OpName::vdata;
4033 
4034   const MCRegisterInfo *MRI = getMRI();
4035   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4036   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4037 
4038   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4039     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4040     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4041       return false;
4042   }
4043 
4044   auto FB = getFeatureBits();
4045   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4046     if (DataAreg < 0 || DstAreg < 0)
4047       return true;
4048     return DstAreg == DataAreg;
4049   }
4050 
4051   return DstAreg < 1 && DataAreg < 1;
4052 }
4053 
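// On gfx90a, VGPR and AGPR tuples (operands with a sub0 subregister) must
// start at an even register number.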
4054 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4055   auto FB = getFeatureBits();
4056   if (!FB[AMDGPU::FeatureGFX90AInsts])
4057     return true;
4058 
4059   const MCRegisterInfo *MRI = getMRI();
4060   const MCRegisterClass &VGRP32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4061   const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4062   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4063     const MCOperand &Op = Inst.getOperand(I);
4064     if (!Op.isReg())
4065       continue;
4066 
4067     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4068     if (!Sub)
4069       continue;
4070 
4071     if (VGRP32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4072       return false;
4073     if (AGRP32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4074       return false;
4075   }
4076 
4077   return true;
4078 }
4079 
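// Validate the cache policy (cpol) bits: SMRD accepts only glc/dlc, scc is
// rejected on gfx90a, returning atomics (except MIMG) require glc, and
// non-returning atomics must not use glc.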
4080 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4081                                             const OperandVector &Operands,
4082                                             const SMLoc &IDLoc) {
4083   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4084                                            AMDGPU::OpName::cpol);
4085   if (CPolPos == -1)
4086     return true;
4087 
4088   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4089 
4090   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4091   if ((TSFlags & (SIInstrFlags::SMRD)) &&
4092       (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
4093     Error(IDLoc, "invalid cache policy for SMRD instruction");
4094     return false;
4095   }
4096 
4097   if (isGFX90A() && (CPol & CPol::SCC)) {
4098     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4099     StringRef CStr(S.getPointer());
4100     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4101     Error(S, "scc is not supported on this GPU");
4102     return false;
4103   }
4104 
4105   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4106     return true;
4107 
4108   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4109     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4110       Error(IDLoc, "instruction must use glc");
4111       return false;
4112     }
4113   } else {
4114     if (CPol & CPol::GLC) {
4115       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4116       StringRef CStr(S.getPointer());
4117       S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
4118       Error(S, "instruction must not use glc");
4119       return false;
4120     }
4121   }
4122 
4123   return true;
4124 }
4125 
4126 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4127                                           const SMLoc &IDLoc,
4128                                           const OperandVector &Operands) {
4129   if (auto ErrMsg = validateLdsDirect(Inst)) {
4130     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4131     return false;
4132   }
4133   if (!validateSOPLiteral(Inst)) {
4134     Error(getLitLoc(Operands),
4135       "only one literal operand is allowed");
4136     return false;
4137   }
4138   if (!validateVOP3Literal(Inst, Operands)) {
4139     return false;
4140   }
4141   if (!validateConstantBusLimitations(Inst, Operands)) {
4142     return false;
4143   }
4144   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4145     return false;
4146   }
4147   if (!validateIntClampSupported(Inst)) {
4148     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4149       "integer clamping is not supported on this GPU");
4150     return false;
4151   }
4152   if (!validateOpSel(Inst)) {
4153     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4154       "invalid op_sel operand");
4155     return false;
4156   }
4157   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4158   if (!validateMIMGD16(Inst)) {
4159     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4160       "d16 modifier is not supported on this GPU");
4161     return false;
4162   }
4163   if (!validateMIMGDim(Inst)) {
4164     Error(IDLoc, "dim modifier is required on this GPU");
4165     return false;
4166   }
4167   if (!validateMIMGMSAA(Inst)) {
4168     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4169           "invalid dim; must be MSAA type");
4170     return false;
4171   }
4172   if (!validateMIMGDataSize(Inst)) {
4173     Error(IDLoc,
4174       "image data size does not match dmask and tfe");
4175     return false;
4176   }
4177   if (!validateMIMGAddrSize(Inst)) {
4178     Error(IDLoc,
4179       "image address size does not match dim and a16");
4180     return false;
4181   }
4182   if (!validateMIMGAtomicDMask(Inst)) {
4183     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4184       "invalid atomic image dmask");
4185     return false;
4186   }
4187   if (!validateMIMGGatherDMask(Inst)) {
4188     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4189       "invalid image_gather dmask: only one bit must be set");
4190     return false;
4191   }
4192   if (!validateMovrels(Inst, Operands)) {
4193     return false;
4194   }
4195   if (!validateFlatOffset(Inst, Operands)) {
4196     return false;
4197   }
4198   if (!validateSMEMOffset(Inst, Operands)) {
4199     return false;
4200   }
4201   if (!validateMAIAccWrite(Inst, Operands)) {
4202     return false;
4203   }
4204   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4205     return false;
4206   }
4207 
4208   if (!validateAGPRLdSt(Inst)) {
4209     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4210     ? "invalid register class: data and dst should be all VGPR or AGPR"
4211     : "invalid register class: agpr loads and stores not supported on this GPU"
4212     );
4213     return false;
4214   }
4215   if (!validateVGPRAlign(Inst)) {
4216     Error(IDLoc,
4217       "invalid register class: vgpr tuples must be 64 bit aligned");
4218     return false;
4219   }
4220 
4221   if (!validateDivScale(Inst)) {
4222     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4223     return false;
4224   }
4228 
4229   return true;
4230 }
4231 
4232 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4233                                             const FeatureBitset &FBS,
4234                                             unsigned VariantID = 0);
4235 
4236 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4237                                 const FeatureBitset &AvailableFeatures,
4238                                 unsigned VariantID);
4239 
4240 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4241                                        const FeatureBitset &FBS) {
4242   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4243 }
4244 
4245 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4246                                        const FeatureBitset &FBS,
4247                                        ArrayRef<unsigned> Variants) {
4248   for (auto Variant : Variants) {
4249     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4250       return true;
4251   }
4252 
4253   return false;
4254 }
4255 
4256 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4257                                                   const SMLoc &IDLoc) {
4258   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4259 
4260   // Check if requested instruction variant is supported.
4261   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4262     return false;
4263 
4264   // This instruction is not supported.
4265   // Clear any other pending errors because they are no longer relevant.
4266   getParser().clearPendingErrors();
4267 
4268   // Requested instruction variant is not supported.
4269   // Check if any other variants are supported.
4270   StringRef VariantName = getMatchedVariantName();
4271   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4272     return Error(IDLoc,
4273                  Twine(VariantName,
4274                        " variant of this instruction is not supported"));
4275   }
4276 
4277   // Finally check if this instruction is supported on any other GPU.
4278   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4279     return Error(IDLoc, "instruction not supported on this GPU");
4280   }
4281 
4282   // Instruction not supported on any GPU. Probably a typo.
4283   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4284   return Error(IDLoc, "invalid instruction" + Suggestion);
4285 }
4286 
4287 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4288                                               OperandVector &Operands,
4289                                               MCStreamer &Out,
4290                                               uint64_t &ErrorInfo,
4291                                               bool MatchingInlineAsm) {
4292   MCInst Inst;
4293   unsigned Result = Match_Success;
4294   for (auto Variant : getMatchedVariants()) {
4295     uint64_t EI;
4296     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4297                                   Variant);
    // We order match statuses from least to most specific and use the most
    // specific status as the result:
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4301     if ((R == Match_Success) ||
4302         (R == Match_PreferE32) ||
4303         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4304         (R == Match_InvalidOperand && Result != Match_MissingFeature
4305                                    && Result != Match_PreferE32) ||
4306         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4307                                    && Result != Match_MissingFeature
4308                                    && Result != Match_PreferE32)) {
4309       Result = R;
4310       ErrorInfo = EI;
4311     }
4312     if (R == Match_Success)
4313       break;
4314   }
4315 
4316   if (Result == Match_Success) {
4317     if (!validateInstruction(Inst, IDLoc, Operands)) {
4318       return true;
4319     }
4320     Inst.setLoc(IDLoc);
4321     Out.emitInstruction(Inst, getSTI());
4322     return false;
4323   }
4324 
4325   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4326   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4327     return true;
4328   }
4329 
4330   switch (Result) {
4331   default: break;
4332   case Match_MissingFeature:
4333     // It has been verified that the specified instruction
4334     // mnemonic is valid. A match was found but it requires
4335     // features which are not supported on this GPU.
4336     return Error(IDLoc, "operands are not valid for this GPU or mode");
4337 
4338   case Match_InvalidOperand: {
4339     SMLoc ErrorLoc = IDLoc;
4340     if (ErrorInfo != ~0ULL) {
4341       if (ErrorInfo >= Operands.size()) {
4342         return Error(IDLoc, "too few operands for instruction");
4343       }
4344       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4345       if (ErrorLoc == SMLoc())
4346         ErrorLoc = IDLoc;
4347     }
4348     return Error(ErrorLoc, "invalid operand for instruction");
4349   }
4350 
4351   case Match_PreferE32:
4352     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4353                         "should be encoded as e32");
4354   case Match_MnemonicFail:
4355     llvm_unreachable("Invalid instructions should have been handled already");
4356   }
4357   llvm_unreachable("Implement any new match types added!");
4358 }
4359 
4360 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4361   int64_t Tmp = -1;
4362   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4363     return true;
4364   }
4365   if (getParser().parseAbsoluteExpression(Tmp)) {
4366     return true;
4367   }
4368   Ret = static_cast<uint32_t>(Tmp);
4369   return false;
4370 }
4371 
4372 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4373                                                uint32_t &Minor) {
4374   if (ParseAsAbsoluteExpression(Major))
4375     return TokError("invalid major version");
4376 
4377   if (!trySkipToken(AsmToken::Comma))
4378     return TokError("minor version number required, comma expected");
4379 
4380   if (ParseAsAbsoluteExpression(Minor))
4381     return TokError("invalid minor version");
4382 
4383   return false;
4384 }
4385 
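// Handle the .amdgcn_target directive, e.g. (hypothetical target id):
//   .amdgcn_target "amdgcn-amd-amdhsa--gfx908"
// The quoted string must match the target id the streamer was configured with.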
4386 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4387   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4388     return TokError("directive only supported for amdgcn architecture");
4389 
4390   std::string TargetIDDirective;
4391   SMLoc TargetStart = getTok().getLoc();
4392   if (getParser().parseEscapedString(TargetIDDirective))
4393     return true;
4394 
4395   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4396   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4397     return getParser().Error(TargetRange.Start,
4398         (Twine(".amdgcn_target directive's target id ") +
4399          Twine(TargetIDDirective) +
4400          Twine(" does not match the specified target id ") +
4401          Twine(getTargetStreamer().getTargetID()->toString())).str());
4402 
4403   return false;
4404 }
4405 
4406 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4407   return Error(Range.Start, "value out of range", Range);
4408 }
4409 
4410 bool AMDGPUAsmParser::calculateGPRBlocks(
4411     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4412     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4413     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4414     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4415   // TODO(scott.linder): These calculations are duplicated from
4416   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4417   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4418 
4419   unsigned NumVGPRs = NextFreeVGPR;
4420   unsigned NumSGPRs = NextFreeSGPR;
4421 
4422   if (Version.Major >= 10)
4423     NumSGPRs = 0;
4424   else {
4425     unsigned MaxAddressableNumSGPRs =
4426         IsaInfo::getAddressableNumSGPRs(&getSTI());
4427 
4428     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4429         NumSGPRs > MaxAddressableNumSGPRs)
4430       return OutOfRangeError(SGPRRange);
4431 
4432     NumSGPRs +=
4433         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4434 
4435     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4436         NumSGPRs > MaxAddressableNumSGPRs)
4437       return OutOfRangeError(SGPRRange);
4438 
4439     if (Features.test(FeatureSGPRInitBug))
4440       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4441   }
4442 
4443   VGPRBlocks =
4444       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4445   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4446 
4447   return false;
4448 }
4449 
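// Handle the body of a .amdhsa_kernel directive. A minimal sketch of its use
// (hypothetical kernel name and register counts):
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel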
4450 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4451   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4452     return TokError("directive only supported for amdgcn architecture");
4453 
4454   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4455     return TokError("directive only supported for amdhsa OS");
4456 
4457   StringRef KernelName;
4458   if (getParser().parseIdentifier(KernelName))
4459     return true;
4460 
4461   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4462 
4463   StringSet<> Seen;
4464 
4465   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4466 
4467   SMRange VGPRRange;
4468   uint64_t NextFreeVGPR = 0;
4469   uint64_t AccumOffset = 0;
4470   SMRange SGPRRange;
4471   uint64_t NextFreeSGPR = 0;
4472   unsigned UserSGPRCount = 0;
4473   bool ReserveVCC = true;
4474   bool ReserveFlatScr = true;
4475   Optional<bool> EnableWavefrontSize32;
4476 
4477   while (true) {
4478     while (trySkipToken(AsmToken::EndOfStatement));
4479 
4480     StringRef ID;
4481     SMRange IDRange = getTok().getLocRange();
4482     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4483       return true;
4484 
4485     if (ID == ".end_amdhsa_kernel")
4486       break;
4487 
4488     if (Seen.find(ID) != Seen.end())
4489       return TokError(".amdhsa_ directives cannot be repeated");
4490     Seen.insert(ID);
4491 
4492     SMLoc ValStart = getLoc();
4493     int64_t IVal;
4494     if (getParser().parseAbsoluteExpression(IVal))
4495       return true;
4496     SMLoc ValEnd = getLoc();
4497     SMRange ValRange = SMRange(ValStart, ValEnd);
4498 
4499     if (IVal < 0)
4500       return OutOfRangeError(ValRange);
4501 
4502     uint64_t Val = IVal;
4503 
4504 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4505   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4506     return OutOfRangeError(RANGE);                                             \
4507   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
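// For example, the .amdhsa_system_sgpr_workgroup_id_x handler below uses
//   PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
//                    COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, ValRange)
// to range-check Val against the field width and pack it into the
// corresponding bits of compute_pgm_rsrc2.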
4508 
4509     if (ID == ".amdhsa_group_segment_fixed_size") {
4510       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4511         return OutOfRangeError(ValRange);
4512       KD.group_segment_fixed_size = Val;
4513     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4514       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4515         return OutOfRangeError(ValRange);
4516       KD.private_segment_fixed_size = Val;
4517     } else if (ID == ".amdhsa_kernarg_size") {
4518       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4519         return OutOfRangeError(ValRange);
4520       KD.kernarg_size = Val;
4521     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4522       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4523                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4524                        Val, ValRange);
4525       if (Val)
4526         UserSGPRCount += 4;
4527     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4528       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4529                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4530                        ValRange);
4531       if (Val)
4532         UserSGPRCount += 2;
4533     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4534       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4535                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4536                        ValRange);
4537       if (Val)
4538         UserSGPRCount += 2;
4539     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4540       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4541                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4542                        Val, ValRange);
4543       if (Val)
4544         UserSGPRCount += 2;
4545     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4546       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4547                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4548                        ValRange);
4549       if (Val)
4550         UserSGPRCount += 2;
4551     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4552       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4553                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4554                        ValRange);
4555       if (Val)
4556         UserSGPRCount += 2;
4557     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4558       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4559                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4560                        Val, ValRange);
4561       if (Val)
4562         UserSGPRCount += 1;
4563     } else if (ID == ".amdhsa_wavefront_size32") {
4564       if (IVersion.Major < 10)
4565         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4566       EnableWavefrontSize32 = Val;
4567       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4568                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4569                        Val, ValRange);
4570     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4571       PARSE_BITS_ENTRY(
4572           KD.compute_pgm_rsrc2,
4573           COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val,
4574           ValRange);
4575     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4576       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4577                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4578                        ValRange);
4579     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4580       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4581                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4582                        ValRange);
4583     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4584       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4585                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4586                        ValRange);
4587     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4588       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4589                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4590                        ValRange);
4591     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4592       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4593                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4594                        ValRange);
4595     } else if (ID == ".amdhsa_next_free_vgpr") {
4596       VGPRRange = ValRange;
4597       NextFreeVGPR = Val;
4598     } else if (ID == ".amdhsa_next_free_sgpr") {
4599       SGPRRange = ValRange;
4600       NextFreeSGPR = Val;
4601     } else if (ID == ".amdhsa_accum_offset") {
4602       if (!isGFX90A())
4603         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4604       AccumOffset = Val;
4605     } else if (ID == ".amdhsa_reserve_vcc") {
4606       if (!isUInt<1>(Val))
4607         return OutOfRangeError(ValRange);
4608       ReserveVCC = Val;
4609     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4610       if (IVersion.Major < 7)
4611         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4612       if (!isUInt<1>(Val))
4613         return OutOfRangeError(ValRange);
4614       ReserveFlatScr = Val;
4615     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4616       if (IVersion.Major < 8)
4617         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4618       if (!isUInt<1>(Val))
4619         return OutOfRangeError(ValRange);
4620       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
4621         return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
4622                                  IDRange);
4623     } else if (ID == ".amdhsa_float_round_mode_32") {
4624       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4625                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4626     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4627       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4628                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4629     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4630       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4631                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4632     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4633       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4634                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4635                        ValRange);
4636     } else if (ID == ".amdhsa_dx10_clamp") {
4637       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4638                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4639     } else if (ID == ".amdhsa_ieee_mode") {
4640       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4641                        Val, ValRange);
4642     } else if (ID == ".amdhsa_fp16_overflow") {
4643       if (IVersion.Major < 9)
4644         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
4645       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4646                        ValRange);
4647     } else if (ID == ".amdhsa_tg_split") {
4648       if (!isGFX90A())
4649         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4650       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
4651                        ValRange);
4652     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4653       if (IVersion.Major < 10)
4654         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4655       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4656                        ValRange);
4657     } else if (ID == ".amdhsa_memory_ordered") {
4658       if (IVersion.Major < 10)
4659         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4660       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4661                        ValRange);
4662     } else if (ID == ".amdhsa_forward_progress") {
4663       if (IVersion.Major < 10)
4664         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4665       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4666                        ValRange);
4667     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4668       PARSE_BITS_ENTRY(
4669           KD.compute_pgm_rsrc2,
4670           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4671           ValRange);
4672     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4673       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4674                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4675                        Val, ValRange);
4676     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4677       PARSE_BITS_ENTRY(
4678           KD.compute_pgm_rsrc2,
4679           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4680           ValRange);
4681     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4682       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4683                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4684                        Val, ValRange);
4685     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4686       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4687                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4688                        Val, ValRange);
4689     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4690       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4691                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4692                        Val, ValRange);
4693     } else if (ID == ".amdhsa_exception_int_div_zero") {
4694       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4695                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4696                        Val, ValRange);
4697     } else {
4698       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
4699     }
4700 
4701 #undef PARSE_BITS_ENTRY
4702   }
4703 
4704   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4705     return TokError(".amdhsa_next_free_vgpr directive is required");
4706 
4707   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4708     return TokError(".amdhsa_next_free_sgpr directive is required");
4709 
4710   unsigned VGPRBlocks;
4711   unsigned SGPRBlocks;
4712   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4713                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
4714                          EnableWavefrontSize32, NextFreeVGPR,
4715                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4716                          SGPRBlocks))
4717     return true;
4718 
4719   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4720           VGPRBlocks))
4721     return OutOfRangeError(VGPRRange);
4722   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4723                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4724 
4725   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4726           SGPRBlocks))
4727     return OutOfRangeError(SGPRRange);
4728   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4729                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4730                   SGPRBlocks);
4731 
4732   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4733     return TokError("too many user SGPRs enabled");
4734   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4735                   UserSGPRCount);
4736 
4737   if (isGFX90A()) {
4738     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
4739       return TokError(".amdhsa_accum_offset directive is required");
4740     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
4741       return TokError("accum_offset should be in range [4..256] in "
4742                       "increments of 4");
4743     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
4744       return TokError("accum_offset exceeds total VGPR allocation");
4745     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
4746                     (AccumOffset / 4 - 1));
4747   }
4748 
4749   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4750       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4751       ReserveFlatScr);
4752   return false;
4753 }
4754 
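// Illustrative only (values are examples): the directive handled below is
// expected to look like
//   .hsa_code_object_version 2,1
// where the two operands are the major and minor code object version numbers.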
4755 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4756   uint32_t Major;
4757   uint32_t Minor;
4758 
4759   if (ParseDirectiveMajorMinor(Major, Minor))
4760     return true;
4761 
4762   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4763   return false;
4764 }
4765 
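// Illustrative only (values are examples): the directive handled below is
// expected to look like
//   .hsa_code_object_isa 8,0,3,"AMD","AMDGPU"
// With no operands, the ISA version of the targeted GPU is used instead.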
4766 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4767   uint32_t Major;
4768   uint32_t Minor;
4769   uint32_t Stepping;
4770   StringRef VendorName;
4771   StringRef ArchName;
4772 
4773   // If this directive has no arguments, then use the ISA version for the
4774   // targeted GPU.
4775   if (isToken(AsmToken::EndOfStatement)) {
4776     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4777     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
4778                                                         ISA.Stepping,
4779                                                         "AMD", "AMDGPU");
4780     return false;
4781   }
4782 
4783   if (ParseDirectiveMajorMinor(Major, Minor))
4784     return true;
4785 
4786   if (!trySkipToken(AsmToken::Comma))
4787     return TokError("stepping version number required, comma expected");
4788 
4789   if (ParseAsAbsoluteExpression(Stepping))
4790     return TokError("invalid stepping version");
4791 
4792   if (!trySkipToken(AsmToken::Comma))
4793     return TokError("vendor name required, comma expected");
4794 
4795   if (!parseString(VendorName, "invalid vendor name"))
4796     return true;
4797 
4798   if (!trySkipToken(AsmToken::Comma))
4799     return TokError("arch name required, comma expected");
4800 
4801   if (!parseString(ArchName, "invalid arch name"))
4802     return true;
4803 
4804   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
4805                                                       VendorName, ArchName);
4806   return false;
4807 }
4808 
4809 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4810                                                amd_kernel_code_t &Header) {
4811   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4812   // assembly for backwards compatibility.
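  // For example (illustrative value), a line such as
  //   max_scratch_backing_memory_byte_size = 256
  // inside an .amd_kernel_code_t block is still accepted but has no effect.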
4813   if (ID == "max_scratch_backing_memory_byte_size") {
4814     Parser.eatToEndOfStatement();
4815     return false;
4816   }
4817 
4818   SmallString<40> ErrStr;
4819   raw_svector_ostream Err(ErrStr);
4820   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4821     return TokError(Err.str());
4822   }
4823   Lex();
4824 
4825   if (ID == "enable_wavefront_size32") {
4826     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4827       if (!isGFX10Plus())
4828         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4829       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4830         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4831     } else {
4832       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4833         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4834     }
4835   }
4836 
4837   if (ID == "wavefront_size") {
4838     if (Header.wavefront_size == 5) {
4839       if (!isGFX10Plus())
4840         return TokError("wavefront_size=5 is only allowed on GFX10+");
4841       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4842         return TokError("wavefront_size=5 requires +WavefrontSize32");
4843     } else if (Header.wavefront_size == 6) {
4844       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4845         return TokError("wavefront_size=6 requires +WavefrontSize64");
4846     }
4847   }
4848 
4849   if (ID == "enable_wgp_mode") {
4850     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
4851         !isGFX10Plus())
4852       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4853   }
4854 
4855   if (ID == "enable_mem_ordered") {
4856     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
4857         !isGFX10Plus())
4858       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4859   }
4860 
4861   if (ID == "enable_fwd_progress") {
4862     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
4863         !isGFX10Plus())
4864       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4865   }
4866 
4867   return false;
4868 }
4869 
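// Illustrative only: parses a block of "key = value" lines of the form
//   .amd_kernel_code_t
//     wavefront_size = 6
//     enable_wgp_mode = 0
//   .end_amd_kernel_code_t
// (the field names shown are examples handled by ParseAMDKernelCodeTValue).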
4870 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4871   amd_kernel_code_t Header;
4872   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4873 
4874   while (true) {
4875     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
4876     // will set the current token to EndOfStatement.
4877     while(trySkipToken(AsmToken::EndOfStatement));
4878 
4879     StringRef ID;
4880     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
4881       return true;
4882 
4883     if (ID == ".end_amd_kernel_code_t")
4884       break;
4885 
4886     if (ParseAMDKernelCodeTValue(ID, Header))
4887       return true;
4888   }
4889 
4890   getTargetStreamer().EmitAMDKernelCodeT(Header);
4891 
4892   return false;
4893 }
4894 
4895 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4896   StringRef KernelName;
4897   if (!parseId(KernelName, "expected symbol name"))
4898     return true;
4899 
4900   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4901                                            ELF::STT_AMDGPU_HSA_KERNEL);
4902 
4903   KernelScope.initialize(getContext());
4904   return false;
4905 }
4906 
4907 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4908   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4909     return Error(getLoc(),
4910                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4911                  "architectures");
4912   }
4913 
4914   auto TargetIDDirective = getLexer().getTok().getStringContents();
4915   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4916     return Error(getParser().getTok().getLoc(), "target id must match options");
4917 
4918   getTargetStreamer().EmitISAVersion();
4919   Lex();
4920 
4921   return false;
4922 }
4923 
4924 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4925   const char *AssemblerDirectiveBegin;
4926   const char *AssemblerDirectiveEnd;
4927   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4928       isHsaAbiVersion3Or4(&getSTI())
4929           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4930                             HSAMD::V3::AssemblerDirectiveEnd)
4931           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4932                             HSAMD::AssemblerDirectiveEnd);
4933 
4934   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4935     return Error(getLoc(),
4936                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4937                  "not available on non-amdhsa OSes")).str());
4938   }
4939 
4940   std::string HSAMetadataString;
4941   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4942                           HSAMetadataString))
4943     return true;
4944 
4945   if (isHsaAbiVersion3Or4(&getSTI())) {
4946     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4947       return Error(getLoc(), "invalid HSA metadata");
4948   } else {
4949     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4950       return Error(getLoc(), "invalid HSA metadata");
4951   }
4952 
4953   return false;
4954 }
4955 
4956 /// Common code to parse out a block of text (typically YAML) between start and
4957 /// end directives.
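/// For example (illustrative), with ".amdgpu_metadata" / ".end_amdgpu_metadata"
/// as the begin/end pair, everything between the two directives is collected
/// verbatim into CollectString, with statements joined by the assembler's
/// separator string.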
4958 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4959                                           const char *AssemblerDirectiveEnd,
4960                                           std::string &CollectString) {
4961 
4962   raw_string_ostream CollectStream(CollectString);
4963 
4964   getLexer().setSkipSpace(false);
4965 
4966   bool FoundEnd = false;
4967   while (!isToken(AsmToken::Eof)) {
4968     while (isToken(AsmToken::Space)) {
4969       CollectStream << getTokenStr();
4970       Lex();
4971     }
4972 
4973     if (trySkipId(AssemblerDirectiveEnd)) {
4974       FoundEnd = true;
4975       break;
4976     }
4977 
4978     CollectStream << Parser.parseStringToEndOfStatement()
4979                   << getContext().getAsmInfo()->getSeparatorString();
4980 
4981     Parser.eatToEndOfStatement();
4982   }
4983 
4984   getLexer().setSkipSpace(true);
4985 
4986   if (isToken(AsmToken::Eof) && !FoundEnd) {
4987     return TokError(Twine("expected directive ") +
4988                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4989   }
4990 
4991   CollectStream.flush();
4992   return false;
4993 }
4994 
4995 /// Parse the assembler directive for new MsgPack-format PAL metadata.
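/// Illustrative only: the metadata is given as a block of YAML/MsgPack text
/// between a begin directive and the matching end directive, e.g.
///   .amdgpu_pal_metadata
///     <msgpack/YAML document>
///   .end_amdgpu_pal_metadata
/// (directive names here are examples; the actual strings come from
/// AMDGPU::PALMD::AssemblerDirectiveBegin/End).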
4996 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4997   std::string String;
4998   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4999                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5000     return true;
5001 
5002   auto PALMetadata = getTargetStreamer().getPALMetadata();
5003   if (!PALMetadata->setFromString(String))
5004     return Error(getLoc(), "invalid PAL metadata");
5005   return false;
5006 }
5007 
5008 /// Parse the assembler directive for old linear-format PAL metadata.
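/// Illustrative only: the legacy form takes an even-length list of
/// register/value pairs, e.g.
///   .amd_amdgpu_pal_metadata 0x2c0a, 0x0, 0x2c0b, 0x42
/// (the directive name and register numbers are examples; the name comes from
/// PALMD::AssemblerDirective).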
5009 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5010   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5011     return Error(getLoc(),
5012                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5013                  "not available on non-amdpal OSes")).str());
5014   }
5015 
5016   auto PALMetadata = getTargetStreamer().getPALMetadata();
5017   PALMetadata->setLegacy();
5018   for (;;) {
5019     uint32_t Key, Value;
5020     if (ParseAsAbsoluteExpression(Key)) {
5021       return TokError(Twine("invalid value in ") +
5022                       Twine(PALMD::AssemblerDirective));
5023     }
5024     if (!trySkipToken(AsmToken::Comma)) {
5025       return TokError(Twine("expected an even number of values in ") +
5026                       Twine(PALMD::AssemblerDirective));
5027     }
5028     if (ParseAsAbsoluteExpression(Value)) {
5029       return TokError(Twine("invalid value in ") +
5030                       Twine(PALMD::AssemblerDirective));
5031     }
5032     PALMetadata->setRegister(Key, Value);
5033     if (!trySkipToken(AsmToken::Comma))
5034       break;
5035   }
5036   return false;
5037 }
5038 
5039 /// ParseDirectiveAMDGPULDS
5040 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
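/// For example (hypothetical symbol name):
///   .amdgpu_lds shared_buf, 1024, 16
/// declares 1024 bytes of LDS for "shared_buf" with 16-byte alignment.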
5041 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5042   if (getParser().checkForValidSection())
5043     return true;
5044 
5045   StringRef Name;
5046   SMLoc NameLoc = getLoc();
5047   if (getParser().parseIdentifier(Name))
5048     return TokError("expected identifier in directive");
5049 
5050   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5051   if (parseToken(AsmToken::Comma, "expected ','"))
5052     return true;
5053 
5054   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5055 
5056   int64_t Size;
5057   SMLoc SizeLoc = getLoc();
5058   if (getParser().parseAbsoluteExpression(Size))
5059     return true;
5060   if (Size < 0)
5061     return Error(SizeLoc, "size must be non-negative");
5062   if (Size > LocalMemorySize)
5063     return Error(SizeLoc, "size is too large");
5064 
5065   int64_t Alignment = 4;
5066   if (trySkipToken(AsmToken::Comma)) {
5067     SMLoc AlignLoc = getLoc();
5068     if (getParser().parseAbsoluteExpression(Alignment))
5069       return true;
5070     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5071       return Error(AlignLoc, "alignment must be a power of two");
5072 
    // Alignment larger than the size of LDS is possible in theory, as long
    // as the linker manages to place the symbol at address 0, but we do want
    // to make sure the alignment fits nicely into a 32-bit integer.
5076     if (Alignment >= 1u << 31)
5077       return Error(AlignLoc, "alignment is too large");
5078   }
5079 
5080   if (parseToken(AsmToken::EndOfStatement,
5081                  "unexpected token in '.amdgpu_lds' directive"))
5082     return true;
5083 
5084   Symbol->redefineIfPossible();
5085   if (!Symbol->isUndefined())
5086     return Error(NameLoc, "invalid symbol redefinition");
5087 
5088   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5089   return false;
5090 }
5091 
5092 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5093   StringRef IDVal = DirectiveID.getString();
5094 
5095   if (isHsaAbiVersion3Or4(&getSTI())) {
5096     if (IDVal == ".amdhsa_kernel")
5097      return ParseDirectiveAMDHSAKernel();
5098 
5099     // TODO: Restructure/combine with PAL metadata directive.
5100     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5101       return ParseDirectiveHSAMetadata();
5102   } else {
5103     if (IDVal == ".hsa_code_object_version")
5104       return ParseDirectiveHSACodeObjectVersion();
5105 
5106     if (IDVal == ".hsa_code_object_isa")
5107       return ParseDirectiveHSACodeObjectISA();
5108 
5109     if (IDVal == ".amd_kernel_code_t")
5110       return ParseDirectiveAMDKernelCodeT();
5111 
5112     if (IDVal == ".amdgpu_hsa_kernel")
5113       return ParseDirectiveAMDGPUHsaKernel();
5114 
5115     if (IDVal == ".amd_amdgpu_isa")
5116       return ParseDirectiveISAVersion();
5117 
5118     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5119       return ParseDirectiveHSAMetadata();
5120   }
5121 
5122   if (IDVal == ".amdgcn_target")
5123     return ParseDirectiveAMDGCNTarget();
5124 
5125   if (IDVal == ".amdgpu_lds")
5126     return ParseDirectiveAMDGPULDS();
5127 
5128   if (IDVal == PALMD::AssemblerDirectiveBegin)
5129     return ParseDirectivePALMetadataBegin();
5130 
5131   if (IDVal == PALMD::AssemblerDirective)
5132     return ParseDirectivePALMetadata();
5133 
5134   return true;
5135 }
5136 
5137 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5138                                            unsigned RegNo) {
5139 
5140   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
5141        R.isValid(); ++R) {
5142     if (*R == RegNo)
5143       return isGFX9Plus();
5144   }
5145 
  // GFX10 has 2 more SGPRs, 104 and 105.
5147   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
5148        R.isValid(); ++R) {
5149     if (*R == RegNo)
5150       return hasSGPR104_SGPR105();
5151   }
5152 
5153   switch (RegNo) {
5154   case AMDGPU::SRC_SHARED_BASE:
5155   case AMDGPU::SRC_SHARED_LIMIT:
5156   case AMDGPU::SRC_PRIVATE_BASE:
5157   case AMDGPU::SRC_PRIVATE_LIMIT:
5158   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5159     return isGFX9Plus();
5160   case AMDGPU::TBA:
5161   case AMDGPU::TBA_LO:
5162   case AMDGPU::TBA_HI:
5163   case AMDGPU::TMA:
5164   case AMDGPU::TMA_LO:
5165   case AMDGPU::TMA_HI:
5166     return !isGFX9Plus();
5167   case AMDGPU::XNACK_MASK:
5168   case AMDGPU::XNACK_MASK_LO:
5169   case AMDGPU::XNACK_MASK_HI:
5170     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5171   case AMDGPU::SGPR_NULL:
5172     return isGFX10Plus();
5173   default:
5174     break;
5175   }
5176 
5177   if (isCI())
5178     return true;
5179 
5180   if (isSI() || isGFX10Plus()) {
5181     // No flat_scr on SI.
5182     // On GFX10 flat scratch is not a valid register operand and can only be
5183     // accessed with s_setreg/s_getreg.
5184     switch (RegNo) {
5185     case AMDGPU::FLAT_SCR:
5186     case AMDGPU::FLAT_SCR_LO:
5187     case AMDGPU::FLAT_SCR_HI:
5188       return false;
5189     default:
5190       return true;
5191     }
5192   }
5193 
5194   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5195   // SI/CI have.
5196   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
5197        R.isValid(); ++R) {
5198     if (*R == RegNo)
5199       return hasSGPR102_SGPR103();
5200   }
5201 
5202   return true;
5203 }
5204 
5205 OperandMatchResultTy
5206 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5207                               OperandMode Mode) {
5208   // Try to parse with a custom parser
5209   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5210 
  // If we successfully parsed the operand or if there was an error parsing,
  // we are done.
  //
  // If we are parsing after we reach EndOfStatement then this means we
  // are appending default values to the Operands list.  This is only done
  // by a custom parser, so we shouldn't continue on to the generic parsing.
5217   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5218       isToken(AsmToken::EndOfStatement))
5219     return ResTy;
5220 
5221   SMLoc RBraceLoc;
5222   SMLoc LBraceLoc = getLoc();
5223   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5224     unsigned Prefix = Operands.size();
5225 
5226     for (;;) {
5227       auto Loc = getLoc();
5228       ResTy = parseReg(Operands);
5229       if (ResTy == MatchOperand_NoMatch)
5230         Error(Loc, "expected a register");
5231       if (ResTy != MatchOperand_Success)
5232         return MatchOperand_ParseFail;
5233 
5234       RBraceLoc = getLoc();
5235       if (trySkipToken(AsmToken::RBrac))
5236         break;
5237 
5238       if (!skipToken(AsmToken::Comma,
5239                      "expected a comma or a closing square bracket")) {
5240         return MatchOperand_ParseFail;
5241       }
5242     }
5243 
5244     if (Operands.size() - Prefix > 1) {
5245       Operands.insert(Operands.begin() + Prefix,
5246                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5247       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5248     }
5249 
5250     return MatchOperand_Success;
5251   }
5252 
5253   return parseRegOrImm(Operands);
5254 }
5255 
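// Illustrative only: a trailing encoding suffix on the mnemonic forces a
// particular encoding and is stripped before matching, e.g.
//   v_add_f32_e64  ->  mnemonic "v_add_f32" with a forced 64-bit encoding
//   v_mov_b32_sdwa ->  mnemonic "v_mov_b32" with the SDWA form forced
// (the instruction names are examples; any mnemonic with these suffixes is
// handled the same way).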
5256 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5257   // Clear any forced encodings from the previous instruction.
5258   setForcedEncodingSize(0);
5259   setForcedDPP(false);
5260   setForcedSDWA(false);
5261 
5262   if (Name.endswith("_e64")) {
5263     setForcedEncodingSize(64);
5264     return Name.substr(0, Name.size() - 4);
5265   } else if (Name.endswith("_e32")) {
5266     setForcedEncodingSize(32);
5267     return Name.substr(0, Name.size() - 4);
5268   } else if (Name.endswith("_dpp")) {
5269     setForcedDPP(true);
5270     return Name.substr(0, Name.size() - 4);
5271   } else if (Name.endswith("_sdwa")) {
5272     setForcedSDWA(true);
5273     return Name.substr(0, Name.size() - 5);
5274   }
5275   return Name;
5276 }
5277 
5278 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5279                                        StringRef Name,
5280                                        SMLoc NameLoc, OperandVector &Operands) {
5281   // Add the instruction mnemonic
5282   Name = parseMnemonicSuffix(Name);
5283   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5284 
5285   bool IsMIMG = Name.startswith("image_");
5286 
5287   while (!trySkipToken(AsmToken::EndOfStatement)) {
5288     OperandMode Mode = OperandMode_Default;
5289     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5290       Mode = OperandMode_NSA;
5291     CPolSeen = 0;
5292     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5293 
5294     if (Res != MatchOperand_Success) {
5295       checkUnsupportedInstruction(Name, NameLoc);
5296       if (!Parser.hasPendingError()) {
5297         // FIXME: use real operand location rather than the current location.
5298         StringRef Msg =
5299           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5300                                             "not a valid operand.";
5301         Error(getLoc(), Msg);
5302       }
5303       while (!trySkipToken(AsmToken::EndOfStatement)) {
5304         lex();
5305       }
5306       return true;
5307     }
5308 
5309     // Eat the comma or space if there is one.
5310     trySkipToken(AsmToken::Comma);
5311   }
5312 
5313   return false;
5314 }
5315 
5316 //===----------------------------------------------------------------------===//
5317 // Utility functions
5318 //===----------------------------------------------------------------------===//
5319 
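// Illustrative only: the helpers below parse "prefix:value" style operands,
// e.g.
//   offset:4095
// and bit-array forms such as
//   op_sel:[0,1,0]
// (the set of accepted prefixes depends on the instruction being parsed).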
5320 OperandMatchResultTy
5321 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5322 
5323   if (!trySkipId(Prefix, AsmToken::Colon))
5324     return MatchOperand_NoMatch;
5325 
5326   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5327 }
5328 
5329 OperandMatchResultTy
5330 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5331                                     AMDGPUOperand::ImmTy ImmTy,
5332                                     bool (*ConvertResult)(int64_t&)) {
5333   SMLoc S = getLoc();
5334   int64_t Value = 0;
5335 
5336   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5337   if (Res != MatchOperand_Success)
5338     return Res;
5339 
5340   if (ConvertResult && !ConvertResult(Value)) {
5341     Error(S, "invalid " + StringRef(Prefix) + " value.");
5342   }
5343 
5344   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5345   return MatchOperand_Success;
5346 }
5347 
5348 OperandMatchResultTy
5349 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5350                                              OperandVector &Operands,
5351                                              AMDGPUOperand::ImmTy ImmTy,
5352                                              bool (*ConvertResult)(int64_t&)) {
5353   SMLoc S = getLoc();
5354   if (!trySkipId(Prefix, AsmToken::Colon))
5355     return MatchOperand_NoMatch;
5356 
5357   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5358     return MatchOperand_ParseFail;
5359 
5360   unsigned Val = 0;
5361   const unsigned MaxSize = 4;
5362 
5363   // FIXME: How to verify the number of elements matches the number of src
5364   // operands?
5365   for (int I = 0; ; ++I) {
5366     int64_t Op;
5367     SMLoc Loc = getLoc();
5368     if (!parseExpr(Op))
5369       return MatchOperand_ParseFail;
5370 
5371     if (Op != 0 && Op != 1) {
5372       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5373       return MatchOperand_ParseFail;
5374     }
5375 
5376     Val |= (Op << I);
5377 
5378     if (trySkipToken(AsmToken::RBrac))
5379       break;
5380 
5381     if (I + 1 == MaxSize) {
5382       Error(getLoc(), "expected a closing square bracket");
5383       return MatchOperand_ParseFail;
5384     }
5385 
5386     if (!skipToken(AsmToken::Comma, "expected a comma"))
5387       return MatchOperand_ParseFail;
5388   }
5389 
5390   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5391   return MatchOperand_Success;
5392 }
5393 
5394 OperandMatchResultTy
5395 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5396                                AMDGPUOperand::ImmTy ImmTy) {
5397   int64_t Bit;
5398   SMLoc S = getLoc();
5399 
5400   if (trySkipId(Name)) {
5401     Bit = 1;
5402   } else if (trySkipId("no", Name)) {
5403     Bit = 0;
5404   } else {
5405     return MatchOperand_NoMatch;
5406   }
5407 
5408   if (Name == "r128" && !hasMIMG_R128()) {
5409     Error(S, "r128 modifier is not supported on this GPU");
5410     return MatchOperand_ParseFail;
5411   }
5412   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5413     Error(S, "a16 modifier is not supported on this GPU");
5414     return MatchOperand_ParseFail;
5415   }
5416 
5417   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5418     ImmTy = AMDGPUOperand::ImmTyR128A16;
5419 
5420   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5421   return MatchOperand_Success;
5422 }
5423 
5424 OperandMatchResultTy
5425 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5426   unsigned CPolOn = 0;
5427   unsigned CPolOff = 0;
5428   SMLoc S = getLoc();
5429 
5430   if (trySkipId("glc"))
5431     CPolOn = AMDGPU::CPol::GLC;
5432   else if (trySkipId("noglc"))
5433     CPolOff = AMDGPU::CPol::GLC;
5434   else if (trySkipId("slc"))
5435     CPolOn = AMDGPU::CPol::SLC;
5436   else if (trySkipId("noslc"))
5437     CPolOff = AMDGPU::CPol::SLC;
5438   else if (trySkipId("dlc"))
5439     CPolOn = AMDGPU::CPol::DLC;
5440   else if (trySkipId("nodlc"))
5441     CPolOff = AMDGPU::CPol::DLC;
5442   else if (trySkipId("scc"))
5443     CPolOn = AMDGPU::CPol::SCC;
5444   else if (trySkipId("noscc"))
5445     CPolOff = AMDGPU::CPol::SCC;
5446   else
5447     return MatchOperand_NoMatch;
5448 
5449   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5450     Error(S, "dlc modifier is not supported on this GPU");
5451     return MatchOperand_ParseFail;
5452   }
5453 
5454   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5455     Error(S, "scc modifier is not supported on this GPU");
5456     return MatchOperand_ParseFail;
5457   }
5458 
5459   if (CPolSeen & (CPolOn | CPolOff)) {
5460     Error(S, "duplicate cache policy modifier");
5461     return MatchOperand_ParseFail;
5462   }
5463 
5464   CPolSeen |= (CPolOn | CPolOff);
5465 
5466   for (unsigned I = 1; I != Operands.size(); ++I) {
5467     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5468     if (Op.isCPol()) {
5469       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5470       return MatchOperand_Success;
5471     }
5472   }
5473 
5474   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5475                                               AMDGPUOperand::ImmTyCPol));
5476 
5477   return MatchOperand_Success;
5478 }
5479 
5480 static void addOptionalImmOperand(
5481   MCInst& Inst, const OperandVector& Operands,
5482   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5483   AMDGPUOperand::ImmTy ImmT,
5484   int64_t Default = 0) {
5485   auto i = OptionalIdx.find(ImmT);
5486   if (i != OptionalIdx.end()) {
5487     unsigned Idx = i->second;
5488     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5489   } else {
5490     Inst.addOperand(MCOperand::createImm(Default));
5491   }
5492 }
5493 
5494 OperandMatchResultTy
5495 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5496                                        StringRef &Value,
5497                                        SMLoc &StringLoc) {
5498   if (!trySkipId(Prefix, AsmToken::Colon))
5499     return MatchOperand_NoMatch;
5500 
5501   StringLoc = getLoc();
5502   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5503                                                   : MatchOperand_ParseFail;
5504 }
5505 
5506 //===----------------------------------------------------------------------===//
5507 // MTBUF format
5508 //===----------------------------------------------------------------------===//
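// Illustrative only: an MTBUF format operand may be given numerically, e.g.
//   format:22
// or symbolically inside brackets, e.g.
//   format:[BUF_FMT_32_FLOAT]                            (GFX10+ unified form)
//   format:[BUF_DATA_FORMAT_32, BUF_NUM_FORMAT_FLOAT]    (split dfmt/nfmt form)
// (the symbolic names shown are examples, not an exhaustive list).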
5509 
5510 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5511                                   int64_t MaxVal,
5512                                   int64_t &Fmt) {
5513   int64_t Val;
5514   SMLoc Loc = getLoc();
5515 
5516   auto Res = parseIntWithPrefix(Pref, Val);
5517   if (Res == MatchOperand_ParseFail)
5518     return false;
5519   if (Res == MatchOperand_NoMatch)
5520     return true;
5521 
5522   if (Val < 0 || Val > MaxVal) {
5523     Error(Loc, Twine("out of range ", StringRef(Pref)));
5524     return false;
5525   }
5526 
5527   Fmt = Val;
5528   return true;
5529 }
5530 
5531 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5532 // values to live in a joint format operand in the MCInst encoding.
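// For example (illustrative values), on pre-GFX10 targets both of
//   dfmt:1, nfmt:2
//   nfmt:2, dfmt:1
// are accepted, and a missing half falls back to its default value.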
5533 OperandMatchResultTy
5534 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5535   using namespace llvm::AMDGPU::MTBUFFormat;
5536 
5537   int64_t Dfmt = DFMT_UNDEF;
5538   int64_t Nfmt = NFMT_UNDEF;
5539 
5540   // dfmt and nfmt can appear in either order, and each is optional.
5541   for (int I = 0; I < 2; ++I) {
5542     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5543       return MatchOperand_ParseFail;
5544 
5545     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5546       return MatchOperand_ParseFail;
5547     }
5548     // Skip optional comma between dfmt/nfmt
5549     // but guard against 2 commas following each other.
5550     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5551         !peekToken().is(AsmToken::Comma)) {
5552       trySkipToken(AsmToken::Comma);
5553     }
5554   }
5555 
5556   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5557     return MatchOperand_NoMatch;
5558 
5559   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5560   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5561 
5562   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5563   return MatchOperand_Success;
5564 }
5565 
5566 OperandMatchResultTy
5567 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5568   using namespace llvm::AMDGPU::MTBUFFormat;
5569 
5570   int64_t Fmt = UFMT_UNDEF;
5571 
5572   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5573     return MatchOperand_ParseFail;
5574 
5575   if (Fmt == UFMT_UNDEF)
5576     return MatchOperand_NoMatch;
5577 
5578   Format = Fmt;
5579   return MatchOperand_Success;
5580 }
5581 
5582 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5583                                     int64_t &Nfmt,
5584                                     StringRef FormatStr,
5585                                     SMLoc Loc) {
5586   using namespace llvm::AMDGPU::MTBUFFormat;
5587   int64_t Format;
5588 
5589   Format = getDfmt(FormatStr);
5590   if (Format != DFMT_UNDEF) {
5591     Dfmt = Format;
5592     return true;
5593   }
5594 
5595   Format = getNfmt(FormatStr, getSTI());
5596   if (Format != NFMT_UNDEF) {
5597     Nfmt = Format;
5598     return true;
5599   }
5600 
5601   Error(Loc, "unsupported format");
5602   return false;
5603 }
5604 
5605 OperandMatchResultTy
5606 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5607                                           SMLoc FormatLoc,
5608                                           int64_t &Format) {
5609   using namespace llvm::AMDGPU::MTBUFFormat;
5610 
5611   int64_t Dfmt = DFMT_UNDEF;
5612   int64_t Nfmt = NFMT_UNDEF;
5613   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5614     return MatchOperand_ParseFail;
5615 
5616   if (trySkipToken(AsmToken::Comma)) {
5617     StringRef Str;
5618     SMLoc Loc = getLoc();
5619     if (!parseId(Str, "expected a format string") ||
5620         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5621       return MatchOperand_ParseFail;
5622     }
5623     if (Dfmt == DFMT_UNDEF) {
5624       Error(Loc, "duplicate numeric format");
5625       return MatchOperand_ParseFail;
5626     } else if (Nfmt == NFMT_UNDEF) {
5627       Error(Loc, "duplicate data format");
5628       return MatchOperand_ParseFail;
5629     }
5630   }
5631 
5632   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5633   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5634 
5635   if (isGFX10Plus()) {
5636     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5637     if (Ufmt == UFMT_UNDEF) {
5638       Error(FormatLoc, "unsupported format");
5639       return MatchOperand_ParseFail;
5640     }
5641     Format = Ufmt;
5642   } else {
5643     Format = encodeDfmtNfmt(Dfmt, Nfmt);
5644   }
5645 
5646   return MatchOperand_Success;
5647 }
5648 
5649 OperandMatchResultTy
5650 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5651                                             SMLoc Loc,
5652                                             int64_t &Format) {
5653   using namespace llvm::AMDGPU::MTBUFFormat;
5654 
5655   auto Id = getUnifiedFormat(FormatStr);
5656   if (Id == UFMT_UNDEF)
5657     return MatchOperand_NoMatch;
5658 
5659   if (!isGFX10Plus()) {
5660     Error(Loc, "unified format is not supported on this GPU");
5661     return MatchOperand_ParseFail;
5662   }
5663 
5664   Format = Id;
5665   return MatchOperand_Success;
5666 }
5667 
5668 OperandMatchResultTy
5669 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5670   using namespace llvm::AMDGPU::MTBUFFormat;
5671   SMLoc Loc = getLoc();
5672 
5673   if (!parseExpr(Format))
5674     return MatchOperand_ParseFail;
5675   if (!isValidFormatEncoding(Format, getSTI())) {
5676     Error(Loc, "out of range format");
5677     return MatchOperand_ParseFail;
5678   }
5679 
5680   return MatchOperand_Success;
5681 }
5682 
5683 OperandMatchResultTy
5684 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5685   using namespace llvm::AMDGPU::MTBUFFormat;
5686 
5687   if (!trySkipId("format", AsmToken::Colon))
5688     return MatchOperand_NoMatch;
5689 
5690   if (trySkipToken(AsmToken::LBrac)) {
5691     StringRef FormatStr;
5692     SMLoc Loc = getLoc();
5693     if (!parseId(FormatStr, "expected a format string"))
5694       return MatchOperand_ParseFail;
5695 
5696     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5697     if (Res == MatchOperand_NoMatch)
5698       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5699     if (Res != MatchOperand_Success)
5700       return Res;
5701 
5702     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5703       return MatchOperand_ParseFail;
5704 
5705     return MatchOperand_Success;
5706   }
5707 
5708   return parseNumericFormat(Format);
5709 }
5710 
5711 OperandMatchResultTy
5712 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5713   using namespace llvm::AMDGPU::MTBUFFormat;
5714 
5715   int64_t Format = getDefaultFormatEncoding(getSTI());
5716   OperandMatchResultTy Res;
5717   SMLoc Loc = getLoc();
5718 
5719   // Parse legacy format syntax.
5720   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5721   if (Res == MatchOperand_ParseFail)
5722     return Res;
5723 
5724   bool FormatFound = (Res == MatchOperand_Success);
5725 
5726   Operands.push_back(
5727     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5728 
5729   if (FormatFound)
5730     trySkipToken(AsmToken::Comma);
5731 
5732   if (isToken(AsmToken::EndOfStatement)) {
    // We are expecting an soffset operand,
    // but let the matcher handle the error.
5735     return MatchOperand_Success;
5736   }
5737 
5738   // Parse soffset.
5739   Res = parseRegOrImm(Operands);
5740   if (Res != MatchOperand_Success)
5741     return Res;
5742 
5743   trySkipToken(AsmToken::Comma);
5744 
5745   if (!FormatFound) {
5746     Res = parseSymbolicOrNumericFormat(Format);
5747     if (Res == MatchOperand_ParseFail)
5748       return Res;
5749     if (Res == MatchOperand_Success) {
5750       auto Size = Operands.size();
5751       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5752       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5753       Op.setImm(Format);
5754     }
5755     return MatchOperand_Success;
5756   }
5757 
5758   if (isId("format") && peekToken().is(AsmToken::Colon)) {
5759     Error(getLoc(), "duplicate format");
5760     return MatchOperand_ParseFail;
5761   }
5762   return MatchOperand_Success;
5763 }
5764 
5765 //===----------------------------------------------------------------------===//
5766 // ds
5767 //===----------------------------------------------------------------------===//
5768 
5769 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5770                                     const OperandVector &Operands) {
5771   OptionalImmIndexMap OptionalIdx;
5772 
5773   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5774     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5775 
5776     // Add the register arguments
5777     if (Op.isReg()) {
5778       Op.addRegOperands(Inst, 1);
5779       continue;
5780     }
5781 
5782     // Handle optional arguments
5783     OptionalIdx[Op.getImmTy()] = i;
5784   }
5785 
5786   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
5787   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
5788   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5789 
5790   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5791 }
5792 
5793 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
5794                                 bool IsGdsHardcoded) {
5795   OptionalImmIndexMap OptionalIdx;
5796 
5797   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5798     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5799 
5800     // Add the register arguments
5801     if (Op.isReg()) {
5802       Op.addRegOperands(Inst, 1);
5803       continue;
5804     }
5805 
5806     if (Op.isToken() && Op.getToken() == "gds") {
5807       IsGdsHardcoded = true;
5808       continue;
5809     }
5810 
5811     // Handle optional arguments
5812     OptionalIdx[Op.getImmTy()] = i;
5813   }
5814 
5815   AMDGPUOperand::ImmTy OffsetType =
5816     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
5817      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
5818      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
5819                                                       AMDGPUOperand::ImmTyOffset;
5820 
5821   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
5822 
5823   if (!IsGdsHardcoded) {
5824     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5825   }
5826   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5827 }
5828 
5829 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
5830   OptionalImmIndexMap OptionalIdx;
5831 
5832   unsigned OperandIdx[4];
5833   unsigned EnMask = 0;
5834   int SrcIdx = 0;
5835 
5836   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5837     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5838 
5839     // Add the register arguments
5840     if (Op.isReg()) {
5841       assert(SrcIdx < 4);
5842       OperandIdx[SrcIdx] = Inst.size();
5843       Op.addRegOperands(Inst, 1);
5844       ++SrcIdx;
5845       continue;
5846     }
5847 
5848     if (Op.isOff()) {
5849       assert(SrcIdx < 4);
5850       OperandIdx[SrcIdx] = Inst.size();
5851       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
5852       ++SrcIdx;
5853       continue;
5854     }
5855 
5856     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
5857       Op.addImmOperands(Inst, 1);
5858       continue;
5859     }
5860 
5861     if (Op.isToken() && Op.getToken() == "done")
5862       continue;
5863 
5864     // Handle optional arguments
5865     OptionalIdx[Op.getImmTy()] = i;
5866   }
5867 
5868   assert(SrcIdx == 4);
5869 
5870   bool Compr = false;
5871   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
5872     Compr = true;
5873     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
5874     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
5875     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
5876   }
5877 
5878   for (auto i = 0; i < SrcIdx; ++i) {
5879     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
5880       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
5881     }
5882   }
5883 
5884   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
5885   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
5886 
5887   Inst.addOperand(MCOperand::createImm(EnMask));
5888 }
5889 
5890 //===----------------------------------------------------------------------===//
5891 // s_waitcnt
5892 //===----------------------------------------------------------------------===//
5893 
5894 static bool
5895 encodeCnt(
5896   const AMDGPU::IsaVersion ISA,
5897   int64_t &IntVal,
5898   int64_t CntVal,
5899   bool Saturate,
5900   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
5901   unsigned (*decode)(const IsaVersion &Version, unsigned))
5902 {
5903   bool Failed = false;
5904 
5905   IntVal = encode(ISA, IntVal, CntVal);
5906   if (CntVal != decode(ISA, IntVal)) {
5907     if (Saturate) {
5908       IntVal = encode(ISA, IntVal, -1);
5909     } else {
5910       Failed = true;
5911     }
5912   }
5913   return Failed;
5914 }
5915 
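// Illustrative only: parses one counter term of an s_waitcnt operand, e.g.
//   s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
// A "_sat" suffix (e.g. vmcnt_sat(100)) saturates instead of reporting an
// out-of-range value.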
5916 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
5917 
5918   SMLoc CntLoc = getLoc();
5919   StringRef CntName = getTokenStr();
5920 
5921   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
5922       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
5923     return false;
5924 
5925   int64_t CntVal;
5926   SMLoc ValLoc = getLoc();
5927   if (!parseExpr(CntVal))
5928     return false;
5929 
5930   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5931 
5932   bool Failed = true;
5933   bool Sat = CntName.endswith("_sat");
5934 
5935   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
5936     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
5937   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
5938     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
5939   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
5940     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
5941   } else {
5942     Error(CntLoc, "invalid counter name " + CntName);
5943     return false;
5944   }
5945 
5946   if (Failed) {
5947     Error(ValLoc, "too large value for " + CntName);
5948     return false;
5949   }
5950 
5951   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
5952     return false;
5953 
5954   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
5955     if (isToken(AsmToken::EndOfStatement)) {
5956       Error(getLoc(), "expected a counter name");
5957       return false;
5958     }
5959   }
5960 
5961   return true;
5962 }
5963 
5964 OperandMatchResultTy
5965 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
5966   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5967   int64_t Waitcnt = getWaitcntBitMask(ISA);
5968   SMLoc S = getLoc();
5969 
5970   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
5971     while (!isToken(AsmToken::EndOfStatement)) {
5972       if (!parseCnt(Waitcnt))
5973         return MatchOperand_ParseFail;
5974     }
5975   } else {
5976     if (!parseExpr(Waitcnt))
5977       return MatchOperand_ParseFail;
5978   }
5979 
5980   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
5981   return MatchOperand_Success;
5982 }
5983 
5984 bool
5985 AMDGPUOperand::isSWaitCnt() const {
5986   return isImm();
5987 }
5988 
5989 //===----------------------------------------------------------------------===//
5990 // hwreg
5991 //===----------------------------------------------------------------------===//
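// Illustrative only: a hwreg operand names a hardware register plus an
// optional bit offset and width, e.g.
//   s_getreg_b32 s0, hwreg(HW_REG_TRAPSTS, 0, 16)
// (the register name is an example; a plain 16-bit immediate is also accepted).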
5992 
5993 bool
5994 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
5995                                 OperandInfoTy &Offset,
5996                                 OperandInfoTy &Width) {
5997   using namespace llvm::AMDGPU::Hwreg;
5998 
5999   // The register may be specified by name or using a numeric code
6000   HwReg.Loc = getLoc();
6001   if (isToken(AsmToken::Identifier) &&
6002       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
6003     HwReg.IsSymbolic = true;
6004     lex(); // skip register name
6005   } else if (!parseExpr(HwReg.Id, "a register name")) {
6006     return false;
6007   }
6008 
6009   if (trySkipToken(AsmToken::RParen))
6010     return true;
6011 
6012   // parse optional params
6013   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6014     return false;
6015 
6016   Offset.Loc = getLoc();
6017   if (!parseExpr(Offset.Id))
6018     return false;
6019 
6020   if (!skipToken(AsmToken::Comma, "expected a comma"))
6021     return false;
6022 
6023   Width.Loc = getLoc();
6024   return parseExpr(Width.Id) &&
6025          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6026 }
6027 
6028 bool
6029 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6030                                const OperandInfoTy &Offset,
6031                                const OperandInfoTy &Width) {
6032 
6033   using namespace llvm::AMDGPU::Hwreg;
6034 
6035   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
6036     Error(HwReg.Loc,
6037           "specified hardware register is not supported on this GPU");
6038     return false;
6039   }
6040   if (!isValidHwreg(HwReg.Id)) {
6041     Error(HwReg.Loc,
6042           "invalid code of hardware register: only 6-bit values are legal");
6043     return false;
6044   }
6045   if (!isValidHwregOffset(Offset.Id)) {
6046     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6047     return false;
6048   }
6049   if (!isValidHwregWidth(Width.Id)) {
6050     Error(Width.Loc,
6051           "invalid bitfield width: only values from 1 to 32 are legal");
6052     return false;
6053   }
6054   return true;
6055 }
6056 
6057 OperandMatchResultTy
6058 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6059   using namespace llvm::AMDGPU::Hwreg;
6060 
6061   int64_t ImmVal = 0;
6062   SMLoc Loc = getLoc();
6063 
6064   if (trySkipId("hwreg", AsmToken::LParen)) {
6065     OperandInfoTy HwReg(ID_UNKNOWN_);
6066     OperandInfoTy Offset(OFFSET_DEFAULT_);
6067     OperandInfoTy Width(WIDTH_DEFAULT_);
6068     if (parseHwregBody(HwReg, Offset, Width) &&
6069         validateHwreg(HwReg, Offset, Width)) {
6070       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6071     } else {
6072       return MatchOperand_ParseFail;
6073     }
6074   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6075     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6076       Error(Loc, "invalid immediate: only 16-bit values are legal");
6077       return MatchOperand_ParseFail;
6078     }
6079   } else {
6080     return MatchOperand_ParseFail;
6081   }
6082 
6083   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6084   return MatchOperand_Success;
6085 }
6086 
6087 bool AMDGPUOperand::isHwreg() const {
6088   return isImmTy(ImmTyHwreg);
6089 }
6090 
6091 //===----------------------------------------------------------------------===//
6092 // sendmsg
6093 //===----------------------------------------------------------------------===//
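// Illustrative only: a sendmsg operand names a message, an optional operation
// and an optional stream id, e.g.
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
// (the names are examples; a plain 16-bit immediate is also accepted).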
6094 
6095 bool
6096 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6097                                   OperandInfoTy &Op,
6098                                   OperandInfoTy &Stream) {
6099   using namespace llvm::AMDGPU::SendMsg;
6100 
6101   Msg.Loc = getLoc();
6102   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
6103     Msg.IsSymbolic = true;
6104     lex(); // skip message name
6105   } else if (!parseExpr(Msg.Id, "a message name")) {
6106     return false;
6107   }
6108 
6109   if (trySkipToken(AsmToken::Comma)) {
6110     Op.IsDefined = true;
6111     Op.Loc = getLoc();
6112     if (isToken(AsmToken::Identifier) &&
6113         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6114       lex(); // skip operation name
6115     } else if (!parseExpr(Op.Id, "an operation name")) {
6116       return false;
6117     }
6118 
6119     if (trySkipToken(AsmToken::Comma)) {
6120       Stream.IsDefined = true;
6121       Stream.Loc = getLoc();
6122       if (!parseExpr(Stream.Id))
6123         return false;
6124     }
6125   }
6126 
6127   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6128 }
6129 
6130 bool
6131 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6132                                  const OperandInfoTy &Op,
6133                                  const OperandInfoTy &Stream) {
6134   using namespace llvm::AMDGPU::SendMsg;
6135 
  // Validation strictness depends on whether the message is specified
  // in a symbolic or in a numeric form. In the latter case
  // only the encoding possibility is checked.
6139   bool Strict = Msg.IsSymbolic;
6140 
6141   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
6142     Error(Msg.Loc, "invalid message id");
6143     return false;
6144   }
6145   if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
6146     if (Op.IsDefined) {
6147       Error(Op.Loc, "message does not support operations");
6148     } else {
6149       Error(Msg.Loc, "missing message operation");
6150     }
6151     return false;
6152   }
6153   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6154     Error(Op.Loc, "invalid operation id");
6155     return false;
6156   }
6157   if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
6158     Error(Stream.Loc, "message operation does not support streams");
6159     return false;
6160   }
6161   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6162     Error(Stream.Loc, "invalid message stream id");
6163     return false;
6164   }
6165   return true;
6166 }
6167 
6168 OperandMatchResultTy
6169 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6170   using namespace llvm::AMDGPU::SendMsg;
6171 
6172   int64_t ImmVal = 0;
6173   SMLoc Loc = getLoc();
6174 
6175   if (trySkipId("sendmsg", AsmToken::LParen)) {
6176     OperandInfoTy Msg(ID_UNKNOWN_);
6177     OperandInfoTy Op(OP_NONE_);
6178     OperandInfoTy Stream(STREAM_ID_NONE_);
6179     if (parseSendMsgBody(Msg, Op, Stream) &&
6180         validateSendMsg(Msg, Op, Stream)) {
6181       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6182     } else {
6183       return MatchOperand_ParseFail;
6184     }
6185   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6186     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6187       Error(Loc, "invalid immediate: only 16-bit values are legal");
6188       return MatchOperand_ParseFail;
6189     }
6190   } else {
6191     return MatchOperand_ParseFail;
6192   }
6193 
6194   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6195   return MatchOperand_Success;
6196 }
6197 
6198 bool AMDGPUOperand::isSendMsg() const {
6199   return isImmTy(ImmTySendMsg);
6200 }
6201 
6202 //===----------------------------------------------------------------------===//
6203 // v_interp
6204 //===----------------------------------------------------------------------===//
6205 
6206 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6207   StringRef Str;
6208   SMLoc S = getLoc();
6209 
6210   if (!parseId(Str))
6211     return MatchOperand_NoMatch;
6212 
6213   int Slot = StringSwitch<int>(Str)
6214     .Case("p10", 0)
6215     .Case("p20", 1)
6216     .Case("p0", 2)
6217     .Default(-1);
6218 
6219   if (Slot == -1) {
6220     Error(S, "invalid interpolation slot");
6221     return MatchOperand_ParseFail;
6222   }
6223 
6224   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6225                                               AMDGPUOperand::ImmTyInterpSlot));
6226   return MatchOperand_Success;
6227 }
6228 
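// Parse an interpolation attribute of the form attr<N>.<chan>, e.g. attr0.x,
// where <N> is a 6-bit attribute number and <chan> is one of x, y, z or w.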
6229 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6230   StringRef Str;
6231   SMLoc S = getLoc();
6232 
6233   if (!parseId(Str))
6234     return MatchOperand_NoMatch;
6235 
6236   if (!Str.startswith("attr")) {
6237     Error(S, "invalid interpolation attribute");
6238     return MatchOperand_ParseFail;
6239   }
6240 
6241   StringRef Chan = Str.take_back(2);
6242   int AttrChan = StringSwitch<int>(Chan)
6243     .Case(".x", 0)
6244     .Case(".y", 1)
6245     .Case(".z", 2)
6246     .Case(".w", 3)
6247     .Default(-1);
6248   if (AttrChan == -1) {
6249     Error(S, "invalid or missing interpolation attribute channel");
6250     return MatchOperand_ParseFail;
6251   }
6252 
6253   Str = Str.drop_back(2).drop_front(4);
6254 
6255   uint8_t Attr;
6256   if (Str.getAsInteger(10, Attr)) {
6257     Error(S, "invalid or missing interpolation attribute number");
6258     return MatchOperand_ParseFail;
6259   }
6260 
6261   if (Attr > 63) {
6262     Error(S, "out of bounds interpolation attribute number");
6263     return MatchOperand_ParseFail;
6264   }
6265 
6266   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6267 
6268   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6269                                               AMDGPUOperand::ImmTyInterpAttr));
6270   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6271                                               AMDGPUOperand::ImmTyAttrChan));
6272   return MatchOperand_Success;
6273 }
6274 
6275 //===----------------------------------------------------------------------===//
6276 // exp
6277 //===----------------------------------------------------------------------===//
6278 
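// Parse an export target, e.g. mrt0, mrtz, null, pos0 or param0.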
6279 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6280   using namespace llvm::AMDGPU::Exp;
6281 
6282   StringRef Str;
6283   SMLoc S = getLoc();
6284 
6285   if (!parseId(Str))
6286     return MatchOperand_NoMatch;
6287 
6288   unsigned Id = getTgtId(Str);
6289   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6290     Error(S, (Id == ET_INVALID) ?
6291                 "invalid exp target" :
6292                 "exp target is not supported on this GPU");
6293     return MatchOperand_ParseFail;
6294   }
6295 
6296   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6297                                               AMDGPUOperand::ImmTyExpTgt));
6298   return MatchOperand_Success;
6299 }
6300 
6301 //===----------------------------------------------------------------------===//
6302 // parser helpers
6303 //===----------------------------------------------------------------------===//
6304 
6305 bool
6306 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6307   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6308 }
6309 
6310 bool
6311 AMDGPUAsmParser::isId(const StringRef Id) const {
6312   return isId(getToken(), Id);
6313 }
6314 
6315 bool
6316 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6317   return getTokenKind() == Kind;
6318 }
6319 
6320 bool
6321 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6322   if (isId(Id)) {
6323     lex();
6324     return true;
6325   }
6326   return false;
6327 }
6328 
6329 bool
6330 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6331   if (isToken(AsmToken::Identifier)) {
6332     StringRef Tok = getTokenStr();
6333     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6334       lex();
6335       return true;
6336     }
6337   }
6338   return false;
6339 }
6340 
6341 bool
6342 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6343   if (isId(Id) && peekToken().is(Kind)) {
6344     lex();
6345     lex();
6346     return true;
6347   }
6348   return false;
6349 }
6350 
6351 bool
6352 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6353   if (isToken(Kind)) {
6354     lex();
6355     return true;
6356   }
6357   return false;
6358 }
6359 
6360 bool
6361 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6362                            const StringRef ErrMsg) {
6363   if (!trySkipToken(Kind)) {
6364     Error(getLoc(), ErrMsg);
6365     return false;
6366   }
6367   return true;
6368 }
6369 
6370 bool
6371 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6372   SMLoc S = getLoc();
6373 
6374   const MCExpr *Expr;
6375   if (Parser.parseExpression(Expr))
6376     return false;
6377 
6378   if (Expr->evaluateAsAbsolute(Imm))
6379     return true;
6380 
6381   if (Expected.empty()) {
6382     Error(S, "expected absolute expression");
6383   } else {
6384     Error(S, Twine("expected ", Expected) +
6385              Twine(" or an absolute expression"));
6386   }
6387   return false;
6388 }
6389 
6390 bool
6391 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6392   SMLoc S = getLoc();
6393 
6394   const MCExpr *Expr;
6395   if (Parser.parseExpression(Expr))
6396     return false;
6397 
6398   int64_t IntVal;
6399   if (Expr->evaluateAsAbsolute(IntVal)) {
6400     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6401   } else {
6402     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6403   }
6404   return true;
6405 }
6406 
6407 bool
6408 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6409   if (isToken(AsmToken::String)) {
6410     Val = getToken().getStringContents();
6411     lex();
6412     return true;
6413   } else {
6414     Error(getLoc(), ErrMsg);
6415     return false;
6416   }
6417 }
6418 
6419 bool
6420 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6421   if (isToken(AsmToken::Identifier)) {
6422     Val = getTokenStr();
6423     lex();
6424     return true;
6425   } else {
6426     if (!ErrMsg.empty())
6427       Error(getLoc(), ErrMsg);
6428     return false;
6429   }
6430 }
6431 
6432 AsmToken
6433 AMDGPUAsmParser::getToken() const {
6434   return Parser.getTok();
6435 }
6436 
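// Return the next token without consuming it. At the end of a statement the
// EndOfStatement token itself is returned instead of peeking into the next
// statement.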
6437 AsmToken
6438 AMDGPUAsmParser::peekToken() {
6439   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6440 }
6441 
6442 void
6443 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6444   auto TokCount = getLexer().peekTokens(Tokens);
6445 
6446   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6447     Tokens[Idx] = AsmToken(AsmToken::Error, "");
6448 }
6449 
6450 AsmToken::TokenKind
6451 AMDGPUAsmParser::getTokenKind() const {
6452   return getLexer().getKind();
6453 }
6454 
6455 SMLoc
6456 AMDGPUAsmParser::getLoc() const {
6457   return getToken().getLoc();
6458 }
6459 
6460 StringRef
6461 AMDGPUAsmParser::getTokenStr() const {
6462   return getToken().getString();
6463 }
6464 
6465 void
6466 AMDGPUAsmParser::lex() {
6467   Parser.Lex();
6468 }
6469 
6470 SMLoc
6471 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
6472                                const OperandVector &Operands) const {
6473   for (unsigned i = Operands.size() - 1; i > 0; --i) {
6474     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6475     if (Test(Op))
6476       return Op.getStartLoc();
6477   }
6478   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
6479 }
6480 
6481 SMLoc
6482 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
6483                            const OperandVector &Operands) const {
6484   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
6485   return getOperandLoc(Test, Operands);
6486 }
6487 
6488 SMLoc
6489 AMDGPUAsmParser::getRegLoc(unsigned Reg,
6490                            const OperandVector &Operands) const {
6491   auto Test = [=](const AMDGPUOperand& Op) {
6492     return Op.isRegKind() && Op.getReg() == Reg;
6493   };
6494   return getOperandLoc(Test, Operands);
6495 }
6496 
6497 SMLoc
6498 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
6499   auto Test = [](const AMDGPUOperand& Op) {
6500     return Op.IsImmKindLiteral() || Op.isExpr();
6501   };
6502   return getOperandLoc(Test, Operands);
6503 }
6504 
6505 SMLoc
6506 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
6507   auto Test = [](const AMDGPUOperand& Op) {
6508     return Op.isImmKindConst();
6509   };
6510   return getOperandLoc(Test, Operands);
6511 }
6512 
6513 //===----------------------------------------------------------------------===//
6514 // swizzle
6515 //===----------------------------------------------------------------------===//
6516 
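// Pack the and/or/xor masks of a BITMASK_PERM swizzle into the
// ds_swizzle_b32 offset encoding; with this swizzle each lane reads from
// lane ((id & AndMask) | OrMask) ^ XorMask.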
6517 LLVM_READNONE
6518 static unsigned
6519 encodeBitmaskPerm(const unsigned AndMask,
6520                   const unsigned OrMask,
6521                   const unsigned XorMask) {
6522   using namespace llvm::AMDGPU::Swizzle;
6523 
6524   return BITMASK_PERM_ENC |
6525          (AndMask << BITMASK_AND_SHIFT) |
6526          (OrMask  << BITMASK_OR_SHIFT)  |
6527          (XorMask << BITMASK_XOR_SHIFT);
6528 }
6529 
6530 bool
6531 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
6532                                      const unsigned MinVal,
6533                                      const unsigned MaxVal,
6534                                      const StringRef ErrMsg,
6535                                      SMLoc &Loc) {
6536   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6537     return false;
6538   }
6539   Loc = getLoc();
6540   if (!parseExpr(Op)) {
6541     return false;
6542   }
6543   if (Op < MinVal || Op > MaxVal) {
6544     Error(Loc, ErrMsg);
6545     return false;
6546   }
6547 
6548   return true;
6549 }
6550 
6551 bool
6552 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6553                                       const unsigned MinVal,
6554                                       const unsigned MaxVal,
6555                                       const StringRef ErrMsg) {
6556   SMLoc Loc;
6557   for (unsigned i = 0; i < OpNum; ++i) {
6558     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
6559       return false;
6560   }
6561 
6562   return true;
6563 }
6564 
6565 bool
6566 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6567   using namespace llvm::AMDGPU::Swizzle;
6568 
6569   int64_t Lane[LANE_NUM];
6570   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6571                            "expected a 2-bit lane id")) {
6572     Imm = QUAD_PERM_ENC;
6573     for (unsigned I = 0; I < LANE_NUM; ++I) {
6574       Imm |= Lane[I] << (LANE_SHIFT * I);
6575     }
6576     return true;
6577   }
6578   return false;
6579 }
6580 
6581 bool
6582 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6583   using namespace llvm::AMDGPU::Swizzle;
6584 
6585   SMLoc Loc;
6586   int64_t GroupSize;
6587   int64_t LaneIdx;
6588 
6589   if (!parseSwizzleOperand(GroupSize,
6590                            2, 32,
6591                            "group size must be in the interval [2,32]",
6592                            Loc)) {
6593     return false;
6594   }
6595   if (!isPowerOf2_64(GroupSize)) {
6596     Error(Loc, "group size must be a power of two");
6597     return false;
6598   }
6599   if (parseSwizzleOperand(LaneIdx,
6600                           0, GroupSize - 1,
6601                           "lane id must be in the interval [0,group size - 1]",
6602                           Loc)) {
6603     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6604     return true;
6605   }
6606   return false;
6607 }
6608 
6609 bool
6610 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
6611   using namespace llvm::AMDGPU::Swizzle;
6612 
6613   SMLoc Loc;
6614   int64_t GroupSize;
6615 
6616   if (!parseSwizzleOperand(GroupSize,
6617                            2, 32,
6618                            "group size must be in the interval [2,32]",
6619                            Loc)) {
6620     return false;
6621   }
6622   if (!isPowerOf2_64(GroupSize)) {
6623     Error(Loc, "group size must be a power of two");
6624     return false;
6625   }
6626 
6627   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6628   return true;
6629 }
6630 
6631 bool
6632 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6633   using namespace llvm::AMDGPU::Swizzle;
6634 
6635   SMLoc Loc;
6636   int64_t GroupSize;
6637 
6638   if (!parseSwizzleOperand(GroupSize,
6639                            1, 16,
6640                            "group size must be in the interval [1,16]",
6641                            Loc)) {
6642     return false;
6643   }
6644   if (!isPowerOf2_64(GroupSize)) {
6645     Error(Loc, "group size must be a power of two");
6646     return false;
6647   }
6648 
6649   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6650   return true;
6651 }
6652 
6653 bool
6654 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
6655   using namespace llvm::AMDGPU::Swizzle;
6656 
6657   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6658     return false;
6659   }
6660 
6661   StringRef Ctl;
6662   SMLoc StrLoc = getLoc();
6663   if (!parseString(Ctl)) {
6664     return false;
6665   }
6666   if (Ctl.size() != BITMASK_WIDTH) {
6667     Error(StrLoc, "expected a 5-character mask");
6668     return false;
6669   }
6670 
6671   unsigned AndMask = 0;
6672   unsigned OrMask = 0;
6673   unsigned XorMask = 0;
6674 
6675   for (size_t i = 0; i < Ctl.size(); ++i) {
6676     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
6677     switch(Ctl[i]) {
6678     default:
6679       Error(StrLoc, "invalid mask");
6680       return false;
6681     case '0':
6682       break;
6683     case '1':
6684       OrMask |= Mask;
6685       break;
6686     case 'p':
6687       AndMask |= Mask;
6688       break;
6689     case 'i':
6690       AndMask |= Mask;
6691       XorMask |= Mask;
6692       break;
6693     }
6694   }
6695 
6696   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
6697   return true;
6698 }
6699 
6700 bool
6701 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6702 
6703   SMLoc OffsetLoc = getLoc();
6704 
6705   if (!parseExpr(Imm, "a swizzle macro")) {
6706     return false;
6707   }
6708   if (!isUInt<16>(Imm)) {
6709     Error(OffsetLoc, "expected a 16-bit offset");
6710     return false;
6711   }
6712   return true;
6713 }
6714 
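// Parse a swizzle macro. The "swizzle" identifier has already been consumed
// by the caller; the accepted forms are:
//   swizzle(QUAD_PERM, <lane0>, <lane1>, <lane2>, <lane3>)
//   swizzle(BITMASK_PERM, "<5-character mask>")
//   swizzle(BROADCAST, <group size>, <lane id>)
//   swizzle(SWAP, <group size>)
//   swizzle(REVERSE, <group size>)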
6715 bool
6716 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6717   using namespace llvm::AMDGPU::Swizzle;
6718 
  if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
6720 
6721     SMLoc ModeLoc = getLoc();
6722     bool Ok = false;
6723 
6724     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6725       Ok = parseSwizzleQuadPerm(Imm);
6726     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6727       Ok = parseSwizzleBitmaskPerm(Imm);
6728     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6729       Ok = parseSwizzleBroadcast(Imm);
6730     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6731       Ok = parseSwizzleSwap(Imm);
6732     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6733       Ok = parseSwizzleReverse(Imm);
6734     } else {
6735       Error(ModeLoc, "expected a swizzle mode");
6736     }
6737 
    return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
6739   }
6740 
6741   return false;
6742 }
6743 
6744 OperandMatchResultTy
6745 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6746   SMLoc S = getLoc();
6747   int64_t Imm = 0;
6748 
6749   if (trySkipId("offset")) {
6750 
6751     bool Ok = false;
6752     if (skipToken(AsmToken::Colon, "expected a colon")) {
6753       if (trySkipId("swizzle")) {
6754         Ok = parseSwizzleMacro(Imm);
6755       } else {
6756         Ok = parseSwizzleOffset(Imm);
6757       }
6758     }
6759 
6760     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
6761 
6762     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
6763   } else {
6764     // Swizzle "offset" operand is optional.
6765     // If it is omitted, try parsing other optional operands.
6766     return parseOptionalOpr(Operands);
6767   }
6768 }
6769 
6770 bool
6771 AMDGPUOperand::isSwizzle() const {
6772   return isImmTy(ImmTySwizzle);
6773 }
6774 
6775 //===----------------------------------------------------------------------===//
6776 // VGPR Index Mode
6777 //===----------------------------------------------------------------------===//
6778 
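// Parse the body of a gpr_idx operand, e.g. gpr_idx(SRC0,DST). The leading
// "gpr_idx(" has already been consumed by the caller; an empty mode list
// encodes as OFF.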
6779 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
6780 
6781   using namespace llvm::AMDGPU::VGPRIndexMode;
6782 
6783   if (trySkipToken(AsmToken::RParen)) {
6784     return OFF;
6785   }
6786 
6787   int64_t Imm = 0;
6788 
6789   while (true) {
6790     unsigned Mode = 0;
6791     SMLoc S = getLoc();
6792 
6793     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
6794       if (trySkipId(IdSymbolic[ModeId])) {
6795         Mode = 1 << ModeId;
6796         break;
6797       }
6798     }
6799 
6800     if (Mode == 0) {
6801       Error(S, (Imm == 0)?
6802                "expected a VGPR index mode or a closing parenthesis" :
6803                "expected a VGPR index mode");
6804       return UNDEF;
6805     }
6806 
6807     if (Imm & Mode) {
6808       Error(S, "duplicate VGPR index mode");
6809       return UNDEF;
6810     }
6811     Imm |= Mode;
6812 
6813     if (trySkipToken(AsmToken::RParen))
6814       break;
6815     if (!skipToken(AsmToken::Comma,
6816                    "expected a comma or a closing parenthesis"))
6817       return UNDEF;
6818   }
6819 
6820   return Imm;
6821 }
6822 
6823 OperandMatchResultTy
6824 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
6825 
6826   using namespace llvm::AMDGPU::VGPRIndexMode;
6827 
6828   int64_t Imm = 0;
6829   SMLoc S = getLoc();
6830 
6831   if (trySkipId("gpr_idx", AsmToken::LParen)) {
6832     Imm = parseGPRIdxMacro();
6833     if (Imm == UNDEF)
6834       return MatchOperand_ParseFail;
6835   } else {
6836     if (getParser().parseAbsoluteExpression(Imm))
6837       return MatchOperand_ParseFail;
6838     if (Imm < 0 || !isUInt<4>(Imm)) {
6839       Error(S, "invalid immediate: only 4-bit values are legal");
6840       return MatchOperand_ParseFail;
6841     }
6842   }
6843 
6844   Operands.push_back(
6845       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
6846   return MatchOperand_Success;
6847 }
6848 
6849 bool AMDGPUOperand::isGPRIdxMode() const {
6850   return isImmTy(ImmTyGprIdxMode);
6851 }
6852 
6853 //===----------------------------------------------------------------------===//
6854 // sopp branch targets
6855 //===----------------------------------------------------------------------===//
6856 
6857 OperandMatchResultTy
6858 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
6859 
6860   // Make sure we are not parsing something
6861   // that looks like a label or an expression but is not.
6862   // This will improve error messages.
6863   if (isRegister() || isModifier())
6864     return MatchOperand_NoMatch;
6865 
6866   if (!parseExpr(Operands))
6867     return MatchOperand_ParseFail;
6868 
6869   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
6870   assert(Opr.isImm() || Opr.isExpr());
6871   SMLoc Loc = Opr.getStartLoc();
6872 
6873   // Currently we do not support arbitrary expressions as branch targets.
6874   // Only labels and absolute expressions are accepted.
6875   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
6876     Error(Loc, "expected an absolute expression or a label");
6877   } else if (Opr.isImm() && !Opr.isS16Imm()) {
6878     Error(Loc, "expected a 16-bit signed jump offset");
6879   }
6880 
6881   return MatchOperand_Success;
6882 }
6883 
6884 //===----------------------------------------------------------------------===//
6885 // Boolean holding registers
6886 //===----------------------------------------------------------------------===//
6887 
6888 OperandMatchResultTy
6889 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
6890   return parseReg(Operands);
6891 }
6892 
6893 //===----------------------------------------------------------------------===//
6894 // mubuf
6895 //===----------------------------------------------------------------------===//
6896 
6897 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
6898   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
6899 }
6900 
6901 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
6902                                    const OperandVector &Operands,
6903                                    bool IsAtomic,
6904                                    bool IsLds) {
6905   bool IsLdsOpcode = IsLds;
6906   bool HasLdsModifier = false;
6907   OptionalImmIndexMap OptionalIdx;
6908   unsigned FirstOperandIdx = 1;
6909   bool IsAtomicReturn = false;
6910 
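  // For atomics, the GLC cache policy bit selects the "return" variant of
  // the instruction, i.e. whether the pre-op value is written back to the
  // data register.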
6911   if (IsAtomic) {
6912     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
6913       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6914       if (!Op.isCPol())
6915         continue;
6916       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
6917       break;
6918     }
6919 
6920     if (!IsAtomicReturn) {
6921       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
6922       if (NewOpc != -1)
6923         Inst.setOpcode(NewOpc);
6924     }
6925 
6926     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
6927                       SIInstrFlags::IsAtomicRet;
6928   }
6929 
6930   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
6931     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6932 
6933     // Add the register arguments
6934     if (Op.isReg()) {
6935       Op.addRegOperands(Inst, 1);
      // Insert a tied src for atomic return dst.
      // This cannot be postponed as subsequent calls to
      // addImmOperands rely on the correct number of MC operands.
6939       if (IsAtomicReturn && i == FirstOperandIdx)
6940         Op.addRegOperands(Inst, 1);
6941       continue;
6942     }
6943 
6944     // Handle the case where soffset is an immediate
6945     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6946       Op.addImmOperands(Inst, 1);
6947       continue;
6948     }
6949 
6950     HasLdsModifier |= Op.isLDS();
6951 
6952     // Handle tokens like 'offen' which are sometimes hard-coded into the
6953     // asm string.  There are no MCInst operands for these.
6954     if (Op.isToken()) {
6955       continue;
6956     }
6957     assert(Op.isImm());
6958 
6959     // Handle optional arguments
6960     OptionalIdx[Op.getImmTy()] = i;
6961   }
6962 
  // This is a workaround for an llvm quirk which may result in an
  // incorrect instruction selection. Lds and non-lds versions of
  // MUBUF instructions are identical except that lds versions
  // have a mandatory 'lds' modifier. However, this modifier follows
  // optional modifiers, and the llvm asm matcher regards this 'lds'
  // modifier as an optional one. As a result, an lds version
  // of an opcode may be selected even if it has no 'lds' modifier.
6970   if (IsLdsOpcode && !HasLdsModifier) {
6971     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
6972     if (NoLdsOpcode != -1) { // Got lds version - correct it.
6973       Inst.setOpcode(NoLdsOpcode);
6974       IsLdsOpcode = false;
6975     }
6976   }
6977 
6978   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
6979   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
6980 
6981   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
6982     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6983   }
6984   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
6985 }
6986 
6987 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
6988   OptionalImmIndexMap OptionalIdx;
6989 
6990   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6991     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6992 
6993     // Add the register arguments
6994     if (Op.isReg()) {
6995       Op.addRegOperands(Inst, 1);
6996       continue;
6997     }
6998 
6999     // Handle the case where soffset is an immediate
7000     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7001       Op.addImmOperands(Inst, 1);
7002       continue;
7003     }
7004 
7005     // Handle tokens like 'offen' which are sometimes hard-coded into the
7006     // asm string.  There are no MCInst operands for these.
7007     if (Op.isToken()) {
7008       continue;
7009     }
7010     assert(Op.isImm());
7011 
7012     // Handle optional arguments
7013     OptionalIdx[Op.getImmTy()] = i;
7014   }
7015 
7016   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7017                         AMDGPUOperand::ImmTyOffset);
7018   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7019   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7020   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7021   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7022 }
7023 
7024 //===----------------------------------------------------------------------===//
7025 // mimg
7026 //===----------------------------------------------------------------------===//
7027 
7028 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7029                               bool IsAtomic) {
7030   unsigned I = 1;
7031   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7032   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7033     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7034   }
7035 
7036   if (IsAtomic) {
7037     // Add src, same as dst
7038     assert(Desc.getNumDefs() == 1);
7039     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7040   }
7041 
7042   OptionalImmIndexMap OptionalIdx;
7043 
7044   for (unsigned E = Operands.size(); I != E; ++I) {
7045     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7046 
7047     // Add the register arguments
7048     if (Op.isReg()) {
7049       Op.addRegOperands(Inst, 1);
7050     } else if (Op.isImmModifier()) {
7051       OptionalIdx[Op.getImmTy()] = I;
7052     } else if (!Op.isToken()) {
7053       llvm_unreachable("unexpected operand type");
7054     }
7055   }
7056 
7057   bool IsGFX10Plus = isGFX10Plus();
7058 
7059   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7060   if (IsGFX10Plus)
7061     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7062   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7063   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7064   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7065   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7066     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7067   if (IsGFX10Plus)
7068     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7069   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7070   if (!IsGFX10Plus)
7071     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7072   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7073 }
7074 
7075 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7076   cvtMIMG(Inst, Operands, true);
7077 }
7078 
7079 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7080   OptionalImmIndexMap OptionalIdx;
7081   bool IsAtomicReturn = false;
7082 
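  // As for MUBUF atomics, the GLC bit selects the "return" variant of the
  // instruction.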
7083   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7084     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7085     if (!Op.isCPol())
7086       continue;
7087     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7088     break;
7089   }
7090 
7091   if (!IsAtomicReturn) {
7092     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7093     if (NewOpc != -1)
7094       Inst.setOpcode(NewOpc);
7095   }
7096 
7097   IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7098                     SIInstrFlags::IsAtomicRet;
7099 
7100   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7101     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7102 
7103     // Add the register arguments
7104     if (Op.isReg()) {
7105       Op.addRegOperands(Inst, 1);
7106       if (IsAtomicReturn && i == 1)
7107         Op.addRegOperands(Inst, 1);
7108       continue;
7109     }
7110 
7111     // Handle the case where soffset is an immediate
7112     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7113       Op.addImmOperands(Inst, 1);
7114       continue;
7115     }
7116 
7117     // Handle tokens like 'offen' which are sometimes hard-coded into the
7118     // asm string.  There are no MCInst operands for these.
7119     if (Op.isToken()) {
7120       continue;
7121     }
7122     assert(Op.isImm());
7123 
7124     // Handle optional arguments
7125     OptionalIdx[Op.getImmTy()] = i;
7126   }
7127 
7128   if ((int)Inst.getNumOperands() <=
7129       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7130     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7131   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7132 }
7133 
7134 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7135                                       const OperandVector &Operands) {
7136   for (unsigned I = 1; I < Operands.size(); ++I) {
7137     auto &Operand = (AMDGPUOperand &)*Operands[I];
7138     if (Operand.isReg())
7139       Operand.addRegOperands(Inst, 1);
7140   }
7141 
7142   Inst.addOperand(MCOperand::createImm(1)); // a16
7143 }
7144 
7145 //===----------------------------------------------------------------------===//
7146 // smrd
7147 //===----------------------------------------------------------------------===//
7148 
7149 bool AMDGPUOperand::isSMRDOffset8() const {
7150   return isImm() && isUInt<8>(getImm());
7151 }
7152 
7153 bool AMDGPUOperand::isSMEMOffset() const {
7154   return isImm(); // Offset range is checked later by validator.
7155 }
7156 
7157 bool AMDGPUOperand::isSMRDLiteralOffset() const {
  // 32-bit literals are only supported on CI and we only want to use them
  // when the offset does not fit in 8 bits.
7160   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7161 }
7162 
7163 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7164   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7165 }
7166 
7167 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7168   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7169 }
7170 
7171 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7172   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7173 }
7174 
7175 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7176   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7177 }
7178 
7179 //===----------------------------------------------------------------------===//
7180 // vop3
7181 //===----------------------------------------------------------------------===//
7182 
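// The omod (output modifier) field is encoded as 0 (none), 1 (multiply by 2),
// 2 (multiply by 4) and 3 (divide by 2); the converters below map the
// mul:1|2|4 and div:1|2 asm forms onto this encoding.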
7183 static bool ConvertOmodMul(int64_t &Mul) {
7184   if (Mul != 1 && Mul != 2 && Mul != 4)
7185     return false;
7186 
7187   Mul >>= 1;
7188   return true;
7189 }
7190 
7191 static bool ConvertOmodDiv(int64_t &Div) {
7192   if (Div == 1) {
7193     Div = 0;
7194     return true;
7195   }
7196 
7197   if (Div == 2) {
7198     Div = 3;
7199     return true;
7200   }
7201 
7202   return false;
7203 }
7204 
7205 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7206 // This is intentional and ensures compatibility with sp3.
7207 // See bug 35397 for details.
7208 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7209   if (BoundCtrl == 0 || BoundCtrl == 1) {
7210     BoundCtrl = 1;
7211     return true;
7212   }
7213   return false;
7214 }
7215 
7216 // Note: the order in this table matches the order of operands in AsmString.
7217 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7218   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7219   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7220   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7221   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7222   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7223   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7224   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7225   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7226   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7227   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7228   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7229   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7230   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7231   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7232   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7233   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7234   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7235   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7236   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7237   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7238   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7239   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7240   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7241   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7242   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7243   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7244   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7245   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
7246   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7247   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7248   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7249   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7250   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7251   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7252   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7253   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7254   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7255   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7256   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7257   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7258   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7259 };
7260 
7261 void AMDGPUAsmParser::onBeginOfFile() {
7262   if (!getParser().getStreamer().getTargetStreamer() ||
7263       getSTI().getTargetTriple().getArch() == Triple::r600)
7264     return;
7265 
7266   if (!getTargetStreamer().getTargetID())
7267     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7268 
7269   if (isHsaAbiVersion3Or4(&getSTI()))
7270     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7271 }
7272 
7273 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7274 
7275   OperandMatchResultTy res = parseOptionalOpr(Operands);
7276 
  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // The current design assumes that all operands after the first optional
  // operand are also optional. However, the implementation of some
  // instructions violates this rule (see e.g. flat/global atomics, which
  // have hardcoded 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure the autogenerated parser of custom operands never hits
  // hardcoded mandatory operands.
7287 
7288   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7289     if (res != MatchOperand_Success ||
7290         isToken(AsmToken::EndOfStatement))
7291       break;
7292 
7293     trySkipToken(AsmToken::Comma);
7294     res = parseOptionalOpr(Operands);
7295   }
7296 
7297   return res;
7298 }
7299 
7300 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7301   OperandMatchResultTy res;
7302   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7303     // try to parse any optional operand here
7304     if (Op.IsBit) {
7305       res = parseNamedBit(Op.Name, Operands, Op.Type);
7306     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7307       res = parseOModOperand(Operands);
7308     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7309                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7310                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7311       res = parseSDWASel(Operands, Op.Name, Op.Type);
7312     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7313       res = parseSDWADstUnused(Operands);
7314     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7315                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7316                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7317                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7318       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7319                                         Op.ConvertResult);
7320     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7321       res = parseDim(Operands);
7322     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7323       res = parseCPol(Operands);
7324     } else {
7325       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7326     }
7327     if (res != MatchOperand_NoMatch) {
7328       return res;
7329     }
7330   }
7331   return MatchOperand_NoMatch;
7332 }
7333 
7334 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7335   StringRef Name = getTokenStr();
7336   if (Name == "mul") {
7337     return parseIntWithPrefix("mul", Operands,
7338                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7339   }
7340 
7341   if (Name == "div") {
7342     return parseIntWithPrefix("div", Operands,
7343                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7344   }
7345 
7346   return MatchOperand_NoMatch;
7347 }
7348 
7349 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
7350   cvtVOP3P(Inst, Operands);
7351 
7352   int Opc = Inst.getOpcode();
7353 
7354   int SrcNum;
7355   const int Ops[] = { AMDGPU::OpName::src0,
7356                       AMDGPU::OpName::src1,
7357                       AMDGPU::OpName::src2 };
7358   for (SrcNum = 0;
7359        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
7360        ++SrcNum);
7361   assert(SrcNum > 0);
7362 
7363   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7364   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7365 
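  // The op_sel bit that follows the last source operand selects the
  // destination; record it as DST_OP_SEL in src0_modifiers.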
7366   if ((OpSel & (1 << SrcNum)) != 0) {
7367     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7368     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7369     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
7370   }
7371 }
7372 
7373 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
      // 1. This operand holds input modifiers
  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not the last operand
      && Desc.NumOperands > (OpNum + 1)
      // 3. The next operand has a register class
      && Desc.OpInfo[OpNum + 1].RegClass != -1
      // 4. The next register is not tied to any other operand
      && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7382 }
7383 
7384 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7385 {
7386   OptionalImmIndexMap OptionalIdx;
7387   unsigned Opc = Inst.getOpcode();
7388 
7389   unsigned I = 1;
7390   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7391   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7392     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7393   }
7394 
7395   for (unsigned E = Operands.size(); I != E; ++I) {
7396     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7397     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7398       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7399     } else if (Op.isInterpSlot() ||
7400                Op.isInterpAttr() ||
7401                Op.isAttrChan()) {
7402       Inst.addOperand(MCOperand::createImm(Op.getImm()));
7403     } else if (Op.isImmModifier()) {
7404       OptionalIdx[Op.getImmTy()] = I;
7405     } else {
7406       llvm_unreachable("unhandled operand type");
7407     }
7408   }
7409 
7410   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
7411     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
7412   }
7413 
7414   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7415     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7416   }
7417 
7418   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7419     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7420   }
7421 }
7422 
7423 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
7424                               OptionalImmIndexMap &OptionalIdx) {
7425   unsigned Opc = Inst.getOpcode();
7426 
7427   unsigned I = 1;
7428   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7429   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7430     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7431   }
7432 
7433   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
7434     // This instruction has src modifiers
7435     for (unsigned E = Operands.size(); I != E; ++I) {
7436       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7437       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7438         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7439       } else if (Op.isImmModifier()) {
7440         OptionalIdx[Op.getImmTy()] = I;
7441       } else if (Op.isRegOrImm()) {
7442         Op.addRegOrImmOperands(Inst, 1);
7443       } else {
7444         llvm_unreachable("unhandled operand type");
7445       }
7446     }
7447   } else {
7448     // No src modifiers
7449     for (unsigned E = Operands.size(); I != E; ++I) {
7450       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7451       if (Op.isMod()) {
7452         OptionalIdx[Op.getImmTy()] = I;
7453       } else {
7454         Op.addRegOrImmOperands(Inst, 1);
7455       }
7456     }
7457   }
7458 
7459   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7460     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7461   }
7462 
7463   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7464     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7465   }
7466 
  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // they have a src2 register operand that is tied to the dst operand.
  // We don't allow modifiers for this operand in the assembler, so
  // src2_modifiers should be 0.
7471   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
7472       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
7473       Opc == AMDGPU::V_MAC_F32_e64_vi ||
7474       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
7475       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
7476       Opc == AMDGPU::V_MAC_F16_e64_vi ||
7477       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
7478       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
7479       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
7480       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
7481       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
7482     auto it = Inst.begin();
7483     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
7484     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
7485     ++it;
7486     // Copy the operand to ensure it's not invalidated when Inst grows.
7487     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
7488   }
7489 }
7490 
7491 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
7492   OptionalImmIndexMap OptionalIdx;
7493   cvtVOP3(Inst, Operands, OptionalIdx);
7494 }
7495 
7496 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
7497                                OptionalImmIndexMap &OptIdx) {
7498   const int Opc = Inst.getOpcode();
7499   const MCInstrDesc &Desc = MII.get(Opc);
7500 
7501   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
7502 
7503   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
7504     assert(!IsPacked);
7505     Inst.addOperand(Inst.getOperand(0));
7506   }
7507 
  // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
  // instruction, and then figure out where to actually put the modifiers.
7510 
7511   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7512   if (OpSelIdx != -1) {
7513     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
7514   }
7515 
7516   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
7517   if (OpSelHiIdx != -1) {
7518     int DefaultVal = IsPacked ? -1 : 0;
7519     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
7520                           DefaultVal);
7521   }
7522 
7523   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
7524   if (NegLoIdx != -1) {
7525     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
7526     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
7527   }
7528 
7529   const int Ops[] = { AMDGPU::OpName::src0,
7530                       AMDGPU::OpName::src1,
7531                       AMDGPU::OpName::src2 };
7532   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
7533                          AMDGPU::OpName::src1_modifiers,
7534                          AMDGPU::OpName::src2_modifiers };
7535 
7536   unsigned OpSel = 0;
7537   unsigned OpSelHi = 0;
7538   unsigned NegLo = 0;
7539   unsigned NegHi = 0;
7540 
7541   if (OpSelIdx != -1)
7542     OpSel = Inst.getOperand(OpSelIdx).getImm();
7543 
7544   if (OpSelHiIdx != -1)
7545     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
7546 
7547   if (NegLoIdx != -1) {
7548     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
7549     NegLo = Inst.getOperand(NegLoIdx).getImm();
7550     NegHi = Inst.getOperand(NegHiIdx).getImm();
7551   }
7552 
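  // Pack the per-source op_sel/op_sel_hi/neg_lo/neg_hi bits into the
  // corresponding src*_modifiers operands.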
7553   for (int J = 0; J < 3; ++J) {
7554     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
7555     if (OpIdx == -1)
7556       break;
7557 
7558     uint32_t ModVal = 0;
7559 
7560     if ((OpSel & (1 << J)) != 0)
7561       ModVal |= SISrcMods::OP_SEL_0;
7562 
7563     if ((OpSelHi & (1 << J)) != 0)
7564       ModVal |= SISrcMods::OP_SEL_1;
7565 
7566     if ((NegLo & (1 << J)) != 0)
7567       ModVal |= SISrcMods::NEG;
7568 
7569     if ((NegHi & (1 << J)) != 0)
7570       ModVal |= SISrcMods::NEG_HI;
7571 
7572     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
7573 
7574     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
7575   }
7576 }
7577 
7578 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
7579   OptionalImmIndexMap OptIdx;
7580   cvtVOP3(Inst, Operands, OptIdx);
7581   cvtVOP3P(Inst, Operands, OptIdx);
7582 }
7583 
7584 //===----------------------------------------------------------------------===//
7585 // dpp
7586 //===----------------------------------------------------------------------===//
7587 
7588 bool AMDGPUOperand::isDPP8() const {
7589   return isImmTy(ImmTyDPP8);
7590 }
7591 
7592 bool AMDGPUOperand::isDPPCtrl() const {
7593   using namespace AMDGPU::DPP;
7594 
7595   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
7596   if (result) {
7597     int64_t Imm = getImm();
7598     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
7599            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
7600            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
7601            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
7602            (Imm == DppCtrl::WAVE_SHL1) ||
7603            (Imm == DppCtrl::WAVE_ROL1) ||
7604            (Imm == DppCtrl::WAVE_SHR1) ||
7605            (Imm == DppCtrl::WAVE_ROR1) ||
7606            (Imm == DppCtrl::ROW_MIRROR) ||
7607            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
7608            (Imm == DppCtrl::BCAST15) ||
7609            (Imm == DppCtrl::BCAST31) ||
7610            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
7611            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
7612   }
7613   return false;
7614 }
7615 
7616 //===----------------------------------------------------------------------===//
7617 // mAI
7618 //===----------------------------------------------------------------------===//
7619 
7620 bool AMDGPUOperand::isBLGP() const {
7621   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
7622 }
7623 
7624 bool AMDGPUOperand::isCBSZ() const {
7625   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
7626 }
7627 
7628 bool AMDGPUOperand::isABID() const {
7629   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
7630 }
7631 
7632 bool AMDGPUOperand::isS16Imm() const {
7633   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
7634 }
7635 
7636 bool AMDGPUOperand::isU16Imm() const {
7637   return isImm() && isUInt<16>(getImm());
7638 }
7639 
7640 //===----------------------------------------------------------------------===//
7641 // dim
7642 //===----------------------------------------------------------------------===//
7643 
7644 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
7645   // We want to allow "dim:1D" etc.,
7646   // but the initial 1 is tokenized as an integer.
7647   std::string Token;
7648   if (isToken(AsmToken::Integer)) {
7649     SMLoc Loc = getToken().getEndLoc();
7650     Token = std::string(getTokenStr());
7651     lex();
7652     if (getLoc() != Loc)
7653       return false;
7654   }
7655 
7656   StringRef Suffix;
7657   if (!parseId(Suffix))
7658     return false;
7659   Token += Suffix;
7660 
7661   StringRef DimId = Token;
7662   if (DimId.startswith("SQ_RSRC_IMG_"))
7663     DimId = DimId.drop_front(12);
7664 
7665   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
7666   if (!DimInfo)
7667     return false;
7668 
7669   Encoding = DimInfo->Encoding;
7670   return true;
7671 }
7672 
7673 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
7674   if (!isGFX10Plus())
7675     return MatchOperand_NoMatch;
7676 
7677   SMLoc S = getLoc();
7678 
7679   if (!trySkipId("dim", AsmToken::Colon))
7680     return MatchOperand_NoMatch;
7681 
7682   unsigned Encoding;
7683   SMLoc Loc = getLoc();
7684   if (!parseDimId(Encoding)) {
7685     Error(Loc, "invalid dim value");
7686     return MatchOperand_ParseFail;
7687   }
7688 
7689   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
7690                                               AMDGPUOperand::ImmTyDim));
7691   return MatchOperand_Success;
7692 }
7693 
7694 //===----------------------------------------------------------------------===//
7695 // dpp
7696 //===----------------------------------------------------------------------===//
7697 
7698 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
7699   SMLoc S = getLoc();
7700 
7701   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
7702     return MatchOperand_NoMatch;
7703 
7704   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
7705 
7706   int64_t Sels[8];
7707 
7708   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7709     return MatchOperand_ParseFail;
7710 
7711   for (size_t i = 0; i < 8; ++i) {
7712     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7713       return MatchOperand_ParseFail;
7714 
7715     SMLoc Loc = getLoc();
7716     if (getParser().parseAbsoluteExpression(Sels[i]))
7717       return MatchOperand_ParseFail;
7718     if (0 > Sels[i] || 7 < Sels[i]) {
7719       Error(Loc, "expected a 3-bit value");
7720       return MatchOperand_ParseFail;
7721     }
7722   }
7723 
7724   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7725     return MatchOperand_ParseFail;
7726 
7727   unsigned DPP8 = 0;
7728   for (size_t i = 0; i < 8; ++i)
7729     DPP8 |= (Sels[i] << (i * 3));
7730 
7731   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
7732   return MatchOperand_Success;
7733 }
7734 
7735 bool
7736 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
7737                                     const OperandVector &Operands) {
  if (Ctrl == "row_newbcast")
    return isGFX90A();
7740 
7741   // DPP64 is supported for row_newbcast only.
7742   const MCRegisterInfo *MRI = getMRI();
7743   if (Operands.size() > 2 && Operands[1]->isReg() &&
7744       MRI->getSubReg(Operands[1]->getReg(), AMDGPU::sub1))
7745     return false;
7746 
7747   if (Ctrl == "row_share" ||
7748       Ctrl == "row_xmask")
7749     return isGFX10Plus();
7750 
7751   if (Ctrl == "wave_shl" ||
7752       Ctrl == "wave_shr" ||
7753       Ctrl == "wave_rol" ||
7754       Ctrl == "wave_ror" ||
7755       Ctrl == "row_bcast")
7756     return isVI() || isGFX9();
7757 
7758   return Ctrl == "row_mirror" ||
7759          Ctrl == "row_half_mirror" ||
7760          Ctrl == "quad_perm" ||
7761          Ctrl == "row_shl" ||
7762          Ctrl == "row_shr" ||
7763          Ctrl == "row_ror";
7764 }
7765 
7766 int64_t
7767 AMDGPUAsmParser::parseDPPCtrlPerm() {
7768   // quad_perm:[%d,%d,%d,%d]
7769 
7770   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7771     return -1;
7772 
7773   int64_t Val = 0;
7774   for (int i = 0; i < 4; ++i) {
7775     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7776       return -1;
7777 
7778     int64_t Temp;
7779     SMLoc Loc = getLoc();
7780     if (getParser().parseAbsoluteExpression(Temp))
7781       return -1;
7782     if (Temp < 0 || Temp > 3) {
7783       Error(Loc, "expected a 2-bit value");
7784       return -1;
7785     }
7786 
7787     Val += (Temp << i * 2);
7788   }
7789 
7790   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7791     return -1;
7792 
7793   return Val;
7794 }
7795 
7796 int64_t
7797 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
7798   using namespace AMDGPU::DPP;
7799 
7800   // sel:%d
7801 
7802   int64_t Val;
7803   SMLoc Loc = getLoc();
7804 
7805   if (getParser().parseAbsoluteExpression(Val))
7806     return -1;
7807 
7808   struct DppCtrlCheck {
7809     int64_t Ctrl;
7810     int Lo;
7811     int Hi;
7812   };
7813 
7814   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
7815     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
7816     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
7817     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
7818     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
7819     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
7820     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
7821     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
7822     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
7823     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
7824     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
7825     .Default({-1, 0, 0});
7826 
7827   bool Valid;
7828   if (Check.Ctrl == -1) {
7829     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
7830     Val = (Val == 15) ? DppCtrl::BCAST15 : DppCtrl::BCAST31;
7831   } else {
7832     Valid = Check.Lo <= Val && Val <= Check.Hi;
7833     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
7834   }
7835 
7836   if (!Valid) {
7837     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
7838     return -1;
7839   }
7840 
7841   return Val;
7842 }
7843 
7844 OperandMatchResultTy
7845 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
7846   using namespace AMDGPU::DPP;
7847 
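  // Accepted forms are "row_mirror", "row_half_mirror", "quad_perm:[a,b,c,d]",
  // and "<ctrl>:<n>" selectors such as "row_shl:1" or "row_bcast:15"; the
  // per-target set is filtered by isSupportedDPPCtrl().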
7848   if (!isToken(AsmToken::Identifier) ||
7849       !isSupportedDPPCtrl(getTokenStr(), Operands))
7850     return MatchOperand_NoMatch;
7851 
7852   SMLoc S = getLoc();
7853   int64_t Val = -1;
7854   StringRef Ctrl;
7855 
7856   parseId(Ctrl);
7857 
7858   if (Ctrl == "row_mirror") {
7859     Val = DppCtrl::ROW_MIRROR;
7860   } else if (Ctrl == "row_half_mirror") {
7861     Val = DppCtrl::ROW_HALF_MIRROR;
7862   } else {
7863     if (skipToken(AsmToken::Colon, "expected a colon")) {
7864       if (Ctrl == "quad_perm") {
7865         Val = parseDPPCtrlPerm();
7866       } else {
7867         Val = parseDPPCtrlSel(Ctrl);
7868       }
7869     }
7870   }
7871 
7872   if (Val == -1)
7873     return MatchOperand_ParseFail;
7874 
7875   Operands.push_back(
7876     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
7877   return MatchOperand_Success;
7878 }
7879 
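// The optional DPP mask operands default to 0xf (row_mask, bank_mask);
// bound_ctrl and fi default to 0.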
7880 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
7881   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
7882 }
7883 
7884 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
7885   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
7886 }
7887 
7888 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
7889   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
7890 }
7891 
7892 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
7893   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
7894 }
7895 
7896 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
7897   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
7898 }
7899 
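// Convert parsed DPP/DPP8 operands to an MCInst. Source registers that take
// input modifiers contribute two MC operands each; dpp8 / dpp_ctrl and the
// optional masks are emitted as immediates.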
7900 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
7901   OptionalImmIndexMap OptionalIdx;
7902 
7903   unsigned I = 1;
7904   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7905   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7906     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7907   }
7908 
7909   int Fi = 0;
7910   for (unsigned E = Operands.size(); I != E; ++I) {
7911     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
7912                                             MCOI::TIED_TO);
7913     if (TiedTo != -1) {
7914       assert((unsigned)TiedTo < Inst.getNumOperands());
7915       // Handle the tied 'old' or 'src2' operand for MAC instructions.
7916       Inst.addOperand(Inst.getOperand(TiedTo));
7917     }
7918     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7919     // Add the register arguments
7920     if (Op.isReg() && validateVccOperand(Op.getReg())) {
7921       // VOP2b (v_add_u32, v_sub_u32 ...) DPP uses the "vcc" token.
7922       // Skip it.
7923       continue;
7924     }
7925 
7926     if (IsDPP8) {
7927       if (Op.isDPP8()) {
7928         Op.addImmOperands(Inst, 1);
7929       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7930         Op.addRegWithFPInputModsOperands(Inst, 2);
7931       } else if (Op.isFI()) {
7932         Fi = Op.getImm();
7933       } else if (Op.isReg()) {
7934         Op.addRegOperands(Inst, 1);
7935       } else {
7936         llvm_unreachable("Invalid operand type");
7937       }
7938     } else {
7939       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7940         Op.addRegWithFPInputModsOperands(Inst, 2);
7941       } else if (Op.isDPPCtrl()) {
7942         Op.addImmOperands(Inst, 1);
7943       } else if (Op.isImm()) {
7944         // Handle optional arguments
7945         OptionalIdx[Op.getImmTy()] = I;
7946       } else {
7947         llvm_unreachable("Invalid operand type");
7948       }
7949     }
7950   }
7951 
7952   if (IsDPP8) {
7953     using namespace llvm::AMDGPU::DPP;
7954     Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
7955   } else {
7956     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
7957     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
7958     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
7959     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
7960       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
7961     }
7962   }
7963 }
7964 
7965 //===----------------------------------------------------------------------===//
7966 // sdwa
7967 //===----------------------------------------------------------------------===//
7968 
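// Parse an SDWA selector of the form "<prefix>:<VALUE>", e.g. "dst_sel:BYTE_0"
// or "src0_sel:WORD_1"; the prefix is supplied by the caller.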
7969 OperandMatchResultTy
7970 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
7971                               AMDGPUOperand::ImmTy Type) {
7972   using namespace llvm::AMDGPU::SDWA;
7973 
7974   SMLoc S = getLoc();
7975   StringRef Value;
7976   OperandMatchResultTy res;
7977 
7978   SMLoc StringLoc;
7979   res = parseStringWithPrefix(Prefix, Value, StringLoc);
7980   if (res != MatchOperand_Success) {
7981     return res;
7982   }
7983 
7984   int64_t Int;
7985   Int = StringSwitch<int64_t>(Value)
7986         .Case("BYTE_0", SdwaSel::BYTE_0)
7987         .Case("BYTE_1", SdwaSel::BYTE_1)
7988         .Case("BYTE_2", SdwaSel::BYTE_2)
7989         .Case("BYTE_3", SdwaSel::BYTE_3)
7990         .Case("WORD_0", SdwaSel::WORD_0)
7991         .Case("WORD_1", SdwaSel::WORD_1)
7992         .Case("DWORD", SdwaSel::DWORD)
7993         .Default(0xffffffff);
7994 
7995   if (Int == 0xffffffff) {
7996     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
7997     return MatchOperand_ParseFail;
7998   }
7999 
8000   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8001   return MatchOperand_Success;
8002 }
8003 
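// Parse the "dst_unused:<VALUE>" operand, e.g. "dst_unused:UNUSED_PAD" or
// "dst_unused:UNUSED_PRESERVE".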
8004 OperandMatchResultTy
8005 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8006   using namespace llvm::AMDGPU::SDWA;
8007 
8008   SMLoc S = getLoc();
8009   StringRef Value;
8010   OperandMatchResultTy res;
8011 
8012   SMLoc StringLoc;
8013   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8014   if (res != MatchOperand_Success) {
8015     return res;
8016   }
8017 
8018   int64_t Int;
8019   Int = StringSwitch<int64_t>(Value)
8020         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8021         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8022         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8023         .Default(0xffffffff);
8024 
8025   if (Int == 0xffffffff) {
8026     Error(StringLoc, "invalid dst_unused value");
8027     return MatchOperand_ParseFail;
8028   }
8029 
8030   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8031   return MatchOperand_Success;
8032 }
8033 
8034 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8035   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8036 }
8037 
8038 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8039   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8040 }
8041 
8042 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8043   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8044 }
8045 
8046 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8047   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8048 }
8049 
8050 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8051   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8052 }
8053 
8054 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8055                               uint64_t BasicInstType,
8056                               bool SkipDstVcc,
8057                               bool SkipSrcVcc) {
8058   using namespace llvm::AMDGPU::SDWA;
8059 
8060   OptionalImmIndexMap OptionalIdx;
8061   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8062   bool SkippedVcc = false;
8063 
8064   unsigned I = 1;
8065   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8066   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8067     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8068   }
8069 
8070   for (unsigned E = Operands.size(); I != E; ++I) {
8071     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8072     if (SkipVcc && !SkippedVcc && Op.isReg() &&
8073         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
8074       // VOP2b (v_add_u32, v_sub_u32 ...) SDWA uses the "vcc" token as dst.
8075       // Skip it if it is the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
8076       // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
8077       // Skip VCC only if we did not skip it on the previous iteration.
8078       // Note that src0 and src1 occupy 2 slots each because of modifiers.
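      // For VOP2 that means the dst vcc is seen when the instruction has one
      // operand so far and the src vcc when it has five.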
8079       if (BasicInstType == SIInstrFlags::VOP2 &&
8080           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8081            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8082         SkippedVcc = true;
8083         continue;
8084       } else if (BasicInstType == SIInstrFlags::VOPC &&
8085                  Inst.getNumOperands() == 0) {
8086         SkippedVcc = true;
8087         continue;
8088       }
8089     }
8090     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8091       Op.addRegOrImmWithInputModsOperands(Inst, 2);
8092     } else if (Op.isImm()) {
8093       // Handle optional arguments
8094       OptionalIdx[Op.getImmTy()] = I;
8095     } else {
8096       llvm_unreachable("Invalid operand type");
8097     }
8098     SkippedVcc = false;
8099   }
8100 
8101   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
8102       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
8103       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
8104     // V_NOP_sdwa_vi/gfx9/gfx10 has no optional SDWA arguments.
8105     switch (BasicInstType) {
8106     case SIInstrFlags::VOP1:
8107       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8108       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8109         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8110       }
8111       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8112       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8113       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8114       break;
8115 
8116     case SIInstrFlags::VOP2:
8117       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8118       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8119         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8120       }
8121       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8122       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8123       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8124       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8125       break;
8126 
8127     case SIInstrFlags::VOPC:
8128       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8129         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8130       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8131       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8132       break;
8133 
8134     default:
8135       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8136     }
8137   }
8138 
8139   // Special case for v_mac_{f16, f32}:
8140   // it has a src2 register operand that is tied to the dst operand.
8141   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8142       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
8143     auto it = Inst.begin();
8144     std::advance(
8145       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8146     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8147   }
8148 }
8149 
8150 //===----------------------------------------------------------------------===//
8151 // mAI
8152 //===----------------------------------------------------------------------===//
8153 
8154 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8155   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8156 }
8157 
8158 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8159   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8160 }
8161 
8162 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8163   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8164 }
8165 
8166 /// Force static initialization.
8167 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8168   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8169   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8170 }
8171 
8172 #define GET_REGISTER_MATCHER
8173 #define GET_MATCHER_IMPLEMENTATION
8174 #define GET_MNEMONIC_SPELL_CHECKER
8175 #define GET_MNEMONIC_CHECKER
8176 #include "AMDGPUGenAsmMatcher.inc"
8177 
8178 // This function should be defined after the auto-generated include so that
8179 // the MatchClassKind enum is defined.
8180 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8181                                                      unsigned Kind) {
8182   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
8183   // But MatchInstructionImpl() expects to see a token and fails to validate
8184   // the operand. This method checks whether we were given an immediate operand
8185   // when the matcher expects the corresponding token.
8186   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8187   switch (Kind) {
8188   case MCK_addr64:
8189     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8190   case MCK_gds:
8191     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8192   case MCK_lds:
8193     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8194   case MCK_idxen:
8195     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8196   case MCK_offen:
8197     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8198   case MCK_SSrcB32:
8199     // When operands have expression values, they will return true for isToken,
8200     // because it is not possible to distinguish between a token and an
8201     // expression at parse time. MatchInstructionImpl() will always try to
8202     // match an operand as a token, when isToken returns true, and when the
8203     // name of the expression is not a valid token, the match will fail,
8204     // so we need to handle it here.
8205     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8206   case MCK_SSrcF32:
8207     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8208   case MCK_SoppBrTarget:
8209     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8210   case MCK_VReg32OrOff:
8211     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8212   case MCK_InterpSlot:
8213     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
8214   case MCK_Attr:
8215     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
8216   case MCK_AttrChan:
8217     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
8218   case MCK_ImmSMEMOffset:
8219     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
8220   case MCK_SReg_64:
8221   case MCK_SReg_64_XEXEC:
8222     // Null is defined as a 32-bit register, but
8223     // it should also be accepted for 64-bit operands.
8224     // The following code enables it for SReg_64 operands
8225     // used as source and destination. Remaining source
8226     // operands are handled in isInlinableImm.
8227     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
8228   default:
8229     return Match_InvalidOperand;
8230   }
8231 }
8232 
8233 //===----------------------------------------------------------------------===//
8234 // endpgm
8235 //===----------------------------------------------------------------------===//
8236 
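// s_endpgm takes an optional 16-bit immediate, e.g. "s_endpgm" or
// "s_endpgm 0"; when omitted it defaults to 0.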
8237 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
8238   SMLoc S = getLoc();
8239   int64_t Imm = 0;
8240 
8241   if (!parseExpr(Imm)) {
8242     // The operand is optional; if not present, default to 0.
8243     Imm = 0;
8244   }
8245 
8246   if (!isUInt<16>(Imm)) {
8247     Error(S, "expected a 16-bit value");
8248     return MatchOperand_ParseFail;
8249   }
8250 
8251   Operands.push_back(
8252       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
8253   return MatchOperand_Success;
8254 }
8255 
8256 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
8257