//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
    : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

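    // These flags are packed into the SISrcMods source-modifier operand by the
    // helpers below; e.g. a source written as "-|v0|" sets both Neg and Abs
    // and is encoded as (SISrcMods::NEG | SISrcMods::ABS).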
    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

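  // Records how an immediate operand ended up being encoded: as a literal
  // constant placed in the instruction stream, or as an inline constant.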
  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, we treat the symbol name as the token.
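    // For example, 'gds' may have been parsed as a reference to a symbol named
    // "gds"; getToken() below then returns that symbol's name, so it can still
    // be matched as the 'gds' token operand.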
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrcF32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrcB32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcB32();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVCSrcV2FP32() const {
    return isVCSrcF64();
  }

  bool isVSrcV2FP32() const {
    return isVSrcF64() || isLiteralImm(MVT::v2f32);
  }

  bool isVCSrcV2INT32() const {
    return isVCSrcB64();
  }

  bool isVSrcV2INT32() const {
    return isVSrcB64() || isLiteralImm(MVT::v2i32);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512B32();
  }

  bool isVISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024B32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isVISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128F16() || isVISrc_128B32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. the count of SGPRs used.
// A kernel scope begins at an .amdgpu_hsa_kernel directive and ends at the
// next .amdgpu_hsa_kernel directive or at EOF.
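// For example, after "v_mov_b32 v3, s1" is parsed within a kernel scope, the
// symbols .kernel.vgpr_count and .kernel.sgpr_count are set to 4 and 2,
// i.e. one past the highest register index referenced so far.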
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
      case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
      case IS_AGPR: // fall through
      case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
      default: break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;
  unsigned CPolSeen;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for the given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  // TODO: Possibly make subtargetHasRegister const.
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
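  /// Used, for example, for the HSA metadata block between the
  /// .amdgpu_metadata and .end_amdgpu_metadata directives.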
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
               const MCInstrInfo &MII,
               const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
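      // These symbols can be referenced from the assembly source, e.g.
      // ".if .amdgcn.gfx_generation_number >= 9" (illustrative).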
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX90A() const {
    return AMDGPU::isGFX90A(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(StringRef Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseCPol(OperandVector &Operands);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value,
                                             SMLoc &StringLoc);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
  OperandMatchResultTy parseUfmt(int64_t &Format);
  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
  OperandMatchResultTy parseNumericFormat(int64_t &Format);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  bool parseHwregBody(OperandInfoTy &HwReg,
                      OperandInfoTy &Offset,
                      OperandInfoTy &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const OperandInfoTy &Offset,
                     const OperandInfoTy &Width);

  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                      const OperandVector &Operands) const;
  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
  SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
  SMLoc getLitLoc(const OperandVector &Operands) const;
  SMLoc getConstLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateDivScale(const MCInst &Inst);
  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                             const SMLoc &IDLoc);
  Optional<StringRef> validateLdsDirect(const MCInst &Inst);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS);
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS,
                        ArrayRef<unsigned> Variants);
  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Pref, const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseId(StringRef &Val, const StringRef ErrMsg = "");

  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm, StringRef Expected = "");
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  void onBeginOfFile() override;

  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  bool parseSwizzleOperand(int64_t &Op,
                           const unsigned MinVal,
                           const unsigned MaxVal,
                           const StringRef ErrMsg,
                           SMLoc &Loc);
  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultCPol() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMEMOffset() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                OptionalImmIndexMap &OptionalIdx);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
  void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);

  void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);

  bool parseDimId(unsigned &Encoding);
  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
  int64_t parseDPPCtrlSel(StringRef Ctrl);
  int64_t parseDPPCtrlPerm();
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1660                                     AMDGPUOperand::ImmTy Type);
1661   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1662   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1663   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1664   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1665   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1666   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1667   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1668                uint64_t BasicInstType,
1669                bool SkipDstVcc = false,
1670                bool SkipSrcVcc = false);
1671 
1672   AMDGPUOperand::Ptr defaultBLGP() const;
1673   AMDGPUOperand::Ptr defaultCBSZ() const;
1674   AMDGPUOperand::Ptr defaultABID() const;
1675 
1676   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1677   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1678 };
1679 
1680 struct OptionalOperand {
1681   const char *Name;
1682   AMDGPUOperand::ImmTy Type;
1683   bool IsBit;
1684   bool (*ConvertResult)(int64_t&);
1685 };
1686 
1687 } // end anonymous namespace
1688 
1689 // May be called with an integer type of equivalent bitwidth.
1690 static const fltSemantics *getFltSemantics(unsigned Size) {
1691   switch (Size) {
1692   case 4:
1693     return &APFloat::IEEEsingle();
1694   case 8:
1695     return &APFloat::IEEEdouble();
1696   case 2:
1697     return &APFloat::IEEEhalf();
1698   default:
1699     llvm_unreachable("unsupported fp type");
1700   }
1701 }
1702 
1703 static const fltSemantics *getFltSemantics(MVT VT) {
1704   return getFltSemantics(VT.getSizeInBits() / 8);
1705 }
1706 
1707 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1708   switch (OperandType) {
1709   case AMDGPU::OPERAND_REG_IMM_INT32:
1710   case AMDGPU::OPERAND_REG_IMM_FP32:
1711   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1712   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1713   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1714   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1715   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1716   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1717   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1718   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1719     return &APFloat::IEEEsingle();
1720   case AMDGPU::OPERAND_REG_IMM_INT64:
1721   case AMDGPU::OPERAND_REG_IMM_FP64:
1722   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1723   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1724   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1725     return &APFloat::IEEEdouble();
1726   case AMDGPU::OPERAND_REG_IMM_INT16:
1727   case AMDGPU::OPERAND_REG_IMM_FP16:
1728   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1729   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1730   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1731   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1732   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1733   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1734   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1735   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1736   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1737   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1738     return &APFloat::IEEEhalf();
1739   default:
1740     llvm_unreachable("unsupported fp type");
1741   }
1742 }
1743 
1744 //===----------------------------------------------------------------------===//
1745 // Operand
1746 //===----------------------------------------------------------------------===//
1747 
1748 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1749   bool Lost;
1750 
1751   // Convert the literal to the operand's floating-point type
1752   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1753                                                APFloat::rmNearestTiesToEven,
1754                                                &Lost);
1755   // We allow precision loss but not overflow or underflow
1756   if (Status != APFloat::opOK &&
1757       Lost &&
1758       ((Status & APFloat::opOverflow)  != 0 ||
1759        (Status & APFloat::opUnderflow) != 0)) {
1760     return false;
1761   }
1762 
1763   return true;
1764 }
1765 
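// Check whether Val fits into Size bits as either an unsigned or a signed
// integer, i.e. truncation to Size bits loses no information.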
1766 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1767   return isUIntN(Size, Val) || isIntN(Size, Val);
1768 }
1769 
1770 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1771   if (VT.getScalarType() == MVT::i16) {
1772     // FP inline constants are broken for i16 operands; only accept
1772     // inlinable integer literals here.
1773     return isInlinableIntLiteral(Val);
1774   }
1775 
1776   // f16/v2f16 operands work correctly for all values.
1777   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1778 }
1779 
1780 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1781 
1782   // This is a hack to enable named inline values like
1783   // shared_base with both 32-bit and 64-bit operands.
1784   // Note that these values are defined as
1785   // 32-bit operands only.
1786   if (isInlineValue()) {
1787     return true;
1788   }
1789 
1790   if (!isImmTy(ImmTyNone)) {
1791     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1792     return false;
1793   }
1794   // TODO: We should avoid using host float here. It would be better to
1795   // check the float bit values which is what a few other places do.
1796   // We've had bot failures before due to weird NaN support on mips hosts.
1797 
1798   APInt Literal(64, Imm.Val);
1799 
1800   if (Imm.IsFPImm) { // We got fp literal token
1801     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1802       return AMDGPU::isInlinableLiteral64(Imm.Val,
1803                                           AsmParser->hasInv2PiInlineImm());
1804     }
1805 
1806     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1807     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1808       return false;
1809 
1810     if (type.getScalarSizeInBits() == 16) {
1811       return isInlineableLiteralOp16(
1812         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1813         type, AsmParser->hasInv2PiInlineImm());
1814     }
1815 
1816     // Check if single precision literal is inlinable
1817     return AMDGPU::isInlinableLiteral32(
1818       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1819       AsmParser->hasInv2PiInlineImm());
1820   }
1821 
1822   // We got int literal token.
1823   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1824     return AMDGPU::isInlinableLiteral64(Imm.Val,
1825                                         AsmParser->hasInv2PiInlineImm());
1826   }
1827 
1828   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1829     return false;
1830   }
1831 
1832   if (type.getScalarSizeInBits() == 16) {
1833     return isInlineableLiteralOp16(
1834       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1835       type, AsmParser->hasInv2PiInlineImm());
1836   }
1837 
1838   return AMDGPU::isInlinableLiteral32(
1839     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1840     AsmParser->hasInv2PiInlineImm());
1841 }
1842 
1843 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1844   // Check that this immediate can be added as literal
1845   if (!isImmTy(ImmTyNone)) {
1846     return false;
1847   }
1848 
1849   if (!Imm.IsFPImm) {
1850     // We got int literal token.
1851 
1852     if (type == MVT::f64 && hasFPModifiers()) {
1853       // FP modifiers cannot be applied to int literals while preserving the same
1854       // semantics for VOP1/2/C and VOP3 because of integer truncation. To avoid
1855       // ambiguity, disable these cases.
1856       return false;
1857     }
1858 
1859     unsigned Size = type.getSizeInBits();
1860     if (Size == 64)
1861       Size = 32;
1862 
1863     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1864     // types.
1865     return isSafeTruncation(Imm.Val, Size);
1866   }
1867 
1868   // We got fp literal token
1869   if (type == MVT::f64) { // Expected 64-bit fp operand
1870     // We would set the low 64 bits of the literal to zeroes, but we accept such literals
1871     return true;
1872   }
1873 
1874   if (type == MVT::i64) { // Expected 64-bit int operand
1875     // We don't allow fp literals in 64-bit integer instructions. It is
1876     // unclear how we should encode them.
1877     return false;
1878   }
1879 
1880   // We allow fp literals with f16x2 operands assuming that the specified
1881   // literal goes into the lower half and the upper half is zero. We also
1882   // require that the literal may be losslessly converted to f16.
1883   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1884                      (type == MVT::v2i16)? MVT::i16 :
1885                      (type == MVT::v2f32)? MVT::f32 : type;
1886 
1887   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1888   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1889 }
1890 
1891 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1892   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1893 }
1894 
1895 bool AMDGPUOperand::isVRegWithInputMods() const {
1896   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1897          // GFX90A allows DPP on 64-bit operands.
1898          (isRegClass(AMDGPU::VReg_64RegClassID) &&
1899           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1900 }
1901 
1902 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1903   if (AsmParser->isVI())
1904     return isVReg32();
1905   else if (AsmParser->isGFX9Plus())
1906     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1907   else
1908     return false;
1909 }
1910 
1911 bool AMDGPUOperand::isSDWAFP16Operand() const {
1912   return isSDWAOperand(MVT::f16);
1913 }
1914 
1915 bool AMDGPUOperand::isSDWAFP32Operand() const {
1916   return isSDWAOperand(MVT::f32);
1917 }
1918 
1919 bool AMDGPUOperand::isSDWAInt16Operand() const {
1920   return isSDWAOperand(MVT::i16);
1921 }
1922 
1923 bool AMDGPUOperand::isSDWAInt32Operand() const {
1924   return isSDWAOperand(MVT::i32);
1925 }
1926 
1927 bool AMDGPUOperand::isBoolReg() const {
1928   auto FB = AsmParser->getFeatureBits();
1929   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1930                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
1931 }
1932 
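// Apply parsed 'abs'/'neg' FP input modifiers to a literal by clearing or
// flipping its sign bit. Size is the operand size in bytes.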
1933 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1934 {
1935   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1936   assert(Size == 2 || Size == 4 || Size == 8);
1937 
1938   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1939 
1940   if (Imm.Mods.Abs) {
1941     Val &= ~FpSignMask;
1942   }
1943   if (Imm.Mods.Neg) {
1944     Val ^= FpSignMask;
1945   }
1946 
1947   return Val;
1948 }
1949 
1950 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1951   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1952                              Inst.getNumOperands())) {
1953     addLiteralImmOperand(Inst, Imm.Val,
1954                          ApplyModifiers &
1955                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1956   } else {
1957     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1958     Inst.addOperand(MCOperand::createImm(Imm.Val));
1959     setImmKindNone();
1960   }
1961 }
1962 
1963 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1964   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1965   auto OpNum = Inst.getNumOperands();
1966   // Check that this operand accepts literals
1967   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1968 
1969   if (ApplyModifiers) {
1970     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1971     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1972     Val = applyInputFPModifiers(Val, Size);
1973   }
1974 
1975   APInt Literal(64, Val);
1976   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1977 
1978   if (Imm.IsFPImm) { // We got fp literal token
1979     switch (OpTy) {
1980     case AMDGPU::OPERAND_REG_IMM_INT64:
1981     case AMDGPU::OPERAND_REG_IMM_FP64:
1982     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1983     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1984     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1985       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1986                                        AsmParser->hasInv2PiInlineImm())) {
1987         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1988         setImmKindConst();
1989         return;
1990       }
1991 
1992       // Non-inlineable
1993       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1994         // For fp operands we check if low 32 bits are zeros
1995         if (Literal.getLoBits(32) != 0) {
1996           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1997           "Can't encode literal as exact 64-bit floating-point operand. "
1998           "Low 32-bits will be set to zero");
1999         }
2000 
2001         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2002         setImmKindLiteral();
2003         return;
2004       }
2005 
2006       // We don't allow fp literals in 64-bit integer instructions. It is
2007       // unclear how we should encode them. This case should be checked earlier
2008       // in predicate methods (isLiteralImm())
2009       llvm_unreachable("fp literal in 64-bit integer instruction.");
2010 
2011     case AMDGPU::OPERAND_REG_IMM_INT32:
2012     case AMDGPU::OPERAND_REG_IMM_FP32:
2013     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2014     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2015     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2016     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2017     case AMDGPU::OPERAND_REG_IMM_INT16:
2018     case AMDGPU::OPERAND_REG_IMM_FP16:
2019     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2020     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2021     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2022     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2023     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2024     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2025     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2026     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2027     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2028     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2029     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2030     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2031     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2032     case AMDGPU::OPERAND_REG_IMM_V2INT32: {
2033       bool lost;
2034       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2035       // Convert the literal to the operand's floating-point type
2036       FPLiteral.convert(*getOpFltSemantics(OpTy),
2037                         APFloat::rmNearestTiesToEven, &lost);
2038       // We allow precision loss but not overflow or underflow. This should be
2039       // checked earlier in isLiteralImm()
2040 
2041       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2042       Inst.addOperand(MCOperand::createImm(ImmVal));
2043       setImmKindLiteral();
2044       return;
2045     }
2046     default:
2047       llvm_unreachable("invalid operand size");
2048     }
2049 
2050     return;
2051   }
2052 
2053   // We got int literal token.
2054   // Only sign extend inline immediates.
2055   switch (OpTy) {
2056   case AMDGPU::OPERAND_REG_IMM_INT32:
2057   case AMDGPU::OPERAND_REG_IMM_FP32:
2058   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2059   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2060   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2061   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2062   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2063   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2064   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2065   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2066   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2067   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2068     if (isSafeTruncation(Val, 32) &&
2069         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2070                                      AsmParser->hasInv2PiInlineImm())) {
2071       Inst.addOperand(MCOperand::createImm(Val));
2072       setImmKindConst();
2073       return;
2074     }
2075 
2076     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2077     setImmKindLiteral();
2078     return;
2079 
2080   case AMDGPU::OPERAND_REG_IMM_INT64:
2081   case AMDGPU::OPERAND_REG_IMM_FP64:
2082   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2083   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2084   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2085     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2086       Inst.addOperand(MCOperand::createImm(Val));
2087       setImmKindConst();
2088       return;
2089     }
2090 
2091     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2092     setImmKindLiteral();
2093     return;
2094 
2095   case AMDGPU::OPERAND_REG_IMM_INT16:
2096   case AMDGPU::OPERAND_REG_IMM_FP16:
2097   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2098   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2099   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2100   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2101     if (isSafeTruncation(Val, 16) &&
2102         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2103                                      AsmParser->hasInv2PiInlineImm())) {
2104       Inst.addOperand(MCOperand::createImm(Val));
2105       setImmKindConst();
2106       return;
2107     }
2108 
2109     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2110     setImmKindLiteral();
2111     return;
2112 
2113   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2114   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2115   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2116   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2117     assert(isSafeTruncation(Val, 16));
2118     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2119                                         AsmParser->hasInv2PiInlineImm()));
2120 
2121     Inst.addOperand(MCOperand::createImm(Val));
2122     return;
2123   }
2124   default:
2125     llvm_unreachable("invalid operand size");
2126   }
2127 }
2128 
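// Add a KImm literal operand of the given bit width. Integer tokens are
// truncated to Bitwidth bits; FP tokens are first converted to the matching
// floating-point semantics.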
2129 template <unsigned Bitwidth>
2130 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2131   APInt Literal(64, Imm.Val);
2132   setImmKindNone();
2133 
2134   if (!Imm.IsFPImm) {
2135     // We got int literal token.
2136     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2137     return;
2138   }
2139 
2140   bool Lost;
2141   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2142   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2143                     APFloat::rmNearestTiesToEven, &Lost);
2144   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2145 }
2146 
2147 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2148   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2149 }
2150 
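// Named special registers (e.g. shared_base, vccz, null) that are encoded
// like inline constants.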
2151 static bool isInlineValue(unsigned Reg) {
2152   switch (Reg) {
2153   case AMDGPU::SRC_SHARED_BASE:
2154   case AMDGPU::SRC_SHARED_LIMIT:
2155   case AMDGPU::SRC_PRIVATE_BASE:
2156   case AMDGPU::SRC_PRIVATE_LIMIT:
2157   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2158     return true;
2159   case AMDGPU::SRC_VCCZ:
2160   case AMDGPU::SRC_EXECZ:
2161   case AMDGPU::SRC_SCC:
2162     return true;
2163   case AMDGPU::SGPR_NULL:
2164     return true;
2165   default:
2166     return false;
2167   }
2168 }
2169 
2170 bool AMDGPUOperand::isInlineValue() const {
2171   return isRegKind() && ::isInlineValue(getReg());
2172 }
2173 
2174 //===----------------------------------------------------------------------===//
2175 // AsmParser
2176 //===----------------------------------------------------------------------===//
2177 
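// Map a register kind and width (in 32-bit registers) to the corresponding
// register class ID, or -1 if the combination is not supported.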
2178 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2179   if (Is == IS_VGPR) {
2180     switch (RegWidth) {
2181       default: return -1;
2182       case 1: return AMDGPU::VGPR_32RegClassID;
2183       case 2: return AMDGPU::VReg_64RegClassID;
2184       case 3: return AMDGPU::VReg_96RegClassID;
2185       case 4: return AMDGPU::VReg_128RegClassID;
2186       case 5: return AMDGPU::VReg_160RegClassID;
2187       case 6: return AMDGPU::VReg_192RegClassID;
2188       case 8: return AMDGPU::VReg_256RegClassID;
2189       case 16: return AMDGPU::VReg_512RegClassID;
2190       case 32: return AMDGPU::VReg_1024RegClassID;
2191     }
2192   } else if (Is == IS_TTMP) {
2193     switch (RegWidth) {
2194       default: return -1;
2195       case 1: return AMDGPU::TTMP_32RegClassID;
2196       case 2: return AMDGPU::TTMP_64RegClassID;
2197       case 4: return AMDGPU::TTMP_128RegClassID;
2198       case 8: return AMDGPU::TTMP_256RegClassID;
2199       case 16: return AMDGPU::TTMP_512RegClassID;
2200     }
2201   } else if (Is == IS_SGPR) {
2202     switch (RegWidth) {
2203       default: return -1;
2204       case 1: return AMDGPU::SGPR_32RegClassID;
2205       case 2: return AMDGPU::SGPR_64RegClassID;
2206       case 3: return AMDGPU::SGPR_96RegClassID;
2207       case 4: return AMDGPU::SGPR_128RegClassID;
2208       case 5: return AMDGPU::SGPR_160RegClassID;
2209       case 6: return AMDGPU::SGPR_192RegClassID;
2210       case 8: return AMDGPU::SGPR_256RegClassID;
2211       case 16: return AMDGPU::SGPR_512RegClassID;
2212     }
2213   } else if (Is == IS_AGPR) {
2214     switch (RegWidth) {
2215       default: return -1;
2216       case 1: return AMDGPU::AGPR_32RegClassID;
2217       case 2: return AMDGPU::AReg_64RegClassID;
2218       case 3: return AMDGPU::AReg_96RegClassID;
2219       case 4: return AMDGPU::AReg_128RegClassID;
2220       case 5: return AMDGPU::AReg_160RegClassID;
2221       case 6: return AMDGPU::AReg_192RegClassID;
2222       case 8: return AMDGPU::AReg_256RegClassID;
2223       case 16: return AMDGPU::AReg_512RegClassID;
2224       case 32: return AMDGPU::AReg_1024RegClassID;
2225     }
2226   }
2227   return -1;
2228 }
2229 
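// Translate a special register name to its register number, or NoRegister
// if the name is not recognized.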
2230 static unsigned getSpecialRegForName(StringRef RegName) {
2231   return StringSwitch<unsigned>(RegName)
2232     .Case("exec", AMDGPU::EXEC)
2233     .Case("vcc", AMDGPU::VCC)
2234     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2235     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2236     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2237     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2238     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2239     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2240     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2241     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2242     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2243     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2244     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2245     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2246     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2247     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2248     .Case("m0", AMDGPU::M0)
2249     .Case("vccz", AMDGPU::SRC_VCCZ)
2250     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2251     .Case("execz", AMDGPU::SRC_EXECZ)
2252     .Case("src_execz", AMDGPU::SRC_EXECZ)
2253     .Case("scc", AMDGPU::SRC_SCC)
2254     .Case("src_scc", AMDGPU::SRC_SCC)
2255     .Case("tba", AMDGPU::TBA)
2256     .Case("tma", AMDGPU::TMA)
2257     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2258     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2259     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2260     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2261     .Case("vcc_lo", AMDGPU::VCC_LO)
2262     .Case("vcc_hi", AMDGPU::VCC_HI)
2263     .Case("exec_lo", AMDGPU::EXEC_LO)
2264     .Case("exec_hi", AMDGPU::EXEC_HI)
2265     .Case("tma_lo", AMDGPU::TMA_LO)
2266     .Case("tma_hi", AMDGPU::TMA_HI)
2267     .Case("tba_lo", AMDGPU::TBA_LO)
2268     .Case("tba_hi", AMDGPU::TBA_HI)
2269     .Case("pc", AMDGPU::PC_REG)
2270     .Case("null", AMDGPU::SGPR_NULL)
2271     .Default(AMDGPU::NoRegister);
2272 }
2273 
2274 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2275                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2276   auto R = parseRegister();
2277   if (!R) return true;
2278   assert(R->isReg());
2279   RegNo = R->getReg();
2280   StartLoc = R->getStartLoc();
2281   EndLoc = R->getEndLoc();
2282   return false;
2283 }
2284 
2285 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2286                                     SMLoc &EndLoc) {
2287   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2288 }
2289 
2290 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2291                                                        SMLoc &StartLoc,
2292                                                        SMLoc &EndLoc) {
2293   bool Result =
2294       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2295   bool PendingErrors = getParser().hasPendingError();
2296   getParser().clearPendingErrors();
2297   if (PendingErrors)
2298     return MatchOperand_ParseFail;
2299   if (Result)
2300     return MatchOperand_NoMatch;
2301   return MatchOperand_Success;
2302 }
2303 
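// Fold the next register parsed from a register list into the accumulated
// (Reg, RegWidth) pair. Known lo/hi halves of special registers are combined;
// regular registers must have consecutive indices.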
2304 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2305                                             RegisterKind RegKind, unsigned Reg1,
2306                                             SMLoc Loc) {
2307   switch (RegKind) {
2308   case IS_SPECIAL:
2309     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2310       Reg = AMDGPU::EXEC;
2311       RegWidth = 2;
2312       return true;
2313     }
2314     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2315       Reg = AMDGPU::FLAT_SCR;
2316       RegWidth = 2;
2317       return true;
2318     }
2319     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2320       Reg = AMDGPU::XNACK_MASK;
2321       RegWidth = 2;
2322       return true;
2323     }
2324     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2325       Reg = AMDGPU::VCC;
2326       RegWidth = 2;
2327       return true;
2328     }
2329     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2330       Reg = AMDGPU::TBA;
2331       RegWidth = 2;
2332       return true;
2333     }
2334     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2335       Reg = AMDGPU::TMA;
2336       RegWidth = 2;
2337       return true;
2338     }
2339     Error(Loc, "register does not fit in the list");
2340     return false;
2341   case IS_VGPR:
2342   case IS_SGPR:
2343   case IS_AGPR:
2344   case IS_TTMP:
2345     if (Reg1 != Reg + RegWidth) {
2346       Error(Loc, "registers in a list must have consecutive indices");
2347       return false;
2348     }
2349     RegWidth++;
2350     return true;
2351   default:
2352     llvm_unreachable("unexpected register kind");
2353   }
2354 }
2355 
2356 struct RegInfo {
2357   StringLiteral Name;
2358   RegisterKind Kind;
2359 };
2360 
2361 static constexpr RegInfo RegularRegisters[] = {
2362   {{"v"},    IS_VGPR},
2363   {{"s"},    IS_SGPR},
2364   {{"ttmp"}, IS_TTMP},
2365   {{"acc"},  IS_AGPR},
2366   {{"a"},    IS_AGPR},
2367 };
2368 
2369 static bool isRegularReg(RegisterKind Kind) {
2370   return Kind == IS_VGPR ||
2371          Kind == IS_SGPR ||
2372          Kind == IS_TTMP ||
2373          Kind == IS_AGPR;
2374 }
2375 
2376 static const RegInfo* getRegularRegInfo(StringRef Str) {
2377   for (const RegInfo &Reg : RegularRegisters)
2378     if (Str.startswith(Reg.Name))
2379       return &Reg;
2380   return nullptr;
2381 }
2382 
2383 static bool getRegNum(StringRef Str, unsigned& Num) {
2384   return !Str.getAsInteger(10, Num);
2385 }
2386 
2387 bool
2388 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2389                             const AsmToken &NextToken) const {
2390 
2391   // A list of consecutive registers: [s0,s1,s2,s3]
2392   if (Token.is(AsmToken::LBrac))
2393     return true;
2394 
2395   if (!Token.is(AsmToken::Identifier))
2396     return false;
2397 
2398   // A single register like s0 or a range of registers like s[0:1]
2399 
2400   StringRef Str = Token.getString();
2401   const RegInfo *Reg = getRegularRegInfo(Str);
2402   if (Reg) {
2403     StringRef RegName = Reg->Name;
2404     StringRef RegSuffix = Str.substr(RegName.size());
2405     if (!RegSuffix.empty()) {
2406       unsigned Num;
2407       // A single register with an index: rXX
2408       if (getRegNum(RegSuffix, Num))
2409         return true;
2410     } else {
2411       // A range of registers: r[XX:YY].
2412       if (NextToken.is(AsmToken::LBrac))
2413         return true;
2414     }
2415   }
2416 
2417   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2418 }
2419 
2420 bool
2421 AMDGPUAsmParser::isRegister()
2422 {
2423   return isRegister(getToken(), peekToken());
2424 }
2425 
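// Convert a (kind, index, width) description of a regular register into an
// MC register, diagnosing misaligned indices and unsupported widths.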
2426 unsigned
2427 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2428                                unsigned RegNum,
2429                                unsigned RegWidth,
2430                                SMLoc Loc) {
2431 
2432   assert(isRegularReg(RegKind));
2433 
2434   unsigned AlignSize = 1;
2435   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2436     // SGPR and TTMP registers must be aligned.
2437     // Max required alignment is 4 dwords.
2438     AlignSize = std::min(RegWidth, 4u);
2439   }
2440 
2441   if (RegNum % AlignSize != 0) {
2442     Error(Loc, "invalid register alignment");
2443     return AMDGPU::NoRegister;
2444   }
2445 
2446   unsigned RegIdx = RegNum / AlignSize;
2447   int RCID = getRegClass(RegKind, RegWidth);
2448   if (RCID == -1) {
2449     Error(Loc, "invalid or unsupported register size");
2450     return AMDGPU::NoRegister;
2451   }
2452 
2453   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2454   const MCRegisterClass RC = TRI->getRegClass(RCID);
2455   if (RegIdx >= RC.getNumRegs()) {
2456     Error(Loc, "register index is out of range");
2457     return AMDGPU::NoRegister;
2458   }
2459 
2460   return RC.getRegister(RegIdx);
2461 }
2462 
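// Parse a bracketed register index range, "[<lo>]" or "[<lo>:<hi>]", and
// return the starting index and the width of the range.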
2463 bool
2464 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2465   int64_t RegLo, RegHi;
2466   if (!skipToken(AsmToken::LBrac, "missing register index"))
2467     return false;
2468 
2469   SMLoc FirstIdxLoc = getLoc();
2470   SMLoc SecondIdxLoc;
2471 
2472   if (!parseExpr(RegLo))
2473     return false;
2474 
2475   if (trySkipToken(AsmToken::Colon)) {
2476     SecondIdxLoc = getLoc();
2477     if (!parseExpr(RegHi))
2478       return false;
2479   } else {
2480     RegHi = RegLo;
2481   }
2482 
2483   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2484     return false;
2485 
2486   if (!isUInt<32>(RegLo)) {
2487     Error(FirstIdxLoc, "invalid register index");
2488     return false;
2489   }
2490 
2491   if (!isUInt<32>(RegHi)) {
2492     Error(SecondIdxLoc, "invalid register index");
2493     return false;
2494   }
2495 
2496   if (RegLo > RegHi) {
2497     Error(FirstIdxLoc, "first register index should not exceed second index");
2498     return false;
2499   }
2500 
2501   Num = static_cast<unsigned>(RegLo);
2502   Width = (RegHi - RegLo) + 1;
2503   return true;
2504 }
2505 
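// Try to parse a special register name; returns the register, or NoRegister
// if the current identifier does not name one.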
2506 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2507                                           unsigned &RegNum, unsigned &RegWidth,
2508                                           SmallVectorImpl<AsmToken> &Tokens) {
2509   assert(isToken(AsmToken::Identifier));
2510   unsigned Reg = getSpecialRegForName(getTokenStr());
2511   if (Reg) {
2512     RegNum = 0;
2513     RegWidth = 1;
2514     RegKind = IS_SPECIAL;
2515     Tokens.push_back(getToken());
2516     lex(); // skip register name
2517   }
2518   return Reg;
2519 }
2520 
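// Parse a regular register reference: either a single register such as v5
// or a register range such as v[4:7].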
2521 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2522                                           unsigned &RegNum, unsigned &RegWidth,
2523                                           SmallVectorImpl<AsmToken> &Tokens) {
2524   assert(isToken(AsmToken::Identifier));
2525   StringRef RegName = getTokenStr();
2526   auto Loc = getLoc();
2527 
2528   const RegInfo *RI = getRegularRegInfo(RegName);
2529   if (!RI) {
2530     Error(Loc, "invalid register name");
2531     return AMDGPU::NoRegister;
2532   }
2533 
2534   Tokens.push_back(getToken());
2535   lex(); // skip register name
2536 
2537   RegKind = RI->Kind;
2538   StringRef RegSuffix = RegName.substr(RI->Name.size());
2539   if (!RegSuffix.empty()) {
2540     // Single 32-bit register: vXX.
2541     if (!getRegNum(RegSuffix, RegNum)) {
2542       Error(Loc, "invalid register index");
2543       return AMDGPU::NoRegister;
2544     }
2545     RegWidth = 1;
2546   } else {
2547     // Range of registers: v[XX:YY]. ":YY" is optional.
2548     if (!ParseRegRange(RegNum, RegWidth))
2549       return AMDGPU::NoRegister;
2550   }
2551 
2552   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2553 }
2554 
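// Parse a list of single 32-bit registers, e.g. [s0,s1,s2,s3], and fold it
// into one contiguous register of the combined width.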
2555 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2556                                        unsigned &RegWidth,
2557                                        SmallVectorImpl<AsmToken> &Tokens) {
2558   unsigned Reg = AMDGPU::NoRegister;
2559   auto ListLoc = getLoc();
2560 
2561   if (!skipToken(AsmToken::LBrac,
2562                  "expected a register or a list of registers")) {
2563     return AMDGPU::NoRegister;
2564   }
2565 
2566   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2567 
2568   auto Loc = getLoc();
2569   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2570     return AMDGPU::NoRegister;
2571   if (RegWidth != 1) {
2572     Error(Loc, "expected a single 32-bit register");
2573     return AMDGPU::NoRegister;
2574   }
2575 
2576   for (; trySkipToken(AsmToken::Comma); ) {
2577     RegisterKind NextRegKind;
2578     unsigned NextReg, NextRegNum, NextRegWidth;
2579     Loc = getLoc();
2580 
2581     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2582                              NextRegNum, NextRegWidth,
2583                              Tokens)) {
2584       return AMDGPU::NoRegister;
2585     }
2586     if (NextRegWidth != 1) {
2587       Error(Loc, "expected a single 32-bit register");
2588       return AMDGPU::NoRegister;
2589     }
2590     if (NextRegKind != RegKind) {
2591       Error(Loc, "registers in a list must be of the same kind");
2592       return AMDGPU::NoRegister;
2593     }
2594     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2595       return AMDGPU::NoRegister;
2596   }
2597 
2598   if (!skipToken(AsmToken::RBrac,
2599                  "expected a comma or a closing square bracket")) {
2600     return AMDGPU::NoRegister;
2601   }
2602 
2603   if (isRegularReg(RegKind))
2604     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2605 
2606   return Reg;
2607 }
2608 
2609 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2610                                           unsigned &RegNum, unsigned &RegWidth,
2611                                           SmallVectorImpl<AsmToken> &Tokens) {
2612   auto Loc = getLoc();
2613   Reg = AMDGPU::NoRegister;
2614 
2615   if (isToken(AsmToken::Identifier)) {
2616     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2617     if (Reg == AMDGPU::NoRegister)
2618       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2619   } else {
2620     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2621   }
2622 
2623   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2624   if (Reg == AMDGPU::NoRegister) {
2625     assert(Parser.hasPendingError());
2626     return false;
2627   }
2628 
2629   if (!subtargetHasRegister(*TRI, Reg)) {
2630     if (Reg == AMDGPU::SGPR_NULL) {
2631       Error(Loc, "'null' operand is not supported on this GPU");
2632     } else {
2633       Error(Loc, "register not available on this GPU");
2634     }
2635     return false;
2636   }
2637 
2638   return true;
2639 }
2640 
2641 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2642                                           unsigned &RegNum, unsigned &RegWidth,
2643                                           bool RestoreOnFailure /*=false*/) {
2644   Reg = AMDGPU::NoRegister;
2645 
2646   SmallVector<AsmToken, 1> Tokens;
2647   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2648     if (RestoreOnFailure) {
2649       while (!Tokens.empty()) {
2650         getLexer().UnLex(Tokens.pop_back_val());
2651       }
2652     }
2653     return true;
2654   }
2655   return false;
2656 }
2657 
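// Name of the symbol used to track the next free VGPR/SGPR index, or None
// for register kinds that are not tracked.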
2658 Optional<StringRef>
2659 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2660   switch (RegKind) {
2661   case IS_VGPR:
2662     return StringRef(".amdgcn.next_free_vgpr");
2663   case IS_SGPR:
2664     return StringRef(".amdgcn.next_free_sgpr");
2665   default:
2666     return None;
2667   }
2668 }
2669 
2670 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2671   auto SymbolName = getGprCountSymbolName(RegKind);
2672   assert(SymbolName && "initializing invalid register kind");
2673   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2674   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2675 }
2676 
2677 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2678                                             unsigned DwordRegIndex,
2679                                             unsigned RegWidth) {
2680   // Symbols are only defined for GCN targets
2681   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2682     return true;
2683 
2684   auto SymbolName = getGprCountSymbolName(RegKind);
2685   if (!SymbolName)
2686     return true;
2687   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2688 
2689   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2690   int64_t OldCount;
2691 
2692   if (!Sym->isVariable())
2693     return !Error(getLoc(),
2694                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2695   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2696     return !Error(
2697         getLoc(),
2698         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2699 
2700   if (OldCount <= NewMax)
2701     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2702 
2703   return true;
2704 }
2705 
2706 std::unique_ptr<AMDGPUOperand>
2707 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2708   const auto &Tok = getToken();
2709   SMLoc StartLoc = Tok.getLoc();
2710   SMLoc EndLoc = Tok.getEndLoc();
2711   RegisterKind RegKind;
2712   unsigned Reg, RegNum, RegWidth;
2713 
2714   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2715     return nullptr;
2716   }
2717   if (isHsaAbiVersion3Or4(&getSTI())) {
2718     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2719       return nullptr;
2720   } else
2721     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2722   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2723 }
2724 
2725 OperandMatchResultTy
2726 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2727   // TODO: add syntactic sugar for 1/(2*PI)
2728 
2729   assert(!isRegister());
2730   assert(!isModifier());
2731 
2732   const auto& Tok = getToken();
2733   const auto& NextTok = peekToken();
2734   bool IsReal = Tok.is(AsmToken::Real);
2735   SMLoc S = getLoc();
2736   bool Negate = false;
2737 
2738   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2739     lex();
2740     IsReal = true;
2741     Negate = true;
2742   }
2743 
2744   if (IsReal) {
2745     // Floating-point expressions are not supported.
2746     // Only floating-point literals with an
2747     // optional sign are accepted.
2748 
2749     StringRef Num = getTokenStr();
2750     lex();
2751 
2752     APFloat RealVal(APFloat::IEEEdouble());
2753     auto roundMode = APFloat::rmNearestTiesToEven;
2754     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2755       return MatchOperand_ParseFail;
2756     }
2757     if (Negate)
2758       RealVal.changeSign();
2759 
2760     Operands.push_back(
2761       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2762                                AMDGPUOperand::ImmTyNone, true));
2763 
2764     return MatchOperand_Success;
2765 
2766   } else {
2767     int64_t IntVal;
2768     const MCExpr *Expr;
2769     SMLoc S = getLoc();
2770 
2771     if (HasSP3AbsModifier) {
2772       // This is a workaround for handling expressions
2773       // as arguments of SP3 'abs' modifier, for example:
2774       //     |1.0|
2775       //     |-1|
2776       //     |1+x|
2777       // This syntax is not compatible with syntax of standard
2778       // MC expressions (due to the trailing '|').
2779       SMLoc EndLoc;
2780       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2781         return MatchOperand_ParseFail;
2782     } else {
2783       if (Parser.parseExpression(Expr))
2784         return MatchOperand_ParseFail;
2785     }
2786 
2787     if (Expr->evaluateAsAbsolute(IntVal)) {
2788       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2789     } else {
2790       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2791     }
2792 
2793     return MatchOperand_Success;
2794   }
2795 
2796   return MatchOperand_NoMatch;
2797 }
2798 
2799 OperandMatchResultTy
2800 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2801   if (!isRegister())
2802     return MatchOperand_NoMatch;
2803 
2804   if (auto R = parseRegister()) {
2805     assert(R->isReg());
2806     Operands.push_back(std::move(R));
2807     return MatchOperand_Success;
2808   }
2809   return MatchOperand_ParseFail;
2810 }
2811 
2812 OperandMatchResultTy
2813 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2814   auto res = parseReg(Operands);
2815   if (res != MatchOperand_NoMatch) {
2816     return res;
2817   } else if (isModifier()) {
2818     return MatchOperand_NoMatch;
2819   } else {
2820     return parseImm(Operands, HasSP3AbsMod);
2821   }
2822 }
2823 
2824 bool
2825 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2826   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2827     const auto &str = Token.getString();
2828     return str == "abs" || str == "neg" || str == "sext";
2829   }
2830   return false;
2831 }
2832 
2833 bool
2834 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2835   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2836 }
2837 
2838 bool
2839 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2840   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2841 }
2842 
2843 bool
2844 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2845   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2846 }
2847 
2848 // Check if this is an operand modifier or an opcode modifier
2849 // which may look like an expression but is not. We should
2850 // avoid parsing these modifiers as expressions. Currently
2851 // recognized sequences are:
2852 //   |...|
2853 //   abs(...)
2854 //   neg(...)
2855 //   sext(...)
2856 //   -reg
2857 //   -|...|
2858 //   -abs(...)
2859 //   name:...
2860 // Note that simple opcode modifiers like 'gds' may be parsed as
2861 // expressions; this is a special case. See getExpressionAsToken.
2862 //
2863 bool
2864 AMDGPUAsmParser::isModifier() {
2865 
2866   AsmToken Tok = getToken();
2867   AsmToken NextToken[2];
2868   peekTokens(NextToken);
2869 
2870   return isOperandModifier(Tok, NextToken[0]) ||
2871          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2872          isOpcodeModifierWithVal(Tok, NextToken[0]);
2873 }
2874 
2875 // Check if the current token is an SP3 'neg' modifier.
2876 // Currently this modifier is allowed in the following context:
2877 //
2878 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2879 // 2. Before an 'abs' modifier: -abs(...)
2880 // 3. Before an SP3 'abs' modifier: -|...|
2881 //
2882 // In all other cases "-" is handled as a part
2883 // of an expression that follows the sign.
2884 //
2885 // Note: When "-" is followed by an integer literal,
2886 // this is interpreted as integer negation rather
2887 // than a floating-point NEG modifier applied to N.
2888 // Besides being counter-intuitive, such use of the floating-point
2889 // NEG modifier would have resulted in different meanings
2890 // of integer literals used with VOP1/2/C and VOP3,
2891 // for example:
2892 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2893 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2894 // Negative fp literals with a preceding "-" are
2895 // handled likewise for uniformity.
2896 //
2897 bool
2898 AMDGPUAsmParser::parseSP3NegModifier() {
2899 
2900   AsmToken NextToken[2];
2901   peekTokens(NextToken);
2902 
2903   if (isToken(AsmToken::Minus) &&
2904       (isRegister(NextToken[0], NextToken[1]) ||
2905        NextToken[0].is(AsmToken::Pipe) ||
2906        isId(NextToken[0], "abs"))) {
2907     lex();
2908     return true;
2909   }
2910 
2911   return false;
2912 }
2913 
2914 OperandMatchResultTy
2915 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2916                                               bool AllowImm) {
2917   bool Neg, SP3Neg;
2918   bool Abs, SP3Abs;
2919   SMLoc Loc;
2920 
2921   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2922   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2923     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2924     return MatchOperand_ParseFail;
2925   }
2926 
2927   SP3Neg = parseSP3NegModifier();
2928 
2929   Loc = getLoc();
2930   Neg = trySkipId("neg");
2931   if (Neg && SP3Neg) {
2932     Error(Loc, "expected register or immediate");
2933     return MatchOperand_ParseFail;
2934   }
2935   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2936     return MatchOperand_ParseFail;
2937 
2938   Abs = trySkipId("abs");
2939   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2940     return MatchOperand_ParseFail;
2941 
2942   Loc = getLoc();
2943   SP3Abs = trySkipToken(AsmToken::Pipe);
2944   if (Abs && SP3Abs) {
2945     Error(Loc, "expected register or immediate");
2946     return MatchOperand_ParseFail;
2947   }
2948 
2949   OperandMatchResultTy Res;
2950   if (AllowImm) {
2951     Res = parseRegOrImm(Operands, SP3Abs);
2952   } else {
2953     Res = parseReg(Operands);
2954   }
2955   if (Res != MatchOperand_Success) {
2956     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2957   }
2958 
2959   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2960     return MatchOperand_ParseFail;
2961   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2962     return MatchOperand_ParseFail;
2963   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2964     return MatchOperand_ParseFail;
2965 
2966   AMDGPUOperand::Modifiers Mods;
2967   Mods.Abs = Abs || SP3Abs;
2968   Mods.Neg = Neg || SP3Neg;
2969 
2970   if (Mods.hasFPModifiers()) {
2971     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2972     if (Op.isExpr()) {
2973       Error(Op.getStartLoc(), "expected an absolute expression");
2974       return MatchOperand_ParseFail;
2975     }
2976     Op.setModifiers(Mods);
2977   }
2978   return MatchOperand_Success;
2979 }
2980 
2981 OperandMatchResultTy
2982 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2983                                                bool AllowImm) {
2984   bool Sext = trySkipId("sext");
2985   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2986     return MatchOperand_ParseFail;
2987 
2988   OperandMatchResultTy Res;
2989   if (AllowImm) {
2990     Res = parseRegOrImm(Operands);
2991   } else {
2992     Res = parseReg(Operands);
2993   }
2994   if (Res != MatchOperand_Success) {
2995     return Sext? MatchOperand_ParseFail : Res;
2996   }
2997 
2998   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2999     return MatchOperand_ParseFail;
3000 
3001   AMDGPUOperand::Modifiers Mods;
3002   Mods.Sext = Sext;
3003 
3004   if (Mods.hasIntModifiers()) {
3005     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3006     if (Op.isExpr()) {
3007       Error(Op.getStartLoc(), "expected an absolute expression");
3008       return MatchOperand_ParseFail;
3009     }
3010     Op.setModifiers(Mods);
3011   }
3012 
3013   return MatchOperand_Success;
3014 }
3015 
3016 OperandMatchResultTy
3017 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3018   return parseRegOrImmWithFPInputMods(Operands, false);
3019 }
3020 
3021 OperandMatchResultTy
3022 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3023   return parseRegOrImmWithIntInputMods(Operands, false);
3024 }
3025 
3026 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3027   auto Loc = getLoc();
3028   if (trySkipId("off")) {
3029     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3030                                                 AMDGPUOperand::ImmTyOff, false));
3031     return MatchOperand_Success;
3032   }
3033 
3034   if (!isRegister())
3035     return MatchOperand_NoMatch;
3036 
3037   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3038   if (Reg) {
3039     Operands.push_back(std::move(Reg));
3040     return MatchOperand_Success;
3041   }
3042 
3043   return MatchOperand_ParseFail;
3044 
3045 }
3046 
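// Reject matches whose encoding does not agree with the encoding forced by
// an explicit _e32/_e64/_sdwa/_dpp mnemonic suffix, and handle a few opcode
// specific restrictions.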
3047 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3048   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3049 
3050   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3051       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3052       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3053       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3054     return Match_InvalidOperand;
3055 
3056   if ((TSFlags & SIInstrFlags::VOP3) &&
3057       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3058       getForcedEncodingSize() != 64)
3059     return Match_PreferE32;
3060 
3061   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3062       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3063     // v_mac_f32/16 allow only dst_sel == DWORD.
3064     auto OpNum =
3065         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3066     const auto &Op = Inst.getOperand(OpNum);
3067     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3068       return Match_InvalidOperand;
3069     }
3070   }
3071 
3072   return Match_Success;
3073 }
3074 
3075 static ArrayRef<unsigned> getAllVariants() {
3076   static const unsigned Variants[] = {
3077     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3078     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3079   };
3080 
3081   return makeArrayRef(Variants);
3082 }
3083 
3084 // Determine which asm variants should be checked for this instruction.
3085 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3086   if (getForcedEncodingSize() == 32) {
3087     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3088     return makeArrayRef(Variants);
3089   }
3090 
3091   if (isForcedVOP3()) {
3092     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3093     return makeArrayRef(Variants);
3094   }
3095 
3096   if (isForcedSDWA()) {
3097     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3098                                         AMDGPUAsmVariants::SDWA9};
3099     return makeArrayRef(Variants);
3100   }
3101 
3102   if (isForcedDPP()) {
3103     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3104     return makeArrayRef(Variants);
3105   }
3106 
3107   return getAllVariants();
3108 }
3109 
3110 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3111   if (getForcedEncodingSize() == 32)
3112     return "e32";
3113 
3114   if (isForcedVOP3())
3115     return "e64";
3116 
3117   if (isForcedSDWA())
3118     return "sdwa";
3119 
3120   if (isForcedDPP())
3121     return "dpp";
3122 
3123   return "";
3124 }
3125 
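// Return an SGPR that the instruction reads implicitly (VCC, M0, FLAT_SCR,
// ...), if any; such reads count against the constant bus limit.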
3126 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3127   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3128   const unsigned Num = Desc.getNumImplicitUses();
3129   for (unsigned i = 0; i < Num; ++i) {
3130     unsigned Reg = Desc.ImplicitUses[i];
3131     switch (Reg) {
3132     case AMDGPU::FLAT_SCR:
3133     case AMDGPU::VCC:
3134     case AMDGPU::VCC_LO:
3135     case AMDGPU::VCC_HI:
3136     case AMDGPU::M0:
3137       return Reg;
3138     default:
3139       break;
3140     }
3141   }
3142   return AMDGPU::NoRegister;
3143 }
3144 
3145 // NB: This code is correct only when used to check constant
3146 // bus limitations because GFX7 does not support f16 inline constants.
3147 // Note that there are no cases when a GFX7 opcode violates
3148 // constant bus limitations due to the use of an f16 constant.
3149 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3150                                        unsigned OpIdx) const {
3151   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3152 
3153   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3154     return false;
3155   }
3156 
3157   const MCOperand &MO = Inst.getOperand(OpIdx);
3158 
3159   int64_t Val = MO.getImm();
3160   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3161 
3162   switch (OpSize) { // expected operand size
3163   case 8:
3164     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3165   case 4:
3166     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3167   case 2: {
3168     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3169     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3170         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3171         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3172       return AMDGPU::isInlinableIntLiteral(Val);
3173 
3174     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3175         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3176         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3177       return AMDGPU::isInlinableIntLiteralV216(Val);
3178 
3179     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3180         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3181         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3182       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3183 
3184     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3185   }
3186   default:
3187     llvm_unreachable("invalid operand size");
3188   }
3189 }
3190 
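// Maximum number of scalar values (SGPRs and literals) an instruction may
// read through the constant bus: one on pre-GFX10 targets and for 64-bit
// shifts, two otherwise on GFX10+.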
3191 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3192   if (!isGFX10Plus())
3193     return 1;
3194 
3195   switch (Opcode) {
3196   // 64-bit shift instructions can use only one scalar value input
3197   case AMDGPU::V_LSHLREV_B64_e64:
3198   case AMDGPU::V_LSHLREV_B64_gfx10:
3199   case AMDGPU::V_LSHRREV_B64_e64:
3200   case AMDGPU::V_LSHRREV_B64_gfx10:
3201   case AMDGPU::V_ASHRREV_I64_e64:
3202   case AMDGPU::V_ASHRREV_I64_gfx10:
3203   case AMDGPU::V_LSHL_B64_e64:
3204   case AMDGPU::V_LSHR_B64_e64:
3205   case AMDGPU::V_ASHR_I64_e64:
3206     return 1;
3207   default:
3208     return 2;
3209   }
3210 }
3211 
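// An operand occupies the constant bus if it is a non-inline immediate,
// an SGPR other than null, or an expression.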
3212 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3213   const MCOperand &MO = Inst.getOperand(OpIdx);
3214   if (MO.isImm()) {
3215     return !isInlineConstant(Inst, OpIdx);
3216   } else if (MO.isReg()) {
3217     auto Reg = MO.getReg();
3218     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3219     auto PReg = mc2PseudoReg(Reg);
3220     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3221   } else {
3222     return true;
3223   }
3224 }
3225 
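// Count the distinct scalar values (SGPRs and literals) read by the
// instruction and check the total against getConstantBusLimit().
// For example, on pre-GFX10 targets "v_add_f32_e64 v0, s0, s1" reads two
// different SGPRs and exceeds the limit of one.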
3226 bool
3227 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3228                                                 const OperandVector &Operands) {
3229   const unsigned Opcode = Inst.getOpcode();
3230   const MCInstrDesc &Desc = MII.get(Opcode);
3231   unsigned LastSGPR = AMDGPU::NoRegister;
3232   unsigned ConstantBusUseCount = 0;
3233   unsigned NumLiterals = 0;
3234   unsigned LiteralSize;
3235 
3236   if (Desc.TSFlags &
3237       (SIInstrFlags::VOPC |
3238        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3239        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3240        SIInstrFlags::SDWA)) {
3241     // Check special imm operands (used by madmk, etc)
3242     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3243       ++ConstantBusUseCount;
3244     }
3245 
3246     SmallDenseSet<unsigned> SGPRsUsed;
3247     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3248     if (SGPRUsed != AMDGPU::NoRegister) {
3249       SGPRsUsed.insert(SGPRUsed);
3250       ++ConstantBusUseCount;
3251     }
3252 
3253     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3254     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3255     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3256 
3257     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3258 
3259     for (int OpIdx : OpIndices) {
3260       if (OpIdx == -1) break;
3261 
3262       const MCOperand &MO = Inst.getOperand(OpIdx);
3263       if (usesConstantBus(Inst, OpIdx)) {
3264         if (MO.isReg()) {
3265           LastSGPR = mc2PseudoReg(MO.getReg());
3266           // Pairs of registers with partial intersections like these:
3267           //   s0, s[0:1]
3268           //   flat_scratch_lo, flat_scratch
3269           //   flat_scratch_lo, flat_scratch_hi
3270           // are theoretically valid but they are disabled anyway.
3271           // Note that this code mimics SIInstrInfo::verifyInstruction
3272           if (!SGPRsUsed.count(LastSGPR)) {
3273             SGPRsUsed.insert(LastSGPR);
3274             ++ConstantBusUseCount;
3275           }
3276         } else { // Expression or a literal
3277 
3278           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3279             continue; // special operand like VINTERP attr_chan
3280 
3281           // An instruction may use only one literal.
3282           // This has been validated on the previous step.
3283           // See validateVOP3Literal.
3284           // This literal may be used as more than one operand.
3285           // If all these operands are of the same size,
3286           // this literal counts as one scalar value.
3287           // Otherwise it counts as 2 scalar values.
3288           // See "GFX10 Shader Programming", section 3.6.2.3.
3289 
3290           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3291           if (Size < 4) Size = 4;
3292 
3293           if (NumLiterals == 0) {
3294             NumLiterals = 1;
3295             LiteralSize = Size;
3296           } else if (LiteralSize != Size) {
3297             NumLiterals = 2;
3298           }
3299         }
3300       }
3301     }
3302   }
3303   ConstantBusUseCount += NumLiterals;
3304 
3305   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3306     return true;
3307 
3308   SMLoc LitLoc = getLitLoc(Operands);
3309   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3310   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3311   Error(Loc, "invalid operand (violates constant bus restrictions)");
3312   return false;
3313 }
3314 
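// For instructions whose vdst operand is marked early-clobber, the
// destination register must not overlap any source register.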
3315 bool
3316 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3317                                                  const OperandVector &Operands) {
3318   const unsigned Opcode = Inst.getOpcode();
3319   const MCInstrDesc &Desc = MII.get(Opcode);
3320 
3321   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3322   if (DstIdx == -1 ||
3323       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3324     return true;
3325   }
3326 
3327   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3328 
3329   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3330   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3331   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3332 
3333   assert(DstIdx != -1);
3334   const MCOperand &Dst = Inst.getOperand(DstIdx);
3335   assert(Dst.isReg());
3336   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3337 
3338   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3339 
3340   for (int SrcIdx : SrcIndices) {
3341     if (SrcIdx == -1) break;
3342     const MCOperand &Src = Inst.getOperand(SrcIdx);
3343     if (Src.isReg()) {
3344       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3345       if (isRegIntersect(DstReg, SrcReg, TRI)) {
3346         Error(getRegLoc(SrcReg, Operands),
3347           "destination must be different than all sources");
3348         return false;
3349       }
3350     }
3351   }
3352 
3353   return true;
3354 }
3355 
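// Reject a non-zero clamp modifier on integer instructions when the target
// does not support integer clamping.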
3356 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3357 
3358   const unsigned Opc = Inst.getOpcode();
3359   const MCInstrDesc &Desc = MII.get(Opc);
3360 
3361   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3362     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3363     assert(ClampIdx != -1);
3364     return Inst.getOperand(ClampIdx).getImm() == 0;
3365   }
3366 
3367   return true;
3368 }
3369 
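// Check that the size of the vdata operand matches the number of channels
// enabled by dmask plus tfe, e.g. dmask:0x7 needs a 3-register vdata
// (4 with tfe); with packed d16 the channel count is halved, rounded up.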
3370 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3371 
3372   const unsigned Opc = Inst.getOpcode();
3373   const MCInstrDesc &Desc = MII.get(Opc);
3374 
3375   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3376     return true;
3377 
3378   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3379   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3380   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3381 
3382   assert(VDataIdx != -1);
3383 
3384   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3385     return true;
3386 
3387   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3388   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3389   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3390   if (DMask == 0)
3391     DMask = 1;
3392 
3393   unsigned DataSize =
3394     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3395   if (hasPackedD16()) {
3396     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3397     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3398       DataSize = (DataSize + 1) / 2;
3399   }
3400 
3401   return (VDataSize / 4) == DataSize + TFESize;
3402 }
3403 
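// On GFX10+, check that the number of address registers matches what the
// dim, gradient, coordinate and lod/clamp/mip arguments require, rounding
// up to the next legal vector size for non-NSA encodings.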
3404 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3405   const unsigned Opc = Inst.getOpcode();
3406   const MCInstrDesc &Desc = MII.get(Opc);
3407 
3408   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3409     return true;
3410 
3411   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3412 
3413   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3414       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3415   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3416   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3417   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3418 
3419   assert(VAddr0Idx != -1);
3420   assert(SrsrcIdx != -1);
3421   assert(SrsrcIdx > VAddr0Idx);
3422 
3423   if (DimIdx == -1)
3424     return true; // intersect_ray
3425 
3426   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3427   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3428   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3429   unsigned VAddrSize =
3430       IsNSA ? SrsrcIdx - VAddr0Idx
3431             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3432 
3433   unsigned AddrSize = BaseOpcode->NumExtraArgs +
3434                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
3435                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
3436                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
3437   if (!IsNSA) {
3438     if (AddrSize > 8)
3439       AddrSize = 16;
3440     else if (AddrSize > 4)
3441       AddrSize = 8;
3442   }
3443 
3444   return VAddrSize == AddrSize;
3445 }
3446 
3447 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3448 
3449   const unsigned Opc = Inst.getOpcode();
3450   const MCInstrDesc &Desc = MII.get(Opc);
3451 
3452   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3453     return true;
3454   if (!Desc.mayLoad() || !Desc.mayStore())
3455     return true; // Not atomic
3456 
3457   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3458   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3459 
3460   // This is an incomplete check because image_atomic_cmpswap
3461   // may only use 0x3 and 0xf while other atomic operations
3462   // may use 0x1 and 0x3. However these limitations are
3463   // verified when we check that dmask matches dst size.
3464   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3465 }
3466 
3467 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3468 
3469   const unsigned Opc = Inst.getOpcode();
3470   const MCInstrDesc &Desc = MII.get(Opc);
3471 
3472   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3473     return true;
3474 
3475   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3476   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3477 
3478   // GATHER4 instructions use dmask in a different fashion compared to
3479   // other MIMG instructions. The only useful DMASK values are
3480   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3481   // (red,red,red,red) etc.) The ISA document doesn't mention
3482   // this.
3483   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3484 }
3485 
3486 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3487   const unsigned Opc = Inst.getOpcode();
3488   const MCInstrDesc &Desc = MII.get(Opc);
3489 
3490   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3491     return true;
3492 
3493   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3494   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3495       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3496 
3497   if (!BaseOpcode->MSAA)
3498     return true;
3499 
3500   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3501   assert(DimIdx != -1);
3502 
3503   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3504   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3505 
3506   return DimInfo->MSAA;
3507 }
3508 
3509 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3510 {
3511   switch (Opcode) {
3512   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3513   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3514   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3515     return true;
3516   default:
3517     return false;
3518   }
3519 }
3520 
3521 // movrels* opcodes should only allow VGPRs as src0.
3522 // This is specified in .td description for vop1/vop3,
3523 // but sdwa is handled differently. See isSDWAOperand.
3524 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3525                                       const OperandVector &Operands) {
3526 
3527   const unsigned Opc = Inst.getOpcode();
3528   const MCInstrDesc &Desc = MII.get(Opc);
3529 
3530   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3531     return true;
3532 
3533   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3534   assert(Src0Idx != -1);
3535 
3536   SMLoc ErrLoc;
3537   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3538   if (Src0.isReg()) {
3539     auto Reg = mc2PseudoReg(Src0.getReg());
3540     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3541     if (!isSGPR(Reg, TRI))
3542       return true;
3543     ErrLoc = getRegLoc(Reg, Operands);
3544   } else {
3545     ErrLoc = getConstLoc(Operands);
3546   }
3547 
3548   Error(ErrLoc, "source operand must be a VGPR");
3549   return false;
3550 }
3551 
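// v_accvgpr_write accepts only a VGPR or an inline constant as src0;
// SGPR sources are rejected.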
3552 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3553                                           const OperandVector &Operands) {
3554 
3555   const unsigned Opc = Inst.getOpcode();
3556 
3557   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3558     return true;
3559 
3560   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3561   assert(Src0Idx != -1);
3562 
3563   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3564   if (!Src0.isReg())
3565     return true;
3566 
3567   auto Reg = mc2PseudoReg(Src0.getReg());
3568   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3569   if (isSGPR(Reg, TRI)) {
3570     Error(getRegLoc(Reg, Operands),
3571           "source operand must be either a VGPR or an inline constant");
3572     return false;
3573   }
3574 
3575   return true;
3576 }
3577 
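// v_div_scale_{f32,f64} do not accept the abs (|...|) source modifier.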
3578 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3579   switch (Inst.getOpcode()) {
3580   default:
3581     return true;
3582   case V_DIV_SCALE_F32_gfx6_gfx7:
3583   case V_DIV_SCALE_F32_vi:
3584   case V_DIV_SCALE_F32_gfx10:
3585   case V_DIV_SCALE_F64_gfx6_gfx7:
3586   case V_DIV_SCALE_F64_vi:
3587   case V_DIV_SCALE_F64_gfx10:
3588     break;
3589   }
3590 
3591   // TODO: Check that src0 = src1 or src2.
3592 
3593   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3594                     AMDGPU::OpName::src1_modifiers,
3595                     AMDGPU::OpName::src2_modifiers}) {
3596     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3597             .getImm() &
3598         SISrcMods::ABS) {
3599       return false;
3600     }
3601   }
3602 
3603   return true;
3604 }
3605 
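// The d16 image modifier is not supported on SI and CI.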
3606 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3607 
3608   const unsigned Opc = Inst.getOpcode();
3609   const MCInstrDesc &Desc = MII.get(Opc);
3610 
3611   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3612     return true;
3613 
3614   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3615   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3616     if (isCI() || isSI())
3617       return false;
3618   }
3619 
3620   return true;
3621 }
3622 
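// The dim operand must encode one of the eight valid dimension values.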
3623 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3624   const unsigned Opc = Inst.getOpcode();
3625   const MCInstrDesc &Desc = MII.get(Opc);
3626 
3627   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3628     return true;
3629 
3630   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3631   if (DimIdx < 0)
3632     return true;
3633 
3634   long Imm = Inst.getOperand(DimIdx).getImm();
3635   if (Imm < 0 || Imm >= 8)
3636     return false;
3637 
3638   return true;
3639 }
3640 
3641 static bool IsRevOpcode(const unsigned Opcode)
3642 {
3643   switch (Opcode) {
3644   case AMDGPU::V_SUBREV_F32_e32:
3645   case AMDGPU::V_SUBREV_F32_e64:
3646   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3647   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3648   case AMDGPU::V_SUBREV_F32_e32_vi:
3649   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3650   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3651   case AMDGPU::V_SUBREV_F32_e64_vi:
3652 
3653   case AMDGPU::V_SUBREV_CO_U32_e32:
3654   case AMDGPU::V_SUBREV_CO_U32_e64:
3655   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3656   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3657 
3658   case AMDGPU::V_SUBBREV_U32_e32:
3659   case AMDGPU::V_SUBBREV_U32_e64:
3660   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3661   case AMDGPU::V_SUBBREV_U32_e32_vi:
3662   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3663   case AMDGPU::V_SUBBREV_U32_e64_vi:
3664 
3665   case AMDGPU::V_SUBREV_U32_e32:
3666   case AMDGPU::V_SUBREV_U32_e64:
3667   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3668   case AMDGPU::V_SUBREV_U32_e32_vi:
3669   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3670   case AMDGPU::V_SUBREV_U32_e64_vi:
3671 
3672   case AMDGPU::V_SUBREV_F16_e32:
3673   case AMDGPU::V_SUBREV_F16_e64:
3674   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3675   case AMDGPU::V_SUBREV_F16_e32_vi:
3676   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3677   case AMDGPU::V_SUBREV_F16_e64_vi:
3678 
3679   case AMDGPU::V_SUBREV_U16_e32:
3680   case AMDGPU::V_SUBREV_U16_e64:
3681   case AMDGPU::V_SUBREV_U16_e32_vi:
3682   case AMDGPU::V_SUBREV_U16_e64_vi:
3683 
3684   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3685   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3686   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3687 
3688   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3689   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3690 
3691   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3692   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3693 
3694   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3695   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3696 
3697   case AMDGPU::V_LSHRREV_B32_e32:
3698   case AMDGPU::V_LSHRREV_B32_e64:
3699   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3700   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3701   case AMDGPU::V_LSHRREV_B32_e32_vi:
3702   case AMDGPU::V_LSHRREV_B32_e64_vi:
3703   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3704   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3705 
3706   case AMDGPU::V_ASHRREV_I32_e32:
3707   case AMDGPU::V_ASHRREV_I32_e64:
3708   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3709   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3710   case AMDGPU::V_ASHRREV_I32_e32_vi:
3711   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3712   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3713   case AMDGPU::V_ASHRREV_I32_e64_vi:
3714 
3715   case AMDGPU::V_LSHLREV_B32_e32:
3716   case AMDGPU::V_LSHLREV_B32_e64:
3717   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3718   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3719   case AMDGPU::V_LSHLREV_B32_e32_vi:
3720   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3721   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3722   case AMDGPU::V_LSHLREV_B32_e64_vi:
3723 
3724   case AMDGPU::V_LSHLREV_B16_e32:
3725   case AMDGPU::V_LSHLREV_B16_e64:
3726   case AMDGPU::V_LSHLREV_B16_e32_vi:
3727   case AMDGPU::V_LSHLREV_B16_e64_vi:
3728   case AMDGPU::V_LSHLREV_B16_gfx10:
3729 
3730   case AMDGPU::V_LSHRREV_B16_e32:
3731   case AMDGPU::V_LSHRREV_B16_e64:
3732   case AMDGPU::V_LSHRREV_B16_e32_vi:
3733   case AMDGPU::V_LSHRREV_B16_e64_vi:
3734   case AMDGPU::V_LSHRREV_B16_gfx10:
3735 
3736   case AMDGPU::V_ASHRREV_I16_e32:
3737   case AMDGPU::V_ASHRREV_I16_e64:
3738   case AMDGPU::V_ASHRREV_I16_e32_vi:
3739   case AMDGPU::V_ASHRREV_I16_e64_vi:
3740   case AMDGPU::V_ASHRREV_I16_gfx10:
3741 
3742   case AMDGPU::V_LSHLREV_B64_e64:
3743   case AMDGPU::V_LSHLREV_B64_gfx10:
3744   case AMDGPU::V_LSHLREV_B64_vi:
3745 
3746   case AMDGPU::V_LSHRREV_B64_e64:
3747   case AMDGPU::V_LSHRREV_B64_gfx10:
3748   case AMDGPU::V_LSHRREV_B64_vi:
3749 
3750   case AMDGPU::V_ASHRREV_I64_e64:
3751   case AMDGPU::V_ASHRREV_I64_gfx10:
3752   case AMDGPU::V_ASHRREV_I64_vi:
3753 
3754   case AMDGPU::V_PK_LSHLREV_B16:
3755   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3756   case AMDGPU::V_PK_LSHLREV_B16_vi:
3757 
3758   case AMDGPU::V_PK_LSHRREV_B16:
3759   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3760   case AMDGPU::V_PK_LSHRREV_B16_vi:
3761   case AMDGPU::V_PK_ASHRREV_I16:
3762   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3763   case AMDGPU::V_PK_ASHRREV_I16_vi:
3764     return true;
3765   default:
3766     return false;
3767   }
3768 }
3769 
3770 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3771 
3772   using namespace SIInstrFlags;
3773   const unsigned Opcode = Inst.getOpcode();
3774   const MCInstrDesc &Desc = MII.get(Opcode);
3775 
3776   // lds_direct register is defined so that it can be used
3777   // with 9-bit operands only. Ignore encodings which do not accept these.
3778   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3779   if ((Desc.TSFlags & Enc) == 0)
3780     return None;
3781 
3782   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3783     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3784     if (SrcIdx == -1)
3785       break;
3786     const auto &Src = Inst.getOperand(SrcIdx);
3787     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3788 
3789       if (isGFX90A())
3790         return StringRef("lds_direct is not supported on this GPU");
3791 
3792       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3793         return StringRef("lds_direct cannot be used with this instruction");
3794 
3795       if (SrcName != OpName::src0)
3796         return StringRef("lds_direct may be used as src0 only");
3797     }
3798   }
3799 
3800   return None;
3801 }
3802 
3803 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3804   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3805     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3806     if (Op.isFlatOffset())
3807       return Op.getStartLoc();
3808   }
3809   return getLoc();
3810 }
3811 
3812 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3813                                          const OperandVector &Operands) {
3814   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3815   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3816     return true;
3817 
3818   auto Opcode = Inst.getOpcode();
3819   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3820   assert(OpNum != -1);
3821 
3822   const auto &Op = Inst.getOperand(OpNum);
3823   if (!hasFlatOffsets() && Op.getImm() != 0) {
3824     Error(getFlatOffsetLoc(Operands),
3825           "flat offset modifier is not supported on this GPU");
3826     return false;
3827   }
3828 
3829   // For the plain FLAT segment the offset must be positive: the MSB is
3830   // ignored and forced to zero, so only GLOBAL and SCRATCH take signed offsets.
3831   if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
3832     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
3833     if (!isIntN(OffsetSize, Op.getImm())) {
3834       Error(getFlatOffsetLoc(Operands),
3835             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
3836       return false;
3837     }
3838   } else {
3839     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
3840     if (!isUIntN(OffsetSize, Op.getImm())) {
3841       Error(getFlatOffsetLoc(Operands),
3842             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
3843       return false;
3844     }
3845   }
3846 
3847   return true;
3848 }
3849 
3850 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3851   // Start with second operand because SMEM Offset cannot be dst or src0.
3852   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
3853     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3854     if (Op.isSMEMOffset())
3855       return Op.getStartLoc();
3856   }
3857   return getLoc();
3858 }
3859 
3860 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3861                                          const OperandVector &Operands) {
3862   if (isCI() || isSI())
3863     return true;
3864 
3865   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3866   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3867     return true;
3868 
3869   auto Opcode = Inst.getOpcode();
3870   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3871   if (OpNum == -1)
3872     return true;
3873 
3874   const auto &Op = Inst.getOperand(OpNum);
3875   if (!Op.isImm())
3876     return true;
3877 
3878   uint64_t Offset = Op.getImm();
3879   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3880   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3881       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3882     return true;
3883 
3884   Error(getSMEMOffsetLoc(Operands),
3885         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3886                                "expected a 21-bit signed offset");
3887 
3888   return false;
3889 }
3890 
3891 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3892   unsigned Opcode = Inst.getOpcode();
3893   const MCInstrDesc &Desc = MII.get(Opcode);
3894   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3895     return true;
3896 
3897   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3898   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3899 
3900   const int OpIndices[] = { Src0Idx, Src1Idx };
3901 
3902   unsigned NumExprs = 0;
3903   unsigned NumLiterals = 0;
3904   uint32_t LiteralValue;
3905 
3906   for (int OpIdx : OpIndices) {
3907     if (OpIdx == -1) break;
3908 
3909     const MCOperand &MO = Inst.getOperand(OpIdx);
3910     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3911     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3912       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3913         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3914         if (NumLiterals == 0 || LiteralValue != Value) {
3915           LiteralValue = Value;
3916           ++NumLiterals;
3917         }
3918       } else if (MO.isExpr()) {
3919         ++NumExprs;
3920       }
3921     }
3922   }
3923 
3924   return NumLiterals + NumExprs <= 1;
3925 }
3926 
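// v_permlane16/v_permlanex16 use only the two low op_sel bits.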
3927 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3928   const unsigned Opc = Inst.getOpcode();
3929   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3930       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3931     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3932     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3933 
3934     if (OpSel & ~3)
3935       return false;
3936   }
3937   return true;
3938 }
3939 
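// DPP on 64-bit operands is only legal with the controls accepted by
// isLegal64BitDPPControl (row_newbcast).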
3940 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
3941                                   const OperandVector &Operands) {
3942   const unsigned Opc = Inst.getOpcode();
3943   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
3944   if (DppCtrlIdx < 0)
3945     return true;
3946   unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
3947 
3948   if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
3949     // DPP64 is supported for row_newbcast only.
3950     int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3951     if (Src0Idx >= 0 &&
3952         getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
3953       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
3954       Error(S, "64 bit dpp only supports row_newbcast");
3955       return false;
3956     }
3957   }
3958 
3959   return true;
3960 }
3961 
3962 // Check if VCC register matches wavefront size
3963 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3964   auto FB = getFeatureBits();
3965   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3966     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3967 }
3968 
3969 // VOP3 literal is only allowed in GFX10+ and only one can be used
3970 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
3971                                           const OperandVector &Operands) {
3972   unsigned Opcode = Inst.getOpcode();
3973   const MCInstrDesc &Desc = MII.get(Opcode);
3974   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3975     return true;
3976 
3977   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3978   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3979   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3980 
3981   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3982 
3983   unsigned NumExprs = 0;
3984   unsigned NumLiterals = 0;
3985   uint32_t LiteralValue;
3986 
3987   for (int OpIdx : OpIndices) {
3988     if (OpIdx == -1) break;
3989 
3990     const MCOperand &MO = Inst.getOperand(OpIdx);
3991     if (!MO.isImm() && !MO.isExpr())
3992       continue;
3993     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
3994       continue;
3995 
3996     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3997         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
3998       Error(getConstLoc(Operands),
3999             "inline constants are not allowed for this operand");
4000       return false;
4001     }
4002 
4003     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4004       uint32_t Value = static_cast<uint32_t>(MO.getImm());
4005       if (NumLiterals == 0 || LiteralValue != Value) {
4006         LiteralValue = Value;
4007         ++NumLiterals;
4008       }
4009     } else if (MO.isExpr()) {
4010       ++NumExprs;
4011     }
4012   }
4013   NumLiterals += NumExprs;
4014 
4015   if (!NumLiterals)
4016     return true;
4017 
4018   if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4019     Error(getLitLoc(Operands), "literal operands are not supported");
4020     return false;
4021   }
4022 
4023   if (NumLiterals > 1) {
4024     Error(getLitLoc(Operands), "only one literal operand is allowed");
4025     return false;
4026   }
4027 
4028   return true;
4029 }
4030 
4031 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4032 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4033                          const MCRegisterInfo *MRI) {
4034   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4035   if (OpIdx < 0)
4036     return -1;
4037 
4038   const MCOperand &Op = Inst.getOperand(OpIdx);
4039   if (!Op.isReg())
4040     return -1;
4041 
4042   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4043   auto Reg = Sub ? Sub : Op.getReg();
4044   const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4045   return AGRP32.contains(Reg) ? 1 : 0;
4046 }
4047 
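// For FLAT/MUBUF/MTBUF/MIMG/DS memory instructions, data and destination
// operands must agree on the register class: on gfx90a both may be AGPRs,
// on other targets neither may be.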
4048 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4049   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4050   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4051                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4052                   SIInstrFlags::DS)) == 0)
4053     return true;
4054 
4055   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4056                                                       : AMDGPU::OpName::vdata;
4057 
4058   const MCRegisterInfo *MRI = getMRI();
4059   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4060   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4061 
4062   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4063     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4064     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4065       return false;
4066   }
4067 
4068   auto FB = getFeatureBits();
4069   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4070     if (DataAreg < 0 || DstAreg < 0)
4071       return true;
4072     return DstAreg == DataAreg;
4073   }
4074 
4075   return DstAreg < 1 && DataAreg < 1;
4076 }
4077 
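// On gfx90a, VGPR and AGPR tuples must start at an even-numbered register.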
4078 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4079   auto FB = getFeatureBits();
4080   if (!FB[AMDGPU::FeatureGFX90AInsts])
4081     return true;
4082 
4083   const MCRegisterInfo *MRI = getMRI();
4084   const MCRegisterClass &VGRP32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4085   const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4086   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4087     const MCOperand &Op = Inst.getOperand(I);
4088     if (!Op.isReg())
4089       continue;
4090 
4091     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4092     if (!Sub)
4093       continue;
4094 
4095     if (VGRP32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4096       return false;
4097     if (AGRP32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4098       return false;
4099   }
4100 
4101   return true;
4102 }
4103 
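// Validate the cache policy operand: SMRD allows only glc/dlc, scc is not
// supported on gfx90a, returning atomics (other than MIMG) must set glc,
// and non-returning atomics must not.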
4104 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4105                                             const OperandVector &Operands,
4106                                             const SMLoc &IDLoc) {
4107   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4108                                            AMDGPU::OpName::cpol);
4109   if (CPolPos == -1)
4110     return true;
4111 
4112   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4113 
4114   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4115   if ((TSFlags & (SIInstrFlags::SMRD)) &&
4116       (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
4117     Error(IDLoc, "invalid cache policy for SMRD instruction");
4118     return false;
4119   }
4120 
4121   if (isGFX90A() && (CPol & CPol::SCC)) {
4122     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4123     StringRef CStr(S.getPointer());
4124     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4125     Error(S, "scc is not supported on this GPU");
4126     return false;
4127   }
4128 
4129   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4130     return true;
4131 
4132   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4133     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4134       Error(IDLoc, "instruction must use glc");
4135       return false;
4136     }
4137   } else {
4138     if (CPol & CPol::GLC) {
4139       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4140       StringRef CStr(S.getPointer());
4141       S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
4142       Error(S, "instruction must not use glc");
4143       return false;
4144     }
4145   }
4146 
4147   return true;
4148 }
4149 
4150 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4151                                           const SMLoc &IDLoc,
4152                                           const OperandVector &Operands) {
4153   if (auto ErrMsg = validateLdsDirect(Inst)) {
4154     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4155     return false;
4156   }
4157   if (!validateSOPLiteral(Inst)) {
4158     Error(getLitLoc(Operands),
4159       "only one literal operand is allowed");
4160     return false;
4161   }
4162   if (!validateVOP3Literal(Inst, Operands)) {
4163     return false;
4164   }
4165   if (!validateConstantBusLimitations(Inst, Operands)) {
4166     return false;
4167   }
4168   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4169     return false;
4170   }
4171   if (!validateIntClampSupported(Inst)) {
4172     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4173       "integer clamping is not supported on this GPU");
4174     return false;
4175   }
4176   if (!validateOpSel(Inst)) {
4177     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4178       "invalid op_sel operand");
4179     return false;
4180   }
4181   if (!validateDPP(Inst, Operands)) {
4182     return false;
4183   }
4184   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4185   if (!validateMIMGD16(Inst)) {
4186     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4187       "d16 modifier is not supported on this GPU");
4188     return false;
4189   }
4190   if (!validateMIMGDim(Inst)) {
4191     Error(IDLoc, "dim modifier is required on this GPU");
4192     return false;
4193   }
4194   if (!validateMIMGMSAA(Inst)) {
4195     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4196           "invalid dim; must be MSAA type");
4197     return false;
4198   }
4199   if (!validateMIMGDataSize(Inst)) {
4200     Error(IDLoc,
4201       "image data size does not match dmask and tfe");
4202     return false;
4203   }
4204   if (!validateMIMGAddrSize(Inst)) {
4205     Error(IDLoc,
4206       "image address size does not match dim and a16");
4207     return false;
4208   }
4209   if (!validateMIMGAtomicDMask(Inst)) {
4210     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4211       "invalid atomic image dmask");
4212     return false;
4213   }
4214   if (!validateMIMGGatherDMask(Inst)) {
4215     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4216       "invalid image_gather dmask: only one bit must be set");
4217     return false;
4218   }
4219   if (!validateMovrels(Inst, Operands)) {
4220     return false;
4221   }
4222   if (!validateFlatOffset(Inst, Operands)) {
4223     return false;
4224   }
4225   if (!validateSMEMOffset(Inst, Operands)) {
4226     return false;
4227   }
4228   if (!validateMAIAccWrite(Inst, Operands)) {
4229     return false;
4230   }
4231   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4232     return false;
4233   }
4234 
4235   if (!validateAGPRLdSt(Inst)) {
4236     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4237     ? "invalid register class: data and dst should be all VGPR or AGPR"
4238     : "invalid register class: agpr loads and stores not supported on this GPU"
4239     );
4240     return false;
4241   }
4242   if (!validateVGPRAlign(Inst)) {
4243     Error(IDLoc,
4244       "invalid register class: vgpr tuples must be 64 bit aligned");
4245     return false;
4246   }
4247 
4248   if (!validateDivScale(Inst)) {
4249     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4250     return false;
4251   }
4255 
4256   return true;
4257 }
4258 
4259 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4260                                             const FeatureBitset &FBS,
4261                                             unsigned VariantID = 0);
4262 
4263 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4264                                 const FeatureBitset &AvailableFeatures,
4265                                 unsigned VariantID);
4266 
4267 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4268                                        const FeatureBitset &FBS) {
4269   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4270 }
4271 
4272 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4273                                        const FeatureBitset &FBS,
4274                                        ArrayRef<unsigned> Variants) {
4275   for (auto Variant : Variants) {
4276     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4277       return true;
4278   }
4279 
4280   return false;
4281 }
4282 
4283 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4284                                                   const SMLoc &IDLoc) {
4285   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4286 
4287   // Check if requested instruction variant is supported.
4288   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4289     return false;
4290 
4291   // This instruction is not supported.
4292   // Clear any other pending errors because they are no longer relevant.
4293   getParser().clearPendingErrors();
4294 
4295   // Requested instruction variant is not supported.
4296   // Check if any other variants are supported.
4297   StringRef VariantName = getMatchedVariantName();
4298   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4299     return Error(IDLoc,
4300                  Twine(VariantName,
4301                        " variant of this instruction is not supported"));
4302   }
4303 
4304   // Finally check if this instruction is supported on any other GPU.
4305   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4306     return Error(IDLoc, "instruction not supported on this GPU");
4307   }
4308 
4309   // Instruction not supported on any GPU. Probably a typo.
4310   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4311   return Error(IDLoc, "invalid instruction" + Suggestion);
4312 }
4313 
4314 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4315                                               OperandVector &Operands,
4316                                               MCStreamer &Out,
4317                                               uint64_t &ErrorInfo,
4318                                               bool MatchingInlineAsm) {
4319   MCInst Inst;
4320   unsigned Result = Match_Success;
4321   for (auto Variant : getMatchedVariants()) {
4322     uint64_t EI;
4323     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4324                                   Variant);
4325     // We order match statuses from least to most specific and use the most
4326     // specific status as the result:
4327     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4328     if ((R == Match_Success) ||
4329         (R == Match_PreferE32) ||
4330         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4331         (R == Match_InvalidOperand && Result != Match_MissingFeature
4332                                    && Result != Match_PreferE32) ||
4333         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4334                                    && Result != Match_MissingFeature
4335                                    && Result != Match_PreferE32)) {
4336       Result = R;
4337       ErrorInfo = EI;
4338     }
4339     if (R == Match_Success)
4340       break;
4341   }
4342 
4343   if (Result == Match_Success) {
4344     if (!validateInstruction(Inst, IDLoc, Operands)) {
4345       return true;
4346     }
4347     Inst.setLoc(IDLoc);
4348     Out.emitInstruction(Inst, getSTI());
4349     return false;
4350   }
4351 
4352   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4353   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4354     return true;
4355   }
4356 
4357   switch (Result) {
4358   default: break;
4359   case Match_MissingFeature:
4360     // It has been verified that the specified instruction
4361     // mnemonic is valid. A match was found but it requires
4362     // features which are not supported on this GPU.
4363     return Error(IDLoc, "operands are not valid for this GPU or mode");
4364 
4365   case Match_InvalidOperand: {
4366     SMLoc ErrorLoc = IDLoc;
4367     if (ErrorInfo != ~0ULL) {
4368       if (ErrorInfo >= Operands.size()) {
4369         return Error(IDLoc, "too few operands for instruction");
4370       }
4371       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4372       if (ErrorLoc == SMLoc())
4373         ErrorLoc = IDLoc;
4374     }
4375     return Error(ErrorLoc, "invalid operand for instruction");
4376   }
4377 
4378   case Match_PreferE32:
4379     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4380                         "should be encoded as e32");
4381   case Match_MnemonicFail:
4382     llvm_unreachable("Invalid instructions should have been handled already");
4383   }
4384   llvm_unreachable("Implement any new match types added!");
4385 }
4386 
4387 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4388   int64_t Tmp = -1;
4389   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4390     return true;
4391   }
4392   if (getParser().parseAbsoluteExpression(Tmp)) {
4393     return true;
4394   }
4395   Ret = static_cast<uint32_t>(Tmp);
4396   return false;
4397 }
4398 
4399 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4400                                                uint32_t &Minor) {
4401   if (ParseAsAbsoluteExpression(Major))
4402     return TokError("invalid major version");
4403 
4404   if (!trySkipToken(AsmToken::Comma))
4405     return TokError("minor version number required, comma expected");
4406 
4407   if (ParseAsAbsoluteExpression(Minor))
4408     return TokError("invalid minor version");
4409 
4410   return false;
4411 }
4412 
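// Parse .amdgcn_target and check that the quoted target id matches the one
// the target streamer was configured with.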
4413 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4414   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4415     return TokError("directive only supported for amdgcn architecture");
4416 
4417   std::string TargetIDDirective;
4418   SMLoc TargetStart = getTok().getLoc();
4419   if (getParser().parseEscapedString(TargetIDDirective))
4420     return true;
4421 
4422   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4423   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4424     return getParser().Error(TargetRange.Start,
4425         (Twine(".amdgcn_target directive's target id ") +
4426          Twine(TargetIDDirective) +
4427          Twine(" does not match the specified target id ") +
4428          Twine(getTargetStreamer().getTargetID()->toString())).str());
4429 
4430   return false;
4431 }
4432 
4433 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4434   return Error(Range.Start, "value out of range", Range);
4435 }
4436 
4437 bool AMDGPUAsmParser::calculateGPRBlocks(
4438     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4439     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4440     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4441     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4442   // TODO(scott.linder): These calculations are duplicated from
4443   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4444   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4445 
4446   unsigned NumVGPRs = NextFreeVGPR;
4447   unsigned NumSGPRs = NextFreeSGPR;
4448 
4449   if (Version.Major >= 10)
4450     NumSGPRs = 0;
4451   else {
4452     unsigned MaxAddressableNumSGPRs =
4453         IsaInfo::getAddressableNumSGPRs(&getSTI());
4454 
4455     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4456         NumSGPRs > MaxAddressableNumSGPRs)
4457       return OutOfRangeError(SGPRRange);
4458 
4459     NumSGPRs +=
4460         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4461 
4462     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4463         NumSGPRs > MaxAddressableNumSGPRs)
4464       return OutOfRangeError(SGPRRange);
4465 
4466     if (Features.test(FeatureSGPRInitBug))
4467       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4468   }
4469 
4470   VGPRBlocks =
4471       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4472   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4473 
4474   return false;
4475 }
4476 
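// Parse a .amdhsa_kernel directive: read the .amdhsa_* fields up to
// .end_amdhsa_kernel and fill in the kernel descriptor.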
4477 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4478   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4479     return TokError("directive only supported for amdgcn architecture");
4480 
4481   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4482     return TokError("directive only supported for amdhsa OS");
4483 
4484   StringRef KernelName;
4485   if (getParser().parseIdentifier(KernelName))
4486     return true;
4487 
4488   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4489 
4490   StringSet<> Seen;
4491 
4492   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4493 
4494   SMRange VGPRRange;
4495   uint64_t NextFreeVGPR = 0;
4496   uint64_t AccumOffset = 0;
4497   SMRange SGPRRange;
4498   uint64_t NextFreeSGPR = 0;
4499   unsigned UserSGPRCount = 0;
4500   bool ReserveVCC = true;
4501   bool ReserveFlatScr = true;
4502   Optional<bool> EnableWavefrontSize32;
4503 
4504   while (true) {
4505     while (trySkipToken(AsmToken::EndOfStatement));
4506 
4507     StringRef ID;
4508     SMRange IDRange = getTok().getLocRange();
4509     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4510       return true;
4511 
4512     if (ID == ".end_amdhsa_kernel")
4513       break;
4514 
4515     if (Seen.find(ID) != Seen.end())
4516       return TokError(".amdhsa_ directives cannot be repeated");
4517     Seen.insert(ID);
4518 
4519     SMLoc ValStart = getLoc();
4520     int64_t IVal;
4521     if (getParser().parseAbsoluteExpression(IVal))
4522       return true;
4523     SMLoc ValEnd = getLoc();
4524     SMRange ValRange = SMRange(ValStart, ValEnd);
4525 
4526     if (IVal < 0)
4527       return OutOfRangeError(ValRange);
4528 
4529     uint64_t Val = IVal;
4530 
4531 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4532   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4533     return OutOfRangeError(RANGE);                                             \
4534   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4535 
4536     if (ID == ".amdhsa_group_segment_fixed_size") {
4537       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4538         return OutOfRangeError(ValRange);
4539       KD.group_segment_fixed_size = Val;
4540     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4541       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4542         return OutOfRangeError(ValRange);
4543       KD.private_segment_fixed_size = Val;
4544     } else if (ID == ".amdhsa_kernarg_size") {
4545       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4546         return OutOfRangeError(ValRange);
4547       KD.kernarg_size = Val;
4548     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4549       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4550                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4551                        Val, ValRange);
4552       if (Val)
4553         UserSGPRCount += 4;
4554     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4555       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4556                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4557                        ValRange);
4558       if (Val)
4559         UserSGPRCount += 2;
4560     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4561       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4562                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4563                        ValRange);
4564       if (Val)
4565         UserSGPRCount += 2;
4566     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4567       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4568                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4569                        Val, ValRange);
4570       if (Val)
4571         UserSGPRCount += 2;
4572     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4573       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4574                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4575                        ValRange);
4576       if (Val)
4577         UserSGPRCount += 2;
4578     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4579       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4580                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4581                        ValRange);
4582       if (Val)
4583         UserSGPRCount += 2;
4584     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4585       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4586                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4587                        Val, ValRange);
4588       if (Val)
4589         UserSGPRCount += 1;
4590     } else if (ID == ".amdhsa_wavefront_size32") {
4591       if (IVersion.Major < 10)
4592         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4593       EnableWavefrontSize32 = Val;
4594       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4595                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4596                        Val, ValRange);
4597     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4598       PARSE_BITS_ENTRY(
4599           KD.compute_pgm_rsrc2,
4600           COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val,
4601           ValRange);
4602     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4603       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4604                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4605                        ValRange);
4606     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4607       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4608                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4609                        ValRange);
4610     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4611       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4612                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4613                        ValRange);
4614     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4615       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4616                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4617                        ValRange);
4618     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4619       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4620                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4621                        ValRange);
4622     } else if (ID == ".amdhsa_next_free_vgpr") {
4623       VGPRRange = ValRange;
4624       NextFreeVGPR = Val;
4625     } else if (ID == ".amdhsa_next_free_sgpr") {
4626       SGPRRange = ValRange;
4627       NextFreeSGPR = Val;
4628     } else if (ID == ".amdhsa_accum_offset") {
4629       if (!isGFX90A())
4630         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4631       AccumOffset = Val;
4632     } else if (ID == ".amdhsa_reserve_vcc") {
4633       if (!isUInt<1>(Val))
4634         return OutOfRangeError(ValRange);
4635       ReserveVCC = Val;
4636     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4637       if (IVersion.Major < 7)
4638         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4639       if (!isUInt<1>(Val))
4640         return OutOfRangeError(ValRange);
4641       ReserveFlatScr = Val;
4642     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4643       if (IVersion.Major < 8)
4644         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4645       if (!isUInt<1>(Val))
4646         return OutOfRangeError(ValRange);
4647       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
4648         return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
4649                                  IDRange);
4650     } else if (ID == ".amdhsa_float_round_mode_32") {
4651       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4652                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4653     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4654       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4655                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4656     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4657       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4658                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4659     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4660       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4661                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4662                        ValRange);
4663     } else if (ID == ".amdhsa_dx10_clamp") {
4664       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4665                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4666     } else if (ID == ".amdhsa_ieee_mode") {
4667       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4668                        Val, ValRange);
4669     } else if (ID == ".amdhsa_fp16_overflow") {
4670       if (IVersion.Major < 9)
4671         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
4672       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4673                        ValRange);
4674     } else if (ID == ".amdhsa_tg_split") {
4675       if (!isGFX90A())
4676         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4677       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
4678                        ValRange);
4679     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4680       if (IVersion.Major < 10)
4681         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4682       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4683                        ValRange);
4684     } else if (ID == ".amdhsa_memory_ordered") {
4685       if (IVersion.Major < 10)
4686         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4687       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4688                        ValRange);
4689     } else if (ID == ".amdhsa_forward_progress") {
4690       if (IVersion.Major < 10)
4691         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4692       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4693                        ValRange);
4694     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4695       PARSE_BITS_ENTRY(
4696           KD.compute_pgm_rsrc2,
4697           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4698           ValRange);
4699     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4700       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4701                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4702                        Val, ValRange);
4703     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4704       PARSE_BITS_ENTRY(
4705           KD.compute_pgm_rsrc2,
4706           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4707           ValRange);
4708     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4709       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4710                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4711                        Val, ValRange);
4712     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4713       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4714                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4715                        Val, ValRange);
4716     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4717       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4718                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4719                        Val, ValRange);
4720     } else if (ID == ".amdhsa_exception_int_div_zero") {
4721       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4722                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4723                        Val, ValRange);
4724     } else {
4725       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
4726     }
4727 
4728 #undef PARSE_BITS_ENTRY
4729   }
4730 
4731   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4732     return TokError(".amdhsa_next_free_vgpr directive is required");
4733 
4734   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4735     return TokError(".amdhsa_next_free_sgpr directive is required");
4736 
4737   unsigned VGPRBlocks;
4738   unsigned SGPRBlocks;
4739   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4740                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
4741                          EnableWavefrontSize32, NextFreeVGPR,
4742                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4743                          SGPRBlocks))
4744     return true;
4745 
4746   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4747           VGPRBlocks))
4748     return OutOfRangeError(VGPRRange);
4749   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4750                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4751 
4752   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4753           SGPRBlocks))
4754     return OutOfRangeError(SGPRRange);
4755   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4756                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4757                   SGPRBlocks);
4758 
4759   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4760     return TokError("too many user SGPRs enabled");
4761   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4762                   UserSGPRCount);
4763 
4764   if (isGFX90A()) {
4765     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
4766       return TokError(".amdhsa_accum_offset directive is required");
4767     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
4768       return TokError("accum_offset should be in range [4..256] in "
4769                       "increments of 4");
4770     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
4771       return TokError("accum_offset exceeds total VGPR allocation");
4772     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
4773                     (AccumOffset / 4 - 1));
4774   }
4775 
4776   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4777       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4778       ReserveFlatScr);
4779   return false;
4780 }
4781 
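/// ParseDirectiveHSACodeObjectVersion
///  ::= .hsa_code_object_version major_expr ',' minor_expr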
4782 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4783   uint32_t Major;
4784   uint32_t Minor;
4785 
4786   if (ParseDirectiveMajorMinor(Major, Minor))
4787     return true;
4788 
4789   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4790   return false;
4791 }
4792 
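/// ParseDirectiveHSACodeObjectISA
///  ::= .hsa_code_object_isa [major ',' minor ',' stepping ',' "vendor" ',' "arch"]
/// With no operands, the version, vendor ("AMD") and arch ("AMDGPU") are taken
/// from the targeted GPU.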
4793 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4794   uint32_t Major;
4795   uint32_t Minor;
4796   uint32_t Stepping;
4797   StringRef VendorName;
4798   StringRef ArchName;
4799 
4800   // If this directive has no arguments, then use the ISA version for the
4801   // targeted GPU.
4802   if (isToken(AsmToken::EndOfStatement)) {
4803     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4804     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
4805                                                         ISA.Stepping,
4806                                                         "AMD", "AMDGPU");
4807     return false;
4808   }
4809 
4810   if (ParseDirectiveMajorMinor(Major, Minor))
4811     return true;
4812 
4813   if (!trySkipToken(AsmToken::Comma))
4814     return TokError("stepping version number required, comma expected");
4815 
4816   if (ParseAsAbsoluteExpression(Stepping))
4817     return TokError("invalid stepping version");
4818 
4819   if (!trySkipToken(AsmToken::Comma))
4820     return TokError("vendor name required, comma expected");
4821 
4822   if (!parseString(VendorName, "invalid vendor name"))
4823     return true;
4824 
4825   if (!trySkipToken(AsmToken::Comma))
4826     return TokError("arch name required, comma expected");
4827 
4828   if (!parseString(ArchName, "invalid arch name"))
4829     return true;
4830 
4831   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
4832                                                       VendorName, ArchName);
4833   return false;
4834 }
4835 
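// Parse the value of a single amd_kernel_code_t field named ID and cross-check
// wavefront size and GFX10-only settings against the subtarget features.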
4836 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4837                                                amd_kernel_code_t &Header) {
4838   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4839   // assembly for backwards compatibility.
4840   if (ID == "max_scratch_backing_memory_byte_size") {
4841     Parser.eatToEndOfStatement();
4842     return false;
4843   }
4844 
4845   SmallString<40> ErrStr;
4846   raw_svector_ostream Err(ErrStr);
4847   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4848     return TokError(Err.str());
4849   }
4850   Lex();
4851 
4852   if (ID == "enable_wavefront_size32") {
4853     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4854       if (!isGFX10Plus())
4855         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4856       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4857         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4858     } else {
4859       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4860         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4861     }
4862   }
4863 
4864   if (ID == "wavefront_size") {
4865     if (Header.wavefront_size == 5) {
4866       if (!isGFX10Plus())
4867         return TokError("wavefront_size=5 is only allowed on GFX10+");
4868       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4869         return TokError("wavefront_size=5 requires +WavefrontSize32");
4870     } else if (Header.wavefront_size == 6) {
4871       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4872         return TokError("wavefront_size=6 requires +WavefrontSize64");
4873     }
4874   }
4875 
4876   if (ID == "enable_wgp_mode") {
4877     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
4878         !isGFX10Plus())
4879       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4880   }
4881 
4882   if (ID == "enable_mem_ordered") {
4883     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
4884         !isGFX10Plus())
4885       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4886   }
4887 
4888   if (ID == "enable_fwd_progress") {
4889     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
4890         !isGFX10Plus())
4891       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4892   }
4893 
4894   return false;
4895 }
4896 
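/// ParseDirectiveAMDKernelCodeT
///  ::= .amd_kernel_code_t
///        <field> = <value> ...
///      .end_amd_kernel_code_t
/// Fields that are not listed keep their subtarget defaults; the completed
/// header is emitted through the target streamer.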
4897 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4898   amd_kernel_code_t Header;
4899   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4900 
4901   while (true) {
4902     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
4903     // will set the current token to EndOfStatement.
4904     while (trySkipToken(AsmToken::EndOfStatement));
4905 
4906     StringRef ID;
4907     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
4908       return true;
4909 
4910     if (ID == ".end_amd_kernel_code_t")
4911       break;
4912 
4913     if (ParseAMDKernelCodeTValue(ID, Header))
4914       return true;
4915   }
4916 
4917   getTargetStreamer().EmitAMDKernelCodeT(Header);
4918 
4919   return false;
4920 }
4921 
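/// ParseDirectiveAMDGPUHsaKernel
///  ::= .amdgpu_hsa_kernel symbol_name
/// Marks the symbol as an HSA kernel (STT_AMDGPU_HSA_KERNEL) and reinitializes
/// the per-kernel parser scope.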
4922 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4923   StringRef KernelName;
4924   if (!parseId(KernelName, "expected symbol name"))
4925     return true;
4926 
4927   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4928                                            ELF::STT_AMDGPU_HSA_KERNEL);
4929 
4930   KernelScope.initialize(getContext());
4931   return false;
4932 }
4933 
4934 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4935   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4936     return Error(getLoc(),
4937                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4938                  "architectures");
4939   }
4940 
4941   auto TargetIDDirective = getLexer().getTok().getStringContents();
4942   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4943     return Error(getParser().getTok().getLoc(), "target id must match options");
4944 
4945   getTargetStreamer().EmitISAVersion();
4946   Lex();
4947 
4948   return false;
4949 }
4950 
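// Parse the HSA metadata block enclosed by the metadata begin/end directives.
// The directive pair and the emission routine depend on the code object ABI
// version (V2 vs V3+).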
4951 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4952   const char *AssemblerDirectiveBegin;
4953   const char *AssemblerDirectiveEnd;
4954   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4955       isHsaAbiVersion3Or4(&getSTI())
4956           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4957                             HSAMD::V3::AssemblerDirectiveEnd)
4958           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4959                             HSAMD::AssemblerDirectiveEnd);
4960 
4961   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4962     return Error(getLoc(),
4963                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4964                  "not available on non-amdhsa OSes")).str());
4965   }
4966 
4967   std::string HSAMetadataString;
4968   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4969                           HSAMetadataString))
4970     return true;
4971 
4972   if (isHsaAbiVersion3Or4(&getSTI())) {
4973     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4974       return Error(getLoc(), "invalid HSA metadata");
4975   } else {
4976     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4977       return Error(getLoc(), "invalid HSA metadata");
4978   }
4979 
4980   return false;
4981 }
4982 
4983 /// Common code to parse out a block of text (typically YAML) between start and
4984 /// end directives.
4985 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4986                                           const char *AssemblerDirectiveEnd,
4987                                           std::string &CollectString) {
4988 
4989   raw_string_ostream CollectStream(CollectString);
4990 
4991   getLexer().setSkipSpace(false);
4992 
4993   bool FoundEnd = false;
4994   while (!isToken(AsmToken::Eof)) {
4995     while (isToken(AsmToken::Space)) {
4996       CollectStream << getTokenStr();
4997       Lex();
4998     }
4999 
5000     if (trySkipId(AssemblerDirectiveEnd)) {
5001       FoundEnd = true;
5002       break;
5003     }
5004 
5005     CollectStream << Parser.parseStringToEndOfStatement()
5006                   << getContext().getAsmInfo()->getSeparatorString();
5007 
5008     Parser.eatToEndOfStatement();
5009   }
5010 
5011   getLexer().setSkipSpace(true);
5012 
5013   if (isToken(AsmToken::Eof) && !FoundEnd) {
5014     return TokError(Twine("expected directive ") +
5015                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5016   }
5017 
5018   CollectStream.flush();
5019   return false;
5020 }
5021 
5022 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5023 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5024   std::string String;
5025   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5026                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5027     return true;
5028 
5029   auto PALMetadata = getTargetStreamer().getPALMetadata();
5030   if (!PALMetadata->setFromString(String))
5031     return Error(getLoc(), "invalid PAL metadata");
5032   return false;
5033 }
5034 
5035 /// Parse the assembler directive for old linear-format PAL metadata.
5036 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5037   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5038     return Error(getLoc(),
5039                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5040                  "not available on non-amdpal OSes")).str());
5041   }
5042 
5043   auto PALMetadata = getTargetStreamer().getPALMetadata();
5044   PALMetadata->setLegacy();
5045   for (;;) {
5046     uint32_t Key, Value;
5047     if (ParseAsAbsoluteExpression(Key)) {
5048       return TokError(Twine("invalid value in ") +
5049                       Twine(PALMD::AssemblerDirective));
5050     }
5051     if (!trySkipToken(AsmToken::Comma)) {
5052       return TokError(Twine("expected an even number of values in ") +
5053                       Twine(PALMD::AssemblerDirective));
5054     }
5055     if (ParseAsAbsoluteExpression(Value)) {
5056       return TokError(Twine("invalid value in ") +
5057                       Twine(PALMD::AssemblerDirective));
5058     }
5059     PALMetadata->setRegister(Key, Value);
5060     if (!trySkipToken(AsmToken::Comma))
5061       break;
5062   }
5063   return false;
5064 }
5065 
5066 /// ParseDirectiveAMDGPULDS
5067 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
5068 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5069   if (getParser().checkForValidSection())
5070     return true;
5071 
5072   StringRef Name;
5073   SMLoc NameLoc = getLoc();
5074   if (getParser().parseIdentifier(Name))
5075     return TokError("expected identifier in directive");
5076 
5077   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5078   if (parseToken(AsmToken::Comma, "expected ','"))
5079     return true;
5080 
5081   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5082 
5083   int64_t Size;
5084   SMLoc SizeLoc = getLoc();
5085   if (getParser().parseAbsoluteExpression(Size))
5086     return true;
5087   if (Size < 0)
5088     return Error(SizeLoc, "size must be non-negative");
5089   if (Size > LocalMemorySize)
5090     return Error(SizeLoc, "size is too large");
5091 
5092   int64_t Alignment = 4;
5093   if (trySkipToken(AsmToken::Comma)) {
5094     SMLoc AlignLoc = getLoc();
5095     if (getParser().parseAbsoluteExpression(Alignment))
5096       return true;
5097     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5098       return Error(AlignLoc, "alignment must be a power of two");
5099 
5100     // Alignment larger than the size of LDS is possible in theory, as long
5101     // as the linker manages to place the symbol at address 0, but we do want
5102     // to make sure the alignment fits nicely into a 32-bit integer.
5103     if (Alignment >= 1u << 31)
5104       return Error(AlignLoc, "alignment is too large");
5105   }
5106 
5107   if (parseToken(AsmToken::EndOfStatement,
5108                  "unexpected token in '.amdgpu_lds' directive"))
5109     return true;
5110 
5111   Symbol->redefineIfPossible();
5112   if (!Symbol->isUndefined())
5113     return Error(NameLoc, "invalid symbol redefinition");
5114 
5115   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5116   return false;
5117 }
5118 
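// Top-level dispatcher for target-specific assembler directives. Directives
// tied to a particular code object ABI are only recognized for that ABI;
// returning true means the directive was not handled here.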
5119 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5120   StringRef IDVal = DirectiveID.getString();
5121 
5122   if (isHsaAbiVersion3Or4(&getSTI())) {
5123     if (IDVal == ".amdhsa_kernel")
5124      return ParseDirectiveAMDHSAKernel();
5125 
5126     // TODO: Restructure/combine with PAL metadata directive.
5127     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5128       return ParseDirectiveHSAMetadata();
5129   } else {
5130     if (IDVal == ".hsa_code_object_version")
5131       return ParseDirectiveHSACodeObjectVersion();
5132 
5133     if (IDVal == ".hsa_code_object_isa")
5134       return ParseDirectiveHSACodeObjectISA();
5135 
5136     if (IDVal == ".amd_kernel_code_t")
5137       return ParseDirectiveAMDKernelCodeT();
5138 
5139     if (IDVal == ".amdgpu_hsa_kernel")
5140       return ParseDirectiveAMDGPUHsaKernel();
5141 
5142     if (IDVal == ".amd_amdgpu_isa")
5143       return ParseDirectiveISAVersion();
5144 
5145     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5146       return ParseDirectiveHSAMetadata();
5147   }
5148 
5149   if (IDVal == ".amdgcn_target")
5150     return ParseDirectiveAMDGCNTarget();
5151 
5152   if (IDVal == ".amdgpu_lds")
5153     return ParseDirectiveAMDGPULDS();
5154 
5155   if (IDVal == PALMD::AssemblerDirectiveBegin)
5156     return ParseDirectivePALMetadataBegin();
5157 
5158   if (IDVal == PALMD::AssemblerDirective)
5159     return ParseDirectivePALMetadata();
5160 
5161   return true;
5162 }
5163 
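// Check whether RegNo (including registers that alias it) is available on the
// current subtarget.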
5164 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5165                                            unsigned RegNo) {
5166 
5167   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
5168        R.isValid(); ++R) {
5169     if (*R == RegNo)
5170       return isGFX9Plus();
5171   }
5172 
5173   // GFX10 has 2 more SGPRs: 104 and 105.
5174   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
5175        R.isValid(); ++R) {
5176     if (*R == RegNo)
5177       return hasSGPR104_SGPR105();
5178   }
5179 
5180   switch (RegNo) {
5181   case AMDGPU::SRC_SHARED_BASE:
5182   case AMDGPU::SRC_SHARED_LIMIT:
5183   case AMDGPU::SRC_PRIVATE_BASE:
5184   case AMDGPU::SRC_PRIVATE_LIMIT:
5185   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5186     return isGFX9Plus();
5187   case AMDGPU::TBA:
5188   case AMDGPU::TBA_LO:
5189   case AMDGPU::TBA_HI:
5190   case AMDGPU::TMA:
5191   case AMDGPU::TMA_LO:
5192   case AMDGPU::TMA_HI:
5193     return !isGFX9Plus();
5194   case AMDGPU::XNACK_MASK:
5195   case AMDGPU::XNACK_MASK_LO:
5196   case AMDGPU::XNACK_MASK_HI:
5197     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5198   case AMDGPU::SGPR_NULL:
5199     return isGFX10Plus();
5200   default:
5201     break;
5202   }
5203 
5204   if (isCI())
5205     return true;
5206 
5207   if (isSI() || isGFX10Plus()) {
5208     // No flat_scr on SI.
5209     // On GFX10 flat scratch is not a valid register operand and can only be
5210     // accessed with s_setreg/s_getreg.
5211     switch (RegNo) {
5212     case AMDGPU::FLAT_SCR:
5213     case AMDGPU::FLAT_SCR_LO:
5214     case AMDGPU::FLAT_SCR_HI:
5215       return false;
5216     default:
5217       return true;
5218     }
5219   }
5220 
5221   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5222   // SI/CI have.
5223   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
5224        R.isValid(); ++R) {
5225     if (*R == RegNo)
5226       return hasSGPR102_SGPR103();
5227   }
5228 
5229   return true;
5230 }
5231 
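// Parse a single instruction operand. In NSA mode (GFX10+ MIMG) a bracketed
// register list such as [v4, v5, v6] is also accepted.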
5232 OperandMatchResultTy
5233 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5234                               OperandMode Mode) {
5235   // Try to parse with a custom parser
5236   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5237 
5238   // If we successfully parsed the operand or if there was an error parsing,
5239   // we are done.
5240   //
5241   // If we are parsing after we reach EndOfStatement then this means we
5242   // are appending default values to the Operands list.  This is only done
5243   // by custom parsers, so we shouldn't continue on to the generic parsing.
5244   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5245       isToken(AsmToken::EndOfStatement))
5246     return ResTy;
5247 
5248   SMLoc RBraceLoc;
5249   SMLoc LBraceLoc = getLoc();
5250   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5251     unsigned Prefix = Operands.size();
5252 
5253     for (;;) {
5254       auto Loc = getLoc();
5255       ResTy = parseReg(Operands);
5256       if (ResTy == MatchOperand_NoMatch)
5257         Error(Loc, "expected a register");
5258       if (ResTy != MatchOperand_Success)
5259         return MatchOperand_ParseFail;
5260 
5261       RBraceLoc = getLoc();
5262       if (trySkipToken(AsmToken::RBrac))
5263         break;
5264 
5265       if (!skipToken(AsmToken::Comma,
5266                      "expected a comma or a closing square bracket")) {
5267         return MatchOperand_ParseFail;
5268       }
5269     }
5270 
5271     if (Operands.size() - Prefix > 1) {
5272       Operands.insert(Operands.begin() + Prefix,
5273                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5274       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5275     }
5276 
5277     return MatchOperand_Success;
5278   }
5279 
5280   return parseRegOrImm(Operands);
5281 }
5282 
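// Strip a forced-encoding suffix (_e32, _e64, _dpp or _sdwa) from the mnemonic
// and remember the requested encoding for instruction matching.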
5283 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5284   // Clear any forced encodings from the previous instruction.
5285   setForcedEncodingSize(0);
5286   setForcedDPP(false);
5287   setForcedSDWA(false);
5288 
5289   if (Name.endswith("_e64")) {
5290     setForcedEncodingSize(64);
5291     return Name.substr(0, Name.size() - 4);
5292   } else if (Name.endswith("_e32")) {
5293     setForcedEncodingSize(32);
5294     return Name.substr(0, Name.size() - 4);
5295   } else if (Name.endswith("_dpp")) {
5296     setForcedDPP(true);
5297     return Name.substr(0, Name.size() - 4);
5298   } else if (Name.endswith("_sdwa")) {
5299     setForcedSDWA(true);
5300     return Name.substr(0, Name.size() - 5);
5301   }
5302   return Name;
5303 }
5304 
5305 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5306                                        StringRef Name,
5307                                        SMLoc NameLoc, OperandVector &Operands) {
5308   // Add the instruction mnemonic
5309   Name = parseMnemonicSuffix(Name);
5310   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5311 
5312   bool IsMIMG = Name.startswith("image_");
5313 
5314   while (!trySkipToken(AsmToken::EndOfStatement)) {
5315     OperandMode Mode = OperandMode_Default;
5316     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5317       Mode = OperandMode_NSA;
5318     CPolSeen = 0;
5319     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5320 
5321     if (Res != MatchOperand_Success) {
5322       checkUnsupportedInstruction(Name, NameLoc);
5323       if (!Parser.hasPendingError()) {
5324         // FIXME: use real operand location rather than the current location.
5325         StringRef Msg =
5326           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5327                                             "not a valid operand.";
5328         Error(getLoc(), Msg);
5329       }
5330       while (!trySkipToken(AsmToken::EndOfStatement)) {
5331         lex();
5332       }
5333       return true;
5334     }
5335 
5336     // Eat the comma or space if there is one.
5337     trySkipToken(AsmToken::Comma);
5338   }
5339 
5340   return false;
5341 }
5342 
5343 //===----------------------------------------------------------------------===//
5344 // Utility functions
5345 //===----------------------------------------------------------------------===//
5346 
5347 OperandMatchResultTy
5348 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5349 
5350   if (!trySkipId(Prefix, AsmToken::Colon))
5351     return MatchOperand_NoMatch;
5352 
5353   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5354 }
5355 
5356 OperandMatchResultTy
5357 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5358                                     AMDGPUOperand::ImmTy ImmTy,
5359                                     bool (*ConvertResult)(int64_t&)) {
5360   SMLoc S = getLoc();
5361   int64_t Value = 0;
5362 
5363   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5364   if (Res != MatchOperand_Success)
5365     return Res;
5366 
5367   if (ConvertResult && !ConvertResult(Value)) {
5368     Error(S, "invalid " + StringRef(Prefix) + " value.");
5369   }
5370 
5371   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5372   return MatchOperand_Success;
5373 }
5374 
5375 OperandMatchResultTy
5376 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5377                                              OperandVector &Operands,
5378                                              AMDGPUOperand::ImmTy ImmTy,
5379                                              bool (*ConvertResult)(int64_t&)) {
5380   SMLoc S = getLoc();
5381   if (!trySkipId(Prefix, AsmToken::Colon))
5382     return MatchOperand_NoMatch;
5383 
5384   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5385     return MatchOperand_ParseFail;
5386 
5387   unsigned Val = 0;
5388   const unsigned MaxSize = 4;
5389 
5390   // FIXME: How to verify the number of elements matches the number of src
5391   // operands?
5392   for (int I = 0; ; ++I) {
5393     int64_t Op;
5394     SMLoc Loc = getLoc();
5395     if (!parseExpr(Op))
5396       return MatchOperand_ParseFail;
5397 
5398     if (Op != 0 && Op != 1) {
5399       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5400       return MatchOperand_ParseFail;
5401     }
5402 
5403     Val |= (Op << I);
5404 
5405     if (trySkipToken(AsmToken::RBrac))
5406       break;
5407 
5408     if (I + 1 == MaxSize) {
5409       Error(getLoc(), "expected a closing square bracket");
5410       return MatchOperand_ParseFail;
5411     }
5412 
5413     if (!skipToken(AsmToken::Comma, "expected a comma"))
5414       return MatchOperand_ParseFail;
5415   }
5416 
5417   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5418   return MatchOperand_Success;
5419 }
5420 
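// Parse a named bit operand: 'Name' sets the bit to 1, 'no' followed by Name
// clears it.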
5421 OperandMatchResultTy
5422 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5423                                AMDGPUOperand::ImmTy ImmTy) {
5424   int64_t Bit;
5425   SMLoc S = getLoc();
5426 
5427   if (trySkipId(Name)) {
5428     Bit = 1;
5429   } else if (trySkipId("no", Name)) {
5430     Bit = 0;
5431   } else {
5432     return MatchOperand_NoMatch;
5433   }
5434 
5435   if (Name == "r128" && !hasMIMG_R128()) {
5436     Error(S, "r128 modifier is not supported on this GPU");
5437     return MatchOperand_ParseFail;
5438   }
5439   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5440     Error(S, "a16 modifier is not supported on this GPU");
5441     return MatchOperand_ParseFail;
5442   }
5443 
5444   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5445     ImmTy = AMDGPUOperand::ImmTyR128A16;
5446 
5447   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5448   return MatchOperand_Success;
5449 }
5450 
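// Parse a cache policy modifier (glc, slc, dlc, scc or its 'no'-prefixed form)
// and merge it into a single ImmTyCPol operand, rejecting duplicates and
// modifiers the subtarget does not support.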
5451 OperandMatchResultTy
5452 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5453   unsigned CPolOn = 0;
5454   unsigned CPolOff = 0;
5455   SMLoc S = getLoc();
5456 
5457   if (trySkipId("glc"))
5458     CPolOn = AMDGPU::CPol::GLC;
5459   else if (trySkipId("noglc"))
5460     CPolOff = AMDGPU::CPol::GLC;
5461   else if (trySkipId("slc"))
5462     CPolOn = AMDGPU::CPol::SLC;
5463   else if (trySkipId("noslc"))
5464     CPolOff = AMDGPU::CPol::SLC;
5465   else if (trySkipId("dlc"))
5466     CPolOn = AMDGPU::CPol::DLC;
5467   else if (trySkipId("nodlc"))
5468     CPolOff = AMDGPU::CPol::DLC;
5469   else if (trySkipId("scc"))
5470     CPolOn = AMDGPU::CPol::SCC;
5471   else if (trySkipId("noscc"))
5472     CPolOff = AMDGPU::CPol::SCC;
5473   else
5474     return MatchOperand_NoMatch;
5475 
5476   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5477     Error(S, "dlc modifier is not supported on this GPU");
5478     return MatchOperand_ParseFail;
5479   }
5480 
5481   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5482     Error(S, "scc modifier is not supported on this GPU");
5483     return MatchOperand_ParseFail;
5484   }
5485 
5486   if (CPolSeen & (CPolOn | CPolOff)) {
5487     Error(S, "duplicate cache policy modifier");
5488     return MatchOperand_ParseFail;
5489   }
5490 
5491   CPolSeen |= (CPolOn | CPolOff);
5492 
5493   for (unsigned I = 1; I != Operands.size(); ++I) {
5494     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5495     if (Op.isCPol()) {
5496       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5497       return MatchOperand_Success;
5498     }
5499   }
5500 
5501   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5502                                               AMDGPUOperand::ImmTyCPol));
5503 
5504   return MatchOperand_Success;
5505 }
5506 
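// Append an optional immediate operand to Inst: use the parsed value recorded
// in OptionalIdx if present, otherwise the provided default.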
5507 static void addOptionalImmOperand(
5508   MCInst& Inst, const OperandVector& Operands,
5509   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5510   AMDGPUOperand::ImmTy ImmT,
5511   int64_t Default = 0) {
5512   auto i = OptionalIdx.find(ImmT);
5513   if (i != OptionalIdx.end()) {
5514     unsigned Idx = i->second;
5515     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5516   } else {
5517     Inst.addOperand(MCOperand::createImm(Default));
5518   }
5519 }
5520 
5521 OperandMatchResultTy
5522 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5523                                        StringRef &Value,
5524                                        SMLoc &StringLoc) {
5525   if (!trySkipId(Prefix, AsmToken::Colon))
5526     return MatchOperand_NoMatch;
5527 
5528   StringLoc = getLoc();
5529   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5530                                                   : MatchOperand_ParseFail;
5531 }
5532 
5533 //===----------------------------------------------------------------------===//
5534 // MTBUF format
5535 //===----------------------------------------------------------------------===//
5536 
5537 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5538                                   int64_t MaxVal,
5539                                   int64_t &Fmt) {
5540   int64_t Val;
5541   SMLoc Loc = getLoc();
5542 
5543   auto Res = parseIntWithPrefix(Pref, Val);
5544   if (Res == MatchOperand_ParseFail)
5545     return false;
5546   if (Res == MatchOperand_NoMatch)
5547     return true;
5548 
5549   if (Val < 0 || Val > MaxVal) {
5550     Error(Loc, Twine("out of range ", StringRef(Pref)));
5551     return false;
5552   }
5553 
5554   Fmt = Val;
5555   return true;
5556 }
5557 
5558 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5559 // values to live in a joint format operand in the MCInst encoding.
5560 OperandMatchResultTy
5561 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5562   using namespace llvm::AMDGPU::MTBUFFormat;
5563 
5564   int64_t Dfmt = DFMT_UNDEF;
5565   int64_t Nfmt = NFMT_UNDEF;
5566 
5567   // dfmt and nfmt can appear in either order, and each is optional.
5568   for (int I = 0; I < 2; ++I) {
5569     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5570       return MatchOperand_ParseFail;
5571 
5572     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5573       return MatchOperand_ParseFail;
5574     }
5575     // Skip optional comma between dfmt/nfmt
5576     // but guard against 2 commas following each other.
5577     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5578         !peekToken().is(AsmToken::Comma)) {
5579       trySkipToken(AsmToken::Comma);
5580     }
5581   }
5582 
5583   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5584     return MatchOperand_NoMatch;
5585 
5586   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5587   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5588 
5589   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5590   return MatchOperand_Success;
5591 }
5592 
5593 OperandMatchResultTy
5594 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5595   using namespace llvm::AMDGPU::MTBUFFormat;
5596 
5597   int64_t Fmt = UFMT_UNDEF;
5598 
5599   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5600     return MatchOperand_ParseFail;
5601 
5602   if (Fmt == UFMT_UNDEF)
5603     return MatchOperand_NoMatch;
5604 
5605   Format = Fmt;
5606   return MatchOperand_Success;
5607 }
5608 
5609 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5610                                     int64_t &Nfmt,
5611                                     StringRef FormatStr,
5612                                     SMLoc Loc) {
5613   using namespace llvm::AMDGPU::MTBUFFormat;
5614   int64_t Format;
5615 
5616   Format = getDfmt(FormatStr);
5617   if (Format != DFMT_UNDEF) {
5618     Dfmt = Format;
5619     return true;
5620   }
5621 
5622   Format = getNfmt(FormatStr, getSTI());
5623   if (Format != NFMT_UNDEF) {
5624     Nfmt = Format;
5625     return true;
5626   }
5627 
5628   Error(Loc, "unsupported format");
5629   return false;
5630 }
5631 
5632 OperandMatchResultTy
5633 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5634                                           SMLoc FormatLoc,
5635                                           int64_t &Format) {
5636   using namespace llvm::AMDGPU::MTBUFFormat;
5637 
5638   int64_t Dfmt = DFMT_UNDEF;
5639   int64_t Nfmt = NFMT_UNDEF;
5640   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5641     return MatchOperand_ParseFail;
5642 
5643   if (trySkipToken(AsmToken::Comma)) {
5644     StringRef Str;
5645     SMLoc Loc = getLoc();
5646     if (!parseId(Str, "expected a format string") ||
5647         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5648       return MatchOperand_ParseFail;
5649     }
5650     if (Dfmt == DFMT_UNDEF) {
5651       Error(Loc, "duplicate numeric format");
5652       return MatchOperand_ParseFail;
5653     } else if (Nfmt == NFMT_UNDEF) {
5654       Error(Loc, "duplicate data format");
5655       return MatchOperand_ParseFail;
5656     }
5657   }
5658 
5659   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5660   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5661 
5662   if (isGFX10Plus()) {
5663     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5664     if (Ufmt == UFMT_UNDEF) {
5665       Error(FormatLoc, "unsupported format");
5666       return MatchOperand_ParseFail;
5667     }
5668     Format = Ufmt;
5669   } else {
5670     Format = encodeDfmtNfmt(Dfmt, Nfmt);
5671   }
5672 
5673   return MatchOperand_Success;
5674 }
5675 
5676 OperandMatchResultTy
5677 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5678                                             SMLoc Loc,
5679                                             int64_t &Format) {
5680   using namespace llvm::AMDGPU::MTBUFFormat;
5681 
5682   auto Id = getUnifiedFormat(FormatStr);
5683   if (Id == UFMT_UNDEF)
5684     return MatchOperand_NoMatch;
5685 
5686   if (!isGFX10Plus()) {
5687     Error(Loc, "unified format is not supported on this GPU");
5688     return MatchOperand_ParseFail;
5689   }
5690 
5691   Format = Id;
5692   return MatchOperand_Success;
5693 }
5694 
5695 OperandMatchResultTy
5696 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5697   using namespace llvm::AMDGPU::MTBUFFormat;
5698   SMLoc Loc = getLoc();
5699 
5700   if (!parseExpr(Format))
5701     return MatchOperand_ParseFail;
5702   if (!isValidFormatEncoding(Format, getSTI())) {
5703     Error(Loc, "out of range format");
5704     return MatchOperand_ParseFail;
5705   }
5706 
5707   return MatchOperand_Success;
5708 }
5709 
5710 OperandMatchResultTy
5711 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5712   using namespace llvm::AMDGPU::MTBUFFormat;
5713 
5714   if (!trySkipId("format", AsmToken::Colon))
5715     return MatchOperand_NoMatch;
5716 
5717   if (trySkipToken(AsmToken::LBrac)) {
5718     StringRef FormatStr;
5719     SMLoc Loc = getLoc();
5720     if (!parseId(FormatStr, "expected a format string"))
5721       return MatchOperand_ParseFail;
5722 
5723     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5724     if (Res == MatchOperand_NoMatch)
5725       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5726     if (Res != MatchOperand_Success)
5727       return Res;
5728 
5729     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5730       return MatchOperand_ParseFail;
5731 
5732     return MatchOperand_Success;
5733   }
5734 
5735   return parseNumericFormat(Format);
5736 }
5737 
5738 OperandMatchResultTy
5739 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5740   using namespace llvm::AMDGPU::MTBUFFormat;
5741 
5742   int64_t Format = getDefaultFormatEncoding(getSTI());
5743   OperandMatchResultTy Res;
5744   SMLoc Loc = getLoc();
5745 
5746   // Parse legacy format syntax.
5747   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5748   if (Res == MatchOperand_ParseFail)
5749     return Res;
5750 
5751   bool FormatFound = (Res == MatchOperand_Success);
5752 
5753   Operands.push_back(
5754     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5755 
5756   if (FormatFound)
5757     trySkipToken(AsmToken::Comma);
5758 
5759   if (isToken(AsmToken::EndOfStatement)) {
5760     // We are expecting an soffset operand,
5761     // but let the matcher handle the error.
5762     return MatchOperand_Success;
5763   }
5764 
5765   // Parse soffset.
5766   Res = parseRegOrImm(Operands);
5767   if (Res != MatchOperand_Success)
5768     return Res;
5769 
5770   trySkipToken(AsmToken::Comma);
5771 
5772   if (!FormatFound) {
5773     Res = parseSymbolicOrNumericFormat(Format);
5774     if (Res == MatchOperand_ParseFail)
5775       return Res;
5776     if (Res == MatchOperand_Success) {
5777       auto Size = Operands.size();
5778       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5779       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5780       Op.setImm(Format);
5781     }
5782     return MatchOperand_Success;
5783   }
5784 
5785   if (isId("format") && peekToken().is(AsmToken::Colon)) {
5786     Error(getLoc(), "duplicate format");
5787     return MatchOperand_ParseFail;
5788   }
5789   return MatchOperand_Success;
5790 }
5791 
5792 //===----------------------------------------------------------------------===//
5793 // ds
5794 //===----------------------------------------------------------------------===//
5795 
5796 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5797                                     const OperandVector &Operands) {
5798   OptionalImmIndexMap OptionalIdx;
5799 
5800   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5801     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5802 
5803     // Add the register arguments
5804     if (Op.isReg()) {
5805       Op.addRegOperands(Inst, 1);
5806       continue;
5807     }
5808 
5809     // Handle optional arguments
5810     OptionalIdx[Op.getImmTy()] = i;
5811   }
5812 
5813   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
5814   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
5815   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5816 
5817   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5818 }
5819 
5820 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
5821                                 bool IsGdsHardcoded) {
5822   OptionalImmIndexMap OptionalIdx;
5823 
5824   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5825     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5826 
5827     // Add the register arguments
5828     if (Op.isReg()) {
5829       Op.addRegOperands(Inst, 1);
5830       continue;
5831     }
5832 
5833     if (Op.isToken() && Op.getToken() == "gds") {
5834       IsGdsHardcoded = true;
5835       continue;
5836     }
5837 
5838     // Handle optional arguments
5839     OptionalIdx[Op.getImmTy()] = i;
5840   }
5841 
5842   AMDGPUOperand::ImmTy OffsetType =
5843     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
5844      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
5845      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
5846                                                       AMDGPUOperand::ImmTyOffset;
5847 
5848   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
5849 
5850   if (!IsGdsHardcoded) {
5851     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5852   }
5853   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5854 }
5855 
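// Convert a parsed 'exp' instruction. Register and 'off' sources are recorded
// so the enable mask can be computed; for compressed exports only the first
// two source slots are kept.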
5856 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
5857   OptionalImmIndexMap OptionalIdx;
5858 
5859   unsigned OperandIdx[4];
5860   unsigned EnMask = 0;
5861   int SrcIdx = 0;
5862 
5863   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5864     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5865 
5866     // Add the register arguments
5867     if (Op.isReg()) {
5868       assert(SrcIdx < 4);
5869       OperandIdx[SrcIdx] = Inst.size();
5870       Op.addRegOperands(Inst, 1);
5871       ++SrcIdx;
5872       continue;
5873     }
5874 
5875     if (Op.isOff()) {
5876       assert(SrcIdx < 4);
5877       OperandIdx[SrcIdx] = Inst.size();
5878       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
5879       ++SrcIdx;
5880       continue;
5881     }
5882 
5883     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
5884       Op.addImmOperands(Inst, 1);
5885       continue;
5886     }
5887 
5888     if (Op.isToken() && Op.getToken() == "done")
5889       continue;
5890 
5891     // Handle optional arguments
5892     OptionalIdx[Op.getImmTy()] = i;
5893   }
5894 
5895   assert(SrcIdx == 4);
5896 
5897   bool Compr = false;
5898   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
5899     Compr = true;
5900     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
5901     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
5902     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
5903   }
5904 
5905   for (auto i = 0; i < SrcIdx; ++i) {
5906     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
5907       EnMask |= Compr ? (0x3 << i * 2) : (0x1 << i);
5908     }
5909   }
5910 
5911   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
5912   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
5913 
5914   Inst.addOperand(MCOperand::createImm(EnMask));
5915 }
5916 
5917 //===----------------------------------------------------------------------===//
5918 // s_waitcnt
5919 //===----------------------------------------------------------------------===//
5920 
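// Encode a single counter value into the combined waitcnt bitmask. Returns
// true on failure, i.e. when the value does not fit into its field and
// saturation was not requested.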
5921 static bool
5922 encodeCnt(
5923   const AMDGPU::IsaVersion ISA,
5924   int64_t &IntVal,
5925   int64_t CntVal,
5926   bool Saturate,
5927   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
5928   unsigned (*decode)(const IsaVersion &Version, unsigned))
5929 {
5930   bool Failed = false;
5931 
5932   IntVal = encode(ISA, IntVal, CntVal);
5933   if (CntVal != decode(ISA, IntVal)) {
5934     if (Saturate) {
5935       IntVal = encode(ISA, IntVal, -1);
5936     } else {
5937       Failed = true;
5938     }
5939   }
5940   return Failed;
5941 }
5942 
5943 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
5944 
5945   SMLoc CntLoc = getLoc();
5946   StringRef CntName = getTokenStr();
5947 
5948   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
5949       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
5950     return false;
5951 
5952   int64_t CntVal;
5953   SMLoc ValLoc = getLoc();
5954   if (!parseExpr(CntVal))
5955     return false;
5956 
5957   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5958 
5959   bool Failed = true;
5960   bool Sat = CntName.endswith("_sat");
5961 
5962   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
5963     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
5964   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
5965     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
5966   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
5967     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
5968   } else {
5969     Error(CntLoc, "invalid counter name " + CntName);
5970     return false;
5971   }
5972 
5973   if (Failed) {
5974     Error(ValLoc, "too large value for " + CntName);
5975     return false;
5976   }
5977 
5978   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
5979     return false;
5980 
5981   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
5982     if (isToken(AsmToken::EndOfStatement)) {
5983       Error(getLoc(), "expected a counter name");
5984       return false;
5985     }
5986   }
5987 
5988   return true;
5989 }
5990 
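// Parse an s_waitcnt operand: either a list of counter terms such as
// "vmcnt(0) lgkmcnt(0)" or a raw integer expression.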
5991 OperandMatchResultTy
5992 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
5993   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5994   int64_t Waitcnt = getWaitcntBitMask(ISA);
5995   SMLoc S = getLoc();
5996 
5997   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
5998     while (!isToken(AsmToken::EndOfStatement)) {
5999       if (!parseCnt(Waitcnt))
6000         return MatchOperand_ParseFail;
6001     }
6002   } else {
6003     if (!parseExpr(Waitcnt))
6004       return MatchOperand_ParseFail;
6005   }
6006 
6007   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6008   return MatchOperand_Success;
6009 }
6010 
6011 bool
6012 AMDGPUOperand::isSWaitCnt() const {
6013   return isImm();
6014 }
6015 
6016 //===----------------------------------------------------------------------===//
6017 // hwreg
6018 //===----------------------------------------------------------------------===//
6019 
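// Parse the body of a hwreg(...) operand:
//   hwreg(<name or numeric id> [, <bit offset>, <bit width>])
// The opening parenthesis has already been consumed by the caller.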
6020 bool
6021 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6022                                 OperandInfoTy &Offset,
6023                                 OperandInfoTy &Width) {
6024   using namespace llvm::AMDGPU::Hwreg;
6025 
6026   // The register may be specified by name or using a numeric code
6027   HwReg.Loc = getLoc();
6028   if (isToken(AsmToken::Identifier) &&
6029       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
6030     HwReg.IsSymbolic = true;
6031     lex(); // skip register name
6032   } else if (!parseExpr(HwReg.Id, "a register name")) {
6033     return false;
6034   }
6035 
6036   if (trySkipToken(AsmToken::RParen))
6037     return true;
6038 
6039   // parse optional params
6040   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6041     return false;
6042 
6043   Offset.Loc = getLoc();
6044   if (!parseExpr(Offset.Id))
6045     return false;
6046 
6047   if (!skipToken(AsmToken::Comma, "expected a comma"))
6048     return false;
6049 
6050   Width.Loc = getLoc();
6051   return parseExpr(Width.Id) &&
6052          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6053 }
6054 
6055 bool
6056 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6057                                const OperandInfoTy &Offset,
6058                                const OperandInfoTy &Width) {
6059 
6060   using namespace llvm::AMDGPU::Hwreg;
6061 
6062   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
6063     Error(HwReg.Loc,
6064           "specified hardware register is not supported on this GPU");
6065     return false;
6066   }
6067   if (!isValidHwreg(HwReg.Id)) {
6068     Error(HwReg.Loc,
6069           "invalid code of hardware register: only 6-bit values are legal");
6070     return false;
6071   }
6072   if (!isValidHwregOffset(Offset.Id)) {
6073     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6074     return false;
6075   }
6076   if (!isValidHwregWidth(Width.Id)) {
6077     Error(Width.Loc,
6078           "invalid bitfield width: only values from 1 to 32 are legal");
6079     return false;
6080   }
6081   return true;
6082 }
6083 
6084 OperandMatchResultTy
6085 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6086   using namespace llvm::AMDGPU::Hwreg;
6087 
6088   int64_t ImmVal = 0;
6089   SMLoc Loc = getLoc();
6090 
6091   if (trySkipId("hwreg", AsmToken::LParen)) {
6092     OperandInfoTy HwReg(ID_UNKNOWN_);
6093     OperandInfoTy Offset(OFFSET_DEFAULT_);
6094     OperandInfoTy Width(WIDTH_DEFAULT_);
6095     if (parseHwregBody(HwReg, Offset, Width) &&
6096         validateHwreg(HwReg, Offset, Width)) {
6097       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6098     } else {
6099       return MatchOperand_ParseFail;
6100     }
6101   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6102     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6103       Error(Loc, "invalid immediate: only 16-bit values are legal");
6104       return MatchOperand_ParseFail;
6105     }
6106   } else {
6107     return MatchOperand_ParseFail;
6108   }
6109 
6110   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6111   return MatchOperand_Success;
6112 }
6113 
6114 bool AMDGPUOperand::isHwreg() const {
6115   return isImmTy(ImmTyHwreg);
6116 }
6117 
6118 //===----------------------------------------------------------------------===//
6119 // sendmsg
6120 //===----------------------------------------------------------------------===//
6121 
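// Parse the body of a sendmsg(...) operand:
//   sendmsg(<message name or id> [, <operation> [, <stream id>]])
// The opening parenthesis has already been consumed by the caller.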
6122 bool
6123 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6124                                   OperandInfoTy &Op,
6125                                   OperandInfoTy &Stream) {
6126   using namespace llvm::AMDGPU::SendMsg;
6127 
6128   Msg.Loc = getLoc();
6129   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
6130     Msg.IsSymbolic = true;
6131     lex(); // skip message name
6132   } else if (!parseExpr(Msg.Id, "a message name")) {
6133     return false;
6134   }
6135 
6136   if (trySkipToken(AsmToken::Comma)) {
6137     Op.IsDefined = true;
6138     Op.Loc = getLoc();
6139     if (isToken(AsmToken::Identifier) &&
6140         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6141       lex(); // skip operation name
6142     } else if (!parseExpr(Op.Id, "an operation name")) {
6143       return false;
6144     }
6145 
6146     if (trySkipToken(AsmToken::Comma)) {
6147       Stream.IsDefined = true;
6148       Stream.Loc = getLoc();
6149       if (!parseExpr(Stream.Id))
6150         return false;
6151     }
6152   }
6153 
6154   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6155 }
6156 
6157 bool
6158 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6159                                  const OperandInfoTy &Op,
6160                                  const OperandInfoTy &Stream) {
6161   using namespace llvm::AMDGPU::SendMsg;
6162 
6163   // Validation strictness depends on whether the message is specified
6164   // in a symbolic or in a numeric form. In the latter case
6165   // only the encoding possibility is checked.
6166   bool Strict = Msg.IsSymbolic;
6167 
6168   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
6169     Error(Msg.Loc, "invalid message id");
6170     return false;
6171   }
6172   if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
6173     if (Op.IsDefined) {
6174       Error(Op.Loc, "message does not support operations");
6175     } else {
6176       Error(Msg.Loc, "missing message operation");
6177     }
6178     return false;
6179   }
6180   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6181     Error(Op.Loc, "invalid operation id");
6182     return false;
6183   }
6184   if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
6185     Error(Stream.Loc, "message operation does not support streams");
6186     return false;
6187   }
6188   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6189     Error(Stream.Loc, "invalid message stream id");
6190     return false;
6191   }
6192   return true;
6193 }
6194 
6195 OperandMatchResultTy
6196 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6197   using namespace llvm::AMDGPU::SendMsg;
6198 
6199   int64_t ImmVal = 0;
6200   SMLoc Loc = getLoc();
6201 
6202   if (trySkipId("sendmsg", AsmToken::LParen)) {
6203     OperandInfoTy Msg(ID_UNKNOWN_);
6204     OperandInfoTy Op(OP_NONE_);
6205     OperandInfoTy Stream(STREAM_ID_NONE_);
6206     if (parseSendMsgBody(Msg, Op, Stream) &&
6207         validateSendMsg(Msg, Op, Stream)) {
6208       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6209     } else {
6210       return MatchOperand_ParseFail;
6211     }
6212   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6213     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6214       Error(Loc, "invalid immediate: only 16-bit values are legal");
6215       return MatchOperand_ParseFail;
6216     }
6217   } else {
6218     return MatchOperand_ParseFail;
6219   }
6220 
6221   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6222   return MatchOperand_Success;
6223 }
6224 
6225 bool AMDGPUOperand::isSendMsg() const {
6226   return isImmTy(ImmTySendMsg);
6227 }
6228 
6229 //===----------------------------------------------------------------------===//
6230 // v_interp
6231 //===----------------------------------------------------------------------===//
6232 
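// Parse an interpolation slot: one of p10, p20 or p0.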
6233 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6234   StringRef Str;
6235   SMLoc S = getLoc();
6236 
6237   if (!parseId(Str))
6238     return MatchOperand_NoMatch;
6239 
6240   int Slot = StringSwitch<int>(Str)
6241     .Case("p10", 0)
6242     .Case("p20", 1)
6243     .Case("p0", 2)
6244     .Default(-1);
6245 
6246   if (Slot == -1) {
6247     Error(S, "invalid interpolation slot");
6248     return MatchOperand_ParseFail;
6249   }
6250 
6251   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6252                                               AMDGPUOperand::ImmTyInterpSlot));
6253   return MatchOperand_Success;
6254 }
6255 
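// Parse an interpolation attribute of the form attr<N>.<chan>, where <N> is
// the attribute number (0..63) and <chan> is one of x, y, z or w.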
6256 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6257   StringRef Str;
6258   SMLoc S = getLoc();
6259 
6260   if (!parseId(Str))
6261     return MatchOperand_NoMatch;
6262 
6263   if (!Str.startswith("attr")) {
6264     Error(S, "invalid interpolation attribute");
6265     return MatchOperand_ParseFail;
6266   }
6267 
6268   StringRef Chan = Str.take_back(2);
6269   int AttrChan = StringSwitch<int>(Chan)
6270     .Case(".x", 0)
6271     .Case(".y", 1)
6272     .Case(".z", 2)
6273     .Case(".w", 3)
6274     .Default(-1);
6275   if (AttrChan == -1) {
6276     Error(S, "invalid or missing interpolation attribute channel");
6277     return MatchOperand_ParseFail;
6278   }
6279 
6280   Str = Str.drop_back(2).drop_front(4);
6281 
6282   uint8_t Attr;
6283   if (Str.getAsInteger(10, Attr)) {
6284     Error(S, "invalid or missing interpolation attribute number");
6285     return MatchOperand_ParseFail;
6286   }
6287 
6288   if (Attr > 63) {
6289     Error(S, "out of bounds interpolation attribute number");
6290     return MatchOperand_ParseFail;
6291   }
6292 
6293   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6294 
6295   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6296                                               AMDGPUOperand::ImmTyInterpAttr));
6297   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6298                                               AMDGPUOperand::ImmTyAttrChan));
6299   return MatchOperand_Success;
6300 }
6301 
6302 //===----------------------------------------------------------------------===//
6303 // exp
6304 //===----------------------------------------------------------------------===//
6305 
6306 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6307   using namespace llvm::AMDGPU::Exp;
6308 
6309   StringRef Str;
6310   SMLoc S = getLoc();
6311 
6312   if (!parseId(Str))
6313     return MatchOperand_NoMatch;
6314 
6315   unsigned Id = getTgtId(Str);
6316   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6317     Error(S, (Id == ET_INVALID) ?
6318                 "invalid exp target" :
6319                 "exp target is not supported on this GPU");
6320     return MatchOperand_ParseFail;
6321   }
6322 
6323   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6324                                               AMDGPUOperand::ImmTyExpTgt));
6325   return MatchOperand_Success;
6326 }
6327 
6328 //===----------------------------------------------------------------------===//
6329 // parser helpers
6330 //===----------------------------------------------------------------------===//
6331 
6332 bool
6333 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6334   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6335 }
6336 
6337 bool
6338 AMDGPUAsmParser::isId(const StringRef Id) const {
6339   return isId(getToken(), Id);
6340 }
6341 
6342 bool
6343 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6344   return getTokenKind() == Kind;
6345 }
6346 
6347 bool
6348 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6349   if (isId(Id)) {
6350     lex();
6351     return true;
6352   }
6353   return false;
6354 }
6355 
6356 bool
6357 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6358   if (isToken(AsmToken::Identifier)) {
6359     StringRef Tok = getTokenStr();
6360     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6361       lex();
6362       return true;
6363     }
6364   }
6365   return false;
6366 }
6367 
6368 bool
6369 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6370   if (isId(Id) && peekToken().is(Kind)) {
6371     lex();
6372     lex();
6373     return true;
6374   }
6375   return false;
6376 }
6377 
6378 bool
6379 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6380   if (isToken(Kind)) {
6381     lex();
6382     return true;
6383   }
6384   return false;
6385 }
6386 
6387 bool
6388 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6389                            const StringRef ErrMsg) {
6390   if (!trySkipToken(Kind)) {
6391     Error(getLoc(), ErrMsg);
6392     return false;
6393   }
6394   return true;
6395 }
6396 
6397 bool
6398 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6399   SMLoc S = getLoc();
6400 
6401   const MCExpr *Expr;
6402   if (Parser.parseExpression(Expr))
6403     return false;
6404 
6405   if (Expr->evaluateAsAbsolute(Imm))
6406     return true;
6407 
6408   if (Expected.empty()) {
6409     Error(S, "expected absolute expression");
6410   } else {
6411     Error(S, Twine("expected ", Expected) +
6412              Twine(" or an absolute expression"));
6413   }
6414   return false;
6415 }
6416 
6417 bool
6418 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6419   SMLoc S = getLoc();
6420 
6421   const MCExpr *Expr;
6422   if (Parser.parseExpression(Expr))
6423     return false;
6424 
6425   int64_t IntVal;
6426   if (Expr->evaluateAsAbsolute(IntVal)) {
6427     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6428   } else {
6429     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6430   }
6431   return true;
6432 }
6433 
6434 bool
6435 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6436   if (isToken(AsmToken::String)) {
6437     Val = getToken().getStringContents();
6438     lex();
6439     return true;
6440   } else {
6441     Error(getLoc(), ErrMsg);
6442     return false;
6443   }
6444 }
6445 
6446 bool
6447 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6448   if (isToken(AsmToken::Identifier)) {
6449     Val = getTokenStr();
6450     lex();
6451     return true;
6452   } else {
6453     if (!ErrMsg.empty())
6454       Error(getLoc(), ErrMsg);
6455     return false;
6456   }
6457 }
6458 
6459 AsmToken
6460 AMDGPUAsmParser::getToken() const {
6461   return Parser.getTok();
6462 }
6463 
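// Peek at the token following the current one without consuming anything.
// Never peeks past an end-of-statement token.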
6464 AsmToken
6465 AMDGPUAsmParser::peekToken() {
6466   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6467 }
6468 
6469 void
6470 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6471   auto TokCount = getLexer().peekTokens(Tokens);
6472 
6473   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6474     Tokens[Idx] = AsmToken(AsmToken::Error, "");
6475 }
6476 
6477 AsmToken::TokenKind
6478 AMDGPUAsmParser::getTokenKind() const {
6479   return getLexer().getKind();
6480 }
6481 
6482 SMLoc
6483 AMDGPUAsmParser::getLoc() const {
6484   return getToken().getLoc();
6485 }
6486 
6487 StringRef
6488 AMDGPUAsmParser::getTokenStr() const {
6489   return getToken().getString();
6490 }
6491 
6492 void
6493 AMDGPUAsmParser::lex() {
6494   Parser.Lex();
6495 }
6496 
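// Return the location of the most recently parsed operand that satisfies
// Test; fall back to the location of the first operand (the mnemonic).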
6497 SMLoc
6498 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
6499                                const OperandVector &Operands) const {
6500   for (unsigned i = Operands.size() - 1; i > 0; --i) {
6501     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6502     if (Test(Op))
6503       return Op.getStartLoc();
6504   }
6505   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
6506 }
6507 
6508 SMLoc
6509 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
6510                            const OperandVector &Operands) const {
6511   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
6512   return getOperandLoc(Test, Operands);
6513 }
6514 
6515 SMLoc
6516 AMDGPUAsmParser::getRegLoc(unsigned Reg,
6517                            const OperandVector &Operands) const {
6518   auto Test = [=](const AMDGPUOperand& Op) {
6519     return Op.isRegKind() && Op.getReg() == Reg;
6520   };
6521   return getOperandLoc(Test, Operands);
6522 }
6523 
6524 SMLoc
6525 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
6526   auto Test = [](const AMDGPUOperand& Op) {
6527     return Op.IsImmKindLiteral() || Op.isExpr();
6528   };
6529   return getOperandLoc(Test, Operands);
6530 }
6531 
6532 SMLoc
6533 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
6534   auto Test = [](const AMDGPUOperand& Op) {
6535     return Op.isImmKindConst();
6536   };
6537   return getOperandLoc(Test, Operands);
6538 }
6539 
6540 //===----------------------------------------------------------------------===//
6541 // swizzle
6542 //===----------------------------------------------------------------------===//
6543 
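// Pack the AND/OR/XOR lane masks into the BITMASK_PERM swizzle encoding.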
6544 LLVM_READNONE
6545 static unsigned
6546 encodeBitmaskPerm(const unsigned AndMask,
6547                   const unsigned OrMask,
6548                   const unsigned XorMask) {
6549   using namespace llvm::AMDGPU::Swizzle;
6550 
6551   return BITMASK_PERM_ENC |
6552          (AndMask << BITMASK_AND_SHIFT) |
6553          (OrMask  << BITMASK_OR_SHIFT)  |
6554          (XorMask << BITMASK_XOR_SHIFT);
6555 }
6556 
6557 bool
6558 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
6559                                      const unsigned MinVal,
6560                                      const unsigned MaxVal,
6561                                      const StringRef ErrMsg,
6562                                      SMLoc &Loc) {
6563   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6564     return false;
6565   }
6566   Loc = getLoc();
6567   if (!parseExpr(Op)) {
6568     return false;
6569   }
6570   if (Op < MinVal || Op > MaxVal) {
6571     Error(Loc, ErrMsg);
6572     return false;
6573   }
6574 
6575   return true;
6576 }
6577 
6578 bool
6579 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6580                                       const unsigned MinVal,
6581                                       const unsigned MaxVal,
6582                                       const StringRef ErrMsg) {
6583   SMLoc Loc;
6584   for (unsigned i = 0; i < OpNum; ++i) {
6585     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
6586       return false;
6587   }
6588 
6589   return true;
6590 }
6591 
6592 bool
6593 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6594   using namespace llvm::AMDGPU::Swizzle;
6595 
6596   int64_t Lane[LANE_NUM];
6597   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6598                            "expected a 2-bit lane id")) {
6599     Imm = QUAD_PERM_ENC;
6600     for (unsigned I = 0; I < LANE_NUM; ++I) {
6601       Imm |= Lane[I] << (LANE_SHIFT * I);
6602     }
6603     return true;
6604   }
6605   return false;
6606 }
6607 
6608 bool
6609 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6610   using namespace llvm::AMDGPU::Swizzle;
6611 
6612   SMLoc Loc;
6613   int64_t GroupSize;
6614   int64_t LaneIdx;
6615 
6616   if (!parseSwizzleOperand(GroupSize,
6617                            2, 32,
6618                            "group size must be in the interval [2,32]",
6619                            Loc)) {
6620     return false;
6621   }
6622   if (!isPowerOf2_64(GroupSize)) {
6623     Error(Loc, "group size must be a power of two");
6624     return false;
6625   }
6626   if (parseSwizzleOperand(LaneIdx,
6627                           0, GroupSize - 1,
6628                           "lane id must be in the interval [0,group size - 1]",
6629                           Loc)) {
6630     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6631     return true;
6632   }
6633   return false;
6634 }
6635 
6636 bool
6637 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
6638   using namespace llvm::AMDGPU::Swizzle;
6639 
6640   SMLoc Loc;
6641   int64_t GroupSize;
6642 
6643   if (!parseSwizzleOperand(GroupSize,
6644                            2, 32,
6645                            "group size must be in the interval [2,32]",
6646                            Loc)) {
6647     return false;
6648   }
6649   if (!isPowerOf2_64(GroupSize)) {
6650     Error(Loc, "group size must be a power of two");
6651     return false;
6652   }
6653 
6654   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6655   return true;
6656 }
6657 
6658 bool
6659 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6660   using namespace llvm::AMDGPU::Swizzle;
6661 
6662   SMLoc Loc;
6663   int64_t GroupSize;
6664 
6665   if (!parseSwizzleOperand(GroupSize,
6666                            1, 16,
6667                            "group size must be in the interval [1,16]",
6668                            Loc)) {
6669     return false;
6670   }
6671   if (!isPowerOf2_64(GroupSize)) {
6672     Error(Loc, "group size must be a power of two");
6673     return false;
6674   }
6675 
6676   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6677   return true;
6678 }
6679 
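// Parse the mask of a BITMASK_PERM swizzle macro. The mask is a 5-character
// string where each character controls one bit of the lane id:
// '0' forces the bit to 0, '1' forces it to 1, 'p' preserves it and
// 'i' inverts it.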
6680 bool
6681 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
6682   using namespace llvm::AMDGPU::Swizzle;
6683 
6684   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6685     return false;
6686   }
6687 
6688   StringRef Ctl;
6689   SMLoc StrLoc = getLoc();
6690   if (!parseString(Ctl)) {
6691     return false;
6692   }
6693   if (Ctl.size() != BITMASK_WIDTH) {
6694     Error(StrLoc, "expected a 5-character mask");
6695     return false;
6696   }
6697 
6698   unsigned AndMask = 0;
6699   unsigned OrMask = 0;
6700   unsigned XorMask = 0;
6701 
6702   for (size_t i = 0; i < Ctl.size(); ++i) {
6703     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
6704     switch(Ctl[i]) {
6705     default:
6706       Error(StrLoc, "invalid mask");
6707       return false;
6708     case '0':
6709       break;
6710     case '1':
6711       OrMask |= Mask;
6712       break;
6713     case 'p':
6714       AndMask |= Mask;
6715       break;
6716     case 'i':
6717       AndMask |= Mask;
6718       XorMask |= Mask;
6719       break;
6720     }
6721   }
6722 
6723   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
6724   return true;
6725 }
6726 
6727 bool
6728 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6729 
6730   SMLoc OffsetLoc = getLoc();
6731 
6732   if (!parseExpr(Imm, "a swizzle macro")) {
6733     return false;
6734   }
6735   if (!isUInt<16>(Imm)) {
6736     Error(OffsetLoc, "expected a 16-bit offset");
6737     return false;
6738   }
6739   return true;
6740 }
6741 
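// Parse a swizzle macro. The supported modes take forms such as:
//   swizzle(QUAD_PERM, <lane0>, <lane1>, <lane2>, <lane3>)
//   swizzle(BITMASK_PERM, "<mask>")
//   swizzle(BROADCAST, <group size>, <lane id>)
//   swizzle(SWAP, <group size>)
//   swizzle(REVERSE, <group size>)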
6742 bool
6743 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6744   using namespace llvm::AMDGPU::Swizzle;
6745 
6746   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
6747 
6748     SMLoc ModeLoc = getLoc();
6749     bool Ok = false;
6750 
6751     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6752       Ok = parseSwizzleQuadPerm(Imm);
6753     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6754       Ok = parseSwizzleBitmaskPerm(Imm);
6755     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6756       Ok = parseSwizzleBroadcast(Imm);
6757     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6758       Ok = parseSwizzleSwap(Imm);
6759     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6760       Ok = parseSwizzleReverse(Imm);
6761     } else {
6762       Error(ModeLoc, "expected a swizzle mode");
6763     }
6764 
6765     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
6766   }
6767 
6768   return false;
6769 }
6770 
6771 OperandMatchResultTy
6772 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6773   SMLoc S = getLoc();
6774   int64_t Imm = 0;
6775 
6776   if (trySkipId("offset")) {
6777 
6778     bool Ok = false;
6779     if (skipToken(AsmToken::Colon, "expected a colon")) {
6780       if (trySkipId("swizzle")) {
6781         Ok = parseSwizzleMacro(Imm);
6782       } else {
6783         Ok = parseSwizzleOffset(Imm);
6784       }
6785     }
6786 
6787     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
6788 
6789     return Ok ? MatchOperand_Success : MatchOperand_ParseFail;
6790   } else {
6791     // Swizzle "offset" operand is optional.
6792     // If it is omitted, try parsing other optional operands.
6793     return parseOptionalOpr(Operands);
6794   }
6795 }
6796 
6797 bool
6798 AMDGPUOperand::isSwizzle() const {
6799   return isImmTy(ImmTySwizzle);
6800 }
6801 
6802 //===----------------------------------------------------------------------===//
6803 // VGPR Index Mode
6804 //===----------------------------------------------------------------------===//
6805 
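// Parse the body of a gpr_idx(...) macro: either an immediately closing
// parenthesis meaning "off", or a comma-separated list of VGPR index modes
// (as named in IdSymbolic). Duplicate modes are rejected.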
6806 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
6807 
6808   using namespace llvm::AMDGPU::VGPRIndexMode;
6809 
6810   if (trySkipToken(AsmToken::RParen)) {
6811     return OFF;
6812   }
6813 
6814   int64_t Imm = 0;
6815 
6816   while (true) {
6817     unsigned Mode = 0;
6818     SMLoc S = getLoc();
6819 
6820     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
6821       if (trySkipId(IdSymbolic[ModeId])) {
6822         Mode = 1 << ModeId;
6823         break;
6824       }
6825     }
6826 
6827     if (Mode == 0) {
6828       Error(S, (Imm == 0)?
6829                "expected a VGPR index mode or a closing parenthesis" :
6830                "expected a VGPR index mode");
6831       return UNDEF;
6832     }
6833 
6834     if (Imm & Mode) {
6835       Error(S, "duplicate VGPR index mode");
6836       return UNDEF;
6837     }
6838     Imm |= Mode;
6839 
6840     if (trySkipToken(AsmToken::RParen))
6841       break;
6842     if (!skipToken(AsmToken::Comma,
6843                    "expected a comma or a closing parenthesis"))
6844       return UNDEF;
6845   }
6846 
6847   return Imm;
6848 }
6849 
6850 OperandMatchResultTy
6851 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
6852 
6853   using namespace llvm::AMDGPU::VGPRIndexMode;
6854 
6855   int64_t Imm = 0;
6856   SMLoc S = getLoc();
6857 
6858   if (trySkipId("gpr_idx", AsmToken::LParen)) {
6859     Imm = parseGPRIdxMacro();
6860     if (Imm == UNDEF)
6861       return MatchOperand_ParseFail;
6862   } else {
6863     if (getParser().parseAbsoluteExpression(Imm))
6864       return MatchOperand_ParseFail;
6865     if (Imm < 0 || !isUInt<4>(Imm)) {
6866       Error(S, "invalid immediate: only 4-bit values are legal");
6867       return MatchOperand_ParseFail;
6868     }
6869   }
6870 
6871   Operands.push_back(
6872       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
6873   return MatchOperand_Success;
6874 }
6875 
6876 bool AMDGPUOperand::isGPRIdxMode() const {
6877   return isImmTy(ImmTyGprIdxMode);
6878 }
6879 
6880 //===----------------------------------------------------------------------===//
6881 // sopp branch targets
6882 //===----------------------------------------------------------------------===//
6883 
6884 OperandMatchResultTy
6885 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
6886 
6887   // Make sure we are not parsing something
6888   // that looks like a label or an expression but is not.
6889   // This will improve error messages.
6890   if (isRegister() || isModifier())
6891     return MatchOperand_NoMatch;
6892 
6893   if (!parseExpr(Operands))
6894     return MatchOperand_ParseFail;
6895 
6896   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
6897   assert(Opr.isImm() || Opr.isExpr());
6898   SMLoc Loc = Opr.getStartLoc();
6899 
6900   // Currently we do not support arbitrary expressions as branch targets.
6901   // Only labels and absolute expressions are accepted.
6902   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
6903     Error(Loc, "expected an absolute expression or a label");
6904   } else if (Opr.isImm() && !Opr.isS16Imm()) {
6905     Error(Loc, "expected a 16-bit signed jump offset");
6906   }
6907 
6908   return MatchOperand_Success;
6909 }
6910 
6911 //===----------------------------------------------------------------------===//
6912 // Boolean holding registers
6913 //===----------------------------------------------------------------------===//
6914 
6915 OperandMatchResultTy
6916 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
6917   return parseReg(Operands);
6918 }
6919 
6920 //===----------------------------------------------------------------------===//
6921 // mubuf
6922 //===----------------------------------------------------------------------===//
6923 
6924 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
6925   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
6926 }
6927 
6928 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
6929                                    const OperandVector &Operands,
6930                                    bool IsAtomic,
6931                                    bool IsLds) {
6932   bool IsLdsOpcode = IsLds;
6933   bool HasLdsModifier = false;
6934   OptionalImmIndexMap OptionalIdx;
6935   unsigned FirstOperandIdx = 1;
6936   bool IsAtomicReturn = false;
6937 
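  // For atomics, the GLC bit of the cache policy operand selects the
  // returning form. If it is absent, switch to the no-return opcode when
  // one is available.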
6938   if (IsAtomic) {
6939     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
6940       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6941       if (!Op.isCPol())
6942         continue;
6943       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
6944       break;
6945     }
6946 
6947     if (!IsAtomicReturn) {
6948       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
6949       if (NewOpc != -1)
6950         Inst.setOpcode(NewOpc);
6951     }
6952 
6953     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
6954                       SIInstrFlags::IsAtomicRet;
6955   }
6956 
6957   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
6958     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6959 
6960     // Add the register arguments
6961     if (Op.isReg()) {
6962       Op.addRegOperands(Inst, 1);
6963       // Insert a tied src for the atomic return dst.
6964       // This cannot be postponed because subsequent calls to
6965       // addImmOperands rely on the correct number of MC operands.
6966       if (IsAtomicReturn && i == FirstOperandIdx)
6967         Op.addRegOperands(Inst, 1);
6968       continue;
6969     }
6970 
6971     // Handle the case where soffset is an immediate
6972     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6973       Op.addImmOperands(Inst, 1);
6974       continue;
6975     }
6976 
6977     HasLdsModifier |= Op.isLDS();
6978 
6979     // Handle tokens like 'offen' which are sometimes hard-coded into the
6980     // asm string.  There are no MCInst operands for these.
6981     if (Op.isToken()) {
6982       continue;
6983     }
6984     assert(Op.isImm());
6985 
6986     // Handle optional arguments
6987     OptionalIdx[Op.getImmTy()] = i;
6988   }
6989 
6990   // This is a workaround for an llvm quirk which may result in an
6991   // incorrect instruction selection. Lds and non-lds versions of
6992   // MUBUF instructions are identical except that lds versions
6993   // have a mandatory 'lds' modifier. However, this modifier follows
6994   // the optional modifiers, so the llvm asm matcher regards 'lds'
6995   // as optional too. As a result, an lds version of an opcode may be
6996   // selected even though the instruction has no 'lds' modifier.
6997   if (IsLdsOpcode && !HasLdsModifier) {
6998     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
6999     if (NoLdsOpcode != -1) { // Got lds version - correct it.
7000       Inst.setOpcode(NoLdsOpcode);
7001       IsLdsOpcode = false;
7002     }
7003   }
7004 
7005   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7006   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7007 
7008   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
7009     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7010   }
7011   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7012 }
7013 
7014 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7015   OptionalImmIndexMap OptionalIdx;
7016 
7017   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7018     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7019 
7020     // Add the register arguments
7021     if (Op.isReg()) {
7022       Op.addRegOperands(Inst, 1);
7023       continue;
7024     }
7025 
7026     // Handle the case where soffset is an immediate
7027     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7028       Op.addImmOperands(Inst, 1);
7029       continue;
7030     }
7031 
7032     // Handle tokens like 'offen' which are sometimes hard-coded into the
7033     // asm string.  There are no MCInst operands for these.
7034     if (Op.isToken()) {
7035       continue;
7036     }
7037     assert(Op.isImm());
7038 
7039     // Handle optional arguments
7040     OptionalIdx[Op.getImmTy()] = i;
7041   }
7042 
7043   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7044                         AMDGPUOperand::ImmTyOffset);
7045   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7046   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7047   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7048   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7049 }
7050 
7051 //===----------------------------------------------------------------------===//
7052 // mimg
7053 //===----------------------------------------------------------------------===//
7054 
7055 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7056                               bool IsAtomic) {
7057   unsigned I = 1;
7058   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7059   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7060     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7061   }
7062 
7063   if (IsAtomic) {
7064     // Add src, same as dst
7065     assert(Desc.getNumDefs() == 1);
7066     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7067   }
7068 
7069   OptionalImmIndexMap OptionalIdx;
7070 
7071   for (unsigned E = Operands.size(); I != E; ++I) {
7072     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7073 
7074     // Add the register arguments
7075     if (Op.isReg()) {
7076       Op.addRegOperands(Inst, 1);
7077     } else if (Op.isImmModifier()) {
7078       OptionalIdx[Op.getImmTy()] = I;
7079     } else if (!Op.isToken()) {
7080       llvm_unreachable("unexpected operand type");
7081     }
7082   }
7083 
7084   bool IsGFX10Plus = isGFX10Plus();
7085 
7086   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7087   if (IsGFX10Plus)
7088     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7089   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7090   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7091   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7092   if (IsGFX10Plus)
7093     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7094   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7095     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7096   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7097   if (!IsGFX10Plus)
7098     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7099   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7100 }
7101 
7102 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7103   cvtMIMG(Inst, Operands, true);
7104 }
7105 
7106 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7107   OptionalImmIndexMap OptionalIdx;
7108   bool IsAtomicReturn = false;
7109 
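  // As with buffer atomics, the GLC bit of the cache policy operand selects
  // the returning form of the SMEM atomic.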
7110   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7111     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7112     if (!Op.isCPol())
7113       continue;
7114     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7115     break;
7116   }
7117 
7118   if (!IsAtomicReturn) {
7119     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7120     if (NewOpc != -1)
7121       Inst.setOpcode(NewOpc);
7122   }
7123 
7124   IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7125                     SIInstrFlags::IsAtomicRet;
7126 
7127   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7128     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7129 
7130     // Add the register arguments
7131     if (Op.isReg()) {
7132       Op.addRegOperands(Inst, 1);
7133       if (IsAtomicReturn && i == 1)
7134         Op.addRegOperands(Inst, 1);
7135       continue;
7136     }
7137 
7138     // Handle the case where soffset is an immediate
7139     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7140       Op.addImmOperands(Inst, 1);
7141       continue;
7142     }
7143 
7144     // Handle tokens like 'offen' which are sometimes hard-coded into the
7145     // asm string.  There are no MCInst operands for these.
7146     if (Op.isToken()) {
7147       continue;
7148     }
7149     assert(Op.isImm());
7150 
7151     // Handle optional arguments
7152     OptionalIdx[Op.getImmTy()] = i;
7153   }
7154 
7155   if ((int)Inst.getNumOperands() <=
7156       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7157     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7158   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7159 }
7160 
7161 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7162                                       const OperandVector &Operands) {
7163   for (unsigned I = 1; I < Operands.size(); ++I) {
7164     auto &Operand = (AMDGPUOperand &)*Operands[I];
7165     if (Operand.isReg())
7166       Operand.addRegOperands(Inst, 1);
7167   }
7168 
7169   Inst.addOperand(MCOperand::createImm(1)); // a16
7170 }
7171 
7172 //===----------------------------------------------------------------------===//
7173 // smrd
7174 //===----------------------------------------------------------------------===//
7175 
7176 bool AMDGPUOperand::isSMRDOffset8() const {
7177   return isImm() && isUInt<8>(getImm());
7178 }
7179 
7180 bool AMDGPUOperand::isSMEMOffset() const {
7181   return isImm(); // Offset range is checked later by validator.
7182 }
7183 
7184 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7185   // 32-bit literals are only supported on CI, and we only want to use them
7186   // when the offset does not fit into 8 bits.
7187   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7188 }
7189 
7190 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7191   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7192 }
7193 
7194 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7195   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7196 }
7197 
7198 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7199   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7200 }
7201 
7202 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7203   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7204 }
7205 
7206 //===----------------------------------------------------------------------===//
7207 // vop3
7208 //===----------------------------------------------------------------------===//
7209 
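// Map the omod modifier value to its encoding:
//   mul:1 -> 0, mul:2 -> 1, mul:4 -> 2 (ConvertOmodMul)
//   div:1 -> 0, div:2 -> 3             (ConvertOmodDiv)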
7210 static bool ConvertOmodMul(int64_t &Mul) {
7211   if (Mul != 1 && Mul != 2 && Mul != 4)
7212     return false;
7213 
7214   Mul >>= 1;
7215   return true;
7216 }
7217 
7218 static bool ConvertOmodDiv(int64_t &Div) {
7219   if (Div == 1) {
7220     Div = 0;
7221     return true;
7222   }
7223 
7224   if (Div == 2) {
7225     Div = 3;
7226     return true;
7227   }
7228 
7229   return false;
7230 }
7231 
7232 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7233 // This is intentional and ensures compatibility with sp3.
7234 // See bug 35397 for details.
7235 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7236   if (BoundCtrl == 0 || BoundCtrl == 1) {
7237     BoundCtrl = 1;
7238     return true;
7239   }
7240   return false;
7241 }
7242 
7243 // Note: the order in this table matches the order of operands in AsmString.
7244 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7245   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7246   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7247   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7248   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7249   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7250   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7251   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7252   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7253   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7254   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7255   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7256   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7257   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7258   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7259   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7260   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7261   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7262   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7263   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7264   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7265   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7266   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7267   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7268   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7269   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7270   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7271   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7272   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
7273   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7274   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7275   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7276   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7277   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7278   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7279   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7280   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7281   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7282   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7283   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7284   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7285   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7286 };
7287 
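// Called before parsing begins: initialize the target ID and, for HSA code
// object v3/v4, emit the AMDGCN target directive. Skipped for r600.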
7288 void AMDGPUAsmParser::onBeginOfFile() {
7289   if (!getParser().getStreamer().getTargetStreamer() ||
7290       getSTI().getTargetTriple().getArch() == Triple::r600)
7291     return;
7292 
7293   if (!getTargetStreamer().getTargetID())
7294     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7295 
7296   if (isHsaAbiVersion3Or4(&getSTI()))
7297     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7298 }
7299 
7300 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7301 
7302   OperandMatchResultTy res = parseOptionalOpr(Operands);
7303 
7304   // This is a hack to enable hardcoded mandatory operands which follow
7305   // optional operands.
7306   //
7307   // The current design assumes that all operands after the first optional
7308   // operand are also optional. However, some instructions violate this rule
7309   // (see e.g. flat/global atomics, which have hardcoded 'glc' operands).
7310   //
7311   // To alleviate this problem, we have to (implicitly) parse extra operands
7312   // to make sure the autogenerated parser of custom operands never hits a
7313   // hardcoded mandatory operand.
7314 
7315   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7316     if (res != MatchOperand_Success ||
7317         isToken(AsmToken::EndOfStatement))
7318       break;
7319 
7320     trySkipToken(AsmToken::Comma);
7321     res = parseOptionalOpr(Operands);
7322   }
7323 
7324   return res;
7325 }
7326 
7327 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7328   OperandMatchResultTy res;
7329   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7330     // try to parse any optional operand here
7331     if (Op.IsBit) {
7332       res = parseNamedBit(Op.Name, Operands, Op.Type);
7333     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7334       res = parseOModOperand(Operands);
7335     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7336                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7337                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7338       res = parseSDWASel(Operands, Op.Name, Op.Type);
7339     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7340       res = parseSDWADstUnused(Operands);
7341     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7342                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7343                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7344                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7345       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7346                                         Op.ConvertResult);
7347     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7348       res = parseDim(Operands);
7349     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7350       res = parseCPol(Operands);
7351     } else {
7352       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7353     }
7354     if (res != MatchOperand_NoMatch) {
7355       return res;
7356     }
7357   }
7358   return MatchOperand_NoMatch;
7359 }
7360 
7361 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7362   StringRef Name = getTokenStr();
7363   if (Name == "mul") {
7364     return parseIntWithPrefix("mul", Operands,
7365                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7366   }
7367 
7368   if (Name == "div") {
7369     return parseIntWithPrefix("div", Operands,
7370                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7371   }
7372 
7373   return MatchOperand_NoMatch;
7374 }
7375 
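// For VOP3 op_sel instructions, the op_sel bit that follows the last source
// operand selects the destination half; fold it into src0_modifiers as
// DST_OP_SEL.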
7376 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
7377   cvtVOP3P(Inst, Operands);
7378 
7379   int Opc = Inst.getOpcode();
7380 
7381   int SrcNum;
7382   const int Ops[] = { AMDGPU::OpName::src0,
7383                       AMDGPU::OpName::src1,
7384                       AMDGPU::OpName::src2 };
7385   for (SrcNum = 0;
7386        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
7387        ++SrcNum);
7388   assert(SrcNum > 0);
7389 
7390   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7391   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7392 
7393   if ((OpSel & (1 << SrcNum)) != 0) {
7394     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7395     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7396     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
7397   }
7398 }
7399 
7400 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
7401       // 1. This operand is an input modifiers operand
7402   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
7403       // 2. This is not the last operand
7404       && Desc.NumOperands > (OpNum + 1)
7405       // 3. The next operand has a register class
7406       && Desc.OpInfo[OpNum + 1].RegClass != -1
7407       // 4. The next operand is not tied to any other operand
7408       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7409 }
7410 
7411 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7412 {
7413   OptionalImmIndexMap OptionalIdx;
7414   unsigned Opc = Inst.getOpcode();
7415 
7416   unsigned I = 1;
7417   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7418   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7419     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7420   }
7421 
7422   for (unsigned E = Operands.size(); I != E; ++I) {
7423     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7424     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7425       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7426     } else if (Op.isInterpSlot() ||
7427                Op.isInterpAttr() ||
7428                Op.isAttrChan()) {
7429       Inst.addOperand(MCOperand::createImm(Op.getImm()));
7430     } else if (Op.isImmModifier()) {
7431       OptionalIdx[Op.getImmTy()] = I;
7432     } else {
7433       llvm_unreachable("unhandled operand type");
7434     }
7435   }
7436 
7437   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
7438     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
7439   }
7440 
7441   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7442     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7443   }
7444 
7445   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7446     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7447   }
7448 }
7449 
7450 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
7451                               OptionalImmIndexMap &OptionalIdx) {
7452   unsigned Opc = Inst.getOpcode();
7453 
7454   unsigned I = 1;
7455   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7456   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7457     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7458   }
7459 
7460   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
7461     // This instruction has src modifiers
7462     for (unsigned E = Operands.size(); I != E; ++I) {
7463       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7464       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7465         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7466       } else if (Op.isImmModifier()) {
7467         OptionalIdx[Op.getImmTy()] = I;
7468       } else if (Op.isRegOrImm()) {
7469         Op.addRegOrImmOperands(Inst, 1);
7470       } else {
7471         llvm_unreachable("unhandled operand type");
7472       }
7473     }
7474   } else {
7475     // No src modifiers
7476     for (unsigned E = Operands.size(); I != E; ++I) {
7477       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7478       if (Op.isMod()) {
7479         OptionalIdx[Op.getImmTy()] = I;
7480       } else {
7481         Op.addRegOrImmOperands(Inst, 1);
7482       }
7483     }
7484   }
7485 
7486   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7487     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7488   }
7489 
7490   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7491     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7492   }
7493 
7494   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
7495   // these have a src2 register operand that is tied to the dst operand.
7496   // We don't allow modifiers for this operand in the assembler, so
7497   // src2_modifiers should be 0.
7498   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
7499       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
7500       Opc == AMDGPU::V_MAC_F32_e64_vi ||
7501       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
7502       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
7503       Opc == AMDGPU::V_MAC_F16_e64_vi ||
7504       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
7505       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
7506       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
7507       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
7508       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
7509     auto it = Inst.begin();
7510     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
7511     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
7512     ++it;
7513     // Copy the operand to ensure it's not invalidated when Inst grows.
7514     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
7515   }
7516 }
7517 
7518 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
7519   OptionalImmIndexMap OptionalIdx;
7520   cvtVOP3(Inst, Operands, OptionalIdx);
7521 }
7522 
7523 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
7524                                OptionalImmIndexMap &OptIdx) {
7525   const int Opc = Inst.getOpcode();
7526   const MCInstrDesc &Desc = MII.get(Opc);
7527 
7528   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
7529 
7530   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
7531     assert(!IsPacked);
7532     Inst.addOperand(Inst.getOperand(0));
7533   }
7534 
7535   // FIXME: This is messy. Parse the modifiers as if this were a normal VOP3
7536   // instruction, and then figure out where to actually put the modifiers.
7537 
7538   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7539   if (OpSelIdx != -1) {
7540     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
7541   }
7542 
7543   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
7544   if (OpSelHiIdx != -1) {
7545     int DefaultVal = IsPacked ? -1 : 0;
7546     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
7547                           DefaultVal);
7548   }
7549 
7550   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
7551   if (NegLoIdx != -1) {
7552     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
7553     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
7554   }
7555 
7556   const int Ops[] = { AMDGPU::OpName::src0,
7557                       AMDGPU::OpName::src1,
7558                       AMDGPU::OpName::src2 };
7559   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
7560                          AMDGPU::OpName::src1_modifiers,
7561                          AMDGPU::OpName::src2_modifiers };
7562 
7563   unsigned OpSel = 0;
7564   unsigned OpSelHi = 0;
7565   unsigned NegLo = 0;
7566   unsigned NegHi = 0;
7567 
7568   if (OpSelIdx != -1)
7569     OpSel = Inst.getOperand(OpSelIdx).getImm();
7570 
7571   if (OpSelHiIdx != -1)
7572     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
7573 
7574   if (NegLoIdx != -1) {
7575     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
7576     NegLo = Inst.getOperand(NegLoIdx).getImm();
7577     NegHi = Inst.getOperand(NegHiIdx).getImm();
7578   }
7579 
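  // Distribute the parsed op_sel/op_sel_hi/neg_lo/neg_hi bit vectors into
  // the per-source modifier operands.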
7580   for (int J = 0; J < 3; ++J) {
7581     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
7582     if (OpIdx == -1)
7583       break;
7584 
7585     uint32_t ModVal = 0;
7586 
7587     if ((OpSel & (1 << J)) != 0)
7588       ModVal |= SISrcMods::OP_SEL_0;
7589 
7590     if ((OpSelHi & (1 << J)) != 0)
7591       ModVal |= SISrcMods::OP_SEL_1;
7592 
7593     if ((NegLo & (1 << J)) != 0)
7594       ModVal |= SISrcMods::NEG;
7595 
7596     if ((NegHi & (1 << J)) != 0)
7597       ModVal |= SISrcMods::NEG_HI;
7598 
7599     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
7600 
7601     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
7602   }
7603 }
7604 
7605 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
7606   OptionalImmIndexMap OptIdx;
7607   cvtVOP3(Inst, Operands, OptIdx);
7608   cvtVOP3P(Inst, Operands, OptIdx);
7609 }
7610 
7611 //===----------------------------------------------------------------------===//
7612 // dpp
7613 //===----------------------------------------------------------------------===//
7614 
7615 bool AMDGPUOperand::isDPP8() const {
7616   return isImmTy(ImmTyDPP8);
7617 }
7618 
7619 bool AMDGPUOperand::isDPPCtrl() const {
7620   using namespace AMDGPU::DPP;
7621 
7622   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
7623   if (result) {
7624     int64_t Imm = getImm();
7625     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
7626            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
7627            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
7628            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
7629            (Imm == DppCtrl::WAVE_SHL1) ||
7630            (Imm == DppCtrl::WAVE_ROL1) ||
7631            (Imm == DppCtrl::WAVE_SHR1) ||
7632            (Imm == DppCtrl::WAVE_ROR1) ||
7633            (Imm == DppCtrl::ROW_MIRROR) ||
7634            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
7635            (Imm == DppCtrl::BCAST15) ||
7636            (Imm == DppCtrl::BCAST31) ||
7637            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
7638            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
7639   }
7640   return false;
7641 }
7642 
7643 //===----------------------------------------------------------------------===//
7644 // mAI
7645 //===----------------------------------------------------------------------===//
7646 
7647 bool AMDGPUOperand::isBLGP() const {
7648   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
7649 }
7650 
7651 bool AMDGPUOperand::isCBSZ() const {
7652   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
7653 }
7654 
7655 bool AMDGPUOperand::isABID() const {
7656   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
7657 }
7658 
7659 bool AMDGPUOperand::isS16Imm() const {
7660   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
7661 }
7662 
7663 bool AMDGPUOperand::isU16Imm() const {
7664   return isImm() && isUInt<16>(getImm());
7665 }
7666 
7667 //===----------------------------------------------------------------------===//
7668 // dim
7669 //===----------------------------------------------------------------------===//
7670 
7671 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
7672   // We want to allow "dim:1D" etc.,
7673   // but the initial 1 is tokenized as an integer.
7674   std::string Token;
7675   if (isToken(AsmToken::Integer)) {
7676     SMLoc Loc = getToken().getEndLoc();
7677     Token = std::string(getTokenStr());
7678     lex();
7679     if (getLoc() != Loc)
7680       return false;
7681   }
7682 
7683   StringRef Suffix;
7684   if (!parseId(Suffix))
7685     return false;
7686   Token += Suffix;
7687 
7688   StringRef DimId = Token;
7689   if (DimId.startswith("SQ_RSRC_IMG_"))
7690     DimId = DimId.drop_front(12);
7691 
7692   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
7693   if (!DimInfo)
7694     return false;
7695 
7696   Encoding = DimInfo->Encoding;
7697   return true;
7698 }
7699 
7700 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
7701   if (!isGFX10Plus())
7702     return MatchOperand_NoMatch;
7703 
7704   SMLoc S = getLoc();
7705 
7706   if (!trySkipId("dim", AsmToken::Colon))
7707     return MatchOperand_NoMatch;
7708 
7709   unsigned Encoding;
7710   SMLoc Loc = getLoc();
7711   if (!parseDimId(Encoding)) {
7712     Error(Loc, "invalid dim value");
7713     return MatchOperand_ParseFail;
7714   }
7715 
7716   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
7717                                               AMDGPUOperand::ImmTyDim));
7718   return MatchOperand_Success;
7719 }
7720 
7721 //===----------------------------------------------------------------------===//
7722 // dpp
7723 //===----------------------------------------------------------------------===//
7724 
7725 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
7726   SMLoc S = getLoc();
7727 
7728   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
7729     return MatchOperand_NoMatch;
7730 
7731   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
7732 
7733   int64_t Sels[8];
7734 
7735   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7736     return MatchOperand_ParseFail;
7737 
7738   for (size_t i = 0; i < 8; ++i) {
7739     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7740       return MatchOperand_ParseFail;
7741 
7742     SMLoc Loc = getLoc();
7743     if (getParser().parseAbsoluteExpression(Sels[i]))
7744       return MatchOperand_ParseFail;
7745     if (Sels[i] < 0 || Sels[i] > 7) {
7746       Error(Loc, "expected a 3-bit value");
7747       return MatchOperand_ParseFail;
7748     }
7749   }
7750 
7751   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7752     return MatchOperand_ParseFail;
7753 
7754   unsigned DPP8 = 0;
7755   for (size_t i = 0; i < 8; ++i)
7756     DPP8 |= (Sels[i] << (i * 3));
7757 
7758   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
7759   return MatchOperand_Success;
7760 }
7761 
7762 bool
7763 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
7764                                     const OperandVector &Operands) {
7765   if (Ctrl == "row_newbcast")
7766     return isGFX90A();
7767 
7768   if (Ctrl == "row_share" ||
7769       Ctrl == "row_xmask")
7770     return isGFX10Plus();
7771 
7772   if (Ctrl == "wave_shl" ||
7773       Ctrl == "wave_shr" ||
7774       Ctrl == "wave_rol" ||
7775       Ctrl == "wave_ror" ||
7776       Ctrl == "row_bcast")
7777     return isVI() || isGFX9();
7778 
7779   return Ctrl == "row_mirror" ||
7780          Ctrl == "row_half_mirror" ||
7781          Ctrl == "quad_perm" ||
7782          Ctrl == "row_shl" ||
7783          Ctrl == "row_shr" ||
7784          Ctrl == "row_ror";
7785 }
7786 
7787 int64_t
7788 AMDGPUAsmParser::parseDPPCtrlPerm() {
7789   // quad_perm:[%d,%d,%d,%d]
7790 
7791   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7792     return -1;
7793 
7794   int64_t Val = 0;
7795   for (int i = 0; i < 4; ++i) {
7796     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7797       return -1;
7798 
7799     int64_t Temp;
7800     SMLoc Loc = getLoc();
7801     if (getParser().parseAbsoluteExpression(Temp))
7802       return -1;
7803     if (Temp < 0 || Temp > 3) {
7804       Error(Loc, "expected a 2-bit value");
7805       return -1;
7806     }
7807 
7808     Val += (Temp << (i * 2));
7809   }
7810 
7811   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7812     return -1;
7813 
7814   return Val;
7815 }
7816 
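// Parse the integer argument of a "ctrl:N" style DPP control (wave_shl,
// row_shr, row_share, row_bcast, ...), check it against the control's
// allowed range (row_bcast accepts only 15 or 31) and map it to the
// encoded DppCtrl value. Returns -1 on error.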
7817 int64_t
7818 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
7819   using namespace AMDGPU::DPP;
7820 
7821   // sel:%d
7822 
7823   int64_t Val;
7824   SMLoc Loc = getLoc();
7825 
7826   if (getParser().parseAbsoluteExpression(Val))
7827     return -1;
7828 
7829   struct DppCtrlCheck {
7830     int64_t Ctrl;
7831     int Lo;
7832     int Hi;
7833   };
7834 
7835   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
7836     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
7837     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
7838     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
7839     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
7840     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
7841     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
7842     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
7843     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
7844     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
7845     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
7846     .Default({-1, 0, 0});
7847 
7848   bool Valid;
7849   if (Check.Ctrl == -1) {
7850     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
7851     Val = (Val == 15) ? DppCtrl::BCAST15 : DppCtrl::BCAST31;
7852   } else {
7853     Valid = Check.Lo <= Val && Val <= Check.Hi;
7854     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
7855   }
7856 
7857   if (!Valid) {
7858     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
7859     return -1;
7860   }
7861 
7862   return Val;
7863 }
7864 
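// Parse a DPP control operand: either a bare control (row_mirror,
// row_half_mirror) or a control with an argument such as quad_perm:[...]
// or row_shl:N.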
7865 OperandMatchResultTy
7866 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
7867   using namespace AMDGPU::DPP;
7868 
7869   if (!isToken(AsmToken::Identifier) ||
7870       !isSupportedDPPCtrl(getTokenStr(), Operands))
7871     return MatchOperand_NoMatch;
7872 
7873   SMLoc S = getLoc();
7874   int64_t Val = -1;
7875   StringRef Ctrl;
7876 
7877   parseId(Ctrl);
7878 
7879   if (Ctrl == "row_mirror") {
7880     Val = DppCtrl::ROW_MIRROR;
7881   } else if (Ctrl == "row_half_mirror") {
7882     Val = DppCtrl::ROW_HALF_MIRROR;
7883   } else {
7884     if (skipToken(AsmToken::Colon, "expected a colon")) {
7885       if (Ctrl == "quad_perm") {
7886         Val = parseDPPCtrlPerm();
7887       } else {
7888         Val = parseDPPCtrlSel(Ctrl);
7889       }
7890     }
7891   }
7892 
7893   if (Val == -1)
7894     return MatchOperand_ParseFail;
7895 
7896   Operands.push_back(
7897     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
7898   return MatchOperand_Success;
7899 }
7900 
7901 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
7902   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
7903 }
7904 
7905 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
7906   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
7907 }
7908 
7909 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
7910   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
7911 }
7912 
7913 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
7914   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
7915 }
7916 
7917 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
7918   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
7919 }
7920 
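// Convert the parsed operands of a DPP or DPP8 instruction into an MCInst.
// Tied operands are duplicated and the "vcc" token of VOP2b instructions is
// skipped. For DPP8 only the fi flag is appended; for regular DPP the
// optional row_mask, bank_mask, bound_ctrl and fi operands get their
// default values when omitted.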
7921 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
7922   OptionalImmIndexMap OptionalIdx;
7923 
7924   unsigned I = 1;
7925   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7926   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7927     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7928   }
7929 
7930   int Fi = 0;
7931   for (unsigned E = Operands.size(); I != E; ++I) {
7932     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
7933                                             MCOI::TIED_TO);
7934     if (TiedTo != -1) {
7935       assert((unsigned)TiedTo < Inst.getNumOperands());
7936       // Handle the tied old or src2 operand of MAC instructions.
7937       Inst.addOperand(Inst.getOperand(TiedTo));
7938     }
7939     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7940     // Add the register arguments
7941     if (Op.isReg() && validateVccOperand(Op.getReg())) {
7942       // VOP2b (v_add_u32, v_sub_u32 ...) DPP uses the "vcc" token.
7943       // Skip it.
7944       continue;
7945     }
7946 
7947     if (IsDPP8) {
7948       if (Op.isDPP8()) {
7949         Op.addImmOperands(Inst, 1);
7950       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7951         Op.addRegWithFPInputModsOperands(Inst, 2);
7952       } else if (Op.isFI()) {
7953         Fi = Op.getImm();
7954       } else if (Op.isReg()) {
7955         Op.addRegOperands(Inst, 1);
7956       } else {
7957         llvm_unreachable("Invalid operand type");
7958       }
7959     } else {
7960       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7961         Op.addRegWithFPInputModsOperands(Inst, 2);
7962       } else if (Op.isDPPCtrl()) {
7963         Op.addImmOperands(Inst, 1);
7964       } else if (Op.isImm()) {
7965         // Handle optional arguments
7966         OptionalIdx[Op.getImmTy()] = I;
7967       } else {
7968         llvm_unreachable("Invalid operand type");
7969       }
7970     }
7971   }
7972 
7973   if (IsDPP8) {
7974     using namespace llvm::AMDGPU::DPP;
7975     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
7976   } else {
7977     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
7978     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
7979     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
7980     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
7981       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
7982     }
7983   }
7984 }
7985 
7986 //===----------------------------------------------------------------------===//
7987 // sdwa
7988 //===----------------------------------------------------------------------===//
7989 
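// Parse an SDWA select operand of the form <Prefix>:<Value>, where Value is
// one of BYTE_0..BYTE_3, WORD_0, WORD_1 or DWORD.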
7990 OperandMatchResultTy
7991 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
7992                               AMDGPUOperand::ImmTy Type) {
7993   using namespace llvm::AMDGPU::SDWA;
7994 
7995   SMLoc S = getLoc();
7996   StringRef Value;
7997   OperandMatchResultTy res;
7998 
7999   SMLoc StringLoc;
8000   res = parseStringWithPrefix(Prefix, Value, StringLoc);
8001   if (res != MatchOperand_Success) {
8002     return res;
8003   }
8004 
8005   int64_t Int;
8006   Int = StringSwitch<int64_t>(Value)
8007         .Case("BYTE_0", SdwaSel::BYTE_0)
8008         .Case("BYTE_1", SdwaSel::BYTE_1)
8009         .Case("BYTE_2", SdwaSel::BYTE_2)
8010         .Case("BYTE_3", SdwaSel::BYTE_3)
8011         .Case("WORD_0", SdwaSel::WORD_0)
8012         .Case("WORD_1", SdwaSel::WORD_1)
8013         .Case("DWORD", SdwaSel::DWORD)
8014         .Default(0xffffffff);
8015 
8016   if (Int == 0xffffffff) {
8017     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8018     return MatchOperand_ParseFail;
8019   }
8020 
8021   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8022   return MatchOperand_Success;
8023 }
8024 
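// Parse the dst_unused SDWA operand: one of UNUSED_PAD, UNUSED_SEXT or
// UNUSED_PRESERVE.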
8025 OperandMatchResultTy
8026 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8027   using namespace llvm::AMDGPU::SDWA;
8028 
8029   SMLoc S = getLoc();
8030   StringRef Value;
8031   OperandMatchResultTy res;
8032 
8033   SMLoc StringLoc;
8034   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8035   if (res != MatchOperand_Success) {
8036     return res;
8037   }
8038 
8039   int64_t Int;
8040   Int = StringSwitch<int64_t>(Value)
8041         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8042         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8043         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8044         .Default(0xffffffff);
8045 
8046   if (Int == 0xffffffff) {
8047     Error(StringLoc, "invalid dst_unused value");
8048     return MatchOperand_ParseFail;
8049   }
8050 
8051   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8052   return MatchOperand_Success;
8053 }
8054 
8055 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8056   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8057 }
8058 
8059 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8060   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8061 }
8062 
8063 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8064   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8065 }
8066 
8067 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8068   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8069 }
8070 
8071 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8072   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8073 }
8074 
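// Common conversion for SDWA instructions. Skips the parsed "vcc" dst/src
// operands of VOP2b/VOPC forms when requested by the caller (they are
// implicit in the SDWA encoding), records the optional operands, then
// appends clamp/omod and the SDWA sel/dst_unused operands with their
// default values. For v_mac_f16/f32 the dst register is duplicated as the
// tied src2 operand.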
8075 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8076                               uint64_t BasicInstType,
8077                               bool SkipDstVcc,
8078                               bool SkipSrcVcc) {
8079   using namespace llvm::AMDGPU::SDWA;
8080 
8081   OptionalImmIndexMap OptionalIdx;
8082   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8083   bool SkippedVcc = false;
8084 
8085   unsigned I = 1;
8086   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8087   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8088     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8089   }
8090 
8091   for (unsigned E = Operands.size(); I != E; ++I) {
8092     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8093     if (SkipVcc && !SkippedVcc && Op.isReg() &&
8094         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
8095       // VOP2b (v_add_u32, v_sub_u32 ...) SDWA uses the "vcc" token as dst.
8096       // Skip it if it is the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
8097       // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
8098       // Skip VCC only if we didn't skip it on previous iteration.
8099       // Note that src0 and src1 occupy 2 slots each because of modifiers.
8100       if (BasicInstType == SIInstrFlags::VOP2 &&
8101           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8102            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8103         SkippedVcc = true;
8104         continue;
8105       } else if (BasicInstType == SIInstrFlags::VOPC &&
8106                  Inst.getNumOperands() == 0) {
8107         SkippedVcc = true;
8108         continue;
8109       }
8110     }
8111     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8112       Op.addRegOrImmWithInputModsOperands(Inst, 2);
8113     } else if (Op.isImm()) {
8114       // Handle optional arguments
8115       OptionalIdx[Op.getImmTy()] = I;
8116     } else {
8117       llvm_unreachable("Invalid operand type");
8118     }
8119     SkippedVcc = false;
8120   }
8121 
8122   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
8123       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
8124       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
8125     // v_nop_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments
8126     switch (BasicInstType) {
8127     case SIInstrFlags::VOP1:
8128       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8129       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8130         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8131       }
8132       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8133       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8134       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8135       break;
8136 
8137     case SIInstrFlags::VOP2:
8138       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8139       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8140         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8141       }
8142       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8143       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8144       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8145       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8146       break;
8147 
8148     case SIInstrFlags::VOPC:
8149       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8150         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8151       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8152       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8153       break;
8154 
8155     default:
8156       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8157     }
8158   }
8159 
8160   // Special case for v_mac_{f16, f32}:
8161   // they have a src2 register operand that is tied to the dst operand.
8162   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8163       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
8164     auto it = Inst.begin();
8165     std::advance(
8166       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8167     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8168   }
8169 }
8170 
8171 //===----------------------------------------------------------------------===//
8172 // mAI
8173 //===----------------------------------------------------------------------===//
8174 
8175 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8176   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8177 }
8178 
8179 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8180   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8181 }
8182 
8183 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8184   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8185 }
8186 
8187 /// Force static initialization.
8188 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8189   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8190   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8191 }
8192 
8193 #define GET_REGISTER_MATCHER
8194 #define GET_MATCHER_IMPLEMENTATION
8195 #define GET_MNEMONIC_SPELL_CHECKER
8196 #define GET_MNEMONIC_CHECKER
8197 #include "AMDGPUGenAsmMatcher.inc"
8198 
8199 // This function should be defined after the auto-generated include so that we
8200 // have the MatchClassKind enum defined.
8201 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8202                                                      unsigned Kind) {
8203   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
8204   // But MatchInstructionImpl() expects to meet a token and fails to validate
8205   // the operand. This method checks if we are given an immediate operand but
8206   // expect to get the corresponding token.
8207   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8208   switch (Kind) {
8209   case MCK_addr64:
8210     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8211   case MCK_gds:
8212     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8213   case MCK_lds:
8214     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8215   case MCK_idxen:
8216     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8217   case MCK_offen:
8218     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8219   case MCK_SSrcB32:
8220     // When operands have expression values, they will return true for isToken,
8221     // because it is not possible to distinguish between a token and an
8222     // expression at parse time. MatchInstructionImpl() will always try to
8223     // match an operand as a token, when isToken returns true, and when the
8224     // name of the expression is not a valid token, the match will fail,
8225     // so we need to handle it here.
8226     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8227   case MCK_SSrcF32:
8228     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8229   case MCK_SoppBrTarget:
8230     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8231   case MCK_VReg32OrOff:
8232     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8233   case MCK_InterpSlot:
8234     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
8235   case MCK_Attr:
8236     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
8237   case MCK_AttrChan:
8238     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
8239   case MCK_ImmSMEMOffset:
8240     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
8241   case MCK_SReg_64:
8242   case MCK_SReg_64_XEXEC:
8243     // Null is defined as a 32-bit register, but
8244     // it should also be accepted for 64-bit operands.
8245     // The following code enables it for SReg_64 operands
8246     // used as source and destination. Remaining source
8247     // operands are handled in isInlinableImm.
8248     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
8249   default:
8250     return Match_InvalidOperand;
8251   }
8252 }
8253 
8254 //===----------------------------------------------------------------------===//
8255 // endpgm
8256 //===----------------------------------------------------------------------===//
8257 
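// Parse the optional immediate operand used by the endpgm instructions.
// When omitted it defaults to 0; otherwise it must be a 16-bit value.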
8258 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
8259   SMLoc S = getLoc();
8260   int64_t Imm = 0;
8261 
8262   if (!parseExpr(Imm)) {
8263     // The operand is optional; if not present, default to 0.
8264     Imm = 0;
8265   }
8266 
8267   if (!isUInt<16>(Imm)) {
8268     Error(S, "expected a 16-bit value");
8269     return MatchOperand_ParseFail;
8270   }
8271 
8272   Operands.push_back(
8273       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
8274   return MatchOperand_Success;
8275 }
8276 
8277 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
8278