1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/MC/MCAsmInfo.h"
24 #include "llvm/MC/MCContext.h"
25 #include "llvm/MC/MCExpr.h"
26 #include "llvm/MC/MCInst.h"
27 #include "llvm/MC/MCParser/MCAsmParser.h"
28 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
29 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
30 #include "llvm/MC/MCSymbol.h"
31 #include "llvm/Support/AMDGPUMetadata.h"
32 #include "llvm/Support/AMDHSAKernelDescriptor.h"
33 #include "llvm/Support/Casting.h"
34 #include "llvm/Support/MachineValueType.h"
35 #include "llvm/Support/TargetParser.h"
36 #include "llvm/Support/TargetRegistry.h"
37 
38 using namespace llvm;
39 using namespace llvm::AMDGPU;
40 using namespace llvm::amdhsa;
41 
42 namespace {
43 
44 class AMDGPUAsmParser;
45 
46 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
47 
48 //===----------------------------------------------------------------------===//
49 // Operand
50 //===----------------------------------------------------------------------===//
51 
52 class AMDGPUOperand : public MCParsedAsmOperand {
53   enum KindTy {
54     Token,
55     Immediate,
56     Register,
57     Expression
58   } Kind;
59 
60   SMLoc StartLoc, EndLoc;
61   const AMDGPUAsmParser *AsmParser;
62 
63 public:
64   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
65     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
66 
67   using Ptr = std::unique_ptr<AMDGPUOperand>;
68 
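  // Source modifiers that can wrap an operand in assembly, e.g. '-v0' or
  // 'neg(v0)', '|v1|' or 'abs(v1)', and 'sext(v2)'. Floating-point modifiers
  // (abs/neg) and the integer modifier (sext) are never combined on one operand.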
69   struct Modifiers {
70     bool Abs = false;
71     bool Neg = false;
72     bool Sext = false;
73 
74     bool hasFPModifiers() const { return Abs || Neg; }
75     bool hasIntModifiers() const { return Sext; }
76     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
77 
78     int64_t getFPModifiersOperand() const {
79       int64_t Operand = 0;
80       Operand |= Abs ? SISrcMods::ABS : 0u;
81       Operand |= Neg ? SISrcMods::NEG : 0u;
82       return Operand;
83     }
84 
85     int64_t getIntModifiersOperand() const {
86       int64_t Operand = 0;
87       Operand |= Sext ? SISrcMods::SEXT : 0u;
88       return Operand;
89     }
90 
91     int64_t getModifiersOperand() const {
92       assert(!(hasFPModifiers() && hasIntModifiers())
93            && "fp and int modifiers should not be used simultaneously");
94       if (hasFPModifiers()) {
95         return getFPModifiersOperand();
96       } else if (hasIntModifiers()) {
97         return getIntModifiersOperand();
98       } else {
99         return 0;
100       }
101     }
102 
103     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
104   };
105 
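  // Identifies which named immediate operand or instruction modifier an
  // immediate represents (e.g. 'offset:...', 'gds', DPP/SDWA controls), so it
  // can be routed to the proper MCInst operand when converting instructions.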
106   enum ImmTy {
107     ImmTyNone,
108     ImmTyGDS,
109     ImmTyLDS,
110     ImmTyOffen,
111     ImmTyIdxen,
112     ImmTyAddr64,
113     ImmTyOffset,
114     ImmTyInstOffset,
115     ImmTyOffset0,
116     ImmTyOffset1,
117     ImmTyCPol,
118     ImmTySWZ,
119     ImmTyTFE,
120     ImmTyD16,
121     ImmTyClampSI,
122     ImmTyOModSI,
123     ImmTyDPP8,
124     ImmTyDppCtrl,
125     ImmTyDppRowMask,
126     ImmTyDppBankMask,
127     ImmTyDppBoundCtrl,
128     ImmTyDppFi,
129     ImmTySdwaDstSel,
130     ImmTySdwaSrc0Sel,
131     ImmTySdwaSrc1Sel,
132     ImmTySdwaDstUnused,
133     ImmTyDMask,
134     ImmTyDim,
135     ImmTyUNorm,
136     ImmTyDA,
137     ImmTyR128A16,
138     ImmTyA16,
139     ImmTyLWE,
140     ImmTyExpTgt,
141     ImmTyExpCompr,
142     ImmTyExpVM,
143     ImmTyFORMAT,
144     ImmTyHwreg,
145     ImmTyOff,
146     ImmTySendMsg,
147     ImmTyInterpSlot,
148     ImmTyInterpAttr,
149     ImmTyAttrChan,
150     ImmTyOpSel,
151     ImmTyOpSelHi,
152     ImmTyNegLo,
153     ImmTyNegHi,
154     ImmTySwizzle,
155     ImmTyGprIdxMode,
156     ImmTyHigh,
157     ImmTyBLGP,
158     ImmTyCBSZ,
159     ImmTyABID,
160     ImmTyEndpgm,
161   };
162 
163   enum ImmKindTy {
164     ImmKindTyNone,
165     ImmKindTyLiteral,
166     ImmKindTyConst,
167   };
168 
169 private:
170   struct TokOp {
171     const char *Data;
172     unsigned Length;
173   };
174 
175   struct ImmOp {
176     int64_t Val;
177     ImmTy Type;
178     bool IsFPImm;
179     mutable ImmKindTy Kind;
180     Modifiers Mods;
181   };
182 
183   struct RegOp {
184     unsigned RegNo;
185     Modifiers Mods;
186   };
187 
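  // Payload for the operand; only the member selected by Kind is valid.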
188   union {
189     TokOp Tok;
190     ImmOp Imm;
191     RegOp Reg;
192     const MCExpr *Expr;
193   };
194 
195 public:
196   bool isToken() const override {
197     if (Kind == Token)
198       return true;
199 
200     // When parsing operands, we can't always tell if something was meant to be
201     // a token, like 'gds', or an expression that references a global variable.
202     // In this case, we assume the string is an expression, and if we need to
203     // interpret it as a token, we treat the symbol name as the token.
204     return isSymbolRefExpr();
205   }
206 
207   bool isSymbolRefExpr() const {
208     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
209   }
210 
211   bool isImm() const override {
212     return Kind == Immediate;
213   }
214 
215   void setImmKindNone() const {
216     assert(isImm());
217     Imm.Kind = ImmKindTyNone;
218   }
219 
220   void setImmKindLiteral() const {
221     assert(isImm());
222     Imm.Kind = ImmKindTyLiteral;
223   }
224 
225   void setImmKindConst() const {
226     assert(isImm());
227     Imm.Kind = ImmKindTyConst;
228   }
229 
230   bool IsImmKindLiteral() const {
231     return isImm() && Imm.Kind == ImmKindTyLiteral;
232   }
233 
234   bool isImmKindConst() const {
235     return isImm() && Imm.Kind == ImmKindTyConst;
236   }
237 
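  // Whether the immediate can be encoded as a hardware inline constant, or as a
  // literal constant, for an operand of the given type.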
238   bool isInlinableImm(MVT type) const;
239   bool isLiteralImm(MVT type) const;
240 
241   bool isRegKind() const {
242     return Kind == Register;
243   }
244 
245   bool isReg() const override {
246     return isRegKind() && !hasModifiers();
247   }
248 
249   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
250     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
251   }
252 
253   bool isRegOrImmWithInt16InputMods() const {
254     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
255   }
256 
257   bool isRegOrImmWithInt32InputMods() const {
258     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
259   }
260 
261   bool isRegOrImmWithInt64InputMods() const {
262     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
263   }
264 
265   bool isRegOrImmWithFP16InputMods() const {
266     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
267   }
268 
269   bool isRegOrImmWithFP32InputMods() const {
270     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
271   }
272 
273   bool isRegOrImmWithFP64InputMods() const {
274     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
275   }
276 
277   bool isVReg() const {
278     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
279            isRegClass(AMDGPU::VReg_64RegClassID) ||
280            isRegClass(AMDGPU::VReg_96RegClassID) ||
281            isRegClass(AMDGPU::VReg_128RegClassID) ||
282            isRegClass(AMDGPU::VReg_160RegClassID) ||
283            isRegClass(AMDGPU::VReg_192RegClassID) ||
284            isRegClass(AMDGPU::VReg_256RegClassID) ||
285            isRegClass(AMDGPU::VReg_512RegClassID) ||
286            isRegClass(AMDGPU::VReg_1024RegClassID);
287   }
288 
289   bool isVReg32() const {
290     return isRegClass(AMDGPU::VGPR_32RegClassID);
291   }
292 
293   bool isVReg32OrOff() const {
294     return isOff() || isVReg32();
295   }
296 
297   bool isNull() const {
298     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
299   }
300 
301   bool isVRegWithInputMods() const;
302 
303   bool isSDWAOperand(MVT type) const;
304   bool isSDWAFP16Operand() const;
305   bool isSDWAFP32Operand() const;
306   bool isSDWAInt16Operand() const;
307   bool isSDWAInt32Operand() const;
308 
309   bool isImmTy(ImmTy ImmT) const {
310     return isImm() && Imm.Type == ImmT;
311   }
312 
313   bool isImmModifier() const {
314     return isImm() && Imm.Type != ImmTyNone;
315   }
316 
317   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
318   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
319   bool isDMask() const { return isImmTy(ImmTyDMask); }
320   bool isDim() const { return isImmTy(ImmTyDim); }
321   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
322   bool isDA() const { return isImmTy(ImmTyDA); }
323   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
324   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
325   bool isLWE() const { return isImmTy(ImmTyLWE); }
326   bool isOff() const { return isImmTy(ImmTyOff); }
327   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
328   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
329   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
330   bool isOffen() const { return isImmTy(ImmTyOffen); }
331   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
332   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
333   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
334   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
335   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
336 
337   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
338   bool isGDS() const { return isImmTy(ImmTyGDS); }
339   bool isLDS() const { return isImmTy(ImmTyLDS); }
340   bool isCPol() const { return isImmTy(ImmTyCPol); }
341   bool isSWZ() const { return isImmTy(ImmTySWZ); }
342   bool isTFE() const { return isImmTy(ImmTyTFE); }
343   bool isD16() const { return isImmTy(ImmTyD16); }
344   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
345   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
346   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
347   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
348   bool isFI() const { return isImmTy(ImmTyDppFi); }
349   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
350   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
351   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
352   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
353   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
354   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
355   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
356   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
357   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
358   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
359   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
360   bool isHigh() const { return isImmTy(ImmTyHigh); }
361 
362   bool isMod() const {
363     return isClampSI() || isOModSI();
364   }
365 
366   bool isRegOrImm() const {
367     return isReg() || isImm();
368   }
369 
370   bool isRegClass(unsigned RCID) const;
371 
372   bool isInlineValue() const;
373 
374   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
375     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
376   }
377 
378   bool isSCSrcB16() const {
379     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
380   }
381 
382   bool isSCSrcV2B16() const {
383     return isSCSrcB16();
384   }
385 
386   bool isSCSrcB32() const {
387     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
388   }
389 
390   bool isSCSrcB64() const {
391     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
392   }
393 
394   bool isBoolReg() const;
395 
396   bool isSCSrcF16() const {
397     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
398   }
399 
400   bool isSCSrcV2F16() const {
401     return isSCSrcF16();
402   }
403 
404   bool isSCSrcF32() const {
405     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
406   }
407 
408   bool isSCSrcF64() const {
409     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
410   }
411 
412   bool isSSrcB32() const {
413     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
414   }
415 
416   bool isSSrcB16() const {
417     return isSCSrcB16() || isLiteralImm(MVT::i16);
418   }
419 
420   bool isSSrcV2B16() const {
421     llvm_unreachable("cannot happen");
422     return isSSrcB16();
423   }
424 
425   bool isSSrcB64() const {
426     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
427     // See isVSrcB64().
428     return isSCSrcB64() || isLiteralImm(MVT::i64);
429   }
430 
431   bool isSSrcF32() const {
432     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
433   }
434 
435   bool isSSrcF64() const {
436     return isSCSrcB64() || isLiteralImm(MVT::f64);
437   }
438 
439   bool isSSrcF16() const {
440     return isSCSrcB16() || isLiteralImm(MVT::f16);
441   }
442 
443   bool isSSrcV2F16() const {
444     llvm_unreachable("cannot happen");
445     return isSSrcF16();
446   }
447 
448   bool isSSrcV2FP32() const {
449     llvm_unreachable("cannot happen");
450     return isSSrcF32();
451   }
452 
453   bool isSCSrcV2FP32() const {
454     llvm_unreachable("cannot happen");
455     return isSCSrcF32();
456   }
457 
458   bool isSSrcV2INT32() const {
459     llvm_unreachable("cannot happen");
460     return isSSrcB32();
461   }
462 
463   bool isSCSrcV2INT32() const {
464     llvm_unreachable("cannot happen");
465     return isSCSrcB32();
466   }
467 
468   bool isSSrcOrLdsB32() const {
469     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
470            isLiteralImm(MVT::i32) || isExpr();
471   }
472 
473   bool isVCSrcB32() const {
474     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
475   }
476 
477   bool isVCSrcB64() const {
478     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
479   }
480 
481   bool isVCSrcB16() const {
482     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
483   }
484 
485   bool isVCSrcV2B16() const {
486     return isVCSrcB16();
487   }
488 
489   bool isVCSrcF32() const {
490     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
491   }
492 
493   bool isVCSrcF64() const {
494     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
495   }
496 
497   bool isVCSrcF16() const {
498     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
499   }
500 
501   bool isVCSrcV2F16() const {
502     return isVCSrcF16();
503   }
504 
505   bool isVSrcB32() const {
506     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
507   }
508 
509   bool isVSrcB64() const {
510     return isVCSrcF64() || isLiteralImm(MVT::i64);
511   }
512 
513   bool isVSrcB16() const {
514     return isVCSrcB16() || isLiteralImm(MVT::i16);
515   }
516 
517   bool isVSrcV2B16() const {
518     return isVSrcB16() || isLiteralImm(MVT::v2i16);
519   }
520 
521   bool isVCSrcV2FP32() const {
522     return isVCSrcF64();
523   }
524 
525   bool isVSrcV2FP32() const {
526     return isVSrcF64() || isLiteralImm(MVT::v2f32);
527   }
528 
529   bool isVCSrcV2INT32() const {
530     return isVCSrcB64();
531   }
532 
533   bool isVSrcV2INT32() const {
534     return isVSrcB64() || isLiteralImm(MVT::v2i32);
535   }
536 
537   bool isVSrcF32() const {
538     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
539   }
540 
541   bool isVSrcF64() const {
542     return isVCSrcF64() || isLiteralImm(MVT::f64);
543   }
544 
545   bool isVSrcF16() const {
546     return isVCSrcF16() || isLiteralImm(MVT::f16);
547   }
548 
549   bool isVSrcV2F16() const {
550     return isVSrcF16() || isLiteralImm(MVT::v2f16);
551   }
552 
553   bool isVISrcB32() const {
554     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
555   }
556 
557   bool isVISrcB16() const {
558     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
559   }
560 
561   bool isVISrcV2B16() const {
562     return isVISrcB16();
563   }
564 
565   bool isVISrcF32() const {
566     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
567   }
568 
569   bool isVISrcF16() const {
570     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
571   }
572 
573   bool isVISrcV2F16() const {
574     return isVISrcF16() || isVISrcB32();
575   }
576 
577   bool isVISrc_64B64() const {
578     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
579   }
580 
581   bool isVISrc_64F64() const {
582     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
583   }
584 
585   bool isVISrc_64V2FP32() const {
586     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
587   }
588 
589   bool isVISrc_64V2INT32() const {
590     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
591   }
592 
593   bool isVISrc_256B64() const {
594     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
595   }
596 
597   bool isVISrc_256F64() const {
598     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
599   }
600 
601   bool isVISrc_128B16() const {
602     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
603   }
604 
605   bool isVISrc_128V2B16() const {
606     return isVISrc_128B16();
607   }
608 
609   bool isVISrc_128B32() const {
610     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
611   }
612 
613   bool isVISrc_128F32() const {
614     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
615   }
616 
617   bool isVISrc_256V2FP32() const {
618     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
619   }
620 
621   bool isVISrc_256V2INT32() const {
622     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
623   }
624 
625   bool isVISrc_512B32() const {
626     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
627   }
628 
629   bool isVISrc_512B16() const {
630     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
631   }
632 
633   bool isVISrc_512V2B16() const {
634     return isVISrc_512B16();
635   }
636 
637   bool isVISrc_512F32() const {
638     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
639   }
640 
641   bool isVISrc_512F16() const {
642     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
643   }
644 
645   bool isVISrc_512V2F16() const {
646     return isVISrc_512F16() || isVISrc_512B32();
647   }
648 
649   bool isVISrc_1024B32() const {
650     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
651   }
652 
653   bool isVISrc_1024B16() const {
654     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
655   }
656 
657   bool isVISrc_1024V2B16() const {
658     return isVISrc_1024B16();
659   }
660 
661   bool isVISrc_1024F32() const {
662     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
663   }
664 
665   bool isVISrc_1024F16() const {
666     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
667   }
668 
669   bool isVISrc_1024V2F16() const {
670     return isVISrc_1024F16() || isVISrc_1024B32();
671   }
672 
673   bool isAISrcB32() const {
674     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
675   }
676 
677   bool isAISrcB16() const {
678     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
679   }
680 
681   bool isAISrcV2B16() const {
682     return isAISrcB16();
683   }
684 
685   bool isAISrcF32() const {
686     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
687   }
688 
689   bool isAISrcF16() const {
690     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
691   }
692 
693   bool isAISrcV2F16() const {
694     return isAISrcF16() || isAISrcB32();
695   }
696 
697   bool isAISrc_64B64() const {
698     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
699   }
700 
701   bool isAISrc_64F64() const {
702     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
703   }
704 
705   bool isAISrc_128B32() const {
706     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
707   }
708 
709   bool isAISrc_128B16() const {
710     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
711   }
712 
713   bool isAISrc_128V2B16() const {
714     return isAISrc_128B16();
715   }
716 
717   bool isAISrc_128F32() const {
718     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
719   }
720 
721   bool isAISrc_128F16() const {
722     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
723   }
724 
725   bool isAISrc_128V2F16() const {
726     return isAISrc_128F16() || isAISrc_128B32();
727   }
728 
729   bool isVISrc_128F16() const {
730     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
731   }
732 
733   bool isVISrc_128V2F16() const {
734     return isVISrc_128F16() || isVISrc_128B32();
735   }
736 
737   bool isAISrc_256B64() const {
738     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
739   }
740 
741   bool isAISrc_256F64() const {
742     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
743   }
744 
745   bool isAISrc_512B32() const {
746     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
747   }
748 
749   bool isAISrc_512B16() const {
750     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
751   }
752 
753   bool isAISrc_512V2B16() const {
754     return isAISrc_512B16();
755   }
756 
757   bool isAISrc_512F32() const {
758     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
759   }
760 
761   bool isAISrc_512F16() const {
762     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
763   }
764 
765   bool isAISrc_512V2F16() const {
766     return isAISrc_512F16() || isAISrc_512B32();
767   }
768 
769   bool isAISrc_1024B32() const {
770     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
771   }
772 
773   bool isAISrc_1024B16() const {
774     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
775   }
776 
777   bool isAISrc_1024V2B16() const {
778     return isAISrc_1024B16();
779   }
780 
781   bool isAISrc_1024F32() const {
782     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
783   }
784 
785   bool isAISrc_1024F16() const {
786     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
787   }
788 
789   bool isAISrc_1024V2F16() const {
790     return isAISrc_1024F16() || isAISrc_1024B32();
791   }
792 
793   bool isKImmFP32() const {
794     return isLiteralImm(MVT::f32);
795   }
796 
797   bool isKImmFP16() const {
798     return isLiteralImm(MVT::f16);
799   }
800 
801   bool isMem() const override {
802     return false;
803   }
804 
805   bool isExpr() const {
806     return Kind == Expression;
807   }
808 
809   bool isSoppBrTarget() const {
810     return isExpr() || isImm();
811   }
812 
813   bool isSWaitCnt() const;
814   bool isHwreg() const;
815   bool isSendMsg() const;
816   bool isSwizzle() const;
817   bool isSMRDOffset8() const;
818   bool isSMEMOffset() const;
819   bool isSMRDLiteralOffset() const;
820   bool isDPP8() const;
821   bool isDPPCtrl() const;
822   bool isBLGP() const;
823   bool isCBSZ() const;
824   bool isABID() const;
825   bool isGPRIdxMode() const;
826   bool isS16Imm() const;
827   bool isU16Imm() const;
828   bool isEndpgm() const;
829 
830   StringRef getExpressionAsToken() const {
831     assert(isExpr());
832     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
833     return S->getSymbol().getName();
834   }
835 
836   StringRef getToken() const {
837     assert(isToken());
838 
839     if (Kind == Expression)
840       return getExpressionAsToken();
841 
842     return StringRef(Tok.Data, Tok.Length);
843   }
844 
845   int64_t getImm() const {
846     assert(isImm());
847     return Imm.Val;
848   }
849 
850   void setImm(int64_t Val) {
851     assert(isImm());
852     Imm.Val = Val;
853   }
854 
855   ImmTy getImmTy() const {
856     assert(isImm());
857     return Imm.Type;
858   }
859 
860   unsigned getReg() const override {
861     assert(isRegKind());
862     return Reg.RegNo;
863   }
864 
865   SMLoc getStartLoc() const override {
866     return StartLoc;
867   }
868 
869   SMLoc getEndLoc() const override {
870     return EndLoc;
871   }
872 
873   SMRange getLocRange() const {
874     return SMRange(StartLoc, EndLoc);
875   }
876 
877   Modifiers getModifiers() const {
878     assert(isRegKind() || isImmTy(ImmTyNone));
879     return isRegKind() ? Reg.Mods : Imm.Mods;
880   }
881 
882   void setModifiers(Modifiers Mods) {
883     assert(isRegKind() || isImmTy(ImmTyNone));
884     if (isRegKind())
885       Reg.Mods = Mods;
886     else
887       Imm.Mods = Mods;
888   }
889 
890   bool hasModifiers() const {
891     return getModifiers().hasModifiers();
892   }
893 
894   bool hasFPModifiers() const {
895     return getModifiers().hasFPModifiers();
896   }
897 
898   bool hasIntModifiers() const {
899     return getModifiers().hasIntModifiers();
900   }
901 
902   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
903 
904   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
905 
906   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
907 
908   template <unsigned Bitwidth>
909   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
910 
911   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
912     addKImmFPOperands<16>(Inst, N);
913   }
914 
915   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
916     addKImmFPOperands<32>(Inst, N);
917   }
918 
919   void addRegOperands(MCInst &Inst, unsigned N) const;
920 
921   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
922     addRegOperands(Inst, N);
923   }
924 
925   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
926     if (isRegKind())
927       addRegOperands(Inst, N);
928     else if (isExpr())
929       Inst.addOperand(MCOperand::createExpr(Expr));
930     else
931       addImmOperands(Inst, N);
932   }
933 
934   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
935     Modifiers Mods = getModifiers();
936     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
937     if (isRegKind()) {
938       addRegOperands(Inst, N);
939     } else {
940       addImmOperands(Inst, N, false);
941     }
942   }
943 
944   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
945     assert(!hasIntModifiers());
946     addRegOrImmWithInputModsOperands(Inst, N);
947   }
948 
949   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
950     assert(!hasFPModifiers());
951     addRegOrImmWithInputModsOperands(Inst, N);
952   }
953 
954   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
955     Modifiers Mods = getModifiers();
956     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
957     assert(isRegKind());
958     addRegOperands(Inst, N);
959   }
960 
961   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
962     assert(!hasIntModifiers());
963     addRegWithInputModsOperands(Inst, N);
964   }
965 
966   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
967     assert(!hasFPModifiers());
968     addRegWithInputModsOperands(Inst, N);
969   }
970 
971   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
972     if (isImm())
973       addImmOperands(Inst, N);
974     else {
975       assert(isExpr());
976       Inst.addOperand(MCOperand::createExpr(Expr));
977     }
978   }
979 
980   static void printImmTy(raw_ostream& OS, ImmTy Type) {
981     switch (Type) {
982     case ImmTyNone: OS << "None"; break;
983     case ImmTyGDS: OS << "GDS"; break;
984     case ImmTyLDS: OS << "LDS"; break;
985     case ImmTyOffen: OS << "Offen"; break;
986     case ImmTyIdxen: OS << "Idxen"; break;
987     case ImmTyAddr64: OS << "Addr64"; break;
988     case ImmTyOffset: OS << "Offset"; break;
989     case ImmTyInstOffset: OS << "InstOffset"; break;
990     case ImmTyOffset0: OS << "Offset0"; break;
991     case ImmTyOffset1: OS << "Offset1"; break;
992     case ImmTyCPol: OS << "CPol"; break;
993     case ImmTySWZ: OS << "SWZ"; break;
994     case ImmTyTFE: OS << "TFE"; break;
995     case ImmTyD16: OS << "D16"; break;
996     case ImmTyFORMAT: OS << "FORMAT"; break;
997     case ImmTyClampSI: OS << "ClampSI"; break;
998     case ImmTyOModSI: OS << "OModSI"; break;
999     case ImmTyDPP8: OS << "DPP8"; break;
1000     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1001     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1002     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1003     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1004     case ImmTyDppFi: OS << "FI"; break;
1005     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1006     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1007     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1008     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1009     case ImmTyDMask: OS << "DMask"; break;
1010     case ImmTyDim: OS << "Dim"; break;
1011     case ImmTyUNorm: OS << "UNorm"; break;
1012     case ImmTyDA: OS << "DA"; break;
1013     case ImmTyR128A16: OS << "R128A16"; break;
1014     case ImmTyA16: OS << "A16"; break;
1015     case ImmTyLWE: OS << "LWE"; break;
1016     case ImmTyOff: OS << "Off"; break;
1017     case ImmTyExpTgt: OS << "ExpTgt"; break;
1018     case ImmTyExpCompr: OS << "ExpCompr"; break;
1019     case ImmTyExpVM: OS << "ExpVM"; break;
1020     case ImmTyHwreg: OS << "Hwreg"; break;
1021     case ImmTySendMsg: OS << "SendMsg"; break;
1022     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1023     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1024     case ImmTyAttrChan: OS << "AttrChan"; break;
1025     case ImmTyOpSel: OS << "OpSel"; break;
1026     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1027     case ImmTyNegLo: OS << "NegLo"; break;
1028     case ImmTyNegHi: OS << "NegHi"; break;
1029     case ImmTySwizzle: OS << "Swizzle"; break;
1030     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1031     case ImmTyHigh: OS << "High"; break;
1032     case ImmTyBLGP: OS << "BLGP"; break;
1033     case ImmTyCBSZ: OS << "CBSZ"; break;
1034     case ImmTyABID: OS << "ABID"; break;
1035     case ImmTyEndpgm: OS << "Endpgm"; break;
1036     }
1037   }
1038 
1039   void print(raw_ostream &OS) const override {
1040     switch (Kind) {
1041     case Register:
1042       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1043       break;
1044     case Immediate:
1045       OS << '<' << getImm();
1046       if (getImmTy() != ImmTyNone) {
1047         OS << " type: "; printImmTy(OS, getImmTy());
1048       }
1049       OS << " mods: " << Imm.Mods << '>';
1050       break;
1051     case Token:
1052       OS << '\'' << getToken() << '\'';
1053       break;
1054     case Expression:
1055       OS << "<expr " << *Expr << '>';
1056       break;
1057     }
1058   }
1059 
1060   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1061                                       int64_t Val, SMLoc Loc,
1062                                       ImmTy Type = ImmTyNone,
1063                                       bool IsFPImm = false) {
1064     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1065     Op->Imm.Val = Val;
1066     Op->Imm.IsFPImm = IsFPImm;
1067     Op->Imm.Kind = ImmKindTyNone;
1068     Op->Imm.Type = Type;
1069     Op->Imm.Mods = Modifiers();
1070     Op->StartLoc = Loc;
1071     Op->EndLoc = Loc;
1072     return Op;
1073   }
1074 
1075   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1076                                         StringRef Str, SMLoc Loc,
1077                                         bool HasExplicitEncodingSize = true) {
1078     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1079     Res->Tok.Data = Str.data();
1080     Res->Tok.Length = Str.size();
1081     Res->StartLoc = Loc;
1082     Res->EndLoc = Loc;
1083     return Res;
1084   }
1085 
1086   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1087                                       unsigned RegNo, SMLoc S,
1088                                       SMLoc E) {
1089     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1090     Op->Reg.RegNo = RegNo;
1091     Op->Reg.Mods = Modifiers();
1092     Op->StartLoc = S;
1093     Op->EndLoc = E;
1094     return Op;
1095   }
1096 
1097   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1098                                        const class MCExpr *Expr, SMLoc S) {
1099     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1100     Op->Expr = Expr;
1101     Op->StartLoc = S;
1102     Op->EndLoc = S;
1103     return Op;
1104   }
1105 };
1106 
1107 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1108   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1109   return OS;
1110 }
1111 
1112 //===----------------------------------------------------------------------===//
1113 // AsmParser
1114 //===----------------------------------------------------------------------===//
1115 
1116 // Holds info related to the current kernel, e.g. the count of SGPRs used.
1117 // A kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the
1118 // next .amdgpu_hsa_kernel directive or at EOF.
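// For example, after parsing 'v_mov_b32 v5, s7' the scope records
// .kernel.vgpr_count = 6 and .kernel.sgpr_count = 8 (highest register index
// used, plus one).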
1119 class KernelScopeInfo {
1120   int SgprIndexUnusedMin = -1;
1121   int VgprIndexUnusedMin = -1;
1122   MCContext *Ctx = nullptr;
1123 
1124   void usesSgprAt(int i) {
1125     if (i >= SgprIndexUnusedMin) {
1126       SgprIndexUnusedMin = ++i;
1127       if (Ctx) {
1128         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1129         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1130       }
1131     }
1132   }
1133 
1134   void usesVgprAt(int i) {
1135     if (i >= VgprIndexUnusedMin) {
1136       VgprIndexUnusedMin = ++i;
1137       if (Ctx) {
1138         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1139         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
1140       }
1141     }
1142   }
1143 
1144 public:
1145   KernelScopeInfo() = default;
1146 
1147   void initialize(MCContext &Context) {
1148     Ctx = &Context;
1149     usesSgprAt(SgprIndexUnusedMin = -1);
1150     usesVgprAt(VgprIndexUnusedMin = -1);
1151   }
1152 
1153   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
1154     switch (RegKind) {
1155       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
1156       case IS_AGPR: // fall through
1157       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1158       default: break;
1159     }
1160   }
1161 };
1162 
1163 class AMDGPUAsmParser : public MCTargetAsmParser {
1164   MCAsmParser &Parser;
1165 
1166   // Number of extra operands parsed after the first optional operand.
1167   // This may be necessary to skip hardcoded mandatory operands.
1168   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1169 
1170   unsigned ForcedEncodingSize = 0;
1171   bool ForcedDPP = false;
1172   bool ForcedSDWA = false;
1173   KernelScopeInfo KernelScope;
1174   unsigned CPolSeen;
1175 
1176   /// @name Auto-generated Match Functions
1177   /// {
1178 
1179 #define GET_ASSEMBLER_HEADER
1180 #include "AMDGPUGenAsmMatcher.inc"
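// Among other things, the generated header declares MatchInstructionImpl() and
// ComputeAvailableFeatures(), used by MatchAndEmitInstruction() and the
// constructor below.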
1181 
1182   /// }
1183 
1184 private:
1185   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1186   bool OutOfRangeError(SMRange Range);
1187   /// Calculate the VGPR/SGPR blocks required for the given target, reserved
1188   /// registers, and user-specified NextFreeXGPR values.
1189   ///
1190   /// \param Features [in] Target features, used for bug corrections.
1191   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1192   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1193   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1194   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1195   /// descriptor field, if valid.
1196   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1197   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1198   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1199   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1200   /// \param VGPRBlocks [out] Result VGPR block count.
1201   /// \param SGPRBlocks [out] Result SGPR block count.
1202   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1203                           bool FlatScrUsed, bool XNACKUsed,
1204                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1205                           SMRange VGPRRange, unsigned NextFreeSGPR,
1206                           SMRange SGPRRange, unsigned &VGPRBlocks,
1207                           unsigned &SGPRBlocks);
1208   bool ParseDirectiveAMDGCNTarget();
1209   bool ParseDirectiveAMDHSAKernel();
1210   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1211   bool ParseDirectiveHSACodeObjectVersion();
1212   bool ParseDirectiveHSACodeObjectISA();
1213   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1214   bool ParseDirectiveAMDKernelCodeT();
1215   // TODO: Possibly make subtargetHasRegister const.
1216   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1217   bool ParseDirectiveAMDGPUHsaKernel();
1218 
1219   bool ParseDirectiveISAVersion();
1220   bool ParseDirectiveHSAMetadata();
1221   bool ParseDirectivePALMetadataBegin();
1222   bool ParseDirectivePALMetadata();
1223   bool ParseDirectiveAMDGPULDS();
1224 
1225   /// Common code to parse out a block of text (typically YAML) between start and
1226   /// end directives.
1227   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1228                            const char *AssemblerDirectiveEnd,
1229                            std::string &CollectString);
1230 
1231   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1232                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1233   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1234                            unsigned &RegNum, unsigned &RegWidth,
1235                            bool RestoreOnFailure = false);
1236   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1237                            unsigned &RegNum, unsigned &RegWidth,
1238                            SmallVectorImpl<AsmToken> &Tokens);
1239   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1240                            unsigned &RegWidth,
1241                            SmallVectorImpl<AsmToken> &Tokens);
1242   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1243                            unsigned &RegWidth,
1244                            SmallVectorImpl<AsmToken> &Tokens);
1245   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1246                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1247   bool ParseRegRange(unsigned& Num, unsigned& Width);
1248   unsigned getRegularReg(RegisterKind RegKind,
1249                          unsigned RegNum,
1250                          unsigned RegWidth,
1251                          SMLoc Loc);
1252 
1253   bool isRegister();
1254   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1255   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1256   void initializeGprCountSymbol(RegisterKind RegKind);
1257   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1258                              unsigned RegWidth);
1259   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1260                     bool IsAtomic, bool IsLds = false);
1261   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1262                  bool IsGdsHardcoded);
1263 
1264 public:
1265   enum AMDGPUMatchResultTy {
1266     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1267   };
1268   enum OperandMode {
1269     OperandMode_Default,
1270     OperandMode_NSA,
1271   };
1272 
1273   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1274 
1275   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1276                const MCInstrInfo &MII,
1277                const MCTargetOptions &Options)
1278       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1279     MCAsmParserExtension::Initialize(Parser);
1280 
1281     if (getFeatureBits().none()) {
1282       // Set default features.
1283       copySTI().ToggleFeature("southern-islands");
1284     }
1285 
1286     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1287 
1288     {
1289       // TODO: make these pre-defined variables read-only.
1290       // Currently there is no suitable machinery in core llvm-mc for this.
1291       // MCSymbol::isRedefinable is intended for another purpose, and
1292       // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
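      // E.g. for gfx90a the generation symbols below evaluate to 9, 0 and 10,
      // respectively; on older ABIs the equivalent .option.machine_version_*
      // symbols are set instead.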
1293       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1294       MCContext &Ctx = getContext();
1295       if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
1296         MCSymbol *Sym =
1297             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1298         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1299         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1300         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1301         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1302         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1303       } else {
1304         MCSymbol *Sym =
1305             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1306         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1307         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1308         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1309         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1310         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1311       }
1312       if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
1313         initializeGprCountSymbol(IS_VGPR);
1314         initializeGprCountSymbol(IS_SGPR);
1315       } else
1316         KernelScope.initialize(getContext());
1317     }
1318   }
1319 
1320   bool hasMIMG_R128() const {
1321     return AMDGPU::hasMIMG_R128(getSTI());
1322   }
1323 
1324   bool hasPackedD16() const {
1325     return AMDGPU::hasPackedD16(getSTI());
1326   }
1327 
1328   bool hasGFX10A16() const {
1329     return AMDGPU::hasGFX10A16(getSTI());
1330   }
1331 
1332   bool isSI() const {
1333     return AMDGPU::isSI(getSTI());
1334   }
1335 
1336   bool isCI() const {
1337     return AMDGPU::isCI(getSTI());
1338   }
1339 
1340   bool isVI() const {
1341     return AMDGPU::isVI(getSTI());
1342   }
1343 
1344   bool isGFX9() const {
1345     return AMDGPU::isGFX9(getSTI());
1346   }
1347 
1348   bool isGFX90A() const {
1349     return AMDGPU::isGFX90A(getSTI());
1350   }
1351 
1352   bool isGFX9Plus() const {
1353     return AMDGPU::isGFX9Plus(getSTI());
1354   }
1355 
1356   bool isGFX10() const {
1357     return AMDGPU::isGFX10(getSTI());
1358   }
1359 
1360   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1361 
1362   bool isGFX10_BEncoding() const {
1363     return AMDGPU::isGFX10_BEncoding(getSTI());
1364   }
1365 
1366   bool hasInv2PiInlineImm() const {
1367     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1368   }
1369 
1370   bool hasFlatOffsets() const {
1371     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1372   }
1373 
1374   bool hasSGPR102_SGPR103() const {
1375     return !isVI() && !isGFX9();
1376   }
1377 
1378   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1379 
1380   bool hasIntClamp() const {
1381     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1382   }
1383 
1384   AMDGPUTargetStreamer &getTargetStreamer() {
1385     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1386     return static_cast<AMDGPUTargetStreamer &>(TS);
1387   }
1388 
1389   const MCRegisterInfo *getMRI() const {
1390     // We need this const_cast because for some reason getContext() is not const
1391     // in MCAsmParser.
1392     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1393   }
1394 
1395   const MCInstrInfo *getMII() const {
1396     return &MII;
1397   }
1398 
1399   const FeatureBitset &getFeatureBits() const {
1400     return getSTI().getFeatureBits();
1401   }
1402 
1403   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1404   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1405   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1406 
1407   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1408   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1409   bool isForcedDPP() const { return ForcedDPP; }
1410   bool isForcedSDWA() const { return ForcedSDWA; }
1411   ArrayRef<unsigned> getMatchedVariants() const;
1412   StringRef getMatchedVariantName() const;
1413 
1414   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1415   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1416                      bool RestoreOnFailure);
1417   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1418   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1419                                         SMLoc &EndLoc) override;
1420   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1421   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1422                                       unsigned Kind) override;
1423   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1424                                OperandVector &Operands, MCStreamer &Out,
1425                                uint64_t &ErrorInfo,
1426                                bool MatchingInlineAsm) override;
1427   bool ParseDirective(AsmToken DirectiveID) override;
1428   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1429                                     OperandMode Mode = OperandMode_Default);
1430   StringRef parseMnemonicSuffix(StringRef Name);
1431   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1432                         SMLoc NameLoc, OperandVector &Operands) override;
1433   //bool ProcessInstruction(MCInst &Inst);
1434 
1435   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1436 
1437   OperandMatchResultTy
1438   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1439                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1440                      bool (*ConvertResult)(int64_t &) = nullptr);
1441 
1442   OperandMatchResultTy
1443   parseOperandArrayWithPrefix(const char *Prefix,
1444                               OperandVector &Operands,
1445                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1446                               bool (*ConvertResult)(int64_t&) = nullptr);
1447 
1448   OperandMatchResultTy
1449   parseNamedBit(StringRef Name, OperandVector &Operands,
1450                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1451   OperandMatchResultTy parseCPol(OperandVector &Operands);
1452   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1453                                              StringRef &Value,
1454                                              SMLoc &StringLoc);
1455 
1456   bool isModifier();
1457   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1458   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1459   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1460   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1461   bool parseSP3NegModifier();
1462   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1463   OperandMatchResultTy parseReg(OperandVector &Operands);
1464   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1465   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1466   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1467   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1468   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1469   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1470   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1471   OperandMatchResultTy parseUfmt(int64_t &Format);
1472   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1473   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1474   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1475   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1476   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1477   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1478   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1479 
1480   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1481   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1482   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1483   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1484 
1485   bool parseCnt(int64_t &IntVal);
1486   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1487   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1488 
1489 private:
1490   struct OperandInfoTy {
1491     SMLoc Loc;
1492     int64_t Id;
1493     bool IsSymbolic = false;
1494     bool IsDefined = false;
1495 
1496     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1497   };
1498 
1499   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1500   bool validateSendMsg(const OperandInfoTy &Msg,
1501                        const OperandInfoTy &Op,
1502                        const OperandInfoTy &Stream);
1503 
1504   bool parseHwregBody(OperandInfoTy &HwReg,
1505                       OperandInfoTy &Offset,
1506                       OperandInfoTy &Width);
1507   bool validateHwreg(const OperandInfoTy &HwReg,
1508                      const OperandInfoTy &Offset,
1509                      const OperandInfoTy &Width);
1510 
1511   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1512   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1513 
1514   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1515                       const OperandVector &Operands) const;
1516   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1517   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1518   SMLoc getLitLoc(const OperandVector &Operands) const;
1519   SMLoc getConstLoc(const OperandVector &Operands) const;
1520 
1521   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1522   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1523   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1524   bool validateSOPLiteral(const MCInst &Inst) const;
1525   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1526   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1527   bool validateIntClampSupported(const MCInst &Inst);
1528   bool validateMIMGAtomicDMask(const MCInst &Inst);
1529   bool validateMIMGGatherDMask(const MCInst &Inst);
1530   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1531   bool validateMIMGDataSize(const MCInst &Inst);
1532   bool validateMIMGAddrSize(const MCInst &Inst);
1533   bool validateMIMGD16(const MCInst &Inst);
1534   bool validateMIMGDim(const MCInst &Inst);
1535   bool validateMIMGMSAA(const MCInst &Inst);
1536   bool validateOpSel(const MCInst &Inst);
1537   bool validateVccOperand(unsigned Reg) const;
1538   bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
1539   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1540   bool validateAGPRLdSt(const MCInst &Inst) const;
1541   bool validateVGPRAlign(const MCInst &Inst) const;
1542   bool validateDivScale(const MCInst &Inst);
1543   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1544                              const SMLoc &IDLoc);
1545   Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1546   unsigned getConstantBusLimit(unsigned Opcode) const;
1547   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1548   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1549   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1550 
1551   bool isSupportedMnemo(StringRef Mnemo,
1552                         const FeatureBitset &FBS);
1553   bool isSupportedMnemo(StringRef Mnemo,
1554                         const FeatureBitset &FBS,
1555                         ArrayRef<unsigned> Variants);
1556   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1557 
1558   bool isId(const StringRef Id) const;
1559   bool isId(const AsmToken &Token, const StringRef Id) const;
1560   bool isToken(const AsmToken::TokenKind Kind) const;
1561   bool trySkipId(const StringRef Id);
1562   bool trySkipId(const StringRef Pref, const StringRef Id);
1563   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1564   bool trySkipToken(const AsmToken::TokenKind Kind);
1565   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1566   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1567   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1568 
1569   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1570   AsmToken::TokenKind getTokenKind() const;
1571   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1572   bool parseExpr(OperandVector &Operands);
1573   StringRef getTokenStr() const;
1574   AsmToken peekToken();
1575   AsmToken getToken() const;
1576   SMLoc getLoc() const;
1577   void lex();
1578 
1579 public:
1580   void onBeginOfFile() override;
1581 
1582   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1583   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1584 
1585   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1586   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1587   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1588   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1589   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1590   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1591 
1592   bool parseSwizzleOperand(int64_t &Op,
1593                            const unsigned MinVal,
1594                            const unsigned MaxVal,
1595                            const StringRef ErrMsg,
1596                            SMLoc &Loc);
1597   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1598                             const unsigned MinVal,
1599                             const unsigned MaxVal,
1600                             const StringRef ErrMsg);
1601   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1602   bool parseSwizzleOffset(int64_t &Imm);
1603   bool parseSwizzleMacro(int64_t &Imm);
1604   bool parseSwizzleQuadPerm(int64_t &Imm);
1605   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1606   bool parseSwizzleBroadcast(int64_t &Imm);
1607   bool parseSwizzleSwap(int64_t &Imm);
1608   bool parseSwizzleReverse(int64_t &Imm);
1609 
1610   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1611   int64_t parseGPRIdxMacro();
1612 
1613   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1614   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1615   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1616   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1617 
1618   AMDGPUOperand::Ptr defaultCPol() const;
1619 
1620   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1621   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1622   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1623   AMDGPUOperand::Ptr defaultFlatOffset() const;
1624 
1625   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1626 
1627   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1628                OptionalImmIndexMap &OptionalIdx);
1629   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1630   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1631   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1632 
1633   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1634 
1635   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1636                bool IsAtomic = false);
1637   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1638   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1639 
1640   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1641 
1642   bool parseDimId(unsigned &Encoding);
1643   OperandMatchResultTy parseDim(OperandVector &Operands);
1644   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1645   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1646   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1647   int64_t parseDPPCtrlSel(StringRef Ctrl);
1648   int64_t parseDPPCtrlPerm();
1649   AMDGPUOperand::Ptr defaultRowMask() const;
1650   AMDGPUOperand::Ptr defaultBankMask() const;
1651   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1652   AMDGPUOperand::Ptr defaultFI() const;
1653   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1654   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1655 
1656   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1657                                     AMDGPUOperand::ImmTy Type);
1658   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1659   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1660   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1661   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1662   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1663   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1664   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1665                uint64_t BasicInstType,
1666                bool SkipDstVcc = false,
1667                bool SkipSrcVcc = false);
1668 
1669   AMDGPUOperand::Ptr defaultBLGP() const;
1670   AMDGPUOperand::Ptr defaultCBSZ() const;
1671   AMDGPUOperand::Ptr defaultABID() const;
1672 
1673   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1674   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1675 };
1676 
1677 struct OptionalOperand {
1678   const char *Name;
1679   AMDGPUOperand::ImmTy Type;
1680   bool IsBit;
1681   bool (*ConvertResult)(int64_t&);
1682 };
1683 
1684 } // end anonymous namespace
1685 
1686 // May be called with an integer type of equivalent bit width.
1687 static const fltSemantics *getFltSemantics(unsigned Size) {
1688   switch (Size) {
1689   case 4:
1690     return &APFloat::IEEEsingle();
1691   case 8:
1692     return &APFloat::IEEEdouble();
1693   case 2:
1694     return &APFloat::IEEEhalf();
1695   default:
1696     llvm_unreachable("unsupported fp type");
1697   }
1698 }
1699 
1700 static const fltSemantics *getFltSemantics(MVT VT) {
1701   return getFltSemantics(VT.getSizeInBits() / 8);
1702 }
1703 
1704 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1705   switch (OperandType) {
1706   case AMDGPU::OPERAND_REG_IMM_INT32:
1707   case AMDGPU::OPERAND_REG_IMM_FP32:
1708   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1709   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1710   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1711   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1712   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1713   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1714   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1715   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1716     return &APFloat::IEEEsingle();
1717   case AMDGPU::OPERAND_REG_IMM_INT64:
1718   case AMDGPU::OPERAND_REG_IMM_FP64:
1719   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1720   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1721   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1722     return &APFloat::IEEEdouble();
1723   case AMDGPU::OPERAND_REG_IMM_INT16:
1724   case AMDGPU::OPERAND_REG_IMM_FP16:
1725   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1726   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1727   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1728   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1729   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1730   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1731   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1732   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1733   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1734   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1735     return &APFloat::IEEEhalf();
1736   default:
1737     llvm_unreachable("unsupported fp type");
1738   }
1739 }
1740 
1741 //===----------------------------------------------------------------------===//
1742 // Operand
1743 //===----------------------------------------------------------------------===//
1744 
1745 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1746   bool Lost;
1747 
1748   // Convert the literal to the floating-point semantics of the target type.
1749   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1750                                                APFloat::rmNearestTiesToEven,
1751                                                &Lost);
1752   // We allow precision loss but not overflow or underflow.
1753   if (Status != APFloat::opOK &&
1754       Lost &&
1755       ((Status & APFloat::opOverflow)  != 0 ||
1756        (Status & APFloat::opUnderflow) != 0)) {
1757     return false;
1758   }
1759 
1760   return true;
1761 }
1762 
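// Check whether Val fits into Size bits as either an unsigned or a signed
// integer, i.e. whether truncation to Size bits loses no information.
// For example, both 0xFFFF and -1 truncate safely to 16 bits.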
1763 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1764   return isUIntN(Size, Val) || isIntN(Size, Val);
1765 }
1766 
1767 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1768   if (VT.getScalarType() == MVT::i16) {
1769     // FP inline immediates are broken for i16 operands; only accept integer inline literals.
1770     return isInlinableIntLiteral(Val);
1771   }
1772 
1773   // f16/v2f16 operands work correctly for all values.
1774   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1775 }
1776 
1777 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1778 
1779   // This is a hack to enable named inline values like
1780   // shared_base with both 32-bit and 64-bit operands.
1781   // Note that these values are defined as
1782   // 32-bit operands only.
1783   if (isInlineValue()) {
1784     return true;
1785   }
1786 
1787   if (!isImmTy(ImmTyNone)) {
1788     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1789     return false;
1790   }
1791   // TODO: We should avoid using host float here. It would be better to
1792   // check the float bit values which is what a few other places do.
1793   // We've had bot failures before due to weird NaN support on mips hosts.
1794 
1795   APInt Literal(64, Imm.Val);
1796 
1797   if (Imm.IsFPImm) { // We got an fp literal token
1798     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1799       return AMDGPU::isInlinableLiteral64(Imm.Val,
1800                                           AsmParser->hasInv2PiInlineImm());
1801     }
1802 
1803     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1804     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1805       return false;
1806 
1807     if (type.getScalarSizeInBits() == 16) {
1808       return isInlineableLiteralOp16(
1809         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1810         type, AsmParser->hasInv2PiInlineImm());
1811     }
1812 
1813     // Check if single precision literal is inlinable
1814     return AMDGPU::isInlinableLiteral32(
1815       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1816       AsmParser->hasInv2PiInlineImm());
1817   }
1818 
1819   // We got an integer literal token.
1820   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1821     return AMDGPU::isInlinableLiteral64(Imm.Val,
1822                                         AsmParser->hasInv2PiInlineImm());
1823   }
1824 
1825   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1826     return false;
1827   }
1828 
1829   if (type.getScalarSizeInBits() == 16) {
1830     return isInlineableLiteralOp16(
1831       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1832       type, AsmParser->hasInv2PiInlineImm());
1833   }
1834 
1835   return AMDGPU::isInlinableLiteral32(
1836     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1837     AsmParser->hasInv2PiInlineImm());
1838 }
1839 
1840 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1841   // Check that this immediate can be added as literal
1842   if (!isImmTy(ImmTyNone)) {
1843     return false;
1844   }
1845 
1846   if (!Imm.IsFPImm) {
1847     // We got an integer literal token.
1848 
1849     if (type == MVT::f64 && hasFPModifiers()) {
1850       // Applying fp modifiers to int literals cannot preserve the same semantics
1851       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1852       // reject these cases.
1853       return false;
1854     }
1855 
1856     unsigned Size = type.getSizeInBits();
1857     if (Size == 64)
1858       Size = 32;
1859 
1860     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1861     // types.
1862     return isSafeTruncation(Imm.Val, Size);
1863   }
1864 
1865   // We got an fp literal token
1866   if (type == MVT::f64) { // Expected 64-bit fp operand
1867     // The low 32 bits of the literal will be zeroed out, but we accept such literals.
1868     return true;
1869   }
1870 
1871   if (type == MVT::i64) { // Expected 64-bit int operand
1872     // We don't allow fp literals in 64-bit integer instructions. It is
1873     // unclear how we should encode them.
1874     return false;
1875   }
1876 
1877   // We allow fp literals with f16x2 operands assuming that the specified
1878   // literal goes into the lower half and the upper half is zero. We also
1879   // require that the literal can be losslessly converted to f16.
1880   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1881                      (type == MVT::v2i16)? MVT::i16 :
1882                      (type == MVT::v2f32)? MVT::f32 : type;
1883 
1884   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1885   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1886 }
1887 
1888 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1889   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1890 }
1891 
1892 bool AMDGPUOperand::isVRegWithInputMods() const {
1893   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1894          // GFX90A allows DPP on 64-bit operands.
1895          (isRegClass(AMDGPU::VReg_64RegClassID) &&
1896           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1897 }
1898 
1899 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1900   if (AsmParser->isVI())
1901     return isVReg32();
1902   else if (AsmParser->isGFX9Plus())
1903     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1904   else
1905     return false;
1906 }
1907 
1908 bool AMDGPUOperand::isSDWAFP16Operand() const {
1909   return isSDWAOperand(MVT::f16);
1910 }
1911 
1912 bool AMDGPUOperand::isSDWAFP32Operand() const {
1913   return isSDWAOperand(MVT::f32);
1914 }
1915 
1916 bool AMDGPUOperand::isSDWAInt16Operand() const {
1917   return isSDWAOperand(MVT::i16);
1918 }
1919 
1920 bool AMDGPUOperand::isSDWAInt32Operand() const {
1921   return isSDWAOperand(MVT::i32);
1922 }
1923 
1924 bool AMDGPUOperand::isBoolReg() const {
1925   return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1926          (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1927 }
1928 
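// Apply the parsed 'abs'/'neg' source modifiers to the raw bits of a
// floating-point immediate of the given size in bytes: 'abs' clears the
// sign bit and 'neg' flips it.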
1929 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1930 {
1931   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1932   assert(Size == 2 || Size == 4 || Size == 8);
1933 
1934   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1935 
1936   if (Imm.Mods.Abs) {
1937     Val &= ~FpSignMask;
1938   }
1939   if (Imm.Mods.Neg) {
1940     Val ^= FpSignMask;
1941   }
1942 
1943   return Val;
1944 }
1945 
1946 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1947   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1948                              Inst.getNumOperands())) {
1949     addLiteralImmOperand(Inst, Imm.Val,
1950                          ApplyModifiers &&
1951                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1952   } else {
1953     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1954     Inst.addOperand(MCOperand::createImm(Imm.Val));
1955     setImmKindNone();
1956   }
1957 }
1958 
1959 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1960   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1961   auto OpNum = Inst.getNumOperands();
1962   // Check that this operand accepts literals
1963   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1964 
1965   if (ApplyModifiers) {
1966     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1967     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1968     Val = applyInputFPModifiers(Val, Size);
1969   }
1970 
1971   APInt Literal(64, Val);
1972   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1973 
1974   if (Imm.IsFPImm) { // We got an fp literal token
1975     switch (OpTy) {
1976     case AMDGPU::OPERAND_REG_IMM_INT64:
1977     case AMDGPU::OPERAND_REG_IMM_FP64:
1978     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1979     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1980     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1981       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1982                                        AsmParser->hasInv2PiInlineImm())) {
1983         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1984         setImmKindConst();
1985         return;
1986       }
1987 
1988       // Non-inlineable
1989       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1990         // For fp operands we check if low 32 bits are zeros
1991         if (Literal.getLoBits(32) != 0) {
1992           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1993           "Can't encode literal as exact 64-bit floating-point operand. "
1994           "Low 32-bits will be set to zero");
1995         }
1996 
1997         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1998         setImmKindLiteral();
1999         return;
2000       }
2001 
2002       // We don't allow fp literals in 64-bit integer instructions. It is
2003       // unclear how we should encode them. This case should be checked earlier
2004       // in predicate methods (isLiteralImm())
2005       llvm_unreachable("fp literal in 64-bit integer instruction.");
2006 
2007     case AMDGPU::OPERAND_REG_IMM_INT32:
2008     case AMDGPU::OPERAND_REG_IMM_FP32:
2009     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2010     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2011     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2012     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2013     case AMDGPU::OPERAND_REG_IMM_INT16:
2014     case AMDGPU::OPERAND_REG_IMM_FP16:
2015     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2016     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2017     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2018     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2019     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2020     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2021     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2022     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2023     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2024     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2025     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2026     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2027     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2028     case AMDGPU::OPERAND_REG_IMM_V2INT32: {
2029       bool Lost;
2030       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2031       // Convert the literal to the operand's floating-point type.
2032       FPLiteral.convert(*getOpFltSemantics(OpTy),
2033                         APFloat::rmNearestTiesToEven, &Lost);
2034       // We allow precision loss but not overflow or underflow. This should have
2035       // been checked earlier in isLiteralImm().
2036 
2037       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2038       Inst.addOperand(MCOperand::createImm(ImmVal));
2039       setImmKindLiteral();
2040       return;
2041     }
2042     default:
2043       llvm_unreachable("invalid operand size");
2044     }
2045 
2046     return;
2047   }
2048 
2049   // We got an integer literal token.
2050   // Only sign extend inline immediates.
2051   switch (OpTy) {
2052   case AMDGPU::OPERAND_REG_IMM_INT32:
2053   case AMDGPU::OPERAND_REG_IMM_FP32:
2054   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2055   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2056   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2057   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2058   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2059   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2060   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2061   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2062   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2063   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2064     if (isSafeTruncation(Val, 32) &&
2065         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2066                                      AsmParser->hasInv2PiInlineImm())) {
2067       Inst.addOperand(MCOperand::createImm(Val));
2068       setImmKindConst();
2069       return;
2070     }
2071 
2072     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2073     setImmKindLiteral();
2074     return;
2075 
2076   case AMDGPU::OPERAND_REG_IMM_INT64:
2077   case AMDGPU::OPERAND_REG_IMM_FP64:
2078   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2079   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2080   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2081     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2082       Inst.addOperand(MCOperand::createImm(Val));
2083       setImmKindConst();
2084       return;
2085     }
2086 
2087     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2088     setImmKindLiteral();
2089     return;
2090 
2091   case AMDGPU::OPERAND_REG_IMM_INT16:
2092   case AMDGPU::OPERAND_REG_IMM_FP16:
2093   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2094   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2095   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2096   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2097     if (isSafeTruncation(Val, 16) &&
2098         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2099                                      AsmParser->hasInv2PiInlineImm())) {
2100       Inst.addOperand(MCOperand::createImm(Val));
2101       setImmKindConst();
2102       return;
2103     }
2104 
2105     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2106     setImmKindLiteral();
2107     return;
2108 
2109   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2110   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2111   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2112   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2113     assert(isSafeTruncation(Val, 16));
2114     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2115                                         AsmParser->hasInv2PiInlineImm()));
2116 
2117     Inst.addOperand(MCOperand::createImm(Val));
2118     return;
2119   }
2120   default:
2121     llvm_unreachable("invalid operand size");
2122   }
2123 }
2124 
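// Add a KIMM literal operand of the given bit width. Integer tokens are
// truncated to Bitwidth; floating-point tokens are converted to the
// matching IEEE format (half for 16 bits, single for 32 bits).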
2125 template <unsigned Bitwidth>
2126 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2127   APInt Literal(64, Imm.Val);
2128   setImmKindNone();
2129 
2130   if (!Imm.IsFPImm) {
2131     // We got an integer literal token.
2132     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2133     return;
2134   }
2135 
2136   bool Lost;
2137   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2138   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2139                     APFloat::rmNearestTiesToEven, &Lost);
2140   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2141 }
2142 
2143 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2144   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2145 }
2146 
2147 static bool isInlineValue(unsigned Reg) {
2148   switch (Reg) {
2149   case AMDGPU::SRC_SHARED_BASE:
2150   case AMDGPU::SRC_SHARED_LIMIT:
2151   case AMDGPU::SRC_PRIVATE_BASE:
2152   case AMDGPU::SRC_PRIVATE_LIMIT:
2153   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2154     return true;
2155   case AMDGPU::SRC_VCCZ:
2156   case AMDGPU::SRC_EXECZ:
2157   case AMDGPU::SRC_SCC:
2158     return true;
2159   case AMDGPU::SGPR_NULL:
2160     return true;
2161   default:
2162     return false;
2163   }
2164 }
2165 
2166 bool AMDGPUOperand::isInlineValue() const {
2167   return isRegKind() && ::isInlineValue(getReg());
2168 }
2169 
2170 //===----------------------------------------------------------------------===//
2171 // AsmParser
2172 //===----------------------------------------------------------------------===//
2173 
2174 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2175   if (Is == IS_VGPR) {
2176     switch (RegWidth) {
2177       default: return -1;
2178       case 1: return AMDGPU::VGPR_32RegClassID;
2179       case 2: return AMDGPU::VReg_64RegClassID;
2180       case 3: return AMDGPU::VReg_96RegClassID;
2181       case 4: return AMDGPU::VReg_128RegClassID;
2182       case 5: return AMDGPU::VReg_160RegClassID;
2183       case 6: return AMDGPU::VReg_192RegClassID;
2184       case 8: return AMDGPU::VReg_256RegClassID;
2185       case 16: return AMDGPU::VReg_512RegClassID;
2186       case 32: return AMDGPU::VReg_1024RegClassID;
2187     }
2188   } else if (Is == IS_TTMP) {
2189     switch (RegWidth) {
2190       default: return -1;
2191       case 1: return AMDGPU::TTMP_32RegClassID;
2192       case 2: return AMDGPU::TTMP_64RegClassID;
2193       case 4: return AMDGPU::TTMP_128RegClassID;
2194       case 8: return AMDGPU::TTMP_256RegClassID;
2195       case 16: return AMDGPU::TTMP_512RegClassID;
2196     }
2197   } else if (Is == IS_SGPR) {
2198     switch (RegWidth) {
2199       default: return -1;
2200       case 1: return AMDGPU::SGPR_32RegClassID;
2201       case 2: return AMDGPU::SGPR_64RegClassID;
2202       case 3: return AMDGPU::SGPR_96RegClassID;
2203       case 4: return AMDGPU::SGPR_128RegClassID;
2204       case 5: return AMDGPU::SGPR_160RegClassID;
2205       case 6: return AMDGPU::SGPR_192RegClassID;
2206       case 8: return AMDGPU::SGPR_256RegClassID;
2207       case 16: return AMDGPU::SGPR_512RegClassID;
2208     }
2209   } else if (Is == IS_AGPR) {
2210     switch (RegWidth) {
2211       default: return -1;
2212       case 1: return AMDGPU::AGPR_32RegClassID;
2213       case 2: return AMDGPU::AReg_64RegClassID;
2214       case 3: return AMDGPU::AReg_96RegClassID;
2215       case 4: return AMDGPU::AReg_128RegClassID;
2216       case 5: return AMDGPU::AReg_160RegClassID;
2217       case 6: return AMDGPU::AReg_192RegClassID;
2218       case 8: return AMDGPU::AReg_256RegClassID;
2219       case 16: return AMDGPU::AReg_512RegClassID;
2220       case 32: return AMDGPU::AReg_1024RegClassID;
2221     }
2222   }
2223   return -1;
2224 }
2225 
2226 static unsigned getSpecialRegForName(StringRef RegName) {
2227   return StringSwitch<unsigned>(RegName)
2228     .Case("exec", AMDGPU::EXEC)
2229     .Case("vcc", AMDGPU::VCC)
2230     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2231     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2232     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2233     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2234     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2235     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2236     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2237     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2238     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2239     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2240     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2241     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2242     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2243     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2244     .Case("m0", AMDGPU::M0)
2245     .Case("vccz", AMDGPU::SRC_VCCZ)
2246     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2247     .Case("execz", AMDGPU::SRC_EXECZ)
2248     .Case("src_execz", AMDGPU::SRC_EXECZ)
2249     .Case("scc", AMDGPU::SRC_SCC)
2250     .Case("src_scc", AMDGPU::SRC_SCC)
2251     .Case("tba", AMDGPU::TBA)
2252     .Case("tma", AMDGPU::TMA)
2253     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2254     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2255     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2256     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2257     .Case("vcc_lo", AMDGPU::VCC_LO)
2258     .Case("vcc_hi", AMDGPU::VCC_HI)
2259     .Case("exec_lo", AMDGPU::EXEC_LO)
2260     .Case("exec_hi", AMDGPU::EXEC_HI)
2261     .Case("tma_lo", AMDGPU::TMA_LO)
2262     .Case("tma_hi", AMDGPU::TMA_HI)
2263     .Case("tba_lo", AMDGPU::TBA_LO)
2264     .Case("tba_hi", AMDGPU::TBA_HI)
2265     .Case("pc", AMDGPU::PC_REG)
2266     .Case("null", AMDGPU::SGPR_NULL)
2267     .Default(AMDGPU::NoRegister);
2268 }
2269 
2270 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2271                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2272   auto R = parseRegister();
2273   if (!R) return true;
2274   assert(R->isReg());
2275   RegNo = R->getReg();
2276   StartLoc = R->getStartLoc();
2277   EndLoc = R->getEndLoc();
2278   return false;
2279 }
2280 
2281 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2282                                     SMLoc &EndLoc) {
2283   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2284 }
2285 
2286 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2287                                                        SMLoc &StartLoc,
2288                                                        SMLoc &EndLoc) {
2289   bool Result =
2290       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2291   bool PendingErrors = getParser().hasPendingError();
2292   getParser().clearPendingErrors();
2293   if (PendingErrors)
2294     return MatchOperand_ParseFail;
2295   if (Result)
2296     return MatchOperand_NoMatch;
2297   return MatchOperand_Success;
2298 }
2299 
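// Extend a register list that currently ends at Reg and spans RegWidth
// 32-bit registers by the next register Reg1. Halves of special registers
// are merged into their 64-bit counterparts (e.g. exec_lo followed by
// exec_hi yields exec); regular registers must have consecutive indices.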
2300 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2301                                             RegisterKind RegKind, unsigned Reg1,
2302                                             SMLoc Loc) {
2303   switch (RegKind) {
2304   case IS_SPECIAL:
2305     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2306       Reg = AMDGPU::EXEC;
2307       RegWidth = 2;
2308       return true;
2309     }
2310     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2311       Reg = AMDGPU::FLAT_SCR;
2312       RegWidth = 2;
2313       return true;
2314     }
2315     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2316       Reg = AMDGPU::XNACK_MASK;
2317       RegWidth = 2;
2318       return true;
2319     }
2320     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2321       Reg = AMDGPU::VCC;
2322       RegWidth = 2;
2323       return true;
2324     }
2325     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2326       Reg = AMDGPU::TBA;
2327       RegWidth = 2;
2328       return true;
2329     }
2330     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2331       Reg = AMDGPU::TMA;
2332       RegWidth = 2;
2333       return true;
2334     }
2335     Error(Loc, "register does not fit in the list");
2336     return false;
2337   case IS_VGPR:
2338   case IS_SGPR:
2339   case IS_AGPR:
2340   case IS_TTMP:
2341     if (Reg1 != Reg + RegWidth) {
2342       Error(Loc, "registers in a list must have consecutive indices");
2343       return false;
2344     }
2345     RegWidth++;
2346     return true;
2347   default:
2348     llvm_unreachable("unexpected register kind");
2349   }
2350 }
2351 
2352 struct RegInfo {
2353   StringLiteral Name;
2354   RegisterKind Kind;
2355 };
2356 
2357 static constexpr RegInfo RegularRegisters[] = {
2358   {{"v"},    IS_VGPR},
2359   {{"s"},    IS_SGPR},
2360   {{"ttmp"}, IS_TTMP},
2361   {{"acc"},  IS_AGPR},
2362   {{"a"},    IS_AGPR},
2363 };
2364 
2365 static bool isRegularReg(RegisterKind Kind) {
2366   return Kind == IS_VGPR ||
2367          Kind == IS_SGPR ||
2368          Kind == IS_TTMP ||
2369          Kind == IS_AGPR;
2370 }
2371 
2372 static const RegInfo* getRegularRegInfo(StringRef Str) {
2373   for (const RegInfo &Reg : RegularRegisters)
2374     if (Str.startswith(Reg.Name))
2375       return &Reg;
2376   return nullptr;
2377 }
2378 
2379 static bool getRegNum(StringRef Str, unsigned& Num) {
2380   return !Str.getAsInteger(10, Num);
2381 }
2382 
2383 bool
2384 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2385                             const AsmToken &NextToken) const {
2386 
2387   // A list of consecutive registers: [s0,s1,s2,s3]
2388   if (Token.is(AsmToken::LBrac))
2389     return true;
2390 
2391   if (!Token.is(AsmToken::Identifier))
2392     return false;
2393 
2394   // A single register like s0 or a range of registers like s[0:1]
2395 
2396   StringRef Str = Token.getString();
2397   const RegInfo *Reg = getRegularRegInfo(Str);
2398   if (Reg) {
2399     StringRef RegName = Reg->Name;
2400     StringRef RegSuffix = Str.substr(RegName.size());
2401     if (!RegSuffix.empty()) {
2402       unsigned Num;
2403       // A single register with an index: rXX
2404       if (getRegNum(RegSuffix, Num))
2405         return true;
2406     } else {
2407       // A range of registers: r[XX:YY].
2408       if (NextToken.is(AsmToken::LBrac))
2409         return true;
2410     }
2411   }
2412 
2413   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2414 }
2415 
2416 bool
2417 AMDGPUAsmParser::isRegister()
2418 {
2419   return isRegister(getToken(), peekToken());
2420 }
2421 
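// Map a regular register kind, first index and width (in 32-bit registers)
// to an MC register. SGPR and TTMP tuples must start at an index aligned
// to min(RegWidth, 4), so s[2:3] is valid while s[1:2] is rejected.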
2422 unsigned
2423 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2424                                unsigned RegNum,
2425                                unsigned RegWidth,
2426                                SMLoc Loc) {
2427 
2428   assert(isRegularReg(RegKind));
2429 
2430   unsigned AlignSize = 1;
2431   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2432     // SGPR and TTMP registers must be aligned.
2433     // Max required alignment is 4 dwords.
2434     AlignSize = std::min(RegWidth, 4u);
2435   }
2436 
2437   if (RegNum % AlignSize != 0) {
2438     Error(Loc, "invalid register alignment");
2439     return AMDGPU::NoRegister;
2440   }
2441 
2442   unsigned RegIdx = RegNum / AlignSize;
2443   int RCID = getRegClass(RegKind, RegWidth);
2444   if (RCID == -1) {
2445     Error(Loc, "invalid or unsupported register size");
2446     return AMDGPU::NoRegister;
2447   }
2448 
2449   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2450   const MCRegisterClass RC = TRI->getRegClass(RCID);
2451   if (RegIdx >= RC.getNumRegs()) {
2452     Error(Loc, "register index is out of range");
2453     return AMDGPU::NoRegister;
2454   }
2455 
2456   return RC.getRegister(RegIdx);
2457 }
2458 
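// Parse a bracketed register index or range such as "[0]" or "[0:3]".
// On success, Num holds the first index and Width the number of registers
// in the range.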
2459 bool
2460 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2461   int64_t RegLo, RegHi;
2462   if (!skipToken(AsmToken::LBrac, "missing register index"))
2463     return false;
2464 
2465   SMLoc FirstIdxLoc = getLoc();
2466   SMLoc SecondIdxLoc;
2467 
2468   if (!parseExpr(RegLo))
2469     return false;
2470 
2471   if (trySkipToken(AsmToken::Colon)) {
2472     SecondIdxLoc = getLoc();
2473     if (!parseExpr(RegHi))
2474       return false;
2475   } else {
2476     RegHi = RegLo;
2477   }
2478 
2479   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2480     return false;
2481 
2482   if (!isUInt<32>(RegLo)) {
2483     Error(FirstIdxLoc, "invalid register index");
2484     return false;
2485   }
2486 
2487   if (!isUInt<32>(RegHi)) {
2488     Error(SecondIdxLoc, "invalid register index");
2489     return false;
2490   }
2491 
2492   if (RegLo > RegHi) {
2493     Error(FirstIdxLoc, "first register index should not exceed second index");
2494     return false;
2495   }
2496 
2497   Num = static_cast<unsigned>(RegLo);
2498   Width = (RegHi - RegLo) + 1;
2499   return true;
2500 }
2501 
2502 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2503                                           unsigned &RegNum, unsigned &RegWidth,
2504                                           SmallVectorImpl<AsmToken> &Tokens) {
2505   assert(isToken(AsmToken::Identifier));
2506   unsigned Reg = getSpecialRegForName(getTokenStr());
2507   if (Reg) {
2508     RegNum = 0;
2509     RegWidth = 1;
2510     RegKind = IS_SPECIAL;
2511     Tokens.push_back(getToken());
2512     lex(); // skip register name
2513   }
2514   return Reg;
2515 }
2516 
2517 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2518                                           unsigned &RegNum, unsigned &RegWidth,
2519                                           SmallVectorImpl<AsmToken> &Tokens) {
2520   assert(isToken(AsmToken::Identifier));
2521   StringRef RegName = getTokenStr();
2522   auto Loc = getLoc();
2523 
2524   const RegInfo *RI = getRegularRegInfo(RegName);
2525   if (!RI) {
2526     Error(Loc, "invalid register name");
2527     return AMDGPU::NoRegister;
2528   }
2529 
2530   Tokens.push_back(getToken());
2531   lex(); // skip register name
2532 
2533   RegKind = RI->Kind;
2534   StringRef RegSuffix = RegName.substr(RI->Name.size());
2535   if (!RegSuffix.empty()) {
2536     // Single 32-bit register: vXX.
2537     if (!getRegNum(RegSuffix, RegNum)) {
2538       Error(Loc, "invalid register index");
2539       return AMDGPU::NoRegister;
2540     }
2541     RegWidth = 1;
2542   } else {
2543     // Range of registers: v[XX:YY]. ":YY" is optional.
2544     if (!ParseRegRange(RegNum, RegWidth))
2545       return AMDGPU::NoRegister;
2546   }
2547 
2548   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2549 }
2550 
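// Parse a bracketed list of consecutive 32-bit registers, e.g. [s0,s1,s2,s3].
// All registers must be of the same kind and have consecutive indices; the
// result is the register tuple covering the whole list.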
2551 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2552                                        unsigned &RegWidth,
2553                                        SmallVectorImpl<AsmToken> &Tokens) {
2554   unsigned Reg = AMDGPU::NoRegister;
2555   auto ListLoc = getLoc();
2556 
2557   if (!skipToken(AsmToken::LBrac,
2558                  "expected a register or a list of registers")) {
2559     return AMDGPU::NoRegister;
2560   }
2561 
2562   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2563 
2564   auto Loc = getLoc();
2565   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2566     return AMDGPU::NoRegister;
2567   if (RegWidth != 1) {
2568     Error(Loc, "expected a single 32-bit register");
2569     return AMDGPU::NoRegister;
2570   }
2571 
2572   for (; trySkipToken(AsmToken::Comma); ) {
2573     RegisterKind NextRegKind;
2574     unsigned NextReg, NextRegNum, NextRegWidth;
2575     Loc = getLoc();
2576 
2577     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2578                              NextRegNum, NextRegWidth,
2579                              Tokens)) {
2580       return AMDGPU::NoRegister;
2581     }
2582     if (NextRegWidth != 1) {
2583       Error(Loc, "expected a single 32-bit register");
2584       return AMDGPU::NoRegister;
2585     }
2586     if (NextRegKind != RegKind) {
2587       Error(Loc, "registers in a list must be of the same kind");
2588       return AMDGPU::NoRegister;
2589     }
2590     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2591       return AMDGPU::NoRegister;
2592   }
2593 
2594   if (!skipToken(AsmToken::RBrac,
2595                  "expected a comma or a closing square bracket")) {
2596     return AMDGPU::NoRegister;
2597   }
2598 
2599   if (isRegularReg(RegKind))
2600     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2601 
2602   return Reg;
2603 }
2604 
2605 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2606                                           unsigned &RegNum, unsigned &RegWidth,
2607                                           SmallVectorImpl<AsmToken> &Tokens) {
2608   auto Loc = getLoc();
2609   Reg = AMDGPU::NoRegister;
2610 
2611   if (isToken(AsmToken::Identifier)) {
2612     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2613     if (Reg == AMDGPU::NoRegister)
2614       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2615   } else {
2616     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2617   }
2618 
2619   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2620   if (Reg == AMDGPU::NoRegister) {
2621     assert(Parser.hasPendingError());
2622     return false;
2623   }
2624 
2625   if (!subtargetHasRegister(*TRI, Reg)) {
2626     if (Reg == AMDGPU::SGPR_NULL) {
2627       Error(Loc, "'null' operand is not supported on this GPU");
2628     } else {
2629       Error(Loc, "register not available on this GPU");
2630     }
2631     return false;
2632   }
2633 
2634   return true;
2635 }
2636 
2637 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2638                                           unsigned &RegNum, unsigned &RegWidth,
2639                                           bool RestoreOnFailure /*=false*/) {
2640   Reg = AMDGPU::NoRegister;
2641 
2642   SmallVector<AsmToken, 1> Tokens;
2643   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2644     if (RestoreOnFailure) {
2645       while (!Tokens.empty()) {
2646         getLexer().UnLex(Tokens.pop_back_val());
2647       }
2648     }
2649     return true;
2650   }
2651   return false;
2652 }
2653 
2654 Optional<StringRef>
2655 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2656   switch (RegKind) {
2657   case IS_VGPR:
2658     return StringRef(".amdgcn.next_free_vgpr");
2659   case IS_SGPR:
2660     return StringRef(".amdgcn.next_free_sgpr");
2661   default:
2662     return None;
2663   }
2664 }
2665 
2666 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2667   auto SymbolName = getGprCountSymbolName(RegKind);
2668   assert(SymbolName && "initializing invalid register kind");
2669   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2670   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2671 }
2672 
2673 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2674                                             unsigned DwordRegIndex,
2675                                             unsigned RegWidth) {
2676   // Symbols are only defined for GCN targets
2677   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2678     return true;
2679 
2680   auto SymbolName = getGprCountSymbolName(RegKind);
2681   if (!SymbolName)
2682     return true;
2683   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2684 
2685   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2686   int64_t OldCount;
2687 
2688   if (!Sym->isVariable())
2689     return !Error(getLoc(),
2690                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2691   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2692     return !Error(
2693         getLoc(),
2694         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2695 
2696   if (OldCount <= NewMax)
2697     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2698 
2699   return true;
2700 }
2701 
2702 std::unique_ptr<AMDGPUOperand>
2703 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2704   const auto &Tok = getToken();
2705   SMLoc StartLoc = Tok.getLoc();
2706   SMLoc EndLoc = Tok.getEndLoc();
2707   RegisterKind RegKind;
2708   unsigned Reg, RegNum, RegWidth;
2709 
2710   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2711     return nullptr;
2712   }
2713   if (isHsaAbiVersion3Or4(&getSTI())) {
2714     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2715       return nullptr;
2716   } else
2717     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2718   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2719 }
2720 
2721 OperandMatchResultTy
2722 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2723   // TODO: add syntactic sugar for 1/(2*PI)
2724 
2725   assert(!isRegister());
2726   assert(!isModifier());
2727 
2728   const auto& Tok = getToken();
2729   const auto& NextTok = peekToken();
2730   bool IsReal = Tok.is(AsmToken::Real);
2731   SMLoc S = getLoc();
2732   bool Negate = false;
2733 
2734   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2735     lex();
2736     IsReal = true;
2737     Negate = true;
2738   }
2739 
2740   if (IsReal) {
2741     // Floating-point expressions are not supported.
2742     // Only floating-point literals with an optional
2743     // sign are accepted.
2744 
2745     StringRef Num = getTokenStr();
2746     lex();
2747 
2748     APFloat RealVal(APFloat::IEEEdouble());
2749     auto roundMode = APFloat::rmNearestTiesToEven;
2750     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2751       return MatchOperand_ParseFail;
2752     }
2753     if (Negate)
2754       RealVal.changeSign();
2755 
2756     Operands.push_back(
2757       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2758                                AMDGPUOperand::ImmTyNone, true));
2759 
2760     return MatchOperand_Success;
2761 
2762   } else {
2763     int64_t IntVal;
2764     const MCExpr *Expr;
2765     SMLoc S = getLoc();
2766 
2767     if (HasSP3AbsModifier) {
2768       // This is a workaround for handling expressions
2769       // as arguments of SP3 'abs' modifier, for example:
2770       //     |1.0|
2771       //     |-1|
2772       //     |1+x|
2773       // This syntax is not compatible with syntax of standard
2774       // MC expressions (due to the trailing '|').
2775       SMLoc EndLoc;
2776       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2777         return MatchOperand_ParseFail;
2778     } else {
2779       if (Parser.parseExpression(Expr))
2780         return MatchOperand_ParseFail;
2781     }
2782 
2783     if (Expr->evaluateAsAbsolute(IntVal)) {
2784       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2785     } else {
2786       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2787     }
2788 
2789     return MatchOperand_Success;
2790   }
2791 
2792   return MatchOperand_NoMatch;
2793 }
2794 
2795 OperandMatchResultTy
2796 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2797   if (!isRegister())
2798     return MatchOperand_NoMatch;
2799 
2800   if (auto R = parseRegister()) {
2801     assert(R->isReg());
2802     Operands.push_back(std::move(R));
2803     return MatchOperand_Success;
2804   }
2805   return MatchOperand_ParseFail;
2806 }
2807 
2808 OperandMatchResultTy
2809 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2810   auto res = parseReg(Operands);
2811   if (res != MatchOperand_NoMatch) {
2812     return res;
2813   } else if (isModifier()) {
2814     return MatchOperand_NoMatch;
2815   } else {
2816     return parseImm(Operands, HasSP3AbsMod);
2817   }
2818 }
2819 
2820 bool
2821 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2822   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2823     const auto &str = Token.getString();
2824     return str == "abs" || str == "neg" || str == "sext";
2825   }
2826   return false;
2827 }
2828 
2829 bool
2830 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2831   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2832 }
2833 
2834 bool
2835 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2836   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2837 }
2838 
2839 bool
2840 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2841   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2842 }
2843 
2844 // Check if this is an operand modifier or an opcode modifier
2845 // which may look like an expression but is not. We should
2846 // avoid parsing these modifiers as expressions. Currently
2847 // recognized sequences are:
2848 //   |...|
2849 //   abs(...)
2850 //   neg(...)
2851 //   sext(...)
2852 //   -reg
2853 //   -|...|
2854 //   -abs(...)
2855 //   name:...
2856 // Note that simple opcode modifiers like 'gds' may be parsed as
2857 // expressions; this is a special case. See getExpressionAsToken.
2858 //
2859 bool
2860 AMDGPUAsmParser::isModifier() {
2861 
2862   AsmToken Tok = getToken();
2863   AsmToken NextToken[2];
2864   peekTokens(NextToken);
2865 
2866   return isOperandModifier(Tok, NextToken[0]) ||
2867          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2868          isOpcodeModifierWithVal(Tok, NextToken[0]);
2869 }
2870 
2871 // Check if the current token is an SP3 'neg' modifier.
2872 // Currently this modifier is allowed in the following context:
2873 //
2874 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2875 // 2. Before an 'abs' modifier: -abs(...)
2876 // 3. Before an SP3 'abs' modifier: -|...|
2877 //
2878 // In all other cases "-" is handled as a part
2879 // of an expression that follows the sign.
2880 //
2881 // Note: When "-" is followed by an integer literal,
2882 // this is interpreted as integer negation rather
2883 // than a floating-point NEG modifier applied to the literal.
2884 // Besides being counter-intuitive, such use of a floating-point
2885 // NEG modifier would result in different meanings
2886 // of integer literals used with VOP1/2/C and VOP3,
2887 // for example:
2888 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2889 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2890 // Negative fp literals with a preceding "-" are
2891 // handled likewise for uniformity.
2892 //
2893 bool
2894 AMDGPUAsmParser::parseSP3NegModifier() {
2895 
2896   AsmToken NextToken[2];
2897   peekTokens(NextToken);
2898 
2899   if (isToken(AsmToken::Minus) &&
2900       (isRegister(NextToken[0], NextToken[1]) ||
2901        NextToken[0].is(AsmToken::Pipe) ||
2902        isId(NextToken[0], "abs"))) {
2903     lex();
2904     return true;
2905   }
2906 
2907   return false;
2908 }
2909 
2910 OperandMatchResultTy
2911 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2912                                               bool AllowImm) {
2913   bool Neg, SP3Neg;
2914   bool Abs, SP3Abs;
2915   SMLoc Loc;
2916 
2917   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2918   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2919     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2920     return MatchOperand_ParseFail;
2921   }
2922 
2923   SP3Neg = parseSP3NegModifier();
2924 
2925   Loc = getLoc();
2926   Neg = trySkipId("neg");
2927   if (Neg && SP3Neg) {
2928     Error(Loc, "expected register or immediate");
2929     return MatchOperand_ParseFail;
2930   }
2931   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2932     return MatchOperand_ParseFail;
2933 
2934   Abs = trySkipId("abs");
2935   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2936     return MatchOperand_ParseFail;
2937 
2938   Loc = getLoc();
2939   SP3Abs = trySkipToken(AsmToken::Pipe);
2940   if (Abs && SP3Abs) {
2941     Error(Loc, "expected register or immediate");
2942     return MatchOperand_ParseFail;
2943   }
2944 
2945   OperandMatchResultTy Res;
2946   if (AllowImm) {
2947     Res = parseRegOrImm(Operands, SP3Abs);
2948   } else {
2949     Res = parseReg(Operands);
2950   }
2951   if (Res != MatchOperand_Success) {
2952     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2953   }
2954 
2955   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2956     return MatchOperand_ParseFail;
2957   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2958     return MatchOperand_ParseFail;
2959   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2960     return MatchOperand_ParseFail;
2961 
2962   AMDGPUOperand::Modifiers Mods;
2963   Mods.Abs = Abs || SP3Abs;
2964   Mods.Neg = Neg || SP3Neg;
2965 
2966   if (Mods.hasFPModifiers()) {
2967     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2968     if (Op.isExpr()) {
2969       Error(Op.getStartLoc(), "expected an absolute expression");
2970       return MatchOperand_ParseFail;
2971     }
2972     Op.setModifiers(Mods);
2973   }
2974   return MatchOperand_Success;
2975 }
2976 
2977 OperandMatchResultTy
2978 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2979                                                bool AllowImm) {
2980   bool Sext = trySkipId("sext");
2981   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2982     return MatchOperand_ParseFail;
2983 
2984   OperandMatchResultTy Res;
2985   if (AllowImm) {
2986     Res = parseRegOrImm(Operands);
2987   } else {
2988     Res = parseReg(Operands);
2989   }
2990   if (Res != MatchOperand_Success) {
2991     return Sext? MatchOperand_ParseFail : Res;
2992   }
2993 
2994   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2995     return MatchOperand_ParseFail;
2996 
2997   AMDGPUOperand::Modifiers Mods;
2998   Mods.Sext = Sext;
2999 
3000   if (Mods.hasIntModifiers()) {
3001     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3002     if (Op.isExpr()) {
3003       Error(Op.getStartLoc(), "expected an absolute expression");
3004       return MatchOperand_ParseFail;
3005     }
3006     Op.setModifiers(Mods);
3007   }
3008 
3009   return MatchOperand_Success;
3010 }
3011 
3012 OperandMatchResultTy
3013 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3014   return parseRegOrImmWithFPInputMods(Operands, false);
3015 }
3016 
3017 OperandMatchResultTy
3018 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3019   return parseRegOrImmWithIntInputMods(Operands, false);
3020 }
3021 
3022 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3023   auto Loc = getLoc();
3024   if (trySkipId("off")) {
3025     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3026                                                 AMDGPUOperand::ImmTyOff, false));
3027     return MatchOperand_Success;
3028   }
3029 
3030   if (!isRegister())
3031     return MatchOperand_NoMatch;
3032 
3033   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3034   if (Reg) {
3035     Operands.push_back(std::move(Reg));
3036     return MatchOperand_Success;
3037   }
3038 
3039   return MatchOperand_ParseFail;
3040 
3041 }
3042 
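// Reject matches that contradict a forced encoding suffix (_e32, _e64, _sdwa
// or _dpp), report Match_PreferE32 for VOP3 opcodes that prefer the 32-bit
// encoding, and enforce that SDWA v_mac_f32/f16 use dst_sel == DWORD.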
3043 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3044   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3045 
3046   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3047       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3048       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3049       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3050     return Match_InvalidOperand;
3051 
3052   if ((TSFlags & SIInstrFlags::VOP3) &&
3053       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3054       getForcedEncodingSize() != 64)
3055     return Match_PreferE32;
3056 
3057   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3058       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3059     // v_mac_f32/16 allow only dst_sel == DWORD.
3060     auto OpNum =
3061         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3062     const auto &Op = Inst.getOperand(OpNum);
3063     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3064       return Match_InvalidOperand;
3065     }
3066   }
3067 
3068   return Match_Success;
3069 }
3070 
3071 static ArrayRef<unsigned> getAllVariants() {
3072   static const unsigned Variants[] = {
3073     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3074     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3075   };
3076 
3077   return makeArrayRef(Variants);
3078 }
3079 
3080 // What asm variants we should check
3081 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3082   if (getForcedEncodingSize() == 32) {
3083     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3084     return makeArrayRef(Variants);
3085   }
3086 
3087   if (isForcedVOP3()) {
3088     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3089     return makeArrayRef(Variants);
3090   }
3091 
3092   if (isForcedSDWA()) {
3093     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3094                                         AMDGPUAsmVariants::SDWA9};
3095     return makeArrayRef(Variants);
3096   }
3097 
3098   if (isForcedDPP()) {
3099     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3100     return makeArrayRef(Variants);
3101   }
3102 
3103   return getAllVariants();
3104 }
3105 
3106 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3107   if (getForcedEncodingSize() == 32)
3108     return "e32";
3109 
3110   if (isForcedVOP3())
3111     return "e64";
3112 
3113   if (isForcedSDWA())
3114     return "sdwa";
3115 
3116   if (isForcedDPP())
3117     return "dpp";
3118 
3119   return "";
3120 }
3121 
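// Return the first implicitly read SGPR (FLAT_SCR, VCC, VCC_LO, VCC_HI or M0)
// of the given instruction, or NoRegister if there is none.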
3122 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3123   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3124   const unsigned Num = Desc.getNumImplicitUses();
3125   for (unsigned i = 0; i < Num; ++i) {
3126     unsigned Reg = Desc.ImplicitUses[i];
3127     switch (Reg) {
3128     case AMDGPU::FLAT_SCR:
3129     case AMDGPU::VCC:
3130     case AMDGPU::VCC_LO:
3131     case AMDGPU::VCC_HI:
3132     case AMDGPU::M0:
3133       return Reg;
3134     default:
3135       break;
3136     }
3137   }
3138   return AMDGPU::NoRegister;
3139 }
3140 
3141 // NB: This code is correct only when used to check constant
3142 // bus limitations because GFX7 supports no f16 inline constants.
3143 // Note that there are no cases when a GFX7 opcode violates
3144 // constant bus limitations due to the use of an f16 constant.
3145 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3146                                        unsigned OpIdx) const {
3147   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3148 
3149   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3150     return false;
3151   }
3152 
3153   const MCOperand &MO = Inst.getOperand(OpIdx);
3154 
3155   int64_t Val = MO.getImm();
3156   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3157 
3158   switch (OpSize) { // expected operand size
3159   case 8:
3160     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3161   case 4:
3162     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3163   case 2: {
3164     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3165     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3166         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3167         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3168       return AMDGPU::isInlinableIntLiteral(Val);
3169 
3170     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3171         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3172         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3173       return AMDGPU::isInlinableIntLiteralV216(Val);
3174 
3175     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3176         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3177         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3178       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3179 
3180     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3181   }
3182   default:
3183     llvm_unreachable("invalid operand size");
3184   }
3185 }
3186 
3187 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3188   if (!isGFX10Plus())
3189     return 1;
3190 
3191   switch (Opcode) {
3192   // 64-bit shift instructions can use only one scalar value input
3193   case AMDGPU::V_LSHLREV_B64_e64:
3194   case AMDGPU::V_LSHLREV_B64_gfx10:
3195   case AMDGPU::V_LSHRREV_B64_e64:
3196   case AMDGPU::V_LSHRREV_B64_gfx10:
3197   case AMDGPU::V_ASHRREV_I64_e64:
3198   case AMDGPU::V_ASHRREV_I64_gfx10:
3199   case AMDGPU::V_LSHL_B64_e64:
3200   case AMDGPU::V_LSHR_B64_e64:
3201   case AMDGPU::V_ASHR_I64_e64:
3202     return 1;
3203   default:
3204     return 2;
3205   }
3206 }
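// Illustrative sketch (assumed GFX10 assembly syntax; operand choices are
// hypothetical): on GFX10 most VOP instructions may read up to two scalar
// values from the constant bus, but 64-bit shifts are limited to one:
//   v_add3_u32 v0, s0, s1, v1          // two SGPRs - accepted on GFX10
//   v_lshlrev_b64 v[0:1], s0, s[2:3]   // two SGPRs - rejected, limit is 1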
3207 
3208 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3209   const MCOperand &MO = Inst.getOperand(OpIdx);
3210   if (MO.isImm()) {
3211     return !isInlineConstant(Inst, OpIdx);
3212   } else if (MO.isReg()) {
3213     auto Reg = MO.getReg();
3214     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3215     auto PReg = mc2PseudoReg(Reg);
3216     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3217   } else {
3218     return true;
3219   }
3220 }
3221 
3222 bool
3223 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3224                                                 const OperandVector &Operands) {
3225   const unsigned Opcode = Inst.getOpcode();
3226   const MCInstrDesc &Desc = MII.get(Opcode);
3227   unsigned LastSGPR = AMDGPU::NoRegister;
3228   unsigned ConstantBusUseCount = 0;
3229   unsigned NumLiterals = 0;
3230   unsigned LiteralSize;
3231 
3232   if (Desc.TSFlags &
3233       (SIInstrFlags::VOPC |
3234        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3235        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3236        SIInstrFlags::SDWA)) {
3237     // Check special imm operands (used by madmk, etc)
3238     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3239       ++ConstantBusUseCount;
3240     }
3241 
3242     SmallDenseSet<unsigned> SGPRsUsed;
3243     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3244     if (SGPRUsed != AMDGPU::NoRegister) {
3245       SGPRsUsed.insert(SGPRUsed);
3246       ++ConstantBusUseCount;
3247     }
3248 
3249     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3250     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3251     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3252 
3253     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3254 
3255     for (int OpIdx : OpIndices) {
3256       if (OpIdx == -1) break;
3257 
3258       const MCOperand &MO = Inst.getOperand(OpIdx);
3259       if (usesConstantBus(Inst, OpIdx)) {
3260         if (MO.isReg()) {
3261           LastSGPR = mc2PseudoReg(MO.getReg());
3262           // Pairs of registers with partial intersections like these:
3263           //   s0, s[0:1]
3264           //   flat_scratch_lo, flat_scratch
3265           //   flat_scratch_lo, flat_scratch_hi
3266           // are theoretically valid but they are disabled anyway.
3267           // Note that this code mimics SIInstrInfo::verifyInstruction
3268           if (!SGPRsUsed.count(LastSGPR)) {
3269             SGPRsUsed.insert(LastSGPR);
3270             ++ConstantBusUseCount;
3271           }
3272         } else { // Expression or a literal
3273 
3274           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3275             continue; // special operand like VINTERP attr_chan
3276 
3277           // An instruction may use only one literal.
3278           // This has been validated in a previous step.
3279           // See validateVOP3Literal.
3280           // This literal may be used by more than one operand.
3281           // If all these operands are of the same size,
3282           // the literal counts as one scalar value.
3283           // Otherwise it counts as 2 scalar values.
3284           // See "GFX10 Shader Programming", section 3.6.2.3.
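          // Illustrative sketch (hypothetical operands): if the same 32-bit
          // literal is read by two operands of equal size, e.g.
          //   v_fma_f32 v0, 0x12345678, v1, 0x12345678
          // it occupies a single constant bus slot; if the sizes of the
          // consuming operands differed, it would count as two slots.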
3285 
3286           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3287           if (Size < 4) Size = 4;
3288 
3289           if (NumLiterals == 0) {
3290             NumLiterals = 1;
3291             LiteralSize = Size;
3292           } else if (LiteralSize != Size) {
3293             NumLiterals = 2;
3294           }
3295         }
3296       }
3297     }
3298   }
3299   ConstantBusUseCount += NumLiterals;
3300 
3301   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3302     return true;
3303 
3304   SMLoc LitLoc = getLitLoc(Operands);
3305   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3306   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3307   Error(Loc, "invalid operand (violates constant bus restrictions)");
3308   return false;
3309 }
3310 
3311 bool
3312 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3313                                                  const OperandVector &Operands) {
3314   const unsigned Opcode = Inst.getOpcode();
3315   const MCInstrDesc &Desc = MII.get(Opcode);
3316 
3317   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3318   if (DstIdx == -1 ||
3319       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3320     return true;
3321   }
3322 
3323   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3324 
3325   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3326   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3327   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3328 
3329   assert(DstIdx != -1);
3330   const MCOperand &Dst = Inst.getOperand(DstIdx);
3331   assert(Dst.isReg());
3332   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3333 
3334   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3335 
3336   for (int SrcIdx : SrcIndices) {
3337     if (SrcIdx == -1) break;
3338     const MCOperand &Src = Inst.getOperand(SrcIdx);
3339     if (Src.isReg()) {
3340       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3341       if (isRegIntersect(DstReg, SrcReg, TRI)) {
3342         Error(getRegLoc(SrcReg, Operands),
3343           "destination must be different than all sources");
3344         return false;
3345       }
3346     }
3347   }
3348 
3349   return true;
3350 }
3351 
3352 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3353 
3354   const unsigned Opc = Inst.getOpcode();
3355   const MCInstrDesc &Desc = MII.get(Opc);
3356 
3357   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3358     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3359     assert(ClampIdx != -1);
3360     return Inst.getOperand(ClampIdx).getImm() == 0;
3361   }
3362 
3363   return true;
3364 }
3365 
3366 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3367 
3368   const unsigned Opc = Inst.getOpcode();
3369   const MCInstrDesc &Desc = MII.get(Opc);
3370 
3371   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3372     return true;
3373 
3374   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3375   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3376   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3377 
3378   assert(VDataIdx != -1);
3379 
3380   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3381     return true;
3382 
3383   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3384   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3385   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3386   if (DMask == 0)
3387     DMask = 1;
3388 
3389   unsigned DataSize =
3390     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3391   if (hasPackedD16()) {
3392     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3393     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3394       DataSize = (DataSize + 1) / 2;
3395   }
3396 
3397   return (VDataSize / 4) == DataSize + TFESize;
3398 }
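// Illustrative sketch (assumed MIMG syntax, hypothetical operands): with
// dmask:0x7 three components are returned, so the data operand must be a
// 3-VGPR tuple, e.g.
//   image_load v[0:2], v[4:5], s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_2D
// Adding tfe requires one extra VGPR, and with d16 on packed-D16 targets
// the component count is halved (rounded up).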
3399 
3400 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3401   const unsigned Opc = Inst.getOpcode();
3402   const MCInstrDesc &Desc = MII.get(Opc);
3403 
3404   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3405     return true;
3406 
3407   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3408 
3409   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3410       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3411   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3412   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3413   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3414 
3415   assert(VAddr0Idx != -1);
3416   assert(SrsrcIdx != -1);
3417   assert(SrsrcIdx > VAddr0Idx);
3418 
3419   if (DimIdx == -1)
3420     return true; // intersect_ray
3421 
3422   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3423   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3424   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3425   unsigned VAddrSize =
3426       IsNSA ? SrsrcIdx - VAddr0Idx
3427             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3428 
3429   unsigned AddrSize = BaseOpcode->NumExtraArgs +
3430                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
3431                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
3432                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
3433   if (!IsNSA) {
3434     if (AddrSize > 8)
3435       AddrSize = 16;
3436     else if (AddrSize > 4)
3437       AddrSize = 8;
3438   }
3439 
3440   return VAddrSize == AddrSize;
3441 }
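// Illustrative sketch (GFX10, hypothetical operands): a plain 2D sample needs
// two coordinate VGPRs, so either a matching tuple or an NSA form is accepted:
//   image_sample v[0:3], v[4:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
//   image_sample v[0:3], [v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
// When more than four address dwords are required, the non-NSA encoding
// rounds the register tuple up to 8 (or to 16 above 8 dwords).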
3442 
3443 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3444 
3445   const unsigned Opc = Inst.getOpcode();
3446   const MCInstrDesc &Desc = MII.get(Opc);
3447 
3448   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3449     return true;
3450   if (!Desc.mayLoad() || !Desc.mayStore())
3451     return true; // Not atomic
3452 
3453   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3454   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3455 
3456   // This is an incomplete check because image_atomic_cmpswap
3457   // may only use 0x3 and 0xf while other atomic operations
3458   // may use 0x1 and 0x3. However these limitations are
3459   // verified when we check that dmask matches dst size.
3460   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3461 }
3462 
3463 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3464 
3465   const unsigned Opc = Inst.getOpcode();
3466   const MCInstrDesc &Desc = MII.get(Opc);
3467 
3468   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3469     return true;
3470 
3471   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3472   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3473 
3474   // GATHER4 instructions use dmask in a different fashion compared to
3475   // other MIMG instructions. The only useful DMASK values are
3476   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3477   // (red,red,red,red) etc.) The ISA document doesn't mention
3478   // this.
3479   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3480 }
3481 
3482 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3483   const unsigned Opc = Inst.getOpcode();
3484   const MCInstrDesc &Desc = MII.get(Opc);
3485 
3486   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3487     return true;
3488 
3489   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3490   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3491       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3492 
3493   if (!BaseOpcode->MSAA)
3494     return true;
3495 
3496   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3497   assert(DimIdx != -1);
3498 
3499   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3500   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3501 
3502   return DimInfo->MSAA;
3503 }
3504 
3505 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) {
3507   switch (Opcode) {
3508   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3509   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3510   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3511     return true;
3512   default:
3513     return false;
3514   }
3515 }
3516 
3517 // movrels* opcodes should only allow VGPRs as src0.
3518 // This is specified in the .td descriptions for vop1/vop3,
3519 // but SDWA is handled differently. See isSDWAOperand.
3520 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3521                                       const OperandVector &Operands) {
3522 
3523   const unsigned Opc = Inst.getOpcode();
3524   const MCInstrDesc &Desc = MII.get(Opc);
3525 
3526   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3527     return true;
3528 
3529   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3530   assert(Src0Idx != -1);
3531 
3532   SMLoc ErrLoc;
3533   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3534   if (Src0.isReg()) {
3535     auto Reg = mc2PseudoReg(Src0.getReg());
3536     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3537     if (!isSGPR(Reg, TRI))
3538       return true;
3539     ErrLoc = getRegLoc(Reg, Operands);
3540   } else {
3541     ErrLoc = getConstLoc(Operands);
3542   }
3543 
3544   Error(ErrLoc, "source operand must be a VGPR");
3545   return false;
3546 }
3547 
3548 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3549                                           const OperandVector &Operands) {
3550 
3551   const unsigned Opc = Inst.getOpcode();
3552 
3553   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3554     return true;
3555 
3556   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3557   assert(Src0Idx != -1);
3558 
3559   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3560   if (!Src0.isReg())
3561     return true;
3562 
3563   auto Reg = mc2PseudoReg(Src0.getReg());
3564   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3565   if (isSGPR(Reg, TRI)) {
3566     Error(getRegLoc(Reg, Operands),
3567           "source operand must be either a VGPR or an inline constant");
3568     return false;
3569   }
3570 
3571   return true;
3572 }
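// Illustrative sketch (gfx908 syntax assumed, hypothetical operands):
//   v_accvgpr_write_b32 a0, v1    // accepted: VGPR source
//   v_accvgpr_write_b32 a0, 1     // accepted: inline constant source
//   v_accvgpr_write_b32 a0, s0    // rejected: SGPR source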
3573 
3574 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3575   switch (Inst.getOpcode()) {
3576   default:
3577     return true;
3578   case V_DIV_SCALE_F32_gfx6_gfx7:
3579   case V_DIV_SCALE_F32_vi:
3580   case V_DIV_SCALE_F32_gfx10:
3581   case V_DIV_SCALE_F64_gfx6_gfx7:
3582   case V_DIV_SCALE_F64_vi:
3583   case V_DIV_SCALE_F64_gfx10:
3584     break;
3585   }
3586 
3587   // TODO: Check that src0 = src1 or src2.
3588 
3589   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3590                     AMDGPU::OpName::src1_modifiers,
3591                     AMDGPU::OpName::src2_modifiers}) {
3592     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3593             .getImm() &
3594         SISrcMods::ABS) {
3595       return false;
3596     }
3597   }
3598 
3599   return true;
3600 }
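// Illustrative sketch (hypothetical operands): v_div_scale_* uses a VOP3B
// encoding, which has no room for the abs modifier, so
//   v_div_scale_f32 v0, vcc, |v1|, v2, v3
// would be rejected while the same instruction without |...| is accepted.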
3601 
3602 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3603 
3604   const unsigned Opc = Inst.getOpcode();
3605   const MCInstrDesc &Desc = MII.get(Opc);
3606 
3607   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3608     return true;
3609 
3610   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3611   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3612     if (isCI() || isSI())
3613       return false;
3614   }
3615 
3616   return true;
3617 }
3618 
3619 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3620   const unsigned Opc = Inst.getOpcode();
3621   const MCInstrDesc &Desc = MII.get(Opc);
3622 
3623   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3624     return true;
3625 
3626   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3627   if (DimIdx < 0)
3628     return true;
3629 
3630   long Imm = Inst.getOperand(DimIdx).getImm();
3631   if (Imm < 0 || Imm >= 8)
3632     return false;
3633 
3634   return true;
3635 }
3636 
3637 static bool IsRevOpcode(const unsigned Opcode) {
3639   switch (Opcode) {
3640   case AMDGPU::V_SUBREV_F32_e32:
3641   case AMDGPU::V_SUBREV_F32_e64:
3642   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3643   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3644   case AMDGPU::V_SUBREV_F32_e32_vi:
3645   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3646   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3647   case AMDGPU::V_SUBREV_F32_e64_vi:
3648 
3649   case AMDGPU::V_SUBREV_CO_U32_e32:
3650   case AMDGPU::V_SUBREV_CO_U32_e64:
3651   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3652   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3653 
3654   case AMDGPU::V_SUBBREV_U32_e32:
3655   case AMDGPU::V_SUBBREV_U32_e64:
3656   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3657   case AMDGPU::V_SUBBREV_U32_e32_vi:
3658   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3659   case AMDGPU::V_SUBBREV_U32_e64_vi:
3660 
3661   case AMDGPU::V_SUBREV_U32_e32:
3662   case AMDGPU::V_SUBREV_U32_e64:
3663   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3664   case AMDGPU::V_SUBREV_U32_e32_vi:
3665   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3666   case AMDGPU::V_SUBREV_U32_e64_vi:
3667 
3668   case AMDGPU::V_SUBREV_F16_e32:
3669   case AMDGPU::V_SUBREV_F16_e64:
3670   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3671   case AMDGPU::V_SUBREV_F16_e32_vi:
3672   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3673   case AMDGPU::V_SUBREV_F16_e64_vi:
3674 
3675   case AMDGPU::V_SUBREV_U16_e32:
3676   case AMDGPU::V_SUBREV_U16_e64:
3677   case AMDGPU::V_SUBREV_U16_e32_vi:
3678   case AMDGPU::V_SUBREV_U16_e64_vi:
3679 
3680   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3681   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3682   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3683 
3684   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3685   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3686 
3687   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3688   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3689 
3690   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3691   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3692 
3693   case AMDGPU::V_LSHRREV_B32_e32:
3694   case AMDGPU::V_LSHRREV_B32_e64:
3695   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3696   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3697   case AMDGPU::V_LSHRREV_B32_e32_vi:
3698   case AMDGPU::V_LSHRREV_B32_e64_vi:
3699   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3700   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3701 
3702   case AMDGPU::V_ASHRREV_I32_e32:
3703   case AMDGPU::V_ASHRREV_I32_e64:
3704   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3705   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3706   case AMDGPU::V_ASHRREV_I32_e32_vi:
3707   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3708   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3709   case AMDGPU::V_ASHRREV_I32_e64_vi:
3710 
3711   case AMDGPU::V_LSHLREV_B32_e32:
3712   case AMDGPU::V_LSHLREV_B32_e64:
3713   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3714   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3715   case AMDGPU::V_LSHLREV_B32_e32_vi:
3716   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3717   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3718   case AMDGPU::V_LSHLREV_B32_e64_vi:
3719 
3720   case AMDGPU::V_LSHLREV_B16_e32:
3721   case AMDGPU::V_LSHLREV_B16_e64:
3722   case AMDGPU::V_LSHLREV_B16_e32_vi:
3723   case AMDGPU::V_LSHLREV_B16_e64_vi:
3724   case AMDGPU::V_LSHLREV_B16_gfx10:
3725 
3726   case AMDGPU::V_LSHRREV_B16_e32:
3727   case AMDGPU::V_LSHRREV_B16_e64:
3728   case AMDGPU::V_LSHRREV_B16_e32_vi:
3729   case AMDGPU::V_LSHRREV_B16_e64_vi:
3730   case AMDGPU::V_LSHRREV_B16_gfx10:
3731 
3732   case AMDGPU::V_ASHRREV_I16_e32:
3733   case AMDGPU::V_ASHRREV_I16_e64:
3734   case AMDGPU::V_ASHRREV_I16_e32_vi:
3735   case AMDGPU::V_ASHRREV_I16_e64_vi:
3736   case AMDGPU::V_ASHRREV_I16_gfx10:
3737 
3738   case AMDGPU::V_LSHLREV_B64_e64:
3739   case AMDGPU::V_LSHLREV_B64_gfx10:
3740   case AMDGPU::V_LSHLREV_B64_vi:
3741 
3742   case AMDGPU::V_LSHRREV_B64_e64:
3743   case AMDGPU::V_LSHRREV_B64_gfx10:
3744   case AMDGPU::V_LSHRREV_B64_vi:
3745 
3746   case AMDGPU::V_ASHRREV_I64_e64:
3747   case AMDGPU::V_ASHRREV_I64_gfx10:
3748   case AMDGPU::V_ASHRREV_I64_vi:
3749 
3750   case AMDGPU::V_PK_LSHLREV_B16:
3751   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3752   case AMDGPU::V_PK_LSHLREV_B16_vi:
3753 
3754   case AMDGPU::V_PK_LSHRREV_B16:
3755   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3756   case AMDGPU::V_PK_LSHRREV_B16_vi:
3757   case AMDGPU::V_PK_ASHRREV_I16:
3758   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3759   case AMDGPU::V_PK_ASHRREV_I16_vi:
3760     return true;
3761   default:
3762     return false;
3763   }
3764 }
3765 
3766 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3767 
3768   using namespace SIInstrFlags;
3769   const unsigned Opcode = Inst.getOpcode();
3770   const MCInstrDesc &Desc = MII.get(Opcode);
3771 
3772   // The lds_direct register is defined so that it can be used
3773   // with 9-bit operands only. Ignore encodings which do not accept them.
3774   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3775   if ((Desc.TSFlags & Enc) == 0)
3776     return None;
3777 
3778   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3779     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3780     if (SrcIdx == -1)
3781       break;
3782     const auto &Src = Inst.getOperand(SrcIdx);
3783     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3784 
3785       if (isGFX90A())
3786         return StringRef("lds_direct is not supported on this GPU");
3787 
3788       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3789         return StringRef("lds_direct cannot be used with this instruction");
3790 
3791       if (SrcName != OpName::src0)
3792         return StringRef("lds_direct may be used as src0 only");
3793     }
3794   }
3795 
3796   return None;
3797 }
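// Illustrative sketch (hypothetical operands): lds_direct is only accepted
// as src0 of the VOP encodings checked above, e.g.
//   v_mov_b32 v0, lds_direct        // accepted (src0)
//   v_add_f32 v0, v1, lds_direct    // rejected: may be used as src0 only
// It is also rejected for SDWA and *rev* opcodes, and entirely on gfx90a.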
3798 
3799 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3800   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3801     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3802     if (Op.isFlatOffset())
3803       return Op.getStartLoc();
3804   }
3805   return getLoc();
3806 }
3807 
3808 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3809                                          const OperandVector &Operands) {
3810   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3811   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3812     return true;
3813 
3814   auto Opcode = Inst.getOpcode();
3815   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3816   assert(OpNum != -1);
3817 
3818   const auto &Op = Inst.getOperand(OpNum);
3819   if (!hasFlatOffsets() && Op.getImm() != 0) {
3820     Error(getFlatOffsetLoc(Operands),
3821           "flat offset modifier is not supported on this GPU");
3822     return false;
3823   }
3824 
3825   // For a plain FLAT segment the offset must be non-negative (the MSB is
3826   // ignored and forced to zero); global/scratch segments take a signed offset.
3827   if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
3828     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
3829     if (!isIntN(OffsetSize, Op.getImm())) {
3830       Error(getFlatOffsetLoc(Operands),
3831             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
3832       return false;
3833     }
3834   } else {
3835     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
3836     if (!isUIntN(OffsetSize, Op.getImm())) {
3837       Error(getFlatOffsetLoc(Operands),
3838             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
3839       return false;
3840     }
3841   }
3842 
3843   return true;
3844 }
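// Illustrative sketch (hypothetical operands; offset bit widths vary by
// target): global/scratch instructions take a signed offset while plain
// FLAT instructions take an unsigned one, e.g.
//   global_load_dword v0, v[2:3], off offset:-16   // accepted (signed range)
//   flat_load_dword v0, v[2:3] offset:-16          // rejected: must be unsigned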
3845 
3846 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3847   // Start with second operand because SMEM Offset cannot be dst or src0.
3848   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
3849     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3850     if (Op.isSMEMOffset())
3851       return Op.getStartLoc();
3852   }
3853   return getLoc();
3854 }
3855 
3856 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3857                                          const OperandVector &Operands) {
3858   if (isCI() || isSI())
3859     return true;
3860 
3861   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3862   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3863     return true;
3864 
3865   auto Opcode = Inst.getOpcode();
3866   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3867   if (OpNum == -1)
3868     return true;
3869 
3870   const auto &Op = Inst.getOperand(OpNum);
3871   if (!Op.isImm())
3872     return true;
3873 
3874   uint64_t Offset = Op.getImm();
3875   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3876   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3877       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3878     return true;
3879 
3880   Error(getSMEMOffsetLoc(Operands),
3881         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3882                                "expected a 21-bit signed offset");
3883 
3884   return false;
3885 }
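// Illustrative sketch (hypothetical operands): s_load_dword s4, s[0:1], -0x10
// is accepted on gfx9+ (21-bit signed offset) but rejected on gfx8 and for
// buffer forms, which only allow a 20-bit unsigned offset.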
3886 
3887 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3888   unsigned Opcode = Inst.getOpcode();
3889   const MCInstrDesc &Desc = MII.get(Opcode);
3890   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3891     return true;
3892 
3893   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3894   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3895 
3896   const int OpIndices[] = { Src0Idx, Src1Idx };
3897 
3898   unsigned NumExprs = 0;
3899   unsigned NumLiterals = 0;
3900   uint32_t LiteralValue;
3901 
3902   for (int OpIdx : OpIndices) {
3903     if (OpIdx == -1) break;
3904 
3905     const MCOperand &MO = Inst.getOperand(OpIdx);
3906     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3907     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3908       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3909         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3910         if (NumLiterals == 0 || LiteralValue != Value) {
3911           LiteralValue = Value;
3912           ++NumLiterals;
3913         }
3914       } else if (MO.isExpr()) {
3915         ++NumExprs;
3916       }
3917     }
3918   }
3919 
3920   return NumLiterals + NumExprs <= 1;
3921 }
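// Illustrative sketch (hypothetical operands): SOP2/SOPC instructions have a
// single 32-bit literal slot, so two distinct literal values are rejected
// while reusing one value is fine:
//   s_add_u32 s0, 0x12345678, 0x12345678   // accepted: one literal value
//   s_add_u32 s0, 0x12345678, 0x87654321   // rejected: two literal values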
3922 
3923 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3924   const unsigned Opc = Inst.getOpcode();
3925   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3926       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3927     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3928     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3929 
3930     if (OpSel & ~3)
3931       return false;
3932   }
3933   return true;
3934 }
3935 
3936 // Check if VCC register matches wavefront size
3937 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3938   auto FB = getFeatureBits();
3939   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3940     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3941 }
3942 
3943 // A VOP3 literal is only allowed on GFX10+, and only one may be used.
3944 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
3945                                           const OperandVector &Operands) {
3946   unsigned Opcode = Inst.getOpcode();
3947   const MCInstrDesc &Desc = MII.get(Opcode);
3948   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3949     return true;
3950 
3951   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3952   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3953   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3954 
3955   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3956 
3957   unsigned NumExprs = 0;
3958   unsigned NumLiterals = 0;
3959   uint32_t LiteralValue;
3960 
3961   for (int OpIdx : OpIndices) {
3962     if (OpIdx == -1) break;
3963 
3964     const MCOperand &MO = Inst.getOperand(OpIdx);
3965     if (!MO.isImm() && !MO.isExpr())
3966       continue;
3967     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
3968       continue;
3969 
3970     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3971         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
3972       Error(getConstLoc(Operands),
3973             "inline constants are not allowed for this operand");
3974       return false;
3975     }
3976 
3977     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3978       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3979       if (NumLiterals == 0 || LiteralValue != Value) {
3980         LiteralValue = Value;
3981         ++NumLiterals;
3982       }
3983     } else if (MO.isExpr()) {
3984       ++NumExprs;
3985     }
3986   }
3987   NumLiterals += NumExprs;
3988 
3989   if (!NumLiterals)
3990     return true;
3991 
3992   if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
3993     Error(getLitLoc(Operands), "literal operands are not supported");
3994     return false;
3995   }
3996 
3997   if (NumLiterals > 1) {
3998     Error(getLitLoc(Operands), "only one literal operand is allowed");
3999     return false;
4000   }
4001 
4002   return true;
4003 }
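// Illustrative sketch (hypothetical operands): on targets without
// FeatureVOP3Literal (pre-GFX10) any literal in a VOP3 source is rejected,
// e.g. v_add3_u32 v0, v1, v2, 0x12345678; on GFX10 it is accepted, but a
// second distinct literal value would still be rejected.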
4004 
4005 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4006 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4007                          const MCRegisterInfo *MRI) {
4008   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4009   if (OpIdx < 0)
4010     return -1;
4011 
4012   const MCOperand &Op = Inst.getOperand(OpIdx);
4013   if (!Op.isReg())
4014     return -1;
4015 
4016   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4017   auto Reg = Sub ? Sub : Op.getReg();
4018   const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4019   return AGRP32.contains(Reg) ? 1 : 0;
4020 }
4021 
4022 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4023   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4024   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4025                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4026                   SIInstrFlags::DS)) == 0)
4027     return true;
4028 
4029   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4030                                                       : AMDGPU::OpName::vdata;
4031 
4032   const MCRegisterInfo *MRI = getMRI();
4033   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4034   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4035 
4036   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4037     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4038     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4039       return false;
4040   }
4041 
4042   auto FB = getFeatureBits();
4043   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4044     if (DataAreg < 0 || DstAreg < 0)
4045       return true;
4046     return DstAreg == DataAreg;
4047   }
4048 
4049   return DstAreg < 1 && DataAreg < 1;
4050 }
4051 
4052 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4053   auto FB = getFeatureBits();
4054   if (!FB[AMDGPU::FeatureGFX90AInsts])
4055     return true;
4056 
4057   const MCRegisterInfo *MRI = getMRI();
4058   const MCRegisterClass &VGRP32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4059   const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4060   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4061     const MCOperand &Op = Inst.getOperand(I);
4062     if (!Op.isReg())
4063       continue;
4064 
4065     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4066     if (!Sub)
4067       continue;
4068 
4069     if (VGRP32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4070       return false;
4071     if (AGRP32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4072       return false;
4073   }
4074 
4075   return true;
4076 }
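// Illustrative sketch (gfx90a syntax assumed, hypothetical operands): on
// gfx90a any VGPR or AGPR tuple must start at an even register, so
//   global_load_dwordx2 v[2:3], v[4:5], off   // accepted
//   global_load_dwordx2 v[3:4], v[4:5], off   // rejected: tuple not 64-bit aligned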
4077 
4078 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4079                                             const OperandVector &Operands,
4080                                             const SMLoc &IDLoc) {
4081   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4082                                            AMDGPU::OpName::cpol);
4083   if (CPolPos == -1)
4084     return true;
4085 
4086   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4087 
4088   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4089   if ((TSFlags & (SIInstrFlags::SMRD)) &&
4090       (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
4091     Error(IDLoc, "invalid cache policy for SMRD instruction");
4092     return false;
4093   }
4094 
4095   if (isGFX90A() && (CPol & CPol::SCC)) {
4096     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4097     StringRef CStr(S.getPointer());
4098     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4099     Error(S, "scc is not supported on this GPU");
4100     return false;
4101   }
4102 
4103   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4104     return true;
4105 
4106   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4107     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4108       Error(IDLoc, "instruction must use glc");
4109       return false;
4110     }
4111   } else {
4112     if (CPol & CPol::GLC) {
4113       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4114       StringRef CStr(S.getPointer());
4115       S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
4116       Error(S, "instruction must not use glc");
4117       return false;
4118     }
4119   }
4120 
4121   return true;
4122 }
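// Illustrative sketch (hypothetical operands): non-MIMG atomics that return
// the previous value must use glc, and non-returning forms must not:
//   global_atomic_add v0, v[2:3], v4, off glc   // accepted: returning atomic
//   global_atomic_add v[2:3], v4, off glc       // rejected: must not use glc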
4123 
4124 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4125                                           const SMLoc &IDLoc,
4126                                           const OperandVector &Operands) {
4127   if (auto ErrMsg = validateLdsDirect(Inst)) {
4128     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4129     return false;
4130   }
4131   if (!validateSOPLiteral(Inst)) {
4132     Error(getLitLoc(Operands),
4133       "only one literal operand is allowed");
4134     return false;
4135   }
4136   if (!validateVOP3Literal(Inst, Operands)) {
4137     return false;
4138   }
4139   if (!validateConstantBusLimitations(Inst, Operands)) {
4140     return false;
4141   }
4142   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4143     return false;
4144   }
4145   if (!validateIntClampSupported(Inst)) {
4146     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4147       "integer clamping is not supported on this GPU");
4148     return false;
4149   }
4150   if (!validateOpSel(Inst)) {
4151     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4152       "invalid op_sel operand");
4153     return false;
4154   }
4155   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4156   if (!validateMIMGD16(Inst)) {
4157     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4158       "d16 modifier is not supported on this GPU");
4159     return false;
4160   }
4161   if (!validateMIMGDim(Inst)) {
4162     Error(IDLoc, "dim modifier is required on this GPU");
4163     return false;
4164   }
4165   if (!validateMIMGMSAA(Inst)) {
4166     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4167           "invalid dim; must be MSAA type");
4168     return false;
4169   }
4170   if (!validateMIMGDataSize(Inst)) {
4171     Error(IDLoc,
4172       "image data size does not match dmask and tfe");
4173     return false;
4174   }
4175   if (!validateMIMGAddrSize(Inst)) {
4176     Error(IDLoc,
4177       "image address size does not match dim and a16");
4178     return false;
4179   }
4180   if (!validateMIMGAtomicDMask(Inst)) {
4181     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4182       "invalid atomic image dmask");
4183     return false;
4184   }
4185   if (!validateMIMGGatherDMask(Inst)) {
4186     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4187       "invalid image_gather dmask: only one bit must be set");
4188     return false;
4189   }
4190   if (!validateMovrels(Inst, Operands)) {
4191     return false;
4192   }
4193   if (!validateFlatOffset(Inst, Operands)) {
4194     return false;
4195   }
4196   if (!validateSMEMOffset(Inst, Operands)) {
4197     return false;
4198   }
4199   if (!validateMAIAccWrite(Inst, Operands)) {
4200     return false;
4201   }
4202   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4203     return false;
4204   }
4205 
4206   if (!validateAGPRLdSt(Inst)) {
4207     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4208     ? "invalid register class: data and dst should be all VGPR or AGPR"
4209     : "invalid register class: agpr loads and stores not supported on this GPU"
4210     );
4211     return false;
4212   }
4213   if (!validateVGPRAlign(Inst)) {
4214     Error(IDLoc,
4215       "invalid register class: vgpr tuples must be 64 bit aligned");
4216     return false;
4217   }
4218 
4219   if (!validateDivScale(Inst)) {
4220     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4221     return false;
4222   }
4226 
4227   return true;
4228 }
4229 
4230 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4231                                             const FeatureBitset &FBS,
4232                                             unsigned VariantID = 0);
4233 
4234 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4235                                 const FeatureBitset &AvailableFeatures,
4236                                 unsigned VariantID);
4237 
4238 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4239                                        const FeatureBitset &FBS) {
4240   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4241 }
4242 
4243 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4244                                        const FeatureBitset &FBS,
4245                                        ArrayRef<unsigned> Variants) {
4246   for (auto Variant : Variants) {
4247     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4248       return true;
4249   }
4250 
4251   return false;
4252 }
4253 
4254 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4255                                                   const SMLoc &IDLoc) {
4256   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4257 
4258   // Check if requested instruction variant is supported.
4259   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4260     return false;
4261 
4262   // This instruction is not supported.
4263   // Clear any other pending errors because they are no longer relevant.
4264   getParser().clearPendingErrors();
4265 
4266   // Requested instruction variant is not supported.
4267   // Check if any other variants are supported.
4268   StringRef VariantName = getMatchedVariantName();
4269   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4270     return Error(IDLoc,
4271                  Twine(VariantName,
4272                        " variant of this instruction is not supported"));
4273   }
4274 
4275   // Finally check if this instruction is supported on any other GPU.
4276   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4277     return Error(IDLoc, "instruction not supported on this GPU");
4278   }
4279 
4280   // Instruction not supported on any GPU. Probably a typo.
4281   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4282   return Error(IDLoc, "invalid instruction" + Suggestion);
4283 }
4284 
4285 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4286                                               OperandVector &Operands,
4287                                               MCStreamer &Out,
4288                                               uint64_t &ErrorInfo,
4289                                               bool MatchingInlineAsm) {
4290   MCInst Inst;
4291   unsigned Result = Match_Success;
4292   for (auto Variant : getMatchedVariants()) {
4293     uint64_t EI;
4294     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4295                                   Variant);
4296     // Match statuses are ordered from least to most specific; the most
4297     // specific status seen across all variants is used as the result:
4298     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4299     if ((R == Match_Success) ||
4300         (R == Match_PreferE32) ||
4301         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4302         (R == Match_InvalidOperand && Result != Match_MissingFeature
4303                                    && Result != Match_PreferE32) ||
4304         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4305                                    && Result != Match_MissingFeature
4306                                    && Result != Match_PreferE32)) {
4307       Result = R;
4308       ErrorInfo = EI;
4309     }
4310     if (R == Match_Success)
4311       break;
4312   }
4313 
4314   if (Result == Match_Success) {
4315     if (!validateInstruction(Inst, IDLoc, Operands)) {
4316       return true;
4317     }
4318     Inst.setLoc(IDLoc);
4319     Out.emitInstruction(Inst, getSTI());
4320     return false;
4321   }
4322 
4323   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4324   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4325     return true;
4326   }
4327 
4328   switch (Result) {
4329   default: break;
4330   case Match_MissingFeature:
4331     // It has been verified that the specified instruction
4332     // mnemonic is valid. A match was found but it requires
4333     // features which are not supported on this GPU.
4334     return Error(IDLoc, "operands are not valid for this GPU or mode");
4335 
4336   case Match_InvalidOperand: {
4337     SMLoc ErrorLoc = IDLoc;
4338     if (ErrorInfo != ~0ULL) {
4339       if (ErrorInfo >= Operands.size()) {
4340         return Error(IDLoc, "too few operands for instruction");
4341       }
4342       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4343       if (ErrorLoc == SMLoc())
4344         ErrorLoc = IDLoc;
4345     }
4346     return Error(ErrorLoc, "invalid operand for instruction");
4347   }
4348 
4349   case Match_PreferE32:
4350     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4351                         "should be encoded as e32");
4352   case Match_MnemonicFail:
4353     llvm_unreachable("Invalid instructions should have been handled already");
4354   }
4355   llvm_unreachable("Implement any new match types added!");
4356 }
4357 
4358 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4359   int64_t Tmp = -1;
4360   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4361     return true;
4362   }
4363   if (getParser().parseAbsoluteExpression(Tmp)) {
4364     return true;
4365   }
4366   Ret = static_cast<uint32_t>(Tmp);
4367   return false;
4368 }
4369 
4370 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4371                                                uint32_t &Minor) {
4372   if (ParseAsAbsoluteExpression(Major))
4373     return TokError("invalid major version");
4374 
4375   if (!trySkipToken(AsmToken::Comma))
4376     return TokError("minor version number required, comma expected");
4377 
4378   if (ParseAsAbsoluteExpression(Minor))
4379     return TokError("invalid minor version");
4380 
4381   return false;
4382 }
4383 
4384 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4385   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4386     return TokError("directive only supported for amdgcn architecture");
4387 
4388   std::string TargetIDDirective;
4389   SMLoc TargetStart = getTok().getLoc();
4390   if (getParser().parseEscapedString(TargetIDDirective))
4391     return true;
4392 
4393   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4394   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4395     return getParser().Error(TargetRange.Start,
4396         (Twine(".amdgcn_target directive's target id ") +
4397          Twine(TargetIDDirective) +
4398          Twine(" does not match the specified target id ") +
4399          Twine(getTargetStreamer().getTargetID()->toString())).str());
4400 
4401   return false;
4402 }
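// Illustrative sketch (the target id string is an assumption for a specific
// build): the parsed string must match the target id the streamer was
// configured with, e.g.
//   .amdgcn_target "amdgcn-amd-amdhsa--gfx90a:xnack+"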
4403 
4404 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4405   return Error(Range.Start, "value out of range", Range);
4406 }
4407 
4408 bool AMDGPUAsmParser::calculateGPRBlocks(
4409     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4410     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4411     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4412     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4413   // TODO(scott.linder): These calculations are duplicated from
4414   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4415   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4416 
4417   unsigned NumVGPRs = NextFreeVGPR;
4418   unsigned NumSGPRs = NextFreeSGPR;
4419 
4420   if (Version.Major >= 10)
4421     NumSGPRs = 0;
4422   else {
4423     unsigned MaxAddressableNumSGPRs =
4424         IsaInfo::getAddressableNumSGPRs(&getSTI());
4425 
4426     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4427         NumSGPRs > MaxAddressableNumSGPRs)
4428       return OutOfRangeError(SGPRRange);
4429 
4430     NumSGPRs +=
4431         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4432 
4433     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4434         NumSGPRs > MaxAddressableNumSGPRs)
4435       return OutOfRangeError(SGPRRange);
4436 
4437     if (Features.test(FeatureSGPRInitBug))
4438       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4439   }
4440 
4441   VGPRBlocks =
4442       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4443   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4444 
4445   return false;
4446 }
4447 
4448 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4449   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4450     return TokError("directive only supported for amdgcn architecture");
4451 
4452   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4453     return TokError("directive only supported for amdhsa OS");
4454 
4455   StringRef KernelName;
4456   if (getParser().parseIdentifier(KernelName))
4457     return true;
4458 
4459   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4460 
4461   StringSet<> Seen;
4462 
4463   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4464 
4465   SMRange VGPRRange;
4466   uint64_t NextFreeVGPR = 0;
4467   uint64_t AccumOffset = 0;
4468   SMRange SGPRRange;
4469   uint64_t NextFreeSGPR = 0;
4470   unsigned UserSGPRCount = 0;
4471   bool ReserveVCC = true;
4472   bool ReserveFlatScr = true;
4473   Optional<bool> EnableWavefrontSize32;
4474 
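  // The directive body parsed below looks roughly like the following
  // (illustrative; the kernel name and register counts are hypothetical):
  //   .amdhsa_kernel my_kernel
  //     .amdhsa_next_free_vgpr 8
  //     .amdhsa_next_free_sgpr 16
  //     .amdhsa_user_sgpr_kernarg_segment_ptr 1
  //   .end_amdhsa_kernel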
4475   while (true) {
4476     while (trySkipToken(AsmToken::EndOfStatement));
4477 
4478     StringRef ID;
4479     SMRange IDRange = getTok().getLocRange();
4480     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4481       return true;
4482 
4483     if (ID == ".end_amdhsa_kernel")
4484       break;
4485 
4486     if (Seen.find(ID) != Seen.end())
4487       return TokError(".amdhsa_ directives cannot be repeated");
4488     Seen.insert(ID);
4489 
4490     SMLoc ValStart = getLoc();
4491     int64_t IVal;
4492     if (getParser().parseAbsoluteExpression(IVal))
4493       return true;
4494     SMLoc ValEnd = getLoc();
4495     SMRange ValRange = SMRange(ValStart, ValEnd);
4496 
4497     if (IVal < 0)
4498       return OutOfRangeError(ValRange);
4499 
4500     uint64_t Val = IVal;
4501 
4502 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4503   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4504     return OutOfRangeError(RANGE);                                             \
4505   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4506 
4507     if (ID == ".amdhsa_group_segment_fixed_size") {
4508       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4509         return OutOfRangeError(ValRange);
4510       KD.group_segment_fixed_size = Val;
4511     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4512       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4513         return OutOfRangeError(ValRange);
4514       KD.private_segment_fixed_size = Val;
4515     } else if (ID == ".amdhsa_kernarg_size") {
4516       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4517         return OutOfRangeError(ValRange);
4518       KD.kernarg_size = Val;
4519     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4520       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4521                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4522                        Val, ValRange);
4523       if (Val)
4524         UserSGPRCount += 4;
4525     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4526       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4527                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4528                        ValRange);
4529       if (Val)
4530         UserSGPRCount += 2;
4531     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4532       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4533                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4534                        ValRange);
4535       if (Val)
4536         UserSGPRCount += 2;
4537     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4538       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4539                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4540                        Val, ValRange);
4541       if (Val)
4542         UserSGPRCount += 2;
4543     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4544       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4545                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4546                        ValRange);
4547       if (Val)
4548         UserSGPRCount += 2;
4549     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4550       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4551                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4552                        ValRange);
4553       if (Val)
4554         UserSGPRCount += 2;
4555     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4556       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4557                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4558                        Val, ValRange);
4559       if (Val)
4560         UserSGPRCount += 1;
4561     } else if (ID == ".amdhsa_wavefront_size32") {
4562       if (IVersion.Major < 10)
4563         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4564       EnableWavefrontSize32 = Val;
4565       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4566                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4567                        Val, ValRange);
4568     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4569       PARSE_BITS_ENTRY(
4570           KD.compute_pgm_rsrc2,
4571           COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val,
4572           ValRange);
4573     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4574       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4575                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4576                        ValRange);
4577     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4578       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4579                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4580                        ValRange);
4581     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4582       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4583                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4584                        ValRange);
4585     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4586       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4587                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4588                        ValRange);
4589     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4590       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4591                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4592                        ValRange);
4593     } else if (ID == ".amdhsa_next_free_vgpr") {
4594       VGPRRange = ValRange;
4595       NextFreeVGPR = Val;
4596     } else if (ID == ".amdhsa_next_free_sgpr") {
4597       SGPRRange = ValRange;
4598       NextFreeSGPR = Val;
4599     } else if (ID == ".amdhsa_accum_offset") {
4600       if (!isGFX90A())
4601         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4602       AccumOffset = Val;
4603     } else if (ID == ".amdhsa_reserve_vcc") {
4604       if (!isUInt<1>(Val))
4605         return OutOfRangeError(ValRange);
4606       ReserveVCC = Val;
4607     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4608       if (IVersion.Major < 7)
4609         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4610       if (!isUInt<1>(Val))
4611         return OutOfRangeError(ValRange);
4612       ReserveFlatScr = Val;
4613     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4614       if (IVersion.Major < 8)
4615         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4616       if (!isUInt<1>(Val))
4617         return OutOfRangeError(ValRange);
4618       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
4619         return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
4620                                  IDRange);
4621     } else if (ID == ".amdhsa_float_round_mode_32") {
4622       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4623                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4624     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4625       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4626                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4627     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4628       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4629                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4630     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4631       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4632                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4633                        ValRange);
4634     } else if (ID == ".amdhsa_dx10_clamp") {
4635       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4636                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4637     } else if (ID == ".amdhsa_ieee_mode") {
4638       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4639                        Val, ValRange);
4640     } else if (ID == ".amdhsa_fp16_overflow") {
4641       if (IVersion.Major < 9)
4642         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
4643       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4644                        ValRange);
4645     } else if (ID == ".amdhsa_tg_split") {
4646       if (!isGFX90A())
4647         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4648       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
4649                        ValRange);
4650     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4651       if (IVersion.Major < 10)
4652         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4653       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4654                        ValRange);
4655     } else if (ID == ".amdhsa_memory_ordered") {
4656       if (IVersion.Major < 10)
4657         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4658       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4659                        ValRange);
4660     } else if (ID == ".amdhsa_forward_progress") {
4661       if (IVersion.Major < 10)
4662         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4663       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4664                        ValRange);
4665     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4666       PARSE_BITS_ENTRY(
4667           KD.compute_pgm_rsrc2,
4668           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4669           ValRange);
4670     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4671       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4672                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4673                        Val, ValRange);
4674     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4675       PARSE_BITS_ENTRY(
4676           KD.compute_pgm_rsrc2,
4677           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4678           ValRange);
4679     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4680       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4681                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4682                        Val, ValRange);
4683     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4684       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4685                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4686                        Val, ValRange);
4687     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4688       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4689                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4690                        Val, ValRange);
4691     } else if (ID == ".amdhsa_exception_int_div_zero") {
4692       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4693                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4694                        Val, ValRange);
4695     } else {
4696       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
4697     }
4698 
4699 #undef PARSE_BITS_ENTRY
4700   }
4701 
4702   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4703     return TokError(".amdhsa_next_free_vgpr directive is required");
4704 
4705   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4706     return TokError(".amdhsa_next_free_sgpr directive is required");
4707 
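  // Fold the raw next-free VGPR/SGPR counts into the granulated block
  // encodings used by compute_pgm_rsrc1. calculateGPRBlocks also accounts
  // for the VCC, flat scratch and XNACK reservations requested above.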
4708   unsigned VGPRBlocks;
4709   unsigned SGPRBlocks;
4710   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4711                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
4712                          EnableWavefrontSize32, NextFreeVGPR,
4713                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4714                          SGPRBlocks))
4715     return true;
4716 
4717   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4718           VGPRBlocks))
4719     return OutOfRangeError(VGPRRange);
4720   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4721                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4722 
4723   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4724           SGPRBlocks))
4725     return OutOfRangeError(SGPRRange);
4726   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4727                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4728                   SGPRBlocks);
4729 
4730   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4731     return TokError("too many user SGPRs enabled");
4732   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4733                   UserSGPRCount);
4734 
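  // gfx90a kernels must specify .amdhsa_accum_offset. The value must be a
  // multiple of 4 in [4..256], must not exceed the total VGPR allocation,
  // and is encoded in compute_pgm_rsrc3 as (AccumOffset / 4 - 1).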
4735   if (isGFX90A()) {
4736     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
4737       return TokError(".amdhsa_accum_offset directive is required");
4738     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
4739       return TokError("accum_offset should be in range [4..256] in "
4740                       "increments of 4");
4741     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
4742       return TokError("accum_offset exceeds total VGPR allocation");
4743     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
4744                     (AccumOffset / 4 - 1));
4745   }
4746 
4747   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4748       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4749       ReserveFlatScr);
4750   return false;
4751 }
4752 
4753 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4754   uint32_t Major;
4755   uint32_t Minor;
4756 
4757   if (ParseDirectiveMajorMinor(Major, Minor))
4758     return true;
4759 
4760   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4761   return false;
4762 }
4763 
4764 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4765   uint32_t Major;
4766   uint32_t Minor;
4767   uint32_t Stepping;
4768   StringRef VendorName;
4769   StringRef ArchName;
4770 
4771   // If this directive has no arguments, then use the ISA version for the
4772   // targeted GPU.
4773   if (isToken(AsmToken::EndOfStatement)) {
4774     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4775     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
4776                                                         ISA.Stepping,
4777                                                         "AMD", "AMDGPU");
4778     return false;
4779   }
4780 
4781   if (ParseDirectiveMajorMinor(Major, Minor))
4782     return true;
4783 
4784   if (!trySkipToken(AsmToken::Comma))
4785     return TokError("stepping version number required, comma expected");
4786 
4787   if (ParseAsAbsoluteExpression(Stepping))
4788     return TokError("invalid stepping version");
4789 
4790   if (!trySkipToken(AsmToken::Comma))
4791     return TokError("vendor name required, comma expected");
4792 
4793   if (!parseString(VendorName, "invalid vendor name"))
4794     return true;
4795 
4796   if (!trySkipToken(AsmToken::Comma))
4797     return TokError("arch name required, comma expected");
4798 
4799   if (!parseString(ArchName, "invalid arch name"))
4800     return true;
4801 
4802   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
4803                                                       VendorName, ArchName);
4804   return false;
4805 }
4806 
4807 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4808                                                amd_kernel_code_t &Header) {
4809   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4810   // assembly for backwards compatibility.
4811   if (ID == "max_scratch_backing_memory_byte_size") {
4812     Parser.eatToEndOfStatement();
4813     return false;
4814   }
4815 
4816   SmallString<40> ErrStr;
4817   raw_svector_ostream Err(ErrStr);
4818   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4819     return TokError(Err.str());
4820   }
4821   Lex();
4822 
4823   if (ID == "enable_wavefront_size32") {
4824     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4825       if (!isGFX10Plus())
4826         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4827       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4828         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4829     } else {
4830       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4831         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4832     }
4833   }
4834 
4835   if (ID == "wavefront_size") {
4836     if (Header.wavefront_size == 5) {
4837       if (!isGFX10Plus())
4838         return TokError("wavefront_size=5 is only allowed on GFX10+");
4839       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4840         return TokError("wavefront_size=5 requires +WavefrontSize32");
4841     } else if (Header.wavefront_size == 6) {
4842       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4843         return TokError("wavefront_size=6 requires +WavefrontSize64");
4844     }
4845   }
4846 
4847   if (ID == "enable_wgp_mode") {
4848     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
4849         !isGFX10Plus())
4850       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4851   }
4852 
4853   if (ID == "enable_mem_ordered") {
4854     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
4855         !isGFX10Plus())
4856       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4857   }
4858 
4859   if (ID == "enable_fwd_progress") {
4860     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
4861         !isGFX10Plus())
4862       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4863   }
4864 
4865   return false;
4866 }
4867 
4868 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4869   amd_kernel_code_t Header;
4870   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4871 
4872   while (true) {
4873     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
4874     // will set the current token to EndOfStatement.
4875     while(trySkipToken(AsmToken::EndOfStatement));
4876 
4877     StringRef ID;
4878     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
4879       return true;
4880 
4881     if (ID == ".end_amd_kernel_code_t")
4882       break;
4883 
4884     if (ParseAMDKernelCodeTValue(ID, Header))
4885       return true;
4886   }
4887 
4888   getTargetStreamer().EmitAMDKernelCodeT(Header);
4889 
4890   return false;
4891 }
4892 
4893 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4894   StringRef KernelName;
4895   if (!parseId(KernelName, "expected symbol name"))
4896     return true;
4897 
4898   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4899                                            ELF::STT_AMDGPU_HSA_KERNEL);
4900 
4901   KernelScope.initialize(getContext());
4902   return false;
4903 }
4904 
4905 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4906   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4907     return Error(getLoc(),
4908                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4909                  "architectures");
4910   }
4911 
4912   auto TargetIDDirective = getLexer().getTok().getStringContents();
4913   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4914     return Error(getParser().getTok().getLoc(), "target id must match options");
4915 
4916   getTargetStreamer().EmitISAVersion();
4917   Lex();
4918 
4919   return false;
4920 }
4921 
4922 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4923   const char *AssemblerDirectiveBegin;
4924   const char *AssemblerDirectiveEnd;
4925   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4926       isHsaAbiVersion3Or4(&getSTI())
4927           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4928                             HSAMD::V3::AssemblerDirectiveEnd)
4929           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4930                             HSAMD::AssemblerDirectiveEnd);
4931 
4932   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4933     return Error(getLoc(),
4934                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4935                  "not available on non-amdhsa OSes")).str());
4936   }
4937 
4938   std::string HSAMetadataString;
4939   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4940                           HSAMetadataString))
4941     return true;
4942 
4943   if (isHsaAbiVersion3Or4(&getSTI())) {
4944     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4945       return Error(getLoc(), "invalid HSA metadata");
4946   } else {
4947     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4948       return Error(getLoc(), "invalid HSA metadata");
4949   }
4950 
4951   return false;
4952 }
4953 
4954 /// Common code to parse out a block of text (typically YAML) between start and
4955 /// end directives.
4956 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4957                                           const char *AssemblerDirectiveEnd,
4958                                           std::string &CollectString) {
4959 
4960   raw_string_ostream CollectStream(CollectString);
4961 
4962   getLexer().setSkipSpace(false);
4963 
4964   bool FoundEnd = false;
4965   while (!isToken(AsmToken::Eof)) {
4966     while (isToken(AsmToken::Space)) {
4967       CollectStream << getTokenStr();
4968       Lex();
4969     }
4970 
4971     if (trySkipId(AssemblerDirectiveEnd)) {
4972       FoundEnd = true;
4973       break;
4974     }
4975 
4976     CollectStream << Parser.parseStringToEndOfStatement()
4977                   << getContext().getAsmInfo()->getSeparatorString();
4978 
4979     Parser.eatToEndOfStatement();
4980   }
4981 
4982   getLexer().setSkipSpace(true);
4983 
4984   if (isToken(AsmToken::Eof) && !FoundEnd) {
4985     return TokError(Twine("expected directive ") +
4986                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4987   }
4988 
4989   CollectStream.flush();
4990   return false;
4991 }
4992 
4993 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4994 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4995   std::string String;
4996   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4997                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4998     return true;
4999 
5000   auto PALMetadata = getTargetStreamer().getPALMetadata();
5001   if (!PALMetadata->setFromString(String))
5002     return Error(getLoc(), "invalid PAL metadata");
5003   return false;
5004 }
5005 
5006 /// Parse the assembler directive for old linear-format PAL metadata.
5007 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5008   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5009     return Error(getLoc(),
5010                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5011                  "not available on non-amdpal OSes")).str());
5012   }
5013 
5014   auto PALMetadata = getTargetStreamer().getPALMetadata();
5015   PALMetadata->setLegacy();
5016   for (;;) {
5017     uint32_t Key, Value;
5018     if (ParseAsAbsoluteExpression(Key)) {
5019       return TokError(Twine("invalid value in ") +
5020                       Twine(PALMD::AssemblerDirective));
5021     }
5022     if (!trySkipToken(AsmToken::Comma)) {
5023       return TokError(Twine("expected an even number of values in ") +
5024                       Twine(PALMD::AssemblerDirective));
5025     }
5026     if (ParseAsAbsoluteExpression(Value)) {
5027       return TokError(Twine("invalid value in ") +
5028                       Twine(PALMD::AssemblerDirective));
5029     }
5030     PALMetadata->setRegister(Key, Value);
5031     if (!trySkipToken(AsmToken::Comma))
5032       break;
5033   }
5034   return false;
5035 }
5036 
5037 /// ParseDirectiveAMDGPULDS
5038 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
5039 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5040   if (getParser().checkForValidSection())
5041     return true;
5042 
5043   StringRef Name;
5044   SMLoc NameLoc = getLoc();
5045   if (getParser().parseIdentifier(Name))
5046     return TokError("expected identifier in directive");
5047 
5048   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5049   if (parseToken(AsmToken::Comma, "expected ','"))
5050     return true;
5051 
5052   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5053 
5054   int64_t Size;
5055   SMLoc SizeLoc = getLoc();
5056   if (getParser().parseAbsoluteExpression(Size))
5057     return true;
5058   if (Size < 0)
5059     return Error(SizeLoc, "size must be non-negative");
5060   if (Size > LocalMemorySize)
5061     return Error(SizeLoc, "size is too large");
5062 
5063   int64_t Alignment = 4;
5064   if (trySkipToken(AsmToken::Comma)) {
5065     SMLoc AlignLoc = getLoc();
5066     if (getParser().parseAbsoluteExpression(Alignment))
5067       return true;
5068     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5069       return Error(AlignLoc, "alignment must be a power of two");
5070 
5071     // Alignment larger than the size of LDS is possible in theory, as long
5072     // as the linker manages to place the symbol at address 0, but we do want
5073     // to make sure the alignment fits nicely into a 32-bit integer.
5074     if (Alignment >= 1u << 31)
5075       return Error(AlignLoc, "alignment is too large");
5076   }
5077 
5078   if (parseToken(AsmToken::EndOfStatement,
5079                  "unexpected token in '.amdgpu_lds' directive"))
5080     return true;
5081 
5082   Symbol->redefineIfPossible();
5083   if (!Symbol->isUndefined())
5084     return Error(NameLoc, "invalid symbol redefinition");
5085 
5086   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5087   return false;
5088 }
5089 
5090 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5091   StringRef IDVal = DirectiveID.getString();
5092 
5093   if (isHsaAbiVersion3Or4(&getSTI())) {
5094     if (IDVal == ".amdhsa_kernel")
5095      return ParseDirectiveAMDHSAKernel();
5096 
5097     // TODO: Restructure/combine with PAL metadata directive.
5098     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5099       return ParseDirectiveHSAMetadata();
5100   } else {
5101     if (IDVal == ".hsa_code_object_version")
5102       return ParseDirectiveHSACodeObjectVersion();
5103 
5104     if (IDVal == ".hsa_code_object_isa")
5105       return ParseDirectiveHSACodeObjectISA();
5106 
5107     if (IDVal == ".amd_kernel_code_t")
5108       return ParseDirectiveAMDKernelCodeT();
5109 
5110     if (IDVal == ".amdgpu_hsa_kernel")
5111       return ParseDirectiveAMDGPUHsaKernel();
5112 
5113     if (IDVal == ".amd_amdgpu_isa")
5114       return ParseDirectiveISAVersion();
5115 
5116     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5117       return ParseDirectiveHSAMetadata();
5118   }
5119 
5120   if (IDVal == ".amdgcn_target")
5121     return ParseDirectiveAMDGCNTarget();
5122 
5123   if (IDVal == ".amdgpu_lds")
5124     return ParseDirectiveAMDGPULDS();
5125 
5126   if (IDVal == PALMD::AssemblerDirectiveBegin)
5127     return ParseDirectivePALMetadataBegin();
5128 
5129   if (IDVal == PALMD::AssemblerDirective)
5130     return ParseDirectivePALMetadata();
5131 
5132   return true;
5133 }
5134 
5135 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5136                                            unsigned RegNo) {
5137 
5138   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
5139        R.isValid(); ++R) {
5140     if (*R == RegNo)
5141       return isGFX9Plus();
5142   }
5143 
5144   // GFX10 has 2 more SGPRs: 104 and 105.
5145   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
5146        R.isValid(); ++R) {
5147     if (*R == RegNo)
5148       return hasSGPR104_SGPR105();
5149   }
5150 
5151   switch (RegNo) {
5152   case AMDGPU::SRC_SHARED_BASE:
5153   case AMDGPU::SRC_SHARED_LIMIT:
5154   case AMDGPU::SRC_PRIVATE_BASE:
5155   case AMDGPU::SRC_PRIVATE_LIMIT:
5156   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5157     return isGFX9Plus();
5158   case AMDGPU::TBA:
5159   case AMDGPU::TBA_LO:
5160   case AMDGPU::TBA_HI:
5161   case AMDGPU::TMA:
5162   case AMDGPU::TMA_LO:
5163   case AMDGPU::TMA_HI:
5164     return !isGFX9Plus();
5165   case AMDGPU::XNACK_MASK:
5166   case AMDGPU::XNACK_MASK_LO:
5167   case AMDGPU::XNACK_MASK_HI:
5168     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5169   case AMDGPU::SGPR_NULL:
5170     return isGFX10Plus();
5171   default:
5172     break;
5173   }
5174 
5175   if (isCI())
5176     return true;
5177 
5178   if (isSI() || isGFX10Plus()) {
5179     // No flat_scr on SI.
5180     // On GFX10 flat scratch is not a valid register operand and can only be
5181     // accessed with s_setreg/s_getreg.
5182     switch (RegNo) {
5183     case AMDGPU::FLAT_SCR:
5184     case AMDGPU::FLAT_SCR_LO:
5185     case AMDGPU::FLAT_SCR_HI:
5186       return false;
5187     default:
5188       return true;
5189     }
5190   }
5191 
5192   // VI only has 102 SGPRs, so make sure we aren't trying to use the two extra
5193   // SGPRs that SI/CI have.
5194   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
5195        R.isValid(); ++R) {
5196     if (*R == RegNo)
5197       return hasSGPR102_SGPR103();
5198   }
5199 
5200   return true;
5201 }
5202 
5203 OperandMatchResultTy
5204 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5205                               OperandMode Mode) {
5206   // Try to parse with a custom parser
5207   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5208 
5209   // If we successfully parsed the operand or if there was an error parsing,
5210   // we are done.
5211   //
5212   // If we are parsing after we reach EndOfStatement then this means we
5213   // are appending default values to the Operands list.  This is only done
5214   // by a custom parser, so we shouldn't continue on to the generic parsing.
5215   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5216       isToken(AsmToken::EndOfStatement))
5217     return ResTy;
5218 
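  // In NSA (non-sequential address) mode a MIMG address may be written as a
  // bracketed list of individual registers, e.g. "[v0, v2, v5]". Parse the
  // registers and, if more than one was given, wrap them in bracket tokens.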
5219   SMLoc RBraceLoc;
5220   SMLoc LBraceLoc = getLoc();
5221   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5222     unsigned Prefix = Operands.size();
5223 
5224     for (;;) {
5225       auto Loc = getLoc();
5226       ResTy = parseReg(Operands);
5227       if (ResTy == MatchOperand_NoMatch)
5228         Error(Loc, "expected a register");
5229       if (ResTy != MatchOperand_Success)
5230         return MatchOperand_ParseFail;
5231 
5232       RBraceLoc = getLoc();
5233       if (trySkipToken(AsmToken::RBrac))
5234         break;
5235 
5236       if (!skipToken(AsmToken::Comma,
5237                      "expected a comma or a closing square bracket")) {
5238         return MatchOperand_ParseFail;
5239       }
5240     }
5241 
5242     if (Operands.size() - Prefix > 1) {
5243       Operands.insert(Operands.begin() + Prefix,
5244                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5245       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5246     }
5247 
5248     return MatchOperand_Success;
5249   }
5250 
5251   return parseRegOrImm(Operands);
5252 }
5253 
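// Strip a forced-encoding suffix (_e64, _e32, _dpp or _sdwa) from the
// mnemonic and record the corresponding forced encoding for matching.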
5254 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5255   // Clear any forced encodings from the previous instruction.
5256   setForcedEncodingSize(0);
5257   setForcedDPP(false);
5258   setForcedSDWA(false);
5259 
5260   if (Name.endswith("_e64")) {
5261     setForcedEncodingSize(64);
5262     return Name.substr(0, Name.size() - 4);
5263   } else if (Name.endswith("_e32")) {
5264     setForcedEncodingSize(32);
5265     return Name.substr(0, Name.size() - 4);
5266   } else if (Name.endswith("_dpp")) {
5267     setForcedDPP(true);
5268     return Name.substr(0, Name.size() - 4);
5269   } else if (Name.endswith("_sdwa")) {
5270     setForcedSDWA(true);
5271     return Name.substr(0, Name.size() - 5);
5272   }
5273   return Name;
5274 }
5275 
5276 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5277                                        StringRef Name,
5278                                        SMLoc NameLoc, OperandVector &Operands) {
5279   // Add the instruction mnemonic
5280   Name = parseMnemonicSuffix(Name);
5281   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5282 
5283   bool IsMIMG = Name.startswith("image_");
5284 
5285   while (!trySkipToken(AsmToken::EndOfStatement)) {
5286     OperandMode Mode = OperandMode_Default;
5287     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5288       Mode = OperandMode_NSA;
5289     CPolSeen = 0;
5290     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5291 
5292     if (Res != MatchOperand_Success) {
5293       checkUnsupportedInstruction(Name, NameLoc);
5294       if (!Parser.hasPendingError()) {
5295         // FIXME: use real operand location rather than the current location.
5296         StringRef Msg =
5297           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5298                                             "not a valid operand.";
5299         Error(getLoc(), Msg);
5300       }
5301       while (!trySkipToken(AsmToken::EndOfStatement)) {
5302         lex();
5303       }
5304       return true;
5305     }
5306 
5307     // Eat the comma or space if there is one.
5308     trySkipToken(AsmToken::Comma);
5309   }
5310 
5311   return false;
5312 }
5313 
5314 //===----------------------------------------------------------------------===//
5315 // Utility functions
5316 //===----------------------------------------------------------------------===//
5317 
5318 OperandMatchResultTy
5319 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5320 
5321   if (!trySkipId(Prefix, AsmToken::Colon))
5322     return MatchOperand_NoMatch;
5323 
5324   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5325 }
5326 
5327 OperandMatchResultTy
5328 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5329                                     AMDGPUOperand::ImmTy ImmTy,
5330                                     bool (*ConvertResult)(int64_t&)) {
5331   SMLoc S = getLoc();
5332   int64_t Value = 0;
5333 
5334   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5335   if (Res != MatchOperand_Success)
5336     return Res;
5337 
5338   if (ConvertResult && !ConvertResult(Value)) {
5339     Error(S, "invalid " + StringRef(Prefix) + " value.");
5340   }
5341 
5342   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5343   return MatchOperand_Success;
5344 }
5345 
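// Parse an operand of the form "<prefix>:[b0,b1,...]" where each element is
// 0 or 1 and up to four elements are allowed (e.g. op_sel:[0,0,1,1]); the
// bits are packed into a single immediate.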
5346 OperandMatchResultTy
5347 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5348                                              OperandVector &Operands,
5349                                              AMDGPUOperand::ImmTy ImmTy,
5350                                              bool (*ConvertResult)(int64_t&)) {
5351   SMLoc S = getLoc();
5352   if (!trySkipId(Prefix, AsmToken::Colon))
5353     return MatchOperand_NoMatch;
5354 
5355   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5356     return MatchOperand_ParseFail;
5357 
5358   unsigned Val = 0;
5359   const unsigned MaxSize = 4;
5360 
5361   // FIXME: How to verify the number of elements matches the number of src
5362   // operands?
5363   for (int I = 0; ; ++I) {
5364     int64_t Op;
5365     SMLoc Loc = getLoc();
5366     if (!parseExpr(Op))
5367       return MatchOperand_ParseFail;
5368 
5369     if (Op != 0 && Op != 1) {
5370       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5371       return MatchOperand_ParseFail;
5372     }
5373 
5374     Val |= (Op << I);
5375 
5376     if (trySkipToken(AsmToken::RBrac))
5377       break;
5378 
5379     if (I + 1 == MaxSize) {
5380       Error(getLoc(), "expected a closing square bracket");
5381       return MatchOperand_ParseFail;
5382     }
5383 
5384     if (!skipToken(AsmToken::Comma, "expected a comma"))
5385       return MatchOperand_ParseFail;
5386   }
5387 
5388   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5389   return MatchOperand_Success;
5390 }
5391 
5392 OperandMatchResultTy
5393 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5394                                AMDGPUOperand::ImmTy ImmTy) {
5395   int64_t Bit;
5396   SMLoc S = getLoc();
5397 
5398   if (trySkipId(Name)) {
5399     Bit = 1;
5400   } else if (trySkipId("no", Name)) {
5401     Bit = 0;
5402   } else {
5403     return MatchOperand_NoMatch;
5404   }
5405 
5406   if (Name == "r128" && !hasMIMG_R128()) {
5407     Error(S, "r128 modifier is not supported on this GPU");
5408     return MatchOperand_ParseFail;
5409   }
5410   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5411     Error(S, "a16 modifier is not supported on this GPU");
5412     return MatchOperand_ParseFail;
5413   }
5414 
5415   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5416     ImmTy = AMDGPUOperand::ImmTyR128A16;
5417 
5418   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5419   return MatchOperand_Success;
5420 }
5421 
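// Parse a cache policy modifier (glc/slc/dlc/scc or its "no"-prefixed form)
// and fold it into a single CPol immediate operand, rejecting duplicates and
// modifiers that are not supported on the current subtarget.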
5422 OperandMatchResultTy
5423 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5424   unsigned CPolOn = 0;
5425   unsigned CPolOff = 0;
5426   SMLoc S = getLoc();
5427 
5428   if (trySkipId("glc"))
5429     CPolOn = AMDGPU::CPol::GLC;
5430   else if (trySkipId("noglc"))
5431     CPolOff = AMDGPU::CPol::GLC;
5432   else if (trySkipId("slc"))
5433     CPolOn = AMDGPU::CPol::SLC;
5434   else if (trySkipId("noslc"))
5435     CPolOff = AMDGPU::CPol::SLC;
5436   else if (trySkipId("dlc"))
5437     CPolOn = AMDGPU::CPol::DLC;
5438   else if (trySkipId("nodlc"))
5439     CPolOff = AMDGPU::CPol::DLC;
5440   else if (trySkipId("scc"))
5441     CPolOn = AMDGPU::CPol::SCC;
5442   else if (trySkipId("noscc"))
5443     CPolOff = AMDGPU::CPol::SCC;
5444   else
5445     return MatchOperand_NoMatch;
5446 
5447   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5448     Error(S, "dlc modifier is not supported on this GPU");
5449     return MatchOperand_ParseFail;
5450   }
5451 
5452   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5453     Error(S, "scc modifier is not supported on this GPU");
5454     return MatchOperand_ParseFail;
5455   }
5456 
5457   if (CPolSeen & (CPolOn | CPolOff)) {
5458     Error(S, "duplicate cache policy modifier");
5459     return MatchOperand_ParseFail;
5460   }
5461 
5462   CPolSeen |= (CPolOn | CPolOff);
5463 
5464   for (unsigned I = 1; I != Operands.size(); ++I) {
5465     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5466     if (Op.isCPol()) {
5467       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5468       return MatchOperand_Success;
5469     }
5470   }
5471 
5472   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5473                                               AMDGPUOperand::ImmTyCPol));
5474 
5475   return MatchOperand_Success;
5476 }
5477 
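// Append an optional immediate operand to Inst: use the parsed value if the
// operand was seen (recorded in OptionalIdx), otherwise emit Default.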
5478 static void addOptionalImmOperand(
5479   MCInst& Inst, const OperandVector& Operands,
5480   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5481   AMDGPUOperand::ImmTy ImmT,
5482   int64_t Default = 0) {
5483   auto i = OptionalIdx.find(ImmT);
5484   if (i != OptionalIdx.end()) {
5485     unsigned Idx = i->second;
5486     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5487   } else {
5488     Inst.addOperand(MCOperand::createImm(Default));
5489   }
5490 }
5491 
5492 OperandMatchResultTy
5493 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5494                                        StringRef &Value,
5495                                        SMLoc &StringLoc) {
5496   if (!trySkipId(Prefix, AsmToken::Colon))
5497     return MatchOperand_NoMatch;
5498 
5499   StringLoc = getLoc();
5500   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5501                                                   : MatchOperand_ParseFail;
5502 }
5503 
5504 //===----------------------------------------------------------------------===//
5505 // MTBUF format
5506 //===----------------------------------------------------------------------===//
5507 
5508 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5509                                   int64_t MaxVal,
5510                                   int64_t &Fmt) {
5511   int64_t Val;
5512   SMLoc Loc = getLoc();
5513 
5514   auto Res = parseIntWithPrefix(Pref, Val);
5515   if (Res == MatchOperand_ParseFail)
5516     return false;
5517   if (Res == MatchOperand_NoMatch)
5518     return true;
5519 
5520   if (Val < 0 || Val > MaxVal) {
5521     Error(Loc, Twine("out of range ", StringRef(Pref)));
5522     return false;
5523   }
5524 
5525   Fmt = Val;
5526   return true;
5527 }
5528 
5529 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5530 // values to live in a joint format operand in the MCInst encoding.
5531 OperandMatchResultTy
5532 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5533   using namespace llvm::AMDGPU::MTBUFFormat;
5534 
5535   int64_t Dfmt = DFMT_UNDEF;
5536   int64_t Nfmt = NFMT_UNDEF;
5537 
5538   // dfmt and nfmt can appear in either order, and each is optional.
5539   for (int I = 0; I < 2; ++I) {
5540     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5541       return MatchOperand_ParseFail;
5542 
5543     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5544       return MatchOperand_ParseFail;
5545     }
5546     // Skip optional comma between dfmt/nfmt
5547     // but guard against 2 commas following each other.
5548     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5549         !peekToken().is(AsmToken::Comma)) {
5550       trySkipToken(AsmToken::Comma);
5551     }
5552   }
5553 
5554   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5555     return MatchOperand_NoMatch;
5556 
5557   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5558   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5559 
5560   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5561   return MatchOperand_Success;
5562 }
5563 
5564 OperandMatchResultTy
5565 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5566   using namespace llvm::AMDGPU::MTBUFFormat;
5567 
5568   int64_t Fmt = UFMT_UNDEF;
5569 
5570   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5571     return MatchOperand_ParseFail;
5572 
5573   if (Fmt == UFMT_UNDEF)
5574     return MatchOperand_NoMatch;
5575 
5576   Format = Fmt;
5577   return MatchOperand_Success;
5578 }
5579 
5580 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5581                                     int64_t &Nfmt,
5582                                     StringRef FormatStr,
5583                                     SMLoc Loc) {
5584   using namespace llvm::AMDGPU::MTBUFFormat;
5585   int64_t Format;
5586 
5587   Format = getDfmt(FormatStr);
5588   if (Format != DFMT_UNDEF) {
5589     Dfmt = Format;
5590     return true;
5591   }
5592 
5593   Format = getNfmt(FormatStr, getSTI());
5594   if (Format != NFMT_UNDEF) {
5595     Nfmt = Format;
5596     return true;
5597   }
5598 
5599   Error(Loc, "unsupported format");
5600   return false;
5601 }
5602 
5603 OperandMatchResultTy
5604 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5605                                           SMLoc FormatLoc,
5606                                           int64_t &Format) {
5607   using namespace llvm::AMDGPU::MTBUFFormat;
5608 
5609   int64_t Dfmt = DFMT_UNDEF;
5610   int64_t Nfmt = NFMT_UNDEF;
5611   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5612     return MatchOperand_ParseFail;
5613 
5614   if (trySkipToken(AsmToken::Comma)) {
5615     StringRef Str;
5616     SMLoc Loc = getLoc();
5617     if (!parseId(Str, "expected a format string") ||
5618         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5619       return MatchOperand_ParseFail;
5620     }
5621     if (Dfmt == DFMT_UNDEF) {
5622       Error(Loc, "duplicate numeric format");
5623       return MatchOperand_ParseFail;
5624     } else if (Nfmt == NFMT_UNDEF) {
5625       Error(Loc, "duplicate data format");
5626       return MatchOperand_ParseFail;
5627     }
5628   }
5629 
5630   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5631   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5632 
5633   if (isGFX10Plus()) {
5634     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5635     if (Ufmt == UFMT_UNDEF) {
5636       Error(FormatLoc, "unsupported format");
5637       return MatchOperand_ParseFail;
5638     }
5639     Format = Ufmt;
5640   } else {
5641     Format = encodeDfmtNfmt(Dfmt, Nfmt);
5642   }
5643 
5644   return MatchOperand_Success;
5645 }
5646 
5647 OperandMatchResultTy
5648 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5649                                             SMLoc Loc,
5650                                             int64_t &Format) {
5651   using namespace llvm::AMDGPU::MTBUFFormat;
5652 
5653   auto Id = getUnifiedFormat(FormatStr);
5654   if (Id == UFMT_UNDEF)
5655     return MatchOperand_NoMatch;
5656 
5657   if (!isGFX10Plus()) {
5658     Error(Loc, "unified format is not supported on this GPU");
5659     return MatchOperand_ParseFail;
5660   }
5661 
5662   Format = Id;
5663   return MatchOperand_Success;
5664 }
5665 
5666 OperandMatchResultTy
5667 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5668   using namespace llvm::AMDGPU::MTBUFFormat;
5669   SMLoc Loc = getLoc();
5670 
5671   if (!parseExpr(Format))
5672     return MatchOperand_ParseFail;
5673   if (!isValidFormatEncoding(Format, getSTI())) {
5674     Error(Loc, "out of range format");
5675     return MatchOperand_ParseFail;
5676   }
5677 
5678   return MatchOperand_Success;
5679 }
5680 
5681 OperandMatchResultTy
5682 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5683   using namespace llvm::AMDGPU::MTBUFFormat;
5684 
5685   if (!trySkipId("format", AsmToken::Colon))
5686     return MatchOperand_NoMatch;
5687 
5688   if (trySkipToken(AsmToken::LBrac)) {
5689     StringRef FormatStr;
5690     SMLoc Loc = getLoc();
5691     if (!parseId(FormatStr, "expected a format string"))
5692       return MatchOperand_ParseFail;
5693 
5694     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5695     if (Res == MatchOperand_NoMatch)
5696       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5697     if (Res != MatchOperand_Success)
5698       return Res;
5699 
5700     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5701       return MatchOperand_ParseFail;
5702 
5703     return MatchOperand_Success;
5704   }
5705 
5706   return parseNumericFormat(Format);
5707 }
5708 
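// Parse the MTBUF format operand. The format may precede soffset (dfmt/nfmt
// on older targets, a numeric "format:" on GFX10+) or follow it as a
// symbolic or numeric "format:" value; in the latter case the placeholder
// operand pushed earlier is patched with the parsed value.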
5709 OperandMatchResultTy
5710 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5711   using namespace llvm::AMDGPU::MTBUFFormat;
5712 
5713   int64_t Format = getDefaultFormatEncoding(getSTI());
5714   OperandMatchResultTy Res;
5715   SMLoc Loc = getLoc();
5716 
5717   // Parse legacy format syntax.
5718   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5719   if (Res == MatchOperand_ParseFail)
5720     return Res;
5721 
5722   bool FormatFound = (Res == MatchOperand_Success);
5723 
5724   Operands.push_back(
5725     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5726 
5727   if (FormatFound)
5728     trySkipToken(AsmToken::Comma);
5729 
5730   if (isToken(AsmToken::EndOfStatement)) {
5731     // We are expecting an soffset operand,
5732     // but let the matcher handle the error.
5733     return MatchOperand_Success;
5734   }
5735 
5736   // Parse soffset.
5737   Res = parseRegOrImm(Operands);
5738   if (Res != MatchOperand_Success)
5739     return Res;
5740 
5741   trySkipToken(AsmToken::Comma);
5742 
5743   if (!FormatFound) {
5744     Res = parseSymbolicOrNumericFormat(Format);
5745     if (Res == MatchOperand_ParseFail)
5746       return Res;
5747     if (Res == MatchOperand_Success) {
5748       auto Size = Operands.size();
5749       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5750       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5751       Op.setImm(Format);
5752     }
5753     return MatchOperand_Success;
5754   }
5755 
5756   if (isId("format") && peekToken().is(AsmToken::Colon)) {
5757     Error(getLoc(), "duplicate format");
5758     return MatchOperand_ParseFail;
5759   }
5760   return MatchOperand_Success;
5761 }
5762 
5763 //===----------------------------------------------------------------------===//
5764 // ds
5765 //===----------------------------------------------------------------------===//
5766 
5767 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5768                                     const OperandVector &Operands) {
5769   OptionalImmIndexMap OptionalIdx;
5770 
5771   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5772     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5773 
5774     // Add the register arguments
5775     if (Op.isReg()) {
5776       Op.addRegOperands(Inst, 1);
5777       continue;
5778     }
5779 
5780     // Handle optional arguments
5781     OptionalIdx[Op.getImmTy()] = i;
5782   }
5783 
5784   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
5785   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
5786   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5787 
5788   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5789 }
5790 
5791 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
5792                                 bool IsGdsHardcoded) {
5793   OptionalImmIndexMap OptionalIdx;
5794 
5795   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5796     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5797 
5798     // Add the register arguments
5799     if (Op.isReg()) {
5800       Op.addRegOperands(Inst, 1);
5801       continue;
5802     }
5803 
5804     if (Op.isToken() && Op.getToken() == "gds") {
5805       IsGdsHardcoded = true;
5806       continue;
5807     }
5808 
5809     // Handle optional arguments
5810     OptionalIdx[Op.getImmTy()] = i;
5811   }
5812 
5813   AMDGPUOperand::ImmTy OffsetType =
5814     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
5815      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
5816      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
5817                                                       AMDGPUOperand::ImmTyOffset;
5818 
5819   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
5820 
5821   if (!IsGdsHardcoded) {
5822     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5823   }
5824   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5825 }
5826 
5827 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
5828   OptionalImmIndexMap OptionalIdx;
5829 
5830   unsigned OperandIdx[4];
5831   unsigned EnMask = 0;
5832   int SrcIdx = 0;
5833 
5834   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5835     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5836 
5837     // Add the register arguments
5838     if (Op.isReg()) {
5839       assert(SrcIdx < 4);
5840       OperandIdx[SrcIdx] = Inst.size();
5841       Op.addRegOperands(Inst, 1);
5842       ++SrcIdx;
5843       continue;
5844     }
5845 
5846     if (Op.isOff()) {
5847       assert(SrcIdx < 4);
5848       OperandIdx[SrcIdx] = Inst.size();
5849       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
5850       ++SrcIdx;
5851       continue;
5852     }
5853 
5854     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
5855       Op.addImmOperands(Inst, 1);
5856       continue;
5857     }
5858 
5859     if (Op.isToken() && Op.getToken() == "done")
5860       continue;
5861 
5862     // Handle optional arguments
5863     OptionalIdx[Op.getImmTy()] = i;
5864   }
5865 
5866   assert(SrcIdx == 4);
5867 
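  // For compressed exports the register given as the third source supplies
  // the second hardware source; clear the remaining source slots so the
  // enable mask below covers two components per active source.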
5868   bool Compr = false;
5869   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
5870     Compr = true;
5871     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
5872     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
5873     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
5874   }
5875 
5876   for (auto i = 0; i < SrcIdx; ++i) {
5877     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
5878       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
5879     }
5880   }
5881 
5882   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
5883   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
5884 
5885   Inst.addOperand(MCOperand::createImm(EnMask));
5886 }
5887 
5888 //===----------------------------------------------------------------------===//
5889 // s_waitcnt
5890 //===----------------------------------------------------------------------===//
5891 
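// Encode a single counter value into the combined s_waitcnt bitfield. A value
// that does not survive an encode/decode round trip is out of range; the
// "_sat" forms saturate it to the field's maximum instead of failing.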
5892 static bool
5893 encodeCnt(
5894   const AMDGPU::IsaVersion ISA,
5895   int64_t &IntVal,
5896   int64_t CntVal,
5897   bool Saturate,
5898   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
5899   unsigned (*decode)(const IsaVersion &Version, unsigned))
5900 {
5901   bool Failed = false;
5902 
5903   IntVal = encode(ISA, IntVal, CntVal);
5904   if (CntVal != decode(ISA, IntVal)) {
5905     if (Saturate) {
5906       IntVal = encode(ISA, IntVal, -1);
5907     } else {
5908       Failed = true;
5909     }
5910   }
5911   return Failed;
5912 }
5913 
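// Parse one counter term of an s_waitcnt operand, e.g. "vmcnt(0)" or
// "lgkmcnt_sat(7)", and merge it into IntVal.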
5914 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
5915 
5916   SMLoc CntLoc = getLoc();
5917   StringRef CntName = getTokenStr();
5918 
5919   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
5920       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
5921     return false;
5922 
5923   int64_t CntVal;
5924   SMLoc ValLoc = getLoc();
5925   if (!parseExpr(CntVal))
5926     return false;
5927 
5928   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5929 
5930   bool Failed = true;
5931   bool Sat = CntName.endswith("_sat");
5932 
5933   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
5934     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
5935   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
5936     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
5937   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
5938     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
5939   } else {
5940     Error(CntLoc, "invalid counter name " + CntName);
5941     return false;
5942   }
5943 
5944   if (Failed) {
5945     Error(ValLoc, "too large value for " + CntName);
5946     return false;
5947   }
5948 
5949   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
5950     return false;
5951 
5952   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
5953     if (isToken(AsmToken::EndOfStatement)) {
5954       Error(getLoc(), "expected a counter name");
5955       return false;
5956     }
5957   }
5958 
5959   return true;
5960 }
5961 
5962 OperandMatchResultTy
5963 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
5964   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5965   int64_t Waitcnt = getWaitcntBitMask(ISA);
5966   SMLoc S = getLoc();
5967 
5968   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
5969     while (!isToken(AsmToken::EndOfStatement)) {
5970       if (!parseCnt(Waitcnt))
5971         return MatchOperand_ParseFail;
5972     }
5973   } else {
5974     if (!parseExpr(Waitcnt))
5975       return MatchOperand_ParseFail;
5976   }
5977 
5978   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
5979   return MatchOperand_Success;
5980 }
5981 
5982 bool
5983 AMDGPUOperand::isSWaitCnt() const {
5984   return isImm();
5985 }
5986 
5987 //===----------------------------------------------------------------------===//
5988 // hwreg
5989 //===----------------------------------------------------------------------===//
5990 
5991 bool
5992 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
5993                                 OperandInfoTy &Offset,
5994                                 OperandInfoTy &Width) {
5995   using namespace llvm::AMDGPU::Hwreg;
5996 
5997   // The register may be specified by name or using a numeric code
5998   HwReg.Loc = getLoc();
5999   if (isToken(AsmToken::Identifier) &&
6000       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
6001     HwReg.IsSymbolic = true;
6002     lex(); // skip register name
6003   } else if (!parseExpr(HwReg.Id, "a register name")) {
6004     return false;
6005   }
6006 
6007   if (trySkipToken(AsmToken::RParen))
6008     return true;
6009 
6010   // parse optional params
6011   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6012     return false;
6013 
6014   Offset.Loc = getLoc();
6015   if (!parseExpr(Offset.Id))
6016     return false;
6017 
6018   if (!skipToken(AsmToken::Comma, "expected a comma"))
6019     return false;
6020 
6021   Width.Loc = getLoc();
6022   return parseExpr(Width.Id) &&
6023          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6024 }
6025 
6026 bool
6027 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6028                                const OperandInfoTy &Offset,
6029                                const OperandInfoTy &Width) {
6030 
6031   using namespace llvm::AMDGPU::Hwreg;
6032 
6033   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
6034     Error(HwReg.Loc,
6035           "specified hardware register is not supported on this GPU");
6036     return false;
6037   }
6038   if (!isValidHwreg(HwReg.Id)) {
6039     Error(HwReg.Loc,
6040           "invalid code of hardware register: only 6-bit values are legal");
6041     return false;
6042   }
6043   if (!isValidHwregOffset(Offset.Id)) {
6044     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6045     return false;
6046   }
6047   if (!isValidHwregWidth(Width.Id)) {
6048     Error(Width.Loc,
6049           "invalid bitfield width: only values from 1 to 32 are legal");
6050     return false;
6051   }
6052   return true;
6053 }
6054 
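// Parse a hwreg operand, either as "hwreg(<name|id>[, <offset>, <width>])"
// or as a plain 16-bit immediate (typically produced by a user macro).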
6055 OperandMatchResultTy
6056 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6057   using namespace llvm::AMDGPU::Hwreg;
6058 
6059   int64_t ImmVal = 0;
6060   SMLoc Loc = getLoc();
6061 
6062   if (trySkipId("hwreg", AsmToken::LParen)) {
6063     OperandInfoTy HwReg(ID_UNKNOWN_);
6064     OperandInfoTy Offset(OFFSET_DEFAULT_);
6065     OperandInfoTy Width(WIDTH_DEFAULT_);
6066     if (parseHwregBody(HwReg, Offset, Width) &&
6067         validateHwreg(HwReg, Offset, Width)) {
6068       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6069     } else {
6070       return MatchOperand_ParseFail;
6071     }
6072   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6073     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6074       Error(Loc, "invalid immediate: only 16-bit values are legal");
6075       return MatchOperand_ParseFail;
6076     }
6077   } else {
6078     return MatchOperand_ParseFail;
6079   }
6080 
6081   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6082   return MatchOperand_Success;
6083 }
6084 
6085 bool AMDGPUOperand::isHwreg() const {
6086   return isImmTy(ImmTyHwreg);
6087 }
6088 
6089 //===----------------------------------------------------------------------===//
6090 // sendmsg
6091 //===----------------------------------------------------------------------===//
6092 
6093 bool
6094 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6095                                   OperandInfoTy &Op,
6096                                   OperandInfoTy &Stream) {
6097   using namespace llvm::AMDGPU::SendMsg;
6098 
6099   Msg.Loc = getLoc();
6100   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
6101     Msg.IsSymbolic = true;
6102     lex(); // skip message name
6103   } else if (!parseExpr(Msg.Id, "a message name")) {
6104     return false;
6105   }
6106 
6107   if (trySkipToken(AsmToken::Comma)) {
6108     Op.IsDefined = true;
6109     Op.Loc = getLoc();
6110     if (isToken(AsmToken::Identifier) &&
6111         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6112       lex(); // skip operation name
6113     } else if (!parseExpr(Op.Id, "an operation name")) {
6114       return false;
6115     }
6116 
6117     if (trySkipToken(AsmToken::Comma)) {
6118       Stream.IsDefined = true;
6119       Stream.Loc = getLoc();
6120       if (!parseExpr(Stream.Id))
6121         return false;
6122     }
6123   }
6124 
6125   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6126 }
6127 
6128 bool
6129 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6130                                  const OperandInfoTy &Op,
6131                                  const OperandInfoTy &Stream) {
6132   using namespace llvm::AMDGPU::SendMsg;
6133 
6134   // Validation strictness depends on whether the message is specified
6135   // in a symbolic or in a numeric form. In the latter case
6136   // only the encoding possibility is checked.
6137   bool Strict = Msg.IsSymbolic;
6138 
6139   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
6140     Error(Msg.Loc, "invalid message id");
6141     return false;
6142   }
6143   if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
6144     if (Op.IsDefined) {
6145       Error(Op.Loc, "message does not support operations");
6146     } else {
6147       Error(Msg.Loc, "missing message operation");
6148     }
6149     return false;
6150   }
6151   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6152     Error(Op.Loc, "invalid operation id");
6153     return false;
6154   }
6155   if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
6156     Error(Stream.Loc, "message operation does not support streams");
6157     return false;
6158   }
6159   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6160     Error(Stream.Loc, "invalid message stream id");
6161     return false;
6162   }
6163   return true;
6164 }
6165 
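// Parse a sendmsg operand, either as "sendmsg(<msg>[, <op>[, <stream>]])" or
// as a plain 16-bit immediate (typically produced by a user macro).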
6166 OperandMatchResultTy
6167 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6168   using namespace llvm::AMDGPU::SendMsg;
6169 
6170   int64_t ImmVal = 0;
6171   SMLoc Loc = getLoc();
6172 
6173   if (trySkipId("sendmsg", AsmToken::LParen)) {
6174     OperandInfoTy Msg(ID_UNKNOWN_);
6175     OperandInfoTy Op(OP_NONE_);
6176     OperandInfoTy Stream(STREAM_ID_NONE_);
6177     if (parseSendMsgBody(Msg, Op, Stream) &&
6178         validateSendMsg(Msg, Op, Stream)) {
6179       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6180     } else {
6181       return MatchOperand_ParseFail;
6182     }
6183   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6184     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6185       Error(Loc, "invalid immediate: only 16-bit values are legal");
6186       return MatchOperand_ParseFail;
6187     }
6188   } else {
6189     return MatchOperand_ParseFail;
6190   }
6191 
6192   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6193   return MatchOperand_Success;
6194 }
6195 
6196 bool AMDGPUOperand::isSendMsg() const {
6197   return isImmTy(ImmTySendMsg);
6198 }
6199 
6200 //===----------------------------------------------------------------------===//
6201 // v_interp
6202 //===----------------------------------------------------------------------===//
6203 
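// A sketch of the interpolation operands handled below; the slot and
// attribute values are illustrative examples:
//   v_interp_p1_f32 v0, v1, attr0.x
//   v_interp_mov_f32 v2, p10, attr31.w
// Valid slots are p10, p20 and p0; attributes are attr0..attr63 with a
// channel suffix of .x, .y, .z or .w.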
6204 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6205   StringRef Str;
6206   SMLoc S = getLoc();
6207 
6208   if (!parseId(Str))
6209     return MatchOperand_NoMatch;
6210 
6211   int Slot = StringSwitch<int>(Str)
6212     .Case("p10", 0)
6213     .Case("p20", 1)
6214     .Case("p0", 2)
6215     .Default(-1);
6216 
6217   if (Slot == -1) {
6218     Error(S, "invalid interpolation slot");
6219     return MatchOperand_ParseFail;
6220   }
6221 
6222   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6223                                               AMDGPUOperand::ImmTyInterpSlot));
6224   return MatchOperand_Success;
6225 }
6226 
6227 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6228   StringRef Str;
6229   SMLoc S = getLoc();
6230 
6231   if (!parseId(Str))
6232     return MatchOperand_NoMatch;
6233 
6234   if (!Str.startswith("attr")) {
6235     Error(S, "invalid interpolation attribute");
6236     return MatchOperand_ParseFail;
6237   }
6238 
6239   StringRef Chan = Str.take_back(2);
6240   int AttrChan = StringSwitch<int>(Chan)
6241     .Case(".x", 0)
6242     .Case(".y", 1)
6243     .Case(".z", 2)
6244     .Case(".w", 3)
6245     .Default(-1);
6246   if (AttrChan == -1) {
6247     Error(S, "invalid or missing interpolation attribute channel");
6248     return MatchOperand_ParseFail;
6249   }
6250 
6251   Str = Str.drop_back(2).drop_front(4);
6252 
6253   uint8_t Attr;
6254   if (Str.getAsInteger(10, Attr)) {
6255     Error(S, "invalid or missing interpolation attribute number");
6256     return MatchOperand_ParseFail;
6257   }
6258 
6259   if (Attr > 63) {
6260     Error(S, "out of bounds interpolation attribute number");
6261     return MatchOperand_ParseFail;
6262   }
6263 
6264   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6265 
6266   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6267                                               AMDGPUOperand::ImmTyInterpAttr));
6268   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6269                                               AMDGPUOperand::ImmTyAttrChan));
6270   return MatchOperand_Success;
6271 }
6272 
6273 //===----------------------------------------------------------------------===//
6274 // exp
6275 //===----------------------------------------------------------------------===//
6276 
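// A sketch of the export target operand parsed below; the target names are
// illustrative examples (validity also depends on the subtarget):
//   exp mrt0 v0, v1, v2, v3
//   exp pos0 v4, v5, v6, v7 done
//   exp null off, off, off, off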
6277 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6278   using namespace llvm::AMDGPU::Exp;
6279 
6280   StringRef Str;
6281   SMLoc S = getLoc();
6282 
6283   if (!parseId(Str))
6284     return MatchOperand_NoMatch;
6285 
6286   unsigned Id = getTgtId(Str);
6287   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6288     Error(S, (Id == ET_INVALID) ?
6289                 "invalid exp target" :
6290                 "exp target is not supported on this GPU");
6291     return MatchOperand_ParseFail;
6292   }
6293 
6294   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6295                                               AMDGPUOperand::ImmTyExpTgt));
6296   return MatchOperand_Success;
6297 }
6298 
6299 //===----------------------------------------------------------------------===//
6300 // parser helpers
6301 //===----------------------------------------------------------------------===//
6302 
6303 bool
6304 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6305   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6306 }
6307 
6308 bool
6309 AMDGPUAsmParser::isId(const StringRef Id) const {
6310   return isId(getToken(), Id);
6311 }
6312 
6313 bool
6314 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6315   return getTokenKind() == Kind;
6316 }
6317 
6318 bool
6319 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6320   if (isId(Id)) {
6321     lex();
6322     return true;
6323   }
6324   return false;
6325 }
6326 
6327 bool
6328 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6329   if (isToken(AsmToken::Identifier)) {
6330     StringRef Tok = getTokenStr();
6331     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6332       lex();
6333       return true;
6334     }
6335   }
6336   return false;
6337 }
6338 
6339 bool
6340 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6341   if (isId(Id) && peekToken().is(Kind)) {
6342     lex();
6343     lex();
6344     return true;
6345   }
6346   return false;
6347 }
6348 
6349 bool
6350 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6351   if (isToken(Kind)) {
6352     lex();
6353     return true;
6354   }
6355   return false;
6356 }
6357 
6358 bool
6359 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6360                            const StringRef ErrMsg) {
6361   if (!trySkipToken(Kind)) {
6362     Error(getLoc(), ErrMsg);
6363     return false;
6364   }
6365   return true;
6366 }
6367 
6368 bool
6369 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6370   SMLoc S = getLoc();
6371 
6372   const MCExpr *Expr;
6373   if (Parser.parseExpression(Expr))
6374     return false;
6375 
6376   if (Expr->evaluateAsAbsolute(Imm))
6377     return true;
6378 
6379   if (Expected.empty()) {
6380     Error(S, "expected absolute expression");
6381   } else {
6382     Error(S, Twine("expected ", Expected) +
6383              Twine(" or an absolute expression"));
6384   }
6385   return false;
6386 }
6387 
6388 bool
6389 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6390   SMLoc S = getLoc();
6391 
6392   const MCExpr *Expr;
6393   if (Parser.parseExpression(Expr))
6394     return false;
6395 
6396   int64_t IntVal;
6397   if (Expr->evaluateAsAbsolute(IntVal)) {
6398     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6399   } else {
6400     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6401   }
6402   return true;
6403 }
6404 
6405 bool
6406 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6407   if (isToken(AsmToken::String)) {
6408     Val = getToken().getStringContents();
6409     lex();
6410     return true;
6411   } else {
6412     Error(getLoc(), ErrMsg);
6413     return false;
6414   }
6415 }
6416 
6417 bool
6418 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6419   if (isToken(AsmToken::Identifier)) {
6420     Val = getTokenStr();
6421     lex();
6422     return true;
6423   } else {
6424     if (!ErrMsg.empty())
6425       Error(getLoc(), ErrMsg);
6426     return false;
6427   }
6428 }
6429 
6430 AsmToken
6431 AMDGPUAsmParser::getToken() const {
6432   return Parser.getTok();
6433 }
6434 
6435 AsmToken
6436 AMDGPUAsmParser::peekToken() {
6437   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6438 }
6439 
6440 void
6441 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6442   auto TokCount = getLexer().peekTokens(Tokens);
6443 
6444   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6445     Tokens[Idx] = AsmToken(AsmToken::Error, "");
6446 }
6447 
6448 AsmToken::TokenKind
6449 AMDGPUAsmParser::getTokenKind() const {
6450   return getLexer().getKind();
6451 }
6452 
6453 SMLoc
6454 AMDGPUAsmParser::getLoc() const {
6455   return getToken().getLoc();
6456 }
6457 
6458 StringRef
6459 AMDGPUAsmParser::getTokenStr() const {
6460   return getToken().getString();
6461 }
6462 
6463 void
6464 AMDGPUAsmParser::lex() {
6465   Parser.Lex();
6466 }
6467 
6468 SMLoc
6469 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
6470                                const OperandVector &Operands) const {
6471   for (unsigned i = Operands.size() - 1; i > 0; --i) {
6472     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6473     if (Test(Op))
6474       return Op.getStartLoc();
6475   }
6476   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
6477 }
6478 
6479 SMLoc
6480 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
6481                            const OperandVector &Operands) const {
6482   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
6483   return getOperandLoc(Test, Operands);
6484 }
6485 
6486 SMLoc
6487 AMDGPUAsmParser::getRegLoc(unsigned Reg,
6488                            const OperandVector &Operands) const {
6489   auto Test = [=](const AMDGPUOperand& Op) {
6490     return Op.isRegKind() && Op.getReg() == Reg;
6491   };
6492   return getOperandLoc(Test, Operands);
6493 }
6494 
6495 SMLoc
6496 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
6497   auto Test = [](const AMDGPUOperand& Op) {
6498     return Op.IsImmKindLiteral() || Op.isExpr();
6499   };
6500   return getOperandLoc(Test, Operands);
6501 }
6502 
6503 SMLoc
6504 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
6505   auto Test = [](const AMDGPUOperand& Op) {
6506     return Op.isImmKindConst();
6507   };
6508   return getOperandLoc(Test, Operands);
6509 }
6510 
6511 //===----------------------------------------------------------------------===//
6512 // swizzle
6513 //===----------------------------------------------------------------------===//
6514 
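// A sketch of the ds_swizzle_b32 offset forms handled in this section; the
// macro arguments are illustrative examples:
//   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v0, v1 offset:swizzle(BITMASK_PERM, "01pi0")
//   ds_swizzle_b32 v0, v1 offset:swizzle(BROADCAST, 8, 3)
//   ds_swizzle_b32 v0, v1 offset:swizzle(SWAP, 4)
//   ds_swizzle_b32 v0, v1 offset:swizzle(REVERSE, 8)
//   ds_swizzle_b32 v0, v1 offset:0xffff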
6515 LLVM_READNONE
6516 static unsigned
6517 encodeBitmaskPerm(const unsigned AndMask,
6518                   const unsigned OrMask,
6519                   const unsigned XorMask) {
6520   using namespace llvm::AMDGPU::Swizzle;
6521 
6522   return BITMASK_PERM_ENC |
6523          (AndMask << BITMASK_AND_SHIFT) |
6524          (OrMask  << BITMASK_OR_SHIFT)  |
6525          (XorMask << BITMASK_XOR_SHIFT);
6526 }
6527 
6528 bool
6529 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
6530                                      const unsigned MinVal,
6531                                      const unsigned MaxVal,
6532                                      const StringRef ErrMsg,
6533                                      SMLoc &Loc) {
6534   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6535     return false;
6536   }
6537   Loc = getLoc();
6538   if (!parseExpr(Op)) {
6539     return false;
6540   }
6541   if (Op < MinVal || Op > MaxVal) {
6542     Error(Loc, ErrMsg);
6543     return false;
6544   }
6545 
6546   return true;
6547 }
6548 
6549 bool
6550 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6551                                       const unsigned MinVal,
6552                                       const unsigned MaxVal,
6553                                       const StringRef ErrMsg) {
6554   SMLoc Loc;
6555   for (unsigned i = 0; i < OpNum; ++i) {
6556     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
6557       return false;
6558   }
6559 
6560   return true;
6561 }
6562 
6563 bool
6564 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6565   using namespace llvm::AMDGPU::Swizzle;
6566 
6567   int64_t Lane[LANE_NUM];
6568   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6569                            "expected a 2-bit lane id")) {
6570     Imm = QUAD_PERM_ENC;
6571     for (unsigned I = 0; I < LANE_NUM; ++I) {
6572       Imm |= Lane[I] << (LANE_SHIFT * I);
6573     }
6574     return true;
6575   }
6576   return false;
6577 }
6578 
6579 bool
6580 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6581   using namespace llvm::AMDGPU::Swizzle;
6582 
6583   SMLoc Loc;
6584   int64_t GroupSize;
6585   int64_t LaneIdx;
6586 
6587   if (!parseSwizzleOperand(GroupSize,
6588                            2, 32,
6589                            "group size must be in the interval [2,32]",
6590                            Loc)) {
6591     return false;
6592   }
6593   if (!isPowerOf2_64(GroupSize)) {
6594     Error(Loc, "group size must be a power of two");
6595     return false;
6596   }
6597   if (parseSwizzleOperand(LaneIdx,
6598                           0, GroupSize - 1,
6599                           "lane id must be in the interval [0,group size - 1]",
6600                           Loc)) {
6601     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6602     return true;
6603   }
6604   return false;
6605 }
6606 
6607 bool
6608 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
6609   using namespace llvm::AMDGPU::Swizzle;
6610 
6611   SMLoc Loc;
6612   int64_t GroupSize;
6613 
6614   if (!parseSwizzleOperand(GroupSize,
6615                            2, 32,
6616                            "group size must be in the interval [2,32]",
6617                            Loc)) {
6618     return false;
6619   }
6620   if (!isPowerOf2_64(GroupSize)) {
6621     Error(Loc, "group size must be a power of two");
6622     return false;
6623   }
6624 
6625   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6626   return true;
6627 }
6628 
6629 bool
6630 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6631   using namespace llvm::AMDGPU::Swizzle;
6632 
6633   SMLoc Loc;
6634   int64_t GroupSize;
6635 
6636   if (!parseSwizzleOperand(GroupSize,
6637                            1, 16,
6638                            "group size must be in the interval [1,16]",
6639                            Loc)) {
6640     return false;
6641   }
6642   if (!isPowerOf2_64(GroupSize)) {
6643     Error(Loc, "group size must be a power of two");
6644     return false;
6645   }
6646 
6647   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6648   return true;
6649 }
6650 
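// For reference, a summary of the control string parsed below: each of the
// five characters controls one bit of the lane id, where '0' forces the bit
// to 0, '1' forces it to 1, 'p' preserves it and 'i' inverts it; the first
// character controls the most significant bit.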
6651 bool
6652 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
6653   using namespace llvm::AMDGPU::Swizzle;
6654 
6655   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6656     return false;
6657   }
6658 
6659   StringRef Ctl;
6660   SMLoc StrLoc = getLoc();
6661   if (!parseString(Ctl)) {
6662     return false;
6663   }
6664   if (Ctl.size() != BITMASK_WIDTH) {
6665     Error(StrLoc, "expected a 5-character mask");
6666     return false;
6667   }
6668 
6669   unsigned AndMask = 0;
6670   unsigned OrMask = 0;
6671   unsigned XorMask = 0;
6672 
6673   for (size_t i = 0; i < Ctl.size(); ++i) {
6674     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
6675     switch(Ctl[i]) {
6676     default:
6677       Error(StrLoc, "invalid mask");
6678       return false;
6679     case '0':
6680       break;
6681     case '1':
6682       OrMask |= Mask;
6683       break;
6684     case 'p':
6685       AndMask |= Mask;
6686       break;
6687     case 'i':
6688       AndMask |= Mask;
6689       XorMask |= Mask;
6690       break;
6691     }
6692   }
6693 
6694   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
6695   return true;
6696 }
6697 
6698 bool
6699 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6700 
6701   SMLoc OffsetLoc = getLoc();
6702 
6703   if (!parseExpr(Imm, "a swizzle macro")) {
6704     return false;
6705   }
6706   if (!isUInt<16>(Imm)) {
6707     Error(OffsetLoc, "expected a 16-bit offset");
6708     return false;
6709   }
6710   return true;
6711 }
6712 
6713 bool
6714 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6715   using namespace llvm::AMDGPU::Swizzle;
6716 
6717   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
6718 
6719     SMLoc ModeLoc = getLoc();
6720     bool Ok = false;
6721 
6722     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6723       Ok = parseSwizzleQuadPerm(Imm);
6724     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6725       Ok = parseSwizzleBitmaskPerm(Imm);
6726     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6727       Ok = parseSwizzleBroadcast(Imm);
6728     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6729       Ok = parseSwizzleSwap(Imm);
6730     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6731       Ok = parseSwizzleReverse(Imm);
6732     } else {
6733       Error(ModeLoc, "expected a swizzle mode");
6734     }
6735 
6736     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
6737   }
6738 
6739   return false;
6740 }
6741 
6742 OperandMatchResultTy
6743 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6744   SMLoc S = getLoc();
6745   int64_t Imm = 0;
6746 
6747   if (trySkipId("offset")) {
6748 
6749     bool Ok = false;
6750     if (skipToken(AsmToken::Colon, "expected a colon")) {
6751       if (trySkipId("swizzle")) {
6752         Ok = parseSwizzleMacro(Imm);
6753       } else {
6754         Ok = parseSwizzleOffset(Imm);
6755       }
6756     }
6757 
6758     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
6759 
6760     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
6761   } else {
6762     // Swizzle "offset" operand is optional.
6763     // If it is omitted, try parsing other optional operands.
6764     return parseOptionalOpr(Operands);
6765   }
6766 }
6767 
6768 bool
6769 AMDGPUOperand::isSwizzle() const {
6770   return isImmTy(ImmTySwizzle);
6771 }
6772 
6773 //===----------------------------------------------------------------------===//
6774 // VGPR Index Mode
6775 //===----------------------------------------------------------------------===//
6776 
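// A sketch of the gpr_idx operand forms handled below; the mode names and
// the raw immediate are illustrative examples:
//   s_set_gpr_idx_on s0, gpr_idx(SRC0,DST)
//   s_set_gpr_idx_on s0, 9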
6777 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
6778 
6779   using namespace llvm::AMDGPU::VGPRIndexMode;
6780 
6781   if (trySkipToken(AsmToken::RParen)) {
6782     return OFF;
6783   }
6784 
6785   int64_t Imm = 0;
6786 
6787   while (true) {
6788     unsigned Mode = 0;
6789     SMLoc S = getLoc();
6790 
6791     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
6792       if (trySkipId(IdSymbolic[ModeId])) {
6793         Mode = 1 << ModeId;
6794         break;
6795       }
6796     }
6797 
6798     if (Mode == 0) {
6799       Error(S, (Imm == 0)?
6800                "expected a VGPR index mode or a closing parenthesis" :
6801                "expected a VGPR index mode");
6802       return UNDEF;
6803     }
6804 
6805     if (Imm & Mode) {
6806       Error(S, "duplicate VGPR index mode");
6807       return UNDEF;
6808     }
6809     Imm |= Mode;
6810 
6811     if (trySkipToken(AsmToken::RParen))
6812       break;
6813     if (!skipToken(AsmToken::Comma,
6814                    "expected a comma or a closing parenthesis"))
6815       return UNDEF;
6816   }
6817 
6818   return Imm;
6819 }
6820 
6821 OperandMatchResultTy
6822 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
6823 
6824   using namespace llvm::AMDGPU::VGPRIndexMode;
6825 
6826   int64_t Imm = 0;
6827   SMLoc S = getLoc();
6828 
6829   if (trySkipId("gpr_idx", AsmToken::LParen)) {
6830     Imm = parseGPRIdxMacro();
6831     if (Imm == UNDEF)
6832       return MatchOperand_ParseFail;
6833   } else {
6834     if (getParser().parseAbsoluteExpression(Imm))
6835       return MatchOperand_ParseFail;
6836     if (Imm < 0 || !isUInt<4>(Imm)) {
6837       Error(S, "invalid immediate: only 4-bit values are legal");
6838       return MatchOperand_ParseFail;
6839     }
6840   }
6841 
6842   Operands.push_back(
6843       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
6844   return MatchOperand_Success;
6845 }
6846 
6847 bool AMDGPUOperand::isGPRIdxMode() const {
6848   return isImmTy(ImmTyGprIdxMode);
6849 }
6850 
6851 //===----------------------------------------------------------------------===//
6852 // sopp branch targets
6853 //===----------------------------------------------------------------------===//
6854 
6855 OperandMatchResultTy
6856 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
6857 
6858   // Make sure we are not parsing something
6859   // that looks like a label or an expression but is not.
6860   // This will improve error messages.
6861   if (isRegister() || isModifier())
6862     return MatchOperand_NoMatch;
6863 
6864   if (!parseExpr(Operands))
6865     return MatchOperand_ParseFail;
6866 
6867   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
6868   assert(Opr.isImm() || Opr.isExpr());
6869   SMLoc Loc = Opr.getStartLoc();
6870 
6871   // Currently we do not support arbitrary expressions as branch targets.
6872   // Only labels and absolute expressions are accepted.
6873   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
6874     Error(Loc, "expected an absolute expression or a label");
6875   } else if (Opr.isImm() && !Opr.isS16Imm()) {
6876     Error(Loc, "expected a 16-bit signed jump offset");
6877   }
6878 
6879   return MatchOperand_Success;
6880 }
6881 
6882 //===----------------------------------------------------------------------===//
6883 // Boolean holding registers
6884 //===----------------------------------------------------------------------===//
6885 
6886 OperandMatchResultTy
6887 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
6888   return parseReg(Operands);
6889 }
6890 
6891 //===----------------------------------------------------------------------===//
6892 // mubuf
6893 //===----------------------------------------------------------------------===//
6894 
6895 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
6896   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
6897 }
6898 
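// A sketch of the atomic handling in cvtMubufImpl below: in the example,
// the presence of 'glc' selects the atomic-with-return opcode variant, and
// the destination register is added a second time as the tied data source.
//   buffer_atomic_add v0, v1, s[0:3], s4 idxen glc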
6899 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
6900                                    const OperandVector &Operands,
6901                                    bool IsAtomic,
6902                                    bool IsLds) {
6903   bool IsLdsOpcode = IsLds;
6904   bool HasLdsModifier = false;
6905   OptionalImmIndexMap OptionalIdx;
6906   unsigned FirstOperandIdx = 1;
6907   bool IsAtomicReturn = false;
6908 
6909   if (IsAtomic) {
6910     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
6911       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6912       if (!Op.isCPol())
6913         continue;
6914       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
6915       break;
6916     }
6917 
6918     if (!IsAtomicReturn) {
6919       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
6920       if (NewOpc != -1)
6921         Inst.setOpcode(NewOpc);
6922     }
6923 
6924     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
6925                       SIInstrFlags::IsAtomicRet;
6926   }
6927 
6928   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
6929     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6930 
6931     // Add the register arguments
6932     if (Op.isReg()) {
6933       Op.addRegOperands(Inst, 1);
6934       // Insert a tied src for the atomic return dst.
6935       // This cannot be postponed as subsequent calls to
6936       // addImmOperands rely on the correct number of MC operands.
6937       if (IsAtomicReturn && i == FirstOperandIdx)
6938         Op.addRegOperands(Inst, 1);
6939       continue;
6940     }
6941 
6942     // Handle the case where soffset is an immediate
6943     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6944       Op.addImmOperands(Inst, 1);
6945       continue;
6946     }
6947 
6948     HasLdsModifier |= Op.isLDS();
6949 
6950     // Handle tokens like 'offen' which are sometimes hard-coded into the
6951     // asm string.  There are no MCInst operands for these.
6952     if (Op.isToken()) {
6953       continue;
6954     }
6955     assert(Op.isImm());
6956 
6957     // Handle optional arguments
6958     OptionalIdx[Op.getImmTy()] = i;
6959   }
6960 
6961   // This is a workaround for an llvm quirk which may result in an
6962   // incorrect instruction selection. Lds and non-lds versions of
6963   // MUBUF instructions are identical except that lds versions
6964   // have a mandatory 'lds' modifier. However, this modifier follows
6965   // optional modifiers, and the llvm asm matcher regards this 'lds'
6966   // modifier as an optional one. As a result, an lds version
6967   // of the opcode may be selected even if it has no 'lds' modifier.
6968   if (IsLdsOpcode && !HasLdsModifier) {
6969     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
6970     if (NoLdsOpcode != -1) { // Got lds version - correct it.
6971       Inst.setOpcode(NoLdsOpcode);
6972       IsLdsOpcode = false;
6973     }
6974   }
6975 
6976   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
6977   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
6978 
6979   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
6980     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6981   }
6982   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
6983 }
6984 
6985 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
6986   OptionalImmIndexMap OptionalIdx;
6987 
6988   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6989     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6990 
6991     // Add the register arguments
6992     if (Op.isReg()) {
6993       Op.addRegOperands(Inst, 1);
6994       continue;
6995     }
6996 
6997     // Handle the case where soffset is an immediate
6998     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6999       Op.addImmOperands(Inst, 1);
7000       continue;
7001     }
7002 
7003     // Handle tokens like 'offen' which are sometimes hard-coded into the
7004     // asm string.  There are no MCInst operands for these.
7005     if (Op.isToken()) {
7006       continue;
7007     }
7008     assert(Op.isImm());
7009 
7010     // Handle optional arguments
7011     OptionalIdx[Op.getImmTy()] = i;
7012   }
7013 
7014   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7015                         AMDGPUOperand::ImmTyOffset);
7016   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7017   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7018   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7019   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7020 }
7021 
7022 //===----------------------------------------------------------------------===//
7023 // mimg
7024 //===----------------------------------------------------------------------===//
7025 
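// A sketch of a MIMG instruction whose optional modifiers are collected by
// cvtMIMG below; the operands shown are illustrative:
//   image_load v[0:3], v[4:5], s[0:7] dmask:0xf unorm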
7026 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7027                               bool IsAtomic) {
7028   unsigned I = 1;
7029   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7030   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7031     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7032   }
7033 
7034   if (IsAtomic) {
7035     // Add src, same as dst
7036     assert(Desc.getNumDefs() == 1);
7037     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7038   }
7039 
7040   OptionalImmIndexMap OptionalIdx;
7041 
7042   for (unsigned E = Operands.size(); I != E; ++I) {
7043     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7044 
7045     // Add the register arguments
7046     if (Op.isReg()) {
7047       Op.addRegOperands(Inst, 1);
7048     } else if (Op.isImmModifier()) {
7049       OptionalIdx[Op.getImmTy()] = I;
7050     } else if (!Op.isToken()) {
7051       llvm_unreachable("unexpected operand type");
7052     }
7053   }
7054 
7055   bool IsGFX10Plus = isGFX10Plus();
7056 
7057   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7058   if (IsGFX10Plus)
7059     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7060   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7061   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7062   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7063   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7064     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7065   if (IsGFX10Plus)
7066     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7067   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7068   if (!IsGFX10Plus)
7069     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7070   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7071 }
7072 
7073 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7074   cvtMIMG(Inst, Operands, true);
7075 }
7076 
7077 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7078   OptionalImmIndexMap OptionalIdx;
7079   bool IsAtomicReturn = false;
7080 
7081   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7082     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7083     if (!Op.isCPol())
7084       continue;
7085     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7086     break;
7087   }
7088 
7089   if (!IsAtomicReturn) {
7090     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7091     if (NewOpc != -1)
7092       Inst.setOpcode(NewOpc);
7093   }
7094 
7095   IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7096                     SIInstrFlags::IsAtomicRet;
7097 
7098   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7099     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7100 
7101     // Add the register arguments
7102     if (Op.isReg()) {
7103       Op.addRegOperands(Inst, 1);
7104       if (IsAtomicReturn && i == 1)
7105         Op.addRegOperands(Inst, 1);
7106       continue;
7107     }
7108 
7109     // Handle the case where soffset is an immediate
7110     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7111       Op.addImmOperands(Inst, 1);
7112       continue;
7113     }
7114 
7115     // Handle tokens like 'offen' which are sometimes hard-coded into the
7116     // asm string.  There are no MCInst operands for these.
7117     if (Op.isToken()) {
7118       continue;
7119     }
7120     assert(Op.isImm());
7121 
7122     // Handle optional arguments
7123     OptionalIdx[Op.getImmTy()] = i;
7124   }
7125 
7126   if ((int)Inst.getNumOperands() <=
7127       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7128     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7129   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7130 }
7131 
7132 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7133                                       const OperandVector &Operands) {
7134   for (unsigned I = 1; I < Operands.size(); ++I) {
7135     auto &Operand = (AMDGPUOperand &)*Operands[I];
7136     if (Operand.isReg())
7137       Operand.addRegOperands(Inst, 1);
7138   }
7139 
7140   Inst.addOperand(MCOperand::createImm(1)); // a16
7141 }
7142 
7143 //===----------------------------------------------------------------------===//
7144 // smrd
7145 //===----------------------------------------------------------------------===//
7146 
7147 bool AMDGPUOperand::isSMRDOffset8() const {
7148   return isImm() && isUInt<8>(getImm());
7149 }
7150 
7151 bool AMDGPUOperand::isSMEMOffset() const {
7152   return isImm(); // Offset range is checked later by validator.
7153 }
7154 
7155 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7156   // 32-bit literals are only supported on CI, and we only want to use them
7157   // when the offset is > 8 bits.
7158   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7159 }
7160 
7161 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7162   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7163 }
7164 
7165 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7166   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7167 }
7168 
7169 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7170   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7171 }
7172 
7173 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7174   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7175 }
7176 
7177 //===----------------------------------------------------------------------===//
7178 // vop3
7179 //===----------------------------------------------------------------------===//
7180 
7181 static bool ConvertOmodMul(int64_t &Mul) {
7182   if (Mul != 1 && Mul != 2 && Mul != 4)
7183     return false;
7184 
7185   Mul >>= 1;
7186   return true;
7187 }
7188 
7189 static bool ConvertOmodDiv(int64_t &Div) {
7190   if (Div == 1) {
7191     Div = 0;
7192     return true;
7193   }
7194 
7195   if (Div == 2) {
7196     Div = 3;
7197     return true;
7198   }
7199 
7200   return false;
7201 }
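
// For reference, a summary of the conversions above: mul:1 -> 0, mul:2 -> 1,
// mul:4 -> 2 and div:2 -> 3, which matches the usual VOP3 omod encoding;
// div:1 (like mul:1) means no output modifier.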
7202 
7203 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7204 // This is intentional and ensures compatibility with sp3.
7205 // See bug 35397 for details.
7206 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7207   if (BoundCtrl == 0 || BoundCtrl == 1) {
7208     BoundCtrl = 1;
7209     return true;
7210   }
7211   return false;
7212 }
7213 
7214 // Note: the order in this table matches the order of operands in AsmString.
7215 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7216   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7217   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7218   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7219   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7220   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7221   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7222   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7223   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7224   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7225   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7226   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7227   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7228   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7229   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7230   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7231   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7232   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7233   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7234   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7235   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7236   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7237   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7238   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7239   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7240   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7241   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7242   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7243   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
7244   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7245   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7246   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7247   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7248   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7249   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7250   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7251   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7252   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7253   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7254   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7255   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7256   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7257 };
7258 
7259 void AMDGPUAsmParser::onBeginOfFile() {
7260   if (!getParser().getStreamer().getTargetStreamer() ||
7261       getSTI().getTargetTriple().getArch() == Triple::r600)
7262     return;
7263 
7264   if (!getTargetStreamer().getTargetID())
7265     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7266 
7267   if (isHsaAbiVersion3Or4(&getSTI()))
7268     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7269 }
7270 
7271 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7272 
7273   OperandMatchResultTy res = parseOptionalOpr(Operands);
7274 
7275   // This is a hack to enable hardcoded mandatory operands which follow
7276   // optional operands.
7277   //
7278   // The current design assumes that all operands after the first optional operand
7279   // are also optional. However, the implementation of some instructions violates
7280   // this rule (see e.g. flat/global atomics which have hardcoded 'glc' operands).
7281   //
7282   // To alleviate this problem, we have to (implicitly) parse extra operands
7283   // to make sure the autogenerated parser of custom operands never hits hardcoded
7284   // mandatory operands.
7285 
7286   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7287     if (res != MatchOperand_Success ||
7288         isToken(AsmToken::EndOfStatement))
7289       break;
7290 
7291     trySkipToken(AsmToken::Comma);
7292     res = parseOptionalOpr(Operands);
7293   }
7294 
7295   return res;
7296 }
7297 
7298 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7299   OperandMatchResultTy res;
7300   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7301     // try to parse any optional operand here
7302     if (Op.IsBit) {
7303       res = parseNamedBit(Op.Name, Operands, Op.Type);
7304     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7305       res = parseOModOperand(Operands);
7306     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7307                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7308                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7309       res = parseSDWASel(Operands, Op.Name, Op.Type);
7310     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7311       res = parseSDWADstUnused(Operands);
7312     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7313                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7314                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7315                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7316       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7317                                         Op.ConvertResult);
7318     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7319       res = parseDim(Operands);
7320     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7321       res = parseCPol(Operands);
7322     } else {
7323       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7324     }
7325     if (res != MatchOperand_NoMatch) {
7326       return res;
7327     }
7328   }
7329   return MatchOperand_NoMatch;
7330 }
7331 
7332 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7333   StringRef Name = getTokenStr();
7334   if (Name == "mul") {
7335     return parseIntWithPrefix("mul", Operands,
7336                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7337   }
7338 
7339   if (Name == "div") {
7340     return parseIntWithPrefix("div", Operands,
7341                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7342   }
7343 
7344   return MatchOperand_NoMatch;
7345 }
7346 
7347 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
7348   cvtVOP3P(Inst, Operands);
7349 
7350   int Opc = Inst.getOpcode();
7351 
7352   int SrcNum;
7353   const int Ops[] = { AMDGPU::OpName::src0,
7354                       AMDGPU::OpName::src1,
7355                       AMDGPU::OpName::src2 };
7356   for (SrcNum = 0;
7357        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
7358        ++SrcNum);
7359   assert(SrcNum > 0);
7360 
7361   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7362   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7363 
7364   if ((OpSel & (1 << SrcNum)) != 0) {
7365     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7366     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7367     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
7368   }
7369 }
7370 
7371 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
7372       // 1. This operand accepts input modifiers
7373   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
7374       // 2. This is not the last operand
7375       && Desc.NumOperands > (OpNum + 1)
7376       // 3. The next operand has a register class
7377       && Desc.OpInfo[OpNum + 1].RegClass != -1
7378       // 4. The next register is not tied to any other operand
7379       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7380 }
7381 
7382 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7383 {
7384   OptionalImmIndexMap OptionalIdx;
7385   unsigned Opc = Inst.getOpcode();
7386 
7387   unsigned I = 1;
7388   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7389   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7390     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7391   }
7392 
7393   for (unsigned E = Operands.size(); I != E; ++I) {
7394     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7395     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7396       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7397     } else if (Op.isInterpSlot() ||
7398                Op.isInterpAttr() ||
7399                Op.isAttrChan()) {
7400       Inst.addOperand(MCOperand::createImm(Op.getImm()));
7401     } else if (Op.isImmModifier()) {
7402       OptionalIdx[Op.getImmTy()] = I;
7403     } else {
7404       llvm_unreachable("unhandled operand type");
7405     }
7406   }
7407 
7408   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
7409     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
7410   }
7411 
7412   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7413     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7414   }
7415 
7416   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7417     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7418   }
7419 }
7420 
7421 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
7422                               OptionalImmIndexMap &OptionalIdx) {
7423   unsigned Opc = Inst.getOpcode();
7424 
7425   unsigned I = 1;
7426   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7427   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7428     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7429   }
7430 
7431   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
7432     // This instruction has src modifiers
7433     for (unsigned E = Operands.size(); I != E; ++I) {
7434       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7435       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7436         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7437       } else if (Op.isImmModifier()) {
7438         OptionalIdx[Op.getImmTy()] = I;
7439       } else if (Op.isRegOrImm()) {
7440         Op.addRegOrImmOperands(Inst, 1);
7441       } else {
7442         llvm_unreachable("unhandled operand type");
7443       }
7444     }
7445   } else {
7446     // No src modifiers
7447     for (unsigned E = Operands.size(); I != E; ++I) {
7448       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7449       if (Op.isMod()) {
7450         OptionalIdx[Op.getImmTy()] = I;
7451       } else {
7452         Op.addRegOrImmOperands(Inst, 1);
7453       }
7454     }
7455   }
7456 
7457   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7458     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7459   }
7460 
7461   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7462     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7463   }
7464 
7465   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
7466   // they have a src2 register operand that is tied to the dst operand.
7467   // We don't allow modifiers for this operand in the assembler, so
7468   // src2_modifiers should be 0.
7469   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
7470       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
7471       Opc == AMDGPU::V_MAC_F32_e64_vi ||
7472       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
7473       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
7474       Opc == AMDGPU::V_MAC_F16_e64_vi ||
7475       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
7476       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
7477       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
7478       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
7479       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
7480     auto it = Inst.begin();
7481     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
7482     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
7483     ++it;
7484     // Copy the operand to ensure it's not invalidated when Inst grows.
7485     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
7486   }
7487 }
7488 
7489 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
7490   OptionalImmIndexMap OptionalIdx;
7491   cvtVOP3(Inst, Operands, OptionalIdx);
7492 }
7493 
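// A worked example of the modifier lowering performed by cvtVOP3P below,
// assuming the standard packed-math encoding: for
//   v_pk_add_f16 v0, v1, v2 op_sel:[1,0] op_sel_hi:[0,1] neg_lo:[1,0]
// the parsed arrays are folded into the per-source modifier operands as
// src0_modifiers = OP_SEL_0 | NEG and src1_modifiers = OP_SEL_1.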
7494 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
7495                                const OperandVector &Operands) {
7496   OptionalImmIndexMap OptIdx;
7497   const int Opc = Inst.getOpcode();
7498   const MCInstrDesc &Desc = MII.get(Opc);
7499 
7500   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
7501 
7502   cvtVOP3(Inst, Operands, OptIdx);
7503 
7504   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
7505     assert(!IsPacked);
7506     Inst.addOperand(Inst.getOperand(0));
7507   }
7508 
7509   // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
7510   // instruction, and then figure out where to actually put the modifiers.
7511 
7512   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
7513 
7514   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
7515   if (OpSelHiIdx != -1) {
7516     int DefaultVal = IsPacked ? -1 : 0;
7517     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
7518                           DefaultVal);
7519   }
7520 
7521   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
7522   if (NegLoIdx != -1) {
7523     assert(IsPacked);
7524     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
7525     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
7526   }
7527 
7528   const int Ops[] = { AMDGPU::OpName::src0,
7529                       AMDGPU::OpName::src1,
7530                       AMDGPU::OpName::src2 };
7531   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
7532                          AMDGPU::OpName::src1_modifiers,
7533                          AMDGPU::OpName::src2_modifiers };
7534 
7535   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7536 
7537   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7538   unsigned OpSelHi = 0;
7539   unsigned NegLo = 0;
7540   unsigned NegHi = 0;
7541 
7542   if (OpSelHiIdx != -1) {
7543     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
7544   }
7545 
7546   if (NegLoIdx != -1) {
7547     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
7548     NegLo = Inst.getOperand(NegLoIdx).getImm();
7549     NegHi = Inst.getOperand(NegHiIdx).getImm();
7550   }
7551 
7552   for (int J = 0; J < 3; ++J) {
7553     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
7554     if (OpIdx == -1)
7555       break;
7556 
7557     uint32_t ModVal = 0;
7558 
7559     if ((OpSel & (1 << J)) != 0)
7560       ModVal |= SISrcMods::OP_SEL_0;
7561 
7562     if ((OpSelHi & (1 << J)) != 0)
7563       ModVal |= SISrcMods::OP_SEL_1;
7564 
7565     if ((NegLo & (1 << J)) != 0)
7566       ModVal |= SISrcMods::NEG;
7567 
7568     if ((NegHi & (1 << J)) != 0)
7569       ModVal |= SISrcMods::NEG_HI;
7570 
7571     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
7572 
7573     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
7574   }
7575 }
7576 
7577 //===----------------------------------------------------------------------===//
7578 // dpp
7579 //===----------------------------------------------------------------------===//
7580 
7581 bool AMDGPUOperand::isDPP8() const {
7582   return isImmTy(ImmTyDPP8);
7583 }
7584 
7585 bool AMDGPUOperand::isDPPCtrl() const {
7586   using namespace AMDGPU::DPP;
7587 
7588   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
7589   if (result) {
7590     int64_t Imm = getImm();
7591     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
7592            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
7593            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
7594            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
7595            (Imm == DppCtrl::WAVE_SHL1) ||
7596            (Imm == DppCtrl::WAVE_ROL1) ||
7597            (Imm == DppCtrl::WAVE_SHR1) ||
7598            (Imm == DppCtrl::WAVE_ROR1) ||
7599            (Imm == DppCtrl::ROW_MIRROR) ||
7600            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
7601            (Imm == DppCtrl::BCAST15) ||
7602            (Imm == DppCtrl::BCAST31) ||
7603            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
7604            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
7605   }
7606   return false;
7607 }
7608 
7609 //===----------------------------------------------------------------------===//
7610 // mAI
7611 //===----------------------------------------------------------------------===//
7612 
7613 bool AMDGPUOperand::isBLGP() const {
7614   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
7615 }
7616 
7617 bool AMDGPUOperand::isCBSZ() const {
7618   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
7619 }
7620 
7621 bool AMDGPUOperand::isABID() const {
7622   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
7623 }
7624 
7625 bool AMDGPUOperand::isS16Imm() const {
7626   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
7627 }
7628 
7629 bool AMDGPUOperand::isU16Imm() const {
7630   return isImm() && isUInt<16>(getImm());
7631 }
7632 
7633 //===----------------------------------------------------------------------===//
7634 // dim
7635 //===----------------------------------------------------------------------===//
7636 
7637 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
7638   // We want to allow "dim:1D" etc.,
7639   // but the initial 1 is tokenized as an integer.
7640   std::string Token;
7641   if (isToken(AsmToken::Integer)) {
7642     SMLoc Loc = getToken().getEndLoc();
7643     Token = std::string(getTokenStr());
7644     lex();
7645     if (getLoc() != Loc)
7646       return false;
7647   }
7648 
7649   StringRef Suffix;
7650   if (!parseId(Suffix))
7651     return false;
7652   Token += Suffix;
7653 
7654   StringRef DimId = Token;
7655   if (DimId.startswith("SQ_RSRC_IMG_"))
7656     DimId = DimId.drop_front(12);
7657 
7658   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
7659   if (!DimInfo)
7660     return false;
7661 
7662   Encoding = DimInfo->Encoding;
7663   return true;
7664 }
7665 
7666 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
7667   if (!isGFX10Plus())
7668     return MatchOperand_NoMatch;
7669 
7670   SMLoc S = getLoc();
7671 
7672   if (!trySkipId("dim", AsmToken::Colon))
7673     return MatchOperand_NoMatch;
7674 
7675   unsigned Encoding;
7676   SMLoc Loc = getLoc();
7677   if (!parseDimId(Encoding)) {
7678     Error(Loc, "invalid dim value");
7679     return MatchOperand_ParseFail;
7680   }
7681 
7682   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
7683                                               AMDGPUOperand::ImmTyDim));
7684   return MatchOperand_Success;
7685 }
7686 
7687 //===----------------------------------------------------------------------===//
7688 // dpp
7689 //===----------------------------------------------------------------------===//
7690 
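// A sketch of the DPP control operands recognized by the parsers below; the
// values shown are illustrative examples, not an exhaustive list:
//   v_mov_b32_dpp v0, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
//   v_mov_b32_dpp v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
//   v_mov_b32_dpp v0, v1 row_shl:1 bound_ctrl:0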
7691 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
7692   SMLoc S = getLoc();
7693 
7694   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
7695     return MatchOperand_NoMatch;
7696 
7697   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
7698 
7699   int64_t Sels[8];
7700 
7701   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7702     return MatchOperand_ParseFail;
7703 
7704   for (size_t i = 0; i < 8; ++i) {
7705     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7706       return MatchOperand_ParseFail;
7707 
7708     SMLoc Loc = getLoc();
7709     if (getParser().parseAbsoluteExpression(Sels[i]))
7710       return MatchOperand_ParseFail;
7711     if (0 > Sels[i] || 7 < Sels[i]) {
7712       Error(Loc, "expected a 3-bit value");
7713       return MatchOperand_ParseFail;
7714     }
7715   }
7716 
7717   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7718     return MatchOperand_ParseFail;
7719 
7720   unsigned DPP8 = 0;
7721   for (size_t i = 0; i < 8; ++i)
7722     DPP8 |= (Sels[i] << (i * 3));
7723 
7724   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
7725   return MatchOperand_Success;
7726 }
7727 
7728 bool
7729 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
7730                                     const OperandVector &Operands) {
7731   if (Ctrl == "row_newbcast")
7732     return isGFX90A();
7733 
7734   // DPP64 is supported for row_newbcast only.
7735   const MCRegisterInfo *MRI = getMRI();
7736   if (Operands.size() > 2 && Operands[1]->isReg() &&
7737       MRI->getSubReg(Operands[1]->getReg(), AMDGPU::sub1))
7738     return false;
7739 
7740   if (Ctrl == "row_share" ||
7741       Ctrl == "row_xmask")
7742     return isGFX10Plus();
7743 
7744   if (Ctrl == "wave_shl" ||
7745       Ctrl == "wave_shr" ||
7746       Ctrl == "wave_rol" ||
7747       Ctrl == "wave_ror" ||
7748       Ctrl == "row_bcast")
7749     return isVI() || isGFX9();
7750 
7751   return Ctrl == "row_mirror" ||
7752          Ctrl == "row_half_mirror" ||
7753          Ctrl == "quad_perm" ||
7754          Ctrl == "row_shl" ||
7755          Ctrl == "row_shr" ||
7756          Ctrl == "row_ror";
7757 }
7758 
7759 int64_t
7760 AMDGPUAsmParser::parseDPPCtrlPerm() {
7761   // quad_perm:[%d,%d,%d,%d]
7762 
7763   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7764     return -1;
7765 
7766   int64_t Val = 0;
7767   for (int i = 0; i < 4; ++i) {
7768     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7769       return -1;
7770 
7771     int64_t Temp;
7772     SMLoc Loc = getLoc();
7773     if (getParser().parseAbsoluteExpression(Temp))
7774       return -1;
7775     if (Temp < 0 || Temp > 3) {
7776       Error(Loc, "expected a 2-bit value");
7777       return -1;
7778     }
7779 
7780     Val += (Temp << i * 2);
7781   }
7782 
7783   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7784     return -1;
7785 
7786   return Val;
7787 }
7788 
7789 int64_t
7790 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
7791   using namespace AMDGPU::DPP;
7792 
7793   // sel:%d
7794 
7795   int64_t Val;
7796   SMLoc Loc = getLoc();
7797 
7798   if (getParser().parseAbsoluteExpression(Val))
7799     return -1;
7800 
7801   struct DppCtrlCheck {
7802     int64_t Ctrl;
7803     int Lo;
7804     int Hi;
7805   };
7806 
7807   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
7808     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
7809     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
7810     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
7811     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
7812     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
7813     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
7814     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
7815     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
7816     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
7817     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
7818     .Default({-1, 0, 0});
7819 
7820   bool Valid;
7821   if (Check.Ctrl == -1) {
7822     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
7823     Val = (Val == 15) ? DppCtrl::BCAST15 : DppCtrl::BCAST31;
7824   } else {
7825     Valid = Check.Lo <= Val && Val <= Check.Hi;
7826     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
7827   }
7828 
7829   if (!Valid) {
7830     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
7831     return -1;
7832   }
7833 
7834   return Val;
7835 }
7836 
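// Parse a full dpp_ctrl operand, e.g. row_mirror, row_shl:1 or
// quad_perm:[0,1,2,3]. Controls other than row_mirror/row_half_mirror take a
// ':'-separated argument which is handled by the helpers above.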
7837 OperandMatchResultTy
7838 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
7839   using namespace AMDGPU::DPP;
7840 
7841   if (!isToken(AsmToken::Identifier) ||
7842       !isSupportedDPPCtrl(getTokenStr(), Operands))
7843     return MatchOperand_NoMatch;
7844 
7845   SMLoc S = getLoc();
7846   int64_t Val = -1;
7847   StringRef Ctrl;
7848 
7849   parseId(Ctrl);
7850 
7851   if (Ctrl == "row_mirror") {
7852     Val = DppCtrl::ROW_MIRROR;
7853   } else if (Ctrl == "row_half_mirror") {
7854     Val = DppCtrl::ROW_HALF_MIRROR;
7855   } else {
7856     if (skipToken(AsmToken::Colon, "expected a colon")) {
7857       if (Ctrl == "quad_perm") {
7858         Val = parseDPPCtrlPerm();
7859       } else {
7860         Val = parseDPPCtrlSel(Ctrl);
7861       }
7862     }
7863   }
7864 
7865   if (Val == -1)
7866     return MatchOperand_ParseFail;
7867 
7868   Operands.push_back(
7869     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
7870   return MatchOperand_Success;
7871 }
7872 
7873 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
7874   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
7875 }
7876 
7877 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
7878   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
7879 }
7880 
7881 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
7882   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
7883 }
7884 
7885 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
7886   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
7887 }
7888 
7889 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
7890   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
7891 }
7892 
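// Convert parsed DPP/DPP8 operands into MCInst operands: copy the defs,
// re-add tied operands, skip the textual "vcc" operand of VOP2b, and append
// defaults for the optional immediates (row_mask, bank_mask, bound_ctrl and
// fi for DPP; fi for DPP8).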
7893 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
7894   OptionalImmIndexMap OptionalIdx;
7895 
7896   unsigned I = 1;
7897   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7898   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7899     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7900   }
7901 
7902   int Fi = 0;
7903   for (unsigned E = Operands.size(); I != E; ++I) {
7904     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
7905                                             MCOI::TIED_TO);
7906     if (TiedTo != -1) {
7907       assert((unsigned)TiedTo < Inst.getNumOperands());
7908       // handle tied old or src2 for MAC instructions
7909       Inst.addOperand(Inst.getOperand(TiedTo));
7910     }
7911     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7912     // Add the register arguments
7913     if (Op.isReg() && validateVccOperand(Op.getReg())) {
7914       // VOP2b DPP instructions (v_add_u32, v_sub_u32, ...) use a "vcc" token.
7915       // Skip it.
7916       continue;
7917     }
7918 
7919     if (IsDPP8) {
7920       if (Op.isDPP8()) {
7921         Op.addImmOperands(Inst, 1);
7922       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7923         Op.addRegWithFPInputModsOperands(Inst, 2);
7924       } else if (Op.isFI()) {
7925         Fi = Op.getImm();
7926       } else if (Op.isReg()) {
7927         Op.addRegOperands(Inst, 1);
7928       } else {
7929         llvm_unreachable("Invalid operand type");
7930       }
7931     } else {
7932       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7933         Op.addRegWithFPInputModsOperands(Inst, 2);
7934       } else if (Op.isDPPCtrl()) {
7935         Op.addImmOperands(Inst, 1);
7936       } else if (Op.isImm()) {
7937         // Handle optional arguments
7938         OptionalIdx[Op.getImmTy()] = I;
7939       } else {
7940         llvm_unreachable("Invalid operand type");
7941       }
7942     }
7943   }
7944 
7945   if (IsDPP8) {
7946     using namespace llvm::AMDGPU::DPP;
7947     Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
7948   } else {
7949     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
7950     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
7951     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
7952     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
7953       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
7954     }
7955   }
7956 }
7957 
7958 //===----------------------------------------------------------------------===//
7959 // sdwa
7960 //===----------------------------------------------------------------------===//
7961 
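// Parse an SDWA selector of the form <Prefix>:<value>, where <value> is one
// of BYTE_0..BYTE_3, WORD_0, WORD_1 or DWORD (typically used for dst_sel,
// src0_sel and src1_sel).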
7962 OperandMatchResultTy
7963 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
7964                               AMDGPUOperand::ImmTy Type) {
7965   using namespace llvm::AMDGPU::SDWA;
7966 
7967   SMLoc S = getLoc();
7968   StringRef Value;
7969   OperandMatchResultTy res;
7970 
7971   SMLoc StringLoc;
7972   res = parseStringWithPrefix(Prefix, Value, StringLoc);
7973   if (res != MatchOperand_Success) {
7974     return res;
7975   }
7976 
7977   int64_t Int;
7978   Int = StringSwitch<int64_t>(Value)
7979         .Case("BYTE_0", SdwaSel::BYTE_0)
7980         .Case("BYTE_1", SdwaSel::BYTE_1)
7981         .Case("BYTE_2", SdwaSel::BYTE_2)
7982         .Case("BYTE_3", SdwaSel::BYTE_3)
7983         .Case("WORD_0", SdwaSel::WORD_0)
7984         .Case("WORD_1", SdwaSel::WORD_1)
7985         .Case("DWORD", SdwaSel::DWORD)
7986         .Default(0xffffffff);
7987 
7988   if (Int == 0xffffffff) {
7989     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
7990     return MatchOperand_ParseFail;
7991   }
7992 
7993   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
7994   return MatchOperand_Success;
7995 }
7996 
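// Parse dst_unused:<value>, where <value> is one of UNUSED_PAD, UNUSED_SEXT
// or UNUSED_PRESERVE.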
7997 OperandMatchResultTy
7998 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
7999   using namespace llvm::AMDGPU::SDWA;
8000 
8001   SMLoc S = getLoc();
8002   StringRef Value;
8003   OperandMatchResultTy res;
8004 
8005   SMLoc StringLoc;
8006   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8007   if (res != MatchOperand_Success) {
8008     return res;
8009   }
8010 
8011   int64_t Int;
8012   Int = StringSwitch<int64_t>(Value)
8013         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8014         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8015         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8016         .Default(0xffffffff);
8017 
8018   if (Int == 0xffffffff) {
8019     Error(StringLoc, "invalid dst_unused value");
8020     return MatchOperand_ParseFail;
8021   }
8022 
8023   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8024   return MatchOperand_Success;
8025 }
8026 
8027 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8028   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8029 }
8030 
8031 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8032   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8033 }
8034 
8035 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8036   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8037 }
8038 
8039 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8040   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8041 }
8042 
8043 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8044   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8045 }
8046 
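// Convert parsed SDWA operands into MCInst operands: copy the defs, skip the
// textual "vcc" operand where requested, add source operands together with
// their input modifiers, and append defaults for the optional clamp/omod and
// sel/dst_unused immediates that the selected encoding expects.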
8047 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8048                               uint64_t BasicInstType,
8049                               bool SkipDstVcc,
8050                               bool SkipSrcVcc) {
8051   using namespace llvm::AMDGPU::SDWA;
8052 
8053   OptionalImmIndexMap OptionalIdx;
8054   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8055   bool SkippedVcc = false;
8056 
8057   unsigned I = 1;
8058   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8059   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8060     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8061   }
8062 
8063   for (unsigned E = Operands.size(); I != E; ++I) {
8064     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8065     if (SkipVcc && !SkippedVcc && Op.isReg() &&
8066         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
8067       // VOP2b SDWA instructions (v_add_u32, v_sub_u32, ...) use a "vcc" token
8068       // as dst. Skip it if it's the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
8069       // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
8070       // Skip VCC only if we didn't skip it on previous iteration.
8071       // Note that src0 and src1 occupy 2 slots each because of modifiers.
8072       if (BasicInstType == SIInstrFlags::VOP2 &&
8073           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8074            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8075         SkippedVcc = true;
8076         continue;
8077       } else if (BasicInstType == SIInstrFlags::VOPC &&
8078                  Inst.getNumOperands() == 0) {
8079         SkippedVcc = true;
8080         continue;
8081       }
8082     }
8083     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8084       Op.addRegOrImmWithInputModsOperands(Inst, 2);
8085     } else if (Op.isImm()) {
8086       // Handle optional arguments
8087       OptionalIdx[Op.getImmTy()] = I;
8088     } else {
8089       llvm_unreachable("Invalid operand type");
8090     }
8091     SkippedVcc = false;
8092   }
8093 
8094   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
8095       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
8096       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
8097     // The V_NOP_sdwa variants (vi/gfx9/gfx10) have no optional sdwa arguments.
8098     switch (BasicInstType) {
8099     case SIInstrFlags::VOP1:
8100       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8101       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8102         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8103       }
8104       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8105       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8106       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8107       break;
8108 
8109     case SIInstrFlags::VOP2:
8110       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8111       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8112         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8113       }
8114       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8115       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8116       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8117       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8118       break;
8119 
8120     case SIInstrFlags::VOPC:
8121       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8122         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8123       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8124       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8125       break;
8126 
8127     default:
8128       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8129     }
8130   }
8131 
8132   // Special case v_mac_{f16, f32}:
8133   // it has a src2 register operand that is tied to the dst operand.
8134   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8135       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
8136     auto it = Inst.begin();
8137     std::advance(
8138       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8139     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8140   }
8141 }
8142 
8143 //===----------------------------------------------------------------------===//
8144 // mAI
8145 //===----------------------------------------------------------------------===//
8146 
8147 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8148   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8149 }
8150 
8151 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8152   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8153 }
8154 
8155 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8156   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8157 }
8158 
8159 /// Force static initialization.
8160 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8161   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8162   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8163 }
8164 
8165 #define GET_REGISTER_MATCHER
8166 #define GET_MATCHER_IMPLEMENTATION
8167 #define GET_MNEMONIC_SPELL_CHECKER
8168 #define GET_MNEMONIC_CHECKER
8169 #include "AMDGPUGenAsmMatcher.inc"
8170 
8171 // This function should be defined after the auto-generated include so that
8172 // the MatchClassKind enum is defined.
8173 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8174                                                      unsigned Kind) {
8175   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
8176   // But MatchInstructionImpl() expects a token and fails to validate the
8177   // operand. This method checks if we were given an immediate operand but
8178   // expected to get the corresponding token.
8179   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8180   switch (Kind) {
8181   case MCK_addr64:
8182     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8183   case MCK_gds:
8184     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8185   case MCK_lds:
8186     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8187   case MCK_idxen:
8188     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8189   case MCK_offen:
8190     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8191   case MCK_SSrcB32:
8192     // When operands have expression values, they will return true for isToken,
8193     // because it is not possible to distinguish between a token and an
8194     // expression at parse time. MatchInstructionImpl() will always try to
8195     // match an operand as a token, when isToken returns true, and when the
8196     // name of the expression is not a valid token, the match will fail,
8197     // so we need to handle it here.
8198     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8199   case MCK_SSrcF32:
8200     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8201   case MCK_SoppBrTarget:
8202     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8203   case MCK_VReg32OrOff:
8204     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8205   case MCK_InterpSlot:
8206     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
8207   case MCK_Attr:
8208     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
8209   case MCK_AttrChan:
8210     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
8211   case MCK_ImmSMEMOffset:
8212     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
8213   case MCK_SReg_64:
8214   case MCK_SReg_64_XEXEC:
8215     // The "null" register is defined as a 32-bit register, but it should
8216     // also be accepted where 64-bit operands are expected.
8217     // The following code enables it for SReg_64 operands
8218     // used as source and destination. Remaining source
8219     // operands are handled in isInlinableImm.
8220     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
8221   default:
8222     return Match_InvalidOperand;
8223   }
8224 }
8225 
8226 //===----------------------------------------------------------------------===//
8227 // endpgm
8228 //===----------------------------------------------------------------------===//
8229 
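// Parse the optional immediate operand of the endpgm instructions (e.g.
// s_endpgm); it defaults to 0 when absent and must fit in 16 bits.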
8230 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
8231   SMLoc S = getLoc();
8232   int64_t Imm = 0;
8233 
8234   if (!parseExpr(Imm)) {
8235     // The operand is optional; if not present, default to 0.
8236     Imm = 0;
8237   }
8238 
8239   if (!isUInt<16>(Imm)) {
8240     Error(S, "expected a 16-bit value");
8241     return MatchOperand_ParseFail;
8242   }
8243 
8244   Operands.push_back(
8245       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
8246   return MatchOperand_Success;
8247 }
8248 
8249 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
8250