//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
    : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };
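
  // A minimal added illustration (not from the upstream source): with Abs and
  // Neg both set, getModifiersOperand() returns
  // SISrcMods::ABS | SISrcMods::NEG; with only Sext set, it returns
  // SISrcMods::SEXT.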

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

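  // Added note (a best-effort description; the kinds below are otherwise
  // undocumented): ImmKindTyNone marks an immediate that has not been
  // classified yet, ImmKindTyLiteral one matched as a literal constant, and
  // ImmKindTyConst one matched as an inline constant.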
  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell whether something was meant
    // to be a token, like 'gds', or an expression that references a global
    // variable. In this case, we assume the string is an expression, and if we
    // need to interpret it as a token, we treat the symbol name as the token.
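    // (Added illustration: a trailing 'gds' in a DS instruction may first be
    // parsed as a symbol reference; getToken() below then recovers the "gds"
    // spelling from the symbol's name.)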
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrcB64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrcF32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrcB32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcB32();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVCSrcV2FP32() const {
    return isVCSrcF64();
  }

  bool isVSrcV2FP32() const {
    return isVSrcF64() || isLiteralImm(MVT::v2f32);
  }

  bool isVCSrcV2INT32() const {
    return isVCSrcB64();
  }

  bool isVSrcV2INT32() const {
    return isVSrcB64() || isLiteralImm(MVT::v2i32);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512B32();
  }

  bool isVISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024B32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isVISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128F16() || isVISrc_128B32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

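// Added note: a debug-only printer. Illustrative output for an operand with
// only the 'abs' modifier set: "abs:1 neg:0 sext:0".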
raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg:" << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. the count of SGPRs used.
// A kernel scope begins at an .amdgpu_hsa_kernel directive and ends at the
// next .amdgpu_hsa_kernel directive or at EOF.
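//
// A hypothetical example: after usesRegister(IS_SGPR, /*DwordRegIndex=*/0,
// /*RegWidth=*/4) for a use of s[0:3], .kernel.sgpr_count is set to 4.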
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
      case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
      case IS_AGPR: // fall through
      case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
      default: break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;
  unsigned CPolSeen;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
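  ///
  /// An added, hedged illustration (granule sizes vary by target): with a
  /// VGPR encoding granule of 4 and NextFreeVGPR = 10, the encoded block
  /// count would be alignTo(10, 4) / 4 - 1 = 2.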
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  // TODO: Possibly make subtargetHasRegister const.
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
               const MCInstrInfo &MII,
               const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
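      //
      // Added illustration: assembly can test these symbols, e.g.
      //   .if .amdgcn.gfx_generation_number >= 9
      //   ...
      //   .endif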
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX90A() const {
    return AMDGPU::isGFX90A(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(StringRef Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseCPol(OperandVector &Operands);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value,
                                             SMLoc &StringLoc);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
  OperandMatchResultTy parseUfmt(int64_t &Format);
  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
  OperandMatchResultTy parseNumericFormat(int64_t &Format);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  bool parseHwregBody(OperandInfoTy &HwReg,
                      OperandInfoTy &Offset,
                      OperandInfoTy &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const OperandInfoTy &Offset,
                     const OperandInfoTy &Width);

  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                      const OperandVector &Operands) const;
  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
  SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
  SMLoc getLitLoc(const OperandVector &Operands) const;
  SMLoc getConstLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
  bool validateDivScale(const MCInst &Inst);
  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                             const SMLoc &IDLoc);
  Optional<StringRef> validateLdsDirect(const MCInst &Inst);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS);
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS,
                        ArrayRef<unsigned> Variants);
  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Pref, const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseId(StringRef &Val, const StringRef ErrMsg = "");

  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm, StringRef Expected = "");
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  void onBeginOfFile() override;

  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  bool parseSwizzleOperand(int64_t &Op,
                           const unsigned MinVal,
                           const unsigned MaxVal,
                           const StringRef ErrMsg,
                           SMLoc &Loc);
  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultCPol() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMEMOffset() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                OptionalImmIndexMap &OptionalIdx);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
  void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);

  void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);

  bool parseDimId(unsigned &Encoding);
  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
  int64_t parseDPPCtrlSel(StringRef Ctrl);
  int64_t parseDPPCtrlPerm();
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
1663   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1664   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1665 
1666   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1667                                     AMDGPUOperand::ImmTy Type);
1668   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1669   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1670   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1671   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1672   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1673   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1674   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1675                uint64_t BasicInstType,
1676                bool SkipDstVcc = false,
1677                bool SkipSrcVcc = false);
1678 
1679   AMDGPUOperand::Ptr defaultBLGP() const;
1680   AMDGPUOperand::Ptr defaultCBSZ() const;
1681   AMDGPUOperand::Ptr defaultABID() const;
1682 
1683   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1684   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1685 };
1686 
1687 struct OptionalOperand {
1688   const char *Name;
1689   AMDGPUOperand::ImmTy Type;
1690   bool IsBit;
1691   bool (*ConvertResult)(int64_t&);
1692 };
1693 
1694 } // end anonymous namespace
1695 
1696 // May be called with an integer type of equivalent bitwidth.
1697 static const fltSemantics *getFltSemantics(unsigned Size) {
1698   switch (Size) {
1699   case 4:
1700     return &APFloat::IEEEsingle();
1701   case 8:
1702     return &APFloat::IEEEdouble();
1703   case 2:
1704     return &APFloat::IEEEhalf();
1705   default:
1706     llvm_unreachable("unsupported fp type");
1707   }
1708 }
1709 
1710 static const fltSemantics *getFltSemantics(MVT VT) {
1711   return getFltSemantics(VT.getSizeInBits() / 8);
1712 }
1713 
1714 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1715   switch (OperandType) {
1716   case AMDGPU::OPERAND_REG_IMM_INT32:
1717   case AMDGPU::OPERAND_REG_IMM_FP32:
1718   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1719   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1720   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1721   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1722   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1723   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1724   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1725   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1726     return &APFloat::IEEEsingle();
1727   case AMDGPU::OPERAND_REG_IMM_INT64:
1728   case AMDGPU::OPERAND_REG_IMM_FP64:
1729   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1730   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1731   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1732     return &APFloat::IEEEdouble();
1733   case AMDGPU::OPERAND_REG_IMM_INT16:
1734   case AMDGPU::OPERAND_REG_IMM_FP16:
1735   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1736   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1737   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1738   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1739   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1740   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1741   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1742   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1743   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1744   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1745     return &APFloat::IEEEhalf();
1746   default:
1747     llvm_unreachable("unsupported fp type");
1748   }
1749 }
1750 
1751 //===----------------------------------------------------------------------===//
1752 // Operand
1753 //===----------------------------------------------------------------------===//
1754 
1755 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1756   bool Lost;
1757 
1758   // Convert the literal to the semantics of the requested type
1759   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1760                                                APFloat::rmNearestTiesToEven,
1761                                                &Lost);
1762   // We allow precision loss but not overflow or underflow
1763   if (Status != APFloat::opOK &&
1764       Lost &&
1765       ((Status & APFloat::opOverflow)  != 0 ||
1766        (Status & APFloat::opUnderflow) != 0)) {
1767     return false;
1768   }
1769 
1770   return true;
1771 }
1772 
1773 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1774   return isUIntN(Size, Val) || isIntN(Size, Val);
1775 }
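// Illustrative examples (not exhaustive) of the truncation check above,
// for Size == 16:
//   isSafeTruncation(0xFFFF,  16) -> true   (fits as uint16)
//   isSafeTruncation(-1,      16) -> true   (fits as int16)
//   isSafeTruncation(0x10000, 16) -> false  (needs 17 bits)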
1776 
1777 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1778   if (VT.getScalarType() == MVT::i16) {
1779     // FP immediate values are broken for i16 operands; accept only integer literals.
1780     return isInlinableIntLiteral(Val);
1781   }
1782 
1783   // f16/v2f16 operands work correctly for all values.
1784   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1785 }
1786 
1787 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1788 
1789   // This is a hack to enable named inline values like
1790   // shared_base with both 32-bit and 64-bit operands.
1791   // Note that these values are defined as
1792   // 32-bit operands only.
1793   if (isInlineValue()) {
1794     return true;
1795   }
1796 
1797   if (!isImmTy(ImmTyNone)) {
1798     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1799     return false;
1800   }
1801   // TODO: We should avoid using host float here. It would be better to
1802   // check the float bit values which is what a few other places do.
1803   // We've had bot failures before due to weird NaN support on mips hosts.
1804 
1805   APInt Literal(64, Imm.Val);
1806 
1807   if (Imm.IsFPImm) { // We got fp literal token
1808     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1809       return AMDGPU::isInlinableLiteral64(Imm.Val,
1810                                           AsmParser->hasInv2PiInlineImm());
1811     }
1812 
1813     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1814     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1815       return false;
1816 
1817     if (type.getScalarSizeInBits() == 16) {
1818       return isInlineableLiteralOp16(
1819         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1820         type, AsmParser->hasInv2PiInlineImm());
1821     }
1822 
1823     // Check if single precision literal is inlinable
1824     return AMDGPU::isInlinableLiteral32(
1825       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1826       AsmParser->hasInv2PiInlineImm());
1827   }
1828 
1829   // We got int literal token.
1830   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1831     return AMDGPU::isInlinableLiteral64(Imm.Val,
1832                                         AsmParser->hasInv2PiInlineImm());
1833   }
1834 
1835   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1836     return false;
1837   }
1838 
1839   if (type.getScalarSizeInBits() == 16) {
1840     return isInlineableLiteralOp16(
1841       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1842       type, AsmParser->hasInv2PiInlineImm());
1843   }
1844 
1845   return AMDGPU::isInlinableLiteral32(
1846     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1847     AsmParser->hasInv2PiInlineImm());
1848 }
1849 
1850 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1851   // Check that this immediate can be added as literal
1852   if (!isImmTy(ImmTyNone)) {
1853     return false;
1854   }
1855 
1856   if (!Imm.IsFPImm) {
1857     // We got int literal token.
1858 
1859     if (type == MVT::f64 && hasFPModifiers()) {
1860       // Cannot apply fp modifiers to int literals preserving the same semantics
1861       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1862       // disable these cases.
1863       return false;
1864     }
1865 
1866     unsigned Size = type.getSizeInBits();
1867     if (Size == 64)
1868       Size = 32;
1869 
1870     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1871     // types.
1872     return isSafeTruncation(Imm.Val, Size);
1873   }
1874 
1875   // We got fp literal token
1876   if (type == MVT::f64) { // Expected 64-bit fp operand
1877     // The low 32 bits of the literal will be set to zeroes, but we accept such literals
1878     return true;
1879   }
1880 
1881   if (type == MVT::i64) { // Expected 64-bit int operand
1882     // We don't allow fp literals in 64-bit integer instructions. It is
1883     // unclear how we should encode them.
1884     return false;
1885   }
1886 
1887   // We allow fp literals with f16x2 operands assuming that the specified
1888   // literal goes into the lower half and the upper half is zero. We also
1889   // require that the literal can be losslessly converted to f16.
1890   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1891                      (type == MVT::v2i16)? MVT::i16 :
1892                      (type == MVT::v2f32)? MVT::f32 : type;
1893 
1894   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1895   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1896 }
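// Illustrative example of the rules above: a literal paired with a v2f16
// operand is accepted when it converts to f16 without overflow or
// underflow, e.g.
//   v_pk_add_f16 v1, 1.0, v2   // ok: 1.0 is exact in f16
// whereas an i64 operand rejects all fp literals because their encoding
// would be ambiguous.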
1897 
1898 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1899   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1900 }
1901 
1902 bool AMDGPUOperand::isVRegWithInputMods() const {
1903   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1904          // GFX90A allows DPP on 64-bit operands.
1905          (isRegClass(AMDGPU::VReg_64RegClassID) &&
1906           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1907 }
1908 
1909 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1910   if (AsmParser->isVI())
1911     return isVReg32();
1912   else if (AsmParser->isGFX9Plus())
1913     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1914   else
1915     return false;
1916 }
1917 
1918 bool AMDGPUOperand::isSDWAFP16Operand() const {
1919   return isSDWAOperand(MVT::f16);
1920 }
1921 
1922 bool AMDGPUOperand::isSDWAFP32Operand() const {
1923   return isSDWAOperand(MVT::f32);
1924 }
1925 
1926 bool AMDGPUOperand::isSDWAInt16Operand() const {
1927   return isSDWAOperand(MVT::i16);
1928 }
1929 
1930 bool AMDGPUOperand::isSDWAInt32Operand() const {
1931   return isSDWAOperand(MVT::i32);
1932 }
1933 
1934 bool AMDGPUOperand::isBoolReg() const {
1935   auto FB = AsmParser->getFeatureBits();
1936   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1937                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
1938 }
1939 
1940 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1941 {
1942   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1943   assert(Size == 2 || Size == 4 || Size == 8);
1944 
1945   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1946 
1947   if (Imm.Mods.Abs) {
1948     Val &= ~FpSignMask;
1949   }
1950   if (Imm.Mods.Neg) {
1951     Val ^= FpSignMask;
1952   }
1953 
1954   return Val;
1955 }
1956 
1957 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1958   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1959                              Inst.getNumOperands())) {
1960     addLiteralImmOperand(Inst, Imm.Val,
1961                          ApplyModifiers &&
1962                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1963   } else {
1964     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1965     Inst.addOperand(MCOperand::createImm(Imm.Val));
1966     setImmKindNone();
1967   }
1968 }
1969 
1970 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1971   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1972   auto OpNum = Inst.getNumOperands();
1973   // Check that this operand accepts literals
1974   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1975 
1976   if (ApplyModifiers) {
1977     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1978     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1979     Val = applyInputFPModifiers(Val, Size);
1980   }
1981 
1982   APInt Literal(64, Val);
1983   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1984 
1985   if (Imm.IsFPImm) { // We got fp literal token
1986     switch (OpTy) {
1987     case AMDGPU::OPERAND_REG_IMM_INT64:
1988     case AMDGPU::OPERAND_REG_IMM_FP64:
1989     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1990     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1991     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1992       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1993                                        AsmParser->hasInv2PiInlineImm())) {
1994         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1995         setImmKindConst();
1996         return;
1997       }
1998 
1999       // Non-inlineable
2000       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2001         // For fp operands we check if low 32 bits are zeros
2002         if (Literal.getLoBits(32) != 0) {
2003           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2004           "Can't encode literal as exact 64-bit floating-point operand. "
2005           "Low 32-bits will be set to zero");
2006         }
2007 
2008         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2009         setImmKindLiteral();
2010         return;
2011       }
2012 
2013       // We don't allow fp literals in 64-bit integer instructions. It is
2014       // unclear how we should encode them. This case should be checked earlier
2015       // in predicate methods (isLiteralImm())
2016       llvm_unreachable("fp literal in 64-bit integer instruction.");
2017 
2018     case AMDGPU::OPERAND_REG_IMM_INT32:
2019     case AMDGPU::OPERAND_REG_IMM_FP32:
2020     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2021     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2022     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2023     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2024     case AMDGPU::OPERAND_REG_IMM_INT16:
2025     case AMDGPU::OPERAND_REG_IMM_FP16:
2026     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2027     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2028     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2029     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2030     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2031     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2032     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2033     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2034     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2035     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2036     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2037     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2038     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2039     case AMDGPU::OPERAND_REG_IMM_V2INT32: {
2040       bool lost;
2041       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2042       // Convert literal to the operand's fp semantics (f16 or f32)
2043       FPLiteral.convert(*getOpFltSemantics(OpTy),
2044                         APFloat::rmNearestTiesToEven, &lost);
2045       // We allow precision loss but not overflow or underflow. This should be
2046       // checked earlier in isLiteralImm()
2047 
2048       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2049       Inst.addOperand(MCOperand::createImm(ImmVal));
2050       setImmKindLiteral();
2051       return;
2052     }
2053     default:
2054       llvm_unreachable("invalid operand size");
2055     }
2056 
2057     return;
2058   }
2059 
2060   // We got int literal token.
2061   // Only sign extend inline immediates.
2062   switch (OpTy) {
2063   case AMDGPU::OPERAND_REG_IMM_INT32:
2064   case AMDGPU::OPERAND_REG_IMM_FP32:
2065   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2066   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2067   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2068   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2069   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2070   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2071   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2072   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2073   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2074   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2075     if (isSafeTruncation(Val, 32) &&
2076         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2077                                      AsmParser->hasInv2PiInlineImm())) {
2078       Inst.addOperand(MCOperand::createImm(Val));
2079       setImmKindConst();
2080       return;
2081     }
2082 
2083     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2084     setImmKindLiteral();
2085     return;
2086 
2087   case AMDGPU::OPERAND_REG_IMM_INT64:
2088   case AMDGPU::OPERAND_REG_IMM_FP64:
2089   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2090   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2091   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2092     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2093       Inst.addOperand(MCOperand::createImm(Val));
2094       setImmKindConst();
2095       return;
2096     }
2097 
2098     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2099     setImmKindLiteral();
2100     return;
2101 
2102   case AMDGPU::OPERAND_REG_IMM_INT16:
2103   case AMDGPU::OPERAND_REG_IMM_FP16:
2104   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2105   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2106   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2107   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2108     if (isSafeTruncation(Val, 16) &&
2109         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2110                                      AsmParser->hasInv2PiInlineImm())) {
2111       Inst.addOperand(MCOperand::createImm(Val));
2112       setImmKindConst();
2113       return;
2114     }
2115 
2116     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2117     setImmKindLiteral();
2118     return;
2119 
2120   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2121   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2122   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2123   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2124     assert(isSafeTruncation(Val, 16));
2125     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2126                                         AsmParser->hasInv2PiInlineImm()));
2127 
2128     Inst.addOperand(MCOperand::createImm(Val));
2129     return;
2130   }
2131   default:
2132     llvm_unreachable("invalid operand size");
2133   }
2134 }
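// Worked example (illustrative) for an integer literal with a 32-bit
// operand, following the int-token path above:
//   64  -> inline constant (within [-16, 64]): setImmKindConst()
//   100 -> not inlinable: truncated to 32 bits and emitted as a literal,
//          setImmKindLiteral()
// For a non-inlinable fp literal in a 64-bit fp operand, only the high
// 32 bits are encoded and the hardware zero-fills the low half (hence
// the warning above).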
2135 
2136 template <unsigned Bitwidth>
2137 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2138   APInt Literal(64, Imm.Val);
2139   setImmKindNone();
2140 
2141   if (!Imm.IsFPImm) {
2142     // We got int literal token.
2143     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2144     return;
2145   }
2146 
2147   bool Lost;
2148   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2149   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2150                     APFloat::rmNearestTiesToEven, &Lost);
2151   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2152 }
2153 
2154 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2155   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2156 }
2157 
2158 static bool isInlineValue(unsigned Reg) {
2159   switch (Reg) {
2160   case AMDGPU::SRC_SHARED_BASE:
2161   case AMDGPU::SRC_SHARED_LIMIT:
2162   case AMDGPU::SRC_PRIVATE_BASE:
2163   case AMDGPU::SRC_PRIVATE_LIMIT:
2164   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2165     return true;
2166   case AMDGPU::SRC_VCCZ:
2167   case AMDGPU::SRC_EXECZ:
2168   case AMDGPU::SRC_SCC:
2169     return true;
2170   case AMDGPU::SGPR_NULL:
2171     return true;
2172   default:
2173     return false;
2174   }
2175 }
2176 
2177 bool AMDGPUOperand::isInlineValue() const {
2178   return isRegKind() && ::isInlineValue(getReg());
2179 }
2180 
2181 //===----------------------------------------------------------------------===//
2182 // AsmParser
2183 //===----------------------------------------------------------------------===//
2184 
2185 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2186   if (Is == IS_VGPR) {
2187     switch (RegWidth) {
2188       default: return -1;
2189       case 1: return AMDGPU::VGPR_32RegClassID;
2190       case 2: return AMDGPU::VReg_64RegClassID;
2191       case 3: return AMDGPU::VReg_96RegClassID;
2192       case 4: return AMDGPU::VReg_128RegClassID;
2193       case 5: return AMDGPU::VReg_160RegClassID;
2194       case 6: return AMDGPU::VReg_192RegClassID;
2195       case 8: return AMDGPU::VReg_256RegClassID;
2196       case 16: return AMDGPU::VReg_512RegClassID;
2197       case 32: return AMDGPU::VReg_1024RegClassID;
2198     }
2199   } else if (Is == IS_TTMP) {
2200     switch (RegWidth) {
2201       default: return -1;
2202       case 1: return AMDGPU::TTMP_32RegClassID;
2203       case 2: return AMDGPU::TTMP_64RegClassID;
2204       case 4: return AMDGPU::TTMP_128RegClassID;
2205       case 8: return AMDGPU::TTMP_256RegClassID;
2206       case 16: return AMDGPU::TTMP_512RegClassID;
2207     }
2208   } else if (Is == IS_SGPR) {
2209     switch (RegWidth) {
2210       default: return -1;
2211       case 1: return AMDGPU::SGPR_32RegClassID;
2212       case 2: return AMDGPU::SGPR_64RegClassID;
2213       case 3: return AMDGPU::SGPR_96RegClassID;
2214       case 4: return AMDGPU::SGPR_128RegClassID;
2215       case 5: return AMDGPU::SGPR_160RegClassID;
2216       case 6: return AMDGPU::SGPR_192RegClassID;
2217       case 8: return AMDGPU::SGPR_256RegClassID;
2218       case 16: return AMDGPU::SGPR_512RegClassID;
2219     }
2220   } else if (Is == IS_AGPR) {
2221     switch (RegWidth) {
2222       default: return -1;
2223       case 1: return AMDGPU::AGPR_32RegClassID;
2224       case 2: return AMDGPU::AReg_64RegClassID;
2225       case 3: return AMDGPU::AReg_96RegClassID;
2226       case 4: return AMDGPU::AReg_128RegClassID;
2227       case 5: return AMDGPU::AReg_160RegClassID;
2228       case 6: return AMDGPU::AReg_192RegClassID;
2229       case 8: return AMDGPU::AReg_256RegClassID;
2230       case 16: return AMDGPU::AReg_512RegClassID;
2231       case 32: return AMDGPU::AReg_1024RegClassID;
2232     }
2233   }
2234   return -1;
2235 }
2236 
2237 static unsigned getSpecialRegForName(StringRef RegName) {
2238   return StringSwitch<unsigned>(RegName)
2239     .Case("exec", AMDGPU::EXEC)
2240     .Case("vcc", AMDGPU::VCC)
2241     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2242     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2243     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2244     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2245     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2246     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2247     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2248     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2249     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2250     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2251     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2252     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2253     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2254     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2255     .Case("m0", AMDGPU::M0)
2256     .Case("vccz", AMDGPU::SRC_VCCZ)
2257     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2258     .Case("execz", AMDGPU::SRC_EXECZ)
2259     .Case("src_execz", AMDGPU::SRC_EXECZ)
2260     .Case("scc", AMDGPU::SRC_SCC)
2261     .Case("src_scc", AMDGPU::SRC_SCC)
2262     .Case("tba", AMDGPU::TBA)
2263     .Case("tma", AMDGPU::TMA)
2264     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2265     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2266     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2267     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2268     .Case("vcc_lo", AMDGPU::VCC_LO)
2269     .Case("vcc_hi", AMDGPU::VCC_HI)
2270     .Case("exec_lo", AMDGPU::EXEC_LO)
2271     .Case("exec_hi", AMDGPU::EXEC_HI)
2272     .Case("tma_lo", AMDGPU::TMA_LO)
2273     .Case("tma_hi", AMDGPU::TMA_HI)
2274     .Case("tba_lo", AMDGPU::TBA_LO)
2275     .Case("tba_hi", AMDGPU::TBA_HI)
2276     .Case("pc", AMDGPU::PC_REG)
2277     .Case("null", AMDGPU::SGPR_NULL)
2278     .Default(AMDGPU::NoRegister);
2279 }
2280 
2281 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2282                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2283   auto R = parseRegister();
2284   if (!R) return true;
2285   assert(R->isReg());
2286   RegNo = R->getReg();
2287   StartLoc = R->getStartLoc();
2288   EndLoc = R->getEndLoc();
2289   return false;
2290 }
2291 
2292 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2293                                     SMLoc &EndLoc) {
2294   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2295 }
2296 
2297 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2298                                                        SMLoc &StartLoc,
2299                                                        SMLoc &EndLoc) {
2300   bool Result =
2301       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2302   bool PendingErrors = getParser().hasPendingError();
2303   getParser().clearPendingErrors();
2304   if (PendingErrors)
2305     return MatchOperand_ParseFail;
2306   if (Result)
2307     return MatchOperand_NoMatch;
2308   return MatchOperand_Success;
2309 }
2310 
2311 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2312                                             RegisterKind RegKind, unsigned Reg1,
2313                                             SMLoc Loc) {
2314   switch (RegKind) {
2315   case IS_SPECIAL:
2316     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2317       Reg = AMDGPU::EXEC;
2318       RegWidth = 2;
2319       return true;
2320     }
2321     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2322       Reg = AMDGPU::FLAT_SCR;
2323       RegWidth = 2;
2324       return true;
2325     }
2326     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2327       Reg = AMDGPU::XNACK_MASK;
2328       RegWidth = 2;
2329       return true;
2330     }
2331     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2332       Reg = AMDGPU::VCC;
2333       RegWidth = 2;
2334       return true;
2335     }
2336     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2337       Reg = AMDGPU::TBA;
2338       RegWidth = 2;
2339       return true;
2340     }
2341     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2342       Reg = AMDGPU::TMA;
2343       RegWidth = 2;
2344       return true;
2345     }
2346     Error(Loc, "register does not fit in the list");
2347     return false;
2348   case IS_VGPR:
2349   case IS_SGPR:
2350   case IS_AGPR:
2351   case IS_TTMP:
2352     if (Reg1 != Reg + RegWidth) {
2353       Error(Loc, "registers in a list must have consecutive indices");
2354       return false;
2355     }
2356     RegWidth++;
2357     return true;
2358   default:
2359     llvm_unreachable("unexpected register kind");
2360   }
2361 }
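// Illustrative example: while parsing the list [s0,s1,s2,s3], each call
// above appends one register and bumps RegWidth from 1 up to 4. Special
// registers merge only as known lo/hi pairs, e.g. [exec_lo,exec_hi]
// becomes exec with RegWidth == 2.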
2362 
2363 struct RegInfo {
2364   StringLiteral Name;
2365   RegisterKind Kind;
2366 };
2367 
2368 static constexpr RegInfo RegularRegisters[] = {
2369   {{"v"},    IS_VGPR},
2370   {{"s"},    IS_SGPR},
2371   {{"ttmp"}, IS_TTMP},
2372   {{"acc"},  IS_AGPR},
2373   {{"a"},    IS_AGPR},
2374 };
2375 
2376 static bool isRegularReg(RegisterKind Kind) {
2377   return Kind == IS_VGPR ||
2378          Kind == IS_SGPR ||
2379          Kind == IS_TTMP ||
2380          Kind == IS_AGPR;
2381 }
2382 
2383 static const RegInfo* getRegularRegInfo(StringRef Str) {
2384   for (const RegInfo &Reg : RegularRegisters)
2385     if (Str.startswith(Reg.Name))
2386       return &Reg;
2387   return nullptr;
2388 }
2389 
2390 static bool getRegNum(StringRef Str, unsigned& Num) {
2391   return !Str.getAsInteger(10, Num);
2392 }
2393 
2394 bool
2395 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2396                             const AsmToken &NextToken) const {
2397 
2398   // A list of consecutive registers: [s0,s1,s2,s3]
2399   if (Token.is(AsmToken::LBrac))
2400     return true;
2401 
2402   if (!Token.is(AsmToken::Identifier))
2403     return false;
2404 
2405   // A single register like s0 or a range of registers like s[0:1]
2406 
2407   StringRef Str = Token.getString();
2408   const RegInfo *Reg = getRegularRegInfo(Str);
2409   if (Reg) {
2410     StringRef RegName = Reg->Name;
2411     StringRef RegSuffix = Str.substr(RegName.size());
2412     if (!RegSuffix.empty()) {
2413       unsigned Num;
2414       // A single register with an index: rXX
2415       if (getRegNum(RegSuffix, Num))
2416         return true;
2417     } else {
2418       // A range of registers: r[XX:YY].
2419       if (NextToken.is(AsmToken::LBrac))
2420         return true;
2421     }
2422   }
2423 
2424   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2425 }
2426 
2427 bool
2428 AMDGPUAsmParser::isRegister()
2429 {
2430   return isRegister(getToken(), peekToken());
2431 }
2432 
2433 unsigned
2434 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2435                                unsigned RegNum,
2436                                unsigned RegWidth,
2437                                SMLoc Loc) {
2438 
2439   assert(isRegularReg(RegKind));
2440 
2441   unsigned AlignSize = 1;
2442   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2443     // SGPR and TTMP registers must be aligned.
2444     // Max required alignment is 4 dwords.
2445     AlignSize = std::min(RegWidth, 4u);
2446   }
2447 
2448   if (RegNum % AlignSize != 0) {
2449     Error(Loc, "invalid register alignment");
2450     return AMDGPU::NoRegister;
2451   }
2452 
2453   unsigned RegIdx = RegNum / AlignSize;
2454   int RCID = getRegClass(RegKind, RegWidth);
2455   if (RCID == -1) {
2456     Error(Loc, "invalid or unsupported register size");
2457     return AMDGPU::NoRegister;
2458   }
2459 
2460   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2461   const MCRegisterClass RC = TRI->getRegClass(RCID);
2462   if (RegIdx >= RC.getNumRegs()) {
2463     Error(Loc, "register index is out of range");
2464     return AMDGPU::NoRegister;
2465   }
2466 
2467   return RC.getRegister(RegIdx);
2468 }
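// Illustrative example of the alignment rule above: SGPR pairs must start
// at an even index, so s[2:3] is accepted (RegNum 2, AlignSize 2) while
// s[1:2] is rejected with "invalid register alignment". VGPRs are not
// aligned (AlignSize 1), so v[1:2] is fine.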
2469 
2470 bool
2471 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2472   int64_t RegLo, RegHi;
2473   if (!skipToken(AsmToken::LBrac, "missing register index"))
2474     return false;
2475 
2476   SMLoc FirstIdxLoc = getLoc();
2477   SMLoc SecondIdxLoc;
2478 
2479   if (!parseExpr(RegLo))
2480     return false;
2481 
2482   if (trySkipToken(AsmToken::Colon)) {
2483     SecondIdxLoc = getLoc();
2484     if (!parseExpr(RegHi))
2485       return false;
2486   } else {
2487     RegHi = RegLo;
2488   }
2489 
2490   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2491     return false;
2492 
2493   if (!isUInt<32>(RegLo)) {
2494     Error(FirstIdxLoc, "invalid register index");
2495     return false;
2496   }
2497 
2498   if (!isUInt<32>(RegHi)) {
2499     Error(SecondIdxLoc, "invalid register index");
2500     return false;
2501   }
2502 
2503   if (RegLo > RegHi) {
2504     Error(FirstIdxLoc, "first register index should not exceed second index");
2505     return false;
2506   }
2507 
2508   Num = static_cast<unsigned>(RegLo);
2509   Width = (RegHi - RegLo) + 1;
2510   return true;
2511 }
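// Illustrative example: for the suffix "[0:3]" this yields Num == 0 and
// Width == 4; a single index such as "[5]" yields Num == 5 and Width == 1.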
2512 
2513 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2514                                           unsigned &RegNum, unsigned &RegWidth,
2515                                           SmallVectorImpl<AsmToken> &Tokens) {
2516   assert(isToken(AsmToken::Identifier));
2517   unsigned Reg = getSpecialRegForName(getTokenStr());
2518   if (Reg) {
2519     RegNum = 0;
2520     RegWidth = 1;
2521     RegKind = IS_SPECIAL;
2522     Tokens.push_back(getToken());
2523     lex(); // skip register name
2524   }
2525   return Reg;
2526 }
2527 
2528 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2529                                           unsigned &RegNum, unsigned &RegWidth,
2530                                           SmallVectorImpl<AsmToken> &Tokens) {
2531   assert(isToken(AsmToken::Identifier));
2532   StringRef RegName = getTokenStr();
2533   auto Loc = getLoc();
2534 
2535   const RegInfo *RI = getRegularRegInfo(RegName);
2536   if (!RI) {
2537     Error(Loc, "invalid register name");
2538     return AMDGPU::NoRegister;
2539   }
2540 
2541   Tokens.push_back(getToken());
2542   lex(); // skip register name
2543 
2544   RegKind = RI->Kind;
2545   StringRef RegSuffix = RegName.substr(RI->Name.size());
2546   if (!RegSuffix.empty()) {
2547     // Single 32-bit register: vXX.
2548     if (!getRegNum(RegSuffix, RegNum)) {
2549       Error(Loc, "invalid register index");
2550       return AMDGPU::NoRegister;
2551     }
2552     RegWidth = 1;
2553   } else {
2554     // Range of registers: v[XX:YY]. ":YY" is optional.
2555     if (!ParseRegRange(RegNum, RegWidth))
2556       return AMDGPU::NoRegister;
2557   }
2558 
2559   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2560 }
2561 
2562 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2563                                        unsigned &RegWidth,
2564                                        SmallVectorImpl<AsmToken> &Tokens) {
2565   unsigned Reg = AMDGPU::NoRegister;
2566   auto ListLoc = getLoc();
2567 
2568   if (!skipToken(AsmToken::LBrac,
2569                  "expected a register or a list of registers")) {
2570     return AMDGPU::NoRegister;
2571   }
2572 
2573   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2574 
2575   auto Loc = getLoc();
2576   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2577     return AMDGPU::NoRegister;
2578   if (RegWidth != 1) {
2579     Error(Loc, "expected a single 32-bit register");
2580     return AMDGPU::NoRegister;
2581   }
2582 
2583   while (trySkipToken(AsmToken::Comma)) {
2584     RegisterKind NextRegKind;
2585     unsigned NextReg, NextRegNum, NextRegWidth;
2586     Loc = getLoc();
2587 
2588     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2589                              NextRegNum, NextRegWidth,
2590                              Tokens)) {
2591       return AMDGPU::NoRegister;
2592     }
2593     if (NextRegWidth != 1) {
2594       Error(Loc, "expected a single 32-bit register");
2595       return AMDGPU::NoRegister;
2596     }
2597     if (NextRegKind != RegKind) {
2598       Error(Loc, "registers in a list must be of the same kind");
2599       return AMDGPU::NoRegister;
2600     }
2601     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2602       return AMDGPU::NoRegister;
2603   }
2604 
2605   if (!skipToken(AsmToken::RBrac,
2606                  "expected a comma or a closing square bracket")) {
2607     return AMDGPU::NoRegister;
2608   }
2609 
2610   if (isRegularReg(RegKind))
2611     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2612 
2613   return Reg;
2614 }
2615 
2616 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2617                                           unsigned &RegNum, unsigned &RegWidth,
2618                                           SmallVectorImpl<AsmToken> &Tokens) {
2619   auto Loc = getLoc();
2620   Reg = AMDGPU::NoRegister;
2621 
2622   if (isToken(AsmToken::Identifier)) {
2623     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2624     if (Reg == AMDGPU::NoRegister)
2625       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2626   } else {
2627     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2628   }
2629 
2630   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2631   if (Reg == AMDGPU::NoRegister) {
2632     assert(Parser.hasPendingError());
2633     return false;
2634   }
2635 
2636   if (!subtargetHasRegister(*TRI, Reg)) {
2637     if (Reg == AMDGPU::SGPR_NULL) {
2638       Error(Loc, "'null' operand is not supported on this GPU");
2639     } else {
2640       Error(Loc, "register not available on this GPU");
2641     }
2642     return false;
2643   }
2644 
2645   return true;
2646 }
2647 
2648 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2649                                           unsigned &RegNum, unsigned &RegWidth,
2650                                           bool RestoreOnFailure /*=false*/) {
2651   Reg = AMDGPU::NoRegister;
2652 
2653   SmallVector<AsmToken, 1> Tokens;
2654   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2655     if (RestoreOnFailure) {
2656       while (!Tokens.empty()) {
2657         getLexer().UnLex(Tokens.pop_back_val());
2658       }
2659     }
2660     return true;
2661   }
2662   return false;
2663 }
2664 
2665 Optional<StringRef>
2666 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2667   switch (RegKind) {
2668   case IS_VGPR:
2669     return StringRef(".amdgcn.next_free_vgpr");
2670   case IS_SGPR:
2671     return StringRef(".amdgcn.next_free_sgpr");
2672   default:
2673     return None;
2674   }
2675 }
2676 
2677 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2678   auto SymbolName = getGprCountSymbolName(RegKind);
2679   assert(SymbolName && "initializing invalid register kind");
2680   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2681   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2682 }
2683 
2684 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2685                                             unsigned DwordRegIndex,
2686                                             unsigned RegWidth) {
2687   // Symbols are only defined for GCN targets
2688   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2689     return true;
2690 
2691   auto SymbolName = getGprCountSymbolName(RegKind);
2692   if (!SymbolName)
2693     return true;
2694   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2695 
2696   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2697   int64_t OldCount;
2698 
2699   if (!Sym->isVariable())
2700     return !Error(getLoc(),
2701                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2702   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2703     return !Error(
2704         getLoc(),
2705         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2706 
2707   if (OldCount <= NewMax)
2708     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2709 
2710   return true;
2711 }
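// Illustrative example: after parsing v[8:11], DwordRegIndex == 8 and
// RegWidth == 4, so NewMax == 11 and .amdgcn.next_free_vgpr is raised
// to 12 unless it already exceeds that value.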
2712 
2713 std::unique_ptr<AMDGPUOperand>
2714 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2715   const auto &Tok = getToken();
2716   SMLoc StartLoc = Tok.getLoc();
2717   SMLoc EndLoc = Tok.getEndLoc();
2718   RegisterKind RegKind;
2719   unsigned Reg, RegNum, RegWidth;
2720 
2721   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2722     return nullptr;
2723   }
2724   if (isHsaAbiVersion3Or4(&getSTI())) {
2725     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2726       return nullptr;
2727   } else
2728     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2729   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2730 }
2731 
2732 OperandMatchResultTy
2733 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2734   // TODO: add syntactic sugar for 1/(2*PI)
2735 
2736   assert(!isRegister());
2737   assert(!isModifier());
2738 
2739   const auto& Tok = getToken();
2740   const auto& NextTok = peekToken();
2741   bool IsReal = Tok.is(AsmToken::Real);
2742   SMLoc S = getLoc();
2743   bool Negate = false;
2744 
2745   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2746     lex();
2747     IsReal = true;
2748     Negate = true;
2749   }
2750 
2751   if (IsReal) {
2752     // Floating-point expressions are not supported.
2753     // Only floating-point literals with an
2754     // optional sign are allowed.
2755 
2756     StringRef Num = getTokenStr();
2757     lex();
2758 
2759     APFloat RealVal(APFloat::IEEEdouble());
2760     auto roundMode = APFloat::rmNearestTiesToEven;
2761     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2762       return MatchOperand_ParseFail;
2763     }
2764     if (Negate)
2765       RealVal.changeSign();
2766 
2767     Operands.push_back(
2768       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2769                                AMDGPUOperand::ImmTyNone, true));
2770 
2771     return MatchOperand_Success;
2772 
2773   } else {
2774     int64_t IntVal;
2775     const MCExpr *Expr;
2776     SMLoc S = getLoc();
2777 
2778     if (HasSP3AbsModifier) {
2779       // This is a workaround for handling expressions
2780       // as arguments of the SP3 'abs' modifier, for example:
2781       //     |1.0|
2782       //     |-1|
2783       //     |1+x|
2784       // This syntax is not compatible with syntax of standard
2785       // MC expressions (due to the trailing '|').
2786       SMLoc EndLoc;
2787       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2788         return MatchOperand_ParseFail;
2789     } else {
2790       if (Parser.parseExpression(Expr))
2791         return MatchOperand_ParseFail;
2792     }
2793 
2794     if (Expr->evaluateAsAbsolute(IntVal)) {
2795       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2796     } else {
2797       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2798     }
2799 
2800     return MatchOperand_Success;
2801   }
2802 
2803   return MatchOperand_NoMatch;
2804 }
2805 
2806 OperandMatchResultTy
2807 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2808   if (!isRegister())
2809     return MatchOperand_NoMatch;
2810 
2811   if (auto R = parseRegister()) {
2812     assert(R->isReg());
2813     Operands.push_back(std::move(R));
2814     return MatchOperand_Success;
2815   }
2816   return MatchOperand_ParseFail;
2817 }
2818 
2819 OperandMatchResultTy
2820 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2821   auto res = parseReg(Operands);
2822   if (res != MatchOperand_NoMatch) {
2823     return res;
2824   } else if (isModifier()) {
2825     return MatchOperand_NoMatch;
2826   } else {
2827     return parseImm(Operands, HasSP3AbsMod);
2828   }
2829 }
2830 
2831 bool
2832 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2833   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2834     const auto &str = Token.getString();
2835     return str == "abs" || str == "neg" || str == "sext";
2836   }
2837   return false;
2838 }
2839 
2840 bool
2841 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2842   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2843 }
2844 
2845 bool
2846 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2847   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2848 }
2849 
2850 bool
2851 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2852   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2853 }
2854 
2855 // Check if this is an operand modifier or an opcode modifier
2856 // which may look like an expression but is not. We should
2857 // avoid parsing these modifiers as expressions. Currently
2858 // recognized sequences are:
2859 //   |...|
2860 //   abs(...)
2861 //   neg(...)
2862 //   sext(...)
2863 //   -reg
2864 //   -|...|
2865 //   -abs(...)
2866 //   name:...
2867 // Note that simple opcode modifiers like 'gds' may be parsed as
2868 // expressions; this is a special case. See getExpressionAsToken.
2869 //
2870 bool
2871 AMDGPUAsmParser::isModifier() {
2872 
2873   AsmToken Tok = getToken();
2874   AsmToken NextToken[2];
2875   peekTokens(NextToken);
2876 
2877   return isOperandModifier(Tok, NextToken[0]) ||
2878          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2879          isOpcodeModifierWithVal(Tok, NextToken[0]);
2880 }
2881 
2882 // Check if the current token is an SP3 'neg' modifier.
2883 // Currently this modifier is allowed in the following contexts:
2884 //
2885 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2886 // 2. Before an 'abs' modifier: -abs(...)
2887 // 3. Before an SP3 'abs' modifier: -|...|
2888 //
2889 // In all other cases "-" is handled as a part
2890 // of an expression that follows the sign.
2891 //
2892 // Note: When "-" is followed by an integer literal N,
2893 // this is interpreted as integer negation rather
2894 // than a floating-point NEG modifier applied to N.
2895 // Besides being counter-intuitive, such use of a floating-point
2896 // NEG modifier would have resulted in different meanings
2897 // of integer literals used with VOP1/2/C and VOP3,
2898 // for example:
2899 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2900 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2901 // Negative fp literals with a preceding "-" are
2902 // handled likewise for uniformity.
2903 //
2904 bool
2905 AMDGPUAsmParser::parseSP3NegModifier() {
2906 
2907   AsmToken NextToken[2];
2908   peekTokens(NextToken);
2909 
2910   if (isToken(AsmToken::Minus) &&
2911       (isRegister(NextToken[0], NextToken[1]) ||
2912        NextToken[0].is(AsmToken::Pipe) ||
2913        isId(NextToken[0], "abs"))) {
2914     lex();
2915     return true;
2916   }
2917 
2918   return false;
2919 }
2920 
2921 OperandMatchResultTy
2922 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2923                                               bool AllowImm) {
2924   bool Neg, SP3Neg;
2925   bool Abs, SP3Abs;
2926   SMLoc Loc;
2927 
2928   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2929   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2930     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2931     return MatchOperand_ParseFail;
2932   }
2933 
2934   SP3Neg = parseSP3NegModifier();
2935 
2936   Loc = getLoc();
2937   Neg = trySkipId("neg");
2938   if (Neg && SP3Neg) {
2939     Error(Loc, "expected register or immediate");
2940     return MatchOperand_ParseFail;
2941   }
2942   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2943     return MatchOperand_ParseFail;
2944 
2945   Abs = trySkipId("abs");
2946   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2947     return MatchOperand_ParseFail;
2948 
2949   Loc = getLoc();
2950   SP3Abs = trySkipToken(AsmToken::Pipe);
2951   if (Abs && SP3Abs) {
2952     Error(Loc, "expected register or immediate");
2953     return MatchOperand_ParseFail;
2954   }
2955 
2956   OperandMatchResultTy Res;
2957   if (AllowImm) {
2958     Res = parseRegOrImm(Operands, SP3Abs);
2959   } else {
2960     Res = parseReg(Operands);
2961   }
2962   if (Res != MatchOperand_Success) {
2963     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2964   }
2965 
2966   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2967     return MatchOperand_ParseFail;
2968   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2969     return MatchOperand_ParseFail;
2970   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2971     return MatchOperand_ParseFail;
2972 
2973   AMDGPUOperand::Modifiers Mods;
2974   Mods.Abs = Abs || SP3Abs;
2975   Mods.Neg = Neg || SP3Neg;
2976 
2977   if (Mods.hasFPModifiers()) {
2978     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2979     if (Op.isExpr()) {
2980       Error(Op.getStartLoc(), "expected an absolute expression");
2981       return MatchOperand_ParseFail;
2982     }
2983     Op.setModifiers(Mods);
2984   }
2985   return MatchOperand_Success;
2986 }
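// Illustrative inputs accepted by the routine above:
//   v0, -v0, |v0|, -|v0|, abs(v0), neg(v0), -abs(v0)
// Doubled modifiers of the same kind, e.g. "abs(|v0|)" or "--1",
// are rejected.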
2987 
2988 OperandMatchResultTy
2989 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2990                                                bool AllowImm) {
2991   bool Sext = trySkipId("sext");
2992   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2993     return MatchOperand_ParseFail;
2994 
2995   OperandMatchResultTy Res;
2996   if (AllowImm) {
2997     Res = parseRegOrImm(Operands);
2998   } else {
2999     Res = parseReg(Operands);
3000   }
3001   if (Res != MatchOperand_Success) {
3002     return Sext? MatchOperand_ParseFail : Res;
3003   }
3004 
3005   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3006     return MatchOperand_ParseFail;
3007 
3008   AMDGPUOperand::Modifiers Mods;
3009   Mods.Sext = Sext;
3010 
3011   if (Mods.hasIntModifiers()) {
3012     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3013     if (Op.isExpr()) {
3014       Error(Op.getStartLoc(), "expected an absolute expression");
3015       return MatchOperand_ParseFail;
3016     }
3017     Op.setModifiers(Mods);
3018   }
3019 
3020   return MatchOperand_Success;
3021 }
3022 
3023 OperandMatchResultTy
3024 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3025   return parseRegOrImmWithFPInputMods(Operands, false);
3026 }
3027 
3028 OperandMatchResultTy
3029 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3030   return parseRegOrImmWithIntInputMods(Operands, false);
3031 }
3032 
3033 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3034   auto Loc = getLoc();
3035   if (trySkipId("off")) {
3036     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3037                                                 AMDGPUOperand::ImmTyOff, false));
3038     return MatchOperand_Success;
3039   }
3040 
3041   if (!isRegister())
3042     return MatchOperand_NoMatch;
3043 
3044   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3045   if (Reg) {
3046     Operands.push_back(std::move(Reg));
3047     return MatchOperand_Success;
3048   }
3049 
3050   return MatchOperand_ParseFail;
3051 
3052 }
3053 
3054 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3055   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3056 
3057   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3058       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3059       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3060       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3061     return Match_InvalidOperand;
3062 
3063   if ((TSFlags & SIInstrFlags::VOP3) &&
3064       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3065       getForcedEncodingSize() != 64)
3066     return Match_PreferE32;
3067 
3068   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3069       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3070     // v_mac_f32/16 allow only dst_sel == DWORD.
3071     auto OpNum =
3072         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3073     const auto &Op = Inst.getOperand(OpNum);
3074     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3075       return Match_InvalidOperand;
3076     }
3077   }
3078 
3079   return Match_Success;
3080 }
3081 
3082 static ArrayRef<unsigned> getAllVariants() {
3083   static const unsigned Variants[] = {
3084     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3085     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3086   };
3087 
3088   return makeArrayRef(Variants);
3089 }
3090 
3091 // What asm variants we should check
3092 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3093   if (getForcedEncodingSize() == 32) {
3094     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3095     return makeArrayRef(Variants);
3096   }
3097 
3098   if (isForcedVOP3()) {
3099     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3100     return makeArrayRef(Variants);
3101   }
3102 
3103   if (isForcedSDWA()) {
3104     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3105                                         AMDGPUAsmVariants::SDWA9};
3106     return makeArrayRef(Variants);
3107   }
3108 
3109   if (isForcedDPP()) {
3110     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3111     return makeArrayRef(Variants);
3112   }
3113 
3114   return getAllVariants();
3115 }
3116 
3117 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3118   if (getForcedEncodingSize() == 32)
3119     return "e32";
3120 
3121   if (isForcedVOP3())
3122     return "e64";
3123 
3124   if (isForcedSDWA())
3125     return "sdwa";
3126 
3127   if (isForcedDPP())
3128     return "dpp";
3129 
3130   return "";
3131 }
3132 
3133 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3134   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3135   const unsigned Num = Desc.getNumImplicitUses();
3136   for (unsigned i = 0; i < Num; ++i) {
3137     unsigned Reg = Desc.ImplicitUses[i];
3138     switch (Reg) {
3139     case AMDGPU::FLAT_SCR:
3140     case AMDGPU::VCC:
3141     case AMDGPU::VCC_LO:
3142     case AMDGPU::VCC_HI:
3143     case AMDGPU::M0:
3144       return Reg;
3145     default:
3146       break;
3147     }
3148   }
3149   return AMDGPU::NoRegister;
3150 }
3151 
// NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
// Note that there are no cases in which a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
3156 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3157                                        unsigned OpIdx) const {
3158   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3159 
3160   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3161     return false;
3162   }
3163 
3164   const MCOperand &MO = Inst.getOperand(OpIdx);
3165 
3166   int64_t Val = MO.getImm();
3167   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3168 
3169   switch (OpSize) { // expected operand size
3170   case 8:
3171     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3172   case 4:
3173     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3174   case 2: {
3175     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3176     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3177         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3178         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3179       return AMDGPU::isInlinableIntLiteral(Val);
3180 
3181     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3182         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3183         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3184       return AMDGPU::isInlinableIntLiteralV216(Val);
3185 
3186     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3187         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3188         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3189       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3190 
3191     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3192   }
3193   default:
3194     llvm_unreachable("invalid operand size");
3195   }
3196 }
3197 
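// Pre-GFX10 VALU instructions may read at most one scalar value (an
// SGPR or a literal); GFX10+ allows two, except for the 64-bit shifts
// listed below. Illustrative example (assumed syntax):
//   v_add_f32_e64 v0, s0, s1   ; two SGPRs: valid on gfx10+ only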
3198 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3199   if (!isGFX10Plus())
3200     return 1;
3201 
3202   switch (Opcode) {
3203   // 64-bit shift instructions can use only one scalar value input
3204   case AMDGPU::V_LSHLREV_B64_e64:
3205   case AMDGPU::V_LSHLREV_B64_gfx10:
3206   case AMDGPU::V_LSHRREV_B64_e64:
3207   case AMDGPU::V_LSHRREV_B64_gfx10:
3208   case AMDGPU::V_ASHRREV_I64_e64:
3209   case AMDGPU::V_ASHRREV_I64_gfx10:
3210   case AMDGPU::V_LSHL_B64_e64:
3211   case AMDGPU::V_LSHR_B64_e64:
3212   case AMDGPU::V_ASHR_I64_e64:
3213     return 1;
3214   default:
3215     return 2;
3216   }
3217 }
3218 
3219 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3220   const MCOperand &MO = Inst.getOperand(OpIdx);
3221   if (MO.isImm()) {
3222     return !isInlineConstant(Inst, OpIdx);
3223   } else if (MO.isReg()) {
3224     auto Reg = MO.getReg();
3225     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3226     auto PReg = mc2PseudoReg(Reg);
3227     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3228   } else {
3229     return true;
3230   }
3231 }
3232 
3233 bool
3234 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3235                                                 const OperandVector &Operands) {
3236   const unsigned Opcode = Inst.getOpcode();
3237   const MCInstrDesc &Desc = MII.get(Opcode);
3238   unsigned LastSGPR = AMDGPU::NoRegister;
3239   unsigned ConstantBusUseCount = 0;
3240   unsigned NumLiterals = 0;
3241   unsigned LiteralSize;
3242 
3243   if (Desc.TSFlags &
3244       (SIInstrFlags::VOPC |
3245        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3246        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3247        SIInstrFlags::SDWA)) {
3248     // Check special imm operands (used by madmk, etc)
3249     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3250       ++ConstantBusUseCount;
3251     }
3252 
3253     SmallDenseSet<unsigned> SGPRsUsed;
3254     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3255     if (SGPRUsed != AMDGPU::NoRegister) {
3256       SGPRsUsed.insert(SGPRUsed);
3257       ++ConstantBusUseCount;
3258     }
3259 
3260     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3261     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3262     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3263 
3264     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3265 
3266     for (int OpIdx : OpIndices) {
3267       if (OpIdx == -1) break;
3268 
3269       const MCOperand &MO = Inst.getOperand(OpIdx);
3270       if (usesConstantBus(Inst, OpIdx)) {
3271         if (MO.isReg()) {
3272           LastSGPR = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersection, such as
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but are disabled anyway.
          // Note that this code mimics SIInstrInfo::verifyInstruction.
3279           if (!SGPRsUsed.count(LastSGPR)) {
3280             SGPRsUsed.insert(LastSGPR);
3281             ++ConstantBusUseCount;
3282           }
        } else { // Expression or a literal
          if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
            continue; // special operand like VINTERP attr_chan

          // An instruction may use only one literal.
          // This has been validated in a previous step.
          // See validateVOP3Literal.
          // This literal may be used as more than one operand.
          // If all these operands are of the same size,
          // this literal counts as one scalar value.
          // Otherwise it counts as 2 scalar values.
          // See "GFX10 Shader Programming", section 3.6.2.3.
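          // Illustrative example (assumed GFX10 syntax): in
          //   v_fma_f32 v0, 0x3f800000, v1, 0x3f800000
          // the same 32-bit literal is used twice, so it counts as a
          // single scalar value against the constant bus limit.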
3296 
3297           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3298           if (Size < 4) Size = 4;
3299 
3300           if (NumLiterals == 0) {
3301             NumLiterals = 1;
3302             LiteralSize = Size;
3303           } else if (LiteralSize != Size) {
3304             NumLiterals = 2;
3305           }
3306         }
3307       }
3308     }
3309   }
3310   ConstantBusUseCount += NumLiterals;
3311 
3312   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3313     return true;
3314 
3315   SMLoc LitLoc = getLitLoc(Operands);
3316   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3317   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3318   Error(Loc, "invalid operand (violates constant bus restrictions)");
3319   return false;
3320 }
3321 
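// Some VOP3 opcodes, e.g. v_mqsad_u32_u8, mark vdst as earlyclobber,
// so the destination must not overlap any source register. Illustrative
// example of an operand combination this check rejects (assumed syntax):
//   v_mqsad_u32_u8 v[0:3], v[0:1], v2, v[4:7]   ; vdst overlaps src0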
3322 bool
3323 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3324                                                  const OperandVector &Operands) {
3325   const unsigned Opcode = Inst.getOpcode();
3326   const MCInstrDesc &Desc = MII.get(Opcode);
3327 
3328   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3329   if (DstIdx == -1 ||
3330       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3331     return true;
3332   }
3333 
3334   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3335 
3336   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3337   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3338   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3339 
3340   assert(DstIdx != -1);
3341   const MCOperand &Dst = Inst.getOperand(DstIdx);
3342   assert(Dst.isReg());
3343   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3344 
3345   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3346 
3347   for (int SrcIdx : SrcIndices) {
3348     if (SrcIdx == -1) break;
3349     const MCOperand &Src = Inst.getOperand(SrcIdx);
3350     if (Src.isReg()) {
3351       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3352       if (isRegIntersect(DstReg, SrcReg, TRI)) {
3353         Error(getRegLoc(SrcReg, Operands),
3354           "destination must be different than all sources");
3355         return false;
3356       }
3357     }
3358   }
3359 
3360   return true;
3361 }
3362 
3363 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3364 
3365   const unsigned Opc = Inst.getOpcode();
3366   const MCInstrDesc &Desc = MII.get(Opc);
3367 
3368   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3369     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3370     assert(ClampIdx != -1);
3371     return Inst.getOperand(ClampIdx).getImm() == 0;
3372   }
3373 
3374   return true;
3375 }
3376 
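// The size of the data operand must match the number of dmask bits set,
// plus one if tfe is set, halved (rounded up) for packed d16 loads.
// Illustrative example (assumed gfx10 syntax):
//   image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D
//   ; dmask has 4 bits set, so 4 data VGPRs are required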
3377 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3378 
3379   const unsigned Opc = Inst.getOpcode();
3380   const MCInstrDesc &Desc = MII.get(Opc);
3381 
3382   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3383     return true;
3384 
3385   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3386   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3387   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3388 
3389   assert(VDataIdx != -1);
3390 
3391   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3392     return true;
3393 
3394   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3395   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3396   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3397   if (DMask == 0)
3398     DMask = 1;
3399 
3400   unsigned DataSize =
3401     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3402   if (hasPackedD16()) {
3403     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3404     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3405       DataSize = (DataSize + 1) / 2;
3406   }
3407 
3408   return (VDataSize / 4) == DataSize + TFESize;
3409 }
3410 
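// For non-NSA encodings the address is a single register tuple, and the
// expected address size is rounded up to the next supported tuple size
// (1-4, 8, or 16 dwords) before it is compared against the operand size.
// NSA encodings instead use one vaddr operand per address component, so
// no rounding is applied.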
3411 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3412   const unsigned Opc = Inst.getOpcode();
3413   const MCInstrDesc &Desc = MII.get(Opc);
3414 
3415   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3416     return true;
3417 
3418   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3419 
3420   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3421       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3422   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3423   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3424   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3425   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3426 
3427   assert(VAddr0Idx != -1);
3428   assert(SrsrcIdx != -1);
3429   assert(SrsrcIdx > VAddr0Idx);
3430 
3431   if (DimIdx == -1)
3432     return true; // intersect_ray
3433 
3434   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3435   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3436   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3437   unsigned VAddrSize =
3438       IsNSA ? SrsrcIdx - VAddr0Idx
3439             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3440   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3441 
3442   unsigned AddrSize =
3443       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3444 
3445   if (!IsNSA) {
3446     if (AddrSize > 8)
3447       AddrSize = 16;
3448     else if (AddrSize > 4)
3449       AddrSize = 8;
3450   }
3451 
3452   return VAddrSize == AddrSize;
3453 }
3454 
3455 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3456 
3457   const unsigned Opc = Inst.getOpcode();
3458   const MCInstrDesc &Desc = MII.get(Opc);
3459 
3460   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3461     return true;
3462   if (!Desc.mayLoad() || !Desc.mayStore())
3463     return true; // Not atomic
3464 
3465   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3466   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3467 
3468   // This is an incomplete check because image_atomic_cmpswap
3469   // may only use 0x3 and 0xf while other atomic operations
3470   // may use 0x1 and 0x3. However these limitations are
3471   // verified when we check that dmask matches dst size.
3472   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3473 }
3474 
3475 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3476 
3477   const unsigned Opc = Inst.getOpcode();
3478   const MCInstrDesc &Desc = MII.get(Opc);
3479 
3480   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3481     return true;
3482 
3483   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3484   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3485 
3486   // GATHER4 instructions use dmask in a different fashion compared to
3487   // other MIMG instructions. The only useful DMASK values are
3488   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3489   // (red,red,red,red) etc.) The ISA document doesn't mention
3490   // this.
3491   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3492 }
3493 
3494 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3495   const unsigned Opc = Inst.getOpcode();
3496   const MCInstrDesc &Desc = MII.get(Opc);
3497 
3498   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3499     return true;
3500 
3501   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3502   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3503       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3504 
3505   if (!BaseOpcode->MSAA)
3506     return true;
3507 
3508   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3509   assert(DimIdx != -1);
3510 
3511   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3512   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3513 
3514   return DimInfo->MSAA;
3515 }
3516 
static bool IsMovrelsSDWAOpcode(const unsigned Opcode) {
3519   switch (Opcode) {
3520   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3521   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3522   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3523     return true;
3524   default:
3525     return false;
3526   }
3527 }
3528 
// movrels* opcodes should only allow VGPRs as src0.
3530 // This is specified in .td description for vop1/vop3,
3531 // but sdwa is handled differently. See isSDWAOperand.
3532 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3533                                       const OperandVector &Operands) {
3534 
3535   const unsigned Opc = Inst.getOpcode();
3536   const MCInstrDesc &Desc = MII.get(Opc);
3537 
3538   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3539     return true;
3540 
3541   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3542   assert(Src0Idx != -1);
3543 
3544   SMLoc ErrLoc;
3545   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3546   if (Src0.isReg()) {
3547     auto Reg = mc2PseudoReg(Src0.getReg());
3548     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3549     if (!isSGPR(Reg, TRI))
3550       return true;
3551     ErrLoc = getRegLoc(Reg, Operands);
3552   } else {
3553     ErrLoc = getConstLoc(Operands);
3554   }
3555 
3556   Error(ErrLoc, "source operand must be a VGPR");
3557   return false;
3558 }
3559 
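// v_accvgpr_write cannot read its source from the scalar register file;
// only VGPRs and inline constants are accepted. Illustrative example of
// a rejected source (assumed syntax):
//   v_accvgpr_write_b32 a0, s0   ; error: SGPR source is not allowed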
3560 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3561                                           const OperandVector &Operands) {
3562 
3563   const unsigned Opc = Inst.getOpcode();
3564 
3565   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3566     return true;
3567 
3568   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3569   assert(Src0Idx != -1);
3570 
3571   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3572   if (!Src0.isReg())
3573     return true;
3574 
3575   auto Reg = mc2PseudoReg(Src0.getReg());
3576   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3577   if (isSGPR(Reg, TRI)) {
3578     Error(getRegLoc(Reg, Operands),
3579           "source operand must be either a VGPR or an inline constant");
3580     return false;
3581   }
3582 
3583   return true;
3584 }
3585 
3586 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3587   switch (Inst.getOpcode()) {
3588   default:
3589     return true;
3590   case V_DIV_SCALE_F32_gfx6_gfx7:
3591   case V_DIV_SCALE_F32_vi:
3592   case V_DIV_SCALE_F32_gfx10:
3593   case V_DIV_SCALE_F64_gfx6_gfx7:
3594   case V_DIV_SCALE_F64_vi:
3595   case V_DIV_SCALE_F64_gfx10:
3596     break;
3597   }
3598 
3599   // TODO: Check that src0 = src1 or src2.
3600 
  for (auto Name : {AMDGPU::OpName::src0_modifiers,
                    AMDGPU::OpName::src1_modifiers,
                    AMDGPU::OpName::src2_modifiers}) {
3604     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3605             .getImm() &
3606         SISrcMods::ABS) {
3607       return false;
3608     }
3609   }
3610 
3611   return true;
3612 }
3613 
3614 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3615 
3616   const unsigned Opc = Inst.getOpcode();
3617   const MCInstrDesc &Desc = MII.get(Opc);
3618 
3619   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3620     return true;
3621 
3622   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3623   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3624     if (isCI() || isSI())
3625       return false;
3626   }
3627 
3628   return true;
3629 }
3630 
3631 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3632   const unsigned Opc = Inst.getOpcode();
3633   const MCInstrDesc &Desc = MII.get(Opc);
3634 
3635   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3636     return true;
3637 
3638   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3639   if (DimIdx < 0)
3640     return true;
3641 
3642   long Imm = Inst.getOperand(DimIdx).getImm();
3643   if (Imm < 0 || Imm >= 8)
3644     return false;
3645 
3646   return true;
3647 }
3648 
static bool IsRevOpcode(const unsigned Opcode) {
3651   switch (Opcode) {
3652   case AMDGPU::V_SUBREV_F32_e32:
3653   case AMDGPU::V_SUBREV_F32_e64:
3654   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3655   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3656   case AMDGPU::V_SUBREV_F32_e32_vi:
3657   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3658   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3659   case AMDGPU::V_SUBREV_F32_e64_vi:
3660 
3661   case AMDGPU::V_SUBREV_CO_U32_e32:
3662   case AMDGPU::V_SUBREV_CO_U32_e64:
3663   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3664   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3665 
3666   case AMDGPU::V_SUBBREV_U32_e32:
3667   case AMDGPU::V_SUBBREV_U32_e64:
3668   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3669   case AMDGPU::V_SUBBREV_U32_e32_vi:
3670   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3671   case AMDGPU::V_SUBBREV_U32_e64_vi:
3672 
3673   case AMDGPU::V_SUBREV_U32_e32:
3674   case AMDGPU::V_SUBREV_U32_e64:
3675   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3676   case AMDGPU::V_SUBREV_U32_e32_vi:
3677   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3678   case AMDGPU::V_SUBREV_U32_e64_vi:
3679 
3680   case AMDGPU::V_SUBREV_F16_e32:
3681   case AMDGPU::V_SUBREV_F16_e64:
3682   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3683   case AMDGPU::V_SUBREV_F16_e32_vi:
3684   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3685   case AMDGPU::V_SUBREV_F16_e64_vi:
3686 
3687   case AMDGPU::V_SUBREV_U16_e32:
3688   case AMDGPU::V_SUBREV_U16_e64:
3689   case AMDGPU::V_SUBREV_U16_e32_vi:
3690   case AMDGPU::V_SUBREV_U16_e64_vi:
3691 
3692   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3693   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3694   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3695 
3696   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3697   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3698 
3699   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3700   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3701 
3702   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3703   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3704 
3705   case AMDGPU::V_LSHRREV_B32_e32:
3706   case AMDGPU::V_LSHRREV_B32_e64:
3707   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3708   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3709   case AMDGPU::V_LSHRREV_B32_e32_vi:
3710   case AMDGPU::V_LSHRREV_B32_e64_vi:
3711   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3712   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3713 
3714   case AMDGPU::V_ASHRREV_I32_e32:
3715   case AMDGPU::V_ASHRREV_I32_e64:
3716   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3717   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3718   case AMDGPU::V_ASHRREV_I32_e32_vi:
3719   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3720   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3721   case AMDGPU::V_ASHRREV_I32_e64_vi:
3722 
3723   case AMDGPU::V_LSHLREV_B32_e32:
3724   case AMDGPU::V_LSHLREV_B32_e64:
3725   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3726   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3727   case AMDGPU::V_LSHLREV_B32_e32_vi:
3728   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3729   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3730   case AMDGPU::V_LSHLREV_B32_e64_vi:
3731 
3732   case AMDGPU::V_LSHLREV_B16_e32:
3733   case AMDGPU::V_LSHLREV_B16_e64:
3734   case AMDGPU::V_LSHLREV_B16_e32_vi:
3735   case AMDGPU::V_LSHLREV_B16_e64_vi:
3736   case AMDGPU::V_LSHLREV_B16_gfx10:
3737 
3738   case AMDGPU::V_LSHRREV_B16_e32:
3739   case AMDGPU::V_LSHRREV_B16_e64:
3740   case AMDGPU::V_LSHRREV_B16_e32_vi:
3741   case AMDGPU::V_LSHRREV_B16_e64_vi:
3742   case AMDGPU::V_LSHRREV_B16_gfx10:
3743 
3744   case AMDGPU::V_ASHRREV_I16_e32:
3745   case AMDGPU::V_ASHRREV_I16_e64:
3746   case AMDGPU::V_ASHRREV_I16_e32_vi:
3747   case AMDGPU::V_ASHRREV_I16_e64_vi:
3748   case AMDGPU::V_ASHRREV_I16_gfx10:
3749 
3750   case AMDGPU::V_LSHLREV_B64_e64:
3751   case AMDGPU::V_LSHLREV_B64_gfx10:
3752   case AMDGPU::V_LSHLREV_B64_vi:
3753 
3754   case AMDGPU::V_LSHRREV_B64_e64:
3755   case AMDGPU::V_LSHRREV_B64_gfx10:
3756   case AMDGPU::V_LSHRREV_B64_vi:
3757 
3758   case AMDGPU::V_ASHRREV_I64_e64:
3759   case AMDGPU::V_ASHRREV_I64_gfx10:
3760   case AMDGPU::V_ASHRREV_I64_vi:
3761 
3762   case AMDGPU::V_PK_LSHLREV_B16:
3763   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3764   case AMDGPU::V_PK_LSHLREV_B16_vi:
3765 
3766   case AMDGPU::V_PK_LSHRREV_B16:
3767   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3768   case AMDGPU::V_PK_LSHRREV_B16_vi:
3769   case AMDGPU::V_PK_ASHRREV_I16:
3770   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3771   case AMDGPU::V_PK_ASHRREV_I16_vi:
3772     return true;
3773   default:
3774     return false;
3775   }
3776 }
3777 
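// lds_direct may appear only as src0 of non-rev VOP1/VOP2/VOP3/VOP3P/VOPC
// encodings on targets that support it. Illustrative examples (assumed
// syntax):
//   v_mov_b32 v0, lds_direct     ; valid: used as src0
//   v_add_f32 v0, v1, lds_direct ; rejected: used as src1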
3778 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3779 
3780   using namespace SIInstrFlags;
3781   const unsigned Opcode = Inst.getOpcode();
3782   const MCInstrDesc &Desc = MII.get(Opcode);
3783 
  // The lds_direct register is defined so that it can be used
  // with 9-bit source operands only. Ignore encodings that do
  // not accept such operands.
3786   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3787   if ((Desc.TSFlags & Enc) == 0)
3788     return None;
3789 
3790   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3791     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3792     if (SrcIdx == -1)
3793       break;
3794     const auto &Src = Inst.getOperand(SrcIdx);
3795     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3796 
3797       if (isGFX90A())
3798         return StringRef("lds_direct is not supported on this GPU");
3799 
3800       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3801         return StringRef("lds_direct cannot be used with this instruction");
3802 
3803       if (SrcName != OpName::src0)
3804         return StringRef("lds_direct may be used as src0 only");
3805     }
3806   }
3807 
3808   return None;
3809 }
3810 
3811 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3812   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3813     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3814     if (Op.isFlatOffset())
3815       return Op.getStartLoc();
3816   }
3817   return getLoc();
3818 }
3819 
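// GLOBAL and SCRATCH instructions take a signed offset, while plain FLAT
// offsets are unsigned; the offset width also varies by target.
// Illustrative examples (assumed syntax):
//   global_load_dword v0, v[0:1], off offset:-8   ; signed: valid
//   flat_load_dword v0, v[0:1] offset:-8          ; rejected: unsigned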
3820 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3821                                          const OperandVector &Operands) {
3822   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3823   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3824     return true;
3825 
3826   auto Opcode = Inst.getOpcode();
3827   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3828   assert(OpNum != -1);
3829 
3830   const auto &Op = Inst.getOperand(OpNum);
3831   if (!hasFlatOffsets() && Op.getImm() != 0) {
3832     Error(getFlatOffsetLoc(Operands),
3833           "flat offset modifier is not supported on this GPU");
3834     return false;
3835   }
3836 
  // GLOBAL and SCRATCH instructions accept a signed offset.
  // For plain FLAT the offset must be positive:
  // the MSB is ignored and forced to zero, so the offset
  // is validated as an unsigned value.
3839   if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
3840     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
3841     if (!isIntN(OffsetSize, Op.getImm())) {
3842       Error(getFlatOffsetLoc(Operands),
3843             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
3844       return false;
3845     }
3846   } else {
3847     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
3848     if (!isUIntN(OffsetSize, Op.getImm())) {
3849       Error(getFlatOffsetLoc(Operands),
3850             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
3851       return false;
3852     }
3853   }
3854 
3855   return true;
3856 }
3857 
3858 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3859   // Start with second operand because SMEM Offset cannot be dst or src0.
3860   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
3861     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3862     if (Op.isSMEMOffset())
3863       return Op.getStartLoc();
3864   }
3865   return getLoc();
3866 }
3867 
3868 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3869                                          const OperandVector &Operands) {
3870   if (isCI() || isSI())
3871     return true;
3872 
3873   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3874   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3875     return true;
3876 
3877   auto Opcode = Inst.getOpcode();
3878   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3879   if (OpNum == -1)
3880     return true;
3881 
3882   const auto &Op = Inst.getOperand(OpNum);
3883   if (!Op.isImm())
3884     return true;
3885 
3886   uint64_t Offset = Op.getImm();
3887   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3888   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3889       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3890     return true;
3891 
3892   Error(getSMEMOffsetLoc(Operands),
3893         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3894                                "expected a 21-bit signed offset");
3895 
3896   return false;
3897 }
3898 
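// SOP2/SOPC instructions can encode at most one 32-bit literal, though
// the same literal value may feed both sources. Illustrative examples
// (assumed syntax):
//   s_add_u32 s0, 0x11111111, 0x11111111   ; valid: one unique literal
//   s_add_u32 s0, 0x11111111, 0x22222222   ; rejected: two literals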
3899 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3900   unsigned Opcode = Inst.getOpcode();
3901   const MCInstrDesc &Desc = MII.get(Opcode);
3902   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3903     return true;
3904 
3905   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3906   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3907 
3908   const int OpIndices[] = { Src0Idx, Src1Idx };
3909 
3910   unsigned NumExprs = 0;
3911   unsigned NumLiterals = 0;
3912   uint32_t LiteralValue;
3913 
3914   for (int OpIdx : OpIndices) {
3915     if (OpIdx == -1) break;
3916 
3917     const MCOperand &MO = Inst.getOperand(OpIdx);
3918     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3919     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3920       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3921         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3922         if (NumLiterals == 0 || LiteralValue != Value) {
3923           LiteralValue = Value;
3924           ++NumLiterals;
3925         }
3926       } else if (MO.isExpr()) {
3927         ++NumExprs;
3928       }
3929     }
3930   }
3931 
3932   return NumLiterals + NumExprs <= 1;
3933 }
3934 
3935 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3936   const unsigned Opc = Inst.getOpcode();
3937   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3938       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3939     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3940     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3941 
3942     if (OpSel & ~3)
3943       return false;
3944   }
3945   return true;
3946 }
3947 
3948 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
3949                                   const OperandVector &Operands) {
3950   const unsigned Opc = Inst.getOpcode();
3951   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
3952   if (DppCtrlIdx < 0)
3953     return true;
3954   unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
3955 
3956   if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
3957     // DPP64 is supported for row_newbcast only.
3958     int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3959     if (Src0Idx >= 0 &&
3960         getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
3961       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
3962       Error(S, "64 bit dpp only supports row_newbcast");
3963       return false;
3964     }
3965   }
3966 
3967   return true;
3968 }
3969 
3970 // Check if VCC register matches wavefront size
3971 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3972   auto FB = getFeatureBits();
3973   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3974     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3975 }
3976 
3977 // VOP3 literal is only allowed in GFX10+ and only one can be used
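// (repeated uses of the same value count as a single literal).
// Illustrative example (assumed syntax):
//   v_add3_u32 v0, v1, v2, 0x12345678   ; literal is valid on gfx10+ only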
3978 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
3979                                           const OperandVector &Operands) {
3980   unsigned Opcode = Inst.getOpcode();
3981   const MCInstrDesc &Desc = MII.get(Opcode);
3982   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3983     return true;
3984 
3985   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3986   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3987   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3988 
3989   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3990 
3991   unsigned NumExprs = 0;
3992   unsigned NumLiterals = 0;
3993   uint32_t LiteralValue;
3994 
3995   for (int OpIdx : OpIndices) {
3996     if (OpIdx == -1) break;
3997 
3998     const MCOperand &MO = Inst.getOperand(OpIdx);
3999     if (!MO.isImm() && !MO.isExpr())
4000       continue;
4001     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4002       continue;
4003 
4004     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4005         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4006       Error(getConstLoc(Operands),
4007             "inline constants are not allowed for this operand");
4008       return false;
4009     }
4010 
4011     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4012       uint32_t Value = static_cast<uint32_t>(MO.getImm());
4013       if (NumLiterals == 0 || LiteralValue != Value) {
4014         LiteralValue = Value;
4015         ++NumLiterals;
4016       }
4017     } else if (MO.isExpr()) {
4018       ++NumExprs;
4019     }
4020   }
4021   NumLiterals += NumExprs;
4022 
4023   if (!NumLiterals)
4024     return true;
4025 
4026   if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4027     Error(getLitLoc(Operands), "literal operands are not supported");
4028     return false;
4029   }
4030 
4031   if (NumLiterals > 1) {
4032     Error(getLitLoc(Operands), "only one literal operand is allowed");
4033     return false;
4034   }
4035 
4036   return true;
4037 }
4038 
4039 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4040 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4041                          const MCRegisterInfo *MRI) {
4042   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4043   if (OpIdx < 0)
4044     return -1;
4045 
4046   const MCOperand &Op = Inst.getOperand(OpIdx);
4047   if (!Op.isReg())
4048     return -1;
4049 
4050   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4051   auto Reg = Sub ? Sub : Op.getReg();
4052   const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4053   return AGRP32.contains(Reg) ? 1 : 0;
4054 }
4055 
4056 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4057   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4058   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4059                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4060                   SIInstrFlags::DS)) == 0)
4061     return true;
4062 
4063   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4064                                                       : AMDGPU::OpName::vdata;
4065 
4066   const MCRegisterInfo *MRI = getMRI();
4067   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4068   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4069 
4070   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4071     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4072     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4073       return false;
4074   }
4075 
4076   auto FB = getFeatureBits();
4077   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4078     if (DataAreg < 0 || DstAreg < 0)
4079       return true;
4080     return DstAreg == DataAreg;
4081   }
4082 
4083   return DstAreg < 1 && DataAreg < 1;
4084 }
4085 
4086 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4087   auto FB = getFeatureBits();
4088   if (!FB[AMDGPU::FeatureGFX90AInsts])
4089     return true;
4090 
4091   const MCRegisterInfo *MRI = getMRI();
4092   const MCRegisterClass &VGRP32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4093   const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4094   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4095     const MCOperand &Op = Inst.getOperand(I);
4096     if (!Op.isReg())
4097       continue;
4098 
4099     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4100     if (!Sub)
4101       continue;
4102 
4103     if (VGRP32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4104       return false;
4105     if (AGRP32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4106       return false;
4107   }
4108 
4109   return true;
4110 }
4111 
4112 // gfx90a has an undocumented limitation:
4113 // DS_GWS opcodes must use even aligned registers.
4114 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4115                                   const OperandVector &Operands) {
4116   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4117     return true;
4118 
4119   int Opc = Inst.getOpcode();
4120   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4121       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4122     return true;
4123 
4124   const MCRegisterInfo *MRI = getMRI();
4125   const MCRegisterClass &VGRP32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4126   int Data0Pos =
4127       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4128   assert(Data0Pos != -1);
4129   auto Reg = Inst.getOperand(Data0Pos).getReg();
4130   auto RegIdx = Reg - (VGRP32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4131   if (RegIdx & 1) {
4132     SMLoc RegLoc = getRegLoc(Reg, Operands);
4133     Error(RegLoc, "vgpr must be even aligned");
4134     return false;
4135   }
4136 
4137   return true;
4138 }
4139 
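// Validate the cache policy (cpol) bits against the instruction kind:
// SMRD accepts only glc/dlc, atomics with a returned value must set glc,
// and non-returning atomics must not set it. Illustrative example
// (assumed syntax):
//   flat_atomic_add v0, v[0:1], v2 glc   ; returning atomic requires glc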
4140 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4141                                             const OperandVector &Operands,
4142                                             const SMLoc &IDLoc) {
4143   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4144                                            AMDGPU::OpName::cpol);
4145   if (CPolPos == -1)
4146     return true;
4147 
4148   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4149 
4150   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4151   if ((TSFlags & (SIInstrFlags::SMRD)) &&
4152       (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
4153     Error(IDLoc, "invalid cache policy for SMRD instruction");
4154     return false;
4155   }
4156 
4157   if (isGFX90A() && (CPol & CPol::SCC)) {
4158     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4159     StringRef CStr(S.getPointer());
4160     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4161     Error(S, "scc is not supported on this GPU");
4162     return false;
4163   }
4164 
4165   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4166     return true;
4167 
4168   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4169     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4170       Error(IDLoc, "instruction must use glc");
4171       return false;
4172     }
4173   } else {
4174     if (CPol & CPol::GLC) {
4175       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4176       StringRef CStr(S.getPointer());
4177       S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
4178       Error(S, "instruction must not use glc");
4179       return false;
4180     }
4181   }
4182 
4183   return true;
4184 }
4185 
4186 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4187                                           const SMLoc &IDLoc,
4188                                           const OperandVector &Operands) {
4189   if (auto ErrMsg = validateLdsDirect(Inst)) {
4190     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4191     return false;
4192   }
4193   if (!validateSOPLiteral(Inst)) {
4194     Error(getLitLoc(Operands),
4195       "only one literal operand is allowed");
4196     return false;
4197   }
4198   if (!validateVOP3Literal(Inst, Operands)) {
4199     return false;
4200   }
4201   if (!validateConstantBusLimitations(Inst, Operands)) {
4202     return false;
4203   }
4204   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4205     return false;
4206   }
4207   if (!validateIntClampSupported(Inst)) {
4208     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4209       "integer clamping is not supported on this GPU");
4210     return false;
4211   }
4212   if (!validateOpSel(Inst)) {
4213     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4214       "invalid op_sel operand");
4215     return false;
4216   }
4217   if (!validateDPP(Inst, Operands)) {
4218     return false;
4219   }
4220   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4221   if (!validateMIMGD16(Inst)) {
4222     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4223       "d16 modifier is not supported on this GPU");
4224     return false;
4225   }
4226   if (!validateMIMGDim(Inst)) {
4227     Error(IDLoc, "dim modifier is required on this GPU");
4228     return false;
4229   }
4230   if (!validateMIMGMSAA(Inst)) {
4231     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4232           "invalid dim; must be MSAA type");
4233     return false;
4234   }
4235   if (!validateMIMGDataSize(Inst)) {
4236     Error(IDLoc,
4237       "image data size does not match dmask and tfe");
4238     return false;
4239   }
4240   if (!validateMIMGAddrSize(Inst)) {
4241     Error(IDLoc,
4242       "image address size does not match dim and a16");
4243     return false;
4244   }
4245   if (!validateMIMGAtomicDMask(Inst)) {
4246     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4247       "invalid atomic image dmask");
4248     return false;
4249   }
4250   if (!validateMIMGGatherDMask(Inst)) {
4251     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4252       "invalid image_gather dmask: only one bit must be set");
4253     return false;
4254   }
4255   if (!validateMovrels(Inst, Operands)) {
4256     return false;
4257   }
4258   if (!validateFlatOffset(Inst, Operands)) {
4259     return false;
4260   }
4261   if (!validateSMEMOffset(Inst, Operands)) {
4262     return false;
4263   }
4264   if (!validateMAIAccWrite(Inst, Operands)) {
4265     return false;
4266   }
4267   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4268     return false;
4269   }
4270 
4271   if (!validateAGPRLdSt(Inst)) {
    Error(IDLoc,
          getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
              ? "invalid register class: data and dst should be all VGPR or AGPR"
              : "invalid register class: agpr loads and stores not supported on this GPU");
4276     return false;
4277   }
4278   if (!validateVGPRAlign(Inst)) {
4279     Error(IDLoc,
4280       "invalid register class: vgpr tuples must be 64 bit aligned");
4281     return false;
4282   }
4283   if (!validateGWS(Inst, Operands)) {
4284     return false;
4285   }
4286 
4287   if (!validateDivScale(Inst)) {
4288     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4289     return false;
4290   }
4294 
4295   return true;
4296 }
4297 
4298 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4299                                             const FeatureBitset &FBS,
4300                                             unsigned VariantID = 0);
4301 
4302 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4303                                 const FeatureBitset &AvailableFeatures,
4304                                 unsigned VariantID);
4305 
4306 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4307                                        const FeatureBitset &FBS) {
4308   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4309 }
4310 
4311 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4312                                        const FeatureBitset &FBS,
4313                                        ArrayRef<unsigned> Variants) {
4314   for (auto Variant : Variants) {
4315     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4316       return true;
4317   }
4318 
4319   return false;
4320 }
4321 
4322 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4323                                                   const SMLoc &IDLoc) {
4324   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4325 
4326   // Check if requested instruction variant is supported.
4327   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4328     return false;
4329 
4330   // This instruction is not supported.
4331   // Clear any other pending errors because they are no longer relevant.
4332   getParser().clearPendingErrors();
4333 
4334   // Requested instruction variant is not supported.
4335   // Check if any other variants are supported.
4336   StringRef VariantName = getMatchedVariantName();
4337   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4338     return Error(IDLoc,
4339                  Twine(VariantName,
4340                        " variant of this instruction is not supported"));
4341   }
4342 
4343   // Finally check if this instruction is supported on any other GPU.
4344   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4345     return Error(IDLoc, "instruction not supported on this GPU");
4346   }
4347 
4348   // Instruction not supported on any GPU. Probably a typo.
4349   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4350   return Error(IDLoc, "invalid instruction" + Suggestion);
4351 }
4352 
4353 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4354                                               OperandVector &Operands,
4355                                               MCStreamer &Out,
4356                                               uint64_t &ErrorInfo,
4357                                               bool MatchingInlineAsm) {
4358   MCInst Inst;
4359   unsigned Result = Match_Success;
4360   for (auto Variant : getMatchedVariants()) {
4361     uint64_t EI;
4362     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4363                                   Variant);
    // Match statuses are ordered from least to most specific;
    // the most specific status becomes the result:
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4367     if ((R == Match_Success) ||
4368         (R == Match_PreferE32) ||
4369         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4370         (R == Match_InvalidOperand && Result != Match_MissingFeature
4371                                    && Result != Match_PreferE32) ||
4372         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4373                                    && Result != Match_MissingFeature
4374                                    && Result != Match_PreferE32)) {
4375       Result = R;
4376       ErrorInfo = EI;
4377     }
4378     if (R == Match_Success)
4379       break;
4380   }
4381 
4382   if (Result == Match_Success) {
4383     if (!validateInstruction(Inst, IDLoc, Operands)) {
4384       return true;
4385     }
4386     Inst.setLoc(IDLoc);
4387     Out.emitInstruction(Inst, getSTI());
4388     return false;
4389   }
4390 
4391   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4392   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4393     return true;
4394   }
4395 
4396   switch (Result) {
4397   default: break;
4398   case Match_MissingFeature:
4399     // It has been verified that the specified instruction
4400     // mnemonic is valid. A match was found but it requires
4401     // features which are not supported on this GPU.
4402     return Error(IDLoc, "operands are not valid for this GPU or mode");
4403 
4404   case Match_InvalidOperand: {
4405     SMLoc ErrorLoc = IDLoc;
4406     if (ErrorInfo != ~0ULL) {
4407       if (ErrorInfo >= Operands.size()) {
4408         return Error(IDLoc, "too few operands for instruction");
4409       }
4410       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4411       if (ErrorLoc == SMLoc())
4412         ErrorLoc = IDLoc;
4413     }
4414     return Error(ErrorLoc, "invalid operand for instruction");
4415   }
4416 
4417   case Match_PreferE32:
4418     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4419                         "should be encoded as e32");
4420   case Match_MnemonicFail:
4421     llvm_unreachable("Invalid instructions should have been handled already");
4422   }
4423   llvm_unreachable("Implement any new match types added!");
4424 }
4425 
4426 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4427   int64_t Tmp = -1;
4428   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4429     return true;
4430   }
4431   if (getParser().parseAbsoluteExpression(Tmp)) {
4432     return true;
4433   }
4434   Ret = static_cast<uint32_t>(Tmp);
4435   return false;
4436 }
4437 
4438 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4439                                                uint32_t &Minor) {
4440   if (ParseAsAbsoluteExpression(Major))
4441     return TokError("invalid major version");
4442 
4443   if (!trySkipToken(AsmToken::Comma))
4444     return TokError("minor version number required, comma expected");
4445 
4446   if (ParseAsAbsoluteExpression(Minor))
4447     return TokError("invalid minor version");
4448 
4449   return false;
4450 }
4451 
4452 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4453   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4454     return TokError("directive only supported for amdgcn architecture");
4455 
4456   std::string TargetIDDirective;
4457   SMLoc TargetStart = getTok().getLoc();
4458   if (getParser().parseEscapedString(TargetIDDirective))
4459     return true;
4460 
4461   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4462   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4463     return getParser().Error(TargetRange.Start,
4464         (Twine(".amdgcn_target directive's target id ") +
4465          Twine(TargetIDDirective) +
4466          Twine(" does not match the specified target id ") +
4467          Twine(getTargetStreamer().getTargetID()->toString())).str());
4468 
4469   return false;
4470 }
4471 
4472 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4473   return Error(Range.Start, "value out of range", Range);
4474 }
4475 
4476 bool AMDGPUAsmParser::calculateGPRBlocks(
4477     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4478     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4479     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4480     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4481   // TODO(scott.linder): These calculations are duplicated from
4482   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4483   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4484 
4485   unsigned NumVGPRs = NextFreeVGPR;
4486   unsigned NumSGPRs = NextFreeSGPR;
4487 
4488   if (Version.Major >= 10)
4489     NumSGPRs = 0;
4490   else {
4491     unsigned MaxAddressableNumSGPRs =
4492         IsaInfo::getAddressableNumSGPRs(&getSTI());
4493 
4494     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4495         NumSGPRs > MaxAddressableNumSGPRs)
4496       return OutOfRangeError(SGPRRange);
4497 
4498     NumSGPRs +=
4499         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4500 
4501     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4502         NumSGPRs > MaxAddressableNumSGPRs)
4503       return OutOfRangeError(SGPRRange);
4504 
4505     if (Features.test(FeatureSGPRInitBug))
4506       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4507   }
4508 
4509   VGPRBlocks =
4510       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4511   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4512 
4513   return false;
4514 }
4515 
4516 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4517   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4518     return TokError("directive only supported for amdgcn architecture");
4519 
4520   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4521     return TokError("directive only supported for amdhsa OS");
4522 
4523   StringRef KernelName;
4524   if (getParser().parseIdentifier(KernelName))
4525     return true;
4526 
4527   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4528 
4529   StringSet<> Seen;
4530 
4531   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4532 
4533   SMRange VGPRRange;
4534   uint64_t NextFreeVGPR = 0;
4535   uint64_t AccumOffset = 0;
4536   SMRange SGPRRange;
4537   uint64_t NextFreeSGPR = 0;
4538   unsigned UserSGPRCount = 0;
4539   bool ReserveVCC = true;
4540   bool ReserveFlatScr = true;
4541   Optional<bool> EnableWavefrontSize32;
4542 
4543   while (true) {
4544     while (trySkipToken(AsmToken::EndOfStatement));
4545 
4546     StringRef ID;
4547     SMRange IDRange = getTok().getLocRange();
4548     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4549       return true;
4550 
4551     if (ID == ".end_amdhsa_kernel")
4552       break;
4553 
4554     if (Seen.find(ID) != Seen.end())
4555       return TokError(".amdhsa_ directives cannot be repeated");
4556     Seen.insert(ID);
4557 
4558     SMLoc ValStart = getLoc();
4559     int64_t IVal;
4560     if (getParser().parseAbsoluteExpression(IVal))
4561       return true;
4562     SMLoc ValEnd = getLoc();
4563     SMRange ValRange = SMRange(ValStart, ValEnd);
4564 
4565     if (IVal < 0)
4566       return OutOfRangeError(ValRange);
4567 
4568     uint64_t Val = IVal;
4569 
4570 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4571   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4572     return OutOfRangeError(RANGE);                                             \
4573   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4574 
4575     if (ID == ".amdhsa_group_segment_fixed_size") {
4576       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4577         return OutOfRangeError(ValRange);
4578       KD.group_segment_fixed_size = Val;
4579     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4580       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4581         return OutOfRangeError(ValRange);
4582       KD.private_segment_fixed_size = Val;
4583     } else if (ID == ".amdhsa_kernarg_size") {
4584       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4585         return OutOfRangeError(ValRange);
4586       KD.kernarg_size = Val;
4587     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4588       if (hasArchitectedFlatScratch())
4589         return Error(IDRange.Start,
4590                      "directive is not supported with architected flat scratch",
4591                      IDRange);
4592       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4593                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4594                        Val, ValRange);
4595       if (Val)
4596         UserSGPRCount += 4;
4597     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4598       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4599                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4600                        ValRange);
4601       if (Val)
4602         UserSGPRCount += 2;
4603     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4604       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4605                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4606                        ValRange);
4607       if (Val)
4608         UserSGPRCount += 2;
4609     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4610       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4611                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4612                        Val, ValRange);
4613       if (Val)
4614         UserSGPRCount += 2;
4615     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4616       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4617                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4618                        ValRange);
4619       if (Val)
4620         UserSGPRCount += 2;
4621     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4622       if (hasArchitectedFlatScratch())
4623         return Error(IDRange.Start,
4624                      "directive is not supported with architected flat scratch",
4625                      IDRange);
4626       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4627                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4628                        ValRange);
4629       if (Val)
4630         UserSGPRCount += 2;
4631     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4632       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4633                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4634                        Val, ValRange);
4635       if (Val)
4636         UserSGPRCount += 1;
4637     } else if (ID == ".amdhsa_wavefront_size32") {
4638       if (IVersion.Major < 10)
4639         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4640       EnableWavefrontSize32 = Val;
4641       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4642                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4643                        Val, ValRange);
4644     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4645       if (hasArchitectedFlatScratch())
4646         return Error(IDRange.Start,
4647                      "directive is not supported with architected flat scratch",
4648                      IDRange);
4649       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4650                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4651     } else if (ID == ".amdhsa_enable_private_segment") {
4652       if (!hasArchitectedFlatScratch())
4653         return Error(
4654             IDRange.Start,
4655             "directive is not supported without architected flat scratch",
4656             IDRange);
4657       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4658                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4659     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4660       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4661                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4662                        ValRange);
4663     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4664       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4665                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4666                        ValRange);
4667     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4668       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4669                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4670                        ValRange);
4671     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4672       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4673                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4674                        ValRange);
4675     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4676       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4677                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4678                        ValRange);
4679     } else if (ID == ".amdhsa_next_free_vgpr") {
4680       VGPRRange = ValRange;
4681       NextFreeVGPR = Val;
4682     } else if (ID == ".amdhsa_next_free_sgpr") {
4683       SGPRRange = ValRange;
4684       NextFreeSGPR = Val;
4685     } else if (ID == ".amdhsa_accum_offset") {
4686       if (!isGFX90A())
4687         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4688       AccumOffset = Val;
4689     } else if (ID == ".amdhsa_reserve_vcc") {
4690       if (!isUInt<1>(Val))
4691         return OutOfRangeError(ValRange);
4692       ReserveVCC = Val;
4693     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4694       if (IVersion.Major < 7)
4695         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4696       if (hasArchitectedFlatScratch())
4697         return Error(IDRange.Start,
4698                      "directive is not supported with architected flat scratch",
4699                      IDRange);
4700       if (!isUInt<1>(Val))
4701         return OutOfRangeError(ValRange);
4702       ReserveFlatScr = Val;
4703     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4704       if (IVersion.Major < 8)
4705         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4706       if (!isUInt<1>(Val))
4707         return OutOfRangeError(ValRange);
4708       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
4709         return Error(IDRange.Start,
4710                      ".amdhsa_reserve_xnack_mask does not match target id", IDRange);
4711     } else if (ID == ".amdhsa_float_round_mode_32") {
4712       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4713                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4714     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4715       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4716                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4717     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4718       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4719                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4720     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4721       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4722                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4723                        ValRange);
4724     } else if (ID == ".amdhsa_dx10_clamp") {
4725       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4726                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4727     } else if (ID == ".amdhsa_ieee_mode") {
4728       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4729                        Val, ValRange);
4730     } else if (ID == ".amdhsa_fp16_overflow") {
4731       if (IVersion.Major < 9)
4732         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
4733       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4734                        ValRange);
4735     } else if (ID == ".amdhsa_tg_split") {
4736       if (!isGFX90A())
4737         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4738       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
4739                        ValRange);
4740     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4741       if (IVersion.Major < 10)
4742         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4743       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4744                        ValRange);
4745     } else if (ID == ".amdhsa_memory_ordered") {
4746       if (IVersion.Major < 10)
4747         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4748       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4749                        ValRange);
4750     } else if (ID == ".amdhsa_forward_progress") {
4751       if (IVersion.Major < 10)
4752         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4753       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4754                        ValRange);
4755     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4756       PARSE_BITS_ENTRY(
4757           KD.compute_pgm_rsrc2,
4758           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4759           ValRange);
4760     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4761       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4762                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4763                        Val, ValRange);
4764     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4765       PARSE_BITS_ENTRY(
4766           KD.compute_pgm_rsrc2,
4767           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4768           ValRange);
4769     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4770       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4771                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4772                        Val, ValRange);
4773     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4774       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4775                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4776                        Val, ValRange);
4777     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4778       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4779                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4780                        Val, ValRange);
4781     } else if (ID == ".amdhsa_exception_int_div_zero") {
4782       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4783                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4784                        Val, ValRange);
4785     } else {
4786       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
4787     }
4788 
4789 #undef PARSE_BITS_ENTRY
4790   }
4791 
4792   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4793     return TokError(".amdhsa_next_free_vgpr directive is required");
4794 
4795   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4796     return TokError(".amdhsa_next_free_sgpr directive is required");
4797 
4798   unsigned VGPRBlocks;
4799   unsigned SGPRBlocks;
4800   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4801                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
4802                          EnableWavefrontSize32, NextFreeVGPR,
4803                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4804                          SGPRBlocks))
4805     return true;
4806 
4807   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4808           VGPRBlocks))
4809     return OutOfRangeError(VGPRRange);
4810   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4811                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4812 
4813   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4814           SGPRBlocks))
4815     return OutOfRangeError(SGPRRange);
4816   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4817                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4818                   SGPRBlocks);
4819 
4820   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4821     return TokError("too many user SGPRs enabled");
4822   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4823                   UserSGPRCount);
4824 
4825   if (isGFX90A()) {
4826     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
4827       return TokError(".amdhsa_accum_offset directive is required");
4828     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
4829       return TokError("accum_offset should be in range [4..256] in "
4830                       "increments of 4");
4831     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
4832       return TokError("accum_offset exceeds total VGPR allocation");
4833     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
4834                     (AccumOffset / 4 - 1));
4835   }
4836 
4837   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4838       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4839       ReserveFlatScr);
4840   return false;
4841 }
4842 
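/// Parse the .hsa_code_object_version directive, e.g.:
///   .hsa_code_object_version 2,1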
4843 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4844   uint32_t Major;
4845   uint32_t Minor;
4846 
4847   if (ParseDirectiveMajorMinor(Major, Minor))
4848     return true;
4849 
4850   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4851   return false;
4852 }
4853 
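/// Parse the .hsa_code_object_isa directive. Both forms are accepted; with
/// no arguments the ISA version of the targeted GPU is emitted, e.g.:
///   .hsa_code_object_isa
///   .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"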
4854 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4855   uint32_t Major;
4856   uint32_t Minor;
4857   uint32_t Stepping;
4858   StringRef VendorName;
4859   StringRef ArchName;
4860 
4861   // If this directive has no arguments, then use the ISA version for the
4862   // targeted GPU.
4863   if (isToken(AsmToken::EndOfStatement)) {
4864     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4865     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
4866                                                         ISA.Stepping,
4867                                                         "AMD", "AMDGPU");
4868     return false;
4869   }
4870 
4871   if (ParseDirectiveMajorMinor(Major, Minor))
4872     return true;
4873 
4874   if (!trySkipToken(AsmToken::Comma))
4875     return TokError("stepping version number required, comma expected");
4876 
4877   if (ParseAsAbsoluteExpression(Stepping))
4878     return TokError("invalid stepping version");
4879 
4880   if (!trySkipToken(AsmToken::Comma))
4881     return TokError("vendor name required, comma expected");
4882 
4883   if (!parseString(VendorName, "invalid vendor name"))
4884     return true;
4885 
4886   if (!trySkipToken(AsmToken::Comma))
4887     return TokError("arch name required, comma expected");
4888 
4889   if (!parseString(ArchName, "invalid arch name"))
4890     return true;
4891 
4892   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
4893                                                       VendorName, ArchName);
4894   return false;
4895 }
4896 
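/// Parse a single 'key = value' entry of an .amd_kernel_code_t block,
/// e.g. 'wavefront_size = 6'.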
4897 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4898                                                amd_kernel_code_t &Header) {
4899   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4900   // assembly for backwards compatibility.
4901   if (ID == "max_scratch_backing_memory_byte_size") {
4902     Parser.eatToEndOfStatement();
4903     return false;
4904   }
4905 
4906   SmallString<40> ErrStr;
4907   raw_svector_ostream Err(ErrStr);
4908   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4909     return TokError(Err.str());
4910   }
4911   Lex();
4912 
4913   if (ID == "enable_wavefront_size32") {
4914     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4915       if (!isGFX10Plus())
4916         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4917       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4918         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4919     } else {
4920       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4921         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4922     }
4923   }
4924 
4925   if (ID == "wavefront_size") {
4926     if (Header.wavefront_size == 5) {
4927       if (!isGFX10Plus())
4928         return TokError("wavefront_size=5 is only allowed on GFX10+");
4929       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4930         return TokError("wavefront_size=5 requires +WavefrontSize32");
4931     } else if (Header.wavefront_size == 6) {
4932       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4933         return TokError("wavefront_size=6 requires +WavefrontSize64");
4934     }
4935   }
4936 
4937   if (ID == "enable_wgp_mode") {
4938     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
4939         !isGFX10Plus())
4940       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4941   }
4942 
4943   if (ID == "enable_mem_ordered") {
4944     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
4945         !isGFX10Plus())
4946       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4947   }
4948 
4949   if (ID == "enable_fwd_progress") {
4950     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
4951         !isGFX10Plus())
4952       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4953   }
4954 
4955   return false;
4956 }
4957 
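/// Parse an .amd_kernel_code_t block, e.g.:
///   .amd_kernel_code_t
///     wavefront_size = 6
///     enable_sgpr_kernarg_segment_ptr = 1
///   .end_amd_kernel_code_t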
4958 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4959   amd_kernel_code_t Header;
4960   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4961 
4962   while (true) {
4963     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
4964     // will set the current token to EndOfStatement.
4965     while (trySkipToken(AsmToken::EndOfStatement));
4966 
4967     StringRef ID;
4968     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
4969       return true;
4970 
4971     if (ID == ".end_amd_kernel_code_t")
4972       break;
4973 
4974     if (ParseAMDKernelCodeTValue(ID, Header))
4975       return true;
4976   }
4977 
4978   getTargetStreamer().EmitAMDKernelCodeT(Header);
4979 
4980   return false;
4981 }
4982 
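/// Parse the .amdgpu_hsa_kernel directive, which marks a symbol as an HSA
/// kernel, e.g. '.amdgpu_hsa_kernel my_kernel'.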
4983 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4984   StringRef KernelName;
4985   if (!parseId(KernelName, "expected symbol name"))
4986     return true;
4987 
4988   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4989                                            ELF::STT_AMDGPU_HSA_KERNEL);
4990 
4991   KernelScope.initialize(getContext());
4992   return false;
4993 }
4994 
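/// Parse the .amd_amdgpu_isa directive. It carries a quoted target id
/// string which must match the target id the assembler was configured with.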
4995 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4996   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4997     return Error(getLoc(),
4998                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4999                  "architectures");
5000   }
5001 
5002   auto TargetIDDirective = getLexer().getTok().getStringContents();
5003   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5004     return Error(getParser().getTok().getLoc(), "target id must match options");
5005 
5006   getTargetStreamer().EmitISAVersion();
5007   Lex();
5008 
5009   return false;
5010 }
5011 
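/// Parse an HSA metadata block: the YAML text between
/// .amd_amdgpu_hsa_metadata and .end_amd_amdgpu_hsa_metadata (code object
/// v2), or between .amdgpu_metadata and .end_amdgpu_metadata (v3 and v4).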
5012 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5013   const char *AssemblerDirectiveBegin;
5014   const char *AssemblerDirectiveEnd;
5015   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5016       isHsaAbiVersion3Or4(&getSTI())
5017           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5018                             HSAMD::V3::AssemblerDirectiveEnd)
5019           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5020                             HSAMD::AssemblerDirectiveEnd);
5021 
5022   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5023     return Error(getLoc(),
5024                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5025                  "not available on non-amdhsa OSes")).str());
5026   }
5027 
5028   std::string HSAMetadataString;
5029   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5030                           HSAMetadataString))
5031     return true;
5032 
5033   if (isHsaAbiVersion3Or4(&getSTI())) {
5034     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5035       return Error(getLoc(), "invalid HSA metadata");
5036   } else {
5037     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5038       return Error(getLoc(), "invalid HSA metadata");
5039   }
5040 
5041   return false;
5042 }
5043 
5044 /// Common code to parse out a block of text (typically YAML) between start and
5045 /// end directives.
5046 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5047                                           const char *AssemblerDirectiveEnd,
5048                                           std::string &CollectString) {
5049 
5050   raw_string_ostream CollectStream(CollectString);
5051 
5052   getLexer().setSkipSpace(false);
5053 
5054   bool FoundEnd = false;
5055   while (!isToken(AsmToken::Eof)) {
5056     while (isToken(AsmToken::Space)) {
5057       CollectStream << getTokenStr();
5058       Lex();
5059     }
5060 
5061     if (trySkipId(AssemblerDirectiveEnd)) {
5062       FoundEnd = true;
5063       break;
5064     }
5065 
5066     CollectStream << Parser.parseStringToEndOfStatement()
5067                   << getContext().getAsmInfo()->getSeparatorString();
5068 
5069     Parser.eatToEndOfStatement();
5070   }
5071 
5072   getLexer().setSkipSpace(true);
5073 
5074   if (isToken(AsmToken::Eof) && !FoundEnd) {
5075     return TokError(Twine("expected directive ") +
5076                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5077   }
5078 
5079   CollectStream.flush();
5080   return false;
5081 }
5082 
5083 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5084 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5085   std::string String;
5086   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5087                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5088     return true;
5089 
5090   auto PALMetadata = getTargetStreamer().getPALMetadata();
5091   if (!PALMetadata->setFromString(String))
5092     return Error(getLoc(), "invalid PAL metadata");
5093   return false;
5094 }
5095 
5096 /// Parse the assembler directive for old linear-format PAL metadata.
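/// The directive is followed by an even number of integer values: pairs of
/// a register key and the value to assign to it.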
5097 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5098   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5099     return Error(getLoc(),
5100                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5101                  "not available on non-amdpal OSes")).str());
5102   }
5103 
5104   auto PALMetadata = getTargetStreamer().getPALMetadata();
5105   PALMetadata->setLegacy();
5106   for (;;) {
5107     uint32_t Key, Value;
5108     if (ParseAsAbsoluteExpression(Key)) {
5109       return TokError(Twine("invalid value in ") +
5110                       Twine(PALMD::AssemblerDirective));
5111     }
5112     if (!trySkipToken(AsmToken::Comma)) {
5113       return TokError(Twine("expected an even number of values in ") +
5114                       Twine(PALMD::AssemblerDirective));
5115     }
5116     if (ParseAsAbsoluteExpression(Value)) {
5117       return TokError(Twine("invalid value in ") +
5118                       Twine(PALMD::AssemblerDirective));
5119     }
5120     PALMetadata->setRegister(Key, Value);
5121     if (!trySkipToken(AsmToken::Comma))
5122       break;
5123   }
5124   return false;
5125 }
5126 
5127 /// ParseDirectiveAMDGPULDS
5128 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
5129 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5130   if (getParser().checkForValidSection())
5131     return true;
5132 
5133   StringRef Name;
5134   SMLoc NameLoc = getLoc();
5135   if (getParser().parseIdentifier(Name))
5136     return TokError("expected identifier in directive");
5137 
5138   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5139   if (parseToken(AsmToken::Comma, "expected ','"))
5140     return true;
5141 
5142   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5143 
5144   int64_t Size;
5145   SMLoc SizeLoc = getLoc();
5146   if (getParser().parseAbsoluteExpression(Size))
5147     return true;
5148   if (Size < 0)
5149     return Error(SizeLoc, "size must be non-negative");
5150   if (Size > LocalMemorySize)
5151     return Error(SizeLoc, "size is too large");
5152 
5153   int64_t Alignment = 4;
5154   if (trySkipToken(AsmToken::Comma)) {
5155     SMLoc AlignLoc = getLoc();
5156     if (getParser().parseAbsoluteExpression(Alignment))
5157       return true;
5158     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5159       return Error(AlignLoc, "alignment must be a power of two");
5160 
5161     // Alignment larger than the size of LDS is possible in theory, as long
5162     // as the linker manages to place the symbol at address 0, but we do
5163     // want to make sure the alignment fits nicely into a 32-bit integer.
5164     if (Alignment >= 1u << 31)
5165       return Error(AlignLoc, "alignment is too large");
5166   }
5167 
5168   if (parseToken(AsmToken::EndOfStatement,
5169                  "unexpected token in '.amdgpu_lds' directive"))
5170     return true;
5171 
5172   Symbol->redefineIfPossible();
5173   if (!Symbol->isUndefined())
5174     return Error(NameLoc, "invalid symbol redefinition");
5175 
5176   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5177   return false;
5178 }
5179 
5180 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5181   StringRef IDVal = DirectiveID.getString();
5182 
5183   if (isHsaAbiVersion3Or4(&getSTI())) {
5184     if (IDVal == ".amdhsa_kernel")
5185      return ParseDirectiveAMDHSAKernel();
5186 
5187     // TODO: Restructure/combine with PAL metadata directive.
5188     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5189       return ParseDirectiveHSAMetadata();
5190   } else {
5191     if (IDVal == ".hsa_code_object_version")
5192       return ParseDirectiveHSACodeObjectVersion();
5193 
5194     if (IDVal == ".hsa_code_object_isa")
5195       return ParseDirectiveHSACodeObjectISA();
5196 
5197     if (IDVal == ".amd_kernel_code_t")
5198       return ParseDirectiveAMDKernelCodeT();
5199 
5200     if (IDVal == ".amdgpu_hsa_kernel")
5201       return ParseDirectiveAMDGPUHsaKernel();
5202 
5203     if (IDVal == ".amd_amdgpu_isa")
5204       return ParseDirectiveISAVersion();
5205 
5206     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5207       return ParseDirectiveHSAMetadata();
5208   }
5209 
5210   if (IDVal == ".amdgcn_target")
5211     return ParseDirectiveAMDGCNTarget();
5212 
5213   if (IDVal == ".amdgpu_lds")
5214     return ParseDirectiveAMDGPULDS();
5215 
5216   if (IDVal == PALMD::AssemblerDirectiveBegin)
5217     return ParseDirectivePALMetadataBegin();
5218 
5219   if (IDVal == PALMD::AssemblerDirective)
5220     return ParseDirectivePALMetadata();
5221 
5222   return true;
5223 }
5224 
5225 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5226                                            unsigned RegNo) {
5227 
5228   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
5229        R.isValid(); ++R) {
5230     if (*R == RegNo)
5231       return isGFX9Plus();
5232   }
5233 
5234   // GFX10 has 2 more SGPRs 104 and 105.
5235   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
5236        R.isValid(); ++R) {
5237     if (*R == RegNo)
5238       return hasSGPR104_SGPR105();
5239   }
5240 
5241   switch (RegNo) {
5242   case AMDGPU::SRC_SHARED_BASE:
5243   case AMDGPU::SRC_SHARED_LIMIT:
5244   case AMDGPU::SRC_PRIVATE_BASE:
5245   case AMDGPU::SRC_PRIVATE_LIMIT:
5246   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5247     return isGFX9Plus();
5248   case AMDGPU::TBA:
5249   case AMDGPU::TBA_LO:
5250   case AMDGPU::TBA_HI:
5251   case AMDGPU::TMA:
5252   case AMDGPU::TMA_LO:
5253   case AMDGPU::TMA_HI:
5254     return !isGFX9Plus();
5255   case AMDGPU::XNACK_MASK:
5256   case AMDGPU::XNACK_MASK_LO:
5257   case AMDGPU::XNACK_MASK_HI:
5258     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5259   case AMDGPU::SGPR_NULL:
5260     return isGFX10Plus();
5261   default:
5262     break;
5263   }
5264 
5265   if (isCI())
5266     return true;
5267 
5268   if (isSI() || isGFX10Plus()) {
5269     // No flat_scr on SI.
5270     // On GFX10 flat scratch is not a valid register operand and can only be
5271     // accessed with s_setreg/s_getreg.
5272     switch (RegNo) {
5273     case AMDGPU::FLAT_SCR:
5274     case AMDGPU::FLAT_SCR_LO:
5275     case AMDGPU::FLAT_SCR_HI:
5276       return false;
5277     default:
5278       return true;
5279     }
5280   }
5281 
5282   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5283   // SI/CI have.
5284   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
5285        R.isValid(); ++R) {
5286     if (*R == RegNo)
5287       return hasSGPR102_SGPR103();
5288   }
5289 
5290   return true;
5291 }
5292 
5293 OperandMatchResultTy
5294 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5295                               OperandMode Mode) {
5296   // Try to parse with a custom parser
5297   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5298 
5299   // If we successfully parsed the operand or if there was an error parsing,
5300   // we are done.
5301   //
5302   // If we are parsing after we reach EndOfStatement then this means we
5303   // are appending default values to the Operands list.  This is only done
5304   // by a custom parser, so we shouldn't continue on to the generic parsing.
5305   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5306       isToken(AsmToken::EndOfStatement))
5307     return ResTy;
5308 
5309   SMLoc RBraceLoc;
5310   SMLoc LBraceLoc = getLoc();
5311   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5312     unsigned Prefix = Operands.size();
5313 
5314     for (;;) {
5315       auto Loc = getLoc();
5316       ResTy = parseReg(Operands);
5317       if (ResTy == MatchOperand_NoMatch)
5318         Error(Loc, "expected a register");
5319       if (ResTy != MatchOperand_Success)
5320         return MatchOperand_ParseFail;
5321 
5322       RBraceLoc = getLoc();
5323       if (trySkipToken(AsmToken::RBrac))
5324         break;
5325 
5326       if (!skipToken(AsmToken::Comma,
5327                      "expected a comma or a closing square bracket")) {
5328         return MatchOperand_ParseFail;
5329       }
5330     }
5331 
5332     if (Operands.size() - Prefix > 1) {
5333       Operands.insert(Operands.begin() + Prefix,
5334                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5335       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5336     }
5337 
5338     return MatchOperand_Success;
5339   }
5340 
5341   return parseRegOrImm(Operands);
5342 }
5343 
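// Strip a trailing encoding suffix from the mnemonic and record it as a
// forced encoding, e.g. 'v_add_f32_e64' forces the 64-bit (VOP3) encoding
// of v_add_f32.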
5344 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5345   // Clear any forced encodings from the previous instruction.
5346   setForcedEncodingSize(0);
5347   setForcedDPP(false);
5348   setForcedSDWA(false);
5349 
5350   if (Name.endswith("_e64")) {
5351     setForcedEncodingSize(64);
5352     return Name.substr(0, Name.size() - 4);
5353   } else if (Name.endswith("_e32")) {
5354     setForcedEncodingSize(32);
5355     return Name.substr(0, Name.size() - 4);
5356   } else if (Name.endswith("_dpp")) {
5357     setForcedDPP(true);
5358     return Name.substr(0, Name.size() - 4);
5359   } else if (Name.endswith("_sdwa")) {
5360     setForcedSDWA(true);
5361     return Name.substr(0, Name.size() - 5);
5362   }
5363   return Name;
5364 }
5365 
5366 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5367                                        StringRef Name,
5368                                        SMLoc NameLoc, OperandVector &Operands) {
5369   // Add the instruction mnemonic
5370   Name = parseMnemonicSuffix(Name);
5371   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5372 
5373   bool IsMIMG = Name.startswith("image_");
5374 
5375   while (!trySkipToken(AsmToken::EndOfStatement)) {
5376     OperandMode Mode = OperandMode_Default;
5377     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5378       Mode = OperandMode_NSA;
5379     CPolSeen = 0;
5380     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5381 
5382     if (Res != MatchOperand_Success) {
5383       checkUnsupportedInstruction(Name, NameLoc);
5384       if (!Parser.hasPendingError()) {
5385         // FIXME: use real operand location rather than the current location.
5386         StringRef Msg =
5387           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5388                                             "not a valid operand.";
5389         Error(getLoc(), Msg);
5390       }
5391       while (!trySkipToken(AsmToken::EndOfStatement)) {
5392         lex();
5393       }
5394       return true;
5395     }
5396 
5397     // Eat the comma or space if there is one.
5398     trySkipToken(AsmToken::Comma);
5399   }
5400 
5401   return false;
5402 }
5403 
5404 //===----------------------------------------------------------------------===//
5405 // Utility functions
5406 //===----------------------------------------------------------------------===//
5407 
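// Parse an integer operand introduced by 'Prefix:', e.g. 'offset:4095'.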
5408 OperandMatchResultTy
5409 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5410 
5411   if (!trySkipId(Prefix, AsmToken::Colon))
5412     return MatchOperand_NoMatch;
5413 
5414   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5415 }
5416 
5417 OperandMatchResultTy
5418 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5419                                     AMDGPUOperand::ImmTy ImmTy,
5420                                     bool (*ConvertResult)(int64_t&)) {
5421   SMLoc S = getLoc();
5422   int64_t Value = 0;
5423 
5424   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5425   if (Res != MatchOperand_Success)
5426     return Res;
5427 
5428   if (ConvertResult && !ConvertResult(Value)) {
5429     Error(S, "invalid " + StringRef(Prefix) + " value.");
5430   }
5431 
5432   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5433   return MatchOperand_Success;
5434 }
5435 
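// Parse an array of 0/1 values introduced by 'Prefix:', e.g.
// 'op_sel:[0,0,1]'. The bits are packed into a single immediate.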
5436 OperandMatchResultTy
5437 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5438                                              OperandVector &Operands,
5439                                              AMDGPUOperand::ImmTy ImmTy,
5440                                              bool (*ConvertResult)(int64_t&)) {
5441   SMLoc S = getLoc();
5442   if (!trySkipId(Prefix, AsmToken::Colon))
5443     return MatchOperand_NoMatch;
5444 
5445   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5446     return MatchOperand_ParseFail;
5447 
5448   unsigned Val = 0;
5449   const unsigned MaxSize = 4;
5450 
5451   // FIXME: How to verify the number of elements matches the number of src
5452   // operands?
5453   for (int I = 0; ; ++I) {
5454     int64_t Op;
5455     SMLoc Loc = getLoc();
5456     if (!parseExpr(Op))
5457       return MatchOperand_ParseFail;
5458 
5459     if (Op != 0 && Op != 1) {
5460       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5461       return MatchOperand_ParseFail;
5462     }
5463 
5464     Val |= (Op << I);
5465 
5466     if (trySkipToken(AsmToken::RBrac))
5467       break;
5468 
5469     if (I + 1 == MaxSize) {
5470       Error(getLoc(), "expected a closing square bracket");
5471       return MatchOperand_ParseFail;
5472     }
5473 
5474     if (!skipToken(AsmToken::Comma, "expected a comma"))
5475       return MatchOperand_ParseFail;
5476   }
5477 
5478   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5479   return MatchOperand_Success;
5480 }
5481 
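// Parse a named bit, e.g. 'gds' sets the bit and 'nogds' clears it.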
5482 OperandMatchResultTy
5483 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5484                                AMDGPUOperand::ImmTy ImmTy) {
5485   int64_t Bit;
5486   SMLoc S = getLoc();
5487 
5488   if (trySkipId(Name)) {
5489     Bit = 1;
5490   } else if (trySkipId("no", Name)) {
5491     Bit = 0;
5492   } else {
5493     return MatchOperand_NoMatch;
5494   }
5495 
5496   if (Name == "r128" && !hasMIMG_R128()) {
5497     Error(S, "r128 modifier is not supported on this GPU");
5498     return MatchOperand_ParseFail;
5499   }
5500   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5501     Error(S, "a16 modifier is not supported on this GPU");
5502     return MatchOperand_ParseFail;
5503   }
5504 
5505   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5506     ImmTy = AMDGPUOperand::ImmTyR128A16;
5507 
5508   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5509   return MatchOperand_Success;
5510 }
5511 
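// Parse a cache policy modifier (glc, slc, dlc or scc) or its negated
// form, e.g. 'noglc'. All such modifiers on an instruction accumulate
// into a single CPol operand.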
5512 OperandMatchResultTy
5513 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5514   unsigned CPolOn = 0;
5515   unsigned CPolOff = 0;
5516   SMLoc S = getLoc();
5517 
5518   if (trySkipId("glc"))
5519     CPolOn = AMDGPU::CPol::GLC;
5520   else if (trySkipId("noglc"))
5521     CPolOff = AMDGPU::CPol::GLC;
5522   else if (trySkipId("slc"))
5523     CPolOn = AMDGPU::CPol::SLC;
5524   else if (trySkipId("noslc"))
5525     CPolOff = AMDGPU::CPol::SLC;
5526   else if (trySkipId("dlc"))
5527     CPolOn = AMDGPU::CPol::DLC;
5528   else if (trySkipId("nodlc"))
5529     CPolOff = AMDGPU::CPol::DLC;
5530   else if (trySkipId("scc"))
5531     CPolOn = AMDGPU::CPol::SCC;
5532   else if (trySkipId("noscc"))
5533     CPolOff = AMDGPU::CPol::SCC;
5534   else
5535     return MatchOperand_NoMatch;
5536 
5537   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5538     Error(S, "dlc modifier is not supported on this GPU");
5539     return MatchOperand_ParseFail;
5540   }
5541 
5542   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5543     Error(S, "scc modifier is not supported on this GPU");
5544     return MatchOperand_ParseFail;
5545   }
5546 
5547   if (CPolSeen & (CPolOn | CPolOff)) {
5548     Error(S, "duplicate cache policy modifier");
5549     return MatchOperand_ParseFail;
5550   }
5551 
5552   CPolSeen |= (CPolOn | CPolOff);
5553 
5554   for (unsigned I = 1; I != Operands.size(); ++I) {
5555     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5556     if (Op.isCPol()) {
5557       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5558       return MatchOperand_Success;
5559     }
5560   }
5561 
5562   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5563                                               AMDGPUOperand::ImmTyCPol));
5564 
5565   return MatchOperand_Success;
5566 }
5567 
5568 static void addOptionalImmOperand(
5569   MCInst& Inst, const OperandVector& Operands,
5570   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5571   AMDGPUOperand::ImmTy ImmT,
5572   int64_t Default = 0) {
5573   auto i = OptionalIdx.find(ImmT);
5574   if (i != OptionalIdx.end()) {
5575     unsigned Idx = i->second;
5576     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5577   } else {
5578     Inst.addOperand(MCOperand::createImm(Default));
5579   }
5580 }
5581 
5582 OperandMatchResultTy
5583 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5584                                        StringRef &Value,
5585                                        SMLoc &StringLoc) {
5586   if (!trySkipId(Prefix, AsmToken::Colon))
5587     return MatchOperand_NoMatch;
5588 
5589   StringLoc = getLoc();
5590   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5591                                                   : MatchOperand_ParseFail;
5592 }
5593 
5594 //===----------------------------------------------------------------------===//
5595 // MTBUF format
5596 //===----------------------------------------------------------------------===//
5597 
5598 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5599                                   int64_t MaxVal,
5600                                   int64_t &Fmt) {
5601   int64_t Val;
5602   SMLoc Loc = getLoc();
5603 
5604   auto Res = parseIntWithPrefix(Pref, Val);
5605   if (Res == MatchOperand_ParseFail)
5606     return false;
5607   if (Res == MatchOperand_NoMatch)
5608     return true;
5609 
5610   if (Val < 0 || Val > MaxVal) {
5611     Error(Loc, Twine("out of range ", StringRef(Pref)));
5612     return false;
5613   }
5614 
5615   Fmt = Val;
5616   return true;
5617 }
5618 
5619 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5620 // values to live in a joint format operand in the MCInst encoding.
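// E.g. 'dfmt:15, nfmt:2'; each part is optional and they may appear in
// either order.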
5621 OperandMatchResultTy
5622 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5623   using namespace llvm::AMDGPU::MTBUFFormat;
5624 
5625   int64_t Dfmt = DFMT_UNDEF;
5626   int64_t Nfmt = NFMT_UNDEF;
5627 
5628   // dfmt and nfmt can appear in either order, and each is optional.
5629   for (int I = 0; I < 2; ++I) {
5630     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5631       return MatchOperand_ParseFail;
5632 
5633     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5634       return MatchOperand_ParseFail;
5635     }
5636     // Skip optional comma between dfmt/nfmt
5637     // but guard against 2 commas following each other.
5638     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5639         !peekToken().is(AsmToken::Comma)) {
5640       trySkipToken(AsmToken::Comma);
5641     }
5642   }
5643 
5644   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5645     return MatchOperand_NoMatch;
5646 
5647   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5648   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5649 
5650   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5651   return MatchOperand_Success;
5652 }
5653 
5654 OperandMatchResultTy
5655 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5656   using namespace llvm::AMDGPU::MTBUFFormat;
5657 
5658   int64_t Fmt = UFMT_UNDEF;
5659 
5660   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5661     return MatchOperand_ParseFail;
5662 
5663   if (Fmt == UFMT_UNDEF)
5664     return MatchOperand_NoMatch;
5665 
5666   Format = Fmt;
5667   return MatchOperand_Success;
5668 }
5669 
5670 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5671                                     int64_t &Nfmt,
5672                                     StringRef FormatStr,
5673                                     SMLoc Loc) {
5674   using namespace llvm::AMDGPU::MTBUFFormat;
5675   int64_t Format;
5676 
5677   Format = getDfmt(FormatStr);
5678   if (Format != DFMT_UNDEF) {
5679     Dfmt = Format;
5680     return true;
5681   }
5682 
5683   Format = getNfmt(FormatStr, getSTI());
5684   if (Format != NFMT_UNDEF) {
5685     Nfmt = Format;
5686     return true;
5687   }
5688 
5689   Error(Loc, "unsupported format");
5690   return false;
5691 }
5692 
5693 OperandMatchResultTy
5694 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5695                                           SMLoc FormatLoc,
5696                                           int64_t &Format) {
5697   using namespace llvm::AMDGPU::MTBUFFormat;
5698 
5699   int64_t Dfmt = DFMT_UNDEF;
5700   int64_t Nfmt = NFMT_UNDEF;
5701   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5702     return MatchOperand_ParseFail;
5703 
5704   if (trySkipToken(AsmToken::Comma)) {
5705     StringRef Str;
5706     SMLoc Loc = getLoc();
5707     if (!parseId(Str, "expected a format string") ||
5708         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5709       return MatchOperand_ParseFail;
5710     }
5711     if (Dfmt == DFMT_UNDEF) {
5712       Error(Loc, "duplicate numeric format");
5713       return MatchOperand_ParseFail;
5714     } else if (Nfmt == NFMT_UNDEF) {
5715       Error(Loc, "duplicate data format");
5716       return MatchOperand_ParseFail;
5717     }
5718   }
5719 
5720   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5721   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5722 
5723   if (isGFX10Plus()) {
5724     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5725     if (Ufmt == UFMT_UNDEF) {
5726       Error(FormatLoc, "unsupported format");
5727       return MatchOperand_ParseFail;
5728     }
5729     Format = Ufmt;
5730   } else {
5731     Format = encodeDfmtNfmt(Dfmt, Nfmt);
5732   }
5733 
5734   return MatchOperand_Success;
5735 }
5736 
5737 OperandMatchResultTy
5738 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5739                                             SMLoc Loc,
5740                                             int64_t &Format) {
5741   using namespace llvm::AMDGPU::MTBUFFormat;
5742 
5743   auto Id = getUnifiedFormat(FormatStr);
5744   if (Id == UFMT_UNDEF)
5745     return MatchOperand_NoMatch;
5746 
5747   if (!isGFX10Plus()) {
5748     Error(Loc, "unified format is not supported on this GPU");
5749     return MatchOperand_ParseFail;
5750   }
5751 
5752   Format = Id;
5753   return MatchOperand_Success;
5754 }
5755 
5756 OperandMatchResultTy
5757 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5758   using namespace llvm::AMDGPU::MTBUFFormat;
5759   SMLoc Loc = getLoc();
5760 
5761   if (!parseExpr(Format))
5762     return MatchOperand_ParseFail;
5763   if (!isValidFormatEncoding(Format, getSTI())) {
5764     Error(Loc, "out of range format");
5765     return MatchOperand_ParseFail;
5766   }
5767 
5768   return MatchOperand_Success;
5769 }
5770 
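// Parse a 'format:' operand given either as a number, e.g. 'format:77',
// or as a bracketed symbolic name, e.g.
// 'format:[BUF_DATA_FORMAT_32, BUF_NUM_FORMAT_FLOAT]' or, on GFX10+,
// 'format:[BUF_FMT_32_FLOAT]'.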
5771 OperandMatchResultTy
5772 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5773   using namespace llvm::AMDGPU::MTBUFFormat;
5774 
5775   if (!trySkipId("format", AsmToken::Colon))
5776     return MatchOperand_NoMatch;
5777 
5778   if (trySkipToken(AsmToken::LBrac)) {
5779     StringRef FormatStr;
5780     SMLoc Loc = getLoc();
5781     if (!parseId(FormatStr, "expected a format string"))
5782       return MatchOperand_ParseFail;
5783 
5784     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5785     if (Res == MatchOperand_NoMatch)
5786       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5787     if (Res != MatchOperand_Success)
5788       return Res;
5789 
5790     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5791       return MatchOperand_ParseFail;
5792 
5793     return MatchOperand_Success;
5794   }
5795 
5796   return parseNumericFormat(Format);
5797 }
5798 
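// Parse the format operand of an MTBUF instruction. The format may appear
// either before or after soffset, so a default format operand is pushed
// first and patched afterwards if the format shows up later.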
5799 OperandMatchResultTy
5800 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5801   using namespace llvm::AMDGPU::MTBUFFormat;
5802 
5803   int64_t Format = getDefaultFormatEncoding(getSTI());
5804   OperandMatchResultTy Res;
5805   SMLoc Loc = getLoc();
5806 
5807   // Parse legacy format syntax.
5808   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5809   if (Res == MatchOperand_ParseFail)
5810     return Res;
5811 
5812   bool FormatFound = (Res == MatchOperand_Success);
5813 
5814   Operands.push_back(
5815     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5816 
5817   if (FormatFound)
5818     trySkipToken(AsmToken::Comma);
5819 
5820   if (isToken(AsmToken::EndOfStatement)) {
5821     // We are expecting an soffset operand,
5822     // but let the matcher handle the error.
5823     return MatchOperand_Success;
5824   }
5825 
5826   // Parse soffset.
5827   Res = parseRegOrImm(Operands);
5828   if (Res != MatchOperand_Success)
5829     return Res;
5830 
5831   trySkipToken(AsmToken::Comma);
5832 
5833   if (!FormatFound) {
5834     Res = parseSymbolicOrNumericFormat(Format);
5835     if (Res == MatchOperand_ParseFail)
5836       return Res;
5837     if (Res == MatchOperand_Success) {
5838       auto Size = Operands.size();
5839       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5840       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5841       Op.setImm(Format);
5842     }
5843     return MatchOperand_Success;
5844   }
5845 
5846   if (isId("format") && peekToken().is(AsmToken::Colon)) {
5847     Error(getLoc(), "duplicate format");
5848     return MatchOperand_ParseFail;
5849   }
5850   return MatchOperand_Success;
5851 }
5852 
5853 //===----------------------------------------------------------------------===//
5854 // ds
5855 //===----------------------------------------------------------------------===//
5856 
5857 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5858                                     const OperandVector &Operands) {
5859   OptionalImmIndexMap OptionalIdx;
5860 
5861   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5862     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5863 
5864     // Add the register arguments
5865     if (Op.isReg()) {
5866       Op.addRegOperands(Inst, 1);
5867       continue;
5868     }
5869 
5870     // Handle optional arguments
5871     OptionalIdx[Op.getImmTy()] = i;
5872   }
5873 
5874   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
5875   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
5876   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5877 
5878   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5879 }
5880 
5881 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
5882                                 bool IsGdsHardcoded) {
5883   OptionalImmIndexMap OptionalIdx;
5884 
5885   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5886     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5887 
5888     // Add the register arguments
5889     if (Op.isReg()) {
5890       Op.addRegOperands(Inst, 1);
5891       continue;
5892     }
5893 
5894     if (Op.isToken() && Op.getToken() == "gds") {
5895       IsGdsHardcoded = true;
5896       continue;
5897     }
5898 
5899     // Handle optional arguments
5900     OptionalIdx[Op.getImmTy()] = i;
5901   }
5902 
5903   AMDGPUOperand::ImmTy OffsetType =
5904     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
5905      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
5906      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
5907                                                       AMDGPUOperand::ImmTyOffset;
5908 
5909   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
5910 
5911   if (!IsGdsHardcoded) {
5912     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5913   }
5914   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5915 }
5916 
5917 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
5918   OptionalImmIndexMap OptionalIdx;
5919 
5920   unsigned OperandIdx[4];
5921   unsigned EnMask = 0;
5922   int SrcIdx = 0;
5923 
5924   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5925     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5926 
5927     // Add the register arguments
5928     if (Op.isReg()) {
5929       assert(SrcIdx < 4);
5930       OperandIdx[SrcIdx] = Inst.size();
5931       Op.addRegOperands(Inst, 1);
5932       ++SrcIdx;
5933       continue;
5934     }
5935 
5936     if (Op.isOff()) {
5937       assert(SrcIdx < 4);
5938       OperandIdx[SrcIdx] = Inst.size();
5939       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
5940       ++SrcIdx;
5941       continue;
5942     }
5943 
5944     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
5945       Op.addImmOperands(Inst, 1);
5946       continue;
5947     }
5948 
5949     if (Op.isToken() && Op.getToken() == "done")
5950       continue;
5951 
5952     // Handle optional arguments
5953     OptionalIdx[Op.getImmTy()] = i;
5954   }
5955 
5956   assert(SrcIdx == 4);
5957 
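  // A compressed export packs two 16-bit components into each register, so
  // only src0 and src1 are encoded: the third parsed source becomes src1
  // and the remaining source slots are dropped.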
5958   bool Compr = false;
5959   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
5960     Compr = true;
5961     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
5962     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
5963     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
5964   }
5965 
5966   for (auto i = 0; i < SrcIdx; ++i) {
5967     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
5968       EnMask |= Compr ? (0x3 << i * 2) : (0x1 << i);
5969     }
5970   }
5971 
5972   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
5973   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
5974 
5975   Inst.addOperand(MCOperand::createImm(EnMask));
5976 }
5977 
5978 //===----------------------------------------------------------------------===//
5979 // s_waitcnt
5980 //===----------------------------------------------------------------------===//
5981 
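// Encode CntVal into the corresponding bitfield of IntVal. If the value
// does not fit, i.e. it does not survive a decode round-trip, either
// saturate the field or report failure depending on Saturate. Returns
// true on failure.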
5982 static bool
5983 encodeCnt(
5984   const AMDGPU::IsaVersion ISA,
5985   int64_t &IntVal,
5986   int64_t CntVal,
5987   bool Saturate,
5988   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
5989   unsigned (*decode)(const IsaVersion &Version, unsigned))
5990 {
5991   bool Failed = false;
5992 
5993   IntVal = encode(ISA, IntVal, CntVal);
5994   if (CntVal != decode(ISA, IntVal)) {
5995     if (Saturate) {
5996       IntVal = encode(ISA, IntVal, -1);
5997     } else {
5998       Failed = true;
5999     }
6000   }
6001   return Failed;
6002 }
6003 
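// Parse one counter specification of an s_waitcnt operand, e.g. 'vmcnt(0)'
// in 's_waitcnt vmcnt(0) & lgkmcnt(0)'.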
6004 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6005 
6006   SMLoc CntLoc = getLoc();
6007   StringRef CntName = getTokenStr();
6008 
6009   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6010       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6011     return false;
6012 
6013   int64_t CntVal;
6014   SMLoc ValLoc = getLoc();
6015   if (!parseExpr(CntVal))
6016     return false;
6017 
6018   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6019 
6020   bool Failed = true;
6021   bool Sat = CntName.endswith("_sat");
6022 
6023   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6024     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6025   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6026     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6027   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6028     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6029   } else {
6030     Error(CntLoc, "invalid counter name " + CntName);
6031     return false;
6032   }
6033 
6034   if (Failed) {
6035     Error(ValLoc, "too large value for " + CntName);
6036     return false;
6037   }
6038 
6039   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6040     return false;
6041 
6042   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6043     if (isToken(AsmToken::EndOfStatement)) {
6044       Error(getLoc(), "expected a counter name");
6045       return false;
6046     }
6047   }
6048 
6049   return true;
6050 }
6051 
6052 OperandMatchResultTy
6053 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
6054   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6055   int64_t Waitcnt = getWaitcntBitMask(ISA);
6056   SMLoc S = getLoc();
6057 
6058   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6059     while (!isToken(AsmToken::EndOfStatement)) {
6060       if (!parseCnt(Waitcnt))
6061         return MatchOperand_ParseFail;
6062     }
6063   } else {
6064     if (!parseExpr(Waitcnt))
6065       return MatchOperand_ParseFail;
6066   }
6067 
6068   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6069   return MatchOperand_Success;
6070 }
6071 
6072 bool
6073 AMDGPUOperand::isSWaitCnt() const {
6074   return isImm();
6075 }
6076 
6077 //===----------------------------------------------------------------------===//
6078 // hwreg
6079 //===----------------------------------------------------------------------===//
6080 
6081 bool
6082 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6083                                 OperandInfoTy &Offset,
6084                                 OperandInfoTy &Width) {
6085   using namespace llvm::AMDGPU::Hwreg;
6086 
6087   // The register may be specified by name or using a numeric code
6088   HwReg.Loc = getLoc();
6089   if (isToken(AsmToken::Identifier) &&
6090       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
6091     HwReg.IsSymbolic = true;
6092     lex(); // skip register name
6093   } else if (!parseExpr(HwReg.Id, "a register name")) {
6094     return false;
6095   }
6096 
6097   if (trySkipToken(AsmToken::RParen))
6098     return true;
6099 
6100   // parse optional params
6101   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6102     return false;
6103 
6104   Offset.Loc = getLoc();
6105   if (!parseExpr(Offset.Id))
6106     return false;
6107 
6108   if (!skipToken(AsmToken::Comma, "expected a comma"))
6109     return false;
6110 
6111   Width.Loc = getLoc();
6112   return parseExpr(Width.Id) &&
6113          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6114 }
6115 
6116 bool
6117 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6118                                const OperandInfoTy &Offset,
6119                                const OperandInfoTy &Width) {
6120 
6121   using namespace llvm::AMDGPU::Hwreg;
6122 
6123   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
6124     Error(HwReg.Loc,
6125           "specified hardware register is not supported on this GPU");
6126     return false;
6127   }
6128   if (!isValidHwreg(HwReg.Id)) {
6129     Error(HwReg.Loc,
6130           "invalid code of hardware register: only 6-bit values are legal");
6131     return false;
6132   }
6133   if (!isValidHwregOffset(Offset.Id)) {
6134     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6135     return false;
6136   }
6137   if (!isValidHwregWidth(Width.Id)) {
6138     Error(Width.Loc,
6139           "invalid bitfield width: only values from 1 to 32 are legal");
6140     return false;
6141   }
6142   return true;
6143 }
6144 
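// Parse a hwreg operand, e.g. 'hwreg(HW_REG_MODE, 0, 32)', or a raw 16-bit
// immediate.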
6145 OperandMatchResultTy
6146 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6147   using namespace llvm::AMDGPU::Hwreg;
6148 
6149   int64_t ImmVal = 0;
6150   SMLoc Loc = getLoc();
6151 
6152   if (trySkipId("hwreg", AsmToken::LParen)) {
6153     OperandInfoTy HwReg(ID_UNKNOWN_);
6154     OperandInfoTy Offset(OFFSET_DEFAULT_);
6155     OperandInfoTy Width(WIDTH_DEFAULT_);
6156     if (parseHwregBody(HwReg, Offset, Width) &&
6157         validateHwreg(HwReg, Offset, Width)) {
6158       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6159     } else {
6160       return MatchOperand_ParseFail;
6161     }
6162   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6163     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6164       Error(Loc, "invalid immediate: only 16-bit values are legal");
6165       return MatchOperand_ParseFail;
6166     }
6167   } else {
6168     return MatchOperand_ParseFail;
6169   }
6170 
6171   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6172   return MatchOperand_Success;
6173 }
6174 
6175 bool AMDGPUOperand::isHwreg() const {
6176   return isImmTy(ImmTyHwreg);
6177 }
6178 
6179 //===----------------------------------------------------------------------===//
6180 // sendmsg
6181 //===----------------------------------------------------------------------===//
6182 
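// Syntax sketch (illustrative): the operand is either a raw 16-bit immediate
// or a macro of the form
//   sendmsg(<message name or id> [, <operation> [, <stream id>]])
// e.g.
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
// The message, operation and stream ids are packed by encodeMsg after
// validation below.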
6183 bool
6184 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6185                                   OperandInfoTy &Op,
6186                                   OperandInfoTy &Stream) {
6187   using namespace llvm::AMDGPU::SendMsg;
6188 
6189   Msg.Loc = getLoc();
6190   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
6191     Msg.IsSymbolic = true;
6192     lex(); // skip message name
6193   } else if (!parseExpr(Msg.Id, "a message name")) {
6194     return false;
6195   }
6196 
6197   if (trySkipToken(AsmToken::Comma)) {
6198     Op.IsDefined = true;
6199     Op.Loc = getLoc();
6200     if (isToken(AsmToken::Identifier) &&
6201         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6202       lex(); // skip operation name
6203     } else if (!parseExpr(Op.Id, "an operation name")) {
6204       return false;
6205     }
6206 
6207     if (trySkipToken(AsmToken::Comma)) {
6208       Stream.IsDefined = true;
6209       Stream.Loc = getLoc();
6210       if (!parseExpr(Stream.Id))
6211         return false;
6212     }
6213   }
6214 
6215   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6216 }
6217 
6218 bool
6219 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6220                                  const OperandInfoTy &Op,
6221                                  const OperandInfoTy &Stream) {
6222   using namespace llvm::AMDGPU::SendMsg;
6223 
  // Validation strictness depends on whether the message is specified
  // in a symbolic or in a numeric form. In the latter case
  // only the possibility of encoding is checked.
6227   bool Strict = Msg.IsSymbolic;
6228 
6229   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
6230     Error(Msg.Loc, "invalid message id");
6231     return false;
6232   }
6233   if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
6234     if (Op.IsDefined) {
6235       Error(Op.Loc, "message does not support operations");
6236     } else {
6237       Error(Msg.Loc, "missing message operation");
6238     }
6239     return false;
6240   }
6241   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6242     Error(Op.Loc, "invalid operation id");
6243     return false;
6244   }
6245   if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
6246     Error(Stream.Loc, "message operation does not support streams");
6247     return false;
6248   }
6249   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6250     Error(Stream.Loc, "invalid message stream id");
6251     return false;
6252   }
6253   return true;
6254 }
6255 
6256 OperandMatchResultTy
6257 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6258   using namespace llvm::AMDGPU::SendMsg;
6259 
6260   int64_t ImmVal = 0;
6261   SMLoc Loc = getLoc();
6262 
6263   if (trySkipId("sendmsg", AsmToken::LParen)) {
6264     OperandInfoTy Msg(ID_UNKNOWN_);
6265     OperandInfoTy Op(OP_NONE_);
6266     OperandInfoTy Stream(STREAM_ID_NONE_);
6267     if (parseSendMsgBody(Msg, Op, Stream) &&
6268         validateSendMsg(Msg, Op, Stream)) {
6269       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6270     } else {
6271       return MatchOperand_ParseFail;
6272     }
6273   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6274     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6275       Error(Loc, "invalid immediate: only 16-bit values are legal");
6276       return MatchOperand_ParseFail;
6277     }
6278   } else {
6279     return MatchOperand_ParseFail;
6280   }
6281 
6282   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6283   return MatchOperand_Success;
6284 }
6285 
6286 bool AMDGPUOperand::isSendMsg() const {
6287   return isImmTy(ImmTySendMsg);
6288 }
6289 
6290 //===----------------------------------------------------------------------===//
6291 // v_interp
6292 //===----------------------------------------------------------------------===//
6293 
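// Example of the syntax handled below (illustrative):
//   v_interp_p1_f32 v0, v1, attr0.x
// 'p10', 'p20' and 'p0' select the interpolation slot; 'attrN.c' selects one
// of 64 attributes and a channel in {x, y, z, w}.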
6294 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6295   StringRef Str;
6296   SMLoc S = getLoc();
6297 
6298   if (!parseId(Str))
6299     return MatchOperand_NoMatch;
6300 
6301   int Slot = StringSwitch<int>(Str)
6302     .Case("p10", 0)
6303     .Case("p20", 1)
6304     .Case("p0", 2)
6305     .Default(-1);
6306 
6307   if (Slot == -1) {
6308     Error(S, "invalid interpolation slot");
6309     return MatchOperand_ParseFail;
6310   }
6311 
6312   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6313                                               AMDGPUOperand::ImmTyInterpSlot));
6314   return MatchOperand_Success;
6315 }
6316 
6317 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6318   StringRef Str;
6319   SMLoc S = getLoc();
6320 
6321   if (!parseId(Str))
6322     return MatchOperand_NoMatch;
6323 
6324   if (!Str.startswith("attr")) {
6325     Error(S, "invalid interpolation attribute");
6326     return MatchOperand_ParseFail;
6327   }
6328 
6329   StringRef Chan = Str.take_back(2);
6330   int AttrChan = StringSwitch<int>(Chan)
6331     .Case(".x", 0)
6332     .Case(".y", 1)
6333     .Case(".z", 2)
6334     .Case(".w", 3)
6335     .Default(-1);
6336   if (AttrChan == -1) {
6337     Error(S, "invalid or missing interpolation attribute channel");
6338     return MatchOperand_ParseFail;
6339   }
6340 
6341   Str = Str.drop_back(2).drop_front(4);
6342 
6343   uint8_t Attr;
6344   if (Str.getAsInteger(10, Attr)) {
6345     Error(S, "invalid or missing interpolation attribute number");
6346     return MatchOperand_ParseFail;
6347   }
6348 
6349   if (Attr > 63) {
6350     Error(S, "out of bounds interpolation attribute number");
6351     return MatchOperand_ParseFail;
6352   }
6353 
6354   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6355 
6356   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6357                                               AMDGPUOperand::ImmTyInterpAttr));
6358   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6359                                               AMDGPUOperand::ImmTyAttrChan));
6360   return MatchOperand_Success;
6361 }
6362 
6363 //===----------------------------------------------------------------------===//
6364 // exp
6365 //===----------------------------------------------------------------------===//
6366 
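// Example of the syntax handled below (illustrative):
//   exp mrt0 v0, v1, v2, v3 done vm
// The target name ('mrt0' here) is translated to its id by getTgtId and then
// validated against the current subtarget.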
6367 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6368   using namespace llvm::AMDGPU::Exp;
6369 
6370   StringRef Str;
6371   SMLoc S = getLoc();
6372 
6373   if (!parseId(Str))
6374     return MatchOperand_NoMatch;
6375 
6376   unsigned Id = getTgtId(Str);
6377   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6378     Error(S, (Id == ET_INVALID) ?
6379                 "invalid exp target" :
6380                 "exp target is not supported on this GPU");
6381     return MatchOperand_ParseFail;
6382   }
6383 
6384   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6385                                               AMDGPUOperand::ImmTyExpTgt));
6386   return MatchOperand_Success;
6387 }
6388 
6389 //===----------------------------------------------------------------------===//
6390 // parser helpers
6391 //===----------------------------------------------------------------------===//
6392 
6393 bool
6394 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6395   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6396 }
6397 
6398 bool
6399 AMDGPUAsmParser::isId(const StringRef Id) const {
6400   return isId(getToken(), Id);
6401 }
6402 
6403 bool
6404 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6405   return getTokenKind() == Kind;
6406 }
6407 
6408 bool
6409 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6410   if (isId(Id)) {
6411     lex();
6412     return true;
6413   }
6414   return false;
6415 }
6416 
6417 bool
6418 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6419   if (isToken(AsmToken::Identifier)) {
6420     StringRef Tok = getTokenStr();
6421     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6422       lex();
6423       return true;
6424     }
6425   }
6426   return false;
6427 }
6428 
6429 bool
6430 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6431   if (isId(Id) && peekToken().is(Kind)) {
6432     lex();
6433     lex();
6434     return true;
6435   }
6436   return false;
6437 }
6438 
6439 bool
6440 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6441   if (isToken(Kind)) {
6442     lex();
6443     return true;
6444   }
6445   return false;
6446 }
6447 
6448 bool
6449 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6450                            const StringRef ErrMsg) {
6451   if (!trySkipToken(Kind)) {
6452     Error(getLoc(), ErrMsg);
6453     return false;
6454   }
6455   return true;
6456 }
6457 
6458 bool
6459 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6460   SMLoc S = getLoc();
6461 
6462   const MCExpr *Expr;
6463   if (Parser.parseExpression(Expr))
6464     return false;
6465 
6466   if (Expr->evaluateAsAbsolute(Imm))
6467     return true;
6468 
6469   if (Expected.empty()) {
6470     Error(S, "expected absolute expression");
6471   } else {
6472     Error(S, Twine("expected ", Expected) +
6473              Twine(" or an absolute expression"));
6474   }
6475   return false;
6476 }
6477 
6478 bool
6479 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6480   SMLoc S = getLoc();
6481 
6482   const MCExpr *Expr;
6483   if (Parser.parseExpression(Expr))
6484     return false;
6485 
6486   int64_t IntVal;
6487   if (Expr->evaluateAsAbsolute(IntVal)) {
6488     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6489   } else {
6490     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6491   }
6492   return true;
6493 }
6494 
6495 bool
6496 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6497   if (isToken(AsmToken::String)) {
6498     Val = getToken().getStringContents();
6499     lex();
6500     return true;
6501   } else {
6502     Error(getLoc(), ErrMsg);
6503     return false;
6504   }
6505 }
6506 
6507 bool
6508 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6509   if (isToken(AsmToken::Identifier)) {
6510     Val = getTokenStr();
6511     lex();
6512     return true;
6513   } else {
6514     if (!ErrMsg.empty())
6515       Error(getLoc(), ErrMsg);
6516     return false;
6517   }
6518 }
6519 
6520 AsmToken
6521 AMDGPUAsmParser::getToken() const {
6522   return Parser.getTok();
6523 }
6524 
6525 AsmToken
6526 AMDGPUAsmParser::peekToken() {
6527   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6528 }
6529 
6530 void
6531 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6532   auto TokCount = getLexer().peekTokens(Tokens);
6533 
6534   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6535     Tokens[Idx] = AsmToken(AsmToken::Error, "");
6536 }
6537 
6538 AsmToken::TokenKind
6539 AMDGPUAsmParser::getTokenKind() const {
6540   return getLexer().getKind();
6541 }
6542 
6543 SMLoc
6544 AMDGPUAsmParser::getLoc() const {
6545   return getToken().getLoc();
6546 }
6547 
6548 StringRef
6549 AMDGPUAsmParser::getTokenStr() const {
6550   return getToken().getString();
6551 }
6552 
6553 void
6554 AMDGPUAsmParser::lex() {
6555   Parser.Lex();
6556 }
6557 
6558 SMLoc
6559 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
6560                                const OperandVector &Operands) const {
6561   for (unsigned i = Operands.size() - 1; i > 0; --i) {
6562     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6563     if (Test(Op))
6564       return Op.getStartLoc();
6565   }
6566   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
6567 }
6568 
6569 SMLoc
6570 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
6571                            const OperandVector &Operands) const {
6572   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
6573   return getOperandLoc(Test, Operands);
6574 }
6575 
6576 SMLoc
6577 AMDGPUAsmParser::getRegLoc(unsigned Reg,
6578                            const OperandVector &Operands) const {
6579   auto Test = [=](const AMDGPUOperand& Op) {
6580     return Op.isRegKind() && Op.getReg() == Reg;
6581   };
6582   return getOperandLoc(Test, Operands);
6583 }
6584 
6585 SMLoc
6586 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
6587   auto Test = [](const AMDGPUOperand& Op) {
6588     return Op.IsImmKindLiteral() || Op.isExpr();
6589   };
6590   return getOperandLoc(Test, Operands);
6591 }
6592 
6593 SMLoc
6594 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
6595   auto Test = [](const AMDGPUOperand& Op) {
6596     return Op.isImmKindConst();
6597   };
6598   return getOperandLoc(Test, Operands);
6599 }
6600 
6601 //===----------------------------------------------------------------------===//
6602 // swizzle
6603 //===----------------------------------------------------------------------===//
6604 
6605 LLVM_READNONE
6606 static unsigned
6607 encodeBitmaskPerm(const unsigned AndMask,
6608                   const unsigned OrMask,
6609                   const unsigned XorMask) {
6610   using namespace llvm::AMDGPU::Swizzle;
6611 
6612   return BITMASK_PERM_ENC |
6613          (AndMask << BITMASK_AND_SHIFT) |
6614          (OrMask  << BITMASK_OR_SHIFT)  |
6615          (XorMask << BITMASK_XOR_SHIFT);
6616 }
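// A worked example (illustrative, assuming the usual field layout: and_mask
// in the low 5 bits, then or_mask and xor_mask in the next two 5-bit
// fields): encodeBitmaskPerm(0x1F, 0, 1) produces a BITMASK_PERM offset
// where each lane exchanges data with its neighbor (lane_id ^ 1).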
6617 
6618 bool
6619 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
6620                                      const unsigned MinVal,
6621                                      const unsigned MaxVal,
6622                                      const StringRef ErrMsg,
6623                                      SMLoc &Loc) {
6624   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6625     return false;
6626   }
6627   Loc = getLoc();
6628   if (!parseExpr(Op)) {
6629     return false;
6630   }
6631   if (Op < MinVal || Op > MaxVal) {
6632     Error(Loc, ErrMsg);
6633     return false;
6634   }
6635 
6636   return true;
6637 }
6638 
6639 bool
6640 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6641                                       const unsigned MinVal,
6642                                       const unsigned MaxVal,
6643                                       const StringRef ErrMsg) {
6644   SMLoc Loc;
6645   for (unsigned i = 0; i < OpNum; ++i) {
6646     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
6647       return false;
6648   }
6649 
6650   return true;
6651 }
6652 
6653 bool
6654 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6655   using namespace llvm::AMDGPU::Swizzle;
6656 
6657   int64_t Lane[LANE_NUM];
6658   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6659                            "expected a 2-bit lane id")) {
6660     Imm = QUAD_PERM_ENC;
6661     for (unsigned I = 0; I < LANE_NUM; ++I) {
6662       Imm |= Lane[I] << (LANE_SHIFT * I);
6663     }
6664     return true;
6665   }
6666   return false;
6667 }
6668 
6669 bool
6670 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6671   using namespace llvm::AMDGPU::Swizzle;
6672 
6673   SMLoc Loc;
6674   int64_t GroupSize;
6675   int64_t LaneIdx;
6676 
6677   if (!parseSwizzleOperand(GroupSize,
6678                            2, 32,
6679                            "group size must be in the interval [2,32]",
6680                            Loc)) {
6681     return false;
6682   }
6683   if (!isPowerOf2_64(GroupSize)) {
6684     Error(Loc, "group size must be a power of two");
6685     return false;
6686   }
6687   if (parseSwizzleOperand(LaneIdx,
6688                           0, GroupSize - 1,
6689                           "lane id must be in the interval [0,group size - 1]",
6690                           Loc)) {
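    // Group-aligned broadcast (sketch): ANDing with ~(GroupSize - 1) keeps
    // the group's base lane, ORing with LaneIdx selects the broadcast
    // source, so every lane in a group reads lane (group_base | LaneIdx).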
6691     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6692     return true;
6693   }
6694   return false;
6695 }
6696 
6697 bool
6698 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
6699   using namespace llvm::AMDGPU::Swizzle;
6700 
6701   SMLoc Loc;
6702   int64_t GroupSize;
6703 
6704   if (!parseSwizzleOperand(GroupSize,
6705                            2, 32,
6706                            "group size must be in the interval [2,32]",
6707                            Loc)) {
6708     return false;
6709   }
6710   if (!isPowerOf2_64(GroupSize)) {
6711     Error(Loc, "group size must be a power of two");
6712     return false;
6713   }
6714 
6715   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6716   return true;
6717 }
6718 
6719 bool
6720 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6721   using namespace llvm::AMDGPU::Swizzle;
6722 
6723   SMLoc Loc;
6724   int64_t GroupSize;
6725 
6726   if (!parseSwizzleOperand(GroupSize,
6727                            1, 16,
6728                            "group size must be in the interval [1,16]",
6729                            Loc)) {
6730     return false;
6731   }
6732   if (!isPowerOf2_64(GroupSize)) {
6733     Error(Loc, "group size must be a power of two");
6734     return false;
6735   }
6736 
6737   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6738   return true;
6739 }
6740 
6741 bool
6742 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
6743   using namespace llvm::AMDGPU::Swizzle;
6744 
6745   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6746     return false;
6747   }
6748 
6749   StringRef Ctl;
6750   SMLoc StrLoc = getLoc();
6751   if (!parseString(Ctl)) {
6752     return false;
6753   }
6754   if (Ctl.size() != BITMASK_WIDTH) {
6755     Error(StrLoc, "expected a 5-character mask");
6756     return false;
6757   }
6758 
6759   unsigned AndMask = 0;
6760   unsigned OrMask = 0;
6761   unsigned XorMask = 0;
6762 
6763   for (size_t i = 0; i < Ctl.size(); ++i) {
6764     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
6765     switch(Ctl[i]) {
6766     default:
6767       Error(StrLoc, "invalid mask");
6768       return false;
6769     case '0':
6770       break;
6771     case '1':
6772       OrMask |= Mask;
6773       break;
6774     case 'p':
6775       AndMask |= Mask;
6776       break;
6777     case 'i':
6778       AndMask |= Mask;
6779       XorMask |= Mask;
6780       break;
6781     }
6782   }
6783 
6784   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
6785   return true;
6786 }
6787 
6788 bool
6789 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6790 
6791   SMLoc OffsetLoc = getLoc();
6792 
6793   if (!parseExpr(Imm, "a swizzle macro")) {
6794     return false;
6795   }
6796   if (!isUInt<16>(Imm)) {
6797     Error(OffsetLoc, "expected a 16-bit offset");
6798     return false;
6799   }
6800   return true;
6801 }
6802 
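// Accepted macro forms (an illustrative list, mirroring the modes below):
//   swizzle(QUAD_PERM, 0, 1, 2, 3)
//   swizzle(BITMASK_PERM, "01pi0")
//   swizzle(BROADCAST, 2, 0)
//   swizzle(SWAP, 8)
//   swizzle(REVERSE, 4)
// used, for example, as: ds_swizzle_b32 v8, v2 offset:swizzle(SWAP, 2)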
6803 bool
6804 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6805   using namespace llvm::AMDGPU::Swizzle;
6806 
  if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
6808 
6809     SMLoc ModeLoc = getLoc();
6810     bool Ok = false;
6811 
6812     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6813       Ok = parseSwizzleQuadPerm(Imm);
6814     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6815       Ok = parseSwizzleBitmaskPerm(Imm);
6816     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6817       Ok = parseSwizzleBroadcast(Imm);
6818     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6819       Ok = parseSwizzleSwap(Imm);
6820     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6821       Ok = parseSwizzleReverse(Imm);
6822     } else {
6823       Error(ModeLoc, "expected a swizzle mode");
6824     }
6825 
    return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
6827   }
6828 
6829   return false;
6830 }
6831 
6832 OperandMatchResultTy
6833 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6834   SMLoc S = getLoc();
6835   int64_t Imm = 0;
6836 
6837   if (trySkipId("offset")) {
6838 
6839     bool Ok = false;
6840     if (skipToken(AsmToken::Colon, "expected a colon")) {
6841       if (trySkipId("swizzle")) {
6842         Ok = parseSwizzleMacro(Imm);
6843       } else {
6844         Ok = parseSwizzleOffset(Imm);
6845       }
6846     }
6847 
6848     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
6849 
    return Ok ? MatchOperand_Success : MatchOperand_ParseFail;
6851   } else {
6852     // Swizzle "offset" operand is optional.
6853     // If it is omitted, try parsing other optional operands.
6854     return parseOptionalOpr(Operands);
6855   }
6856 }
6857 
6858 bool
6859 AMDGPUOperand::isSwizzle() const {
6860   return isImmTy(ImmTySwizzle);
6861 }
6862 
6863 //===----------------------------------------------------------------------===//
6864 // VGPR Index Mode
6865 //===----------------------------------------------------------------------===//
6866 
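// Example of the macro handled below (illustrative):
//   s_set_gpr_idx_on s2, gpr_idx(SRC0, SRC1, DST)
// Each listed mode contributes one bit to the resulting immediate;
// duplicates are rejected.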
6867 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
6868 
6869   using namespace llvm::AMDGPU::VGPRIndexMode;
6870 
6871   if (trySkipToken(AsmToken::RParen)) {
6872     return OFF;
6873   }
6874 
6875   int64_t Imm = 0;
6876 
6877   while (true) {
6878     unsigned Mode = 0;
6879     SMLoc S = getLoc();
6880 
6881     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
6882       if (trySkipId(IdSymbolic[ModeId])) {
6883         Mode = 1 << ModeId;
6884         break;
6885       }
6886     }
6887 
6888     if (Mode == 0) {
      Error(S, (Imm == 0) ?
               "expected a VGPR index mode or a closing parenthesis" :
               "expected a VGPR index mode");
6892       return UNDEF;
6893     }
6894 
6895     if (Imm & Mode) {
6896       Error(S, "duplicate VGPR index mode");
6897       return UNDEF;
6898     }
6899     Imm |= Mode;
6900 
6901     if (trySkipToken(AsmToken::RParen))
6902       break;
6903     if (!skipToken(AsmToken::Comma,
6904                    "expected a comma or a closing parenthesis"))
6905       return UNDEF;
6906   }
6907 
6908   return Imm;
6909 }
6910 
6911 OperandMatchResultTy
6912 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
6913 
6914   using namespace llvm::AMDGPU::VGPRIndexMode;
6915 
6916   int64_t Imm = 0;
6917   SMLoc S = getLoc();
6918 
6919   if (trySkipId("gpr_idx", AsmToken::LParen)) {
6920     Imm = parseGPRIdxMacro();
6921     if (Imm == UNDEF)
6922       return MatchOperand_ParseFail;
6923   } else {
6924     if (getParser().parseAbsoluteExpression(Imm))
6925       return MatchOperand_ParseFail;
6926     if (Imm < 0 || !isUInt<4>(Imm)) {
6927       Error(S, "invalid immediate: only 4-bit values are legal");
6928       return MatchOperand_ParseFail;
6929     }
6930   }
6931 
6932   Operands.push_back(
6933       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
6934   return MatchOperand_Success;
6935 }
6936 
6937 bool AMDGPUOperand::isGPRIdxMode() const {
6938   return isImmTy(ImmTyGprIdxMode);
6939 }
6940 
6941 //===----------------------------------------------------------------------===//
6942 // sopp branch targets
6943 //===----------------------------------------------------------------------===//
6944 
6945 OperandMatchResultTy
6946 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
6947 
6948   // Make sure we are not parsing something
6949   // that looks like a label or an expression but is not.
6950   // This will improve error messages.
6951   if (isRegister() || isModifier())
6952     return MatchOperand_NoMatch;
6953 
6954   if (!parseExpr(Operands))
6955     return MatchOperand_ParseFail;
6956 
6957   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
6958   assert(Opr.isImm() || Opr.isExpr());
6959   SMLoc Loc = Opr.getStartLoc();
6960 
6961   // Currently we do not support arbitrary expressions as branch targets.
6962   // Only labels and absolute expressions are accepted.
6963   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
6964     Error(Loc, "expected an absolute expression or a label");
6965   } else if (Opr.isImm() && !Opr.isS16Imm()) {
6966     Error(Loc, "expected a 16-bit signed jump offset");
6967   }
6968 
6969   return MatchOperand_Success;
6970 }
6971 
6972 //===----------------------------------------------------------------------===//
6973 // Boolean holding registers
6974 //===----------------------------------------------------------------------===//
6975 
6976 OperandMatchResultTy
6977 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
6978   return parseReg(Operands);
6979 }
6980 
6981 //===----------------------------------------------------------------------===//
6982 // mubuf
6983 //===----------------------------------------------------------------------===//
6984 
6985 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
6986   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
6987 }
6988 
6989 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
6990                                    const OperandVector &Operands,
6991                                    bool IsAtomic,
6992                                    bool IsLds) {
6993   bool IsLdsOpcode = IsLds;
6994   bool HasLdsModifier = false;
6995   OptionalImmIndexMap OptionalIdx;
6996   unsigned FirstOperandIdx = 1;
6997   bool IsAtomicReturn = false;
6998 
6999   if (IsAtomic) {
7000     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7001       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7002       if (!Op.isCPol())
7003         continue;
7004       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7005       break;
7006     }
7007 
7008     if (!IsAtomicReturn) {
7009       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7010       if (NewOpc != -1)
7011         Inst.setOpcode(NewOpc);
7012     }
7013 
    IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
                     SIInstrFlags::IsAtomicRet;
7016   }
7017 
7018   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7019     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7020 
7021     // Add the register arguments
7022     if (Op.isReg()) {
7023       Op.addRegOperands(Inst, 1);
7024       // Insert a tied src for atomic return dst.
7025       // This cannot be postponed as subsequent calls to
      // addImmOperands rely on the correct number of MC operands.
7027       if (IsAtomicReturn && i == FirstOperandIdx)
7028         Op.addRegOperands(Inst, 1);
7029       continue;
7030     }
7031 
7032     // Handle the case where soffset is an immediate
7033     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7034       Op.addImmOperands(Inst, 1);
7035       continue;
7036     }
7037 
7038     HasLdsModifier |= Op.isLDS();
7039 
7040     // Handle tokens like 'offen' which are sometimes hard-coded into the
7041     // asm string.  There are no MCInst operands for these.
7042     if (Op.isToken()) {
7043       continue;
7044     }
7045     assert(Op.isImm());
7046 
7047     // Handle optional arguments
7048     OptionalIdx[Op.getImmTy()] = i;
7049   }
7050 
  // This is a workaround for an llvm quirk which may result in an
  // incorrect instruction selection. Lds and non-lds versions of
  // MUBUF instructions are identical except that lds versions
  // have a mandatory 'lds' modifier. However, this modifier follows
  // optional modifiers, and the llvm asm matcher regards this 'lds'
  // modifier as an optional one. As a result, an lds version
  // of an opcode may be selected even if it has no 'lds' modifier.
7058   if (IsLdsOpcode && !HasLdsModifier) {
7059     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
7060     if (NoLdsOpcode != -1) { // Got lds version - correct it.
7061       Inst.setOpcode(NoLdsOpcode);
7062       IsLdsOpcode = false;
7063     }
7064   }
7065 
7066   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7067   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7068 
7069   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
7070     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7071   }
7072   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7073 }
7074 
7075 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7076   OptionalImmIndexMap OptionalIdx;
7077 
7078   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7079     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7080 
7081     // Add the register arguments
7082     if (Op.isReg()) {
7083       Op.addRegOperands(Inst, 1);
7084       continue;
7085     }
7086 
7087     // Handle the case where soffset is an immediate
7088     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7089       Op.addImmOperands(Inst, 1);
7090       continue;
7091     }
7092 
7093     // Handle tokens like 'offen' which are sometimes hard-coded into the
7094     // asm string.  There are no MCInst operands for these.
7095     if (Op.isToken()) {
7096       continue;
7097     }
7098     assert(Op.isImm());
7099 
7100     // Handle optional arguments
7101     OptionalIdx[Op.getImmTy()] = i;
7102   }
7103 
7104   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7105                         AMDGPUOperand::ImmTyOffset);
7106   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7107   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7108   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7109   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7110 }
7111 
7112 //===----------------------------------------------------------------------===//
7113 // mimg
7114 //===----------------------------------------------------------------------===//
7115 
7116 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7117                               bool IsAtomic) {
7118   unsigned I = 1;
7119   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7120   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7121     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7122   }
7123 
7124   if (IsAtomic) {
7125     // Add src, same as dst
7126     assert(Desc.getNumDefs() == 1);
7127     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7128   }
7129 
7130   OptionalImmIndexMap OptionalIdx;
7131 
7132   for (unsigned E = Operands.size(); I != E; ++I) {
7133     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7134 
7135     // Add the register arguments
7136     if (Op.isReg()) {
7137       Op.addRegOperands(Inst, 1);
7138     } else if (Op.isImmModifier()) {
7139       OptionalIdx[Op.getImmTy()] = I;
7140     } else if (!Op.isToken()) {
7141       llvm_unreachable("unexpected operand type");
7142     }
7143   }
7144 
7145   bool IsGFX10Plus = isGFX10Plus();
7146 
7147   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7148   if (IsGFX10Plus)
7149     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7150   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7151   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7152   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7153   if (IsGFX10Plus)
7154     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7155   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7156     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7157   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7158   if (!IsGFX10Plus)
7159     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7160   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7161 }
7162 
7163 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7164   cvtMIMG(Inst, Operands, true);
7165 }
7166 
7167 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7168   OptionalImmIndexMap OptionalIdx;
7169   bool IsAtomicReturn = false;
7170 
7171   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7172     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7173     if (!Op.isCPol())
7174       continue;
7175     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7176     break;
7177   }
7178 
7179   if (!IsAtomicReturn) {
7180     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7181     if (NewOpc != -1)
7182       Inst.setOpcode(NewOpc);
7183   }
7184 
  IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
                   SIInstrFlags::IsAtomicRet;
7187 
7188   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7189     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7190 
7191     // Add the register arguments
7192     if (Op.isReg()) {
7193       Op.addRegOperands(Inst, 1);
7194       if (IsAtomicReturn && i == 1)
7195         Op.addRegOperands(Inst, 1);
7196       continue;
7197     }
7198 
7199     // Handle the case where soffset is an immediate
7200     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7201       Op.addImmOperands(Inst, 1);
7202       continue;
7203     }
7204 
7205     // Handle tokens like 'offen' which are sometimes hard-coded into the
7206     // asm string.  There are no MCInst operands for these.
7207     if (Op.isToken()) {
7208       continue;
7209     }
7210     assert(Op.isImm());
7211 
7212     // Handle optional arguments
7213     OptionalIdx[Op.getImmTy()] = i;
7214   }
7215 
7216   if ((int)Inst.getNumOperands() <=
7217       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7218     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7219   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7220 }
7221 
7222 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7223                                       const OperandVector &Operands) {
7224   for (unsigned I = 1; I < Operands.size(); ++I) {
7225     auto &Operand = (AMDGPUOperand &)*Operands[I];
7226     if (Operand.isReg())
7227       Operand.addRegOperands(Inst, 1);
7228   }
7229 
7230   Inst.addOperand(MCOperand::createImm(1)); // a16
7231 }
7232 
7233 //===----------------------------------------------------------------------===//
7234 // smrd
7235 //===----------------------------------------------------------------------===//
7236 
7237 bool AMDGPUOperand::isSMRDOffset8() const {
7238   return isImm() && isUInt<8>(getImm());
7239 }
7240 
7241 bool AMDGPUOperand::isSMEMOffset() const {
7242   return isImm(); // Offset range is checked later by validator.
7243 }
7244 
7245 bool AMDGPUOperand::isSMRDLiteralOffset() const {
  // 32-bit literals are only supported on CI, and we only want to use them
  // when the offset does not fit in 8 bits.
7248   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7249 }
7250 
7251 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7252   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7253 }
7254 
7255 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7256   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7257 }
7258 
7259 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7260   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7261 }
7262 
7263 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7264   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7265 }
7266 
7267 //===----------------------------------------------------------------------===//
7268 // vop3
7269 //===----------------------------------------------------------------------===//
7270 
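// omod conversion sketch: the 2-bit output-modifier field is taken here to
// encode 0 = none, 1 = *2, 2 = *4, 3 = /2 (an assumption stated for
// illustration). Hence 'mul:2' maps to 1, 'mul:4' to 2, and 'div:2' to 3,
// which is what the two converters below compute.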
7271 static bool ConvertOmodMul(int64_t &Mul) {
7272   if (Mul != 1 && Mul != 2 && Mul != 4)
7273     return false;
7274 
7275   Mul >>= 1;
7276   return true;
7277 }
7278 
7279 static bool ConvertOmodDiv(int64_t &Div) {
7280   if (Div == 1) {
7281     Div = 0;
7282     return true;
7283   }
7284 
7285   if (Div == 2) {
7286     Div = 3;
7287     return true;
7288   }
7289 
7290   return false;
7291 }
7292 
7293 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7294 // This is intentional and ensures compatibility with sp3.
7295 // See bug 35397 for details.
7296 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7297   if (BoundCtrl == 0 || BoundCtrl == 1) {
7298     BoundCtrl = 1;
7299     return true;
7300   }
7301   return false;
7302 }
7303 
7304 // Note: the order in this table matches the order of operands in AsmString.
7305 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7306   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7307   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7308   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7309   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7310   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7311   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7312   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7313   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7314   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7315   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7316   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7317   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7318   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7319   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7320   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7321   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7322   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7323   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7324   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7325   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7326   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7327   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7328   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7329   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7330   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7331   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7332   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7333   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
7334   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7335   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7336   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7337   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7338   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7339   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7340   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7341   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7342   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7343   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7344   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7345   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7346   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7347 };
7348 
7349 void AMDGPUAsmParser::onBeginOfFile() {
7350   if (!getParser().getStreamer().getTargetStreamer() ||
7351       getSTI().getTargetTriple().getArch() == Triple::r600)
7352     return;
7353 
7354   if (!getTargetStreamer().getTargetID())
7355     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7356 
7357   if (isHsaAbiVersion3Or4(&getSTI()))
7358     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7359 }
7360 
7361 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7362 
7363   OperandMatchResultTy res = parseOptionalOpr(Operands);
7364 
  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // The current design assumes that all operands after the first optional
  // operand are also optional. However, the implementation of some
  // instructions violates this rule (see e.g. flat/global atomic which have
  // hardcoded 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure the autogenerated parser of custom operands never hits
  // hardcoded mandatory operands.
7375 
7376   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7377     if (res != MatchOperand_Success ||
7378         isToken(AsmToken::EndOfStatement))
7379       break;
7380 
7381     trySkipToken(AsmToken::Comma);
7382     res = parseOptionalOpr(Operands);
7383   }
7384 
7385   return res;
7386 }
7387 
7388 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7389   OperandMatchResultTy res;
7390   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7391     // try to parse any optional operand here
7392     if (Op.IsBit) {
7393       res = parseNamedBit(Op.Name, Operands, Op.Type);
7394     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7395       res = parseOModOperand(Operands);
7396     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7397                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7398                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7399       res = parseSDWASel(Operands, Op.Name, Op.Type);
7400     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7401       res = parseSDWADstUnused(Operands);
7402     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7403                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7404                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7405                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7406       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7407                                         Op.ConvertResult);
7408     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7409       res = parseDim(Operands);
7410     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7411       res = parseCPol(Operands);
7412     } else {
7413       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7414     }
7415     if (res != MatchOperand_NoMatch) {
7416       return res;
7417     }
7418   }
7419   return MatchOperand_NoMatch;
7420 }
7421 
7422 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7423   StringRef Name = getTokenStr();
7424   if (Name == "mul") {
7425     return parseIntWithPrefix("mul", Operands,
7426                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7427   }
7428 
7429   if (Name == "div") {
7430     return parseIntWithPrefix("div", Operands,
7431                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7432   }
7433 
7434   return MatchOperand_NoMatch;
7435 }
7436 
7437 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
7438   cvtVOP3P(Inst, Operands);
7439 
7440   int Opc = Inst.getOpcode();
7441 
7442   int SrcNum;
7443   const int Ops[] = { AMDGPU::OpName::src0,
7444                       AMDGPU::OpName::src1,
7445                       AMDGPU::OpName::src2 };
7446   for (SrcNum = 0;
7447        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
7448        ++SrcNum);
7449   assert(SrcNum > 0);
7450 
7451   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7452   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7453 
7454   if ((OpSel & (1 << SrcNum)) != 0) {
7455     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7456     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7457     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
7458   }
7459 }
7460 
7461 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
      // 1. This operand is an input modifier
  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not the last operand
      && Desc.NumOperands > (OpNum + 1)
      // 3. The next operand is a register class
      && Desc.OpInfo[OpNum + 1].RegClass != -1
      // 4. The next operand is not tied to any other operand
      && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7470 }
7471 
7472 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7473 {
7474   OptionalImmIndexMap OptionalIdx;
7475   unsigned Opc = Inst.getOpcode();
7476 
7477   unsigned I = 1;
7478   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7479   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7480     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7481   }
7482 
7483   for (unsigned E = Operands.size(); I != E; ++I) {
7484     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7485     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7486       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7487     } else if (Op.isInterpSlot() ||
7488                Op.isInterpAttr() ||
7489                Op.isAttrChan()) {
7490       Inst.addOperand(MCOperand::createImm(Op.getImm()));
7491     } else if (Op.isImmModifier()) {
7492       OptionalIdx[Op.getImmTy()] = I;
7493     } else {
7494       llvm_unreachable("unhandled operand type");
7495     }
7496   }
7497 
7498   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
7499     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
7500   }
7501 
7502   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7503     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7504   }
7505 
7506   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7507     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7508   }
7509 }
7510 
7511 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
7512                               OptionalImmIndexMap &OptionalIdx) {
7513   unsigned Opc = Inst.getOpcode();
7514 
7515   unsigned I = 1;
7516   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7517   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7518     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7519   }
7520 
7521   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
7522     // This instruction has src modifiers
7523     for (unsigned E = Operands.size(); I != E; ++I) {
7524       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7525       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7526         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7527       } else if (Op.isImmModifier()) {
7528         OptionalIdx[Op.getImmTy()] = I;
7529       } else if (Op.isRegOrImm()) {
7530         Op.addRegOrImmOperands(Inst, 1);
7531       } else {
7532         llvm_unreachable("unhandled operand type");
7533       }
7534     }
7535   } else {
7536     // No src modifiers
7537     for (unsigned E = Operands.size(); I != E; ++I) {
7538       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7539       if (Op.isMod()) {
7540         OptionalIdx[Op.getImmTy()] = I;
7541       } else {
7542         Op.addRegOrImmOperands(Inst, 1);
7543       }
7544     }
7545   }
7546 
7547   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7548     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7549   }
7550 
7551   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7552     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7553   }
7554 
  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // they have a src2 register operand that is tied to the dst operand.
  // We don't allow modifiers for this operand in the assembler, so
  // src2_modifiers should be 0.
7559   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
7560       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
7561       Opc == AMDGPU::V_MAC_F32_e64_vi ||
7562       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
7563       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
7564       Opc == AMDGPU::V_MAC_F16_e64_vi ||
7565       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
7566       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
7567       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
7568       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
7569       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
7570     auto it = Inst.begin();
7571     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
7572     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
7573     ++it;
7574     // Copy the operand to ensure it's not invalidated when Inst grows.
7575     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
7576   }
7577 }
7578 
7579 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
7580   OptionalImmIndexMap OptionalIdx;
7581   cvtVOP3(Inst, Operands, OptionalIdx);
7582 }
7583 
7584 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
7585                                OptionalImmIndexMap &OptIdx) {
7586   const int Opc = Inst.getOpcode();
7587   const MCInstrDesc &Desc = MII.get(Opc);
7588 
7589   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
7590 
7591   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
7592     assert(!IsPacked);
7593     Inst.addOperand(Inst.getOperand(0));
7594   }
7595 
  // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
  // instruction, and then figure out where to actually put the modifiers.
7598 
7599   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7600   if (OpSelIdx != -1) {
7601     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
7602   }
7603 
7604   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
7605   if (OpSelHiIdx != -1) {
7606     int DefaultVal = IsPacked ? -1 : 0;
7607     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
7608                           DefaultVal);
7609   }
7610 
7611   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
7612   if (NegLoIdx != -1) {
7613     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
7614     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
7615   }
7616 
7617   const int Ops[] = { AMDGPU::OpName::src0,
7618                       AMDGPU::OpName::src1,
7619                       AMDGPU::OpName::src2 };
7620   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
7621                          AMDGPU::OpName::src1_modifiers,
7622                          AMDGPU::OpName::src2_modifiers };
7623 
7624   unsigned OpSel = 0;
7625   unsigned OpSelHi = 0;
7626   unsigned NegLo = 0;
7627   unsigned NegHi = 0;
7628 
7629   if (OpSelIdx != -1)
7630     OpSel = Inst.getOperand(OpSelIdx).getImm();
7631 
7632   if (OpSelHiIdx != -1)
7633     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
7634 
7635   if (NegLoIdx != -1) {
7636     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
7637     NegLo = Inst.getOperand(NegLoIdx).getImm();
7638     NegHi = Inst.getOperand(NegHiIdx).getImm();
7639   }
7640 
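  // Fold per-operand bits into srcN_modifiers. For example (illustrative),
  // op_sel:[1,0,0] sets SISrcMods::OP_SEL_0 only in src0_modifiers, and
  // neg_lo:[0,1,0] sets SISrcMods::NEG only in src1_modifiers.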
7641   for (int J = 0; J < 3; ++J) {
7642     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
7643     if (OpIdx == -1)
7644       break;
7645 
7646     uint32_t ModVal = 0;
7647 
7648     if ((OpSel & (1 << J)) != 0)
7649       ModVal |= SISrcMods::OP_SEL_0;
7650 
7651     if ((OpSelHi & (1 << J)) != 0)
7652       ModVal |= SISrcMods::OP_SEL_1;
7653 
7654     if ((NegLo & (1 << J)) != 0)
7655       ModVal |= SISrcMods::NEG;
7656 
7657     if ((NegHi & (1 << J)) != 0)
7658       ModVal |= SISrcMods::NEG_HI;
7659 
7660     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
7661 
7662     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
7663   }
7664 }
7665 
7666 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
7667   OptionalImmIndexMap OptIdx;
7668   cvtVOP3(Inst, Operands, OptIdx);
7669   cvtVOP3P(Inst, Operands, OptIdx);
7670 }
7671 
7672 //===----------------------------------------------------------------------===//
7673 // dpp
7674 //===----------------------------------------------------------------------===//
7675 
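// dpp_ctrl values accepted by the check below include, for example:
//   quad_perm:[0,1,2,3]  row_shl:1  row_shr:15  row_ror:7
//   row_mirror  row_half_mirror  wave_shl:1  row_bcast:15
// (illustrative spellings; the immediate ranges are what is validated here).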
7676 bool AMDGPUOperand::isDPP8() const {
7677   return isImmTy(ImmTyDPP8);
7678 }
7679 
7680 bool AMDGPUOperand::isDPPCtrl() const {
7681   using namespace AMDGPU::DPP;
7682 
7683   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
7684   if (result) {
7685     int64_t Imm = getImm();
7686     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
7687            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
7688            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
7689            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
7690            (Imm == DppCtrl::WAVE_SHL1) ||
7691            (Imm == DppCtrl::WAVE_ROL1) ||
7692            (Imm == DppCtrl::WAVE_SHR1) ||
7693            (Imm == DppCtrl::WAVE_ROR1) ||
7694            (Imm == DppCtrl::ROW_MIRROR) ||
7695            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
7696            (Imm == DppCtrl::BCAST15) ||
7697            (Imm == DppCtrl::BCAST31) ||
7698            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
7699            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
7700   }
7701   return false;
7702 }
7703 
7704 //===----------------------------------------------------------------------===//
7705 // mAI
7706 //===----------------------------------------------------------------------===//
7707 
7708 bool AMDGPUOperand::isBLGP() const {
7709   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
7710 }
7711 
7712 bool AMDGPUOperand::isCBSZ() const {
7713   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
7714 }
7715 
7716 bool AMDGPUOperand::isABID() const {
7717   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
7718 }
7719 
7720 bool AMDGPUOperand::isS16Imm() const {
7721   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
7722 }
7723 
7724 bool AMDGPUOperand::isU16Imm() const {
7725   return isImm() && isUInt<16>(getImm());
7726 }
7727 
7728 //===----------------------------------------------------------------------===//
7729 // dim
7730 //===----------------------------------------------------------------------===//
7731 
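// Examples of accepted spellings (illustrative): dim:1D, dim:2D_ARRAY,
// dim:SQ_RSRC_IMG_2D. The optional SQ_RSRC_IMG_ prefix is stripped before
// the suffix is looked up.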
7732 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
7733   // We want to allow "dim:1D" etc.,
7734   // but the initial 1 is tokenized as an integer.
7735   std::string Token;
7736   if (isToken(AsmToken::Integer)) {
7737     SMLoc Loc = getToken().getEndLoc();
7738     Token = std::string(getTokenStr());
7739     lex();
7740     if (getLoc() != Loc)
7741       return false;
7742   }
7743 
7744   StringRef Suffix;
7745   if (!parseId(Suffix))
7746     return false;
7747   Token += Suffix;
7748 
7749   StringRef DimId = Token;
7750   if (DimId.startswith("SQ_RSRC_IMG_"))
7751     DimId = DimId.drop_front(12);
7752 
7753   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
7754   if (!DimInfo)
7755     return false;
7756 
7757   Encoding = DimInfo->Encoding;
7758   return true;
7759 }

OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
  if (!isGFX10Plus())
    return MatchOperand_NoMatch;

  SMLoc S = getLoc();

  if (!trySkipId("dim", AsmToken::Colon))
    return MatchOperand_NoMatch;

  unsigned Encoding;
  SMLoc Loc = getLoc();
  if (!parseDimId(Encoding)) {
    Error(Loc, "invalid dim value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
                                              AMDGPUOperand::ImmTyDim));
  return MatchOperand_Success;
}
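
// Example of a GFX10+ MIMG instruction carrying a dim operand (illustrative):
//   image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D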

//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
  SMLoc S = getLoc();

  if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
    return MatchOperand_NoMatch;

  // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]

  int64_t Sels[8];

  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
    return MatchOperand_ParseFail;

  for (size_t i = 0; i < 8; ++i) {
    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
      return MatchOperand_ParseFail;

    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Sels[i]))
      return MatchOperand_ParseFail;
    if (Sels[i] < 0 || Sels[i] > 7) {
      Error(Loc, "expected a 3-bit value");
      return MatchOperand_ParseFail;
    }
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return MatchOperand_ParseFail;

  // Pack the eight 3-bit selectors into a single 24-bit immediate.
  unsigned DPP8 = 0;
  for (size_t i = 0; i < 8; ++i)
    DPP8 |= (Sels[i] << (i * 3));

  Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
  return MatchOperand_Success;
}
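
// For example (illustrative), "v_mov_b32_dpp v0, v1 dpp8:[7,6,5,4,3,2,1,0]"
// packs to 7 | (6<<3) | (5<<6) | ... | (0<<21); lane i of each group of
// eight reads from the lane named by the i-th selector.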

bool
AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
                                    const OperandVector &Operands) {
  if (Ctrl == "row_newbcast")
    return isGFX90A();

  if (Ctrl == "row_share" ||
      Ctrl == "row_xmask")
    return isGFX10Plus();

  if (Ctrl == "wave_shl" ||
      Ctrl == "wave_shr" ||
      Ctrl == "wave_rol" ||
      Ctrl == "wave_ror" ||
      Ctrl == "row_bcast")
    return isVI() || isGFX9();

  return Ctrl == "row_mirror" ||
         Ctrl == "row_half_mirror" ||
         Ctrl == "quad_perm" ||
         Ctrl == "row_shl" ||
         Ctrl == "row_shr" ||
         Ctrl == "row_ror";
}

int64_t
AMDGPUAsmParser::parseDPPCtrlPerm() {
  // quad_perm:[%d,%d,%d,%d]

  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
    return -1;

  int64_t Val = 0;
  for (int i = 0; i < 4; ++i) {
    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
      return -1;

    int64_t Temp;
    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Temp))
      return -1;
    if (Temp < 0 || Temp > 3) {
      Error(Loc, "expected a 2-bit value");
      return -1;
    }

    // Pack the four 2-bit selectors into an 8-bit value.
    Val += (Temp << (i * 2));
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return -1;

  return Val;
}
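
// For example (illustrative), "quad_perm:[3,2,1,0]" packs to
// 3 | (2<<2) | (1<<4) | (0<<6) = 0x1B, which reverses each group of four
// lanes.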

int64_t
AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
  using namespace AMDGPU::DPP;

  // sel:%d

  int64_t Val;
  SMLoc Loc = getLoc();

  if (getParser().parseAbsoluteExpression(Val))
    return -1;

  struct DppCtrlCheck {
    int64_t Ctrl;
    int Lo;
    int Hi;
  };

  DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
    .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
    .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
    .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
    .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
    .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
    .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
    .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
    .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
    .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
    .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
    .Default({-1, 0, 0});

  bool Valid;
  if (Check.Ctrl == -1) {
    // Only "row_bcast" remains unhandled; it accepts exactly 15 or 31.
    Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
    Val = (Val == 15) ? DppCtrl::BCAST15 : DppCtrl::BCAST31;
  } else {
    Valid = Check.Lo <= Val && Val <= Check.Hi;
    Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
  }

  if (!Valid) {
    Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
    return -1;
  }

  return Val;
}
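
// For example (illustrative), "row_shl:1" encodes as DppCtrl::ROW_SHL0 | 1,
// while the single-value wave controls such as "wave_shl:1" encode as the
// fixed DppCtrl::WAVE_SHL1 constant.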

OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  if (!isToken(AsmToken::Identifier) ||
      !isSupportedDPPCtrl(getTokenStr(), Operands))
    return MatchOperand_NoMatch;

  SMLoc S = getLoc();
  int64_t Val = -1;
  StringRef Ctrl;

  parseId(Ctrl);

  if (Ctrl == "row_mirror") {
    Val = DppCtrl::ROW_MIRROR;
  } else if (Ctrl == "row_half_mirror") {
    Val = DppCtrl::ROW_HALF_MIRROR;
  } else {
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (Ctrl == "quad_perm") {
        Val = parseDPPCtrlPerm();
      } else {
        Val = parseDPPCtrlSel(Ctrl);
      }
    }
  }

  if (Val == -1)
    return MatchOperand_ParseFail;

  Operands.push_back(
    AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
}

void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  unsigned Opc = Inst.getOpcode();
  bool HasModifiers =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // Handle the tied old or src2 operand for MAC instructions.
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp uses the "vcc" token.
      // Skip it.
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, 1);
      } else if (HasModifiers &&
                 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isFI()) {
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (HasModifiers &&
          isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }

  if (IsDPP8) {
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
  } else {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
    if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
    }
  }
}
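
// Note that omitted optional operands get defaults here, e.g. (illustrative)
// "v_mov_b32_dpp v0, v1 row_shl:1" is completed with row_mask:0xf,
// bank_mask:0xf and a zero bound_ctrl.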

//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  SMLoc StringLoc;
  res = parseStringWithPrefix(Prefix, Value, StringLoc);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);

  if (Int == 0xffffffff) {
    Error(StringLoc, "invalid " + Twine(Prefix) + " value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  SMLoc StringLoc;
  res = parseStringWithPrefix("dst_unused", Value, StringLoc);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);

  if (Int == 0xffffffff) {
    Error(StringLoc, "invalid dst_unused value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}
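
// For example (illustrative), a full set of SDWA operands looks like:
//   v_mov_b32_sdwa v0, v1 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1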

void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, /*SkipDstVcc=*/true,
          /*SkipSrcVcc=*/true);
}

void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, /*SkipDstVcc=*/false,
          /*SkipSrcVcc=*/true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, /*SkipDstVcc=*/isVI());
}

void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa uses the "vcc" token as dst.
      // Skip it if it's the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on the previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi/gfx9/gfx10 have no optional sdwa arguments.
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case for v_mac_{f16, f32}:
  // their src2 register operand is tied to the dst operand.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
      it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}
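
// For example (illustrative), in "v_mac_f32_sdwa v3, v1, v2 dst_sel:DWORD
// dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD" the special case
// above re-inserts v3 as the tied src2 operand.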

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that
// the MatchClassKind enum is defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks if we were given an immediate operand where
  // the matcher expects the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but
    // it should also be enabled with 64-bit operands.
    // The following code enables it for SReg_64 operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}
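
// For example (illustrative), the trailing "gds" in "ds_gws_init v2 gds" is
// parsed as an immediate operand, so the MCK_gds case above accepts it where
// the matcher would otherwise insist on a literal token.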

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if absent, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
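
// For example (illustrative), both "s_endpgm" and "s_endpgm 3" are accepted;
// the bare form takes the default immediate of 0.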