1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/MC/MCAsmInfo.h"
24 #include "llvm/MC/MCContext.h"
25 #include "llvm/MC/MCExpr.h"
26 #include "llvm/MC/MCInst.h"
27 #include "llvm/MC/MCParser/MCAsmParser.h"
28 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
29 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
30 #include "llvm/MC/MCSymbol.h"
31 #include "llvm/Support/AMDGPUMetadata.h"
32 #include "llvm/Support/AMDHSAKernelDescriptor.h"
33 #include "llvm/Support/Casting.h"
34 #include "llvm/Support/MachineValueType.h"
35 #include "llvm/Support/TargetParser.h"
36 #include "llvm/Support/TargetRegistry.h"
37 
38 using namespace llvm;
39 using namespace llvm::AMDGPU;
40 using namespace llvm::amdhsa;
41 
42 namespace {
43 
44 class AMDGPUAsmParser;
45 
46 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
47 
48 //===----------------------------------------------------------------------===//
49 // Operand
50 //===----------------------------------------------------------------------===//
51 
52 class AMDGPUOperand : public MCParsedAsmOperand {
53   enum KindTy {
54     Token,
55     Immediate,
56     Register,
57     Expression
58   } Kind;
59 
60   SMLoc StartLoc, EndLoc;
61   const AMDGPUAsmParser *AsmParser;
62 
63 public:
64   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
65     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
66 
67   using Ptr = std::unique_ptr<AMDGPUOperand>;
68 
69   struct Modifiers {
70     bool Abs = false;
71     bool Neg = false;
72     bool Sext = false;
73 
74     bool hasFPModifiers() const { return Abs || Neg; }
75     bool hasIntModifiers() const { return Sext; }
76     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
77 
78     int64_t getFPModifiersOperand() const {
79       int64_t Operand = 0;
80       Operand |= Abs ? SISrcMods::ABS : 0u;
81       Operand |= Neg ? SISrcMods::NEG : 0u;
82       return Operand;
83     }
84 
85     int64_t getIntModifiersOperand() const {
86       int64_t Operand = 0;
87       Operand |= Sext ? SISrcMods::SEXT : 0u;
88       return Operand;
89     }
90 
91     int64_t getModifiersOperand() const {
92       assert(!(hasFPModifiers() && hasIntModifiers())
93            && "fp and int modifiers should not be used simultaneously");
94       if (hasFPModifiers()) {
95         return getFPModifiersOperand();
96       } else if (hasIntModifiers()) {
97         return getIntModifiersOperand();
98       } else {
99         return 0;
100       }
101     }
102 
103     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
104   };
105 
106   enum ImmTy {
107     ImmTyNone,
108     ImmTyGDS,
109     ImmTyLDS,
110     ImmTyOffen,
111     ImmTyIdxen,
112     ImmTyAddr64,
113     ImmTyOffset,
114     ImmTyInstOffset,
115     ImmTyOffset0,
116     ImmTyOffset1,
117     ImmTyCPol,
118     ImmTySWZ,
119     ImmTyTFE,
120     ImmTyD16,
121     ImmTyClampSI,
122     ImmTyOModSI,
123     ImmTyDPP8,
124     ImmTyDppCtrl,
125     ImmTyDppRowMask,
126     ImmTyDppBankMask,
127     ImmTyDppBoundCtrl,
128     ImmTyDppFi,
129     ImmTySdwaDstSel,
130     ImmTySdwaSrc0Sel,
131     ImmTySdwaSrc1Sel,
132     ImmTySdwaDstUnused,
133     ImmTyDMask,
134     ImmTyDim,
135     ImmTyUNorm,
136     ImmTyDA,
137     ImmTyR128A16,
138     ImmTyA16,
139     ImmTyLWE,
140     ImmTyExpTgt,
141     ImmTyExpCompr,
142     ImmTyExpVM,
143     ImmTyFORMAT,
144     ImmTyHwreg,
145     ImmTyOff,
146     ImmTySendMsg,
147     ImmTyInterpSlot,
148     ImmTyInterpAttr,
149     ImmTyAttrChan,
150     ImmTyOpSel,
151     ImmTyOpSelHi,
152     ImmTyNegLo,
153     ImmTyNegHi,
154     ImmTySwizzle,
155     ImmTyGprIdxMode,
156     ImmTyHigh,
157     ImmTyBLGP,
158     ImmTyCBSZ,
159     ImmTyABID,
160     ImmTyEndpgm,
161   };
162 
163   enum ImmKindTy {
164     ImmKindTyNone,
165     ImmKindTyLiteral,
166     ImmKindTyConst,
167   };
168 
169 private:
170   struct TokOp {
171     const char *Data;
172     unsigned Length;
173   };
174 
175   struct ImmOp {
176     int64_t Val;
177     ImmTy Type;
178     bool IsFPImm;
179     mutable ImmKindTy Kind;
180     Modifiers Mods;
181   };
182 
183   struct RegOp {
184     unsigned RegNo;
185     Modifiers Mods;
186   };
187 
188   union {
189     TokOp Tok;
190     ImmOp Imm;
191     RegOp Reg;
192     const MCExpr *Expr;
193   };
194 
195 public:
196   bool isToken() const override {
197     if (Kind == Token)
198       return true;
199 
200     // When parsing operands, we can't always tell if something was meant to be
201     // a token, like 'gds', or an expression that references a global variable.
202     // In this case, we assume the string is an expression, and if we need to
203     // interpret is a token, then we treat the symbol name as the token.
204     return isSymbolRefExpr();
205   }
206 
207   bool isSymbolRefExpr() const {
208     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
209   }
210 
211   bool isImm() const override {
212     return Kind == Immediate;
213   }
214 
215   void setImmKindNone() const {
216     assert(isImm());
217     Imm.Kind = ImmKindTyNone;
218   }
219 
220   void setImmKindLiteral() const {
221     assert(isImm());
222     Imm.Kind = ImmKindTyLiteral;
223   }
224 
225   void setImmKindConst() const {
226     assert(isImm());
227     Imm.Kind = ImmKindTyConst;
228   }
229 
230   bool IsImmKindLiteral() const {
231     return isImm() && Imm.Kind == ImmKindTyLiteral;
232   }
233 
234   bool isImmKindConst() const {
235     return isImm() && Imm.Kind == ImmKindTyConst;
236   }
237 
238   bool isInlinableImm(MVT type) const;
239   bool isLiteralImm(MVT type) const;
240 
241   bool isRegKind() const {
242     return Kind == Register;
243   }
244 
245   bool isReg() const override {
246     return isRegKind() && !hasModifiers();
247   }
248 
249   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
250     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
251   }
252 
253   bool isRegOrImmWithInt16InputMods() const {
254     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
255   }
256 
257   bool isRegOrImmWithInt32InputMods() const {
258     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
259   }
260 
261   bool isRegOrImmWithInt64InputMods() const {
262     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
263   }
264 
265   bool isRegOrImmWithFP16InputMods() const {
266     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
267   }
268 
269   bool isRegOrImmWithFP32InputMods() const {
270     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
271   }
272 
273   bool isRegOrImmWithFP64InputMods() const {
274     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
275   }
276 
277   bool isVReg() const {
278     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
279            isRegClass(AMDGPU::VReg_64RegClassID) ||
280            isRegClass(AMDGPU::VReg_96RegClassID) ||
281            isRegClass(AMDGPU::VReg_128RegClassID) ||
282            isRegClass(AMDGPU::VReg_160RegClassID) ||
283            isRegClass(AMDGPU::VReg_192RegClassID) ||
284            isRegClass(AMDGPU::VReg_256RegClassID) ||
285            isRegClass(AMDGPU::VReg_512RegClassID) ||
286            isRegClass(AMDGPU::VReg_1024RegClassID);
287   }
288 
289   bool isVReg32() const {
290     return isRegClass(AMDGPU::VGPR_32RegClassID);
291   }
292 
293   bool isVReg32OrOff() const {
294     return isOff() || isVReg32();
295   }
296 
297   bool isNull() const {
298     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
299   }
300 
301   bool isVRegWithInputMods() const;
302 
303   bool isSDWAOperand(MVT type) const;
304   bool isSDWAFP16Operand() const;
305   bool isSDWAFP32Operand() const;
306   bool isSDWAInt16Operand() const;
307   bool isSDWAInt32Operand() const;
308 
309   bool isImmTy(ImmTy ImmT) const {
310     return isImm() && Imm.Type == ImmT;
311   }
312 
313   bool isImmModifier() const {
314     return isImm() && Imm.Type != ImmTyNone;
315   }
316 
317   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
318   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
319   bool isDMask() const { return isImmTy(ImmTyDMask); }
320   bool isDim() const { return isImmTy(ImmTyDim); }
321   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
322   bool isDA() const { return isImmTy(ImmTyDA); }
323   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
324   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
325   bool isLWE() const { return isImmTy(ImmTyLWE); }
326   bool isOff() const { return isImmTy(ImmTyOff); }
327   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
328   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
329   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
330   bool isOffen() const { return isImmTy(ImmTyOffen); }
331   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
332   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
333   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
334   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
335   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
336 
337   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
338   bool isGDS() const { return isImmTy(ImmTyGDS); }
339   bool isLDS() const { return isImmTy(ImmTyLDS); }
340   bool isCPol() const { return isImmTy(ImmTyCPol); }
341   bool isSWZ() const { return isImmTy(ImmTySWZ); }
342   bool isTFE() const { return isImmTy(ImmTyTFE); }
343   bool isD16() const { return isImmTy(ImmTyD16); }
344   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
345   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
346   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
347   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
348   bool isFI() const { return isImmTy(ImmTyDppFi); }
349   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
350   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
351   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
352   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
353   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
354   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
355   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
356   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
357   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
358   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
359   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
360   bool isHigh() const { return isImmTy(ImmTyHigh); }
361 
362   bool isMod() const {
363     return isClampSI() || isOModSI();
364   }
365 
366   bool isRegOrImm() const {
367     return isReg() || isImm();
368   }
369 
370   bool isRegClass(unsigned RCID) const;
371 
372   bool isInlineValue() const;
373 
374   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
375     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
376   }
377 
378   bool isSCSrcB16() const {
379     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
380   }
381 
382   bool isSCSrcV2B16() const {
383     return isSCSrcB16();
384   }
385 
386   bool isSCSrcB32() const {
387     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
388   }
389 
390   bool isSCSrcB64() const {
391     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
392   }
393 
394   bool isBoolReg() const;
395 
396   bool isSCSrcF16() const {
397     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
398   }
399 
400   bool isSCSrcV2F16() const {
401     return isSCSrcF16();
402   }
403 
404   bool isSCSrcF32() const {
405     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
406   }
407 
408   bool isSCSrcF64() const {
409     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
410   }
411 
412   bool isSSrcB32() const {
413     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
414   }
415 
416   bool isSSrcB16() const {
417     return isSCSrcB16() || isLiteralImm(MVT::i16);
418   }
419 
420   bool isSSrcV2B16() const {
421     llvm_unreachable("cannot happen");
422     return isSSrcB16();
423   }
424 
425   bool isSSrcB64() const {
426     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
427     // See isVSrc64().
428     return isSCSrcB64() || isLiteralImm(MVT::i64);
429   }
430 
431   bool isSSrcF32() const {
432     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
433   }
434 
435   bool isSSrcF64() const {
436     return isSCSrcB64() || isLiteralImm(MVT::f64);
437   }
438 
439   bool isSSrcF16() const {
440     return isSCSrcB16() || isLiteralImm(MVT::f16);
441   }
442 
443   bool isSSrcV2F16() const {
444     llvm_unreachable("cannot happen");
445     return isSSrcF16();
446   }
447 
448   bool isSSrcV2FP32() const {
449     llvm_unreachable("cannot happen");
450     return isSSrcF32();
451   }
452 
453   bool isSCSrcV2FP32() const {
454     llvm_unreachable("cannot happen");
455     return isSCSrcF32();
456   }
457 
458   bool isSSrcV2INT32() const {
459     llvm_unreachable("cannot happen");
460     return isSSrcB32();
461   }
462 
463   bool isSCSrcV2INT32() const {
464     llvm_unreachable("cannot happen");
465     return isSCSrcB32();
466   }
467 
468   bool isSSrcOrLdsB32() const {
469     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
470            isLiteralImm(MVT::i32) || isExpr();
471   }
472 
473   bool isVCSrcB32() const {
474     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
475   }
476 
477   bool isVCSrcB64() const {
478     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
479   }
480 
481   bool isVCSrcB16() const {
482     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
483   }
484 
485   bool isVCSrcV2B16() const {
486     return isVCSrcB16();
487   }
488 
489   bool isVCSrcF32() const {
490     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
491   }
492 
493   bool isVCSrcF64() const {
494     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
495   }
496 
497   bool isVCSrcF16() const {
498     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
499   }
500 
501   bool isVCSrcV2F16() const {
502     return isVCSrcF16();
503   }
504 
505   bool isVSrcB32() const {
506     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
507   }
508 
509   bool isVSrcB64() const {
510     return isVCSrcF64() || isLiteralImm(MVT::i64);
511   }
512 
513   bool isVSrcB16() const {
514     return isVCSrcB16() || isLiteralImm(MVT::i16);
515   }
516 
517   bool isVSrcV2B16() const {
518     return isVSrcB16() || isLiteralImm(MVT::v2i16);
519   }
520 
521   bool isVCSrcV2FP32() const {
522     return isVCSrcF64();
523   }
524 
525   bool isVSrcV2FP32() const {
526     return isVSrcF64() || isLiteralImm(MVT::v2f32);
527   }
528 
529   bool isVCSrcV2INT32() const {
530     return isVCSrcB64();
531   }
532 
533   bool isVSrcV2INT32() const {
534     return isVSrcB64() || isLiteralImm(MVT::v2i32);
535   }
536 
537   bool isVSrcF32() const {
538     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
539   }
540 
541   bool isVSrcF64() const {
542     return isVCSrcF64() || isLiteralImm(MVT::f64);
543   }
544 
545   bool isVSrcF16() const {
546     return isVCSrcF16() || isLiteralImm(MVT::f16);
547   }
548 
549   bool isVSrcV2F16() const {
550     return isVSrcF16() || isLiteralImm(MVT::v2f16);
551   }
552 
553   bool isVISrcB32() const {
554     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
555   }
556 
557   bool isVISrcB16() const {
558     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
559   }
560 
561   bool isVISrcV2B16() const {
562     return isVISrcB16();
563   }
564 
565   bool isVISrcF32() const {
566     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
567   }
568 
569   bool isVISrcF16() const {
570     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
571   }
572 
573   bool isVISrcV2F16() const {
574     return isVISrcF16() || isVISrcB32();
575   }
576 
577   bool isVISrc_64B64() const {
578     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
579   }
580 
581   bool isVISrc_64F64() const {
582     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
583   }
584 
585   bool isVISrc_64V2FP32() const {
586     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
587   }
588 
589   bool isVISrc_64V2INT32() const {
590     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
591   }
592 
593   bool isVISrc_256B64() const {
594     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
595   }
596 
597   bool isVISrc_256F64() const {
598     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
599   }
600 
601   bool isVISrc_128B16() const {
602     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
603   }
604 
605   bool isVISrc_128V2B16() const {
606     return isVISrc_128B16();
607   }
608 
609   bool isVISrc_128B32() const {
610     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
611   }
612 
613   bool isVISrc_128F32() const {
614     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
615   }
616 
617   bool isVISrc_256V2FP32() const {
618     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
619   }
620 
621   bool isVISrc_256V2INT32() const {
622     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
623   }
624 
625   bool isVISrc_512B32() const {
626     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
627   }
628 
629   bool isVISrc_512B16() const {
630     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
631   }
632 
633   bool isVISrc_512V2B16() const {
634     return isVISrc_512B16();
635   }
636 
637   bool isVISrc_512F32() const {
638     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
639   }
640 
641   bool isVISrc_512F16() const {
642     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
643   }
644 
645   bool isVISrc_512V2F16() const {
646     return isVISrc_512F16() || isVISrc_512B32();
647   }
648 
649   bool isVISrc_1024B32() const {
650     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
651   }
652 
653   bool isVISrc_1024B16() const {
654     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
655   }
656 
657   bool isVISrc_1024V2B16() const {
658     return isVISrc_1024B16();
659   }
660 
661   bool isVISrc_1024F32() const {
662     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
663   }
664 
665   bool isVISrc_1024F16() const {
666     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
667   }
668 
669   bool isVISrc_1024V2F16() const {
670     return isVISrc_1024F16() || isVISrc_1024B32();
671   }
672 
673   bool isAISrcB32() const {
674     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
675   }
676 
677   bool isAISrcB16() const {
678     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
679   }
680 
681   bool isAISrcV2B16() const {
682     return isAISrcB16();
683   }
684 
685   bool isAISrcF32() const {
686     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
687   }
688 
689   bool isAISrcF16() const {
690     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
691   }
692 
693   bool isAISrcV2F16() const {
694     return isAISrcF16() || isAISrcB32();
695   }
696 
697   bool isAISrc_64B64() const {
698     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
699   }
700 
701   bool isAISrc_64F64() const {
702     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
703   }
704 
705   bool isAISrc_128B32() const {
706     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
707   }
708 
709   bool isAISrc_128B16() const {
710     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
711   }
712 
713   bool isAISrc_128V2B16() const {
714     return isAISrc_128B16();
715   }
716 
717   bool isAISrc_128F32() const {
718     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
719   }
720 
721   bool isAISrc_128F16() const {
722     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
723   }
724 
725   bool isAISrc_128V2F16() const {
726     return isAISrc_128F16() || isAISrc_128B32();
727   }
728 
729   bool isVISrc_128F16() const {
730     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
731   }
732 
733   bool isVISrc_128V2F16() const {
734     return isVISrc_128F16() || isVISrc_128B32();
735   }
736 
737   bool isAISrc_256B64() const {
738     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
739   }
740 
741   bool isAISrc_256F64() const {
742     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
743   }
744 
745   bool isAISrc_512B32() const {
746     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
747   }
748 
749   bool isAISrc_512B16() const {
750     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
751   }
752 
753   bool isAISrc_512V2B16() const {
754     return isAISrc_512B16();
755   }
756 
757   bool isAISrc_512F32() const {
758     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
759   }
760 
761   bool isAISrc_512F16() const {
762     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
763   }
764 
765   bool isAISrc_512V2F16() const {
766     return isAISrc_512F16() || isAISrc_512B32();
767   }
768 
769   bool isAISrc_1024B32() const {
770     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
771   }
772 
773   bool isAISrc_1024B16() const {
774     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
775   }
776 
777   bool isAISrc_1024V2B16() const {
778     return isAISrc_1024B16();
779   }
780 
781   bool isAISrc_1024F32() const {
782     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
783   }
784 
785   bool isAISrc_1024F16() const {
786     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
787   }
788 
789   bool isAISrc_1024V2F16() const {
790     return isAISrc_1024F16() || isAISrc_1024B32();
791   }
792 
793   bool isKImmFP32() const {
794     return isLiteralImm(MVT::f32);
795   }
796 
797   bool isKImmFP16() const {
798     return isLiteralImm(MVT::f16);
799   }
800 
801   bool isMem() const override {
802     return false;
803   }
804 
805   bool isExpr() const {
806     return Kind == Expression;
807   }
808 
809   bool isSoppBrTarget() const {
810     return isExpr() || isImm();
811   }
812 
813   bool isSWaitCnt() const;
814   bool isHwreg() const;
815   bool isSendMsg() const;
816   bool isSwizzle() const;
817   bool isSMRDOffset8() const;
818   bool isSMEMOffset() const;
819   bool isSMRDLiteralOffset() const;
820   bool isDPP8() const;
821   bool isDPPCtrl() const;
822   bool isBLGP() const;
823   bool isCBSZ() const;
824   bool isABID() const;
825   bool isGPRIdxMode() const;
826   bool isS16Imm() const;
827   bool isU16Imm() const;
828   bool isEndpgm() const;
829 
830   StringRef getExpressionAsToken() const {
831     assert(isExpr());
832     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
833     return S->getSymbol().getName();
834   }
835 
836   StringRef getToken() const {
837     assert(isToken());
838 
839     if (Kind == Expression)
840       return getExpressionAsToken();
841 
842     return StringRef(Tok.Data, Tok.Length);
843   }
844 
845   int64_t getImm() const {
846     assert(isImm());
847     return Imm.Val;
848   }
849 
850   void setImm(int64_t Val) {
851     assert(isImm());
852     Imm.Val = Val;
853   }
854 
855   ImmTy getImmTy() const {
856     assert(isImm());
857     return Imm.Type;
858   }
859 
860   unsigned getReg() const override {
861     assert(isRegKind());
862     return Reg.RegNo;
863   }
864 
865   SMLoc getStartLoc() const override {
866     return StartLoc;
867   }
868 
869   SMLoc getEndLoc() const override {
870     return EndLoc;
871   }
872 
873   SMRange getLocRange() const {
874     return SMRange(StartLoc, EndLoc);
875   }
876 
877   Modifiers getModifiers() const {
878     assert(isRegKind() || isImmTy(ImmTyNone));
879     return isRegKind() ? Reg.Mods : Imm.Mods;
880   }
881 
882   void setModifiers(Modifiers Mods) {
883     assert(isRegKind() || isImmTy(ImmTyNone));
884     if (isRegKind())
885       Reg.Mods = Mods;
886     else
887       Imm.Mods = Mods;
888   }
889 
890   bool hasModifiers() const {
891     return getModifiers().hasModifiers();
892   }
893 
894   bool hasFPModifiers() const {
895     return getModifiers().hasFPModifiers();
896   }
897 
898   bool hasIntModifiers() const {
899     return getModifiers().hasIntModifiers();
900   }
901 
902   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
903 
904   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
905 
906   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
907 
908   template <unsigned Bitwidth>
909   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
910 
911   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
912     addKImmFPOperands<16>(Inst, N);
913   }
914 
915   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
916     addKImmFPOperands<32>(Inst, N);
917   }
918 
919   void addRegOperands(MCInst &Inst, unsigned N) const;
920 
921   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
922     addRegOperands(Inst, N);
923   }
924 
925   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
926     if (isRegKind())
927       addRegOperands(Inst, N);
928     else if (isExpr())
929       Inst.addOperand(MCOperand::createExpr(Expr));
930     else
931       addImmOperands(Inst, N);
932   }
933 
934   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
935     Modifiers Mods = getModifiers();
936     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
937     if (isRegKind()) {
938       addRegOperands(Inst, N);
939     } else {
940       addImmOperands(Inst, N, false);
941     }
942   }
943 
944   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
945     assert(!hasIntModifiers());
946     addRegOrImmWithInputModsOperands(Inst, N);
947   }
948 
949   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
950     assert(!hasFPModifiers());
951     addRegOrImmWithInputModsOperands(Inst, N);
952   }
953 
954   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
955     Modifiers Mods = getModifiers();
956     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
957     assert(isRegKind());
958     addRegOperands(Inst, N);
959   }
960 
961   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
962     assert(!hasIntModifiers());
963     addRegWithInputModsOperands(Inst, N);
964   }
965 
966   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
967     assert(!hasFPModifiers());
968     addRegWithInputModsOperands(Inst, N);
969   }
970 
971   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
972     if (isImm())
973       addImmOperands(Inst, N);
974     else {
975       assert(isExpr());
976       Inst.addOperand(MCOperand::createExpr(Expr));
977     }
978   }
979 
980   static void printImmTy(raw_ostream& OS, ImmTy Type) {
981     switch (Type) {
982     case ImmTyNone: OS << "None"; break;
983     case ImmTyGDS: OS << "GDS"; break;
984     case ImmTyLDS: OS << "LDS"; break;
985     case ImmTyOffen: OS << "Offen"; break;
986     case ImmTyIdxen: OS << "Idxen"; break;
987     case ImmTyAddr64: OS << "Addr64"; break;
988     case ImmTyOffset: OS << "Offset"; break;
989     case ImmTyInstOffset: OS << "InstOffset"; break;
990     case ImmTyOffset0: OS << "Offset0"; break;
991     case ImmTyOffset1: OS << "Offset1"; break;
992     case ImmTyCPol: OS << "CPol"; break;
993     case ImmTySWZ: OS << "SWZ"; break;
994     case ImmTyTFE: OS << "TFE"; break;
995     case ImmTyD16: OS << "D16"; break;
996     case ImmTyFORMAT: OS << "FORMAT"; break;
997     case ImmTyClampSI: OS << "ClampSI"; break;
998     case ImmTyOModSI: OS << "OModSI"; break;
999     case ImmTyDPP8: OS << "DPP8"; break;
1000     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1001     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1002     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1003     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1004     case ImmTyDppFi: OS << "FI"; break;
1005     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1006     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1007     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1008     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1009     case ImmTyDMask: OS << "DMask"; break;
1010     case ImmTyDim: OS << "Dim"; break;
1011     case ImmTyUNorm: OS << "UNorm"; break;
1012     case ImmTyDA: OS << "DA"; break;
1013     case ImmTyR128A16: OS << "R128A16"; break;
1014     case ImmTyA16: OS << "A16"; break;
1015     case ImmTyLWE: OS << "LWE"; break;
1016     case ImmTyOff: OS << "Off"; break;
1017     case ImmTyExpTgt: OS << "ExpTgt"; break;
1018     case ImmTyExpCompr: OS << "ExpCompr"; break;
1019     case ImmTyExpVM: OS << "ExpVM"; break;
1020     case ImmTyHwreg: OS << "Hwreg"; break;
1021     case ImmTySendMsg: OS << "SendMsg"; break;
1022     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1023     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1024     case ImmTyAttrChan: OS << "AttrChan"; break;
1025     case ImmTyOpSel: OS << "OpSel"; break;
1026     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1027     case ImmTyNegLo: OS << "NegLo"; break;
1028     case ImmTyNegHi: OS << "NegHi"; break;
1029     case ImmTySwizzle: OS << "Swizzle"; break;
1030     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1031     case ImmTyHigh: OS << "High"; break;
1032     case ImmTyBLGP: OS << "BLGP"; break;
1033     case ImmTyCBSZ: OS << "CBSZ"; break;
1034     case ImmTyABID: OS << "ABID"; break;
1035     case ImmTyEndpgm: OS << "Endpgm"; break;
1036     }
1037   }
1038 
1039   void print(raw_ostream &OS) const override {
1040     switch (Kind) {
1041     case Register:
1042       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1043       break;
1044     case Immediate:
1045       OS << '<' << getImm();
1046       if (getImmTy() != ImmTyNone) {
1047         OS << " type: "; printImmTy(OS, getImmTy());
1048       }
1049       OS << " mods: " << Imm.Mods << '>';
1050       break;
1051     case Token:
1052       OS << '\'' << getToken() << '\'';
1053       break;
1054     case Expression:
1055       OS << "<expr " << *Expr << '>';
1056       break;
1057     }
1058   }
1059 
1060   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1061                                       int64_t Val, SMLoc Loc,
1062                                       ImmTy Type = ImmTyNone,
1063                                       bool IsFPImm = false) {
1064     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1065     Op->Imm.Val = Val;
1066     Op->Imm.IsFPImm = IsFPImm;
1067     Op->Imm.Kind = ImmKindTyNone;
1068     Op->Imm.Type = Type;
1069     Op->Imm.Mods = Modifiers();
1070     Op->StartLoc = Loc;
1071     Op->EndLoc = Loc;
1072     return Op;
1073   }
1074 
1075   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1076                                         StringRef Str, SMLoc Loc,
1077                                         bool HasExplicitEncodingSize = true) {
1078     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1079     Res->Tok.Data = Str.data();
1080     Res->Tok.Length = Str.size();
1081     Res->StartLoc = Loc;
1082     Res->EndLoc = Loc;
1083     return Res;
1084   }
1085 
1086   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1087                                       unsigned RegNo, SMLoc S,
1088                                       SMLoc E) {
1089     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1090     Op->Reg.RegNo = RegNo;
1091     Op->Reg.Mods = Modifiers();
1092     Op->StartLoc = S;
1093     Op->EndLoc = E;
1094     return Op;
1095   }
1096 
1097   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1098                                        const class MCExpr *Expr, SMLoc S) {
1099     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1100     Op->Expr = Expr;
1101     Op->StartLoc = S;
1102     Op->EndLoc = S;
1103     return Op;
1104   }
1105 };
1106 
1107 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1108   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1109   return OS;
1110 }
1111 
1112 //===----------------------------------------------------------------------===//
1113 // AsmParser
1114 //===----------------------------------------------------------------------===//
1115 
1116 // Holds info related to the current kernel, e.g. count of SGPRs used.
1117 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1118 // .amdgpu_hsa_kernel or at EOF.
1119 class KernelScopeInfo {
1120   int SgprIndexUnusedMin = -1;
1121   int VgprIndexUnusedMin = -1;
1122   MCContext *Ctx = nullptr;
1123 
1124   void usesSgprAt(int i) {
1125     if (i >= SgprIndexUnusedMin) {
1126       SgprIndexUnusedMin = ++i;
1127       if (Ctx) {
1128         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1129         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1130       }
1131     }
1132   }
1133 
1134   void usesVgprAt(int i) {
1135     if (i >= VgprIndexUnusedMin) {
1136       VgprIndexUnusedMin = ++i;
1137       if (Ctx) {
1138         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1139         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
1140       }
1141     }
1142   }
1143 
1144 public:
1145   KernelScopeInfo() = default;
1146 
1147   void initialize(MCContext &Context) {
1148     Ctx = &Context;
1149     usesSgprAt(SgprIndexUnusedMin = -1);
1150     usesVgprAt(VgprIndexUnusedMin = -1);
1151   }
1152 
1153   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
1154     switch (RegKind) {
1155       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
1156       case IS_AGPR: // fall through
1157       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1158       default: break;
1159     }
1160   }
1161 };
1162 
1163 class AMDGPUAsmParser : public MCTargetAsmParser {
1164   MCAsmParser &Parser;
1165 
1166   // Number of extra operands parsed after the first optional operand.
1167   // This may be necessary to skip hardcoded mandatory operands.
1168   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1169 
1170   unsigned ForcedEncodingSize = 0;
1171   bool ForcedDPP = false;
1172   bool ForcedSDWA = false;
1173   KernelScopeInfo KernelScope;
1174   unsigned CPolSeen;
1175 
1176   /// @name Auto-generated Match Functions
1177   /// {
1178 
1179 #define GET_ASSEMBLER_HEADER
1180 #include "AMDGPUGenAsmMatcher.inc"
1181 
1182   /// }
1183 
1184 private:
1185   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1186   bool OutOfRangeError(SMRange Range);
1187   /// Calculate VGPR/SGPR blocks required for given target, reserved
1188   /// registers, and user-specified NextFreeXGPR values.
1189   ///
1190   /// \param Features [in] Target features, used for bug corrections.
1191   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1192   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1193   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1194   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1195   /// descriptor field, if valid.
1196   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1197   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1198   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1199   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1200   /// \param VGPRBlocks [out] Result VGPR block count.
1201   /// \param SGPRBlocks [out] Result SGPR block count.
1202   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1203                           bool FlatScrUsed, bool XNACKUsed,
1204                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1205                           SMRange VGPRRange, unsigned NextFreeSGPR,
1206                           SMRange SGPRRange, unsigned &VGPRBlocks,
1207                           unsigned &SGPRBlocks);
1208   bool ParseDirectiveAMDGCNTarget();
1209   bool ParseDirectiveAMDHSAKernel();
1210   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1211   bool ParseDirectiveHSACodeObjectVersion();
1212   bool ParseDirectiveHSACodeObjectISA();
1213   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1214   bool ParseDirectiveAMDKernelCodeT();
1215   // TODO: Possibly make subtargetHasRegister const.
1216   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1217   bool ParseDirectiveAMDGPUHsaKernel();
1218 
1219   bool ParseDirectiveISAVersion();
1220   bool ParseDirectiveHSAMetadata();
1221   bool ParseDirectivePALMetadataBegin();
1222   bool ParseDirectivePALMetadata();
1223   bool ParseDirectiveAMDGPULDS();
1224 
1225   /// Common code to parse out a block of text (typically YAML) between start and
1226   /// end directives.
1227   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1228                            const char *AssemblerDirectiveEnd,
1229                            std::string &CollectString);
1230 
1231   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1232                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1233   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1234                            unsigned &RegNum, unsigned &RegWidth,
1235                            bool RestoreOnFailure = false);
1236   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1237                            unsigned &RegNum, unsigned &RegWidth,
1238                            SmallVectorImpl<AsmToken> &Tokens);
1239   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1240                            unsigned &RegWidth,
1241                            SmallVectorImpl<AsmToken> &Tokens);
1242   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1243                            unsigned &RegWidth,
1244                            SmallVectorImpl<AsmToken> &Tokens);
1245   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1246                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1247   bool ParseRegRange(unsigned& Num, unsigned& Width);
1248   unsigned getRegularReg(RegisterKind RegKind,
1249                          unsigned RegNum,
1250                          unsigned RegWidth,
1251                          SMLoc Loc);
1252 
1253   bool isRegister();
1254   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1255   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1256   void initializeGprCountSymbol(RegisterKind RegKind);
1257   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1258                              unsigned RegWidth);
1259   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1260                     bool IsAtomic, bool IsLds = false);
1261   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1262                  bool IsGdsHardcoded);
1263 
1264 public:
1265   enum AMDGPUMatchResultTy {
1266     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1267   };
1268   enum OperandMode {
1269     OperandMode_Default,
1270     OperandMode_NSA,
1271   };
1272 
1273   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1274 
1275   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1276                const MCInstrInfo &MII,
1277                const MCTargetOptions &Options)
1278       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1279     MCAsmParserExtension::Initialize(Parser);
1280 
1281     if (getFeatureBits().none()) {
1282       // Set default features.
1283       copySTI().ToggleFeature("southern-islands");
1284     }
1285 
1286     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1287 
1288     {
1289       // TODO: make those pre-defined variables read-only.
1290       // Currently there is none suitable machinery in the core llvm-mc for this.
1291       // MCSymbol::isRedefinable is intended for another purpose, and
1292       // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
1293       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1294       MCContext &Ctx = getContext();
1295       if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
1296         MCSymbol *Sym =
1297             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1298         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1299         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1300         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1301         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1302         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1303       } else {
1304         MCSymbol *Sym =
1305             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1306         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1307         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1308         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1309         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1310         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1311       }
1312       if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
1313         initializeGprCountSymbol(IS_VGPR);
1314         initializeGprCountSymbol(IS_SGPR);
1315       } else
1316         KernelScope.initialize(getContext());
1317     }
1318   }
1319 
1320   bool hasMIMG_R128() const {
1321     return AMDGPU::hasMIMG_R128(getSTI());
1322   }
1323 
1324   bool hasPackedD16() const {
1325     return AMDGPU::hasPackedD16(getSTI());
1326   }
1327 
1328   bool hasGFX10A16() const {
1329     return AMDGPU::hasGFX10A16(getSTI());
1330   }
1331 
1332   bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1333 
1334   bool isSI() const {
1335     return AMDGPU::isSI(getSTI());
1336   }
1337 
1338   bool isCI() const {
1339     return AMDGPU::isCI(getSTI());
1340   }
1341 
1342   bool isVI() const {
1343     return AMDGPU::isVI(getSTI());
1344   }
1345 
1346   bool isGFX9() const {
1347     return AMDGPU::isGFX9(getSTI());
1348   }
1349 
1350   bool isGFX90A() const {
1351     return AMDGPU::isGFX90A(getSTI());
1352   }
1353 
1354   bool isGFX9Plus() const {
1355     return AMDGPU::isGFX9Plus(getSTI());
1356   }
1357 
1358   bool isGFX10() const {
1359     return AMDGPU::isGFX10(getSTI());
1360   }
1361 
1362   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1363 
1364   bool isGFX10_BEncoding() const {
1365     return AMDGPU::isGFX10_BEncoding(getSTI());
1366   }
1367 
1368   bool hasInv2PiInlineImm() const {
1369     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1370   }
1371 
1372   bool hasFlatOffsets() const {
1373     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1374   }
1375 
1376   bool hasArchitectedFlatScratch() const {
1377     return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1378   }
1379 
1380   bool hasSGPR102_SGPR103() const {
1381     return !isVI() && !isGFX9();
1382   }
1383 
1384   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1385 
1386   bool hasIntClamp() const {
1387     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1388   }
1389 
1390   AMDGPUTargetStreamer &getTargetStreamer() {
1391     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1392     return static_cast<AMDGPUTargetStreamer &>(TS);
1393   }
1394 
1395   const MCRegisterInfo *getMRI() const {
1396     // We need this const_cast because for some reason getContext() is not const
1397     // in MCAsmParser.
1398     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1399   }
1400 
1401   const MCInstrInfo *getMII() const {
1402     return &MII;
1403   }
1404 
1405   const FeatureBitset &getFeatureBits() const {
1406     return getSTI().getFeatureBits();
1407   }
1408 
1409   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1410   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1411   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1412 
1413   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1414   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1415   bool isForcedDPP() const { return ForcedDPP; }
1416   bool isForcedSDWA() const { return ForcedSDWA; }
1417   ArrayRef<unsigned> getMatchedVariants() const;
1418   StringRef getMatchedVariantName() const;
1419 
1420   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1421   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1422                      bool RestoreOnFailure);
1423   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1424   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1425                                         SMLoc &EndLoc) override;
1426   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1427   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1428                                       unsigned Kind) override;
1429   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1430                                OperandVector &Operands, MCStreamer &Out,
1431                                uint64_t &ErrorInfo,
1432                                bool MatchingInlineAsm) override;
1433   bool ParseDirective(AsmToken DirectiveID) override;
1434   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1435                                     OperandMode Mode = OperandMode_Default);
1436   StringRef parseMnemonicSuffix(StringRef Name);
1437   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1438                         SMLoc NameLoc, OperandVector &Operands) override;
1439   //bool ProcessInstruction(MCInst &Inst);
1440 
1441   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1442 
1443   OperandMatchResultTy
1444   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1445                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1446                      bool (*ConvertResult)(int64_t &) = nullptr);
1447 
1448   OperandMatchResultTy
1449   parseOperandArrayWithPrefix(const char *Prefix,
1450                               OperandVector &Operands,
1451                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1452                               bool (*ConvertResult)(int64_t&) = nullptr);
1453 
1454   OperandMatchResultTy
1455   parseNamedBit(StringRef Name, OperandVector &Operands,
1456                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1457   OperandMatchResultTy parseCPol(OperandVector &Operands);
1458   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1459                                              StringRef &Value,
1460                                              SMLoc &StringLoc);
1461 
1462   bool isModifier();
1463   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1464   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1465   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1466   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1467   bool parseSP3NegModifier();
1468   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1469   OperandMatchResultTy parseReg(OperandVector &Operands);
1470   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1471   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1472   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1473   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1474   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1475   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1476   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1477   OperandMatchResultTy parseUfmt(int64_t &Format);
1478   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1479   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1480   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1481   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1482   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1483   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1484   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1485 
1486   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1487   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1488   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1489   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1490 
1491   bool parseCnt(int64_t &IntVal);
1492   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1493   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1494 
1495 private:
1496   struct OperandInfoTy {
1497     SMLoc Loc;
1498     int64_t Id;
1499     bool IsSymbolic = false;
1500     bool IsDefined = false;
1501 
1502     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1503   };
1504 
1505   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1506   bool validateSendMsg(const OperandInfoTy &Msg,
1507                        const OperandInfoTy &Op,
1508                        const OperandInfoTy &Stream);
1509 
1510   bool parseHwregBody(OperandInfoTy &HwReg,
1511                       OperandInfoTy &Offset,
1512                       OperandInfoTy &Width);
1513   bool validateHwreg(const OperandInfoTy &HwReg,
1514                      const OperandInfoTy &Offset,
1515                      const OperandInfoTy &Width);
1516 
1517   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1518   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1519 
1520   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1521                       const OperandVector &Operands) const;
1522   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1523   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1524   SMLoc getLitLoc(const OperandVector &Operands) const;
1525   SMLoc getConstLoc(const OperandVector &Operands) const;
1526 
1527   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1528   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1529   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1530   bool validateSOPLiteral(const MCInst &Inst) const;
1531   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1532   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1533   bool validateIntClampSupported(const MCInst &Inst);
1534   bool validateMIMGAtomicDMask(const MCInst &Inst);
1535   bool validateMIMGGatherDMask(const MCInst &Inst);
1536   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1537   bool validateMIMGDataSize(const MCInst &Inst);
1538   bool validateMIMGAddrSize(const MCInst &Inst);
1539   bool validateMIMGD16(const MCInst &Inst);
1540   bool validateMIMGDim(const MCInst &Inst);
1541   bool validateMIMGMSAA(const MCInst &Inst);
1542   bool validateOpSel(const MCInst &Inst);
1543   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1544   bool validateVccOperand(unsigned Reg) const;
1545   bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
1546   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1547   bool validateAGPRLdSt(const MCInst &Inst) const;
1548   bool validateVGPRAlign(const MCInst &Inst) const;
1549   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1550   bool validateDivScale(const MCInst &Inst);
1551   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1552                              const SMLoc &IDLoc);
1553   Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1554   unsigned getConstantBusLimit(unsigned Opcode) const;
1555   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1556   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1557   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1558 
1559   bool isSupportedMnemo(StringRef Mnemo,
1560                         const FeatureBitset &FBS);
1561   bool isSupportedMnemo(StringRef Mnemo,
1562                         const FeatureBitset &FBS,
1563                         ArrayRef<unsigned> Variants);
1564   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1565 
1566   bool isId(const StringRef Id) const;
1567   bool isId(const AsmToken &Token, const StringRef Id) const;
1568   bool isToken(const AsmToken::TokenKind Kind) const;
1569   bool trySkipId(const StringRef Id);
1570   bool trySkipId(const StringRef Pref, const StringRef Id);
1571   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1572   bool trySkipToken(const AsmToken::TokenKind Kind);
1573   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1574   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1575   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1576 
1577   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1578   AsmToken::TokenKind getTokenKind() const;
1579   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1580   bool parseExpr(OperandVector &Operands);
1581   StringRef getTokenStr() const;
1582   AsmToken peekToken();
1583   AsmToken getToken() const;
1584   SMLoc getLoc() const;
1585   void lex();
1586 
1587 public:
1588   void onBeginOfFile() override;
1589 
1590   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1591   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1592 
1593   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1594   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1595   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1596   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1597   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1598   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1599 
1600   bool parseSwizzleOperand(int64_t &Op,
1601                            const unsigned MinVal,
1602                            const unsigned MaxVal,
1603                            const StringRef ErrMsg,
1604                            SMLoc &Loc);
1605   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1606                             const unsigned MinVal,
1607                             const unsigned MaxVal,
1608                             const StringRef ErrMsg);
1609   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1610   bool parseSwizzleOffset(int64_t &Imm);
1611   bool parseSwizzleMacro(int64_t &Imm);
1612   bool parseSwizzleQuadPerm(int64_t &Imm);
1613   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1614   bool parseSwizzleBroadcast(int64_t &Imm);
1615   bool parseSwizzleSwap(int64_t &Imm);
1616   bool parseSwizzleReverse(int64_t &Imm);
1617 
1618   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1619   int64_t parseGPRIdxMacro();
1620 
1621   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1622   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1623   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1624   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1625 
1626   AMDGPUOperand::Ptr defaultCPol() const;
1627 
1628   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1629   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1630   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1631   AMDGPUOperand::Ptr defaultFlatOffset() const;
1632 
1633   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1634 
1635   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1636                OptionalImmIndexMap &OptionalIdx);
1637   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1638   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1639   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1640   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1641                 OptionalImmIndexMap &OptionalIdx);
1642 
1643   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1644 
1645   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1646                bool IsAtomic = false);
1647   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1648   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1649 
1650   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1651 
1652   bool parseDimId(unsigned &Encoding);
1653   OperandMatchResultTy parseDim(OperandVector &Operands);
1654   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1655   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1656   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1657   int64_t parseDPPCtrlSel(StringRef Ctrl);
1658   int64_t parseDPPCtrlPerm();
1659   AMDGPUOperand::Ptr defaultRowMask() const;
1660   AMDGPUOperand::Ptr defaultBankMask() const;
1661   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1662   AMDGPUOperand::Ptr defaultFI() const;
1663   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1664   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1665 
1666   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1667                                     AMDGPUOperand::ImmTy Type);
1668   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1669   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1670   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1671   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1672   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1673   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1674   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1675                uint64_t BasicInstType,
1676                bool SkipDstVcc = false,
1677                bool SkipSrcVcc = false);
1678 
1679   AMDGPUOperand::Ptr defaultBLGP() const;
1680   AMDGPUOperand::Ptr defaultCBSZ() const;
1681   AMDGPUOperand::Ptr defaultABID() const;
1682 
1683   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1684   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1685 };
1686 
1687 struct OptionalOperand {
1688   const char *Name;
1689   AMDGPUOperand::ImmTy Type;
1690   bool IsBit;
1691   bool (*ConvertResult)(int64_t&);
1692 };
1693 
1694 } // end anonymous namespace
1695 
1696 // May be called with integer type with equivalent bitwidth.
1697 static const fltSemantics *getFltSemantics(unsigned Size) {
1698   switch (Size) {
1699   case 4:
1700     return &APFloat::IEEEsingle();
1701   case 8:
1702     return &APFloat::IEEEdouble();
1703   case 2:
1704     return &APFloat::IEEEhalf();
1705   default:
1706     llvm_unreachable("unsupported fp type");
1707   }
1708 }
1709 
1710 static const fltSemantics *getFltSemantics(MVT VT) {
1711   return getFltSemantics(VT.getSizeInBits() / 8);
1712 }
1713 
1714 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1715   switch (OperandType) {
1716   case AMDGPU::OPERAND_REG_IMM_INT32:
1717   case AMDGPU::OPERAND_REG_IMM_FP32:
1718   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1719   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1720   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1721   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1722   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1723   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1724   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1725   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1726     return &APFloat::IEEEsingle();
1727   case AMDGPU::OPERAND_REG_IMM_INT64:
1728   case AMDGPU::OPERAND_REG_IMM_FP64:
1729   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1730   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1731   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1732     return &APFloat::IEEEdouble();
1733   case AMDGPU::OPERAND_REG_IMM_INT16:
1734   case AMDGPU::OPERAND_REG_IMM_FP16:
1735   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1736   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1737   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1738   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1739   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1740   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1741   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1742   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1743   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1744   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1745     return &APFloat::IEEEhalf();
1746   default:
1747     llvm_unreachable("unsupported fp type");
1748   }
1749 }
1750 
1751 //===----------------------------------------------------------------------===//
1752 // Operand
1753 //===----------------------------------------------------------------------===//
1754 
1755 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1756   bool Lost;
1757 
1758   // Convert literal to single precision
1759   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1760                                                APFloat::rmNearestTiesToEven,
1761                                                &Lost);
1762   // We allow precision lost but not overflow or underflow
1763   if (Status != APFloat::opOK &&
1764       Lost &&
1765       ((Status & APFloat::opOverflow)  != 0 ||
1766        (Status & APFloat::opUnderflow) != 0)) {
1767     return false;
1768   }
1769 
1770   return true;
1771 }
1772 
1773 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1774   return isUIntN(Size, Val) || isIntN(Size, Val);
1775 }
1776 
1777 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1778   if (VT.getScalarType() == MVT::i16) {
1779     // FP immediate values are broken.
1780     return isInlinableIntLiteral(Val);
1781   }
1782 
1783   // f16/v2f16 operands work correctly for all values.
1784   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1785 }
1786 
1787 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1788 
1789   // This is a hack to enable named inline values like
1790   // shared_base with both 32-bit and 64-bit operands.
1791   // Note that these values are defined as
1792   // 32-bit operands only.
1793   if (isInlineValue()) {
1794     return true;
1795   }
1796 
1797   if (!isImmTy(ImmTyNone)) {
1798     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1799     return false;
1800   }
1801   // TODO: We should avoid using host float here. It would be better to
1802   // check the float bit values which is what a few other places do.
1803   // We've had bot failures before due to weird NaN support on mips hosts.
1804 
1805   APInt Literal(64, Imm.Val);
1806 
1807   if (Imm.IsFPImm) { // We got fp literal token
1808     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1809       return AMDGPU::isInlinableLiteral64(Imm.Val,
1810                                           AsmParser->hasInv2PiInlineImm());
1811     }
1812 
1813     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1814     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1815       return false;
1816 
1817     if (type.getScalarSizeInBits() == 16) {
1818       return isInlineableLiteralOp16(
1819         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1820         type, AsmParser->hasInv2PiInlineImm());
1821     }
1822 
1823     // Check if single precision literal is inlinable
1824     return AMDGPU::isInlinableLiteral32(
1825       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1826       AsmParser->hasInv2PiInlineImm());
1827   }
1828 
1829   // We got int literal token.
1830   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1831     return AMDGPU::isInlinableLiteral64(Imm.Val,
1832                                         AsmParser->hasInv2PiInlineImm());
1833   }
1834 
1835   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1836     return false;
1837   }
1838 
1839   if (type.getScalarSizeInBits() == 16) {
1840     return isInlineableLiteralOp16(
1841       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1842       type, AsmParser->hasInv2PiInlineImm());
1843   }
1844 
1845   return AMDGPU::isInlinableLiteral32(
1846     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1847     AsmParser->hasInv2PiInlineImm());
1848 }
1849 
1850 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1851   // Check that this immediate can be added as literal
1852   if (!isImmTy(ImmTyNone)) {
1853     return false;
1854   }
1855 
1856   if (!Imm.IsFPImm) {
1857     // We got int literal token.
1858 
1859     if (type == MVT::f64 && hasFPModifiers()) {
1860       // Cannot apply fp modifiers to int literals preserving the same semantics
1861       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1862       // disable these cases.
1863       return false;
1864     }
1865 
1866     unsigned Size = type.getSizeInBits();
1867     if (Size == 64)
1868       Size = 32;
1869 
1870     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1871     // types.
1872     return isSafeTruncation(Imm.Val, Size);
1873   }
1874 
1875   // We got fp literal token
1876   if (type == MVT::f64) { // Expected 64-bit fp operand
1877     // We would set low 64-bits of literal to zeroes but we accept this literals
1878     return true;
1879   }
1880 
1881   if (type == MVT::i64) { // Expected 64-bit int operand
1882     // We don't allow fp literals in 64-bit integer instructions. It is
1883     // unclear how we should encode them.
1884     return false;
1885   }
1886 
1887   // We allow fp literals with f16x2 operands assuming that the specified
1888   // literal goes into the lower half and the upper half is zero. We also
1889   // require that the literal may be losslesly converted to f16.
1890   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1891                      (type == MVT::v2i16)? MVT::i16 :
1892                      (type == MVT::v2f32)? MVT::f32 : type;
1893 
1894   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1895   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1896 }
1897 
1898 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1899   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1900 }
1901 
1902 bool AMDGPUOperand::isVRegWithInputMods() const {
1903   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1904          // GFX90A allows DPP on 64-bit operands.
1905          (isRegClass(AMDGPU::VReg_64RegClassID) &&
1906           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1907 }
1908 
1909 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1910   if (AsmParser->isVI())
1911     return isVReg32();
1912   else if (AsmParser->isGFX9Plus())
1913     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1914   else
1915     return false;
1916 }
1917 
1918 bool AMDGPUOperand::isSDWAFP16Operand() const {
1919   return isSDWAOperand(MVT::f16);
1920 }
1921 
1922 bool AMDGPUOperand::isSDWAFP32Operand() const {
1923   return isSDWAOperand(MVT::f32);
1924 }
1925 
1926 bool AMDGPUOperand::isSDWAInt16Operand() const {
1927   return isSDWAOperand(MVT::i16);
1928 }
1929 
1930 bool AMDGPUOperand::isSDWAInt32Operand() const {
1931   return isSDWAOperand(MVT::i32);
1932 }
1933 
1934 bool AMDGPUOperand::isBoolReg() const {
1935   auto FB = AsmParser->getFeatureBits();
1936   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1937                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
1938 }
1939 
1940 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1941 {
1942   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1943   assert(Size == 2 || Size == 4 || Size == 8);
1944 
1945   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1946 
1947   if (Imm.Mods.Abs) {
1948     Val &= ~FpSignMask;
1949   }
1950   if (Imm.Mods.Neg) {
1951     Val ^= FpSignMask;
1952   }
1953 
1954   return Val;
1955 }
1956 
1957 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1958   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1959                              Inst.getNumOperands())) {
1960     addLiteralImmOperand(Inst, Imm.Val,
1961                          ApplyModifiers &
1962                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1963   } else {
1964     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1965     Inst.addOperand(MCOperand::createImm(Imm.Val));
1966     setImmKindNone();
1967   }
1968 }
1969 
1970 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1971   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1972   auto OpNum = Inst.getNumOperands();
1973   // Check that this operand accepts literals
1974   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1975 
1976   if (ApplyModifiers) {
1977     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1978     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1979     Val = applyInputFPModifiers(Val, Size);
1980   }
1981 
1982   APInt Literal(64, Val);
1983   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1984 
1985   if (Imm.IsFPImm) { // We got fp literal token
1986     switch (OpTy) {
1987     case AMDGPU::OPERAND_REG_IMM_INT64:
1988     case AMDGPU::OPERAND_REG_IMM_FP64:
1989     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1990     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1991     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1992       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1993                                        AsmParser->hasInv2PiInlineImm())) {
1994         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1995         setImmKindConst();
1996         return;
1997       }
1998 
1999       // Non-inlineable
2000       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2001         // For fp operands we check if low 32 bits are zeros
2002         if (Literal.getLoBits(32) != 0) {
2003           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2004           "Can't encode literal as exact 64-bit floating-point operand. "
2005           "Low 32-bits will be set to zero");
2006         }
2007 
2008         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2009         setImmKindLiteral();
2010         return;
2011       }
2012 
2013       // We don't allow fp literals in 64-bit integer instructions. It is
2014       // unclear how we should encode them. This case should be checked earlier
2015       // in predicate methods (isLiteralImm())
2016       llvm_unreachable("fp literal in 64-bit integer instruction.");
2017 
2018     case AMDGPU::OPERAND_REG_IMM_INT32:
2019     case AMDGPU::OPERAND_REG_IMM_FP32:
2020     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2021     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2022     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2023     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2024     case AMDGPU::OPERAND_REG_IMM_INT16:
2025     case AMDGPU::OPERAND_REG_IMM_FP16:
2026     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2027     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2028     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2029     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2030     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2031     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2032     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2033     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2034     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2035     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2036     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2037     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2038     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2039     case AMDGPU::OPERAND_REG_IMM_V2INT32: {
2040       bool lost;
2041       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2042       // Convert literal to single precision
2043       FPLiteral.convert(*getOpFltSemantics(OpTy),
2044                         APFloat::rmNearestTiesToEven, &lost);
2045       // We allow precision lost but not overflow or underflow. This should be
2046       // checked earlier in isLiteralImm()
2047 
2048       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2049       Inst.addOperand(MCOperand::createImm(ImmVal));
2050       setImmKindLiteral();
2051       return;
2052     }
2053     default:
2054       llvm_unreachable("invalid operand size");
2055     }
2056 
2057     return;
2058   }
2059 
2060   // We got int literal token.
2061   // Only sign extend inline immediates.
2062   switch (OpTy) {
2063   case AMDGPU::OPERAND_REG_IMM_INT32:
2064   case AMDGPU::OPERAND_REG_IMM_FP32:
2065   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2066   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2067   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2068   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2069   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2070   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2071   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2072   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2073   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2074   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2075     if (isSafeTruncation(Val, 32) &&
2076         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2077                                      AsmParser->hasInv2PiInlineImm())) {
2078       Inst.addOperand(MCOperand::createImm(Val));
2079       setImmKindConst();
2080       return;
2081     }
2082 
2083     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2084     setImmKindLiteral();
2085     return;
2086 
2087   case AMDGPU::OPERAND_REG_IMM_INT64:
2088   case AMDGPU::OPERAND_REG_IMM_FP64:
2089   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2090   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2091   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2092     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2093       Inst.addOperand(MCOperand::createImm(Val));
2094       setImmKindConst();
2095       return;
2096     }
2097 
2098     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2099     setImmKindLiteral();
2100     return;
2101 
2102   case AMDGPU::OPERAND_REG_IMM_INT16:
2103   case AMDGPU::OPERAND_REG_IMM_FP16:
2104   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2105   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2106   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2107   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2108     if (isSafeTruncation(Val, 16) &&
2109         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2110                                      AsmParser->hasInv2PiInlineImm())) {
2111       Inst.addOperand(MCOperand::createImm(Val));
2112       setImmKindConst();
2113       return;
2114     }
2115 
2116     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2117     setImmKindLiteral();
2118     return;
2119 
2120   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2121   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2122   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2123   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2124     assert(isSafeTruncation(Val, 16));
2125     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2126                                         AsmParser->hasInv2PiInlineImm()));
2127 
2128     Inst.addOperand(MCOperand::createImm(Val));
2129     return;
2130   }
2131   default:
2132     llvm_unreachable("invalid operand size");
2133   }
2134 }
2135 
2136 template <unsigned Bitwidth>
2137 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2138   APInt Literal(64, Imm.Val);
2139   setImmKindNone();
2140 
2141   if (!Imm.IsFPImm) {
2142     // We got int literal token.
2143     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2144     return;
2145   }
2146 
2147   bool Lost;
2148   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2149   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2150                     APFloat::rmNearestTiesToEven, &Lost);
2151   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2152 }
2153 
2154 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2155   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2156 }
2157 
2158 static bool isInlineValue(unsigned Reg) {
2159   switch (Reg) {
2160   case AMDGPU::SRC_SHARED_BASE:
2161   case AMDGPU::SRC_SHARED_LIMIT:
2162   case AMDGPU::SRC_PRIVATE_BASE:
2163   case AMDGPU::SRC_PRIVATE_LIMIT:
2164   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2165     return true;
2166   case AMDGPU::SRC_VCCZ:
2167   case AMDGPU::SRC_EXECZ:
2168   case AMDGPU::SRC_SCC:
2169     return true;
2170   case AMDGPU::SGPR_NULL:
2171     return true;
2172   default:
2173     return false;
2174   }
2175 }
2176 
2177 bool AMDGPUOperand::isInlineValue() const {
2178   return isRegKind() && ::isInlineValue(getReg());
2179 }
2180 
2181 //===----------------------------------------------------------------------===//
2182 // AsmParser
2183 //===----------------------------------------------------------------------===//
2184 
2185 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2186   if (Is == IS_VGPR) {
2187     switch (RegWidth) {
2188       default: return -1;
2189       case 1: return AMDGPU::VGPR_32RegClassID;
2190       case 2: return AMDGPU::VReg_64RegClassID;
2191       case 3: return AMDGPU::VReg_96RegClassID;
2192       case 4: return AMDGPU::VReg_128RegClassID;
2193       case 5: return AMDGPU::VReg_160RegClassID;
2194       case 6: return AMDGPU::VReg_192RegClassID;
2195       case 8: return AMDGPU::VReg_256RegClassID;
2196       case 16: return AMDGPU::VReg_512RegClassID;
2197       case 32: return AMDGPU::VReg_1024RegClassID;
2198     }
2199   } else if (Is == IS_TTMP) {
2200     switch (RegWidth) {
2201       default: return -1;
2202       case 1: return AMDGPU::TTMP_32RegClassID;
2203       case 2: return AMDGPU::TTMP_64RegClassID;
2204       case 4: return AMDGPU::TTMP_128RegClassID;
2205       case 8: return AMDGPU::TTMP_256RegClassID;
2206       case 16: return AMDGPU::TTMP_512RegClassID;
2207     }
2208   } else if (Is == IS_SGPR) {
2209     switch (RegWidth) {
2210       default: return -1;
2211       case 1: return AMDGPU::SGPR_32RegClassID;
2212       case 2: return AMDGPU::SGPR_64RegClassID;
2213       case 3: return AMDGPU::SGPR_96RegClassID;
2214       case 4: return AMDGPU::SGPR_128RegClassID;
2215       case 5: return AMDGPU::SGPR_160RegClassID;
2216       case 6: return AMDGPU::SGPR_192RegClassID;
2217       case 8: return AMDGPU::SGPR_256RegClassID;
2218       case 16: return AMDGPU::SGPR_512RegClassID;
2219     }
2220   } else if (Is == IS_AGPR) {
2221     switch (RegWidth) {
2222       default: return -1;
2223       case 1: return AMDGPU::AGPR_32RegClassID;
2224       case 2: return AMDGPU::AReg_64RegClassID;
2225       case 3: return AMDGPU::AReg_96RegClassID;
2226       case 4: return AMDGPU::AReg_128RegClassID;
2227       case 5: return AMDGPU::AReg_160RegClassID;
2228       case 6: return AMDGPU::AReg_192RegClassID;
2229       case 8: return AMDGPU::AReg_256RegClassID;
2230       case 16: return AMDGPU::AReg_512RegClassID;
2231       case 32: return AMDGPU::AReg_1024RegClassID;
2232     }
2233   }
2234   return -1;
2235 }
2236 
2237 static unsigned getSpecialRegForName(StringRef RegName) {
2238   return StringSwitch<unsigned>(RegName)
2239     .Case("exec", AMDGPU::EXEC)
2240     .Case("vcc", AMDGPU::VCC)
2241     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2242     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2243     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2244     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2245     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2246     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2247     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2248     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2249     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2250     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2251     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2252     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2253     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2254     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2255     .Case("m0", AMDGPU::M0)
2256     .Case("vccz", AMDGPU::SRC_VCCZ)
2257     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2258     .Case("execz", AMDGPU::SRC_EXECZ)
2259     .Case("src_execz", AMDGPU::SRC_EXECZ)
2260     .Case("scc", AMDGPU::SRC_SCC)
2261     .Case("src_scc", AMDGPU::SRC_SCC)
2262     .Case("tba", AMDGPU::TBA)
2263     .Case("tma", AMDGPU::TMA)
2264     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2265     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2266     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2267     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2268     .Case("vcc_lo", AMDGPU::VCC_LO)
2269     .Case("vcc_hi", AMDGPU::VCC_HI)
2270     .Case("exec_lo", AMDGPU::EXEC_LO)
2271     .Case("exec_hi", AMDGPU::EXEC_HI)
2272     .Case("tma_lo", AMDGPU::TMA_LO)
2273     .Case("tma_hi", AMDGPU::TMA_HI)
2274     .Case("tba_lo", AMDGPU::TBA_LO)
2275     .Case("tba_hi", AMDGPU::TBA_HI)
2276     .Case("pc", AMDGPU::PC_REG)
2277     .Case("null", AMDGPU::SGPR_NULL)
2278     .Default(AMDGPU::NoRegister);
2279 }
2280 
2281 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2282                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2283   auto R = parseRegister();
2284   if (!R) return true;
2285   assert(R->isReg());
2286   RegNo = R->getReg();
2287   StartLoc = R->getStartLoc();
2288   EndLoc = R->getEndLoc();
2289   return false;
2290 }
2291 
2292 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2293                                     SMLoc &EndLoc) {
2294   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2295 }
2296 
2297 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2298                                                        SMLoc &StartLoc,
2299                                                        SMLoc &EndLoc) {
2300   bool Result =
2301       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2302   bool PendingErrors = getParser().hasPendingError();
2303   getParser().clearPendingErrors();
2304   if (PendingErrors)
2305     return MatchOperand_ParseFail;
2306   if (Result)
2307     return MatchOperand_NoMatch;
2308   return MatchOperand_Success;
2309 }
2310 
2311 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2312                                             RegisterKind RegKind, unsigned Reg1,
2313                                             SMLoc Loc) {
2314   switch (RegKind) {
2315   case IS_SPECIAL:
2316     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2317       Reg = AMDGPU::EXEC;
2318       RegWidth = 2;
2319       return true;
2320     }
2321     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2322       Reg = AMDGPU::FLAT_SCR;
2323       RegWidth = 2;
2324       return true;
2325     }
2326     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2327       Reg = AMDGPU::XNACK_MASK;
2328       RegWidth = 2;
2329       return true;
2330     }
2331     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2332       Reg = AMDGPU::VCC;
2333       RegWidth = 2;
2334       return true;
2335     }
2336     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2337       Reg = AMDGPU::TBA;
2338       RegWidth = 2;
2339       return true;
2340     }
2341     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2342       Reg = AMDGPU::TMA;
2343       RegWidth = 2;
2344       return true;
2345     }
2346     Error(Loc, "register does not fit in the list");
2347     return false;
2348   case IS_VGPR:
2349   case IS_SGPR:
2350   case IS_AGPR:
2351   case IS_TTMP:
2352     if (Reg1 != Reg + RegWidth) {
2353       Error(Loc, "registers in a list must have consecutive indices");
2354       return false;
2355     }
2356     RegWidth++;
2357     return true;
2358   default:
2359     llvm_unreachable("unexpected register kind");
2360   }
2361 }
2362 
2363 struct RegInfo {
2364   StringLiteral Name;
2365   RegisterKind Kind;
2366 };
2367 
2368 static constexpr RegInfo RegularRegisters[] = {
2369   {{"v"},    IS_VGPR},
2370   {{"s"},    IS_SGPR},
2371   {{"ttmp"}, IS_TTMP},
2372   {{"acc"},  IS_AGPR},
2373   {{"a"},    IS_AGPR},
2374 };
2375 
2376 static bool isRegularReg(RegisterKind Kind) {
2377   return Kind == IS_VGPR ||
2378          Kind == IS_SGPR ||
2379          Kind == IS_TTMP ||
2380          Kind == IS_AGPR;
2381 }
2382 
2383 static const RegInfo* getRegularRegInfo(StringRef Str) {
2384   for (const RegInfo &Reg : RegularRegisters)
2385     if (Str.startswith(Reg.Name))
2386       return &Reg;
2387   return nullptr;
2388 }
2389 
2390 static bool getRegNum(StringRef Str, unsigned& Num) {
2391   return !Str.getAsInteger(10, Num);
2392 }
2393 
2394 bool
2395 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2396                             const AsmToken &NextToken) const {
2397 
2398   // A list of consecutive registers: [s0,s1,s2,s3]
2399   if (Token.is(AsmToken::LBrac))
2400     return true;
2401 
2402   if (!Token.is(AsmToken::Identifier))
2403     return false;
2404 
2405   // A single register like s0 or a range of registers like s[0:1]
2406 
2407   StringRef Str = Token.getString();
2408   const RegInfo *Reg = getRegularRegInfo(Str);
2409   if (Reg) {
2410     StringRef RegName = Reg->Name;
2411     StringRef RegSuffix = Str.substr(RegName.size());
2412     if (!RegSuffix.empty()) {
2413       unsigned Num;
2414       // A single register with an index: rXX
2415       if (getRegNum(RegSuffix, Num))
2416         return true;
2417     } else {
2418       // A range of registers: r[XX:YY].
2419       if (NextToken.is(AsmToken::LBrac))
2420         return true;
2421     }
2422   }
2423 
2424   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2425 }
2426 
2427 bool
2428 AMDGPUAsmParser::isRegister()
2429 {
2430   return isRegister(getToken(), peekToken());
2431 }
2432 
2433 unsigned
2434 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2435                                unsigned RegNum,
2436                                unsigned RegWidth,
2437                                SMLoc Loc) {
2438 
2439   assert(isRegularReg(RegKind));
2440 
2441   unsigned AlignSize = 1;
2442   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2443     // SGPR and TTMP registers must be aligned.
2444     // Max required alignment is 4 dwords.
2445     AlignSize = std::min(RegWidth, 4u);
2446   }
2447 
2448   if (RegNum % AlignSize != 0) {
2449     Error(Loc, "invalid register alignment");
2450     return AMDGPU::NoRegister;
2451   }
2452 
2453   unsigned RegIdx = RegNum / AlignSize;
2454   int RCID = getRegClass(RegKind, RegWidth);
2455   if (RCID == -1) {
2456     Error(Loc, "invalid or unsupported register size");
2457     return AMDGPU::NoRegister;
2458   }
2459 
2460   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2461   const MCRegisterClass RC = TRI->getRegClass(RCID);
2462   if (RegIdx >= RC.getNumRegs()) {
2463     Error(Loc, "register index is out of range");
2464     return AMDGPU::NoRegister;
2465   }
2466 
2467   return RC.getRegister(RegIdx);
2468 }
2469 
2470 bool
2471 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2472   int64_t RegLo, RegHi;
2473   if (!skipToken(AsmToken::LBrac, "missing register index"))
2474     return false;
2475 
2476   SMLoc FirstIdxLoc = getLoc();
2477   SMLoc SecondIdxLoc;
2478 
2479   if (!parseExpr(RegLo))
2480     return false;
2481 
2482   if (trySkipToken(AsmToken::Colon)) {
2483     SecondIdxLoc = getLoc();
2484     if (!parseExpr(RegHi))
2485       return false;
2486   } else {
2487     RegHi = RegLo;
2488   }
2489 
2490   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2491     return false;
2492 
2493   if (!isUInt<32>(RegLo)) {
2494     Error(FirstIdxLoc, "invalid register index");
2495     return false;
2496   }
2497 
2498   if (!isUInt<32>(RegHi)) {
2499     Error(SecondIdxLoc, "invalid register index");
2500     return false;
2501   }
2502 
2503   if (RegLo > RegHi) {
2504     Error(FirstIdxLoc, "first register index should not exceed second index");
2505     return false;
2506   }
2507 
2508   Num = static_cast<unsigned>(RegLo);
2509   Width = (RegHi - RegLo) + 1;
2510   return true;
2511 }
2512 
2513 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2514                                           unsigned &RegNum, unsigned &RegWidth,
2515                                           SmallVectorImpl<AsmToken> &Tokens) {
2516   assert(isToken(AsmToken::Identifier));
2517   unsigned Reg = getSpecialRegForName(getTokenStr());
2518   if (Reg) {
2519     RegNum = 0;
2520     RegWidth = 1;
2521     RegKind = IS_SPECIAL;
2522     Tokens.push_back(getToken());
2523     lex(); // skip register name
2524   }
2525   return Reg;
2526 }
2527 
2528 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2529                                           unsigned &RegNum, unsigned &RegWidth,
2530                                           SmallVectorImpl<AsmToken> &Tokens) {
2531   assert(isToken(AsmToken::Identifier));
2532   StringRef RegName = getTokenStr();
2533   auto Loc = getLoc();
2534 
2535   const RegInfo *RI = getRegularRegInfo(RegName);
2536   if (!RI) {
2537     Error(Loc, "invalid register name");
2538     return AMDGPU::NoRegister;
2539   }
2540 
2541   Tokens.push_back(getToken());
2542   lex(); // skip register name
2543 
2544   RegKind = RI->Kind;
2545   StringRef RegSuffix = RegName.substr(RI->Name.size());
2546   if (!RegSuffix.empty()) {
2547     // Single 32-bit register: vXX.
2548     if (!getRegNum(RegSuffix, RegNum)) {
2549       Error(Loc, "invalid register index");
2550       return AMDGPU::NoRegister;
2551     }
2552     RegWidth = 1;
2553   } else {
2554     // Range of registers: v[XX:YY]. ":YY" is optional.
2555     if (!ParseRegRange(RegNum, RegWidth))
2556       return AMDGPU::NoRegister;
2557   }
2558 
2559   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2560 }
2561 
2562 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2563                                        unsigned &RegWidth,
2564                                        SmallVectorImpl<AsmToken> &Tokens) {
2565   unsigned Reg = AMDGPU::NoRegister;
2566   auto ListLoc = getLoc();
2567 
2568   if (!skipToken(AsmToken::LBrac,
2569                  "expected a register or a list of registers")) {
2570     return AMDGPU::NoRegister;
2571   }
2572 
2573   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2574 
2575   auto Loc = getLoc();
2576   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2577     return AMDGPU::NoRegister;
2578   if (RegWidth != 1) {
2579     Error(Loc, "expected a single 32-bit register");
2580     return AMDGPU::NoRegister;
2581   }
2582 
2583   for (; trySkipToken(AsmToken::Comma); ) {
2584     RegisterKind NextRegKind;
2585     unsigned NextReg, NextRegNum, NextRegWidth;
2586     Loc = getLoc();
2587 
2588     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2589                              NextRegNum, NextRegWidth,
2590                              Tokens)) {
2591       return AMDGPU::NoRegister;
2592     }
2593     if (NextRegWidth != 1) {
2594       Error(Loc, "expected a single 32-bit register");
2595       return AMDGPU::NoRegister;
2596     }
2597     if (NextRegKind != RegKind) {
2598       Error(Loc, "registers in a list must be of the same kind");
2599       return AMDGPU::NoRegister;
2600     }
2601     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2602       return AMDGPU::NoRegister;
2603   }
2604 
2605   if (!skipToken(AsmToken::RBrac,
2606                  "expected a comma or a closing square bracket")) {
2607     return AMDGPU::NoRegister;
2608   }
2609 
2610   if (isRegularReg(RegKind))
2611     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2612 
2613   return Reg;
2614 }
2615 
2616 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2617                                           unsigned &RegNum, unsigned &RegWidth,
2618                                           SmallVectorImpl<AsmToken> &Tokens) {
2619   auto Loc = getLoc();
2620   Reg = AMDGPU::NoRegister;
2621 
2622   if (isToken(AsmToken::Identifier)) {
2623     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2624     if (Reg == AMDGPU::NoRegister)
2625       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2626   } else {
2627     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2628   }
2629 
2630   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2631   if (Reg == AMDGPU::NoRegister) {
2632     assert(Parser.hasPendingError());
2633     return false;
2634   }
2635 
2636   if (!subtargetHasRegister(*TRI, Reg)) {
2637     if (Reg == AMDGPU::SGPR_NULL) {
2638       Error(Loc, "'null' operand is not supported on this GPU");
2639     } else {
2640       Error(Loc, "register not available on this GPU");
2641     }
2642     return false;
2643   }
2644 
2645   return true;
2646 }
2647 
2648 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2649                                           unsigned &RegNum, unsigned &RegWidth,
2650                                           bool RestoreOnFailure /*=false*/) {
2651   Reg = AMDGPU::NoRegister;
2652 
2653   SmallVector<AsmToken, 1> Tokens;
2654   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2655     if (RestoreOnFailure) {
2656       while (!Tokens.empty()) {
2657         getLexer().UnLex(Tokens.pop_back_val());
2658       }
2659     }
2660     return true;
2661   }
2662   return false;
2663 }
2664 
2665 Optional<StringRef>
2666 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2667   switch (RegKind) {
2668   case IS_VGPR:
2669     return StringRef(".amdgcn.next_free_vgpr");
2670   case IS_SGPR:
2671     return StringRef(".amdgcn.next_free_sgpr");
2672   default:
2673     return None;
2674   }
2675 }
2676 
2677 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2678   auto SymbolName = getGprCountSymbolName(RegKind);
2679   assert(SymbolName && "initializing invalid register kind");
2680   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2681   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2682 }
2683 
2684 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2685                                             unsigned DwordRegIndex,
2686                                             unsigned RegWidth) {
2687   // Symbols are only defined for GCN targets
2688   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2689     return true;
2690 
2691   auto SymbolName = getGprCountSymbolName(RegKind);
2692   if (!SymbolName)
2693     return true;
2694   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2695 
2696   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2697   int64_t OldCount;
2698 
2699   if (!Sym->isVariable())
2700     return !Error(getLoc(),
2701                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2702   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2703     return !Error(
2704         getLoc(),
2705         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2706 
2707   if (OldCount <= NewMax)
2708     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2709 
2710   return true;
2711 }
2712 
2713 std::unique_ptr<AMDGPUOperand>
2714 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2715   const auto &Tok = getToken();
2716   SMLoc StartLoc = Tok.getLoc();
2717   SMLoc EndLoc = Tok.getEndLoc();
2718   RegisterKind RegKind;
2719   unsigned Reg, RegNum, RegWidth;
2720 
2721   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2722     return nullptr;
2723   }
2724   if (isHsaAbiVersion3Or4(&getSTI())) {
2725     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2726       return nullptr;
2727   } else
2728     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2729   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2730 }
2731 
2732 OperandMatchResultTy
2733 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2734   // TODO: add syntactic sugar for 1/(2*PI)
2735 
2736   assert(!isRegister());
2737   assert(!isModifier());
2738 
2739   const auto& Tok = getToken();
2740   const auto& NextTok = peekToken();
2741   bool IsReal = Tok.is(AsmToken::Real);
2742   SMLoc S = getLoc();
2743   bool Negate = false;
2744 
2745   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2746     lex();
2747     IsReal = true;
2748     Negate = true;
2749   }
2750 
2751   if (IsReal) {
2752     // Floating-point expressions are not supported.
2753     // Can only allow floating-point literals with an
2754     // optional sign.
2755 
2756     StringRef Num = getTokenStr();
2757     lex();
2758 
2759     APFloat RealVal(APFloat::IEEEdouble());
2760     auto roundMode = APFloat::rmNearestTiesToEven;
2761     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2762       return MatchOperand_ParseFail;
2763     }
2764     if (Negate)
2765       RealVal.changeSign();
2766 
2767     Operands.push_back(
2768       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2769                                AMDGPUOperand::ImmTyNone, true));
2770 
2771     return MatchOperand_Success;
2772 
2773   } else {
2774     int64_t IntVal;
2775     const MCExpr *Expr;
2776     SMLoc S = getLoc();
2777 
2778     if (HasSP3AbsModifier) {
2779       // This is a workaround for handling expressions
2780       // as arguments of SP3 'abs' modifier, for example:
2781       //     |1.0|
2782       //     |-1|
2783       //     |1+x|
2784       // This syntax is not compatible with syntax of standard
2785       // MC expressions (due to the trailing '|').
2786       SMLoc EndLoc;
2787       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2788         return MatchOperand_ParseFail;
2789     } else {
2790       if (Parser.parseExpression(Expr))
2791         return MatchOperand_ParseFail;
2792     }
2793 
2794     if (Expr->evaluateAsAbsolute(IntVal)) {
2795       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2796     } else {
2797       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2798     }
2799 
2800     return MatchOperand_Success;
2801   }
2802 
2803   return MatchOperand_NoMatch;
2804 }
2805 
2806 OperandMatchResultTy
2807 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2808   if (!isRegister())
2809     return MatchOperand_NoMatch;
2810 
2811   if (auto R = parseRegister()) {
2812     assert(R->isReg());
2813     Operands.push_back(std::move(R));
2814     return MatchOperand_Success;
2815   }
2816   return MatchOperand_ParseFail;
2817 }
2818 
2819 OperandMatchResultTy
2820 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2821   auto res = parseReg(Operands);
2822   if (res != MatchOperand_NoMatch) {
2823     return res;
2824   } else if (isModifier()) {
2825     return MatchOperand_NoMatch;
2826   } else {
2827     return parseImm(Operands, HasSP3AbsMod);
2828   }
2829 }
2830 
2831 bool
2832 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2833   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2834     const auto &str = Token.getString();
2835     return str == "abs" || str == "neg" || str == "sext";
2836   }
2837   return false;
2838 }
2839 
2840 bool
2841 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2842   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2843 }
2844 
2845 bool
2846 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2847   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2848 }
2849 
2850 bool
2851 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2852   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2853 }
2854 
2855 // Check if this is an operand modifier or an opcode modifier
2856 // which may look like an expression but it is not. We should
2857 // avoid parsing these modifiers as expressions. Currently
2858 // recognized sequences are:
2859 //   |...|
2860 //   abs(...)
2861 //   neg(...)
2862 //   sext(...)
2863 //   -reg
2864 //   -|...|
2865 //   -abs(...)
2866 //   name:...
2867 // Note that simple opcode modifiers like 'gds' may be parsed as
2868 // expressions; this is a special case. See getExpressionAsToken.
2869 //
2870 bool
2871 AMDGPUAsmParser::isModifier() {
2872 
2873   AsmToken Tok = getToken();
2874   AsmToken NextToken[2];
2875   peekTokens(NextToken);
2876 
2877   return isOperandModifier(Tok, NextToken[0]) ||
2878          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2879          isOpcodeModifierWithVal(Tok, NextToken[0]);
2880 }
2881 
2882 // Check if the current token is an SP3 'neg' modifier.
2883 // Currently this modifier is allowed in the following context:
2884 //
2885 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2886 // 2. Before an 'abs' modifier: -abs(...)
2887 // 3. Before an SP3 'abs' modifier: -|...|
2888 //
2889 // In all other cases "-" is handled as a part
2890 // of an expression that follows the sign.
2891 //
2892 // Note: When "-" is followed by an integer literal,
2893 // this is interpreted as integer negation rather
2894 // than a floating-point NEG modifier applied to N.
2895 // Beside being contr-intuitive, such use of floating-point
2896 // NEG modifier would have resulted in different meaning
2897 // of integer literals used with VOP1/2/C and VOP3,
2898 // for example:
2899 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2900 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2901 // Negative fp literals with preceding "-" are
2902 // handled likewise for unifomtity
2903 //
2904 bool
2905 AMDGPUAsmParser::parseSP3NegModifier() {
2906 
2907   AsmToken NextToken[2];
2908   peekTokens(NextToken);
2909 
2910   if (isToken(AsmToken::Minus) &&
2911       (isRegister(NextToken[0], NextToken[1]) ||
2912        NextToken[0].is(AsmToken::Pipe) ||
2913        isId(NextToken[0], "abs"))) {
2914     lex();
2915     return true;
2916   }
2917 
2918   return false;
2919 }
2920 
2921 OperandMatchResultTy
2922 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2923                                               bool AllowImm) {
2924   bool Neg, SP3Neg;
2925   bool Abs, SP3Abs;
2926   SMLoc Loc;
2927 
2928   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2929   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2930     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2931     return MatchOperand_ParseFail;
2932   }
2933 
2934   SP3Neg = parseSP3NegModifier();
2935 
2936   Loc = getLoc();
2937   Neg = trySkipId("neg");
2938   if (Neg && SP3Neg) {
2939     Error(Loc, "expected register or immediate");
2940     return MatchOperand_ParseFail;
2941   }
2942   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2943     return MatchOperand_ParseFail;
2944 
2945   Abs = trySkipId("abs");
2946   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2947     return MatchOperand_ParseFail;
2948 
2949   Loc = getLoc();
2950   SP3Abs = trySkipToken(AsmToken::Pipe);
2951   if (Abs && SP3Abs) {
2952     Error(Loc, "expected register or immediate");
2953     return MatchOperand_ParseFail;
2954   }
2955 
2956   OperandMatchResultTy Res;
2957   if (AllowImm) {
2958     Res = parseRegOrImm(Operands, SP3Abs);
2959   } else {
2960     Res = parseReg(Operands);
2961   }
2962   if (Res != MatchOperand_Success) {
2963     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2964   }
2965 
2966   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2967     return MatchOperand_ParseFail;
2968   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2969     return MatchOperand_ParseFail;
2970   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2971     return MatchOperand_ParseFail;
2972 
2973   AMDGPUOperand::Modifiers Mods;
2974   Mods.Abs = Abs || SP3Abs;
2975   Mods.Neg = Neg || SP3Neg;
2976 
2977   if (Mods.hasFPModifiers()) {
2978     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2979     if (Op.isExpr()) {
2980       Error(Op.getStartLoc(), "expected an absolute expression");
2981       return MatchOperand_ParseFail;
2982     }
2983     Op.setModifiers(Mods);
2984   }
2985   return MatchOperand_Success;
2986 }
2987 
2988 OperandMatchResultTy
2989 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2990                                                bool AllowImm) {
2991   bool Sext = trySkipId("sext");
2992   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2993     return MatchOperand_ParseFail;
2994 
2995   OperandMatchResultTy Res;
2996   if (AllowImm) {
2997     Res = parseRegOrImm(Operands);
2998   } else {
2999     Res = parseReg(Operands);
3000   }
3001   if (Res != MatchOperand_Success) {
3002     return Sext? MatchOperand_ParseFail : Res;
3003   }
3004 
3005   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3006     return MatchOperand_ParseFail;
3007 
3008   AMDGPUOperand::Modifiers Mods;
3009   Mods.Sext = Sext;
3010 
3011   if (Mods.hasIntModifiers()) {
3012     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3013     if (Op.isExpr()) {
3014       Error(Op.getStartLoc(), "expected an absolute expression");
3015       return MatchOperand_ParseFail;
3016     }
3017     Op.setModifiers(Mods);
3018   }
3019 
3020   return MatchOperand_Success;
3021 }
3022 
3023 OperandMatchResultTy
3024 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3025   return parseRegOrImmWithFPInputMods(Operands, false);
3026 }
3027 
3028 OperandMatchResultTy
3029 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3030   return parseRegOrImmWithIntInputMods(Operands, false);
3031 }
3032 
3033 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3034   auto Loc = getLoc();
3035   if (trySkipId("off")) {
3036     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3037                                                 AMDGPUOperand::ImmTyOff, false));
3038     return MatchOperand_Success;
3039   }
3040 
3041   if (!isRegister())
3042     return MatchOperand_NoMatch;
3043 
3044   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3045   if (Reg) {
3046     Operands.push_back(std::move(Reg));
3047     return MatchOperand_Success;
3048   }
3049 
3050   return MatchOperand_ParseFail;
3051 
3052 }
3053 
3054 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3055   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3056 
3057   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3058       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3059       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3060       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3061     return Match_InvalidOperand;
3062 
3063   if ((TSFlags & SIInstrFlags::VOP3) &&
3064       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3065       getForcedEncodingSize() != 64)
3066     return Match_PreferE32;
3067 
3068   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3069       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3070     // v_mac_f32/16 allow only dst_sel == DWORD;
3071     auto OpNum =
3072         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3073     const auto &Op = Inst.getOperand(OpNum);
3074     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3075       return Match_InvalidOperand;
3076     }
3077   }
3078 
3079   return Match_Success;
3080 }
3081 
3082 static ArrayRef<unsigned> getAllVariants() {
3083   static const unsigned Variants[] = {
3084     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3085     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3086   };
3087 
3088   return makeArrayRef(Variants);
3089 }
3090 
3091 // What asm variants we should check
3092 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3093   if (getForcedEncodingSize() == 32) {
3094     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3095     return makeArrayRef(Variants);
3096   }
3097 
3098   if (isForcedVOP3()) {
3099     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3100     return makeArrayRef(Variants);
3101   }
3102 
3103   if (isForcedSDWA()) {
3104     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3105                                         AMDGPUAsmVariants::SDWA9};
3106     return makeArrayRef(Variants);
3107   }
3108 
3109   if (isForcedDPP()) {
3110     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3111     return makeArrayRef(Variants);
3112   }
3113 
3114   return getAllVariants();
3115 }
3116 
3117 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3118   if (getForcedEncodingSize() == 32)
3119     return "e32";
3120 
3121   if (isForcedVOP3())
3122     return "e64";
3123 
3124   if (isForcedSDWA())
3125     return "sdwa";
3126 
3127   if (isForcedDPP())
3128     return "dpp";
3129 
3130   return "";
3131 }
3132 
3133 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3134   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3135   const unsigned Num = Desc.getNumImplicitUses();
3136   for (unsigned i = 0; i < Num; ++i) {
3137     unsigned Reg = Desc.ImplicitUses[i];
3138     switch (Reg) {
3139     case AMDGPU::FLAT_SCR:
3140     case AMDGPU::VCC:
3141     case AMDGPU::VCC_LO:
3142     case AMDGPU::VCC_HI:
3143     case AMDGPU::M0:
3144       return Reg;
3145     default:
3146       break;
3147     }
3148   }
3149   return AMDGPU::NoRegister;
3150 }
3151 
3152 // NB: This code is correct only when used to check constant
3153 // bus limitations because GFX7 support no f16 inline constants.
3154 // Note that there are no cases when a GFX7 opcode violates
3155 // constant bus limitations due to the use of an f16 constant.
3156 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3157                                        unsigned OpIdx) const {
3158   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3159 
3160   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3161     return false;
3162   }
3163 
3164   const MCOperand &MO = Inst.getOperand(OpIdx);
3165 
3166   int64_t Val = MO.getImm();
3167   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3168 
3169   switch (OpSize) { // expected operand size
3170   case 8:
3171     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3172   case 4:
3173     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3174   case 2: {
3175     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3176     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3177         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3178         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3179       return AMDGPU::isInlinableIntLiteral(Val);
3180 
3181     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3182         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3183         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3184       return AMDGPU::isInlinableIntLiteralV216(Val);
3185 
3186     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3187         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3188         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3189       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3190 
3191     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3192   }
3193   default:
3194     llvm_unreachable("invalid operand size");
3195   }
3196 }
3197 
3198 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3199   if (!isGFX10Plus())
3200     return 1;
3201 
3202   switch (Opcode) {
3203   // 64-bit shift instructions can use only one scalar value input
3204   case AMDGPU::V_LSHLREV_B64_e64:
3205   case AMDGPU::V_LSHLREV_B64_gfx10:
3206   case AMDGPU::V_LSHRREV_B64_e64:
3207   case AMDGPU::V_LSHRREV_B64_gfx10:
3208   case AMDGPU::V_ASHRREV_I64_e64:
3209   case AMDGPU::V_ASHRREV_I64_gfx10:
3210   case AMDGPU::V_LSHL_B64_e64:
3211   case AMDGPU::V_LSHR_B64_e64:
3212   case AMDGPU::V_ASHR_I64_e64:
3213     return 1;
3214   default:
3215     return 2;
3216   }
3217 }
3218 
3219 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3220   const MCOperand &MO = Inst.getOperand(OpIdx);
3221   if (MO.isImm()) {
3222     return !isInlineConstant(Inst, OpIdx);
3223   } else if (MO.isReg()) {
3224     auto Reg = MO.getReg();
3225     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3226     auto PReg = mc2PseudoReg(Reg);
3227     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3228   } else {
3229     return true;
3230   }
3231 }
3232 
3233 bool
3234 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3235                                                 const OperandVector &Operands) {
3236   const unsigned Opcode = Inst.getOpcode();
3237   const MCInstrDesc &Desc = MII.get(Opcode);
3238   unsigned LastSGPR = AMDGPU::NoRegister;
3239   unsigned ConstantBusUseCount = 0;
3240   unsigned NumLiterals = 0;
3241   unsigned LiteralSize;
3242 
3243   if (Desc.TSFlags &
3244       (SIInstrFlags::VOPC |
3245        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3246        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3247        SIInstrFlags::SDWA)) {
3248     // Check special imm operands (used by madmk, etc)
3249     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3250       ++ConstantBusUseCount;
3251     }
3252 
3253     SmallDenseSet<unsigned> SGPRsUsed;
3254     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3255     if (SGPRUsed != AMDGPU::NoRegister) {
3256       SGPRsUsed.insert(SGPRUsed);
3257       ++ConstantBusUseCount;
3258     }
3259 
3260     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3261     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3262     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3263 
3264     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3265 
3266     for (int OpIdx : OpIndices) {
3267       if (OpIdx == -1) break;
3268 
3269       const MCOperand &MO = Inst.getOperand(OpIdx);
3270       if (usesConstantBus(Inst, OpIdx)) {
3271         if (MO.isReg()) {
3272           LastSGPR = mc2PseudoReg(MO.getReg());
3273           // Pairs of registers with a partial intersections like these
3274           //   s0, s[0:1]
3275           //   flat_scratch_lo, flat_scratch
3276           //   flat_scratch_lo, flat_scratch_hi
3277           // are theoretically valid but they are disabled anyway.
3278           // Note that this code mimics SIInstrInfo::verifyInstruction
3279           if (!SGPRsUsed.count(LastSGPR)) {
3280             SGPRsUsed.insert(LastSGPR);
3281             ++ConstantBusUseCount;
3282           }
3283         } else { // Expression or a literal
3284 
3285           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3286             continue; // special operand like VINTERP attr_chan
3287 
3288           // An instruction may use only one literal.
3289           // This has been validated on the previous step.
3290           // See validateVOP3Literal.
3291           // This literal may be used as more than one operand.
3292           // If all these operands are of the same size,
3293           // this literal counts as one scalar value.
3294           // Otherwise it counts as 2 scalar values.
3295           // See "GFX10 Shader Programming", section 3.6.2.3.
3296 
3297           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3298           if (Size < 4) Size = 4;
3299 
3300           if (NumLiterals == 0) {
3301             NumLiterals = 1;
3302             LiteralSize = Size;
3303           } else if (LiteralSize != Size) {
3304             NumLiterals = 2;
3305           }
3306         }
3307       }
3308     }
3309   }
3310   ConstantBusUseCount += NumLiterals;
3311 
3312   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3313     return true;
3314 
3315   SMLoc LitLoc = getLitLoc(Operands);
3316   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3317   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3318   Error(Loc, "invalid operand (violates constant bus restrictions)");
3319   return false;
3320 }
3321 
3322 bool
3323 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3324                                                  const OperandVector &Operands) {
3325   const unsigned Opcode = Inst.getOpcode();
3326   const MCInstrDesc &Desc = MII.get(Opcode);
3327 
3328   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3329   if (DstIdx == -1 ||
3330       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3331     return true;
3332   }
3333 
3334   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3335 
3336   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3337   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3338   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3339 
3340   assert(DstIdx != -1);
3341   const MCOperand &Dst = Inst.getOperand(DstIdx);
3342   assert(Dst.isReg());
3343   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3344 
3345   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3346 
3347   for (int SrcIdx : SrcIndices) {
3348     if (SrcIdx == -1) break;
3349     const MCOperand &Src = Inst.getOperand(SrcIdx);
3350     if (Src.isReg()) {
3351       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3352       if (isRegIntersect(DstReg, SrcReg, TRI)) {
3353         Error(getRegLoc(SrcReg, Operands),
3354           "destination must be different than all sources");
3355         return false;
3356       }
3357     }
3358   }
3359 
3360   return true;
3361 }
3362 
3363 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3364 
3365   const unsigned Opc = Inst.getOpcode();
3366   const MCInstrDesc &Desc = MII.get(Opc);
3367 
3368   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3369     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3370     assert(ClampIdx != -1);
3371     return Inst.getOperand(ClampIdx).getImm() == 0;
3372   }
3373 
3374   return true;
3375 }
3376 
3377 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3378 
3379   const unsigned Opc = Inst.getOpcode();
3380   const MCInstrDesc &Desc = MII.get(Opc);
3381 
3382   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3383     return true;
3384 
3385   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3386   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3387   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3388 
3389   assert(VDataIdx != -1);
3390 
3391   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3392     return true;
3393 
3394   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3395   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3396   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3397   if (DMask == 0)
3398     DMask = 1;
3399 
3400   unsigned DataSize =
3401     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3402   if (hasPackedD16()) {
3403     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3404     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3405       DataSize = (DataSize + 1) / 2;
3406   }
3407 
3408   return (VDataSize / 4) == DataSize + TFESize;
3409 }
3410 
3411 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3412   const unsigned Opc = Inst.getOpcode();
3413   const MCInstrDesc &Desc = MII.get(Opc);
3414 
3415   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3416     return true;
3417 
3418   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3419 
3420   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3421       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3422   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3423   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3424   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3425   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3426 
3427   assert(VAddr0Idx != -1);
3428   assert(SrsrcIdx != -1);
3429   assert(SrsrcIdx > VAddr0Idx);
3430 
3431   if (DimIdx == -1)
3432     return true; // intersect_ray
3433 
3434   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3435   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3436   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3437   unsigned ActualAddrSize =
3438       IsNSA ? SrsrcIdx - VAddr0Idx
3439             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3440   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3441 
3442   unsigned ExpectedAddrSize =
3443       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3444 
3445   if (!IsNSA) {
3446     if (ExpectedAddrSize > 8)
3447       ExpectedAddrSize = 16;
3448     else if (ExpectedAddrSize > 5)
3449       ExpectedAddrSize = 8;
3450 
3451     // Allow oversized 8 VGPR vaddr when only 5 VGPR are required.
3452     // This provides backward compatibility for assembly created
3453     // before 160b types were directly supported.
3454     if (ExpectedAddrSize == 5 && ActualAddrSize == 8)
3455       return true;
3456   }
3457 
3458   return ActualAddrSize == ExpectedAddrSize;
3459 }
3460 
3461 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3462 
3463   const unsigned Opc = Inst.getOpcode();
3464   const MCInstrDesc &Desc = MII.get(Opc);
3465 
3466   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3467     return true;
3468   if (!Desc.mayLoad() || !Desc.mayStore())
3469     return true; // Not atomic
3470 
3471   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3472   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3473 
3474   // This is an incomplete check because image_atomic_cmpswap
3475   // may only use 0x3 and 0xf while other atomic operations
3476   // may use 0x1 and 0x3. However these limitations are
3477   // verified when we check that dmask matches dst size.
3478   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3479 }
3480 
3481 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3482 
3483   const unsigned Opc = Inst.getOpcode();
3484   const MCInstrDesc &Desc = MII.get(Opc);
3485 
3486   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3487     return true;
3488 
3489   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3490   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3491 
3492   // GATHER4 instructions use dmask in a different fashion compared to
3493   // other MIMG instructions. The only useful DMASK values are
3494   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3495   // (red,red,red,red) etc.) The ISA document doesn't mention
3496   // this.
3497   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3498 }
3499 
3500 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3501   const unsigned Opc = Inst.getOpcode();
3502   const MCInstrDesc &Desc = MII.get(Opc);
3503 
3504   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3505     return true;
3506 
3507   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3508   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3509       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3510 
3511   if (!BaseOpcode->MSAA)
3512     return true;
3513 
3514   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3515   assert(DimIdx != -1);
3516 
3517   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3518   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3519 
3520   return DimInfo->MSAA;
3521 }
3522 
3523 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3524 {
3525   switch (Opcode) {
3526   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3527   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3528   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3529     return true;
3530   default:
3531     return false;
3532   }
3533 }
3534 
3535 // movrels* opcodes should only allow VGPRS as src0.
3536 // This is specified in .td description for vop1/vop3,
3537 // but sdwa is handled differently. See isSDWAOperand.
3538 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3539                                       const OperandVector &Operands) {
3540 
3541   const unsigned Opc = Inst.getOpcode();
3542   const MCInstrDesc &Desc = MII.get(Opc);
3543 
3544   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3545     return true;
3546 
3547   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3548   assert(Src0Idx != -1);
3549 
3550   SMLoc ErrLoc;
3551   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3552   if (Src0.isReg()) {
3553     auto Reg = mc2PseudoReg(Src0.getReg());
3554     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3555     if (!isSGPR(Reg, TRI))
3556       return true;
3557     ErrLoc = getRegLoc(Reg, Operands);
3558   } else {
3559     ErrLoc = getConstLoc(Operands);
3560   }
3561 
3562   Error(ErrLoc, "source operand must be a VGPR");
3563   return false;
3564 }
3565 
3566 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3567                                           const OperandVector &Operands) {
3568 
3569   const unsigned Opc = Inst.getOpcode();
3570 
3571   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3572     return true;
3573 
3574   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3575   assert(Src0Idx != -1);
3576 
3577   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3578   if (!Src0.isReg())
3579     return true;
3580 
3581   auto Reg = mc2PseudoReg(Src0.getReg());
3582   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3583   if (isSGPR(Reg, TRI)) {
3584     Error(getRegLoc(Reg, Operands),
3585           "source operand must be either a VGPR or an inline constant");
3586     return false;
3587   }
3588 
3589   return true;
3590 }
3591 
3592 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3593   switch (Inst.getOpcode()) {
3594   default:
3595     return true;
3596   case V_DIV_SCALE_F32_gfx6_gfx7:
3597   case V_DIV_SCALE_F32_vi:
3598   case V_DIV_SCALE_F32_gfx10:
3599   case V_DIV_SCALE_F64_gfx6_gfx7:
3600   case V_DIV_SCALE_F64_vi:
3601   case V_DIV_SCALE_F64_gfx10:
3602     break;
3603   }
3604 
3605   // TODO: Check that src0 = src1 or src2.
3606 
3607   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3608                     AMDGPU::OpName::src2_modifiers,
3609                     AMDGPU::OpName::src2_modifiers}) {
3610     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3611             .getImm() &
3612         SISrcMods::ABS) {
3613       return false;
3614     }
3615   }
3616 
3617   return true;
3618 }
3619 
3620 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3621 
3622   const unsigned Opc = Inst.getOpcode();
3623   const MCInstrDesc &Desc = MII.get(Opc);
3624 
3625   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3626     return true;
3627 
3628   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3629   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3630     if (isCI() || isSI())
3631       return false;
3632   }
3633 
3634   return true;
3635 }
3636 
3637 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3638   const unsigned Opc = Inst.getOpcode();
3639   const MCInstrDesc &Desc = MII.get(Opc);
3640 
3641   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3642     return true;
3643 
3644   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3645   if (DimIdx < 0)
3646     return true;
3647 
3648   long Imm = Inst.getOperand(DimIdx).getImm();
3649   if (Imm < 0 || Imm >= 8)
3650     return false;
3651 
3652   return true;
3653 }
3654 
3655 static bool IsRevOpcode(const unsigned Opcode)
3656 {
3657   switch (Opcode) {
3658   case AMDGPU::V_SUBREV_F32_e32:
3659   case AMDGPU::V_SUBREV_F32_e64:
3660   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3661   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3662   case AMDGPU::V_SUBREV_F32_e32_vi:
3663   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3664   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3665   case AMDGPU::V_SUBREV_F32_e64_vi:
3666 
3667   case AMDGPU::V_SUBREV_CO_U32_e32:
3668   case AMDGPU::V_SUBREV_CO_U32_e64:
3669   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3670   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3671 
3672   case AMDGPU::V_SUBBREV_U32_e32:
3673   case AMDGPU::V_SUBBREV_U32_e64:
3674   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3675   case AMDGPU::V_SUBBREV_U32_e32_vi:
3676   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3677   case AMDGPU::V_SUBBREV_U32_e64_vi:
3678 
3679   case AMDGPU::V_SUBREV_U32_e32:
3680   case AMDGPU::V_SUBREV_U32_e64:
3681   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3682   case AMDGPU::V_SUBREV_U32_e32_vi:
3683   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3684   case AMDGPU::V_SUBREV_U32_e64_vi:
3685 
3686   case AMDGPU::V_SUBREV_F16_e32:
3687   case AMDGPU::V_SUBREV_F16_e64:
3688   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3689   case AMDGPU::V_SUBREV_F16_e32_vi:
3690   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3691   case AMDGPU::V_SUBREV_F16_e64_vi:
3692 
3693   case AMDGPU::V_SUBREV_U16_e32:
3694   case AMDGPU::V_SUBREV_U16_e64:
3695   case AMDGPU::V_SUBREV_U16_e32_vi:
3696   case AMDGPU::V_SUBREV_U16_e64_vi:
3697 
3698   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3699   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3700   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3701 
3702   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3703   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3704 
3705   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3706   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3707 
3708   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3709   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3710 
3711   case AMDGPU::V_LSHRREV_B32_e32:
3712   case AMDGPU::V_LSHRREV_B32_e64:
3713   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3714   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3715   case AMDGPU::V_LSHRREV_B32_e32_vi:
3716   case AMDGPU::V_LSHRREV_B32_e64_vi:
3717   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3718   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3719 
3720   case AMDGPU::V_ASHRREV_I32_e32:
3721   case AMDGPU::V_ASHRREV_I32_e64:
3722   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3723   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3724   case AMDGPU::V_ASHRREV_I32_e32_vi:
3725   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3726   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3727   case AMDGPU::V_ASHRREV_I32_e64_vi:
3728 
3729   case AMDGPU::V_LSHLREV_B32_e32:
3730   case AMDGPU::V_LSHLREV_B32_e64:
3731   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3732   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3733   case AMDGPU::V_LSHLREV_B32_e32_vi:
3734   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3735   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3736   case AMDGPU::V_LSHLREV_B32_e64_vi:
3737 
3738   case AMDGPU::V_LSHLREV_B16_e32:
3739   case AMDGPU::V_LSHLREV_B16_e64:
3740   case AMDGPU::V_LSHLREV_B16_e32_vi:
3741   case AMDGPU::V_LSHLREV_B16_e64_vi:
3742   case AMDGPU::V_LSHLREV_B16_gfx10:
3743 
3744   case AMDGPU::V_LSHRREV_B16_e32:
3745   case AMDGPU::V_LSHRREV_B16_e64:
3746   case AMDGPU::V_LSHRREV_B16_e32_vi:
3747   case AMDGPU::V_LSHRREV_B16_e64_vi:
3748   case AMDGPU::V_LSHRREV_B16_gfx10:
3749 
3750   case AMDGPU::V_ASHRREV_I16_e32:
3751   case AMDGPU::V_ASHRREV_I16_e64:
3752   case AMDGPU::V_ASHRREV_I16_e32_vi:
3753   case AMDGPU::V_ASHRREV_I16_e64_vi:
3754   case AMDGPU::V_ASHRREV_I16_gfx10:
3755 
3756   case AMDGPU::V_LSHLREV_B64_e64:
3757   case AMDGPU::V_LSHLREV_B64_gfx10:
3758   case AMDGPU::V_LSHLREV_B64_vi:
3759 
3760   case AMDGPU::V_LSHRREV_B64_e64:
3761   case AMDGPU::V_LSHRREV_B64_gfx10:
3762   case AMDGPU::V_LSHRREV_B64_vi:
3763 
3764   case AMDGPU::V_ASHRREV_I64_e64:
3765   case AMDGPU::V_ASHRREV_I64_gfx10:
3766   case AMDGPU::V_ASHRREV_I64_vi:
3767 
3768   case AMDGPU::V_PK_LSHLREV_B16:
3769   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3770   case AMDGPU::V_PK_LSHLREV_B16_vi:
3771 
3772   case AMDGPU::V_PK_LSHRREV_B16:
3773   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3774   case AMDGPU::V_PK_LSHRREV_B16_vi:
3775   case AMDGPU::V_PK_ASHRREV_I16:
3776   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3777   case AMDGPU::V_PK_ASHRREV_I16_vi:
3778     return true;
3779   default:
3780     return false;
3781   }
3782 }
3783 
3784 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3785 
3786   using namespace SIInstrFlags;
3787   const unsigned Opcode = Inst.getOpcode();
3788   const MCInstrDesc &Desc = MII.get(Opcode);
3789 
3790   // lds_direct register is defined so that it can be used
3791   // with 9-bit operands only. Ignore encodings which do not accept these.
3792   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3793   if ((Desc.TSFlags & Enc) == 0)
3794     return None;
3795 
3796   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3797     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3798     if (SrcIdx == -1)
3799       break;
3800     const auto &Src = Inst.getOperand(SrcIdx);
3801     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3802 
3803       if (isGFX90A())
3804         return StringRef("lds_direct is not supported on this GPU");
3805 
3806       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3807         return StringRef("lds_direct cannot be used with this instruction");
3808 
3809       if (SrcName != OpName::src0)
3810         return StringRef("lds_direct may be used as src0 only");
3811     }
3812   }
3813 
3814   return None;
3815 }
3816 
3817 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3818   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3819     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3820     if (Op.isFlatOffset())
3821       return Op.getStartLoc();
3822   }
3823   return getLoc();
3824 }
3825 
3826 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3827                                          const OperandVector &Operands) {
3828   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3829   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3830     return true;
3831 
3832   auto Opcode = Inst.getOpcode();
3833   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3834   assert(OpNum != -1);
3835 
3836   const auto &Op = Inst.getOperand(OpNum);
3837   if (!hasFlatOffsets() && Op.getImm() != 0) {
3838     Error(getFlatOffsetLoc(Operands),
3839           "flat offset modifier is not supported on this GPU");
3840     return false;
3841   }
3842 
3843   // For FLAT segment the offset must be positive;
3844   // MSB is ignored and forced to zero.
3845   if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
3846     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
3847     if (!isIntN(OffsetSize, Op.getImm())) {
3848       Error(getFlatOffsetLoc(Operands),
3849             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
3850       return false;
3851     }
3852   } else {
3853     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
3854     if (!isUIntN(OffsetSize, Op.getImm())) {
3855       Error(getFlatOffsetLoc(Operands),
3856             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
3857       return false;
3858     }
3859   }
3860 
3861   return true;
3862 }
3863 
3864 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3865   // Start with second operand because SMEM Offset cannot be dst or src0.
3866   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
3867     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3868     if (Op.isSMEMOffset())
3869       return Op.getStartLoc();
3870   }
3871   return getLoc();
3872 }
3873 
3874 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3875                                          const OperandVector &Operands) {
3876   if (isCI() || isSI())
3877     return true;
3878 
3879   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3880   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3881     return true;
3882 
3883   auto Opcode = Inst.getOpcode();
3884   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3885   if (OpNum == -1)
3886     return true;
3887 
3888   const auto &Op = Inst.getOperand(OpNum);
3889   if (!Op.isImm())
3890     return true;
3891 
3892   uint64_t Offset = Op.getImm();
3893   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3894   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3895       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3896     return true;
3897 
3898   Error(getSMEMOffsetLoc(Operands),
3899         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3900                                "expected a 21-bit signed offset");
3901 
3902   return false;
3903 }
3904 
3905 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3906   unsigned Opcode = Inst.getOpcode();
3907   const MCInstrDesc &Desc = MII.get(Opcode);
3908   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3909     return true;
3910 
3911   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3912   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3913 
3914   const int OpIndices[] = { Src0Idx, Src1Idx };
3915 
3916   unsigned NumExprs = 0;
3917   unsigned NumLiterals = 0;
3918   uint32_t LiteralValue;
3919 
3920   for (int OpIdx : OpIndices) {
3921     if (OpIdx == -1) break;
3922 
3923     const MCOperand &MO = Inst.getOperand(OpIdx);
3924     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3925     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3926       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3927         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3928         if (NumLiterals == 0 || LiteralValue != Value) {
3929           LiteralValue = Value;
3930           ++NumLiterals;
3931         }
3932       } else if (MO.isExpr()) {
3933         ++NumExprs;
3934       }
3935     }
3936   }
3937 
3938   return NumLiterals + NumExprs <= 1;
3939 }
3940 
3941 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3942   const unsigned Opc = Inst.getOpcode();
3943   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3944       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3945     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3946     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3947 
3948     if (OpSel & ~3)
3949       return false;
3950   }
3951   return true;
3952 }
3953 
3954 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
3955                                   const OperandVector &Operands) {
3956   const unsigned Opc = Inst.getOpcode();
3957   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
3958   if (DppCtrlIdx < 0)
3959     return true;
3960   unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
3961 
3962   if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
3963     // DPP64 is supported for row_newbcast only.
3964     int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3965     if (Src0Idx >= 0 &&
3966         getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
3967       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
3968       Error(S, "64 bit dpp only supports row_newbcast");
3969       return false;
3970     }
3971   }
3972 
3973   return true;
3974 }
3975 
3976 // Check if VCC register matches wavefront size
3977 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3978   auto FB = getFeatureBits();
3979   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3980     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3981 }
3982 
3983 // VOP3 literal is only allowed in GFX10+ and only one can be used
3984 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
3985                                           const OperandVector &Operands) {
3986   unsigned Opcode = Inst.getOpcode();
3987   const MCInstrDesc &Desc = MII.get(Opcode);
3988   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3989     return true;
3990 
3991   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3992   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3993   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3994 
3995   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3996 
3997   unsigned NumExprs = 0;
3998   unsigned NumLiterals = 0;
3999   uint32_t LiteralValue;
4000 
4001   for (int OpIdx : OpIndices) {
4002     if (OpIdx == -1) break;
4003 
4004     const MCOperand &MO = Inst.getOperand(OpIdx);
4005     if (!MO.isImm() && !MO.isExpr())
4006       continue;
4007     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4008       continue;
4009 
4010     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4011         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4012       Error(getConstLoc(Operands),
4013             "inline constants are not allowed for this operand");
4014       return false;
4015     }
4016 
4017     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4018       uint32_t Value = static_cast<uint32_t>(MO.getImm());
4019       if (NumLiterals == 0 || LiteralValue != Value) {
4020         LiteralValue = Value;
4021         ++NumLiterals;
4022       }
4023     } else if (MO.isExpr()) {
4024       ++NumExprs;
4025     }
4026   }
4027   NumLiterals += NumExprs;
4028 
4029   if (!NumLiterals)
4030     return true;
4031 
4032   if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4033     Error(getLitLoc(Operands), "literal operands are not supported");
4034     return false;
4035   }
4036 
4037   if (NumLiterals > 1) {
4038     Error(getLitLoc(Operands), "only one literal operand is allowed");
4039     return false;
4040   }
4041 
4042   return true;
4043 }
4044 
4045 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4046 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4047                          const MCRegisterInfo *MRI) {
4048   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4049   if (OpIdx < 0)
4050     return -1;
4051 
4052   const MCOperand &Op = Inst.getOperand(OpIdx);
4053   if (!Op.isReg())
4054     return -1;
4055 
4056   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4057   auto Reg = Sub ? Sub : Op.getReg();
4058   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4059   return AGPR32.contains(Reg) ? 1 : 0;
4060 }
4061 
4062 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4063   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4064   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4065                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4066                   SIInstrFlags::DS)) == 0)
4067     return true;
4068 
4069   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4070                                                       : AMDGPU::OpName::vdata;
4071 
4072   const MCRegisterInfo *MRI = getMRI();
4073   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4074   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4075 
4076   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4077     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4078     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4079       return false;
4080   }
4081 
4082   auto FB = getFeatureBits();
4083   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4084     if (DataAreg < 0 || DstAreg < 0)
4085       return true;
4086     return DstAreg == DataAreg;
4087   }
4088 
4089   return DstAreg < 1 && DataAreg < 1;
4090 }
4091 
4092 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4093   auto FB = getFeatureBits();
4094   if (!FB[AMDGPU::FeatureGFX90AInsts])
4095     return true;
4096 
4097   const MCRegisterInfo *MRI = getMRI();
4098   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4099   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4100   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4101     const MCOperand &Op = Inst.getOperand(I);
4102     if (!Op.isReg())
4103       continue;
4104 
4105     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4106     if (!Sub)
4107       continue;
4108 
4109     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4110       return false;
4111     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4112       return false;
4113   }
4114 
4115   return true;
4116 }
4117 
4118 // gfx90a has an undocumented limitation:
4119 // DS_GWS opcodes must use even aligned registers.
4120 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4121                                   const OperandVector &Operands) {
4122   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4123     return true;
4124 
4125   int Opc = Inst.getOpcode();
4126   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4127       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4128     return true;
4129 
4130   const MCRegisterInfo *MRI = getMRI();
4131   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4132   int Data0Pos =
4133       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4134   assert(Data0Pos != -1);
4135   auto Reg = Inst.getOperand(Data0Pos).getReg();
4136   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4137   if (RegIdx & 1) {
4138     SMLoc RegLoc = getRegLoc(Reg, Operands);
4139     Error(RegLoc, "vgpr must be even aligned");
4140     return false;
4141   }
4142 
4143   return true;
4144 }
4145 
4146 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4147                                             const OperandVector &Operands,
4148                                             const SMLoc &IDLoc) {
4149   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4150                                            AMDGPU::OpName::cpol);
4151   if (CPolPos == -1)
4152     return true;
4153 
4154   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4155 
4156   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4157   if ((TSFlags & (SIInstrFlags::SMRD)) &&
4158       (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
4159     Error(IDLoc, "invalid cache policy for SMRD instruction");
4160     return false;
4161   }
4162 
4163   if (isGFX90A() && (CPol & CPol::SCC)) {
4164     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4165     StringRef CStr(S.getPointer());
4166     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4167     Error(S, "scc is not supported on this GPU");
4168     return false;
4169   }
4170 
4171   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4172     return true;
4173 
4174   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4175     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4176       Error(IDLoc, "instruction must use glc");
4177       return false;
4178     }
4179   } else {
4180     if (CPol & CPol::GLC) {
4181       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4182       StringRef CStr(S.getPointer());
4183       S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
4184       Error(S, "instruction must not use glc");
4185       return false;
4186     }
4187   }
4188 
4189   return true;
4190 }
4191 
4192 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4193                                           const SMLoc &IDLoc,
4194                                           const OperandVector &Operands) {
4195   if (auto ErrMsg = validateLdsDirect(Inst)) {
4196     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4197     return false;
4198   }
4199   if (!validateSOPLiteral(Inst)) {
4200     Error(getLitLoc(Operands),
4201       "only one literal operand is allowed");
4202     return false;
4203   }
4204   if (!validateVOP3Literal(Inst, Operands)) {
4205     return false;
4206   }
4207   if (!validateConstantBusLimitations(Inst, Operands)) {
4208     return false;
4209   }
4210   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4211     return false;
4212   }
4213   if (!validateIntClampSupported(Inst)) {
4214     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4215       "integer clamping is not supported on this GPU");
4216     return false;
4217   }
4218   if (!validateOpSel(Inst)) {
4219     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4220       "invalid op_sel operand");
4221     return false;
4222   }
4223   if (!validateDPP(Inst, Operands)) {
4224     return false;
4225   }
4226   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4227   if (!validateMIMGD16(Inst)) {
4228     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4229       "d16 modifier is not supported on this GPU");
4230     return false;
4231   }
4232   if (!validateMIMGDim(Inst)) {
4233     Error(IDLoc, "dim modifier is required on this GPU");
4234     return false;
4235   }
4236   if (!validateMIMGMSAA(Inst)) {
4237     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4238           "invalid dim; must be MSAA type");
4239     return false;
4240   }
4241   if (!validateMIMGDataSize(Inst)) {
4242     Error(IDLoc,
4243       "image data size does not match dmask and tfe");
4244     return false;
4245   }
4246   if (!validateMIMGAddrSize(Inst)) {
4247     Error(IDLoc,
4248       "image address size does not match dim and a16");
4249     return false;
4250   }
4251   if (!validateMIMGAtomicDMask(Inst)) {
4252     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4253       "invalid atomic image dmask");
4254     return false;
4255   }
4256   if (!validateMIMGGatherDMask(Inst)) {
4257     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4258       "invalid image_gather dmask: only one bit must be set");
4259     return false;
4260   }
4261   if (!validateMovrels(Inst, Operands)) {
4262     return false;
4263   }
4264   if (!validateFlatOffset(Inst, Operands)) {
4265     return false;
4266   }
4267   if (!validateSMEMOffset(Inst, Operands)) {
4268     return false;
4269   }
4270   if (!validateMAIAccWrite(Inst, Operands)) {
4271     return false;
4272   }
4273   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4274     return false;
4275   }
4276 
4277   if (!validateAGPRLdSt(Inst)) {
4278     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4279     ? "invalid register class: data and dst should be all VGPR or AGPR"
4280     : "invalid register class: agpr loads and stores not supported on this GPU"
4281     );
4282     return false;
4283   }
4284   if (!validateVGPRAlign(Inst)) {
4285     Error(IDLoc,
4286       "invalid register class: vgpr tuples must be 64 bit aligned");
4287     return false;
4288   }
4289   if (!validateGWS(Inst, Operands)) {
4290     return false;
4291   }
4292 
4293   if (!validateDivScale(Inst)) {
4294     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4295     return false;
4296   }
4297   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4298     return false;
4299   }
4300 
4301   return true;
4302 }
4303 
4304 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4305                                             const FeatureBitset &FBS,
4306                                             unsigned VariantID = 0);
4307 
4308 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4309                                 const FeatureBitset &AvailableFeatures,
4310                                 unsigned VariantID);
4311 
4312 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4313                                        const FeatureBitset &FBS) {
4314   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4315 }
4316 
4317 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4318                                        const FeatureBitset &FBS,
4319                                        ArrayRef<unsigned> Variants) {
4320   for (auto Variant : Variants) {
4321     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4322       return true;
4323   }
4324 
4325   return false;
4326 }
4327 
4328 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4329                                                   const SMLoc &IDLoc) {
4330   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4331 
4332   // Check if requested instruction variant is supported.
4333   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4334     return false;
4335 
4336   // This instruction is not supported.
4337   // Clear any other pending errors because they are no longer relevant.
4338   getParser().clearPendingErrors();
4339 
4340   // Requested instruction variant is not supported.
4341   // Check if any other variants are supported.
4342   StringRef VariantName = getMatchedVariantName();
4343   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4344     return Error(IDLoc,
4345                  Twine(VariantName,
4346                        " variant of this instruction is not supported"));
4347   }
4348 
4349   // Finally check if this instruction is supported on any other GPU.
4350   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4351     return Error(IDLoc, "instruction not supported on this GPU");
4352   }
4353 
4354   // Instruction not supported on any GPU. Probably a typo.
4355   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4356   return Error(IDLoc, "invalid instruction" + Suggestion);
4357 }
4358 
4359 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4360                                               OperandVector &Operands,
4361                                               MCStreamer &Out,
4362                                               uint64_t &ErrorInfo,
4363                                               bool MatchingInlineAsm) {
4364   MCInst Inst;
4365   unsigned Result = Match_Success;
4366   for (auto Variant : getMatchedVariants()) {
4367     uint64_t EI;
4368     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4369                                   Variant);
4370     // We order match statuses from least to most specific. We use most specific
4371     // status as resulting
4372     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4373     if ((R == Match_Success) ||
4374         (R == Match_PreferE32) ||
4375         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4376         (R == Match_InvalidOperand && Result != Match_MissingFeature
4377                                    && Result != Match_PreferE32) ||
4378         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4379                                    && Result != Match_MissingFeature
4380                                    && Result != Match_PreferE32)) {
4381       Result = R;
4382       ErrorInfo = EI;
4383     }
4384     if (R == Match_Success)
4385       break;
4386   }
4387 
4388   if (Result == Match_Success) {
4389     if (!validateInstruction(Inst, IDLoc, Operands)) {
4390       return true;
4391     }
4392     Inst.setLoc(IDLoc);
4393     Out.emitInstruction(Inst, getSTI());
4394     return false;
4395   }
4396 
4397   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4398   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4399     return true;
4400   }
4401 
4402   switch (Result) {
4403   default: break;
4404   case Match_MissingFeature:
4405     // It has been verified that the specified instruction
4406     // mnemonic is valid. A match was found but it requires
4407     // features which are not supported on this GPU.
4408     return Error(IDLoc, "operands are not valid for this GPU or mode");
4409 
4410   case Match_InvalidOperand: {
4411     SMLoc ErrorLoc = IDLoc;
4412     if (ErrorInfo != ~0ULL) {
4413       if (ErrorInfo >= Operands.size()) {
4414         return Error(IDLoc, "too few operands for instruction");
4415       }
4416       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4417       if (ErrorLoc == SMLoc())
4418         ErrorLoc = IDLoc;
4419     }
4420     return Error(ErrorLoc, "invalid operand for instruction");
4421   }
4422 
4423   case Match_PreferE32:
4424     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4425                         "should be encoded as e32");
4426   case Match_MnemonicFail:
4427     llvm_unreachable("Invalid instructions should have been handled already");
4428   }
4429   llvm_unreachable("Implement any new match types added!");
4430 }
4431 
4432 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4433   int64_t Tmp = -1;
4434   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4435     return true;
4436   }
4437   if (getParser().parseAbsoluteExpression(Tmp)) {
4438     return true;
4439   }
4440   Ret = static_cast<uint32_t>(Tmp);
4441   return false;
4442 }
4443 
4444 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4445                                                uint32_t &Minor) {
4446   if (ParseAsAbsoluteExpression(Major))
4447     return TokError("invalid major version");
4448 
4449   if (!trySkipToken(AsmToken::Comma))
4450     return TokError("minor version number required, comma expected");
4451 
4452   if (ParseAsAbsoluteExpression(Minor))
4453     return TokError("invalid minor version");
4454 
4455   return false;
4456 }
4457 
4458 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4459   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4460     return TokError("directive only supported for amdgcn architecture");
4461 
4462   std::string TargetIDDirective;
4463   SMLoc TargetStart = getTok().getLoc();
4464   if (getParser().parseEscapedString(TargetIDDirective))
4465     return true;
4466 
4467   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4468   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4469     return getParser().Error(TargetRange.Start,
4470         (Twine(".amdgcn_target directive's target id ") +
4471          Twine(TargetIDDirective) +
4472          Twine(" does not match the specified target id ") +
4473          Twine(getTargetStreamer().getTargetID()->toString())).str());
4474 
4475   return false;
4476 }
4477 
4478 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4479   return Error(Range.Start, "value out of range", Range);
4480 }
4481 
4482 bool AMDGPUAsmParser::calculateGPRBlocks(
4483     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4484     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4485     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4486     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4487   // TODO(scott.linder): These calculations are duplicated from
4488   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4489   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4490 
4491   unsigned NumVGPRs = NextFreeVGPR;
4492   unsigned NumSGPRs = NextFreeSGPR;
4493 
4494   if (Version.Major >= 10)
4495     NumSGPRs = 0;
4496   else {
4497     unsigned MaxAddressableNumSGPRs =
4498         IsaInfo::getAddressableNumSGPRs(&getSTI());
4499 
4500     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4501         NumSGPRs > MaxAddressableNumSGPRs)
4502       return OutOfRangeError(SGPRRange);
4503 
4504     NumSGPRs +=
4505         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4506 
4507     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4508         NumSGPRs > MaxAddressableNumSGPRs)
4509       return OutOfRangeError(SGPRRange);
4510 
4511     if (Features.test(FeatureSGPRInitBug))
4512       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4513   }
4514 
4515   VGPRBlocks =
4516       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4517   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4518 
4519   return false;
4520 }
4521 
4522 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4523   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4524     return TokError("directive only supported for amdgcn architecture");
4525 
4526   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4527     return TokError("directive only supported for amdhsa OS");
4528 
4529   StringRef KernelName;
4530   if (getParser().parseIdentifier(KernelName))
4531     return true;
4532 
4533   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4534 
4535   StringSet<> Seen;
4536 
4537   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4538 
4539   SMRange VGPRRange;
4540   uint64_t NextFreeVGPR = 0;
4541   uint64_t AccumOffset = 0;
4542   SMRange SGPRRange;
4543   uint64_t NextFreeSGPR = 0;
4544   unsigned UserSGPRCount = 0;
4545   bool ReserveVCC = true;
4546   bool ReserveFlatScr = true;
4547   Optional<bool> EnableWavefrontSize32;
4548 
4549   while (true) {
4550     while (trySkipToken(AsmToken::EndOfStatement));
4551 
4552     StringRef ID;
4553     SMRange IDRange = getTok().getLocRange();
4554     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4555       return true;
4556 
4557     if (ID == ".end_amdhsa_kernel")
4558       break;
4559 
4560     if (Seen.find(ID) != Seen.end())
4561       return TokError(".amdhsa_ directives cannot be repeated");
4562     Seen.insert(ID);
4563 
4564     SMLoc ValStart = getLoc();
4565     int64_t IVal;
4566     if (getParser().parseAbsoluteExpression(IVal))
4567       return true;
4568     SMLoc ValEnd = getLoc();
4569     SMRange ValRange = SMRange(ValStart, ValEnd);
4570 
4571     if (IVal < 0)
4572       return OutOfRangeError(ValRange);
4573 
4574     uint64_t Val = IVal;
4575 
4576 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4577   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4578     return OutOfRangeError(RANGE);                                             \
4579   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4580 
4581     if (ID == ".amdhsa_group_segment_fixed_size") {
4582       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4583         return OutOfRangeError(ValRange);
4584       KD.group_segment_fixed_size = Val;
4585     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4586       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4587         return OutOfRangeError(ValRange);
4588       KD.private_segment_fixed_size = Val;
4589     } else if (ID == ".amdhsa_kernarg_size") {
4590       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4591         return OutOfRangeError(ValRange);
4592       KD.kernarg_size = Val;
4593     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4594       if (hasArchitectedFlatScratch())
4595         return Error(IDRange.Start,
4596                      "directive is not supported with architected flat scratch",
4597                      IDRange);
4598       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4599                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4600                        Val, ValRange);
4601       if (Val)
4602         UserSGPRCount += 4;
4603     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4604       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4605                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4606                        ValRange);
4607       if (Val)
4608         UserSGPRCount += 2;
4609     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4610       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4611                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4612                        ValRange);
4613       if (Val)
4614         UserSGPRCount += 2;
4615     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4616       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4617                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4618                        Val, ValRange);
4619       if (Val)
4620         UserSGPRCount += 2;
4621     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4622       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4623                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4624                        ValRange);
4625       if (Val)
4626         UserSGPRCount += 2;
4627     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4628       if (hasArchitectedFlatScratch())
4629         return Error(IDRange.Start,
4630                      "directive is not supported with architected flat scratch",
4631                      IDRange);
4632       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4633                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4634                        ValRange);
4635       if (Val)
4636         UserSGPRCount += 2;
4637     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4638       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4639                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4640                        Val, ValRange);
4641       if (Val)
4642         UserSGPRCount += 1;
4643     } else if (ID == ".amdhsa_wavefront_size32") {
4644       if (IVersion.Major < 10)
4645         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4646       EnableWavefrontSize32 = Val;
4647       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4648                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4649                        Val, ValRange);
4650     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4651       if (hasArchitectedFlatScratch())
4652         return Error(IDRange.Start,
4653                      "directive is not supported with architected flat scratch",
4654                      IDRange);
4655       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4656                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4657     } else if (ID == ".amdhsa_enable_private_segment") {
4658       if (!hasArchitectedFlatScratch())
4659         return Error(
4660             IDRange.Start,
4661             "directive is not supported without architected flat scratch",
4662             IDRange);
4663       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4664                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4665     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4666       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4667                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4668                        ValRange);
4669     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4670       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4671                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4672                        ValRange);
4673     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4674       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4675                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4676                        ValRange);
4677     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4678       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4679                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4680                        ValRange);
4681     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4682       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4683                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4684                        ValRange);
4685     } else if (ID == ".amdhsa_next_free_vgpr") {
4686       VGPRRange = ValRange;
4687       NextFreeVGPR = Val;
4688     } else if (ID == ".amdhsa_next_free_sgpr") {
4689       SGPRRange = ValRange;
4690       NextFreeSGPR = Val;
4691     } else if (ID == ".amdhsa_accum_offset") {
4692       if (!isGFX90A())
4693         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4694       AccumOffset = Val;
4695     } else if (ID == ".amdhsa_reserve_vcc") {
4696       if (!isUInt<1>(Val))
4697         return OutOfRangeError(ValRange);
4698       ReserveVCC = Val;
4699     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4700       if (IVersion.Major < 7)
4701         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4702       if (hasArchitectedFlatScratch())
4703         return Error(IDRange.Start,
4704                      "directive is not supported with architected flat scratch",
4705                      IDRange);
4706       if (!isUInt<1>(Val))
4707         return OutOfRangeError(ValRange);
4708       ReserveFlatScr = Val;
4709     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4710       if (IVersion.Major < 8)
4711         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4712       if (!isUInt<1>(Val))
4713         return OutOfRangeError(ValRange);
4714       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
4715         return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
4716                                  IDRange);
4717     } else if (ID == ".amdhsa_float_round_mode_32") {
4718       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4719                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4720     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4721       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4722                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4723     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4724       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4725                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4726     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4727       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4728                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4729                        ValRange);
4730     } else if (ID == ".amdhsa_dx10_clamp") {
4731       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4732                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4733     } else if (ID == ".amdhsa_ieee_mode") {
4734       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4735                        Val, ValRange);
4736     } else if (ID == ".amdhsa_fp16_overflow") {
4737       if (IVersion.Major < 9)
4738         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
4739       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4740                        ValRange);
4741     } else if (ID == ".amdhsa_tg_split") {
4742       if (!isGFX90A())
4743         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4744       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
4745                        ValRange);
4746     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4747       if (IVersion.Major < 10)
4748         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4749       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4750                        ValRange);
4751     } else if (ID == ".amdhsa_memory_ordered") {
4752       if (IVersion.Major < 10)
4753         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4754       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4755                        ValRange);
4756     } else if (ID == ".amdhsa_forward_progress") {
4757       if (IVersion.Major < 10)
4758         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4759       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4760                        ValRange);
4761     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4762       PARSE_BITS_ENTRY(
4763           KD.compute_pgm_rsrc2,
4764           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4765           ValRange);
4766     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4767       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4768                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4769                        Val, ValRange);
4770     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4771       PARSE_BITS_ENTRY(
4772           KD.compute_pgm_rsrc2,
4773           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4774           ValRange);
4775     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4776       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4777                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4778                        Val, ValRange);
4779     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4780       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4781                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4782                        Val, ValRange);
4783     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4784       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4785                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4786                        Val, ValRange);
4787     } else if (ID == ".amdhsa_exception_int_div_zero") {
4788       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4789                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4790                        Val, ValRange);
4791     } else {
4792       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
4793     }
4794 
4795 #undef PARSE_BITS_ENTRY
4796   }
4797 
4798   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4799     return TokError(".amdhsa_next_free_vgpr directive is required");
4800 
4801   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4802     return TokError(".amdhsa_next_free_sgpr directive is required");
4803 
4804   unsigned VGPRBlocks;
4805   unsigned SGPRBlocks;
4806   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4807                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
4808                          EnableWavefrontSize32, NextFreeVGPR,
4809                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4810                          SGPRBlocks))
4811     return true;
4812 
4813   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4814           VGPRBlocks))
4815     return OutOfRangeError(VGPRRange);
4816   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4817                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4818 
4819   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4820           SGPRBlocks))
4821     return OutOfRangeError(SGPRRange);
4822   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4823                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4824                   SGPRBlocks);
4825 
4826   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4827     return TokError("too many user SGPRs enabled");
4828   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4829                   UserSGPRCount);
4830 
4831   if (isGFX90A()) {
4832     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
4833       return TokError(".amdhsa_accum_offset directive is required");
4834     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
4835       return TokError("accum_offset should be in range [4..256] in "
4836                       "increments of 4");
4837     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
4838       return TokError("accum_offset exceeds total VGPR allocation");
4839     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
4840                     (AccumOffset / 4 - 1));
4841   }
4842 
4843   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4844       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4845       ReserveFlatScr);
4846   return false;
4847 }
4848 
4849 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4850   uint32_t Major;
4851   uint32_t Minor;
4852 
4853   if (ParseDirectiveMajorMinor(Major, Minor))
4854     return true;
4855 
4856   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4857   return false;
4858 }
4859 
4860 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4861   uint32_t Major;
4862   uint32_t Minor;
4863   uint32_t Stepping;
4864   StringRef VendorName;
4865   StringRef ArchName;
4866 
4867   // If this directive has no arguments, then use the ISA version for the
4868   // targeted GPU.
4869   if (isToken(AsmToken::EndOfStatement)) {
4870     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4871     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
4872                                                         ISA.Stepping,
4873                                                         "AMD", "AMDGPU");
4874     return false;
4875   }
4876 
4877   if (ParseDirectiveMajorMinor(Major, Minor))
4878     return true;
4879 
4880   if (!trySkipToken(AsmToken::Comma))
4881     return TokError("stepping version number required, comma expected");
4882 
4883   if (ParseAsAbsoluteExpression(Stepping))
4884     return TokError("invalid stepping version");
4885 
4886   if (!trySkipToken(AsmToken::Comma))
4887     return TokError("vendor name required, comma expected");
4888 
4889   if (!parseString(VendorName, "invalid vendor name"))
4890     return true;
4891 
4892   if (!trySkipToken(AsmToken::Comma))
4893     return TokError("arch name required, comma expected");
4894 
4895   if (!parseString(ArchName, "invalid arch name"))
4896     return true;
4897 
4898   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
4899                                                       VendorName, ArchName);
4900   return false;
4901 }
4902 
4903 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4904                                                amd_kernel_code_t &Header) {
4905   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4906   // assembly for backwards compatibility.
4907   if (ID == "max_scratch_backing_memory_byte_size") {
4908     Parser.eatToEndOfStatement();
4909     return false;
4910   }
4911 
4912   SmallString<40> ErrStr;
4913   raw_svector_ostream Err(ErrStr);
4914   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4915     return TokError(Err.str());
4916   }
4917   Lex();
4918 
4919   if (ID == "enable_wavefront_size32") {
4920     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4921       if (!isGFX10Plus())
4922         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4923       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4924         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4925     } else {
4926       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4927         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4928     }
4929   }
4930 
4931   if (ID == "wavefront_size") {
4932     if (Header.wavefront_size == 5) {
4933       if (!isGFX10Plus())
4934         return TokError("wavefront_size=5 is only allowed on GFX10+");
4935       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4936         return TokError("wavefront_size=5 requires +WavefrontSize32");
4937     } else if (Header.wavefront_size == 6) {
4938       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4939         return TokError("wavefront_size=6 requires +WavefrontSize64");
4940     }
4941   }
4942 
4943   if (ID == "enable_wgp_mode") {
4944     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
4945         !isGFX10Plus())
4946       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4947   }
4948 
4949   if (ID == "enable_mem_ordered") {
4950     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
4951         !isGFX10Plus())
4952       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4953   }
4954 
4955   if (ID == "enable_fwd_progress") {
4956     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
4957         !isGFX10Plus())
4958       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4959   }
4960 
4961   return false;
4962 }
4963 
4964 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4965   amd_kernel_code_t Header;
4966   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4967 
4968   while (true) {
4969     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
4970     // will set the current token to EndOfStatement.
4971     while(trySkipToken(AsmToken::EndOfStatement));
4972 
4973     StringRef ID;
4974     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
4975       return true;
4976 
4977     if (ID == ".end_amd_kernel_code_t")
4978       break;
4979 
4980     if (ParseAMDKernelCodeTValue(ID, Header))
4981       return true;
4982   }
4983 
4984   getTargetStreamer().EmitAMDKernelCodeT(Header);
4985 
4986   return false;
4987 }
4988 
4989 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4990   StringRef KernelName;
4991   if (!parseId(KernelName, "expected symbol name"))
4992     return true;
4993 
4994   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4995                                            ELF::STT_AMDGPU_HSA_KERNEL);
4996 
4997   KernelScope.initialize(getContext());
4998   return false;
4999 }
5000 
5001 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5002   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5003     return Error(getLoc(),
5004                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5005                  "architectures");
5006   }
5007 
5008   auto TargetIDDirective = getLexer().getTok().getStringContents();
5009   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5010     return Error(getParser().getTok().getLoc(), "target id must match options");
5011 
5012   getTargetStreamer().EmitISAVersion();
5013   Lex();
5014 
5015   return false;
5016 }
5017 
5018 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5019   const char *AssemblerDirectiveBegin;
5020   const char *AssemblerDirectiveEnd;
5021   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5022       isHsaAbiVersion3Or4(&getSTI())
5023           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5024                             HSAMD::V3::AssemblerDirectiveEnd)
5025           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5026                             HSAMD::AssemblerDirectiveEnd);
5027 
5028   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5029     return Error(getLoc(),
5030                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5031                  "not available on non-amdhsa OSes")).str());
5032   }
5033 
5034   std::string HSAMetadataString;
5035   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5036                           HSAMetadataString))
5037     return true;
5038 
5039   if (isHsaAbiVersion3Or4(&getSTI())) {
5040     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5041       return Error(getLoc(), "invalid HSA metadata");
5042   } else {
5043     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5044       return Error(getLoc(), "invalid HSA metadata");
5045   }
5046 
5047   return false;
5048 }
5049 
5050 /// Common code to parse out a block of text (typically YAML) between start and
5051 /// end directives.
5052 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5053                                           const char *AssemblerDirectiveEnd,
5054                                           std::string &CollectString) {
5055 
5056   raw_string_ostream CollectStream(CollectString);
5057 
5058   getLexer().setSkipSpace(false);
5059 
5060   bool FoundEnd = false;
5061   while (!isToken(AsmToken::Eof)) {
5062     while (isToken(AsmToken::Space)) {
5063       CollectStream << getTokenStr();
5064       Lex();
5065     }
5066 
5067     if (trySkipId(AssemblerDirectiveEnd)) {
5068       FoundEnd = true;
5069       break;
5070     }
5071 
5072     CollectStream << Parser.parseStringToEndOfStatement()
5073                   << getContext().getAsmInfo()->getSeparatorString();
5074 
5075     Parser.eatToEndOfStatement();
5076   }
5077 
5078   getLexer().setSkipSpace(true);
5079 
5080   if (isToken(AsmToken::Eof) && !FoundEnd) {
5081     return TokError(Twine("expected directive ") +
5082                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5083   }
5084 
5085   CollectStream.flush();
5086   return false;
5087 }
5088 
5089 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5090 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5091   std::string String;
5092   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5093                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5094     return true;
5095 
5096   auto PALMetadata = getTargetStreamer().getPALMetadata();
5097   if (!PALMetadata->setFromString(String))
5098     return Error(getLoc(), "invalid PAL metadata");
5099   return false;
5100 }
5101 
5102 /// Parse the assembler directive for old linear-format PAL metadata.
5103 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5104   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5105     return Error(getLoc(),
5106                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5107                  "not available on non-amdpal OSes")).str());
5108   }
5109 
5110   auto PALMetadata = getTargetStreamer().getPALMetadata();
5111   PALMetadata->setLegacy();
5112   for (;;) {
5113     uint32_t Key, Value;
5114     if (ParseAsAbsoluteExpression(Key)) {
5115       return TokError(Twine("invalid value in ") +
5116                       Twine(PALMD::AssemblerDirective));
5117     }
5118     if (!trySkipToken(AsmToken::Comma)) {
5119       return TokError(Twine("expected an even number of values in ") +
5120                       Twine(PALMD::AssemblerDirective));
5121     }
5122     if (ParseAsAbsoluteExpression(Value)) {
5123       return TokError(Twine("invalid value in ") +
5124                       Twine(PALMD::AssemblerDirective));
5125     }
5126     PALMetadata->setRegister(Key, Value);
5127     if (!trySkipToken(AsmToken::Comma))
5128       break;
5129   }
5130   return false;
5131 }
5132 
5133 /// ParseDirectiveAMDGPULDS
5134 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
5135 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5136   if (getParser().checkForValidSection())
5137     return true;
5138 
5139   StringRef Name;
5140   SMLoc NameLoc = getLoc();
5141   if (getParser().parseIdentifier(Name))
5142     return TokError("expected identifier in directive");
5143 
5144   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5145   if (parseToken(AsmToken::Comma, "expected ','"))
5146     return true;
5147 
5148   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5149 
5150   int64_t Size;
5151   SMLoc SizeLoc = getLoc();
5152   if (getParser().parseAbsoluteExpression(Size))
5153     return true;
5154   if (Size < 0)
5155     return Error(SizeLoc, "size must be non-negative");
5156   if (Size > LocalMemorySize)
5157     return Error(SizeLoc, "size is too large");
5158 
5159   int64_t Alignment = 4;
5160   if (trySkipToken(AsmToken::Comma)) {
5161     SMLoc AlignLoc = getLoc();
5162     if (getParser().parseAbsoluteExpression(Alignment))
5163       return true;
5164     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5165       return Error(AlignLoc, "alignment must be a power of two");
5166 
5167     // Alignment larger than the size of LDS is possible in theory, as long
5168     // as the linker manages to place to symbol at address 0, but we do want
5169     // to make sure the alignment fits nicely into a 32-bit integer.
5170     if (Alignment >= 1u << 31)
5171       return Error(AlignLoc, "alignment is too large");
5172   }
5173 
5174   if (parseToken(AsmToken::EndOfStatement,
5175                  "unexpected token in '.amdgpu_lds' directive"))
5176     return true;
5177 
5178   Symbol->redefineIfPossible();
5179   if (!Symbol->isUndefined())
5180     return Error(NameLoc, "invalid symbol redefinition");
5181 
5182   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5183   return false;
5184 }
5185 
5186 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5187   StringRef IDVal = DirectiveID.getString();
5188 
5189   if (isHsaAbiVersion3Or4(&getSTI())) {
5190     if (IDVal == ".amdhsa_kernel")
5191      return ParseDirectiveAMDHSAKernel();
5192 
5193     // TODO: Restructure/combine with PAL metadata directive.
5194     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5195       return ParseDirectiveHSAMetadata();
5196   } else {
5197     if (IDVal == ".hsa_code_object_version")
5198       return ParseDirectiveHSACodeObjectVersion();
5199 
5200     if (IDVal == ".hsa_code_object_isa")
5201       return ParseDirectiveHSACodeObjectISA();
5202 
5203     if (IDVal == ".amd_kernel_code_t")
5204       return ParseDirectiveAMDKernelCodeT();
5205 
5206     if (IDVal == ".amdgpu_hsa_kernel")
5207       return ParseDirectiveAMDGPUHsaKernel();
5208 
5209     if (IDVal == ".amd_amdgpu_isa")
5210       return ParseDirectiveISAVersion();
5211 
5212     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5213       return ParseDirectiveHSAMetadata();
5214   }
5215 
5216   if (IDVal == ".amdgcn_target")
5217     return ParseDirectiveAMDGCNTarget();
5218 
5219   if (IDVal == ".amdgpu_lds")
5220     return ParseDirectiveAMDGPULDS();
5221 
5222   if (IDVal == PALMD::AssemblerDirectiveBegin)
5223     return ParseDirectivePALMetadataBegin();
5224 
5225   if (IDVal == PALMD::AssemblerDirective)
5226     return ParseDirectivePALMetadata();
5227 
5228   return true;
5229 }
5230 
5231 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5232                                            unsigned RegNo) {
5233 
5234   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
5235        R.isValid(); ++R) {
5236     if (*R == RegNo)
5237       return isGFX9Plus();
5238   }
5239 
5240   // GFX10 has 2 more SGPRs 104 and 105.
5241   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
5242        R.isValid(); ++R) {
5243     if (*R == RegNo)
5244       return hasSGPR104_SGPR105();
5245   }
5246 
5247   switch (RegNo) {
5248   case AMDGPU::SRC_SHARED_BASE:
5249   case AMDGPU::SRC_SHARED_LIMIT:
5250   case AMDGPU::SRC_PRIVATE_BASE:
5251   case AMDGPU::SRC_PRIVATE_LIMIT:
5252   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5253     return isGFX9Plus();
5254   case AMDGPU::TBA:
5255   case AMDGPU::TBA_LO:
5256   case AMDGPU::TBA_HI:
5257   case AMDGPU::TMA:
5258   case AMDGPU::TMA_LO:
5259   case AMDGPU::TMA_HI:
5260     return !isGFX9Plus();
5261   case AMDGPU::XNACK_MASK:
5262   case AMDGPU::XNACK_MASK_LO:
5263   case AMDGPU::XNACK_MASK_HI:
5264     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5265   case AMDGPU::SGPR_NULL:
5266     return isGFX10Plus();
5267   default:
5268     break;
5269   }
5270 
5271   if (isCI())
5272     return true;
5273 
5274   if (isSI() || isGFX10Plus()) {
5275     // No flat_scr on SI.
5276     // On GFX10 flat scratch is not a valid register operand and can only be
5277     // accessed with s_setreg/s_getreg.
5278     switch (RegNo) {
5279     case AMDGPU::FLAT_SCR:
5280     case AMDGPU::FLAT_SCR_LO:
5281     case AMDGPU::FLAT_SCR_HI:
5282       return false;
5283     default:
5284       return true;
5285     }
5286   }
5287 
5288   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5289   // SI/CI have.
5290   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
5291        R.isValid(); ++R) {
5292     if (*R == RegNo)
5293       return hasSGPR102_SGPR103();
5294   }
5295 
5296   return true;
5297 }
5298 
5299 OperandMatchResultTy
5300 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5301                               OperandMode Mode) {
5302   // Try to parse with a custom parser
5303   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5304 
5305   // If we successfully parsed the operand or if there as an error parsing,
5306   // we are done.
5307   //
5308   // If we are parsing after we reach EndOfStatement then this means we
5309   // are appending default values to the Operands list.  This is only done
5310   // by custom parser, so we shouldn't continue on to the generic parsing.
5311   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5312       isToken(AsmToken::EndOfStatement))
5313     return ResTy;
5314 
5315   SMLoc RBraceLoc;
5316   SMLoc LBraceLoc = getLoc();
5317   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5318     unsigned Prefix = Operands.size();
5319 
5320     for (;;) {
5321       auto Loc = getLoc();
5322       ResTy = parseReg(Operands);
5323       if (ResTy == MatchOperand_NoMatch)
5324         Error(Loc, "expected a register");
5325       if (ResTy != MatchOperand_Success)
5326         return MatchOperand_ParseFail;
5327 
5328       RBraceLoc = getLoc();
5329       if (trySkipToken(AsmToken::RBrac))
5330         break;
5331 
5332       if (!skipToken(AsmToken::Comma,
5333                      "expected a comma or a closing square bracket")) {
5334         return MatchOperand_ParseFail;
5335       }
5336     }
5337 
5338     if (Operands.size() - Prefix > 1) {
5339       Operands.insert(Operands.begin() + Prefix,
5340                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5341       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5342     }
5343 
5344     return MatchOperand_Success;
5345   }
5346 
5347   return parseRegOrImm(Operands);
5348 }
5349 
5350 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5351   // Clear any forced encodings from the previous instruction.
5352   setForcedEncodingSize(0);
5353   setForcedDPP(false);
5354   setForcedSDWA(false);
5355 
5356   if (Name.endswith("_e64")) {
5357     setForcedEncodingSize(64);
5358     return Name.substr(0, Name.size() - 4);
5359   } else if (Name.endswith("_e32")) {
5360     setForcedEncodingSize(32);
5361     return Name.substr(0, Name.size() - 4);
5362   } else if (Name.endswith("_dpp")) {
5363     setForcedDPP(true);
5364     return Name.substr(0, Name.size() - 4);
5365   } else if (Name.endswith("_sdwa")) {
5366     setForcedSDWA(true);
5367     return Name.substr(0, Name.size() - 5);
5368   }
5369   return Name;
5370 }
5371 
5372 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5373                                        StringRef Name,
5374                                        SMLoc NameLoc, OperandVector &Operands) {
5375   // Add the instruction mnemonic
5376   Name = parseMnemonicSuffix(Name);
5377   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5378 
5379   bool IsMIMG = Name.startswith("image_");
5380 
5381   while (!trySkipToken(AsmToken::EndOfStatement)) {
5382     OperandMode Mode = OperandMode_Default;
5383     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5384       Mode = OperandMode_NSA;
5385     CPolSeen = 0;
5386     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5387 
5388     if (Res != MatchOperand_Success) {
5389       checkUnsupportedInstruction(Name, NameLoc);
5390       if (!Parser.hasPendingError()) {
5391         // FIXME: use real operand location rather than the current location.
5392         StringRef Msg =
5393           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5394                                             "not a valid operand.";
5395         Error(getLoc(), Msg);
5396       }
5397       while (!trySkipToken(AsmToken::EndOfStatement)) {
5398         lex();
5399       }
5400       return true;
5401     }
5402 
5403     // Eat the comma or space if there is one.
5404     trySkipToken(AsmToken::Comma);
5405   }
5406 
5407   return false;
5408 }
5409 
5410 //===----------------------------------------------------------------------===//
5411 // Utility functions
5412 //===----------------------------------------------------------------------===//
5413 
5414 OperandMatchResultTy
5415 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5416 
5417   if (!trySkipId(Prefix, AsmToken::Colon))
5418     return MatchOperand_NoMatch;
5419 
5420   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5421 }
5422 
5423 OperandMatchResultTy
5424 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5425                                     AMDGPUOperand::ImmTy ImmTy,
5426                                     bool (*ConvertResult)(int64_t&)) {
5427   SMLoc S = getLoc();
5428   int64_t Value = 0;
5429 
5430   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5431   if (Res != MatchOperand_Success)
5432     return Res;
5433 
5434   if (ConvertResult && !ConvertResult(Value)) {
5435     Error(S, "invalid " + StringRef(Prefix) + " value.");
5436   }
5437 
5438   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5439   return MatchOperand_Success;
5440 }
5441 
5442 OperandMatchResultTy
5443 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5444                                              OperandVector &Operands,
5445                                              AMDGPUOperand::ImmTy ImmTy,
5446                                              bool (*ConvertResult)(int64_t&)) {
5447   SMLoc S = getLoc();
5448   if (!trySkipId(Prefix, AsmToken::Colon))
5449     return MatchOperand_NoMatch;
5450 
5451   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5452     return MatchOperand_ParseFail;
5453 
5454   unsigned Val = 0;
5455   const unsigned MaxSize = 4;
5456 
5457   // FIXME: How to verify the number of elements matches the number of src
5458   // operands?
5459   for (int I = 0; ; ++I) {
5460     int64_t Op;
5461     SMLoc Loc = getLoc();
5462     if (!parseExpr(Op))
5463       return MatchOperand_ParseFail;
5464 
5465     if (Op != 0 && Op != 1) {
5466       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5467       return MatchOperand_ParseFail;
5468     }
5469 
5470     Val |= (Op << I);
5471 
5472     if (trySkipToken(AsmToken::RBrac))
5473       break;
5474 
5475     if (I + 1 == MaxSize) {
5476       Error(getLoc(), "expected a closing square bracket");
5477       return MatchOperand_ParseFail;
5478     }
5479 
5480     if (!skipToken(AsmToken::Comma, "expected a comma"))
5481       return MatchOperand_ParseFail;
5482   }
5483 
5484   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5485   return MatchOperand_Success;
5486 }
5487 
5488 OperandMatchResultTy
5489 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5490                                AMDGPUOperand::ImmTy ImmTy) {
5491   int64_t Bit;
5492   SMLoc S = getLoc();
5493 
5494   if (trySkipId(Name)) {
5495     Bit = 1;
5496   } else if (trySkipId("no", Name)) {
5497     Bit = 0;
5498   } else {
5499     return MatchOperand_NoMatch;
5500   }
5501 
5502   if (Name == "r128" && !hasMIMG_R128()) {
5503     Error(S, "r128 modifier is not supported on this GPU");
5504     return MatchOperand_ParseFail;
5505   }
5506   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5507     Error(S, "a16 modifier is not supported on this GPU");
5508     return MatchOperand_ParseFail;
5509   }
5510 
5511   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5512     ImmTy = AMDGPUOperand::ImmTyR128A16;
5513 
5514   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5515   return MatchOperand_Success;
5516 }
5517 
5518 OperandMatchResultTy
5519 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5520   unsigned CPolOn = 0;
5521   unsigned CPolOff = 0;
5522   SMLoc S = getLoc();
5523 
5524   if (trySkipId("glc"))
5525     CPolOn = AMDGPU::CPol::GLC;
5526   else if (trySkipId("noglc"))
5527     CPolOff = AMDGPU::CPol::GLC;
5528   else if (trySkipId("slc"))
5529     CPolOn = AMDGPU::CPol::SLC;
5530   else if (trySkipId("noslc"))
5531     CPolOff = AMDGPU::CPol::SLC;
5532   else if (trySkipId("dlc"))
5533     CPolOn = AMDGPU::CPol::DLC;
5534   else if (trySkipId("nodlc"))
5535     CPolOff = AMDGPU::CPol::DLC;
5536   else if (trySkipId("scc"))
5537     CPolOn = AMDGPU::CPol::SCC;
5538   else if (trySkipId("noscc"))
5539     CPolOff = AMDGPU::CPol::SCC;
5540   else
5541     return MatchOperand_NoMatch;
5542 
5543   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5544     Error(S, "dlc modifier is not supported on this GPU");
5545     return MatchOperand_ParseFail;
5546   }
5547 
5548   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5549     Error(S, "scc modifier is not supported on this GPU");
5550     return MatchOperand_ParseFail;
5551   }
5552 
5553   if (CPolSeen & (CPolOn | CPolOff)) {
5554     Error(S, "duplicate cache policy modifier");
5555     return MatchOperand_ParseFail;
5556   }
5557 
5558   CPolSeen |= (CPolOn | CPolOff);
5559 
5560   for (unsigned I = 1; I != Operands.size(); ++I) {
5561     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5562     if (Op.isCPol()) {
5563       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5564       return MatchOperand_Success;
5565     }
5566   }
5567 
5568   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5569                                               AMDGPUOperand::ImmTyCPol));
5570 
5571   return MatchOperand_Success;
5572 }
5573 
5574 static void addOptionalImmOperand(
5575   MCInst& Inst, const OperandVector& Operands,
5576   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5577   AMDGPUOperand::ImmTy ImmT,
5578   int64_t Default = 0) {
5579   auto i = OptionalIdx.find(ImmT);
5580   if (i != OptionalIdx.end()) {
5581     unsigned Idx = i->second;
5582     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5583   } else {
5584     Inst.addOperand(MCOperand::createImm(Default));
5585   }
5586 }
5587 
5588 OperandMatchResultTy
5589 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5590                                        StringRef &Value,
5591                                        SMLoc &StringLoc) {
5592   if (!trySkipId(Prefix, AsmToken::Colon))
5593     return MatchOperand_NoMatch;
5594 
5595   StringLoc = getLoc();
5596   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5597                                                   : MatchOperand_ParseFail;
5598 }
5599 
5600 //===----------------------------------------------------------------------===//
5601 // MTBUF format
5602 //===----------------------------------------------------------------------===//
5603 
5604 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5605                                   int64_t MaxVal,
5606                                   int64_t &Fmt) {
5607   int64_t Val;
5608   SMLoc Loc = getLoc();
5609 
5610   auto Res = parseIntWithPrefix(Pref, Val);
5611   if (Res == MatchOperand_ParseFail)
5612     return false;
5613   if (Res == MatchOperand_NoMatch)
5614     return true;
5615 
5616   if (Val < 0 || Val > MaxVal) {
5617     Error(Loc, Twine("out of range ", StringRef(Pref)));
5618     return false;
5619   }
5620 
5621   Fmt = Val;
5622   return true;
5623 }
5624 
5625 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5626 // values to live in a joint format operand in the MCInst encoding.
5627 OperandMatchResultTy
5628 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5629   using namespace llvm::AMDGPU::MTBUFFormat;
5630 
5631   int64_t Dfmt = DFMT_UNDEF;
5632   int64_t Nfmt = NFMT_UNDEF;
5633 
5634   // dfmt and nfmt can appear in either order, and each is optional.
5635   for (int I = 0; I < 2; ++I) {
5636     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5637       return MatchOperand_ParseFail;
5638 
5639     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5640       return MatchOperand_ParseFail;
5641     }
5642     // Skip optional comma between dfmt/nfmt
5643     // but guard against 2 commas following each other.
5644     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5645         !peekToken().is(AsmToken::Comma)) {
5646       trySkipToken(AsmToken::Comma);
5647     }
5648   }
5649 
5650   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5651     return MatchOperand_NoMatch;
5652 
5653   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5654   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5655 
5656   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5657   return MatchOperand_Success;
5658 }
5659 
5660 OperandMatchResultTy
5661 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5662   using namespace llvm::AMDGPU::MTBUFFormat;
5663 
5664   int64_t Fmt = UFMT_UNDEF;
5665 
5666   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5667     return MatchOperand_ParseFail;
5668 
5669   if (Fmt == UFMT_UNDEF)
5670     return MatchOperand_NoMatch;
5671 
5672   Format = Fmt;
5673   return MatchOperand_Success;
5674 }
5675 
5676 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5677                                     int64_t &Nfmt,
5678                                     StringRef FormatStr,
5679                                     SMLoc Loc) {
5680   using namespace llvm::AMDGPU::MTBUFFormat;
5681   int64_t Format;
5682 
5683   Format = getDfmt(FormatStr);
5684   if (Format != DFMT_UNDEF) {
5685     Dfmt = Format;
5686     return true;
5687   }
5688 
5689   Format = getNfmt(FormatStr, getSTI());
5690   if (Format != NFMT_UNDEF) {
5691     Nfmt = Format;
5692     return true;
5693   }
5694 
5695   Error(Loc, "unsupported format");
5696   return false;
5697 }
5698 
5699 OperandMatchResultTy
5700 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5701                                           SMLoc FormatLoc,
5702                                           int64_t &Format) {
5703   using namespace llvm::AMDGPU::MTBUFFormat;
5704 
5705   int64_t Dfmt = DFMT_UNDEF;
5706   int64_t Nfmt = NFMT_UNDEF;
5707   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5708     return MatchOperand_ParseFail;
5709 
5710   if (trySkipToken(AsmToken::Comma)) {
5711     StringRef Str;
5712     SMLoc Loc = getLoc();
5713     if (!parseId(Str, "expected a format string") ||
5714         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5715       return MatchOperand_ParseFail;
5716     }
5717     if (Dfmt == DFMT_UNDEF) {
5718       Error(Loc, "duplicate numeric format");
5719       return MatchOperand_ParseFail;
5720     } else if (Nfmt == NFMT_UNDEF) {
5721       Error(Loc, "duplicate data format");
5722       return MatchOperand_ParseFail;
5723     }
5724   }
5725 
5726   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5727   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5728 
5729   if (isGFX10Plus()) {
5730     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5731     if (Ufmt == UFMT_UNDEF) {
5732       Error(FormatLoc, "unsupported format");
5733       return MatchOperand_ParseFail;
5734     }
5735     Format = Ufmt;
5736   } else {
5737     Format = encodeDfmtNfmt(Dfmt, Nfmt);
5738   }
5739 
5740   return MatchOperand_Success;
5741 }
5742 
5743 OperandMatchResultTy
5744 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5745                                             SMLoc Loc,
5746                                             int64_t &Format) {
5747   using namespace llvm::AMDGPU::MTBUFFormat;
5748 
5749   auto Id = getUnifiedFormat(FormatStr);
5750   if (Id == UFMT_UNDEF)
5751     return MatchOperand_NoMatch;
5752 
5753   if (!isGFX10Plus()) {
5754     Error(Loc, "unified format is not supported on this GPU");
5755     return MatchOperand_ParseFail;
5756   }
5757 
5758   Format = Id;
5759   return MatchOperand_Success;
5760 }
5761 
5762 OperandMatchResultTy
5763 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5764   using namespace llvm::AMDGPU::MTBUFFormat;
5765   SMLoc Loc = getLoc();
5766 
5767   if (!parseExpr(Format))
5768     return MatchOperand_ParseFail;
5769   if (!isValidFormatEncoding(Format, getSTI())) {
5770     Error(Loc, "out of range format");
5771     return MatchOperand_ParseFail;
5772   }
5773 
5774   return MatchOperand_Success;
5775 }
5776 
5777 OperandMatchResultTy
5778 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5779   using namespace llvm::AMDGPU::MTBUFFormat;
5780 
5781   if (!trySkipId("format", AsmToken::Colon))
5782     return MatchOperand_NoMatch;
5783 
5784   if (trySkipToken(AsmToken::LBrac)) {
5785     StringRef FormatStr;
5786     SMLoc Loc = getLoc();
5787     if (!parseId(FormatStr, "expected a format string"))
5788       return MatchOperand_ParseFail;
5789 
5790     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5791     if (Res == MatchOperand_NoMatch)
5792       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5793     if (Res != MatchOperand_Success)
5794       return Res;
5795 
5796     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5797       return MatchOperand_ParseFail;
5798 
5799     return MatchOperand_Success;
5800   }
5801 
5802   return parseNumericFormat(Format);
5803 }
5804 
5805 OperandMatchResultTy
5806 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5807   using namespace llvm::AMDGPU::MTBUFFormat;
5808 
5809   int64_t Format = getDefaultFormatEncoding(getSTI());
5810   OperandMatchResultTy Res;
5811   SMLoc Loc = getLoc();
5812 
5813   // Parse legacy format syntax.
5814   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5815   if (Res == MatchOperand_ParseFail)
5816     return Res;
5817 
5818   bool FormatFound = (Res == MatchOperand_Success);
5819 
5820   Operands.push_back(
5821     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5822 
5823   if (FormatFound)
5824     trySkipToken(AsmToken::Comma);
5825 
5826   if (isToken(AsmToken::EndOfStatement)) {
5827     // We are expecting an soffset operand,
5828     // but let matcher handle the error.
5829     return MatchOperand_Success;
5830   }
5831 
5832   // Parse soffset.
5833   Res = parseRegOrImm(Operands);
5834   if (Res != MatchOperand_Success)
5835     return Res;
5836 
5837   trySkipToken(AsmToken::Comma);
5838 
5839   if (!FormatFound) {
5840     Res = parseSymbolicOrNumericFormat(Format);
5841     if (Res == MatchOperand_ParseFail)
5842       return Res;
5843     if (Res == MatchOperand_Success) {
5844       auto Size = Operands.size();
5845       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5846       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5847       Op.setImm(Format);
5848     }
5849     return MatchOperand_Success;
5850   }
5851 
5852   if (isId("format") && peekToken().is(AsmToken::Colon)) {
5853     Error(getLoc(), "duplicate format");
5854     return MatchOperand_ParseFail;
5855   }
5856   return MatchOperand_Success;
5857 }
5858 
5859 //===----------------------------------------------------------------------===//
5860 // ds
5861 //===----------------------------------------------------------------------===//
5862 
5863 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5864                                     const OperandVector &Operands) {
5865   OptionalImmIndexMap OptionalIdx;
5866 
5867   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5868     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5869 
5870     // Add the register arguments
5871     if (Op.isReg()) {
5872       Op.addRegOperands(Inst, 1);
5873       continue;
5874     }
5875 
5876     // Handle optional arguments
5877     OptionalIdx[Op.getImmTy()] = i;
5878   }
5879 
5880   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
5881   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
5882   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5883 
5884   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5885 }
5886 
5887 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
5888                                 bool IsGdsHardcoded) {
5889   OptionalImmIndexMap OptionalIdx;
5890 
5891   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5892     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5893 
5894     // Add the register arguments
5895     if (Op.isReg()) {
5896       Op.addRegOperands(Inst, 1);
5897       continue;
5898     }
5899 
5900     if (Op.isToken() && Op.getToken() == "gds") {
5901       IsGdsHardcoded = true;
5902       continue;
5903     }
5904 
5905     // Handle optional arguments
5906     OptionalIdx[Op.getImmTy()] = i;
5907   }
5908 
5909   AMDGPUOperand::ImmTy OffsetType =
5910     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
5911      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
5912      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
5913                                                       AMDGPUOperand::ImmTyOffset;
5914 
5915   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
5916 
5917   if (!IsGdsHardcoded) {
5918     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5919   }
5920   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5921 }
5922 
5923 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
5924   OptionalImmIndexMap OptionalIdx;
5925 
5926   unsigned OperandIdx[4];
5927   unsigned EnMask = 0;
5928   int SrcIdx = 0;
5929 
5930   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5931     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5932 
5933     // Add the register arguments
5934     if (Op.isReg()) {
5935       assert(SrcIdx < 4);
5936       OperandIdx[SrcIdx] = Inst.size();
5937       Op.addRegOperands(Inst, 1);
5938       ++SrcIdx;
5939       continue;
5940     }
5941 
5942     if (Op.isOff()) {
5943       assert(SrcIdx < 4);
5944       OperandIdx[SrcIdx] = Inst.size();
5945       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
5946       ++SrcIdx;
5947       continue;
5948     }
5949 
5950     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
5951       Op.addImmOperands(Inst, 1);
5952       continue;
5953     }
5954 
5955     if (Op.isToken() && Op.getToken() == "done")
5956       continue;
5957 
5958     // Handle optional arguments
5959     OptionalIdx[Op.getImmTy()] = i;
5960   }
5961 
5962   assert(SrcIdx == 4);
5963 
5964   bool Compr = false;
5965   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
5966     Compr = true;
5967     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
5968     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
5969     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
5970   }
5971 
5972   for (auto i = 0; i < SrcIdx; ++i) {
5973     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
5974       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
5975     }
5976   }
5977 
5978   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
5979   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
5980 
5981   Inst.addOperand(MCOperand::createImm(EnMask));
5982 }
5983 
5984 //===----------------------------------------------------------------------===//
5985 // s_waitcnt
5986 //===----------------------------------------------------------------------===//
5987 
5988 static bool
5989 encodeCnt(
5990   const AMDGPU::IsaVersion ISA,
5991   int64_t &IntVal,
5992   int64_t CntVal,
5993   bool Saturate,
5994   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
5995   unsigned (*decode)(const IsaVersion &Version, unsigned))
5996 {
5997   bool Failed = false;
5998 
5999   IntVal = encode(ISA, IntVal, CntVal);
6000   if (CntVal != decode(ISA, IntVal)) {
6001     if (Saturate) {
6002       IntVal = encode(ISA, IntVal, -1);
6003     } else {
6004       Failed = true;
6005     }
6006   }
6007   return Failed;
6008 }
6009 
6010 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6011 
6012   SMLoc CntLoc = getLoc();
6013   StringRef CntName = getTokenStr();
6014 
6015   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6016       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6017     return false;
6018 
6019   int64_t CntVal;
6020   SMLoc ValLoc = getLoc();
6021   if (!parseExpr(CntVal))
6022     return false;
6023 
6024   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6025 
6026   bool Failed = true;
6027   bool Sat = CntName.endswith("_sat");
6028 
6029   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6030     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6031   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6032     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6033   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6034     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6035   } else {
6036     Error(CntLoc, "invalid counter name " + CntName);
6037     return false;
6038   }
6039 
6040   if (Failed) {
6041     Error(ValLoc, "too large value for " + CntName);
6042     return false;
6043   }
6044 
6045   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6046     return false;
6047 
6048   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6049     if (isToken(AsmToken::EndOfStatement)) {
6050       Error(getLoc(), "expected a counter name");
6051       return false;
6052     }
6053   }
6054 
6055   return true;
6056 }
6057 
6058 OperandMatchResultTy
6059 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
6060   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6061   int64_t Waitcnt = getWaitcntBitMask(ISA);
6062   SMLoc S = getLoc();
6063 
6064   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6065     while (!isToken(AsmToken::EndOfStatement)) {
6066       if (!parseCnt(Waitcnt))
6067         return MatchOperand_ParseFail;
6068     }
6069   } else {
6070     if (!parseExpr(Waitcnt))
6071       return MatchOperand_ParseFail;
6072   }
6073 
6074   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6075   return MatchOperand_Success;
6076 }
6077 
6078 bool
6079 AMDGPUOperand::isSWaitCnt() const {
6080   return isImm();
6081 }
6082 
6083 //===----------------------------------------------------------------------===//
6084 // hwreg
6085 //===----------------------------------------------------------------------===//
6086 
6087 bool
6088 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6089                                 OperandInfoTy &Offset,
6090                                 OperandInfoTy &Width) {
6091   using namespace llvm::AMDGPU::Hwreg;
6092 
6093   // The register may be specified by name or using a numeric code
6094   HwReg.Loc = getLoc();
6095   if (isToken(AsmToken::Identifier) &&
6096       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
6097     HwReg.IsSymbolic = true;
6098     lex(); // skip register name
6099   } else if (!parseExpr(HwReg.Id, "a register name")) {
6100     return false;
6101   }
6102 
6103   if (trySkipToken(AsmToken::RParen))
6104     return true;
6105 
6106   // parse optional params
6107   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6108     return false;
6109 
6110   Offset.Loc = getLoc();
6111   if (!parseExpr(Offset.Id))
6112     return false;
6113 
6114   if (!skipToken(AsmToken::Comma, "expected a comma"))
6115     return false;
6116 
6117   Width.Loc = getLoc();
6118   return parseExpr(Width.Id) &&
6119          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6120 }
6121 
6122 bool
6123 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6124                                const OperandInfoTy &Offset,
6125                                const OperandInfoTy &Width) {
6126 
6127   using namespace llvm::AMDGPU::Hwreg;
6128 
6129   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
6130     Error(HwReg.Loc,
6131           "specified hardware register is not supported on this GPU");
6132     return false;
6133   }
6134   if (!isValidHwreg(HwReg.Id)) {
6135     Error(HwReg.Loc,
6136           "invalid code of hardware register: only 6-bit values are legal");
6137     return false;
6138   }
6139   if (!isValidHwregOffset(Offset.Id)) {
6140     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6141     return false;
6142   }
6143   if (!isValidHwregWidth(Width.Id)) {
6144     Error(Width.Loc,
6145           "invalid bitfield width: only values from 1 to 32 are legal");
6146     return false;
6147   }
6148   return true;
6149 }
6150 
6151 OperandMatchResultTy
6152 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6153   using namespace llvm::AMDGPU::Hwreg;
6154 
6155   int64_t ImmVal = 0;
6156   SMLoc Loc = getLoc();
6157 
6158   if (trySkipId("hwreg", AsmToken::LParen)) {
6159     OperandInfoTy HwReg(ID_UNKNOWN_);
6160     OperandInfoTy Offset(OFFSET_DEFAULT_);
6161     OperandInfoTy Width(WIDTH_DEFAULT_);
6162     if (parseHwregBody(HwReg, Offset, Width) &&
6163         validateHwreg(HwReg, Offset, Width)) {
6164       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6165     } else {
6166       return MatchOperand_ParseFail;
6167     }
6168   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6169     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6170       Error(Loc, "invalid immediate: only 16-bit values are legal");
6171       return MatchOperand_ParseFail;
6172     }
6173   } else {
6174     return MatchOperand_ParseFail;
6175   }
6176 
6177   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6178   return MatchOperand_Success;
6179 }
6180 
6181 bool AMDGPUOperand::isHwreg() const {
6182   return isImmTy(ImmTyHwreg);
6183 }
6184 
6185 //===----------------------------------------------------------------------===//
6186 // sendmsg
6187 //===----------------------------------------------------------------------===//
6188 
6189 bool
6190 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6191                                   OperandInfoTy &Op,
6192                                   OperandInfoTy &Stream) {
6193   using namespace llvm::AMDGPU::SendMsg;
6194 
6195   Msg.Loc = getLoc();
6196   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
6197     Msg.IsSymbolic = true;
6198     lex(); // skip message name
6199   } else if (!parseExpr(Msg.Id, "a message name")) {
6200     return false;
6201   }
6202 
6203   if (trySkipToken(AsmToken::Comma)) {
6204     Op.IsDefined = true;
6205     Op.Loc = getLoc();
6206     if (isToken(AsmToken::Identifier) &&
6207         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6208       lex(); // skip operation name
6209     } else if (!parseExpr(Op.Id, "an operation name")) {
6210       return false;
6211     }
6212 
6213     if (trySkipToken(AsmToken::Comma)) {
6214       Stream.IsDefined = true;
6215       Stream.Loc = getLoc();
6216       if (!parseExpr(Stream.Id))
6217         return false;
6218     }
6219   }
6220 
6221   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6222 }
6223 
6224 bool
6225 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6226                                  const OperandInfoTy &Op,
6227                                  const OperandInfoTy &Stream) {
6228   using namespace llvm::AMDGPU::SendMsg;
6229 
6230   // Validation strictness depends on whether message is specified
6231   // in a symbolc or in a numeric form. In the latter case
6232   // only encoding possibility is checked.
6233   bool Strict = Msg.IsSymbolic;
6234 
6235   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
6236     Error(Msg.Loc, "invalid message id");
6237     return false;
6238   }
6239   if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
6240     if (Op.IsDefined) {
6241       Error(Op.Loc, "message does not support operations");
6242     } else {
6243       Error(Msg.Loc, "missing message operation");
6244     }
6245     return false;
6246   }
6247   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6248     Error(Op.Loc, "invalid operation id");
6249     return false;
6250   }
6251   if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
6252     Error(Stream.Loc, "message operation does not support streams");
6253     return false;
6254   }
6255   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6256     Error(Stream.Loc, "invalid message stream id");
6257     return false;
6258   }
6259   return true;
6260 }
6261 
6262 OperandMatchResultTy
6263 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6264   using namespace llvm::AMDGPU::SendMsg;
6265 
6266   int64_t ImmVal = 0;
6267   SMLoc Loc = getLoc();
6268 
6269   if (trySkipId("sendmsg", AsmToken::LParen)) {
6270     OperandInfoTy Msg(ID_UNKNOWN_);
6271     OperandInfoTy Op(OP_NONE_);
6272     OperandInfoTy Stream(STREAM_ID_NONE_);
6273     if (parseSendMsgBody(Msg, Op, Stream) &&
6274         validateSendMsg(Msg, Op, Stream)) {
6275       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6276     } else {
6277       return MatchOperand_ParseFail;
6278     }
6279   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6280     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6281       Error(Loc, "invalid immediate: only 16-bit values are legal");
6282       return MatchOperand_ParseFail;
6283     }
6284   } else {
6285     return MatchOperand_ParseFail;
6286   }
6287 
6288   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6289   return MatchOperand_Success;
6290 }
6291 
6292 bool AMDGPUOperand::isSendMsg() const {
6293   return isImmTy(ImmTySendMsg);
6294 }
6295 
6296 //===----------------------------------------------------------------------===//
6297 // v_interp
6298 //===----------------------------------------------------------------------===//
6299 
6300 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6301   StringRef Str;
6302   SMLoc S = getLoc();
6303 
6304   if (!parseId(Str))
6305     return MatchOperand_NoMatch;
6306 
6307   int Slot = StringSwitch<int>(Str)
6308     .Case("p10", 0)
6309     .Case("p20", 1)
6310     .Case("p0", 2)
6311     .Default(-1);
6312 
6313   if (Slot == -1) {
6314     Error(S, "invalid interpolation slot");
6315     return MatchOperand_ParseFail;
6316   }
6317 
6318   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6319                                               AMDGPUOperand::ImmTyInterpSlot));
6320   return MatchOperand_Success;
6321 }
6322 
6323 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6324   StringRef Str;
6325   SMLoc S = getLoc();
6326 
6327   if (!parseId(Str))
6328     return MatchOperand_NoMatch;
6329 
6330   if (!Str.startswith("attr")) {
6331     Error(S, "invalid interpolation attribute");
6332     return MatchOperand_ParseFail;
6333   }
6334 
6335   StringRef Chan = Str.take_back(2);
6336   int AttrChan = StringSwitch<int>(Chan)
6337     .Case(".x", 0)
6338     .Case(".y", 1)
6339     .Case(".z", 2)
6340     .Case(".w", 3)
6341     .Default(-1);
6342   if (AttrChan == -1) {
6343     Error(S, "invalid or missing interpolation attribute channel");
6344     return MatchOperand_ParseFail;
6345   }
6346 
6347   Str = Str.drop_back(2).drop_front(4);
6348 
6349   uint8_t Attr;
6350   if (Str.getAsInteger(10, Attr)) {
6351     Error(S, "invalid or missing interpolation attribute number");
6352     return MatchOperand_ParseFail;
6353   }
6354 
6355   if (Attr > 63) {
6356     Error(S, "out of bounds interpolation attribute number");
6357     return MatchOperand_ParseFail;
6358   }
6359 
6360   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6361 
6362   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6363                                               AMDGPUOperand::ImmTyInterpAttr));
6364   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6365                                               AMDGPUOperand::ImmTyAttrChan));
6366   return MatchOperand_Success;
6367 }
6368 
6369 //===----------------------------------------------------------------------===//
6370 // exp
6371 //===----------------------------------------------------------------------===//
6372 
6373 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6374   using namespace llvm::AMDGPU::Exp;
6375 
6376   StringRef Str;
6377   SMLoc S = getLoc();
6378 
6379   if (!parseId(Str))
6380     return MatchOperand_NoMatch;
6381 
6382   unsigned Id = getTgtId(Str);
6383   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6384     Error(S, (Id == ET_INVALID) ?
6385                 "invalid exp target" :
6386                 "exp target is not supported on this GPU");
6387     return MatchOperand_ParseFail;
6388   }
6389 
6390   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6391                                               AMDGPUOperand::ImmTyExpTgt));
6392   return MatchOperand_Success;
6393 }
6394 
6395 //===----------------------------------------------------------------------===//
6396 // parser helpers
6397 //===----------------------------------------------------------------------===//
6398 
6399 bool
6400 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6401   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6402 }
6403 
6404 bool
6405 AMDGPUAsmParser::isId(const StringRef Id) const {
6406   return isId(getToken(), Id);
6407 }
6408 
6409 bool
6410 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6411   return getTokenKind() == Kind;
6412 }
6413 
6414 bool
6415 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6416   if (isId(Id)) {
6417     lex();
6418     return true;
6419   }
6420   return false;
6421 }
6422 
6423 bool
6424 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6425   if (isToken(AsmToken::Identifier)) {
6426     StringRef Tok = getTokenStr();
6427     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6428       lex();
6429       return true;
6430     }
6431   }
6432   return false;
6433 }
6434 
6435 bool
6436 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6437   if (isId(Id) && peekToken().is(Kind)) {
6438     lex();
6439     lex();
6440     return true;
6441   }
6442   return false;
6443 }
6444 
6445 bool
6446 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6447   if (isToken(Kind)) {
6448     lex();
6449     return true;
6450   }
6451   return false;
6452 }
6453 
6454 bool
6455 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6456                            const StringRef ErrMsg) {
6457   if (!trySkipToken(Kind)) {
6458     Error(getLoc(), ErrMsg);
6459     return false;
6460   }
6461   return true;
6462 }
6463 
6464 bool
6465 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6466   SMLoc S = getLoc();
6467 
6468   const MCExpr *Expr;
6469   if (Parser.parseExpression(Expr))
6470     return false;
6471 
6472   if (Expr->evaluateAsAbsolute(Imm))
6473     return true;
6474 
6475   if (Expected.empty()) {
6476     Error(S, "expected absolute expression");
6477   } else {
6478     Error(S, Twine("expected ", Expected) +
6479              Twine(" or an absolute expression"));
6480   }
6481   return false;
6482 }
6483 
6484 bool
6485 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6486   SMLoc S = getLoc();
6487 
6488   const MCExpr *Expr;
6489   if (Parser.parseExpression(Expr))
6490     return false;
6491 
6492   int64_t IntVal;
6493   if (Expr->evaluateAsAbsolute(IntVal)) {
6494     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6495   } else {
6496     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6497   }
6498   return true;
6499 }
6500 
6501 bool
6502 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6503   if (isToken(AsmToken::String)) {
6504     Val = getToken().getStringContents();
6505     lex();
6506     return true;
6507   } else {
6508     Error(getLoc(), ErrMsg);
6509     return false;
6510   }
6511 }
6512 
6513 bool
6514 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6515   if (isToken(AsmToken::Identifier)) {
6516     Val = getTokenStr();
6517     lex();
6518     return true;
6519   } else {
6520     if (!ErrMsg.empty())
6521       Error(getLoc(), ErrMsg);
6522     return false;
6523   }
6524 }
6525 
6526 AsmToken
6527 AMDGPUAsmParser::getToken() const {
6528   return Parser.getTok();
6529 }
6530 
6531 AsmToken
6532 AMDGPUAsmParser::peekToken() {
6533   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6534 }
6535 
6536 void
6537 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6538   auto TokCount = getLexer().peekTokens(Tokens);
6539 
6540   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6541     Tokens[Idx] = AsmToken(AsmToken::Error, "");
6542 }
6543 
6544 AsmToken::TokenKind
6545 AMDGPUAsmParser::getTokenKind() const {
6546   return getLexer().getKind();
6547 }
6548 
6549 SMLoc
6550 AMDGPUAsmParser::getLoc() const {
6551   return getToken().getLoc();
6552 }
6553 
6554 StringRef
6555 AMDGPUAsmParser::getTokenStr() const {
6556   return getToken().getString();
6557 }
6558 
6559 void
6560 AMDGPUAsmParser::lex() {
6561   Parser.Lex();
6562 }
6563 
6564 SMLoc
6565 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
6566                                const OperandVector &Operands) const {
6567   for (unsigned i = Operands.size() - 1; i > 0; --i) {
6568     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6569     if (Test(Op))
6570       return Op.getStartLoc();
6571   }
6572   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
6573 }
6574 
6575 SMLoc
6576 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
6577                            const OperandVector &Operands) const {
6578   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
6579   return getOperandLoc(Test, Operands);
6580 }
6581 
6582 SMLoc
6583 AMDGPUAsmParser::getRegLoc(unsigned Reg,
6584                            const OperandVector &Operands) const {
6585   auto Test = [=](const AMDGPUOperand& Op) {
6586     return Op.isRegKind() && Op.getReg() == Reg;
6587   };
6588   return getOperandLoc(Test, Operands);
6589 }
6590 
6591 SMLoc
6592 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
6593   auto Test = [](const AMDGPUOperand& Op) {
6594     return Op.IsImmKindLiteral() || Op.isExpr();
6595   };
6596   return getOperandLoc(Test, Operands);
6597 }
6598 
6599 SMLoc
6600 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
6601   auto Test = [](const AMDGPUOperand& Op) {
6602     return Op.isImmKindConst();
6603   };
6604   return getOperandLoc(Test, Operands);
6605 }
6606 
6607 //===----------------------------------------------------------------------===//
6608 // swizzle
6609 //===----------------------------------------------------------------------===//
6610 
6611 LLVM_READNONE
6612 static unsigned
6613 encodeBitmaskPerm(const unsigned AndMask,
6614                   const unsigned OrMask,
6615                   const unsigned XorMask) {
6616   using namespace llvm::AMDGPU::Swizzle;
6617 
6618   return BITMASK_PERM_ENC |
6619          (AndMask << BITMASK_AND_SHIFT) |
6620          (OrMask  << BITMASK_OR_SHIFT)  |
6621          (XorMask << BITMASK_XOR_SHIFT);
6622 }
6623 
6624 bool
6625 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
6626                                      const unsigned MinVal,
6627                                      const unsigned MaxVal,
6628                                      const StringRef ErrMsg,
6629                                      SMLoc &Loc) {
6630   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6631     return false;
6632   }
6633   Loc = getLoc();
6634   if (!parseExpr(Op)) {
6635     return false;
6636   }
6637   if (Op < MinVal || Op > MaxVal) {
6638     Error(Loc, ErrMsg);
6639     return false;
6640   }
6641 
6642   return true;
6643 }
6644 
6645 bool
6646 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6647                                       const unsigned MinVal,
6648                                       const unsigned MaxVal,
6649                                       const StringRef ErrMsg) {
6650   SMLoc Loc;
6651   for (unsigned i = 0; i < OpNum; ++i) {
6652     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
6653       return false;
6654   }
6655 
6656   return true;
6657 }
6658 
6659 bool
6660 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6661   using namespace llvm::AMDGPU::Swizzle;
6662 
6663   int64_t Lane[LANE_NUM];
6664   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6665                            "expected a 2-bit lane id")) {
6666     Imm = QUAD_PERM_ENC;
6667     for (unsigned I = 0; I < LANE_NUM; ++I) {
6668       Imm |= Lane[I] << (LANE_SHIFT * I);
6669     }
6670     return true;
6671   }
6672   return false;
6673 }
6674 
6675 bool
6676 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6677   using namespace llvm::AMDGPU::Swizzle;
6678 
6679   SMLoc Loc;
6680   int64_t GroupSize;
6681   int64_t LaneIdx;
6682 
6683   if (!parseSwizzleOperand(GroupSize,
6684                            2, 32,
6685                            "group size must be in the interval [2,32]",
6686                            Loc)) {
6687     return false;
6688   }
6689   if (!isPowerOf2_64(GroupSize)) {
6690     Error(Loc, "group size must be a power of two");
6691     return false;
6692   }
6693   if (parseSwizzleOperand(LaneIdx,
6694                           0, GroupSize - 1,
6695                           "lane id must be in the interval [0,group size - 1]",
6696                           Loc)) {
6697     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6698     return true;
6699   }
6700   return false;
6701 }
6702 
6703 bool
6704 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
6705   using namespace llvm::AMDGPU::Swizzle;
6706 
6707   SMLoc Loc;
6708   int64_t GroupSize;
6709 
6710   if (!parseSwizzleOperand(GroupSize,
6711                            2, 32,
6712                            "group size must be in the interval [2,32]",
6713                            Loc)) {
6714     return false;
6715   }
6716   if (!isPowerOf2_64(GroupSize)) {
6717     Error(Loc, "group size must be a power of two");
6718     return false;
6719   }
6720 
6721   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6722   return true;
6723 }
6724 
6725 bool
6726 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6727   using namespace llvm::AMDGPU::Swizzle;
6728 
6729   SMLoc Loc;
6730   int64_t GroupSize;
6731 
6732   if (!parseSwizzleOperand(GroupSize,
6733                            1, 16,
6734                            "group size must be in the interval [1,16]",
6735                            Loc)) {
6736     return false;
6737   }
6738   if (!isPowerOf2_64(GroupSize)) {
6739     Error(Loc, "group size must be a power of two");
6740     return false;
6741   }
6742 
6743   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6744   return true;
6745 }
6746 
6747 bool
6748 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
6749   using namespace llvm::AMDGPU::Swizzle;
6750 
6751   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6752     return false;
6753   }
6754 
6755   StringRef Ctl;
6756   SMLoc StrLoc = getLoc();
6757   if (!parseString(Ctl)) {
6758     return false;
6759   }
6760   if (Ctl.size() != BITMASK_WIDTH) {
6761     Error(StrLoc, "expected a 5-character mask");
6762     return false;
6763   }
6764 
6765   unsigned AndMask = 0;
6766   unsigned OrMask = 0;
6767   unsigned XorMask = 0;
6768 
6769   for (size_t i = 0; i < Ctl.size(); ++i) {
6770     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
6771     switch(Ctl[i]) {
6772     default:
6773       Error(StrLoc, "invalid mask");
6774       return false;
6775     case '0':
6776       break;
6777     case '1':
6778       OrMask |= Mask;
6779       break;
6780     case 'p':
6781       AndMask |= Mask;
6782       break;
6783     case 'i':
6784       AndMask |= Mask;
6785       XorMask |= Mask;
6786       break;
6787     }
6788   }
6789 
6790   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
6791   return true;
6792 }
6793 
6794 bool
6795 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6796 
6797   SMLoc OffsetLoc = getLoc();
6798 
6799   if (!parseExpr(Imm, "a swizzle macro")) {
6800     return false;
6801   }
6802   if (!isUInt<16>(Imm)) {
6803     Error(OffsetLoc, "expected a 16-bit offset");
6804     return false;
6805   }
6806   return true;
6807 }
6808 
6809 bool
6810 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6811   using namespace llvm::AMDGPU::Swizzle;
6812 
6813   if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
6814 
6815     SMLoc ModeLoc = getLoc();
6816     bool Ok = false;
6817 
6818     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6819       Ok = parseSwizzleQuadPerm(Imm);
6820     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6821       Ok = parseSwizzleBitmaskPerm(Imm);
6822     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6823       Ok = parseSwizzleBroadcast(Imm);
6824     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6825       Ok = parseSwizzleSwap(Imm);
6826     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6827       Ok = parseSwizzleReverse(Imm);
6828     } else {
6829       Error(ModeLoc, "expected a swizzle mode");
6830     }
6831 
6832     return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
6833   }
6834 
6835   return false;
6836 }
6837 
6838 OperandMatchResultTy
6839 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6840   SMLoc S = getLoc();
6841   int64_t Imm = 0;
6842 
6843   if (trySkipId("offset")) {
6844 
6845     bool Ok = false;
6846     if (skipToken(AsmToken::Colon, "expected a colon")) {
6847       if (trySkipId("swizzle")) {
6848         Ok = parseSwizzleMacro(Imm);
6849       } else {
6850         Ok = parseSwizzleOffset(Imm);
6851       }
6852     }
6853 
6854     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
6855 
6856     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
6857   } else {
6858     // Swizzle "offset" operand is optional.
6859     // If it is omitted, try parsing other optional operands.
6860     return parseOptionalOpr(Operands);
6861   }
6862 }
6863 
6864 bool
6865 AMDGPUOperand::isSwizzle() const {
6866   return isImmTy(ImmTySwizzle);
6867 }
6868 
6869 //===----------------------------------------------------------------------===//
6870 // VGPR Index Mode
6871 //===----------------------------------------------------------------------===//
6872 
6873 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
6874 
6875   using namespace llvm::AMDGPU::VGPRIndexMode;
6876 
6877   if (trySkipToken(AsmToken::RParen)) {
6878     return OFF;
6879   }
6880 
6881   int64_t Imm = 0;
6882 
6883   while (true) {
6884     unsigned Mode = 0;
6885     SMLoc S = getLoc();
6886 
6887     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
6888       if (trySkipId(IdSymbolic[ModeId])) {
6889         Mode = 1 << ModeId;
6890         break;
6891       }
6892     }
6893 
6894     if (Mode == 0) {
6895       Error(S, (Imm == 0)?
6896                "expected a VGPR index mode or a closing parenthesis" :
6897                "expected a VGPR index mode");
6898       return UNDEF;
6899     }
6900 
6901     if (Imm & Mode) {
6902       Error(S, "duplicate VGPR index mode");
6903       return UNDEF;
6904     }
6905     Imm |= Mode;
6906 
6907     if (trySkipToken(AsmToken::RParen))
6908       break;
6909     if (!skipToken(AsmToken::Comma,
6910                    "expected a comma or a closing parenthesis"))
6911       return UNDEF;
6912   }
6913 
6914   return Imm;
6915 }
6916 
6917 OperandMatchResultTy
6918 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
6919 
6920   using namespace llvm::AMDGPU::VGPRIndexMode;
6921 
6922   int64_t Imm = 0;
6923   SMLoc S = getLoc();
6924 
6925   if (trySkipId("gpr_idx", AsmToken::LParen)) {
6926     Imm = parseGPRIdxMacro();
6927     if (Imm == UNDEF)
6928       return MatchOperand_ParseFail;
6929   } else {
6930     if (getParser().parseAbsoluteExpression(Imm))
6931       return MatchOperand_ParseFail;
6932     if (Imm < 0 || !isUInt<4>(Imm)) {
6933       Error(S, "invalid immediate: only 4-bit values are legal");
6934       return MatchOperand_ParseFail;
6935     }
6936   }
6937 
6938   Operands.push_back(
6939       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
6940   return MatchOperand_Success;
6941 }
6942 
6943 bool AMDGPUOperand::isGPRIdxMode() const {
6944   return isImmTy(ImmTyGprIdxMode);
6945 }
6946 
6947 //===----------------------------------------------------------------------===//
6948 // sopp branch targets
6949 //===----------------------------------------------------------------------===//
6950 
6951 OperandMatchResultTy
6952 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
6953 
6954   // Make sure we are not parsing something
6955   // that looks like a label or an expression but is not.
6956   // This will improve error messages.
6957   if (isRegister() || isModifier())
6958     return MatchOperand_NoMatch;
6959 
6960   if (!parseExpr(Operands))
6961     return MatchOperand_ParseFail;
6962 
6963   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
6964   assert(Opr.isImm() || Opr.isExpr());
6965   SMLoc Loc = Opr.getStartLoc();
6966 
6967   // Currently we do not support arbitrary expressions as branch targets.
6968   // Only labels and absolute expressions are accepted.
6969   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
6970     Error(Loc, "expected an absolute expression or a label");
6971   } else if (Opr.isImm() && !Opr.isS16Imm()) {
6972     Error(Loc, "expected a 16-bit signed jump offset");
6973   }
6974 
6975   return MatchOperand_Success;
6976 }
6977 
6978 //===----------------------------------------------------------------------===//
6979 // Boolean holding registers
6980 //===----------------------------------------------------------------------===//
6981 
6982 OperandMatchResultTy
6983 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
6984   return parseReg(Operands);
6985 }
6986 
6987 //===----------------------------------------------------------------------===//
6988 // mubuf
6989 //===----------------------------------------------------------------------===//
6990 
6991 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
6992   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
6993 }
6994 
6995 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
6996                                    const OperandVector &Operands,
6997                                    bool IsAtomic,
6998                                    bool IsLds) {
6999   bool IsLdsOpcode = IsLds;
7000   bool HasLdsModifier = false;
7001   OptionalImmIndexMap OptionalIdx;
7002   unsigned FirstOperandIdx = 1;
7003   bool IsAtomicReturn = false;
7004 
7005   if (IsAtomic) {
7006     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7007       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7008       if (!Op.isCPol())
7009         continue;
7010       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7011       break;
7012     }
7013 
7014     if (!IsAtomicReturn) {
7015       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7016       if (NewOpc != -1)
7017         Inst.setOpcode(NewOpc);
7018     }
7019 
7020     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7021                       SIInstrFlags::IsAtomicRet;
7022   }
7023 
7024   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7025     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7026 
7027     // Add the register arguments
7028     if (Op.isReg()) {
7029       Op.addRegOperands(Inst, 1);
7030       // Insert a tied src for atomic return dst.
7031       // This cannot be postponed as subsequent calls to
7032       // addImmOperands rely on correct number of MC operands.
7033       if (IsAtomicReturn && i == FirstOperandIdx)
7034         Op.addRegOperands(Inst, 1);
7035       continue;
7036     }
7037 
7038     // Handle the case where soffset is an immediate
7039     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7040       Op.addImmOperands(Inst, 1);
7041       continue;
7042     }
7043 
7044     HasLdsModifier |= Op.isLDS();
7045 
7046     // Handle tokens like 'offen' which are sometimes hard-coded into the
7047     // asm string.  There are no MCInst operands for these.
7048     if (Op.isToken()) {
7049       continue;
7050     }
7051     assert(Op.isImm());
7052 
7053     // Handle optional arguments
7054     OptionalIdx[Op.getImmTy()] = i;
7055   }
7056 
7057   // This is a workaround for an llvm quirk which may result in an
7058   // incorrect instruction selection. Lds and non-lds versions of
7059   // MUBUF instructions are identical except that lds versions
7060   // have mandatory 'lds' modifier. However this modifier follows
7061   // optional modifiers and llvm asm matcher regards this 'lds'
7062   // modifier as an optional one. As a result, an lds version
7063   // of opcode may be selected even if it has no 'lds' modifier.
7064   if (IsLdsOpcode && !HasLdsModifier) {
7065     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
7066     if (NoLdsOpcode != -1) { // Got lds version - correct it.
7067       Inst.setOpcode(NoLdsOpcode);
7068       IsLdsOpcode = false;
7069     }
7070   }
7071 
7072   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7073   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7074 
7075   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
7076     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7077   }
7078   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7079 }
7080 
7081 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7082   OptionalImmIndexMap OptionalIdx;
7083 
7084   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7085     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7086 
7087     // Add the register arguments
7088     if (Op.isReg()) {
7089       Op.addRegOperands(Inst, 1);
7090       continue;
7091     }
7092 
7093     // Handle the case where soffset is an immediate
7094     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7095       Op.addImmOperands(Inst, 1);
7096       continue;
7097     }
7098 
7099     // Handle tokens like 'offen' which are sometimes hard-coded into the
7100     // asm string.  There are no MCInst operands for these.
7101     if (Op.isToken()) {
7102       continue;
7103     }
7104     assert(Op.isImm());
7105 
7106     // Handle optional arguments
7107     OptionalIdx[Op.getImmTy()] = i;
7108   }
7109 
7110   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7111                         AMDGPUOperand::ImmTyOffset);
7112   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7113   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7114   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7115   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7116 }
7117 
7118 //===----------------------------------------------------------------------===//
7119 // mimg
7120 //===----------------------------------------------------------------------===//
7121 
7122 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7123                               bool IsAtomic) {
7124   unsigned I = 1;
7125   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7126   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7127     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7128   }
7129 
7130   if (IsAtomic) {
7131     // Add src, same as dst
7132     assert(Desc.getNumDefs() == 1);
7133     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7134   }
7135 
7136   OptionalImmIndexMap OptionalIdx;
7137 
7138   for (unsigned E = Operands.size(); I != E; ++I) {
7139     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7140 
7141     // Add the register arguments
7142     if (Op.isReg()) {
7143       Op.addRegOperands(Inst, 1);
7144     } else if (Op.isImmModifier()) {
7145       OptionalIdx[Op.getImmTy()] = I;
7146     } else if (!Op.isToken()) {
7147       llvm_unreachable("unexpected operand type");
7148     }
7149   }
7150 
7151   bool IsGFX10Plus = isGFX10Plus();
7152 
7153   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7154   if (IsGFX10Plus)
7155     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7156   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7157   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7158   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7159   if (IsGFX10Plus)
7160     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7161   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7162     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7163   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7164   if (!IsGFX10Plus)
7165     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7166   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7167 }
7168 
7169 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7170   cvtMIMG(Inst, Operands, true);
7171 }
7172 
7173 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7174   OptionalImmIndexMap OptionalIdx;
7175   bool IsAtomicReturn = false;
7176 
7177   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7178     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7179     if (!Op.isCPol())
7180       continue;
7181     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7182     break;
7183   }
7184 
7185   if (!IsAtomicReturn) {
7186     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7187     if (NewOpc != -1)
7188       Inst.setOpcode(NewOpc);
7189   }
7190 
7191   IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7192                     SIInstrFlags::IsAtomicRet;
7193 
7194   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7195     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7196 
7197     // Add the register arguments
7198     if (Op.isReg()) {
7199       Op.addRegOperands(Inst, 1);
7200       if (IsAtomicReturn && i == 1)
7201         Op.addRegOperands(Inst, 1);
7202       continue;
7203     }
7204 
7205     // Handle the case where soffset is an immediate
7206     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7207       Op.addImmOperands(Inst, 1);
7208       continue;
7209     }
7210 
7211     // Handle tokens like 'offen' which are sometimes hard-coded into the
7212     // asm string.  There are no MCInst operands for these.
7213     if (Op.isToken()) {
7214       continue;
7215     }
7216     assert(Op.isImm());
7217 
7218     // Handle optional arguments
7219     OptionalIdx[Op.getImmTy()] = i;
7220   }
7221 
7222   if ((int)Inst.getNumOperands() <=
7223       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7224     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7225   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7226 }
7227 
7228 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7229                                       const OperandVector &Operands) {
7230   for (unsigned I = 1; I < Operands.size(); ++I) {
7231     auto &Operand = (AMDGPUOperand &)*Operands[I];
7232     if (Operand.isReg())
7233       Operand.addRegOperands(Inst, 1);
7234   }
7235 
7236   Inst.addOperand(MCOperand::createImm(1)); // a16
7237 }
7238 
7239 //===----------------------------------------------------------------------===//
7240 // smrd
7241 //===----------------------------------------------------------------------===//
7242 
7243 bool AMDGPUOperand::isSMRDOffset8() const {
7244   return isImm() && isUInt<8>(getImm());
7245 }
7246 
7247 bool AMDGPUOperand::isSMEMOffset() const {
7248   return isImm(); // Offset range is checked later by validator.
7249 }
7250 
7251 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7252   // 32-bit literals are only supported on CI and we only want to use them
7253   // when the offset is > 8-bits.
7254   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7255 }
7256 
7257 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7258   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7259 }
7260 
7261 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7262   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7263 }
7264 
7265 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7266   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7267 }
7268 
7269 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7270   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7271 }
7272 
7273 //===----------------------------------------------------------------------===//
7274 // vop3
7275 //===----------------------------------------------------------------------===//
7276 
7277 static bool ConvertOmodMul(int64_t &Mul) {
7278   if (Mul != 1 && Mul != 2 && Mul != 4)
7279     return false;
7280 
7281   Mul >>= 1;
7282   return true;
7283 }
7284 
7285 static bool ConvertOmodDiv(int64_t &Div) {
7286   if (Div == 1) {
7287     Div = 0;
7288     return true;
7289   }
7290 
7291   if (Div == 2) {
7292     Div = 3;
7293     return true;
7294   }
7295 
7296   return false;
7297 }
7298 
7299 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7300 // This is intentional and ensures compatibility with sp3.
7301 // See bug 35397 for details.
7302 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7303   if (BoundCtrl == 0 || BoundCtrl == 1) {
7304     BoundCtrl = 1;
7305     return true;
7306   }
7307   return false;
7308 }
7309 
7310 // Note: the order in this table matches the order of operands in AsmString.
7311 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7312   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7313   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7314   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7315   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7316   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7317   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7318   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7319   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7320   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7321   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7322   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7323   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7324   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7325   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7326   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7327   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7328   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7329   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7330   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7331   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7332   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7333   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7334   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7335   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7336   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7337   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7338   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7339   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
7340   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7341   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7342   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7343   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7344   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7345   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7346   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7347   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7348   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7349   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7350   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7351   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7352   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7353 };
7354 
7355 void AMDGPUAsmParser::onBeginOfFile() {
7356   if (!getParser().getStreamer().getTargetStreamer() ||
7357       getSTI().getTargetTriple().getArch() == Triple::r600)
7358     return;
7359 
7360   if (!getTargetStreamer().getTargetID())
7361     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7362 
7363   if (isHsaAbiVersion3Or4(&getSTI()))
7364     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7365 }
7366 
7367 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7368 
7369   OperandMatchResultTy res = parseOptionalOpr(Operands);
7370 
7371   // This is a hack to enable hardcoded mandatory operands which follow
7372   // optional operands.
7373   //
7374   // Current design assumes that all operands after the first optional operand
7375   // are also optional. However implementation of some instructions violates
7376   // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
7377   //
7378   // To alleviate this problem, we have to (implicitly) parse extra operands
7379   // to make sure autogenerated parser of custom operands never hit hardcoded
7380   // mandatory operands.
7381 
7382   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7383     if (res != MatchOperand_Success ||
7384         isToken(AsmToken::EndOfStatement))
7385       break;
7386 
7387     trySkipToken(AsmToken::Comma);
7388     res = parseOptionalOpr(Operands);
7389   }
7390 
7391   return res;
7392 }
7393 
7394 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7395   OperandMatchResultTy res;
7396   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7397     // try to parse any optional operand here
7398     if (Op.IsBit) {
7399       res = parseNamedBit(Op.Name, Operands, Op.Type);
7400     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7401       res = parseOModOperand(Operands);
7402     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7403                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7404                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7405       res = parseSDWASel(Operands, Op.Name, Op.Type);
7406     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7407       res = parseSDWADstUnused(Operands);
7408     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7409                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7410                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7411                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7412       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7413                                         Op.ConvertResult);
7414     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7415       res = parseDim(Operands);
7416     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7417       res = parseCPol(Operands);
7418     } else {
7419       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7420     }
7421     if (res != MatchOperand_NoMatch) {
7422       return res;
7423     }
7424   }
7425   return MatchOperand_NoMatch;
7426 }
7427 
7428 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7429   StringRef Name = getTokenStr();
7430   if (Name == "mul") {
7431     return parseIntWithPrefix("mul", Operands,
7432                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7433   }
7434 
7435   if (Name == "div") {
7436     return parseIntWithPrefix("div", Operands,
7437                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7438   }
7439 
7440   return MatchOperand_NoMatch;
7441 }
7442 
7443 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
7444   cvtVOP3P(Inst, Operands);
7445 
7446   int Opc = Inst.getOpcode();
7447 
7448   int SrcNum;
7449   const int Ops[] = { AMDGPU::OpName::src0,
7450                       AMDGPU::OpName::src1,
7451                       AMDGPU::OpName::src2 };
7452   for (SrcNum = 0;
7453        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
7454        ++SrcNum);
7455   assert(SrcNum > 0);
7456 
7457   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7458   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7459 
7460   if ((OpSel & (1 << SrcNum)) != 0) {
7461     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7462     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7463     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
7464   }
7465 }
7466 
7467 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
7468       // 1. This operand is input modifiers
7469   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
7470       // 2. This is not last operand
7471       && Desc.NumOperands > (OpNum + 1)
7472       // 3. Next operand is register class
7473       && Desc.OpInfo[OpNum + 1].RegClass != -1
7474       // 4. Next register is not tied to any other operand
7475       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7476 }
7477 
7478 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7479 {
7480   OptionalImmIndexMap OptionalIdx;
7481   unsigned Opc = Inst.getOpcode();
7482 
7483   unsigned I = 1;
7484   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7485   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7486     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7487   }
7488 
7489   for (unsigned E = Operands.size(); I != E; ++I) {
7490     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7491     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7492       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7493     } else if (Op.isInterpSlot() ||
7494                Op.isInterpAttr() ||
7495                Op.isAttrChan()) {
7496       Inst.addOperand(MCOperand::createImm(Op.getImm()));
7497     } else if (Op.isImmModifier()) {
7498       OptionalIdx[Op.getImmTy()] = I;
7499     } else {
7500       llvm_unreachable("unhandled operand type");
7501     }
7502   }
7503 
7504   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
7505     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
7506   }
7507 
7508   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7509     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7510   }
7511 
7512   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7513     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7514   }
7515 }
7516 
7517 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
7518                               OptionalImmIndexMap &OptionalIdx) {
7519   unsigned Opc = Inst.getOpcode();
7520 
7521   unsigned I = 1;
7522   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7523   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7524     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7525   }
7526 
7527   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
7528     // This instruction has src modifiers
7529     for (unsigned E = Operands.size(); I != E; ++I) {
7530       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7531       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7532         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7533       } else if (Op.isImmModifier()) {
7534         OptionalIdx[Op.getImmTy()] = I;
7535       } else if (Op.isRegOrImm()) {
7536         Op.addRegOrImmOperands(Inst, 1);
7537       } else {
7538         llvm_unreachable("unhandled operand type");
7539       }
7540     }
7541   } else {
7542     // No src modifiers
7543     for (unsigned E = Operands.size(); I != E; ++I) {
7544       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7545       if (Op.isMod()) {
7546         OptionalIdx[Op.getImmTy()] = I;
7547       } else {
7548         Op.addRegOrImmOperands(Inst, 1);
7549       }
7550     }
7551   }
7552 
7553   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7554     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7555   }
7556 
7557   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7558     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7559   }
7560 
7561   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
7562   // it has src2 register operand that is tied to dst operand
7563   // we don't allow modifiers for this operand in assembler so src2_modifiers
7564   // should be 0.
7565   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
7566       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
7567       Opc == AMDGPU::V_MAC_F32_e64_vi ||
7568       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
7569       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
7570       Opc == AMDGPU::V_MAC_F16_e64_vi ||
7571       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
7572       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
7573       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
7574       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
7575       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
7576     auto it = Inst.begin();
7577     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
7578     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
7579     ++it;
7580     // Copy the operand to ensure it's not invalidated when Inst grows.
7581     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
7582   }
7583 }
7584 
7585 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
7586   OptionalImmIndexMap OptionalIdx;
7587   cvtVOP3(Inst, Operands, OptionalIdx);
7588 }
7589 
7590 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
7591                                OptionalImmIndexMap &OptIdx) {
7592   const int Opc = Inst.getOpcode();
7593   const MCInstrDesc &Desc = MII.get(Opc);
7594 
7595   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
7596 
7597   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
7598     assert(!IsPacked);
7599     Inst.addOperand(Inst.getOperand(0));
7600   }
7601 
7602   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
7603   // instruction, and then figure out where to actually put the modifiers
7604 
7605   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7606   if (OpSelIdx != -1) {
7607     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
7608   }
7609 
7610   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
7611   if (OpSelHiIdx != -1) {
7612     int DefaultVal = IsPacked ? -1 : 0;
7613     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
7614                           DefaultVal);
7615   }
7616 
7617   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
7618   if (NegLoIdx != -1) {
7619     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
7620     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
7621   }
7622 
7623   const int Ops[] = { AMDGPU::OpName::src0,
7624                       AMDGPU::OpName::src1,
7625                       AMDGPU::OpName::src2 };
7626   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
7627                          AMDGPU::OpName::src1_modifiers,
7628                          AMDGPU::OpName::src2_modifiers };
7629 
7630   unsigned OpSel = 0;
7631   unsigned OpSelHi = 0;
7632   unsigned NegLo = 0;
7633   unsigned NegHi = 0;
7634 
7635   if (OpSelIdx != -1)
7636     OpSel = Inst.getOperand(OpSelIdx).getImm();
7637 
7638   if (OpSelHiIdx != -1)
7639     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
7640 
7641   if (NegLoIdx != -1) {
7642     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
7643     NegLo = Inst.getOperand(NegLoIdx).getImm();
7644     NegHi = Inst.getOperand(NegHiIdx).getImm();
7645   }
7646 
7647   for (int J = 0; J < 3; ++J) {
7648     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
7649     if (OpIdx == -1)
7650       break;
7651 
7652     uint32_t ModVal = 0;
7653 
7654     if ((OpSel & (1 << J)) != 0)
7655       ModVal |= SISrcMods::OP_SEL_0;
7656 
7657     if ((OpSelHi & (1 << J)) != 0)
7658       ModVal |= SISrcMods::OP_SEL_1;
7659 
7660     if ((NegLo & (1 << J)) != 0)
7661       ModVal |= SISrcMods::NEG;
7662 
7663     if ((NegHi & (1 << J)) != 0)
7664       ModVal |= SISrcMods::NEG_HI;
7665 
7666     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
7667 
7668     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
7669   }
7670 }
7671 
7672 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
7673   OptionalImmIndexMap OptIdx;
7674   cvtVOP3(Inst, Operands, OptIdx);
7675   cvtVOP3P(Inst, Operands, OptIdx);
7676 }
7677 
7678 //===----------------------------------------------------------------------===//
7679 // dpp
7680 //===----------------------------------------------------------------------===//
7681 
7682 bool AMDGPUOperand::isDPP8() const {
7683   return isImmTy(ImmTyDPP8);
7684 }
7685 
7686 bool AMDGPUOperand::isDPPCtrl() const {
7687   using namespace AMDGPU::DPP;
7688 
7689   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
7690   if (result) {
7691     int64_t Imm = getImm();
7692     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
7693            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
7694            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
7695            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
7696            (Imm == DppCtrl::WAVE_SHL1) ||
7697            (Imm == DppCtrl::WAVE_ROL1) ||
7698            (Imm == DppCtrl::WAVE_SHR1) ||
7699            (Imm == DppCtrl::WAVE_ROR1) ||
7700            (Imm == DppCtrl::ROW_MIRROR) ||
7701            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
7702            (Imm == DppCtrl::BCAST15) ||
7703            (Imm == DppCtrl::BCAST31) ||
7704            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
7705            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
7706   }
7707   return false;
7708 }
7709 
7710 //===----------------------------------------------------------------------===//
7711 // mAI
7712 //===----------------------------------------------------------------------===//
7713 
7714 bool AMDGPUOperand::isBLGP() const {
7715   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
7716 }
7717 
7718 bool AMDGPUOperand::isCBSZ() const {
7719   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
7720 }
7721 
7722 bool AMDGPUOperand::isABID() const {
7723   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
7724 }
7725 
7726 bool AMDGPUOperand::isS16Imm() const {
7727   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
7728 }
7729 
7730 bool AMDGPUOperand::isU16Imm() const {
7731   return isImm() && isUInt<16>(getImm());
7732 }
7733 
7734 //===----------------------------------------------------------------------===//
7735 // dim
7736 //===----------------------------------------------------------------------===//
7737 
7738 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
7739   // We want to allow "dim:1D" etc.,
7740   // but the initial 1 is tokenized as an integer.
7741   std::string Token;
7742   if (isToken(AsmToken::Integer)) {
7743     SMLoc Loc = getToken().getEndLoc();
7744     Token = std::string(getTokenStr());
7745     lex();
7746     if (getLoc() != Loc)
7747       return false;
7748   }
7749 
7750   StringRef Suffix;
7751   if (!parseId(Suffix))
7752     return false;
7753   Token += Suffix;
7754 
7755   StringRef DimId = Token;
7756   if (DimId.startswith("SQ_RSRC_IMG_"))
7757     DimId = DimId.drop_front(12);
7758 
7759   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
7760   if (!DimInfo)
7761     return false;
7762 
7763   Encoding = DimInfo->Encoding;
7764   return true;
7765 }
7766 
7767 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
7768   if (!isGFX10Plus())
7769     return MatchOperand_NoMatch;
7770 
7771   SMLoc S = getLoc();
7772 
7773   if (!trySkipId("dim", AsmToken::Colon))
7774     return MatchOperand_NoMatch;
7775 
7776   unsigned Encoding;
7777   SMLoc Loc = getLoc();
7778   if (!parseDimId(Encoding)) {
7779     Error(Loc, "invalid dim value");
7780     return MatchOperand_ParseFail;
7781   }
7782 
7783   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
7784                                               AMDGPUOperand::ImmTyDim));
7785   return MatchOperand_Success;
7786 }
7787 
7788 //===----------------------------------------------------------------------===//
7789 // dpp
7790 //===----------------------------------------------------------------------===//
7791 
7792 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
7793   SMLoc S = getLoc();
7794 
7795   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
7796     return MatchOperand_NoMatch;
7797 
7798   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
7799 
7800   int64_t Sels[8];
7801 
7802   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7803     return MatchOperand_ParseFail;
7804 
7805   for (size_t i = 0; i < 8; ++i) {
7806     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7807       return MatchOperand_ParseFail;
7808 
7809     SMLoc Loc = getLoc();
7810     if (getParser().parseAbsoluteExpression(Sels[i]))
7811       return MatchOperand_ParseFail;
7812     if (0 > Sels[i] || 7 < Sels[i]) {
7813       Error(Loc, "expected a 3-bit value");
7814       return MatchOperand_ParseFail;
7815     }
7816   }
7817 
7818   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7819     return MatchOperand_ParseFail;
7820 
7821   unsigned DPP8 = 0;
7822   for (size_t i = 0; i < 8; ++i)
7823     DPP8 |= (Sels[i] << (i * 3));
7824 
7825   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
7826   return MatchOperand_Success;
7827 }
7828 
7829 bool
7830 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
7831                                     const OperandVector &Operands) {
7832   if (Ctrl == "row_newbcast")
7833     return isGFX90A();
7834 
7835   if (Ctrl == "row_share" ||
7836       Ctrl == "row_xmask")
7837     return isGFX10Plus();
7838 
7839   if (Ctrl == "wave_shl" ||
7840       Ctrl == "wave_shr" ||
7841       Ctrl == "wave_rol" ||
7842       Ctrl == "wave_ror" ||
7843       Ctrl == "row_bcast")
7844     return isVI() || isGFX9();
7845 
7846   return Ctrl == "row_mirror" ||
7847          Ctrl == "row_half_mirror" ||
7848          Ctrl == "quad_perm" ||
7849          Ctrl == "row_shl" ||
7850          Ctrl == "row_shr" ||
7851          Ctrl == "row_ror";
7852 }
7853 
7854 int64_t
7855 AMDGPUAsmParser::parseDPPCtrlPerm() {
7856   // quad_perm:[%d,%d,%d,%d]
7857 
7858   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7859     return -1;
7860 
7861   int64_t Val = 0;
7862   for (int i = 0; i < 4; ++i) {
7863     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7864       return -1;
7865 
7866     int64_t Temp;
7867     SMLoc Loc = getLoc();
7868     if (getParser().parseAbsoluteExpression(Temp))
7869       return -1;
7870     if (Temp < 0 || Temp > 3) {
7871       Error(Loc, "expected a 2-bit value");
7872       return -1;
7873     }
7874 
7875     Val += (Temp << i * 2);
7876   }
7877 
7878   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7879     return -1;
7880 
7881   return Val;
7882 }
7883 
7884 int64_t
7885 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
7886   using namespace AMDGPU::DPP;
7887 
7888   // sel:%d
7889 
7890   int64_t Val;
7891   SMLoc Loc = getLoc();
7892 
7893   if (getParser().parseAbsoluteExpression(Val))
7894     return -1;
7895 
7896   struct DppCtrlCheck {
7897     int64_t Ctrl;
7898     int Lo;
7899     int Hi;
7900   };
7901 
7902   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
7903     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
7904     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
7905     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
7906     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
7907     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
7908     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
7909     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
7910     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
7911     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
7912     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
7913     .Default({-1, 0, 0});
7914 
7915   bool Valid;
7916   if (Check.Ctrl == -1) {
7917     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
7918     Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
7919   } else {
7920     Valid = Check.Lo <= Val && Val <= Check.Hi;
7921     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
7922   }
7923 
7924   if (!Valid) {
7925     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
7926     return -1;
7927   }
7928 
7929   return Val;
7930 }
7931 
7932 OperandMatchResultTy
7933 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
7934   using namespace AMDGPU::DPP;
7935 
7936   if (!isToken(AsmToken::Identifier) ||
7937       !isSupportedDPPCtrl(getTokenStr(), Operands))
7938     return MatchOperand_NoMatch;
7939 
7940   SMLoc S = getLoc();
7941   int64_t Val = -1;
7942   StringRef Ctrl;
7943 
7944   parseId(Ctrl);
7945 
7946   if (Ctrl == "row_mirror") {
7947     Val = DppCtrl::ROW_MIRROR;
7948   } else if (Ctrl == "row_half_mirror") {
7949     Val = DppCtrl::ROW_HALF_MIRROR;
7950   } else {
7951     if (skipToken(AsmToken::Colon, "expected a colon")) {
7952       if (Ctrl == "quad_perm") {
7953         Val = parseDPPCtrlPerm();
7954       } else {
7955         Val = parseDPPCtrlSel(Ctrl);
7956       }
7957     }
7958   }
7959 
7960   if (Val == -1)
7961     return MatchOperand_ParseFail;
7962 
7963   Operands.push_back(
7964     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
7965   return MatchOperand_Success;
7966 }
7967 
7968 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
7969   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
7970 }
7971 
7972 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
7973   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
7974 }
7975 
7976 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
7977   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
7978 }
7979 
7980 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
7981   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
7982 }
7983 
7984 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
7985   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
7986 }
7987 
7988 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
7989   OptionalImmIndexMap OptionalIdx;
7990 
7991   unsigned Opc = Inst.getOpcode();
7992   bool HasModifiers =
7993       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
7994   unsigned I = 1;
7995   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7996   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7997     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7998   }
7999 
8000   int Fi = 0;
8001   for (unsigned E = Operands.size(); I != E; ++I) {
8002     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8003                                             MCOI::TIED_TO);
8004     if (TiedTo != -1) {
8005       assert((unsigned)TiedTo < Inst.getNumOperands());
8006       // handle tied old or src2 for MAC instructions
8007       Inst.addOperand(Inst.getOperand(TiedTo));
8008     }
8009     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8010     // Add the register arguments
8011     if (Op.isReg() && validateVccOperand(Op.getReg())) {
8012       // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
8013       // Skip it.
8014       continue;
8015     }
8016 
8017     if (IsDPP8) {
8018       if (Op.isDPP8()) {
8019         Op.addImmOperands(Inst, 1);
8020       } else if (HasModifiers &&
8021                  isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8022         Op.addRegWithFPInputModsOperands(Inst, 2);
8023       } else if (Op.isFI()) {
8024         Fi = Op.getImm();
8025       } else if (Op.isReg()) {
8026         Op.addRegOperands(Inst, 1);
8027       } else {
8028         llvm_unreachable("Invalid operand type");
8029       }
8030     } else {
8031       if (HasModifiers &&
8032           isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8033         Op.addRegWithFPInputModsOperands(Inst, 2);
8034       } else if (Op.isReg()) {
8035         Op.addRegOperands(Inst, 1);
8036       } else if (Op.isDPPCtrl()) {
8037         Op.addImmOperands(Inst, 1);
8038       } else if (Op.isImm()) {
8039         // Handle optional arguments
8040         OptionalIdx[Op.getImmTy()] = I;
8041       } else {
8042         llvm_unreachable("Invalid operand type");
8043       }
8044     }
8045   }
8046 
8047   if (IsDPP8) {
8048     using namespace llvm::AMDGPU::DPP;
8049     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8050   } else {
8051     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8052     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8053     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8054     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8055       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8056     }
8057   }
8058 }
8059 
8060 //===----------------------------------------------------------------------===//
8061 // sdwa
8062 //===----------------------------------------------------------------------===//
8063 
8064 OperandMatchResultTy
8065 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
8066                               AMDGPUOperand::ImmTy Type) {
8067   using namespace llvm::AMDGPU::SDWA;
8068 
8069   SMLoc S = getLoc();
8070   StringRef Value;
8071   OperandMatchResultTy res;
8072 
8073   SMLoc StringLoc;
8074   res = parseStringWithPrefix(Prefix, Value, StringLoc);
8075   if (res != MatchOperand_Success) {
8076     return res;
8077   }
8078 
8079   int64_t Int;
8080   Int = StringSwitch<int64_t>(Value)
8081         .Case("BYTE_0", SdwaSel::BYTE_0)
8082         .Case("BYTE_1", SdwaSel::BYTE_1)
8083         .Case("BYTE_2", SdwaSel::BYTE_2)
8084         .Case("BYTE_3", SdwaSel::BYTE_3)
8085         .Case("WORD_0", SdwaSel::WORD_0)
8086         .Case("WORD_1", SdwaSel::WORD_1)
8087         .Case("DWORD", SdwaSel::DWORD)
8088         .Default(0xffffffff);
8089 
8090   if (Int == 0xffffffff) {
8091     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8092     return MatchOperand_ParseFail;
8093   }
8094 
8095   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8096   return MatchOperand_Success;
8097 }
8098 
8099 OperandMatchResultTy
8100 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8101   using namespace llvm::AMDGPU::SDWA;
8102 
8103   SMLoc S = getLoc();
8104   StringRef Value;
8105   OperandMatchResultTy res;
8106 
8107   SMLoc StringLoc;
8108   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8109   if (res != MatchOperand_Success) {
8110     return res;
8111   }
8112 
8113   int64_t Int;
8114   Int = StringSwitch<int64_t>(Value)
8115         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8116         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8117         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8118         .Default(0xffffffff);
8119 
8120   if (Int == 0xffffffff) {
8121     Error(StringLoc, "invalid dst_unused value");
8122     return MatchOperand_ParseFail;
8123   }
8124 
8125   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8126   return MatchOperand_Success;
8127 }
8128 
8129 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8130   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8131 }
8132 
8133 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8134   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8135 }
8136 
8137 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8138   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8139 }
8140 
8141 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8142   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8143 }
8144 
8145 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8146   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8147 }
8148 
8149 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8150                               uint64_t BasicInstType,
8151                               bool SkipDstVcc,
8152                               bool SkipSrcVcc) {
8153   using namespace llvm::AMDGPU::SDWA;
8154 
8155   OptionalImmIndexMap OptionalIdx;
8156   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8157   bool SkippedVcc = false;
8158 
8159   unsigned I = 1;
8160   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8161   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8162     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8163   }
8164 
8165   for (unsigned E = Operands.size(); I != E; ++I) {
8166     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8167     if (SkipVcc && !SkippedVcc && Op.isReg() &&
8168         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
8169       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
8170       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
8171       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
8172       // Skip VCC only if we didn't skip it on previous iteration.
8173       // Note that src0 and src1 occupy 2 slots each because of modifiers.
8174       if (BasicInstType == SIInstrFlags::VOP2 &&
8175           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8176            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8177         SkippedVcc = true;
8178         continue;
8179       } else if (BasicInstType == SIInstrFlags::VOPC &&
8180                  Inst.getNumOperands() == 0) {
8181         SkippedVcc = true;
8182         continue;
8183       }
8184     }
8185     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8186       Op.addRegOrImmWithInputModsOperands(Inst, 2);
8187     } else if (Op.isImm()) {
8188       // Handle optional arguments
8189       OptionalIdx[Op.getImmTy()] = I;
8190     } else {
8191       llvm_unreachable("Invalid operand type");
8192     }
8193     SkippedVcc = false;
8194   }
8195 
8196   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
8197       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
8198       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
8199     // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
8200     switch (BasicInstType) {
8201     case SIInstrFlags::VOP1:
8202       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8203       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8204         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8205       }
8206       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8207       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8208       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8209       break;
8210 
8211     case SIInstrFlags::VOP2:
8212       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8213       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8214         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8215       }
8216       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8217       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8218       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8219       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8220       break;
8221 
8222     case SIInstrFlags::VOPC:
8223       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8224         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8225       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8226       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8227       break;
8228 
8229     default:
8230       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8231     }
8232   }
8233 
8234   // special case v_mac_{f16, f32}:
8235   // it has src2 register operand that is tied to dst operand
8236   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8237       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
8238     auto it = Inst.begin();
8239     std::advance(
8240       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8241     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8242   }
8243 }
8244 
8245 //===----------------------------------------------------------------------===//
8246 // mAI
8247 //===----------------------------------------------------------------------===//
8248 
8249 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8250   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8251 }
8252 
8253 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8254   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8255 }
8256 
8257 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8258   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8259 }
8260 
8261 /// Force static initialization.
8262 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8263   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8264   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8265 }
8266 
8267 #define GET_REGISTER_MATCHER
8268 #define GET_MATCHER_IMPLEMENTATION
8269 #define GET_MNEMONIC_SPELL_CHECKER
8270 #define GET_MNEMONIC_CHECKER
8271 #include "AMDGPUGenAsmMatcher.inc"
8272 
8273 // This fuction should be defined after auto-generated include so that we have
8274 // MatchClassKind enum defined
8275 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8276                                                      unsigned Kind) {
8277   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
8278   // But MatchInstructionImpl() expects to meet token and fails to validate
8279   // operand. This method checks if we are given immediate operand but expect to
8280   // get corresponding token.
8281   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8282   switch (Kind) {
8283   case MCK_addr64:
8284     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8285   case MCK_gds:
8286     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8287   case MCK_lds:
8288     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8289   case MCK_idxen:
8290     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8291   case MCK_offen:
8292     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8293   case MCK_SSrcB32:
8294     // When operands have expression values, they will return true for isToken,
8295     // because it is not possible to distinguish between a token and an
8296     // expression at parse time. MatchInstructionImpl() will always try to
8297     // match an operand as a token, when isToken returns true, and when the
8298     // name of the expression is not a valid token, the match will fail,
8299     // so we need to handle it here.
8300     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8301   case MCK_SSrcF32:
8302     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8303   case MCK_SoppBrTarget:
8304     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8305   case MCK_VReg32OrOff:
8306     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8307   case MCK_InterpSlot:
8308     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
8309   case MCK_Attr:
8310     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
8311   case MCK_AttrChan:
8312     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
8313   case MCK_ImmSMEMOffset:
8314     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
8315   case MCK_SReg_64:
8316   case MCK_SReg_64_XEXEC:
8317     // Null is defined as a 32-bit register but
8318     // it should also be enabled with 64-bit operands.
8319     // The following code enables it for SReg_64 operands
8320     // used as source and destination. Remaining source
8321     // operands are handled in isInlinableImm.
8322     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
8323   default:
8324     return Match_InvalidOperand;
8325   }
8326 }
8327 
8328 //===----------------------------------------------------------------------===//
8329 // endpgm
8330 //===----------------------------------------------------------------------===//
8331 
8332 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
8333   SMLoc S = getLoc();
8334   int64_t Imm = 0;
8335 
8336   if (!parseExpr(Imm)) {
8337     // The operand is optional, if not present default to 0
8338     Imm = 0;
8339   }
8340 
8341   if (!isUInt<16>(Imm)) {
8342     Error(S, "expected a 16-bit value");
8343     return MatchOperand_ParseFail;
8344   }
8345 
8346   Operands.push_back(
8347       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
8348   return MatchOperand_Success;
8349 }
8350 
8351 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
8352