1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/BinaryFormat/ELF.h"
24 #include "llvm/MC/MCAsmInfo.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCExpr.h"
27 #include "llvm/MC/MCInst.h"
28 #include "llvm/MC/MCParser/MCAsmLexer.h"
29 #include "llvm/MC/MCParser/MCAsmParser.h"
30 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
31 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
32 #include "llvm/MC/MCSymbol.h"
33 #include "llvm/MC/TargetRegistry.h"
34 #include "llvm/Support/AMDGPUMetadata.h"
35 #include "llvm/Support/AMDHSAKernelDescriptor.h"
36 #include "llvm/Support/Casting.h"
37 #include "llvm/Support/MachineValueType.h"
38 #include "llvm/Support/MathExtras.h"
39 #include "llvm/Support/TargetParser.h"
40 
41 using namespace llvm;
42 using namespace llvm::AMDGPU;
43 using namespace llvm::amdhsa;
44 
45 namespace {
46 
47 class AMDGPUAsmParser;
48 
49 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
50 
51 //===----------------------------------------------------------------------===//
52 // Operand
53 //===----------------------------------------------------------------------===//
54 
55 class AMDGPUOperand : public MCParsedAsmOperand {
56   enum KindTy {
57     Token,
58     Immediate,
59     Register,
60     Expression
61   } Kind;
62 
63   SMLoc StartLoc, EndLoc;
64   const AMDGPUAsmParser *AsmParser;
65 
66 public:
67   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
68       : Kind(Kind_), AsmParser(AsmParser_) {}
69 
70   using Ptr = std::unique_ptr<AMDGPUOperand>;
71 
72   struct Modifiers {
73     bool Abs = false;
74     bool Neg = false;
75     bool Sext = false;
76 
77     bool hasFPModifiers() const { return Abs || Neg; }
78     bool hasIntModifiers() const { return Sext; }
79     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
80 
81     int64_t getFPModifiersOperand() const {
82       int64_t Operand = 0;
83       Operand |= Abs ? SISrcMods::ABS : 0u;
84       Operand |= Neg ? SISrcMods::NEG : 0u;
85       return Operand;
86     }
87 
88     int64_t getIntModifiersOperand() const {
89       int64_t Operand = 0;
90       Operand |= Sext ? SISrcMods::SEXT : 0u;
91       return Operand;
92     }
93 
94     int64_t getModifiersOperand() const {
95       assert(!(hasFPModifiers() && hasIntModifiers())
96            && "fp and int modifiers should not be used simultaneously");
97       if (hasFPModifiers()) {
98         return getFPModifiersOperand();
99       } else if (hasIntModifiers()) {
100         return getIntModifiersOperand();
101       } else {
102         return 0;
103       }
104     }
105 
106     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
107   };
108 
109   enum ImmTy {
110     ImmTyNone,
111     ImmTyGDS,
112     ImmTyLDS,
113     ImmTyOffen,
114     ImmTyIdxen,
115     ImmTyAddr64,
116     ImmTyOffset,
117     ImmTyInstOffset,
118     ImmTyOffset0,
119     ImmTyOffset1,
120     ImmTyCPol,
121     ImmTySWZ,
122     ImmTyTFE,
123     ImmTyD16,
124     ImmTyClampSI,
125     ImmTyOModSI,
126     ImmTyDPP8,
127     ImmTyDppCtrl,
128     ImmTyDppRowMask,
129     ImmTyDppBankMask,
130     ImmTyDppBoundCtrl,
131     ImmTyDppFi,
132     ImmTySdwaDstSel,
133     ImmTySdwaSrc0Sel,
134     ImmTySdwaSrc1Sel,
135     ImmTySdwaDstUnused,
136     ImmTyDMask,
137     ImmTyDim,
138     ImmTyUNorm,
139     ImmTyDA,
140     ImmTyR128A16,
141     ImmTyA16,
142     ImmTyLWE,
143     ImmTyExpTgt,
144     ImmTyExpCompr,
145     ImmTyExpVM,
146     ImmTyFORMAT,
147     ImmTyHwreg,
148     ImmTyOff,
149     ImmTySendMsg,
150     ImmTyInterpSlot,
151     ImmTyInterpAttr,
152     ImmTyAttrChan,
153     ImmTyOpSel,
154     ImmTyOpSelHi,
155     ImmTyNegLo,
156     ImmTyNegHi,
157     ImmTySwizzle,
158     ImmTyGprIdxMode,
159     ImmTyHigh,
160     ImmTyBLGP,
161     ImmTyCBSZ,
162     ImmTyABID,
163     ImmTyEndpgm,
164     ImmTyWaitVDST,
165     ImmTyWaitEXP,
166   };
167 
168   enum ImmKindTy {
169     ImmKindTyNone,
170     ImmKindTyLiteral,
171     ImmKindTyConst,
172   };
173 
174 private:
175   struct TokOp {
176     const char *Data;
177     unsigned Length;
178   };
179 
180   struct ImmOp {
181     int64_t Val;
182     ImmTy Type;
183     bool IsFPImm;
184     mutable ImmKindTy Kind;
185     Modifiers Mods;
186   };
187 
188   struct RegOp {
189     unsigned RegNo;
190     Modifiers Mods;
191   };
192 
193   union {
194     TokOp Tok;
195     ImmOp Imm;
196     RegOp Reg;
197     const MCExpr *Expr;
198   };
199 
200 public:
201   bool isToken() const override {
202     if (Kind == Token)
203       return true;
204 
205     // When parsing operands, we can't always tell if something was meant to be
206     // a token, like 'gds', or an expression that references a global variable.
207     // In this case, we assume the string is an expression, and if we need to
208     // interpret is a token, then we treat the symbol name as the token.
209     return isSymbolRefExpr();
210   }
211 
212   bool isSymbolRefExpr() const {
213     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
214   }
215 
216   bool isImm() const override {
217     return Kind == Immediate;
218   }
219 
220   void setImmKindNone() const {
221     assert(isImm());
222     Imm.Kind = ImmKindTyNone;
223   }
224 
225   void setImmKindLiteral() const {
226     assert(isImm());
227     Imm.Kind = ImmKindTyLiteral;
228   }
229 
230   void setImmKindConst() const {
231     assert(isImm());
232     Imm.Kind = ImmKindTyConst;
233   }
234 
235   bool IsImmKindLiteral() const {
236     return isImm() && Imm.Kind == ImmKindTyLiteral;
237   }
238 
239   bool isImmKindConst() const {
240     return isImm() && Imm.Kind == ImmKindTyConst;
241   }
242 
243   bool isInlinableImm(MVT type) const;
244   bool isLiteralImm(MVT type) const;
245 
246   bool isRegKind() const {
247     return Kind == Register;
248   }
249 
250   bool isReg() const override {
251     return isRegKind() && !hasModifiers();
252   }
253 
254   bool isRegOrInline(unsigned RCID, MVT type) const {
255     return isRegClass(RCID) || isInlinableImm(type);
256   }
257 
258   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
259     return isRegOrInline(RCID, type) || isLiteralImm(type);
260   }
261 
262   bool isRegOrImmWithInt16InputMods() const {
263     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
264   }
265 
266   bool isRegOrImmWithInt32InputMods() const {
267     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
268   }
269 
270   bool isRegOrImmWithInt64InputMods() const {
271     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
272   }
273 
274   bool isRegOrImmWithFP16InputMods() const {
275     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
276   }
277 
278   bool isRegOrImmWithFP32InputMods() const {
279     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
280   }
281 
282   bool isRegOrImmWithFP64InputMods() const {
283     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
284   }
285 
286   bool isVReg() const {
287     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
288            isRegClass(AMDGPU::VReg_64RegClassID) ||
289            isRegClass(AMDGPU::VReg_96RegClassID) ||
290            isRegClass(AMDGPU::VReg_128RegClassID) ||
291            isRegClass(AMDGPU::VReg_160RegClassID) ||
292            isRegClass(AMDGPU::VReg_192RegClassID) ||
293            isRegClass(AMDGPU::VReg_256RegClassID) ||
294            isRegClass(AMDGPU::VReg_512RegClassID) ||
295            isRegClass(AMDGPU::VReg_1024RegClassID);
296   }
297 
298   bool isVReg32() const {
299     return isRegClass(AMDGPU::VGPR_32RegClassID);
300   }
301 
302   bool isVReg32OrOff() const {
303     return isOff() || isVReg32();
304   }
305 
306   bool isNull() const {
307     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
308   }
309 
310   bool isVRegWithInputMods() const;
311 
312   bool isSDWAOperand(MVT type) const;
313   bool isSDWAFP16Operand() const;
314   bool isSDWAFP32Operand() const;
315   bool isSDWAInt16Operand() const;
316   bool isSDWAInt32Operand() const;
317 
318   bool isImmTy(ImmTy ImmT) const {
319     return isImm() && Imm.Type == ImmT;
320   }
321 
322   bool isImmModifier() const {
323     return isImm() && Imm.Type != ImmTyNone;
324   }
325 
326   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
327   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
328   bool isDMask() const { return isImmTy(ImmTyDMask); }
329   bool isDim() const { return isImmTy(ImmTyDim); }
330   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
331   bool isDA() const { return isImmTy(ImmTyDA); }
332   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
333   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
334   bool isLWE() const { return isImmTy(ImmTyLWE); }
335   bool isOff() const { return isImmTy(ImmTyOff); }
336   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
337   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
338   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
339   bool isOffen() const { return isImmTy(ImmTyOffen); }
340   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
341   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
342   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
343   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
344   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
345 
346   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
347   bool isGDS() const { return isImmTy(ImmTyGDS); }
348   bool isLDS() const { return isImmTy(ImmTyLDS); }
349   bool isCPol() const { return isImmTy(ImmTyCPol); }
350   bool isSWZ() const { return isImmTy(ImmTySWZ); }
351   bool isTFE() const { return isImmTy(ImmTyTFE); }
352   bool isD16() const { return isImmTy(ImmTyD16); }
353   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
354   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
355   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
356   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
357   bool isFI() const { return isImmTy(ImmTyDppFi); }
358   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
359   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
360   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
361   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
362   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
363   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
364   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
365   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
366   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
367   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
368   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
369   bool isHigh() const { return isImmTy(ImmTyHigh); }
370 
371   bool isMod() const {
372     return isClampSI() || isOModSI();
373   }
374 
375   bool isRegOrImm() const {
376     return isReg() || isImm();
377   }
378 
379   bool isRegClass(unsigned RCID) const;
380 
381   bool isInlineValue() const;
382 
383   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
384     return isRegOrInline(RCID, type) && !hasModifiers();
385   }
386 
387   bool isSCSrcB16() const {
388     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
389   }
390 
391   bool isSCSrcV2B16() const {
392     return isSCSrcB16();
393   }
394 
395   bool isSCSrcB32() const {
396     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
397   }
398 
399   bool isSCSrcB64() const {
400     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
401   }
402 
403   bool isBoolReg() const;
404 
405   bool isSCSrcF16() const {
406     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
407   }
408 
409   bool isSCSrcV2F16() const {
410     return isSCSrcF16();
411   }
412 
413   bool isSCSrcF32() const {
414     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
415   }
416 
417   bool isSCSrcF64() const {
418     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
419   }
420 
421   bool isSSrcB32() const {
422     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
423   }
424 
425   bool isSSrcB16() const {
426     return isSCSrcB16() || isLiteralImm(MVT::i16);
427   }
428 
429   bool isSSrcV2B16() const {
430     llvm_unreachable("cannot happen");
431     return isSSrcB16();
432   }
433 
434   bool isSSrcB64() const {
435     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
436     // See isVSrc64().
437     return isSCSrcB64() || isLiteralImm(MVT::i64);
438   }
439 
440   bool isSSrcF32() const {
441     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
442   }
443 
444   bool isSSrcF64() const {
445     return isSCSrcB64() || isLiteralImm(MVT::f64);
446   }
447 
448   bool isSSrcF16() const {
449     return isSCSrcB16() || isLiteralImm(MVT::f16);
450   }
451 
452   bool isSSrcV2F16() const {
453     llvm_unreachable("cannot happen");
454     return isSSrcF16();
455   }
456 
457   bool isSSrcV2FP32() const {
458     llvm_unreachable("cannot happen");
459     return isSSrcF32();
460   }
461 
462   bool isSCSrcV2FP32() const {
463     llvm_unreachable("cannot happen");
464     return isSCSrcF32();
465   }
466 
467   bool isSSrcV2INT32() const {
468     llvm_unreachable("cannot happen");
469     return isSSrcB32();
470   }
471 
472   bool isSCSrcV2INT32() const {
473     llvm_unreachable("cannot happen");
474     return isSCSrcB32();
475   }
476 
477   bool isSSrcOrLdsB32() const {
478     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
479            isLiteralImm(MVT::i32) || isExpr();
480   }
481 
482   bool isVCSrcB32() const {
483     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
484   }
485 
486   bool isVCSrcB64() const {
487     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
488   }
489 
490   bool isVCSrcB16() const {
491     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
492   }
493 
494   bool isVCSrcV2B16() const {
495     return isVCSrcB16();
496   }
497 
498   bool isVCSrcF32() const {
499     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
500   }
501 
502   bool isVCSrcF64() const {
503     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
504   }
505 
506   bool isVCSrcF16() const {
507     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
508   }
509 
510   bool isVCSrcV2F16() const {
511     return isVCSrcF16();
512   }
513 
514   bool isVSrcB32() const {
515     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
516   }
517 
518   bool isVSrcB64() const {
519     return isVCSrcF64() || isLiteralImm(MVT::i64);
520   }
521 
522   bool isVSrcB16() const {
523     return isVCSrcB16() || isLiteralImm(MVT::i16);
524   }
525 
526   bool isVSrcV2B16() const {
527     return isVSrcB16() || isLiteralImm(MVT::v2i16);
528   }
529 
530   bool isVCSrcV2FP32() const {
531     return isVCSrcF64();
532   }
533 
534   bool isVSrcV2FP32() const {
535     return isVSrcF64() || isLiteralImm(MVT::v2f32);
536   }
537 
538   bool isVCSrcV2INT32() const {
539     return isVCSrcB64();
540   }
541 
542   bool isVSrcV2INT32() const {
543     return isVSrcB64() || isLiteralImm(MVT::v2i32);
544   }
545 
546   bool isVSrcF32() const {
547     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
548   }
549 
550   bool isVSrcF64() const {
551     return isVCSrcF64() || isLiteralImm(MVT::f64);
552   }
553 
554   bool isVSrcF16() const {
555     return isVCSrcF16() || isLiteralImm(MVT::f16);
556   }
557 
558   bool isVSrcV2F16() const {
559     return isVSrcF16() || isLiteralImm(MVT::v2f16);
560   }
561 
562   bool isVISrcB32() const {
563     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
564   }
565 
566   bool isVISrcB16() const {
567     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
568   }
569 
570   bool isVISrcV2B16() const {
571     return isVISrcB16();
572   }
573 
574   bool isVISrcF32() const {
575     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
576   }
577 
578   bool isVISrcF16() const {
579     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
580   }
581 
582   bool isVISrcV2F16() const {
583     return isVISrcF16() || isVISrcB32();
584   }
585 
586   bool isVISrc_64B64() const {
587     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
588   }
589 
590   bool isVISrc_64F64() const {
591     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
592   }
593 
594   bool isVISrc_64V2FP32() const {
595     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
596   }
597 
598   bool isVISrc_64V2INT32() const {
599     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
600   }
601 
602   bool isVISrc_256B64() const {
603     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
604   }
605 
606   bool isVISrc_256F64() const {
607     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
608   }
609 
610   bool isVISrc_128B16() const {
611     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
612   }
613 
614   bool isVISrc_128V2B16() const {
615     return isVISrc_128B16();
616   }
617 
618   bool isVISrc_128B32() const {
619     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
620   }
621 
622   bool isVISrc_128F32() const {
623     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
624   }
625 
626   bool isVISrc_256V2FP32() const {
627     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
628   }
629 
630   bool isVISrc_256V2INT32() const {
631     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
632   }
633 
634   bool isVISrc_512B32() const {
635     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
636   }
637 
638   bool isVISrc_512B16() const {
639     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
640   }
641 
642   bool isVISrc_512V2B16() const {
643     return isVISrc_512B16();
644   }
645 
646   bool isVISrc_512F32() const {
647     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
648   }
649 
650   bool isVISrc_512F16() const {
651     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
652   }
653 
654   bool isVISrc_512V2F16() const {
655     return isVISrc_512F16() || isVISrc_512B32();
656   }
657 
658   bool isVISrc_1024B32() const {
659     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
660   }
661 
662   bool isVISrc_1024B16() const {
663     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
664   }
665 
666   bool isVISrc_1024V2B16() const {
667     return isVISrc_1024B16();
668   }
669 
670   bool isVISrc_1024F32() const {
671     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
672   }
673 
674   bool isVISrc_1024F16() const {
675     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
676   }
677 
678   bool isVISrc_1024V2F16() const {
679     return isVISrc_1024F16() || isVISrc_1024B32();
680   }
681 
682   bool isAISrcB32() const {
683     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
684   }
685 
686   bool isAISrcB16() const {
687     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
688   }
689 
690   bool isAISrcV2B16() const {
691     return isAISrcB16();
692   }
693 
694   bool isAISrcF32() const {
695     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
696   }
697 
698   bool isAISrcF16() const {
699     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
700   }
701 
702   bool isAISrcV2F16() const {
703     return isAISrcF16() || isAISrcB32();
704   }
705 
706   bool isAISrc_64B64() const {
707     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
708   }
709 
710   bool isAISrc_64F64() const {
711     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
712   }
713 
714   bool isAISrc_128B32() const {
715     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
716   }
717 
718   bool isAISrc_128B16() const {
719     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
720   }
721 
722   bool isAISrc_128V2B16() const {
723     return isAISrc_128B16();
724   }
725 
726   bool isAISrc_128F32() const {
727     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
728   }
729 
730   bool isAISrc_128F16() const {
731     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
732   }
733 
734   bool isAISrc_128V2F16() const {
735     return isAISrc_128F16() || isAISrc_128B32();
736   }
737 
738   bool isVISrc_128F16() const {
739     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
740   }
741 
742   bool isVISrc_128V2F16() const {
743     return isVISrc_128F16() || isVISrc_128B32();
744   }
745 
746   bool isAISrc_256B64() const {
747     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
748   }
749 
750   bool isAISrc_256F64() const {
751     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
752   }
753 
754   bool isAISrc_512B32() const {
755     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
756   }
757 
758   bool isAISrc_512B16() const {
759     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
760   }
761 
762   bool isAISrc_512V2B16() const {
763     return isAISrc_512B16();
764   }
765 
766   bool isAISrc_512F32() const {
767     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
768   }
769 
770   bool isAISrc_512F16() const {
771     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
772   }
773 
774   bool isAISrc_512V2F16() const {
775     return isAISrc_512F16() || isAISrc_512B32();
776   }
777 
778   bool isAISrc_1024B32() const {
779     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
780   }
781 
782   bool isAISrc_1024B16() const {
783     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
784   }
785 
786   bool isAISrc_1024V2B16() const {
787     return isAISrc_1024B16();
788   }
789 
790   bool isAISrc_1024F32() const {
791     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
792   }
793 
794   bool isAISrc_1024F16() const {
795     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
796   }
797 
798   bool isAISrc_1024V2F16() const {
799     return isAISrc_1024F16() || isAISrc_1024B32();
800   }
801 
802   bool isKImmFP32() const {
803     return isLiteralImm(MVT::f32);
804   }
805 
806   bool isKImmFP16() const {
807     return isLiteralImm(MVT::f16);
808   }
809 
810   bool isMem() const override {
811     return false;
812   }
813 
814   bool isExpr() const {
815     return Kind == Expression;
816   }
817 
818   bool isSoppBrTarget() const {
819     return isExpr() || isImm();
820   }
821 
822   bool isSWaitCnt() const;
823   bool isDepCtr() const;
824   bool isSDelayAlu() const;
825   bool isHwreg() const;
826   bool isSendMsg() const;
827   bool isSwizzle() const;
828   bool isSMRDOffset8() const;
829   bool isSMEMOffset() const;
830   bool isSMRDLiteralOffset() const;
831   bool isDPP8() const;
832   bool isDPPCtrl() const;
833   bool isBLGP() const;
834   bool isCBSZ() const;
835   bool isABID() const;
836   bool isGPRIdxMode() const;
837   bool isS16Imm() const;
838   bool isU16Imm() const;
839   bool isEndpgm() const;
840   bool isWaitVDST() const;
841   bool isWaitEXP() const;
842 
843   StringRef getExpressionAsToken() const {
844     assert(isExpr());
845     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
846     return S->getSymbol().getName();
847   }
848 
849   StringRef getToken() const {
850     assert(isToken());
851 
852     if (Kind == Expression)
853       return getExpressionAsToken();
854 
855     return StringRef(Tok.Data, Tok.Length);
856   }
857 
858   int64_t getImm() const {
859     assert(isImm());
860     return Imm.Val;
861   }
862 
863   void setImm(int64_t Val) {
864     assert(isImm());
865     Imm.Val = Val;
866   }
867 
868   ImmTy getImmTy() const {
869     assert(isImm());
870     return Imm.Type;
871   }
872 
873   unsigned getReg() const override {
874     assert(isRegKind());
875     return Reg.RegNo;
876   }
877 
878   SMLoc getStartLoc() const override {
879     return StartLoc;
880   }
881 
882   SMLoc getEndLoc() const override {
883     return EndLoc;
884   }
885 
886   SMRange getLocRange() const {
887     return SMRange(StartLoc, EndLoc);
888   }
889 
890   Modifiers getModifiers() const {
891     assert(isRegKind() || isImmTy(ImmTyNone));
892     return isRegKind() ? Reg.Mods : Imm.Mods;
893   }
894 
895   void setModifiers(Modifiers Mods) {
896     assert(isRegKind() || isImmTy(ImmTyNone));
897     if (isRegKind())
898       Reg.Mods = Mods;
899     else
900       Imm.Mods = Mods;
901   }
902 
903   bool hasModifiers() const {
904     return getModifiers().hasModifiers();
905   }
906 
907   bool hasFPModifiers() const {
908     return getModifiers().hasFPModifiers();
909   }
910 
911   bool hasIntModifiers() const {
912     return getModifiers().hasIntModifiers();
913   }
914 
915   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
916 
917   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
918 
919   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
920 
921   template <unsigned Bitwidth>
922   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
923 
924   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
925     addKImmFPOperands<16>(Inst, N);
926   }
927 
928   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
929     addKImmFPOperands<32>(Inst, N);
930   }
931 
932   void addRegOperands(MCInst &Inst, unsigned N) const;
933 
934   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
935     addRegOperands(Inst, N);
936   }
937 
938   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
939     if (isRegKind())
940       addRegOperands(Inst, N);
941     else if (isExpr())
942       Inst.addOperand(MCOperand::createExpr(Expr));
943     else
944       addImmOperands(Inst, N);
945   }
946 
947   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
948     Modifiers Mods = getModifiers();
949     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
950     if (isRegKind()) {
951       addRegOperands(Inst, N);
952     } else {
953       addImmOperands(Inst, N, false);
954     }
955   }
956 
957   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
958     assert(!hasIntModifiers());
959     addRegOrImmWithInputModsOperands(Inst, N);
960   }
961 
962   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
963     assert(!hasFPModifiers());
964     addRegOrImmWithInputModsOperands(Inst, N);
965   }
966 
967   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
968     Modifiers Mods = getModifiers();
969     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
970     assert(isRegKind());
971     addRegOperands(Inst, N);
972   }
973 
974   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
975     assert(!hasIntModifiers());
976     addRegWithInputModsOperands(Inst, N);
977   }
978 
979   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
980     assert(!hasFPModifiers());
981     addRegWithInputModsOperands(Inst, N);
982   }
983 
984   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
985     if (isImm())
986       addImmOperands(Inst, N);
987     else {
988       assert(isExpr());
989       Inst.addOperand(MCOperand::createExpr(Expr));
990     }
991   }
992 
993   static void printImmTy(raw_ostream& OS, ImmTy Type) {
994     switch (Type) {
995     case ImmTyNone: OS << "None"; break;
996     case ImmTyGDS: OS << "GDS"; break;
997     case ImmTyLDS: OS << "LDS"; break;
998     case ImmTyOffen: OS << "Offen"; break;
999     case ImmTyIdxen: OS << "Idxen"; break;
1000     case ImmTyAddr64: OS << "Addr64"; break;
1001     case ImmTyOffset: OS << "Offset"; break;
1002     case ImmTyInstOffset: OS << "InstOffset"; break;
1003     case ImmTyOffset0: OS << "Offset0"; break;
1004     case ImmTyOffset1: OS << "Offset1"; break;
1005     case ImmTyCPol: OS << "CPol"; break;
1006     case ImmTySWZ: OS << "SWZ"; break;
1007     case ImmTyTFE: OS << "TFE"; break;
1008     case ImmTyD16: OS << "D16"; break;
1009     case ImmTyFORMAT: OS << "FORMAT"; break;
1010     case ImmTyClampSI: OS << "ClampSI"; break;
1011     case ImmTyOModSI: OS << "OModSI"; break;
1012     case ImmTyDPP8: OS << "DPP8"; break;
1013     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1014     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1015     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1016     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1017     case ImmTyDppFi: OS << "FI"; break;
1018     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1019     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1020     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1021     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1022     case ImmTyDMask: OS << "DMask"; break;
1023     case ImmTyDim: OS << "Dim"; break;
1024     case ImmTyUNorm: OS << "UNorm"; break;
1025     case ImmTyDA: OS << "DA"; break;
1026     case ImmTyR128A16: OS << "R128A16"; break;
1027     case ImmTyA16: OS << "A16"; break;
1028     case ImmTyLWE: OS << "LWE"; break;
1029     case ImmTyOff: OS << "Off"; break;
1030     case ImmTyExpTgt: OS << "ExpTgt"; break;
1031     case ImmTyExpCompr: OS << "ExpCompr"; break;
1032     case ImmTyExpVM: OS << "ExpVM"; break;
1033     case ImmTyHwreg: OS << "Hwreg"; break;
1034     case ImmTySendMsg: OS << "SendMsg"; break;
1035     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1036     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1037     case ImmTyAttrChan: OS << "AttrChan"; break;
1038     case ImmTyOpSel: OS << "OpSel"; break;
1039     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1040     case ImmTyNegLo: OS << "NegLo"; break;
1041     case ImmTyNegHi: OS << "NegHi"; break;
1042     case ImmTySwizzle: OS << "Swizzle"; break;
1043     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1044     case ImmTyHigh: OS << "High"; break;
1045     case ImmTyBLGP: OS << "BLGP"; break;
1046     case ImmTyCBSZ: OS << "CBSZ"; break;
1047     case ImmTyABID: OS << "ABID"; break;
1048     case ImmTyEndpgm: OS << "Endpgm"; break;
1049     case ImmTyWaitVDST: OS << "WaitVDST"; break;
1050     case ImmTyWaitEXP: OS << "WaitEXP"; break;
1051     }
1052   }
1053 
1054   void print(raw_ostream &OS) const override {
1055     switch (Kind) {
1056     case Register:
1057       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1058       break;
1059     case Immediate:
1060       OS << '<' << getImm();
1061       if (getImmTy() != ImmTyNone) {
1062         OS << " type: "; printImmTy(OS, getImmTy());
1063       }
1064       OS << " mods: " << Imm.Mods << '>';
1065       break;
1066     case Token:
1067       OS << '\'' << getToken() << '\'';
1068       break;
1069     case Expression:
1070       OS << "<expr " << *Expr << '>';
1071       break;
1072     }
1073   }
1074 
1075   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1076                                       int64_t Val, SMLoc Loc,
1077                                       ImmTy Type = ImmTyNone,
1078                                       bool IsFPImm = false) {
1079     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1080     Op->Imm.Val = Val;
1081     Op->Imm.IsFPImm = IsFPImm;
1082     Op->Imm.Kind = ImmKindTyNone;
1083     Op->Imm.Type = Type;
1084     Op->Imm.Mods = Modifiers();
1085     Op->StartLoc = Loc;
1086     Op->EndLoc = Loc;
1087     return Op;
1088   }
1089 
1090   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1091                                         StringRef Str, SMLoc Loc,
1092                                         bool HasExplicitEncodingSize = true) {
1093     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1094     Res->Tok.Data = Str.data();
1095     Res->Tok.Length = Str.size();
1096     Res->StartLoc = Loc;
1097     Res->EndLoc = Loc;
1098     return Res;
1099   }
1100 
1101   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1102                                       unsigned RegNo, SMLoc S,
1103                                       SMLoc E) {
1104     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1105     Op->Reg.RegNo = RegNo;
1106     Op->Reg.Mods = Modifiers();
1107     Op->StartLoc = S;
1108     Op->EndLoc = E;
1109     return Op;
1110   }
1111 
1112   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1113                                        const class MCExpr *Expr, SMLoc S) {
1114     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1115     Op->Expr = Expr;
1116     Op->StartLoc = S;
1117     Op->EndLoc = S;
1118     return Op;
1119   }
1120 };
1121 
1122 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1123   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1124   return OS;
1125 }
1126 
1127 //===----------------------------------------------------------------------===//
1128 // AsmParser
1129 //===----------------------------------------------------------------------===//
1130 
1131 // Holds info related to the current kernel, e.g. count of SGPRs used.
1132 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1133 // .amdgpu_hsa_kernel or at EOF.
1134 class KernelScopeInfo {
1135   int SgprIndexUnusedMin = -1;
1136   int VgprIndexUnusedMin = -1;
1137   int AgprIndexUnusedMin = -1;
1138   MCContext *Ctx = nullptr;
1139   MCSubtargetInfo const *MSTI = nullptr;
1140 
1141   void usesSgprAt(int i) {
1142     if (i >= SgprIndexUnusedMin) {
1143       SgprIndexUnusedMin = ++i;
1144       if (Ctx) {
1145         MCSymbol* const Sym =
1146           Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1147         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1148       }
1149     }
1150   }
1151 
1152   void usesVgprAt(int i) {
1153     if (i >= VgprIndexUnusedMin) {
1154       VgprIndexUnusedMin = ++i;
1155       if (Ctx) {
1156         MCSymbol* const Sym =
1157           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1158         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1159                                          VgprIndexUnusedMin);
1160         Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1161       }
1162     }
1163   }
1164 
1165   void usesAgprAt(int i) {
1166     // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
1167     if (!hasMAIInsts(*MSTI))
1168       return;
1169 
1170     if (i >= AgprIndexUnusedMin) {
1171       AgprIndexUnusedMin = ++i;
1172       if (Ctx) {
1173         MCSymbol* const Sym =
1174           Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1175         Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1176 
1177         // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1178         MCSymbol* const vSym =
1179           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1180         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1181                                          VgprIndexUnusedMin);
1182         vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1183       }
1184     }
1185   }
1186 
1187 public:
1188   KernelScopeInfo() = default;
1189 
1190   void initialize(MCContext &Context) {
1191     Ctx = &Context;
1192     MSTI = Ctx->getSubtargetInfo();
1193 
1194     usesSgprAt(SgprIndexUnusedMin = -1);
1195     usesVgprAt(VgprIndexUnusedMin = -1);
1196     if (hasMAIInsts(*MSTI)) {
1197       usesAgprAt(AgprIndexUnusedMin = -1);
1198     }
1199   }
1200 
1201   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1202                     unsigned RegWidth) {
1203     switch (RegKind) {
1204     case IS_SGPR:
1205       usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1206       break;
1207     case IS_AGPR:
1208       usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1209       break;
1210     case IS_VGPR:
1211       usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1212       break;
1213     default:
1214       break;
1215     }
1216   }
1217 };
1218 
1219 class AMDGPUAsmParser : public MCTargetAsmParser {
1220   MCAsmParser &Parser;
1221 
1222   // Number of extra operands parsed after the first optional operand.
1223   // This may be necessary to skip hardcoded mandatory operands.
1224   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1225 
1226   unsigned ForcedEncodingSize = 0;
1227   bool ForcedDPP = false;
1228   bool ForcedSDWA = false;
1229   KernelScopeInfo KernelScope;
1230   unsigned CPolSeen;
1231 
1232   /// @name Auto-generated Match Functions
1233   /// {
1234 
1235 #define GET_ASSEMBLER_HEADER
1236 #include "AMDGPUGenAsmMatcher.inc"
1237 
1238   /// }
1239 
1240 private:
1241   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1242   bool OutOfRangeError(SMRange Range);
1243   /// Calculate VGPR/SGPR blocks required for given target, reserved
1244   /// registers, and user-specified NextFreeXGPR values.
1245   ///
1246   /// \param Features [in] Target features, used for bug corrections.
1247   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1248   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1249   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1250   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1251   /// descriptor field, if valid.
1252   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1253   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1254   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1255   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1256   /// \param VGPRBlocks [out] Result VGPR block count.
1257   /// \param SGPRBlocks [out] Result SGPR block count.
1258   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1259                           bool FlatScrUsed, bool XNACKUsed,
1260                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1261                           SMRange VGPRRange, unsigned NextFreeSGPR,
1262                           SMRange SGPRRange, unsigned &VGPRBlocks,
1263                           unsigned &SGPRBlocks);
1264   bool ParseDirectiveAMDGCNTarget();
1265   bool ParseDirectiveAMDHSAKernel();
1266   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1267   bool ParseDirectiveHSACodeObjectVersion();
1268   bool ParseDirectiveHSACodeObjectISA();
1269   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1270   bool ParseDirectiveAMDKernelCodeT();
1271   // TODO: Possibly make subtargetHasRegister const.
1272   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1273   bool ParseDirectiveAMDGPUHsaKernel();
1274 
1275   bool ParseDirectiveISAVersion();
1276   bool ParseDirectiveHSAMetadata();
1277   bool ParseDirectivePALMetadataBegin();
1278   bool ParseDirectivePALMetadata();
1279   bool ParseDirectiveAMDGPULDS();
1280 
1281   /// Common code to parse out a block of text (typically YAML) between start and
1282   /// end directives.
1283   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1284                            const char *AssemblerDirectiveEnd,
1285                            std::string &CollectString);
1286 
1287   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1288                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1289   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1290                            unsigned &RegNum, unsigned &RegWidth,
1291                            bool RestoreOnFailure = false);
1292   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1293                            unsigned &RegNum, unsigned &RegWidth,
1294                            SmallVectorImpl<AsmToken> &Tokens);
1295   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1296                            unsigned &RegWidth,
1297                            SmallVectorImpl<AsmToken> &Tokens);
1298   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1299                            unsigned &RegWidth,
1300                            SmallVectorImpl<AsmToken> &Tokens);
1301   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1302                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1303   bool ParseRegRange(unsigned& Num, unsigned& Width);
1304   unsigned getRegularReg(RegisterKind RegKind,
1305                          unsigned RegNum,
1306                          unsigned RegWidth,
1307                          SMLoc Loc);
1308 
1309   bool isRegister();
1310   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1311   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1312   void initializeGprCountSymbol(RegisterKind RegKind);
1313   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1314                              unsigned RegWidth);
1315   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1316                     bool IsAtomic, bool IsLds = false);
1317   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1318                  bool IsGdsHardcoded);
1319 
1320 public:
1321   enum AMDGPUMatchResultTy {
1322     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1323   };
1324   enum OperandMode {
1325     OperandMode_Default,
1326     OperandMode_NSA,
1327   };
1328 
1329   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1330 
1331   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1332                const MCInstrInfo &MII,
1333                const MCTargetOptions &Options)
1334       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1335     MCAsmParserExtension::Initialize(Parser);
1336 
1337     if (getFeatureBits().none()) {
1338       // Set default features.
1339       copySTI().ToggleFeature("southern-islands");
1340     }
1341 
1342     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1343 
1344     {
1345       // TODO: make those pre-defined variables read-only.
1346       // Currently there is none suitable machinery in the core llvm-mc for this.
1347       // MCSymbol::isRedefinable is intended for another purpose, and
1348       // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
1349       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1350       MCContext &Ctx = getContext();
1351       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1352         MCSymbol *Sym =
1353             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1354         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1355         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1356         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1357         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1358         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1359       } else {
1360         MCSymbol *Sym =
1361             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1362         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1363         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1364         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1365         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1366         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1367       }
1368       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1369         initializeGprCountSymbol(IS_VGPR);
1370         initializeGprCountSymbol(IS_SGPR);
1371       } else
1372         KernelScope.initialize(getContext());
1373     }
1374   }
1375 
1376   bool hasMIMG_R128() const {
1377     return AMDGPU::hasMIMG_R128(getSTI());
1378   }
1379 
1380   bool hasPackedD16() const {
1381     return AMDGPU::hasPackedD16(getSTI());
1382   }
1383 
1384   bool hasGFX10A16() const {
1385     return AMDGPU::hasGFX10A16(getSTI());
1386   }
1387 
1388   bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1389 
1390   bool isSI() const {
1391     return AMDGPU::isSI(getSTI());
1392   }
1393 
1394   bool isCI() const {
1395     return AMDGPU::isCI(getSTI());
1396   }
1397 
1398   bool isVI() const {
1399     return AMDGPU::isVI(getSTI());
1400   }
1401 
1402   bool isGFX9() const {
1403     return AMDGPU::isGFX9(getSTI());
1404   }
1405 
1406   // TODO: isGFX90A is also true for GFX940. We need to clean it.
1407   bool isGFX90A() const {
1408     return AMDGPU::isGFX90A(getSTI());
1409   }
1410 
1411   bool isGFX940() const {
1412     return AMDGPU::isGFX940(getSTI());
1413   }
1414 
1415   bool isGFX9Plus() const {
1416     return AMDGPU::isGFX9Plus(getSTI());
1417   }
1418 
1419   bool isGFX10() const {
1420     return AMDGPU::isGFX10(getSTI());
1421   }
1422 
1423   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1424 
1425   bool isGFX11() const {
1426     return AMDGPU::isGFX11(getSTI());
1427   }
1428 
1429   bool isGFX11Plus() const {
1430     return AMDGPU::isGFX11Plus(getSTI());
1431   }
1432 
1433   bool isGFX10_BEncoding() const {
1434     return AMDGPU::isGFX10_BEncoding(getSTI());
1435   }
1436 
1437   bool hasInv2PiInlineImm() const {
1438     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1439   }
1440 
1441   bool hasFlatOffsets() const {
1442     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1443   }
1444 
1445   bool hasArchitectedFlatScratch() const {
1446     return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1447   }
1448 
1449   bool hasSGPR102_SGPR103() const {
1450     return !isVI() && !isGFX9();
1451   }
1452 
1453   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1454 
1455   bool hasIntClamp() const {
1456     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1457   }
1458 
1459   AMDGPUTargetStreamer &getTargetStreamer() {
1460     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1461     return static_cast<AMDGPUTargetStreamer &>(TS);
1462   }
1463 
1464   const MCRegisterInfo *getMRI() const {
1465     // We need this const_cast because for some reason getContext() is not const
1466     // in MCAsmParser.
1467     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1468   }
1469 
1470   const MCInstrInfo *getMII() const {
1471     return &MII;
1472   }
1473 
1474   const FeatureBitset &getFeatureBits() const {
1475     return getSTI().getFeatureBits();
1476   }
1477 
1478   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1479   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1480   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1481 
1482   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1483   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1484   bool isForcedDPP() const { return ForcedDPP; }
1485   bool isForcedSDWA() const { return ForcedSDWA; }
1486   ArrayRef<unsigned> getMatchedVariants() const;
1487   StringRef getMatchedVariantName() const;
1488 
1489   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1490   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1491                      bool RestoreOnFailure);
1492   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1493   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1494                                         SMLoc &EndLoc) override;
1495   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1496   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1497                                       unsigned Kind) override;
1498   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1499                                OperandVector &Operands, MCStreamer &Out,
1500                                uint64_t &ErrorInfo,
1501                                bool MatchingInlineAsm) override;
1502   bool ParseDirective(AsmToken DirectiveID) override;
1503   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1504                                     OperandMode Mode = OperandMode_Default);
1505   StringRef parseMnemonicSuffix(StringRef Name);
1506   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1507                         SMLoc NameLoc, OperandVector &Operands) override;
1508   //bool ProcessInstruction(MCInst &Inst);
1509 
1510   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1511 
1512   OperandMatchResultTy
1513   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1514                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1515                      bool (*ConvertResult)(int64_t &) = nullptr);
1516 
1517   OperandMatchResultTy
1518   parseOperandArrayWithPrefix(const char *Prefix,
1519                               OperandVector &Operands,
1520                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1521                               bool (*ConvertResult)(int64_t&) = nullptr);
1522 
1523   OperandMatchResultTy
1524   parseNamedBit(StringRef Name, OperandVector &Operands,
1525                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1526   OperandMatchResultTy parseCPol(OperandVector &Operands);
1527   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1528                                              StringRef &Value,
1529                                              SMLoc &StringLoc);
1530 
1531   bool isModifier();
1532   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1533   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1534   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1535   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1536   bool parseSP3NegModifier();
1537   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1538   OperandMatchResultTy parseReg(OperandVector &Operands);
1539   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1540   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1541   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1542   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1543   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1544   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1545   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1546   OperandMatchResultTy parseUfmt(int64_t &Format);
1547   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1548   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1549   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1550   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1551   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1552   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1553   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1554 
1555   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1556   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1557   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1558   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1559 
1560   bool parseCnt(int64_t &IntVal);
1561   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1562 
1563   bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1564   void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1565   OperandMatchResultTy parseDepCtrOps(OperandVector &Operands);
1566 
1567   bool parseDelay(int64_t &Delay);
1568   OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands);
1569 
1570   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1571 
1572 private:
1573   struct OperandInfoTy {
1574     SMLoc Loc;
1575     int64_t Id;
1576     bool IsSymbolic = false;
1577     bool IsDefined = false;
1578 
1579     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1580   };
1581 
1582   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1583   bool validateSendMsg(const OperandInfoTy &Msg,
1584                        const OperandInfoTy &Op,
1585                        const OperandInfoTy &Stream);
1586 
1587   bool parseHwregBody(OperandInfoTy &HwReg,
1588                       OperandInfoTy &Offset,
1589                       OperandInfoTy &Width);
1590   bool validateHwreg(const OperandInfoTy &HwReg,
1591                      const OperandInfoTy &Offset,
1592                      const OperandInfoTy &Width);
1593 
1594   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1595   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1596   SMLoc getBLGPLoc(const OperandVector &Operands) const;
1597 
1598   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1599                       const OperandVector &Operands) const;
1600   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1601   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1602   SMLoc getLitLoc(const OperandVector &Operands) const;
1603   SMLoc getConstLoc(const OperandVector &Operands) const;
1604 
1605   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1606   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1607   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1608   bool validateSOPLiteral(const MCInst &Inst) const;
1609   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1610   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1611   bool validateIntClampSupported(const MCInst &Inst);
1612   bool validateMIMGAtomicDMask(const MCInst &Inst);
1613   bool validateMIMGGatherDMask(const MCInst &Inst);
1614   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1615   Optional<StringRef> validateMIMGDataSize(const MCInst &Inst);
1616   bool validateMIMGAddrSize(const MCInst &Inst);
1617   bool validateMIMGD16(const MCInst &Inst);
1618   bool validateMIMGDim(const MCInst &Inst);
1619   bool validateMIMGMSAA(const MCInst &Inst);
1620   bool validateOpSel(const MCInst &Inst);
1621   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1622   bool validateVccOperand(unsigned Reg) const;
1623   bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1624   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1625   bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1626   bool validateAGPRLdSt(const MCInst &Inst) const;
1627   bool validateVGPRAlign(const MCInst &Inst) const;
1628   bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1629   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1630   bool validateDivScale(const MCInst &Inst);
1631   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1632                              const SMLoc &IDLoc);
1633   bool validateFlatLdsDMA(const MCInst &Inst, const OperandVector &Operands,
1634                           const SMLoc &IDLoc);
1635   Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1636   unsigned getConstantBusLimit(unsigned Opcode) const;
1637   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1638   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1639   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1640 
1641   bool isSupportedMnemo(StringRef Mnemo,
1642                         const FeatureBitset &FBS);
1643   bool isSupportedMnemo(StringRef Mnemo,
1644                         const FeatureBitset &FBS,
1645                         ArrayRef<unsigned> Variants);
1646   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1647 
1648   bool isId(const StringRef Id) const;
1649   bool isId(const AsmToken &Token, const StringRef Id) const;
1650   bool isToken(const AsmToken::TokenKind Kind) const;
1651   bool trySkipId(const StringRef Id);
1652   bool trySkipId(const StringRef Pref, const StringRef Id);
1653   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1654   bool trySkipToken(const AsmToken::TokenKind Kind);
1655   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1656   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1657   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1658 
1659   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1660   AsmToken::TokenKind getTokenKind() const;
1661   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1662   bool parseExpr(OperandVector &Operands);
1663   StringRef getTokenStr() const;
1664   AsmToken peekToken();
1665   AsmToken getToken() const;
1666   SMLoc getLoc() const;
1667   void lex();
1668 
1669 public:
1670   void onBeginOfFile() override;
1671 
1672   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1673   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1674 
1675   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1676   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1677   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1678   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1679   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1680   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1681 
1682   bool parseSwizzleOperand(int64_t &Op,
1683                            const unsigned MinVal,
1684                            const unsigned MaxVal,
1685                            const StringRef ErrMsg,
1686                            SMLoc &Loc);
1687   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1688                             const unsigned MinVal,
1689                             const unsigned MaxVal,
1690                             const StringRef ErrMsg);
1691   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1692   bool parseSwizzleOffset(int64_t &Imm);
1693   bool parseSwizzleMacro(int64_t &Imm);
1694   bool parseSwizzleQuadPerm(int64_t &Imm);
1695   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1696   bool parseSwizzleBroadcast(int64_t &Imm);
1697   bool parseSwizzleSwap(int64_t &Imm);
1698   bool parseSwizzleReverse(int64_t &Imm);
1699 
1700   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1701   int64_t parseGPRIdxMacro();
1702 
1703   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1704   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1705   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1706   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1707 
1708   AMDGPUOperand::Ptr defaultCPol() const;
1709 
1710   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1711   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1712   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1713   AMDGPUOperand::Ptr defaultFlatOffset() const;
1714 
1715   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1716 
1717   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1718                OptionalImmIndexMap &OptionalIdx);
1719   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1720   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1721   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1722   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1723                 OptionalImmIndexMap &OptionalIdx);
1724 
1725   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1726   void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1727 
1728   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1729                bool IsAtomic = false);
1730   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1731   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1732 
1733   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1734 
1735   bool parseDimId(unsigned &Encoding);
1736   OperandMatchResultTy parseDim(OperandVector &Operands);
1737   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1738   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1739   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1740   int64_t parseDPPCtrlSel(StringRef Ctrl);
1741   int64_t parseDPPCtrlPerm();
1742   AMDGPUOperand::Ptr defaultRowMask() const;
1743   AMDGPUOperand::Ptr defaultBankMask() const;
1744   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1745   AMDGPUOperand::Ptr defaultFI() const;
1746   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1747   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1748 
1749   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1750                                     AMDGPUOperand::ImmTy Type);
1751   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1752   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1753   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1754   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1755   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1756   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1757   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1758                uint64_t BasicInstType,
1759                bool SkipDstVcc = false,
1760                bool SkipSrcVcc = false);
1761 
1762   AMDGPUOperand::Ptr defaultBLGP() const;
1763   AMDGPUOperand::Ptr defaultCBSZ() const;
1764   AMDGPUOperand::Ptr defaultABID() const;
1765 
1766   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1767   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1768 
1769   AMDGPUOperand::Ptr defaultWaitVDST() const;
1770   AMDGPUOperand::Ptr defaultWaitEXP() const;
1771 };
1772 
1773 struct OptionalOperand {
1774   const char *Name;
1775   AMDGPUOperand::ImmTy Type;
1776   bool IsBit;
1777   bool (*ConvertResult)(int64_t&);
1778 };
1779 
1780 } // end anonymous namespace
1781 
1782 // May be called with an integer type of equivalent bitwidth.
1783 static const fltSemantics *getFltSemantics(unsigned Size) {
1784   switch (Size) {
1785   case 4:
1786     return &APFloat::IEEEsingle();
1787   case 8:
1788     return &APFloat::IEEEdouble();
1789   case 2:
1790     return &APFloat::IEEEhalf();
1791   default:
1792     llvm_unreachable("unsupported fp type");
1793   }
1794 }
1795 
1796 static const fltSemantics *getFltSemantics(MVT VT) {
1797   return getFltSemantics(VT.getSizeInBits() / 8);
1798 }
1799 
1800 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1801   switch (OperandType) {
1802   case AMDGPU::OPERAND_REG_IMM_INT32:
1803   case AMDGPU::OPERAND_REG_IMM_FP32:
1804   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1805   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1806   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1807   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1808   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1809   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1810   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1811   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1812   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1813   case AMDGPU::OPERAND_KIMM32:
1814     return &APFloat::IEEEsingle();
1815   case AMDGPU::OPERAND_REG_IMM_INT64:
1816   case AMDGPU::OPERAND_REG_IMM_FP64:
1817   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1818   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1819   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1820     return &APFloat::IEEEdouble();
1821   case AMDGPU::OPERAND_REG_IMM_INT16:
1822   case AMDGPU::OPERAND_REG_IMM_FP16:
1823   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1824   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1825   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1826   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1827   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1828   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1829   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1830   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1831   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1832   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1833   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1834   case AMDGPU::OPERAND_KIMM16:
1835     return &APFloat::IEEEhalf();
1836   default:
1837     llvm_unreachable("unsupported fp type");
1838   }
1839 }
1840 
1841 //===----------------------------------------------------------------------===//
1842 // Operand
1843 //===----------------------------------------------------------------------===//
1844 
1845 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1846   bool Lost;
1847 
1848   // Convert the literal to the operand's floating-point type
1849   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1850                                                APFloat::rmNearestTiesToEven,
1851                                                &Lost);
1852   // We allow precision loss but not overflow or underflow
1853   if (Status != APFloat::opOK &&
1854       Lost &&
1855       ((Status & APFloat::opOverflow)  != 0 ||
1856        (Status & APFloat::opUnderflow) != 0)) {
1857     return false;
1858   }
1859 
1860   return true;
1861 }
1862 
1863 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1864   return isUIntN(Size, Val) || isIntN(Size, Val);
1865 }
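// For example, isSafeTruncation(0xFFFF, 16) and isSafeTruncation(-1, 16) both
// hold (the value fits as an unsigned or a signed 16-bit integer,
// respectively), while isSafeTruncation(0x1FFFF, 16) does not.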
1866 
1867 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1868   if (VT.getScalarType() == MVT::i16) {
1869     // FP immediate values are broken.
1870     return isInlinableIntLiteral(Val);
1871   }
1872 
1873   // f16/v2f16 operands work correctly for all values.
1874   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1875 }
1876 
1877 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1878 
1879   // This is a hack to enable named inline values like
1880   // shared_base with both 32-bit and 64-bit operands.
1881   // Note that these values are defined as
1882   // 32-bit operands only.
1883   if (isInlineValue()) {
1884     return true;
1885   }
1886 
1887   if (!isImmTy(ImmTyNone)) {
1888     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1889     return false;
1890   }
1891   // TODO: We should avoid using host float here. It would be better to
1892   // check the float bit values which is what a few other places do.
1893   // We've had bot failures before due to weird NaN support on mips hosts.
1894 
1895   APInt Literal(64, Imm.Val);
1896 
1897   if (Imm.IsFPImm) { // We got fp literal token
1898     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1899       return AMDGPU::isInlinableLiteral64(Imm.Val,
1900                                           AsmParser->hasInv2PiInlineImm());
1901     }
1902 
1903     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1904     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1905       return false;
1906 
1907     if (type.getScalarSizeInBits() == 16) {
1908       return isInlineableLiteralOp16(
1909         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1910         type, AsmParser->hasInv2PiInlineImm());
1911     }
1912 
1913     // Check if single precision literal is inlinable
1914     return AMDGPU::isInlinableLiteral32(
1915       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1916       AsmParser->hasInv2PiInlineImm());
1917   }
1918 
1919   // We got int literal token.
1920   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1921     return AMDGPU::isInlinableLiteral64(Imm.Val,
1922                                         AsmParser->hasInv2PiInlineImm());
1923   }
1924 
1925   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1926     return false;
1927   }
1928 
1929   if (type.getScalarSizeInBits() == 16) {
1930     return isInlineableLiteralOp16(
1931       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1932       type, AsmParser->hasInv2PiInlineImm());
1933   }
1934 
1935   return AMDGPU::isInlinableLiteral32(
1936     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1937     AsmParser->hasInv2PiInlineImm());
1938 }
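// As a rough illustration: named inline values such as src_shared_base are
// always accepted; an fp token like 0.5 is inlinable for a 32-bit fp operand;
// an int token is inlinable only if it maps to a hardware inline constant
// (small integers or selected fp values, plus 1/(2*pi) where supported).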
1939 
1940 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1941   // Check that this immediate can be added as a literal
1942   if (!isImmTy(ImmTyNone)) {
1943     return false;
1944   }
1945 
1946   if (!Imm.IsFPImm) {
1947     // We got int literal token.
1948 
1949     if (type == MVT::f64 && hasFPModifiers()) {
1950       // Cannot apply fp modifiers to int literals preserving the same semantics
1951       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1952       // disable these cases.
1953       return false;
1954     }
1955 
1956     unsigned Size = type.getSizeInBits();
1957     if (Size == 64)
1958       Size = 32;
1959 
1960     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1961     // types.
1962     return isSafeTruncation(Imm.Val, Size);
1963   }
1964 
1965   // We got fp literal token
1966   if (type == MVT::f64) { // Expected 64-bit fp operand
1967     // Only the high 32 bits are encoded; the low 32 bits are zeroed, but we accept such literals
1968     return true;
1969   }
1970 
1971   if (type == MVT::i64) { // Expected 64-bit int operand
1972     // We don't allow fp literals in 64-bit integer instructions. It is
1973     // unclear how we should encode them.
1974     return false;
1975   }
1976 
1977   // We allow fp literals with f16x2 operands assuming that the specified
1978   // literal goes into the lower half and the upper half is zero. We also
1979   // require that the literal may be losslessly converted to f16.
1980   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1981                      (type == MVT::v2i16)? MVT::i16 :
1982                      (type == MVT::v2f32)? MVT::f32 : type;
1983 
1984   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1985   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1986 }
1987 
1988 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1989   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1990 }
1991 
1992 bool AMDGPUOperand::isVRegWithInputMods() const {
1993   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1994          // GFX90A allows DPP on 64-bit operands.
1995          (isRegClass(AMDGPU::VReg_64RegClassID) &&
1996           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1997 }
1998 
1999 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2000   if (AsmParser->isVI())
2001     return isVReg32();
2002   else if (AsmParser->isGFX9Plus())
2003     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2004   else
2005     return false;
2006 }
2007 
2008 bool AMDGPUOperand::isSDWAFP16Operand() const {
2009   return isSDWAOperand(MVT::f16);
2010 }
2011 
2012 bool AMDGPUOperand::isSDWAFP32Operand() const {
2013   return isSDWAOperand(MVT::f32);
2014 }
2015 
2016 bool AMDGPUOperand::isSDWAInt16Operand() const {
2017   return isSDWAOperand(MVT::i16);
2018 }
2019 
2020 bool AMDGPUOperand::isSDWAInt32Operand() const {
2021   return isSDWAOperand(MVT::i32);
2022 }
2023 
2024 bool AMDGPUOperand::isBoolReg() const {
2025   auto FB = AsmParser->getFeatureBits();
2026   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
2027                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
2028 }
2029 
2030 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2031 {
2032   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2033   assert(Size == 2 || Size == 4 || Size == 8);
2034 
2035   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2036 
2037   if (Imm.Mods.Abs) {
2038     Val &= ~FpSignMask;
2039   }
2040   if (Imm.Mods.Neg) {
2041     Val ^= FpSignMask;
2042   }
2043 
2044   return Val;
2045 }
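// For a 32-bit operand FpSignMask is 0x80000000, so, for example, applying
// 'abs' to 0xBF800000 (-1.0f) clears the sign bit giving 0x3F800000, while
// applying 'neg' to 0x3F800000 (1.0f) flips it to 0xBF800000.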
2046 
2047 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2048   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2049                              Inst.getNumOperands())) {
2050     addLiteralImmOperand(Inst, Imm.Val,
2051                          ApplyModifiers &
2052                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2053   } else {
2054     assert(!isImmTy(ImmTyNone) || !hasModifiers());
2055     Inst.addOperand(MCOperand::createImm(Imm.Val));
2056     setImmKindNone();
2057   }
2058 }
2059 
2060 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2061   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2062   auto OpNum = Inst.getNumOperands();
2063   // Check that this operand accepts literals
2064   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2065 
2066   if (ApplyModifiers) {
2067     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2068     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2069     Val = applyInputFPModifiers(Val, Size);
2070   }
2071 
2072   APInt Literal(64, Val);
2073   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
2074 
2075   if (Imm.IsFPImm) { // We got fp literal token
2076     switch (OpTy) {
2077     case AMDGPU::OPERAND_REG_IMM_INT64:
2078     case AMDGPU::OPERAND_REG_IMM_FP64:
2079     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2080     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2081     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2082       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2083                                        AsmParser->hasInv2PiInlineImm())) {
2084         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2085         setImmKindConst();
2086         return;
2087       }
2088 
2089       // Not inlinable
2090       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2091         // For fp operands we check if low 32 bits are zeros
2092         if (Literal.getLoBits(32) != 0) {
2093           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2094           "Can't encode literal as exact 64-bit floating-point operand. "
2095           "Low 32-bits will be set to zero");
2096         }
2097 
2098         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2099         setImmKindLiteral();
2100         return;
2101       }
2102 
2103       // We don't allow fp literals in 64-bit integer instructions. It is
2104       // unclear how we should encode them. This case should be checked earlier
2105       // in predicate methods (isLiteralImm())
2106       llvm_unreachable("fp literal in 64-bit integer instruction.");
2107 
2108     case AMDGPU::OPERAND_REG_IMM_INT32:
2109     case AMDGPU::OPERAND_REG_IMM_FP32:
2110     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2111     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2112     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2113     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2114     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2115     case AMDGPU::OPERAND_REG_IMM_INT16:
2116     case AMDGPU::OPERAND_REG_IMM_FP16:
2117     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2118     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2119     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2120     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2121     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2122     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2123     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2124     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2125     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2126     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2127     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2128     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2129     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2130     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2131     case AMDGPU::OPERAND_REG_IMM_V2INT32:
2132     case AMDGPU::OPERAND_KIMM32:
2133     case AMDGPU::OPERAND_KIMM16: {
2134       bool lost;
2135       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2136       // Convert the literal to the operand's floating-point type
2137       FPLiteral.convert(*getOpFltSemantics(OpTy),
2138                         APFloat::rmNearestTiesToEven, &lost);
2139       // We allow precision loss but not overflow or underflow. This should
2140       // have been checked earlier in isLiteralImm()
2141 
2142       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2143       Inst.addOperand(MCOperand::createImm(ImmVal));
2144       setImmKindLiteral();
2145       return;
2146     }
2147     default:
2148       llvm_unreachable("invalid operand size");
2149     }
2150 
2151     return;
2152   }
2153 
2154   // We got int literal token.
2155   // Only sign extend inline immediates.
2156   switch (OpTy) {
2157   case AMDGPU::OPERAND_REG_IMM_INT32:
2158   case AMDGPU::OPERAND_REG_IMM_FP32:
2159   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2160   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2161   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2162   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2163   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2164   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2165   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2166   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2167   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2168   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2169   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2170     if (isSafeTruncation(Val, 32) &&
2171         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2172                                      AsmParser->hasInv2PiInlineImm())) {
2173       Inst.addOperand(MCOperand::createImm(Val));
2174       setImmKindConst();
2175       return;
2176     }
2177 
2178     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2179     setImmKindLiteral();
2180     return;
2181 
2182   case AMDGPU::OPERAND_REG_IMM_INT64:
2183   case AMDGPU::OPERAND_REG_IMM_FP64:
2184   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2185   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2186   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2187     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2188       Inst.addOperand(MCOperand::createImm(Val));
2189       setImmKindConst();
2190       return;
2191     }
2192 
2193     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2194     setImmKindLiteral();
2195     return;
2196 
2197   case AMDGPU::OPERAND_REG_IMM_INT16:
2198   case AMDGPU::OPERAND_REG_IMM_FP16:
2199   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2200   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2201   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2202   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2203   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2204     if (isSafeTruncation(Val, 16) &&
2205         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2206                                      AsmParser->hasInv2PiInlineImm())) {
2207       Inst.addOperand(MCOperand::createImm(Val));
2208       setImmKindConst();
2209       return;
2210     }
2211 
2212     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2213     setImmKindLiteral();
2214     return;
2215 
2216   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2217   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2218   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2219   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2220     assert(isSafeTruncation(Val, 16));
2221     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2222                                         AsmParser->hasInv2PiInlineImm()));
2223 
2224     Inst.addOperand(MCOperand::createImm(Val));
2225     return;
2226   }
2227   case AMDGPU::OPERAND_KIMM32:
2228     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2229     setImmKindNone();
2230     return;
2231   case AMDGPU::OPERAND_KIMM16:
2232     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2233     setImmKindNone();
2234     return;
2235   default:
2236     llvm_unreachable("invalid operand size");
2237   }
2238 }
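// For instance, with a 32-bit int token the value -1 is an inline constant
// and is emitted as-is (immediate kind "const"), whereas 0x12345678 is not
// inlinable and is emitted as a 32-bit literal truncated to its low 32 bits.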
2239 
2240 template <unsigned Bitwidth>
2241 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2242   APInt Literal(64, Imm.Val);
2243   setImmKindNone();
2244 
2245   if (!Imm.IsFPImm) {
2246     // We got int literal token.
2247     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2248     return;
2249   }
2250 
2251   bool Lost;
2252   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2253   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2254                     APFloat::rmNearestTiesToEven, &Lost);
2255   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2256 }
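// For example, with Bitwidth == 16 the fp token 1.0 is converted to IEEE half
// and emitted as 0x3C00, while an int token is emitted as its low 16 bits.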
2257 
2258 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2259   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2260 }
2261 
2262 static bool isInlineValue(unsigned Reg) {
2263   switch (Reg) {
2264   case AMDGPU::SRC_SHARED_BASE:
2265   case AMDGPU::SRC_SHARED_LIMIT:
2266   case AMDGPU::SRC_PRIVATE_BASE:
2267   case AMDGPU::SRC_PRIVATE_LIMIT:
2268   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2269     return true;
2270   case AMDGPU::SRC_VCCZ:
2271   case AMDGPU::SRC_EXECZ:
2272   case AMDGPU::SRC_SCC:
2273     return true;
2274   case AMDGPU::SGPR_NULL:
2275     return true;
2276   default:
2277     return false;
2278   }
2279 }
2280 
2281 bool AMDGPUOperand::isInlineValue() const {
2282   return isRegKind() && ::isInlineValue(getReg());
2283 }
2284 
2285 //===----------------------------------------------------------------------===//
2286 // AsmParser
2287 //===----------------------------------------------------------------------===//
2288 
2289 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2290   if (Is == IS_VGPR) {
2291     switch (RegWidth) {
2292       default: return -1;
2293       case 32:
2294         return AMDGPU::VGPR_32RegClassID;
2295       case 64:
2296         return AMDGPU::VReg_64RegClassID;
2297       case 96:
2298         return AMDGPU::VReg_96RegClassID;
2299       case 128:
2300         return AMDGPU::VReg_128RegClassID;
2301       case 160:
2302         return AMDGPU::VReg_160RegClassID;
2303       case 192:
2304         return AMDGPU::VReg_192RegClassID;
2305       case 224:
2306         return AMDGPU::VReg_224RegClassID;
2307       case 256:
2308         return AMDGPU::VReg_256RegClassID;
2309       case 512:
2310         return AMDGPU::VReg_512RegClassID;
2311       case 1024:
2312         return AMDGPU::VReg_1024RegClassID;
2313     }
2314   } else if (Is == IS_TTMP) {
2315     switch (RegWidth) {
2316       default: return -1;
2317       case 32:
2318         return AMDGPU::TTMP_32RegClassID;
2319       case 64:
2320         return AMDGPU::TTMP_64RegClassID;
2321       case 128:
2322         return AMDGPU::TTMP_128RegClassID;
2323       case 256:
2324         return AMDGPU::TTMP_256RegClassID;
2325       case 512:
2326         return AMDGPU::TTMP_512RegClassID;
2327     }
2328   } else if (Is == IS_SGPR) {
2329     switch (RegWidth) {
2330       default: return -1;
2331       case 32:
2332         return AMDGPU::SGPR_32RegClassID;
2333       case 64:
2334         return AMDGPU::SGPR_64RegClassID;
2335       case 96:
2336         return AMDGPU::SGPR_96RegClassID;
2337       case 128:
2338         return AMDGPU::SGPR_128RegClassID;
2339       case 160:
2340         return AMDGPU::SGPR_160RegClassID;
2341       case 192:
2342         return AMDGPU::SGPR_192RegClassID;
2343       case 224:
2344         return AMDGPU::SGPR_224RegClassID;
2345       case 256:
2346         return AMDGPU::SGPR_256RegClassID;
2347       case 512:
2348         return AMDGPU::SGPR_512RegClassID;
2349     }
2350   } else if (Is == IS_AGPR) {
2351     switch (RegWidth) {
2352       default: return -1;
2353       case 32:
2354         return AMDGPU::AGPR_32RegClassID;
2355       case 64:
2356         return AMDGPU::AReg_64RegClassID;
2357       case 96:
2358         return AMDGPU::AReg_96RegClassID;
2359       case 128:
2360         return AMDGPU::AReg_128RegClassID;
2361       case 160:
2362         return AMDGPU::AReg_160RegClassID;
2363       case 192:
2364         return AMDGPU::AReg_192RegClassID;
2365       case 224:
2366         return AMDGPU::AReg_224RegClassID;
2367       case 256:
2368         return AMDGPU::AReg_256RegClassID;
2369       case 512:
2370         return AMDGPU::AReg_512RegClassID;
2371       case 1024:
2372         return AMDGPU::AReg_1024RegClassID;
2373     }
2374   }
2375   return -1;
2376 }
2377 
2378 static unsigned getSpecialRegForName(StringRef RegName) {
2379   return StringSwitch<unsigned>(RegName)
2380     .Case("exec", AMDGPU::EXEC)
2381     .Case("vcc", AMDGPU::VCC)
2382     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2383     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2384     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2385     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2386     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2387     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2388     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2389     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2390     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2391     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2392     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2393     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2394     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2395     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2396     .Case("m0", AMDGPU::M0)
2397     .Case("vccz", AMDGPU::SRC_VCCZ)
2398     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2399     .Case("execz", AMDGPU::SRC_EXECZ)
2400     .Case("src_execz", AMDGPU::SRC_EXECZ)
2401     .Case("scc", AMDGPU::SRC_SCC)
2402     .Case("src_scc", AMDGPU::SRC_SCC)
2403     .Case("tba", AMDGPU::TBA)
2404     .Case("tma", AMDGPU::TMA)
2405     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2406     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2407     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2408     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2409     .Case("vcc_lo", AMDGPU::VCC_LO)
2410     .Case("vcc_hi", AMDGPU::VCC_HI)
2411     .Case("exec_lo", AMDGPU::EXEC_LO)
2412     .Case("exec_hi", AMDGPU::EXEC_HI)
2413     .Case("tma_lo", AMDGPU::TMA_LO)
2414     .Case("tma_hi", AMDGPU::TMA_HI)
2415     .Case("tba_lo", AMDGPU::TBA_LO)
2416     .Case("tba_hi", AMDGPU::TBA_HI)
2417     .Case("pc", AMDGPU::PC_REG)
2418     .Case("null", AMDGPU::SGPR_NULL)
2419     .Default(AMDGPU::NoRegister);
2420 }
2421 
2422 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2423                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2424   auto R = parseRegister();
2425   if (!R) return true;
2426   assert(R->isReg());
2427   RegNo = R->getReg();
2428   StartLoc = R->getStartLoc();
2429   EndLoc = R->getEndLoc();
2430   return false;
2431 }
2432 
2433 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2434                                     SMLoc &EndLoc) {
2435   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2436 }
2437 
2438 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2439                                                        SMLoc &StartLoc,
2440                                                        SMLoc &EndLoc) {
2441   bool Result =
2442       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2443   bool PendingErrors = getParser().hasPendingError();
2444   getParser().clearPendingErrors();
2445   if (PendingErrors)
2446     return MatchOperand_ParseFail;
2447   if (Result)
2448     return MatchOperand_NoMatch;
2449   return MatchOperand_Success;
2450 }
2451 
2452 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2453                                             RegisterKind RegKind, unsigned Reg1,
2454                                             SMLoc Loc) {
2455   switch (RegKind) {
2456   case IS_SPECIAL:
2457     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2458       Reg = AMDGPU::EXEC;
2459       RegWidth = 64;
2460       return true;
2461     }
2462     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2463       Reg = AMDGPU::FLAT_SCR;
2464       RegWidth = 64;
2465       return true;
2466     }
2467     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2468       Reg = AMDGPU::XNACK_MASK;
2469       RegWidth = 64;
2470       return true;
2471     }
2472     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2473       Reg = AMDGPU::VCC;
2474       RegWidth = 64;
2475       return true;
2476     }
2477     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2478       Reg = AMDGPU::TBA;
2479       RegWidth = 64;
2480       return true;
2481     }
2482     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2483       Reg = AMDGPU::TMA;
2484       RegWidth = 64;
2485       return true;
2486     }
2487     Error(Loc, "register does not fit in the list");
2488     return false;
2489   case IS_VGPR:
2490   case IS_SGPR:
2491   case IS_AGPR:
2492   case IS_TTMP:
2493     if (Reg1 != Reg + RegWidth / 32) {
2494       Error(Loc, "registers in a list must have consecutive indices");
2495       return false;
2496     }
2497     RegWidth += 32;
2498     return true;
2499   default:
2500     llvm_unreachable("unexpected register kind");
2501   }
2502 }
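// For example, while parsing the list [s0, s1, s2, s3] this is called for s1,
// s2 and s3 in turn; each call checks that the next index immediately follows
// the registers accumulated so far and grows RegWidth from 32 to 64, 96, 128.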
2503 
2504 struct RegInfo {
2505   StringLiteral Name;
2506   RegisterKind Kind;
2507 };
2508 
2509 static constexpr RegInfo RegularRegisters[] = {
2510   {{"v"},    IS_VGPR},
2511   {{"s"},    IS_SGPR},
2512   {{"ttmp"}, IS_TTMP},
2513   {{"acc"},  IS_AGPR},
2514   {{"a"},    IS_AGPR},
2515 };
2516 
2517 static bool isRegularReg(RegisterKind Kind) {
2518   return Kind == IS_VGPR ||
2519          Kind == IS_SGPR ||
2520          Kind == IS_TTMP ||
2521          Kind == IS_AGPR;
2522 }
2523 
2524 static const RegInfo* getRegularRegInfo(StringRef Str) {
2525   for (const RegInfo &Reg : RegularRegisters)
2526     if (Str.startswith(Reg.Name))
2527       return &Reg;
2528   return nullptr;
2529 }
2530 
2531 static bool getRegNum(StringRef Str, unsigned& Num) {
2532   return !Str.getAsInteger(10, Num);
2533 }
2534 
2535 bool
2536 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2537                             const AsmToken &NextToken) const {
2538 
2539   // A list of consecutive registers: [s0,s1,s2,s3]
2540   if (Token.is(AsmToken::LBrac))
2541     return true;
2542 
2543   if (!Token.is(AsmToken::Identifier))
2544     return false;
2545 
2546   // A single register like s0 or a range of registers like s[0:1]
2547 
2548   StringRef Str = Token.getString();
2549   const RegInfo *Reg = getRegularRegInfo(Str);
2550   if (Reg) {
2551     StringRef RegName = Reg->Name;
2552     StringRef RegSuffix = Str.substr(RegName.size());
2553     if (!RegSuffix.empty()) {
2554       unsigned Num;
2555       // A single register with an index: rXX
2556       if (getRegNum(RegSuffix, Num))
2557         return true;
2558     } else {
2559       // A range of registers: r[XX:YY].
2560       if (NextToken.is(AsmToken::LBrac))
2561         return true;
2562     }
2563   }
2564 
2565   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2566 }
2567 
2568 bool
2569 AMDGPUAsmParser::isRegister()
2570 {
2571   return isRegister(getToken(), peekToken());
2572 }
2573 
2574 unsigned
2575 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2576                                unsigned RegNum,
2577                                unsigned RegWidth,
2578                                SMLoc Loc) {
2579 
2580   assert(isRegularReg(RegKind));
2581 
2582   unsigned AlignSize = 1;
2583   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2584     // SGPR and TTMP registers must be aligned.
2585     // Max required alignment is 4 dwords.
2586     AlignSize = std::min(RegWidth / 32, 4u);
2587   }
2588 
2589   if (RegNum % AlignSize != 0) {
2590     Error(Loc, "invalid register alignment");
2591     return AMDGPU::NoRegister;
2592   }
2593 
2594   unsigned RegIdx = RegNum / AlignSize;
2595   int RCID = getRegClass(RegKind, RegWidth);
2596   if (RCID == -1) {
2597     Error(Loc, "invalid or unsupported register size");
2598     return AMDGPU::NoRegister;
2599   }
2600 
2601   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2602   const MCRegisterClass RC = TRI->getRegClass(RCID);
2603   if (RegIdx >= RC.getNumRegs()) {
2604     Error(Loc, "register index is out of range");
2605     return AMDGPU::NoRegister;
2606   }
2607 
2608   return RC.getRegister(RegIdx);
2609 }
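// For example, s[4:7] (RegWidth == 128) requires 4-dword alignment and is
// accepted, while s[2:5] is rejected with "invalid register alignment".
// VGPR and AGPR ranges have no such alignment restriction.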
2610 
2611 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2612   int64_t RegLo, RegHi;
2613   if (!skipToken(AsmToken::LBrac, "missing register index"))
2614     return false;
2615 
2616   SMLoc FirstIdxLoc = getLoc();
2617   SMLoc SecondIdxLoc;
2618 
2619   if (!parseExpr(RegLo))
2620     return false;
2621 
2622   if (trySkipToken(AsmToken::Colon)) {
2623     SecondIdxLoc = getLoc();
2624     if (!parseExpr(RegHi))
2625       return false;
2626   } else {
2627     RegHi = RegLo;
2628   }
2629 
2630   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2631     return false;
2632 
2633   if (!isUInt<32>(RegLo)) {
2634     Error(FirstIdxLoc, "invalid register index");
2635     return false;
2636   }
2637 
2638   if (!isUInt<32>(RegHi)) {
2639     Error(SecondIdxLoc, "invalid register index");
2640     return false;
2641   }
2642 
2643   if (RegLo > RegHi) {
2644     Error(FirstIdxLoc, "first register index should not exceed second index");
2645     return false;
2646   }
2647 
2648   Num = static_cast<unsigned>(RegLo);
2649   RegWidth = 32 * ((RegHi - RegLo) + 1);
2650   return true;
2651 }
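// For example, the range "[0:3]" yields Num == 0 and RegWidth == 128, while a
// single bracketed index such as "[5]" yields a 32-bit register.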
2652 
2653 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2654                                           unsigned &RegNum, unsigned &RegWidth,
2655                                           SmallVectorImpl<AsmToken> &Tokens) {
2656   assert(isToken(AsmToken::Identifier));
2657   unsigned Reg = getSpecialRegForName(getTokenStr());
2658   if (Reg) {
2659     RegNum = 0;
2660     RegWidth = 32;
2661     RegKind = IS_SPECIAL;
2662     Tokens.push_back(getToken());
2663     lex(); // skip register name
2664   }
2665   return Reg;
2666 }
2667 
2668 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2669                                           unsigned &RegNum, unsigned &RegWidth,
2670                                           SmallVectorImpl<AsmToken> &Tokens) {
2671   assert(isToken(AsmToken::Identifier));
2672   StringRef RegName = getTokenStr();
2673   auto Loc = getLoc();
2674 
2675   const RegInfo *RI = getRegularRegInfo(RegName);
2676   if (!RI) {
2677     Error(Loc, "invalid register name");
2678     return AMDGPU::NoRegister;
2679   }
2680 
2681   Tokens.push_back(getToken());
2682   lex(); // skip register name
2683 
2684   RegKind = RI->Kind;
2685   StringRef RegSuffix = RegName.substr(RI->Name.size());
2686   if (!RegSuffix.empty()) {
2687     // Single 32-bit register: vXX.
2688     if (!getRegNum(RegSuffix, RegNum)) {
2689       Error(Loc, "invalid register index");
2690       return AMDGPU::NoRegister;
2691     }
2692     RegWidth = 32;
2693   } else {
2694     // Range of registers: v[XX:YY]. ":YY" is optional.
2695     if (!ParseRegRange(RegNum, RegWidth))
2696       return AMDGPU::NoRegister;
2697   }
2698 
2699   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2700 }
2701 
2702 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2703                                        unsigned &RegWidth,
2704                                        SmallVectorImpl<AsmToken> &Tokens) {
2705   unsigned Reg = AMDGPU::NoRegister;
2706   auto ListLoc = getLoc();
2707 
2708   if (!skipToken(AsmToken::LBrac,
2709                  "expected a register or a list of registers")) {
2710     return AMDGPU::NoRegister;
2711   }
2712 
2713   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2714 
2715   auto Loc = getLoc();
2716   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2717     return AMDGPU::NoRegister;
2718   if (RegWidth != 32) {
2719     Error(Loc, "expected a single 32-bit register");
2720     return AMDGPU::NoRegister;
2721   }
2722 
2723   for (; trySkipToken(AsmToken::Comma); ) {
2724     RegisterKind NextRegKind;
2725     unsigned NextReg, NextRegNum, NextRegWidth;
2726     Loc = getLoc();
2727 
2728     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2729                              NextRegNum, NextRegWidth,
2730                              Tokens)) {
2731       return AMDGPU::NoRegister;
2732     }
2733     if (NextRegWidth != 32) {
2734       Error(Loc, "expected a single 32-bit register");
2735       return AMDGPU::NoRegister;
2736     }
2737     if (NextRegKind != RegKind) {
2738       Error(Loc, "registers in a list must be of the same kind");
2739       return AMDGPU::NoRegister;
2740     }
2741     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2742       return AMDGPU::NoRegister;
2743   }
2744 
2745   if (!skipToken(AsmToken::RBrac,
2746                  "expected a comma or a closing square bracket")) {
2747     return AMDGPU::NoRegister;
2748   }
2749 
2750   if (isRegularReg(RegKind))
2751     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2752 
2753   return Reg;
2754 }
2755 
2756 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2757                                           unsigned &RegNum, unsigned &RegWidth,
2758                                           SmallVectorImpl<AsmToken> &Tokens) {
2759   auto Loc = getLoc();
2760   Reg = AMDGPU::NoRegister;
2761 
2762   if (isToken(AsmToken::Identifier)) {
2763     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2764     if (Reg == AMDGPU::NoRegister)
2765       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2766   } else {
2767     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2768   }
2769 
2770   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2771   if (Reg == AMDGPU::NoRegister) {
2772     assert(Parser.hasPendingError());
2773     return false;
2774   }
2775 
2776   if (!subtargetHasRegister(*TRI, Reg)) {
2777     if (Reg == AMDGPU::SGPR_NULL) {
2778       Error(Loc, "'null' operand is not supported on this GPU");
2779     } else {
2780       Error(Loc, "register not available on this GPU");
2781     }
2782     return false;
2783   }
2784 
2785   return true;
2786 }
2787 
2788 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2789                                           unsigned &RegNum, unsigned &RegWidth,
2790                                           bool RestoreOnFailure /*=false*/) {
2791   Reg = AMDGPU::NoRegister;
2792 
2793   SmallVector<AsmToken, 1> Tokens;
2794   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2795     if (RestoreOnFailure) {
2796       while (!Tokens.empty()) {
2797         getLexer().UnLex(Tokens.pop_back_val());
2798       }
2799     }
2800     return true;
2801   }
2802   return false;
2803 }
2804 
2805 Optional<StringRef>
2806 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2807   switch (RegKind) {
2808   case IS_VGPR:
2809     return StringRef(".amdgcn.next_free_vgpr");
2810   case IS_SGPR:
2811     return StringRef(".amdgcn.next_free_sgpr");
2812   default:
2813     return None;
2814   }
2815 }
2816 
2817 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2818   auto SymbolName = getGprCountSymbolName(RegKind);
2819   assert(SymbolName && "initializing invalid register kind");
2820   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2821   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2822 }
2823 
2824 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2825                                             unsigned DwordRegIndex,
2826                                             unsigned RegWidth) {
2827   // Symbols are only defined for GCN targets
2828   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2829     return true;
2830 
2831   auto SymbolName = getGprCountSymbolName(RegKind);
2832   if (!SymbolName)
2833     return true;
2834   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2835 
2836   int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
2837   int64_t OldCount;
2838 
2839   if (!Sym->isVariable())
2840     return !Error(getLoc(),
2841                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2842   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2843     return !Error(
2844         getLoc(),
2845         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2846 
2847   if (OldCount <= NewMax)
2848     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2849 
2850   return true;
2851 }
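// For example, after parsing v[0:3] the .amdgcn.next_free_vgpr symbol is
// raised to at least 4, so it tracks one past the highest VGPR index used.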
2852 
2853 std::unique_ptr<AMDGPUOperand>
2854 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2855   const auto &Tok = getToken();
2856   SMLoc StartLoc = Tok.getLoc();
2857   SMLoc EndLoc = Tok.getEndLoc();
2858   RegisterKind RegKind;
2859   unsigned Reg, RegNum, RegWidth;
2860 
2861   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2862     return nullptr;
2863   }
2864   if (isHsaAbiVersion3AndAbove(&getSTI())) {
2865     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2866       return nullptr;
2867   } else
2868     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2869   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2870 }
2871 
2872 OperandMatchResultTy
2873 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2874   // TODO: add syntactic sugar for 1/(2*PI)
2875 
2876   assert(!isRegister());
2877   assert(!isModifier());
2878 
2879   const auto& Tok = getToken();
2880   const auto& NextTok = peekToken();
2881   bool IsReal = Tok.is(AsmToken::Real);
2882   SMLoc S = getLoc();
2883   bool Negate = false;
2884 
2885   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2886     lex();
2887     IsReal = true;
2888     Negate = true;
2889   }
2890 
2891   if (IsReal) {
2892     // Floating-point expressions are not supported;
2893     // only floating-point literals with an optional
2894     // sign are allowed.
2895 
2896     StringRef Num = getTokenStr();
2897     lex();
2898 
2899     APFloat RealVal(APFloat::IEEEdouble());
2900     auto roundMode = APFloat::rmNearestTiesToEven;
2901     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2902       return MatchOperand_ParseFail;
2903     }
2904     if (Negate)
2905       RealVal.changeSign();
2906 
2907     Operands.push_back(
2908       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2909                                AMDGPUOperand::ImmTyNone, true));
2910 
2911     return MatchOperand_Success;
2912 
2913   } else {
2914     int64_t IntVal;
2915     const MCExpr *Expr;
2916     SMLoc S = getLoc();
2917 
2918     if (HasSP3AbsModifier) {
2919       // This is a workaround for handling expressions
2920       // as arguments of SP3 'abs' modifier, for example:
2921       //     |1.0|
2922       //     |-1|
2923       //     |1+x|
2924       // This syntax is not compatible with syntax of standard
2925       // MC expressions (due to the trailing '|').
2926       SMLoc EndLoc;
2927       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2928         return MatchOperand_ParseFail;
2929     } else {
2930       if (Parser.parseExpression(Expr))
2931         return MatchOperand_ParseFail;
2932     }
2933 
2934     if (Expr->evaluateAsAbsolute(IntVal)) {
2935       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2936     } else {
2937       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2938     }
2939 
2940     return MatchOperand_Success;
2941   }
2942 
2943   return MatchOperand_NoMatch;
2944 }
2945 
2946 OperandMatchResultTy
2947 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2948   if (!isRegister())
2949     return MatchOperand_NoMatch;
2950 
2951   if (auto R = parseRegister()) {
2952     assert(R->isReg());
2953     Operands.push_back(std::move(R));
2954     return MatchOperand_Success;
2955   }
2956   return MatchOperand_ParseFail;
2957 }
2958 
2959 OperandMatchResultTy
2960 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2961   auto res = parseReg(Operands);
2962   if (res != MatchOperand_NoMatch) {
2963     return res;
2964   } else if (isModifier()) {
2965     return MatchOperand_NoMatch;
2966   } else {
2967     return parseImm(Operands, HasSP3AbsMod);
2968   }
2969 }
2970 
2971 bool
2972 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2973   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2974     const auto &str = Token.getString();
2975     return str == "abs" || str == "neg" || str == "sext";
2976   }
2977   return false;
2978 }
2979 
2980 bool
2981 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2982   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2983 }
2984 
2985 bool
2986 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2987   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2988 }
2989 
2990 bool
2991 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2992   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2993 }
2994 
2995 // Check if this is an operand modifier or an opcode modifier
2996 // which may look like an expression but is not. We should
2997 // avoid parsing these modifiers as expressions. Currently
2998 // recognized sequences are:
2999 //   |...|
3000 //   abs(...)
3001 //   neg(...)
3002 //   sext(...)
3003 //   -reg
3004 //   -|...|
3005 //   -abs(...)
3006 //   name:...
3007 // Note that simple opcode modifiers like 'gds' may be parsed as
3008 // expressions; this is a special case. See getExpressionAsToken.
3009 //
3010 bool
3011 AMDGPUAsmParser::isModifier() {
3012 
3013   AsmToken Tok = getToken();
3014   AsmToken NextToken[2];
3015   peekTokens(NextToken);
3016 
3017   return isOperandModifier(Tok, NextToken[0]) ||
3018          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3019          isOpcodeModifierWithVal(Tok, NextToken[0]);
3020 }
3021 
3022 // Check if the current token is an SP3 'neg' modifier.
3023 // Currently this modifier is allowed in the following contexts:
3024 //
3025 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3026 // 2. Before an 'abs' modifier: -abs(...)
3027 // 3. Before an SP3 'abs' modifier: -|...|
3028 //
3029 // In all other cases "-" is handled as a part
3030 // of an expression that follows the sign.
3031 //
3032 // Note: When "-" is followed by an integer literal,
3033 // this is interpreted as integer negation rather
3034 // than a floating-point NEG modifier applied to the literal.
3035 // Besides being counter-intuitive, such use of the floating-point
3036 // NEG modifier would result in different meanings
3037 // of integer literals used with VOP1/2/C and VOP3,
3038 // for example:
3039 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3040 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3041 // Negative fp literals with a preceding "-" are
3042 // handled likewise for uniformity.
3043 //
3044 bool
3045 AMDGPUAsmParser::parseSP3NegModifier() {
3046 
3047   AsmToken NextToken[2];
3048   peekTokens(NextToken);
3049 
3050   if (isToken(AsmToken::Minus) &&
3051       (isRegister(NextToken[0], NextToken[1]) ||
3052        NextToken[0].is(AsmToken::Pipe) ||
3053        isId(NextToken[0], "abs"))) {
3054     lex();
3055     return true;
3056   }
3057 
3058   return false;
3059 }
3060 
3061 OperandMatchResultTy
3062 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3063                                               bool AllowImm) {
3064   bool Neg, SP3Neg;
3065   bool Abs, SP3Abs;
3066   SMLoc Loc;
3067 
3068   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3069   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
3070     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3071     return MatchOperand_ParseFail;
3072   }
3073 
3074   SP3Neg = parseSP3NegModifier();
3075 
3076   Loc = getLoc();
3077   Neg = trySkipId("neg");
3078   if (Neg && SP3Neg) {
3079     Error(Loc, "expected register or immediate");
3080     return MatchOperand_ParseFail;
3081   }
3082   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3083     return MatchOperand_ParseFail;
3084 
3085   Abs = trySkipId("abs");
3086   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3087     return MatchOperand_ParseFail;
3088 
3089   Loc = getLoc();
3090   SP3Abs = trySkipToken(AsmToken::Pipe);
3091   if (Abs && SP3Abs) {
3092     Error(Loc, "expected register or immediate");
3093     return MatchOperand_ParseFail;
3094   }
3095 
3096   OperandMatchResultTy Res;
3097   if (AllowImm) {
3098     Res = parseRegOrImm(Operands, SP3Abs);
3099   } else {
3100     Res = parseReg(Operands);
3101   }
3102   if (Res != MatchOperand_Success) {
3103     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
3104   }
3105 
3106   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3107     return MatchOperand_ParseFail;
3108   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3109     return MatchOperand_ParseFail;
3110   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3111     return MatchOperand_ParseFail;
3112 
3113   AMDGPUOperand::Modifiers Mods;
3114   Mods.Abs = Abs || SP3Abs;
3115   Mods.Neg = Neg || SP3Neg;
3116 
3117   if (Mods.hasFPModifiers()) {
3118     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3119     if (Op.isExpr()) {
3120       Error(Op.getStartLoc(), "expected an absolute expression");
3121       return MatchOperand_ParseFail;
3122     }
3123     Op.setModifiers(Mods);
3124   }
3125   return MatchOperand_Success;
3126 }
3127 
3128 OperandMatchResultTy
3129 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3130                                                bool AllowImm) {
3131   bool Sext = trySkipId("sext");
3132   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3133     return MatchOperand_ParseFail;
3134 
3135   OperandMatchResultTy Res;
3136   if (AllowImm) {
3137     Res = parseRegOrImm(Operands);
3138   } else {
3139     Res = parseReg(Operands);
3140   }
3141   if (Res != MatchOperand_Success) {
3142     return Sext? MatchOperand_ParseFail : Res;
3143   }
3144 
3145   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3146     return MatchOperand_ParseFail;
3147 
3148   AMDGPUOperand::Modifiers Mods;
3149   Mods.Sext = Sext;
3150 
3151   if (Mods.hasIntModifiers()) {
3152     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3153     if (Op.isExpr()) {
3154       Error(Op.getStartLoc(), "expected an absolute expression");
3155       return MatchOperand_ParseFail;
3156     }
3157     Op.setModifiers(Mods);
3158   }
3159 
3160   return MatchOperand_Success;
3161 }
3162 
3163 OperandMatchResultTy
3164 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3165   return parseRegOrImmWithFPInputMods(Operands, false);
3166 }
3167 
3168 OperandMatchResultTy
3169 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3170   return parseRegOrImmWithIntInputMods(Operands, false);
3171 }
3172 
3173 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3174   auto Loc = getLoc();
3175   if (trySkipId("off")) {
3176     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3177                                                 AMDGPUOperand::ImmTyOff, false));
3178     return MatchOperand_Success;
3179   }
3180 
3181   if (!isRegister())
3182     return MatchOperand_NoMatch;
3183 
3184   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3185   if (Reg) {
3186     Operands.push_back(std::move(Reg));
3187     return MatchOperand_Success;
3188   }
3189 
  return MatchOperand_ParseFail;
}
3193 
3194 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3195   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3196 
3197   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3198       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3199       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3200       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3201     return Match_InvalidOperand;
3202 
3203   if ((TSFlags & SIInstrFlags::VOP3) &&
3204       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3205       getForcedEncodingSize() != 64)
3206     return Match_PreferE32;
3207 
3208   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3209       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    // v_mac_f32/16 allow only dst_sel == DWORD.
3211     auto OpNum =
3212         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3213     const auto &Op = Inst.getOperand(OpNum);
3214     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3215       return Match_InvalidOperand;
3216     }
3217   }
3218 
3219   return Match_Success;
3220 }
3221 
3222 static ArrayRef<unsigned> getAllVariants() {
3223   static const unsigned Variants[] = {
3224     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3225     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3226   };
3227 
3228   return makeArrayRef(Variants);
3229 }
3230 
3231 // What asm variants we should check
3232 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3233   if (getForcedEncodingSize() == 32) {
3234     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3235     return makeArrayRef(Variants);
3236   }
3237 
3238   if (isForcedVOP3()) {
3239     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3240     return makeArrayRef(Variants);
3241   }
3242 
3243   if (isForcedSDWA()) {
3244     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3245                                         AMDGPUAsmVariants::SDWA9};
3246     return makeArrayRef(Variants);
3247   }
3248 
3249   if (isForcedDPP()) {
3250     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3251     return makeArrayRef(Variants);
3252   }
3253 
3254   return getAllVariants();
3255 }
3256 
3257 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3258   if (getForcedEncodingSize() == 32)
3259     return "e32";
3260 
3261   if (isForcedVOP3())
3262     return "e64";
3263 
3264   if (isForcedSDWA())
3265     return "sdwa";
3266 
3267   if (isForcedDPP())
3268     return "dpp";
3269 
3270   return "";
3271 }
3272 
3273 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3274   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3275   const unsigned Num = Desc.getNumImplicitUses();
3276   for (unsigned i = 0; i < Num; ++i) {
3277     unsigned Reg = Desc.ImplicitUses[i];
3278     switch (Reg) {
3279     case AMDGPU::FLAT_SCR:
3280     case AMDGPU::VCC:
3281     case AMDGPU::VCC_LO:
3282     case AMDGPU::VCC_HI:
3283     case AMDGPU::M0:
3284       return Reg;
3285     default:
3286       break;
3287     }
3288   }
3289   return AMDGPU::NoRegister;
3290 }
3291 
3292 // NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
3294 // Note that there are no cases when a GFX7 opcode violates
3295 // constant bus limitations due to the use of an f16 constant.
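// Inline constants are, for example, small integers in [-16, 64] and a few
// fp values (0.0, +-0.5, +-1.0, +-2.0, +-4.0 and 1/(2*pi) where supported);
// they are encoded in the instruction itself and do not occupy the literal
// slot.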
3296 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3297                                        unsigned OpIdx) const {
3298   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3299 
3300   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3301     return false;
3302   }
3303 
3304   const MCOperand &MO = Inst.getOperand(OpIdx);
3305 
3306   int64_t Val = MO.getImm();
3307   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3308 
3309   switch (OpSize) { // expected operand size
3310   case 8:
3311     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3312   case 4:
3313     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3314   case 2: {
3315     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3316     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3317         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3318         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3319       return AMDGPU::isInlinableIntLiteral(Val);
3320 
3321     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3322         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3323         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3324       return AMDGPU::isInlinableIntLiteralV216(Val);
3325 
3326     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3327         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3328         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3329       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3330 
3331     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3332   }
3333   default:
3334     llvm_unreachable("invalid operand size");
3335   }
3336 }
3337 
3338 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3339   if (!isGFX10Plus())
3340     return 1;
3341 
3342   switch (Opcode) {
3343   // 64-bit shift instructions can use only one scalar value input
3344   case AMDGPU::V_LSHLREV_B64_e64:
3345   case AMDGPU::V_LSHLREV_B64_gfx10:
3346   case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3347   case AMDGPU::V_LSHRREV_B64_e64:
3348   case AMDGPU::V_LSHRREV_B64_gfx10:
3349   case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3350   case AMDGPU::V_ASHRREV_I64_e64:
3351   case AMDGPU::V_ASHRREV_I64_gfx10:
3352   case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3353   case AMDGPU::V_LSHL_B64_e64:
3354   case AMDGPU::V_LSHR_B64_e64:
3355   case AMDGPU::V_ASHR_I64_e64:
3356     return 1;
3357   default:
3358     return 2;
3359   }
3360 }
3361 
3362 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3363   const MCOperand &MO = Inst.getOperand(OpIdx);
3364   if (MO.isImm()) {
3365     return !isInlineConstant(Inst, OpIdx);
3366   } else if (MO.isReg()) {
3367     auto Reg = MO.getReg();
3368     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3369     auto PReg = mc2PseudoReg(Reg);
3370     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3371   } else {
3372     return true;
3373   }
3374 }
3375 
3376 bool
3377 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3378                                                 const OperandVector &Operands) {
3379   const unsigned Opcode = Inst.getOpcode();
3380   const MCInstrDesc &Desc = MII.get(Opcode);
3381   unsigned LastSGPR = AMDGPU::NoRegister;
3382   unsigned ConstantBusUseCount = 0;
3383   unsigned NumLiterals = 0;
3384   unsigned LiteralSize;
3385 
3386   if (Desc.TSFlags &
3387       (SIInstrFlags::VOPC |
3388        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3389        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3390        SIInstrFlags::SDWA)) {
3391     // Check special imm operands (used by madmk, etc)
3392     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3393       ++NumLiterals;
3394       LiteralSize = 4;
3395     }
3396 
3397     SmallDenseSet<unsigned> SGPRsUsed;
3398     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3399     if (SGPRUsed != AMDGPU::NoRegister) {
3400       SGPRsUsed.insert(SGPRUsed);
3401       ++ConstantBusUseCount;
3402     }
3403 
3404     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3405     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3406     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3407 
3408     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3409 
3410     for (int OpIdx : OpIndices) {
3411       if (OpIdx == -1) break;
3412 
3413       const MCOperand &MO = Inst.getOperand(OpIdx);
3414       if (usesConstantBus(Inst, OpIdx)) {
3415         if (MO.isReg()) {
3416           LastSGPR = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersection like these
3418           //   s0, s[0:1]
3419           //   flat_scratch_lo, flat_scratch
3420           //   flat_scratch_lo, flat_scratch_hi
3421           // are theoretically valid but they are disabled anyway.
3422           // Note that this code mimics SIInstrInfo::verifyInstruction
3423           if (!SGPRsUsed.count(LastSGPR)) {
3424             SGPRsUsed.insert(LastSGPR);
3425             ++ConstantBusUseCount;
3426           }
3427         } else { // Expression or a literal
3428 
3429           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3430             continue; // special operand like VINTERP attr_chan
3431 
3432           // An instruction may use only one literal.
3433           // This has been validated on the previous step.
3434           // See validateVOPLiteral.
3435           // This literal may be used as more than one operand.
3436           // If all these operands are of the same size,
3437           // this literal counts as one scalar value.
3438           // Otherwise it counts as 2 scalar values.
3439           // See "GFX10 Shader Programming", section 3.6.2.3.
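          // For example, a literal shared by a 64-bit operand and a
          // 32-bit operand counts as 2 scalar values.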
3440 
3441           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3442           if (Size < 4) Size = 4;
3443 
3444           if (NumLiterals == 0) {
3445             NumLiterals = 1;
3446             LiteralSize = Size;
3447           } else if (LiteralSize != Size) {
3448             NumLiterals = 2;
3449           }
3450         }
3451       }
3452     }
3453   }
3454   ConstantBusUseCount += NumLiterals;
3455 
3456   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3457     return true;
3458 
3459   SMLoc LitLoc = getLitLoc(Operands);
3460   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3461   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3462   Error(Loc, "invalid operand (violates constant bus restrictions)");
3463   return false;
3464 }
3465 
3466 bool
3467 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3468                                                  const OperandVector &Operands) {
3469   const unsigned Opcode = Inst.getOpcode();
3470   const MCInstrDesc &Desc = MII.get(Opcode);
3471 
3472   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3473   if (DstIdx == -1 ||
3474       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3475     return true;
3476   }
3477 
3478   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3479 
3480   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3481   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3482   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3483 
3484   assert(DstIdx != -1);
3485   const MCOperand &Dst = Inst.getOperand(DstIdx);
3486   assert(Dst.isReg());
3487 
3488   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3489 
3490   for (int SrcIdx : SrcIndices) {
3491     if (SrcIdx == -1) break;
3492     const MCOperand &Src = Inst.getOperand(SrcIdx);
3493     if (Src.isReg()) {
3494       if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) {
3495         const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3496         Error(getRegLoc(SrcReg, Operands),
3497           "destination must be different than all sources");
3498         return false;
3499       }
3500     }
3501   }
3502 
3503   return true;
3504 }
3505 
3506 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3507 
3508   const unsigned Opc = Inst.getOpcode();
3509   const MCInstrDesc &Desc = MII.get(Opc);
3510 
3511   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3512     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3513     assert(ClampIdx != -1);
3514     return Inst.getOperand(ClampIdx).getImm() == 0;
3515   }
3516 
3517   return true;
3518 }
3519 
3520 Optional<StringRef> AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3521 
3522   const unsigned Opc = Inst.getOpcode();
3523   const MCInstrDesc &Desc = MII.get(Opc);
3524 
3525   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3526     return None;
3527 
3528   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3529   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3530   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3531 
3532   assert(VDataIdx != -1);
3533 
3534   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3535     return None;
3536 
3537   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3538   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3539   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3540   if (DMask == 0)
3541     DMask = 1;
3542 
3543   bool isPackedD16 = false;
3544   unsigned DataSize =
3545     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3546   if (hasPackedD16()) {
3547     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3548     isPackedD16 = D16Idx >= 0;
3549     if (isPackedD16 && Inst.getOperand(D16Idx).getImm())
3550       DataSize = (DataSize + 1) / 2;
3551   }
3552 
3553   if ((VDataSize / 4) == DataSize + TFESize)
3554     return None;
3555 
3556   return StringRef(isPackedD16
3557                        ? "image data size does not match dmask, d16 and tfe"
3558                        : "image data size does not match dmask and tfe");
3559 }
3560 
3561 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3562   const unsigned Opc = Inst.getOpcode();
3563   const MCInstrDesc &Desc = MII.get(Opc);
3564 
3565   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3566     return true;
3567 
3568   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3569 
3570   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3571       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3572   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3573   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3574   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3575   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3576 
3577   assert(VAddr0Idx != -1);
3578   assert(SrsrcIdx != -1);
3579   assert(SrsrcIdx > VAddr0Idx);
3580 
3581   if (DimIdx == -1)
3582     return true; // intersect_ray
3583 
3584   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3585   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3586   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3587   unsigned ActualAddrSize =
3588       IsNSA ? SrsrcIdx - VAddr0Idx
3589             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3590   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3591 
3592   unsigned ExpectedAddrSize =
3593       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3594 
3595   if (!IsNSA) {
3596     if (ExpectedAddrSize > 8)
3597       ExpectedAddrSize = 16;
3598 
3599     // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3600     // This provides backward compatibility for assembly created
3601     // before 160b/192b/224b types were directly supported.
3602     if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3603       return true;
3604   }
3605 
3606   return ActualAddrSize == ExpectedAddrSize;
3607 }
3608 
3609 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3610 
3611   const unsigned Opc = Inst.getOpcode();
3612   const MCInstrDesc &Desc = MII.get(Opc);
3613 
3614   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3615     return true;
3616   if (!Desc.mayLoad() || !Desc.mayStore())
3617     return true; // Not atomic
3618 
3619   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3620   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3621 
3622   // This is an incomplete check because image_atomic_cmpswap
3623   // may only use 0x3 and 0xf while other atomic operations
3624   // may use 0x1 and 0x3. However these limitations are
3625   // verified when we check that dmask matches dst size.
3626   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3627 }
3628 
3629 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3630 
3631   const unsigned Opc = Inst.getOpcode();
3632   const MCInstrDesc &Desc = MII.get(Opc);
3633 
3634   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3635     return true;
3636 
3637   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3638   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3639 
3640   // GATHER4 instructions use dmask in a different fashion compared to
3641   // other MIMG instructions. The only useful DMASK values are
3642   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3643   // (red,red,red,red) etc.) The ISA document doesn't mention
3644   // this.
3645   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3646 }
3647 
3648 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3649   const unsigned Opc = Inst.getOpcode();
3650   const MCInstrDesc &Desc = MII.get(Opc);
3651 
3652   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3653     return true;
3654 
3655   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3656   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3657       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3658 
3659   if (!BaseOpcode->MSAA)
3660     return true;
3661 
3662   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3663   assert(DimIdx != -1);
3664 
3665   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3666   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3667 
3668   return DimInfo->MSAA;
3669 }
3670 
3671 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3672 {
3673   switch (Opcode) {
3674   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3675   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3676   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3677     return true;
3678   default:
3679     return false;
3680   }
3681 }
3682 
// movrels* opcodes should only allow VGPRs as src0.
// This is specified in the .td description for vop1/vop3,
3685 // but sdwa is handled differently. See isSDWAOperand.
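// For example, an SGPR used as src0 of v_movrels_b32_sdwa is rejected here.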
3686 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3687                                       const OperandVector &Operands) {
3688 
3689   const unsigned Opc = Inst.getOpcode();
3690   const MCInstrDesc &Desc = MII.get(Opc);
3691 
3692   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3693     return true;
3694 
3695   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3696   assert(Src0Idx != -1);
3697 
3698   SMLoc ErrLoc;
3699   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3700   if (Src0.isReg()) {
3701     auto Reg = mc2PseudoReg(Src0.getReg());
3702     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3703     if (!isSGPR(Reg, TRI))
3704       return true;
3705     ErrLoc = getRegLoc(Reg, Operands);
3706   } else {
3707     ErrLoc = getConstLoc(Operands);
3708   }
3709 
3710   Error(ErrLoc, "source operand must be a VGPR");
3711   return false;
3712 }
3713 
3714 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3715                                           const OperandVector &Operands) {
3716 
3717   const unsigned Opc = Inst.getOpcode();
3718 
3719   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3720     return true;
3721 
3722   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3723   assert(Src0Idx != -1);
3724 
3725   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3726   if (!Src0.isReg())
3727     return true;
3728 
3729   auto Reg = mc2PseudoReg(Src0.getReg());
3730   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3731   if (!isGFX90A() && isSGPR(Reg, TRI)) {
3732     Error(getRegLoc(Reg, Operands),
3733           "source operand must be either a VGPR or an inline constant");
3734     return false;
3735   }
3736 
3737   return true;
3738 }
3739 
3740 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3741                                    const OperandVector &Operands) {
3742   const unsigned Opc = Inst.getOpcode();
3743   const MCInstrDesc &Desc = MII.get(Opc);
3744 
3745   if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3746     return true;
3747 
3748   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3749   if (Src2Idx == -1)
3750     return true;
3751 
3752   const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3753   if (!Src2.isReg())
3754     return true;
3755 
3756   MCRegister Src2Reg = Src2.getReg();
3757   MCRegister DstReg = Inst.getOperand(0).getReg();
3758   if (Src2Reg == DstReg)
3759     return true;
3760 
3761   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3762   if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
3763     return true;
3764 
3765   if (TRI->regsOverlap(Src2Reg, DstReg)) {
3766     Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3767           "source 2 operand must not partially overlap with dst");
3768     return false;
3769   }
3770 
3771   return true;
3772 }
3773 
3774 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3775   switch (Inst.getOpcode()) {
3776   default:
3777     return true;
3778   case V_DIV_SCALE_F32_gfx6_gfx7:
3779   case V_DIV_SCALE_F32_vi:
3780   case V_DIV_SCALE_F32_gfx10:
3781   case V_DIV_SCALE_F64_gfx6_gfx7:
3782   case V_DIV_SCALE_F64_vi:
3783   case V_DIV_SCALE_F64_gfx10:
3784     break;
3785   }
3786 
3787   // TODO: Check that src0 = src1 or src2.
3788 
  for (auto Name : {AMDGPU::OpName::src0_modifiers,
                    AMDGPU::OpName::src1_modifiers,
                    AMDGPU::OpName::src2_modifiers}) {
3792     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3793             .getImm() &
3794         SISrcMods::ABS) {
3795       return false;
3796     }
3797   }
3798 
3799   return true;
3800 }
3801 
3802 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3803 
3804   const unsigned Opc = Inst.getOpcode();
3805   const MCInstrDesc &Desc = MII.get(Opc);
3806 
3807   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3808     return true;
3809 
3810   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3811   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3812     if (isCI() || isSI())
3813       return false;
3814   }
3815 
3816   return true;
3817 }
3818 
3819 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3820   const unsigned Opc = Inst.getOpcode();
3821   const MCInstrDesc &Desc = MII.get(Opc);
3822 
3823   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3824     return true;
3825 
3826   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3827   if (DimIdx < 0)
3828     return true;
3829 
3830   long Imm = Inst.getOperand(DimIdx).getImm();
3831   if (Imm < 0 || Imm >= 8)
3832     return false;
3833 
3834   return true;
3835 }
3836 
3837 static bool IsRevOpcode(const unsigned Opcode)
3838 {
3839   switch (Opcode) {
3840   case AMDGPU::V_SUBREV_F32_e32:
3841   case AMDGPU::V_SUBREV_F32_e64:
3842   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3843   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3844   case AMDGPU::V_SUBREV_F32_e32_vi:
3845   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3846   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3847   case AMDGPU::V_SUBREV_F32_e64_vi:
3848 
3849   case AMDGPU::V_SUBREV_CO_U32_e32:
3850   case AMDGPU::V_SUBREV_CO_U32_e64:
3851   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3852   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3853 
3854   case AMDGPU::V_SUBBREV_U32_e32:
3855   case AMDGPU::V_SUBBREV_U32_e64:
3856   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3857   case AMDGPU::V_SUBBREV_U32_e32_vi:
3858   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3859   case AMDGPU::V_SUBBREV_U32_e64_vi:
3860 
3861   case AMDGPU::V_SUBREV_U32_e32:
3862   case AMDGPU::V_SUBREV_U32_e64:
3863   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3864   case AMDGPU::V_SUBREV_U32_e32_vi:
3865   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3866   case AMDGPU::V_SUBREV_U32_e64_vi:
3867 
3868   case AMDGPU::V_SUBREV_F16_e32:
3869   case AMDGPU::V_SUBREV_F16_e64:
3870   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3871   case AMDGPU::V_SUBREV_F16_e32_vi:
3872   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3873   case AMDGPU::V_SUBREV_F16_e64_vi:
3874 
3875   case AMDGPU::V_SUBREV_U16_e32:
3876   case AMDGPU::V_SUBREV_U16_e64:
3877   case AMDGPU::V_SUBREV_U16_e32_vi:
3878   case AMDGPU::V_SUBREV_U16_e64_vi:
3879 
3880   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3881   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3882   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3883 
3884   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3885   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3886 
3887   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3888   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3889 
3890   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3891   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3892 
3893   case AMDGPU::V_LSHRREV_B32_e32:
3894   case AMDGPU::V_LSHRREV_B32_e64:
3895   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3896   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3897   case AMDGPU::V_LSHRREV_B32_e32_vi:
3898   case AMDGPU::V_LSHRREV_B32_e64_vi:
3899   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3900   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3901 
3902   case AMDGPU::V_ASHRREV_I32_e32:
3903   case AMDGPU::V_ASHRREV_I32_e64:
3904   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3905   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3906   case AMDGPU::V_ASHRREV_I32_e32_vi:
3907   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3908   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3909   case AMDGPU::V_ASHRREV_I32_e64_vi:
3910 
3911   case AMDGPU::V_LSHLREV_B32_e32:
3912   case AMDGPU::V_LSHLREV_B32_e64:
3913   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3914   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3915   case AMDGPU::V_LSHLREV_B32_e32_vi:
3916   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3917   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3918   case AMDGPU::V_LSHLREV_B32_e64_vi:
3919 
3920   case AMDGPU::V_LSHLREV_B16_e32:
3921   case AMDGPU::V_LSHLREV_B16_e64:
3922   case AMDGPU::V_LSHLREV_B16_e32_vi:
3923   case AMDGPU::V_LSHLREV_B16_e64_vi:
3924   case AMDGPU::V_LSHLREV_B16_gfx10:
3925 
3926   case AMDGPU::V_LSHRREV_B16_e32:
3927   case AMDGPU::V_LSHRREV_B16_e64:
3928   case AMDGPU::V_LSHRREV_B16_e32_vi:
3929   case AMDGPU::V_LSHRREV_B16_e64_vi:
3930   case AMDGPU::V_LSHRREV_B16_gfx10:
3931 
3932   case AMDGPU::V_ASHRREV_I16_e32:
3933   case AMDGPU::V_ASHRREV_I16_e64:
3934   case AMDGPU::V_ASHRREV_I16_e32_vi:
3935   case AMDGPU::V_ASHRREV_I16_e64_vi:
3936   case AMDGPU::V_ASHRREV_I16_gfx10:
3937 
3938   case AMDGPU::V_LSHLREV_B64_e64:
3939   case AMDGPU::V_LSHLREV_B64_gfx10:
3940   case AMDGPU::V_LSHLREV_B64_vi:
3941 
3942   case AMDGPU::V_LSHRREV_B64_e64:
3943   case AMDGPU::V_LSHRREV_B64_gfx10:
3944   case AMDGPU::V_LSHRREV_B64_vi:
3945 
3946   case AMDGPU::V_ASHRREV_I64_e64:
3947   case AMDGPU::V_ASHRREV_I64_gfx10:
3948   case AMDGPU::V_ASHRREV_I64_vi:
3949 
3950   case AMDGPU::V_PK_LSHLREV_B16:
3951   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3952   case AMDGPU::V_PK_LSHLREV_B16_vi:
3953 
3954   case AMDGPU::V_PK_LSHRREV_B16:
3955   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3956   case AMDGPU::V_PK_LSHRREV_B16_vi:
3957   case AMDGPU::V_PK_ASHRREV_I16:
3958   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3959   case AMDGPU::V_PK_ASHRREV_I16_vi:
3960     return true;
3961   default:
3962     return false;
3963   }
3964 }
3965 
3966 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3967 
3968   using namespace SIInstrFlags;
3969   const unsigned Opcode = Inst.getOpcode();
3970   const MCInstrDesc &Desc = MII.get(Opcode);
3971 
3972   // lds_direct register is defined so that it can be used
3973   // with 9-bit operands only. Ignore encodings which do not accept these.
3974   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3975   if ((Desc.TSFlags & Enc) == 0)
3976     return None;
3977 
3978   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3979     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3980     if (SrcIdx == -1)
3981       break;
3982     const auto &Src = Inst.getOperand(SrcIdx);
3983     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3984 
3985       if (isGFX90A() || isGFX11Plus())
3986         return StringRef("lds_direct is not supported on this GPU");
3987 
3988       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3989         return StringRef("lds_direct cannot be used with this instruction");
3990 
3991       if (SrcName != OpName::src0)
3992         return StringRef("lds_direct may be used as src0 only");
3993     }
3994   }
3995 
3996   return None;
3997 }
3998 
3999 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4000   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4001     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4002     if (Op.isFlatOffset())
4003       return Op.getStartLoc();
4004   }
4005   return getLoc();
4006 }
4007 
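// Validate the offset modifier of FLAT instructions: for example, a non-zero
// "offset:" is rejected on targets without flat offsets, and the value must
// fit the (signed or unsigned) offset field of the encoding.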
4008 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4009                                          const OperandVector &Operands) {
4010   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4011   if ((TSFlags & SIInstrFlags::FLAT) == 0)
4012     return true;
4013 
4014   auto Opcode = Inst.getOpcode();
4015   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4016   assert(OpNum != -1);
4017 
4018   const auto &Op = Inst.getOperand(OpNum);
4019   if (!hasFlatOffsets() && Op.getImm() != 0) {
4020     Error(getFlatOffsetLoc(Operands),
4021           "flat offset modifier is not supported on this GPU");
4022     return false;
4023   }
4024 
4025   // For FLAT segment the offset must be positive;
4026   // MSB is ignored and forced to zero.
4027   if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
4028     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
4029     if (!isIntN(OffsetSize, Op.getImm())) {
4030       Error(getFlatOffsetLoc(Operands),
4031             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4032       return false;
4033     }
4034   } else {
4035     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
4036     if (!isUIntN(OffsetSize, Op.getImm())) {
4037       Error(getFlatOffsetLoc(Operands),
4038             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4039       return false;
4040     }
4041   }
4042 
4043   return true;
4044 }
4045 
4046 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4047   // Start with second operand because SMEM Offset cannot be dst or src0.
4048   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4049     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4050     if (Op.isSMEMOffset())
4051       return Op.getStartLoc();
4052   }
4053   return getLoc();
4054 }
4055 
4056 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4057                                          const OperandVector &Operands) {
4058   if (isCI() || isSI())
4059     return true;
4060 
4061   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4062   if ((TSFlags & SIInstrFlags::SMRD) == 0)
4063     return true;
4064 
4065   auto Opcode = Inst.getOpcode();
4066   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4067   if (OpNum == -1)
4068     return true;
4069 
4070   const auto &Op = Inst.getOperand(OpNum);
4071   if (!Op.isImm())
4072     return true;
4073 
4074   uint64_t Offset = Op.getImm();
4075   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4076   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4077       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4078     return true;
4079 
4080   Error(getSMEMOffsetLoc(Operands),
4081         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
4082                                "expected a 21-bit signed offset");
4083 
4084   return false;
4085 }
4086 
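// SOP2/SOPC instructions can encode at most one literal constant. Repeated
// uses of the same value (e.g. "s_add_u32 s0, 0x11223344, 0x11223344") share
// that single literal and are therefore accepted.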
4087 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4088   unsigned Opcode = Inst.getOpcode();
4089   const MCInstrDesc &Desc = MII.get(Opcode);
4090   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4091     return true;
4092 
4093   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4094   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4095 
4096   const int OpIndices[] = { Src0Idx, Src1Idx };
4097 
4098   unsigned NumExprs = 0;
4099   unsigned NumLiterals = 0;
4100   uint32_t LiteralValue;
4101 
4102   for (int OpIdx : OpIndices) {
4103     if (OpIdx == -1) break;
4104 
4105     const MCOperand &MO = Inst.getOperand(OpIdx);
4106     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4107     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4108       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4109         uint32_t Value = static_cast<uint32_t>(MO.getImm());
4110         if (NumLiterals == 0 || LiteralValue != Value) {
4111           LiteralValue = Value;
4112           ++NumLiterals;
4113         }
4114       } else if (MO.isExpr()) {
4115         ++NumExprs;
4116       }
4117     }
4118   }
4119 
4120   return NumLiterals + NumExprs <= 1;
4121 }
4122 
4123 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4124   const unsigned Opc = Inst.getOpcode();
4125   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
4126       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
4127     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4128     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4129 
4130     if (OpSel & ~3)
4131       return false;
4132   }
4133 
4134   if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) {
4135     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4136     if (OpSelIdx != -1) {
4137       if (Inst.getOperand(OpSelIdx).getImm() != 0)
4138         return false;
4139     }
4140     int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4141     if (OpSelHiIdx != -1) {
4142       if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4143         return false;
4144     }
4145   }
4146 
4147   return true;
4148 }
4149 
4150 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4151                                   const OperandVector &Operands) {
4152   const unsigned Opc = Inst.getOpcode();
4153   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4154   if (DppCtrlIdx < 0)
4155     return true;
4156   unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4157 
4158   if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
4159     // DPP64 is supported for row_newbcast only.
4160     int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4161     if (Src0Idx >= 0 &&
4162         getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
4163       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4164       Error(S, "64 bit dpp only supports row_newbcast");
4165       return false;
4166     }
4167   }
4168 
4169   return true;
4170 }
4171 
4172 // Check if VCC register matches wavefront size
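// For example, in wave32 mode the operand must be written as vcc_lo; plain
// vcc is only accepted in wave64 mode.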
4173 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4174   auto FB = getFeatureBits();
4175   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4176     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4177 }
4178 
4179 // One unique literal can be used. VOP3 literal is only allowed in GFX10+
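// For example, "v_add_f32_e64 v0, 0x12345678, v1" is only accepted on targets
// with the VOP3Literal feature.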
4180 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4181                                          const OperandVector &Operands) {
4182   unsigned Opcode = Inst.getOpcode();
4183   const MCInstrDesc &Desc = MII.get(Opcode);
4184   const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
4185   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4186       ImmIdx == -1)
4187     return true;
4188 
4189   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4190   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4191   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
4192 
4193   const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};
4194 
4195   unsigned NumExprs = 0;
4196   unsigned NumLiterals = 0;
4197   uint32_t LiteralValue;
4198 
4199   for (int OpIdx : OpIndices) {
4200     if (OpIdx == -1)
4201       continue;
4202 
4203     const MCOperand &MO = Inst.getOperand(OpIdx);
4204     if (!MO.isImm() && !MO.isExpr())
4205       continue;
4206     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4207       continue;
4208 
4209     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4210         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4211       Error(getConstLoc(Operands),
4212             "inline constants are not allowed for this operand");
4213       return false;
4214     }
4215 
4216     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4217       uint32_t Value = static_cast<uint32_t>(MO.getImm());
4218       if (NumLiterals == 0 || LiteralValue != Value) {
4219         LiteralValue = Value;
4220         ++NumLiterals;
4221       }
4222     } else if (MO.isExpr()) {
4223       ++NumExprs;
4224     }
4225   }
4226   NumLiterals += NumExprs;
4227 
4228   if (!NumLiterals)
4229     return true;
4230 
4231   if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4232     Error(getLitLoc(Operands), "literal operands are not supported");
4233     return false;
4234   }
4235 
4236   if (NumLiterals > 1) {
4237     Error(getLitLoc(Operands), "only one literal operand is allowed");
4238     return false;
4239   }
4240 
4241   return true;
4242 }
4243 
4244 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4245 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4246                          const MCRegisterInfo *MRI) {
4247   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4248   if (OpIdx < 0)
4249     return -1;
4250 
4251   const MCOperand &Op = Inst.getOperand(OpIdx);
4252   if (!Op.isReg())
4253     return -1;
4254 
4255   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4256   auto Reg = Sub ? Sub : Op.getReg();
4257   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4258   return AGPR32.contains(Reg) ? 1 : 0;
4259 }
4260 
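// On gfx90a-class targets the data and dst operands of FLAT, MUBUF, MTBUF,
// MIMG and DS loads/stores must be either all VGPRs or all AGPRs; on other
// targets AGPR data operands are rejected (e.g. an AGPR destination such as
// a0 in a global_load is only expected to be accepted on gfx90a).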
4261 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4262   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4263   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4264                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4265                   SIInstrFlags::DS)) == 0)
4266     return true;
4267 
4268   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4269                                                       : AMDGPU::OpName::vdata;
4270 
4271   const MCRegisterInfo *MRI = getMRI();
4272   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4273   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4274 
4275   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4276     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4277     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4278       return false;
4279   }
4280 
4281   auto FB = getFeatureBits();
4282   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4283     if (DataAreg < 0 || DstAreg < 0)
4284       return true;
4285     return DstAreg == DataAreg;
4286   }
4287 
4288   return DstAreg < 1 && DataAreg < 1;
4289 }
4290 
4291 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4292   auto FB = getFeatureBits();
4293   if (!FB[AMDGPU::FeatureGFX90AInsts])
4294     return true;
4295 
4296   const MCRegisterInfo *MRI = getMRI();
4297   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4298   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4299   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4300     const MCOperand &Op = Inst.getOperand(I);
4301     if (!Op.isReg())
4302       continue;
4303 
4304     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4305     if (!Sub)
4306       continue;
4307 
4308     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4309       return false;
4310     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4311       return false;
4312   }
4313 
4314   return true;
4315 }
4316 
4317 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4318   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4319     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4320     if (Op.isBLGP())
4321       return Op.getStartLoc();
4322   }
4323   return SMLoc();
4324 }
4325 
4326 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4327                                    const OperandVector &Operands) {
4328   unsigned Opc = Inst.getOpcode();
4329   int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4330   if (BlgpIdx == -1)
4331     return true;
4332   SMLoc BLGPLoc = getBLGPLoc(Operands);
4333   if (!BLGPLoc.isValid())
4334     return true;
4335   bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:");
4336   auto FB = getFeatureBits();
4337   bool UsesNeg = false;
4338   if (FB[AMDGPU::FeatureGFX940Insts]) {
4339     switch (Opc) {
4340     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4341     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4342     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4343     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4344       UsesNeg = true;
4345     }
4346   }
4347 
4348   if (IsNeg == UsesNeg)
4349     return true;
4350 
4351   Error(BLGPLoc,
4352         UsesNeg ? "invalid modifier: blgp is not supported"
4353                 : "invalid modifier: neg is not supported");
4354 
4355   return false;
4356 }
4357 
4358 // gfx90a has an undocumented limitation:
4359 // DS_GWS opcodes must use even aligned registers.
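// For example, a data0 operand of v1 is rejected while v0 is accepted.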
4360 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4361                                   const OperandVector &Operands) {
4362   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4363     return true;
4364 
4365   int Opc = Inst.getOpcode();
4366   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4367       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4368     return true;
4369 
4370   const MCRegisterInfo *MRI = getMRI();
4371   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4372   int Data0Pos =
4373       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4374   assert(Data0Pos != -1);
4375   auto Reg = Inst.getOperand(Data0Pos).getReg();
4376   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4377   if (RegIdx & 1) {
4378     SMLoc RegLoc = getRegLoc(Reg, Operands);
4379     Error(RegLoc, "vgpr must be even aligned");
4380     return false;
4381   }
4382 
4383   return true;
4384 }
4385 
4386 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4387                                             const OperandVector &Operands,
4388                                             const SMLoc &IDLoc) {
4389   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4390                                            AMDGPU::OpName::cpol);
4391   if (CPolPos == -1)
4392     return true;
4393 
4394   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4395 
4396   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4397   if (TSFlags & SIInstrFlags::SMRD) {
4398     if (CPol && (isSI() || isCI())) {
4399       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4400       Error(S, "cache policy is not supported for SMRD instructions");
4401       return false;
4402     }
4403     if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4404       Error(IDLoc, "invalid cache policy for SMEM instruction");
4405       return false;
4406     }
4407   }
4408 
4409   if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4410     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4411     StringRef CStr(S.getPointer());
4412     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4413     Error(S, "scc is not supported on this GPU");
4414     return false;
4415   }
4416 
4417   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4418     return true;
4419 
4420   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4421     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4422       Error(IDLoc, isGFX940() ? "instruction must use sc0"
4423                               : "instruction must use glc");
4424       return false;
4425     }
4426   } else {
4427     if (CPol & CPol::GLC) {
4428       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4429       StringRef CStr(S.getPointer());
4430       S = SMLoc::getFromPointer(
4431           &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4432       Error(S, isGFX940() ? "instruction must not use sc0"
4433                           : "instruction must not use glc");
4434       return false;
4435     }
4436   }
4437 
4438   return true;
4439 }
4440 
4441 bool AMDGPUAsmParser::validateFlatLdsDMA(const MCInst &Inst,
4442                                          const OperandVector &Operands,
4443                                          const SMLoc &IDLoc) {
4444   if (isGFX940())
4445     return true;
4446 
4447   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4448   if ((TSFlags & (SIInstrFlags::VALU | SIInstrFlags::FLAT)) !=
4449       (SIInstrFlags::VALU | SIInstrFlags::FLAT))
4450     return true;
4451   // This is FLAT LDS DMA.
4452 
4453   SMLoc S = getImmLoc(AMDGPUOperand::ImmTyLDS, Operands);
4454   StringRef CStr(S.getPointer());
4455   if (!CStr.startswith("lds")) {
    // This is an incorrectly selected LDS DMA version of a FLAT load opcode.
    // The LDS version should have the 'lds' modifier, but it follows the
    // optional operands, so its absence is ignored by the matcher.
4459     Error(IDLoc, "invalid operands for instruction");
4460     return false;
4461   }
4462 
4463   return true;
4464 }
4465 
4466 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4467                                           const SMLoc &IDLoc,
4468                                           const OperandVector &Operands) {
4469   if (auto ErrMsg = validateLdsDirect(Inst)) {
4470     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4471     return false;
4472   }
4473   if (!validateSOPLiteral(Inst)) {
4474     Error(getLitLoc(Operands),
4475       "only one literal operand is allowed");
4476     return false;
4477   }
4478   if (!validateVOPLiteral(Inst, Operands)) {
4479     return false;
4480   }
4481   if (!validateConstantBusLimitations(Inst, Operands)) {
4482     return false;
4483   }
4484   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4485     return false;
4486   }
4487   if (!validateIntClampSupported(Inst)) {
4488     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4489       "integer clamping is not supported on this GPU");
4490     return false;
4491   }
4492   if (!validateOpSel(Inst)) {
4493     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4494       "invalid op_sel operand");
4495     return false;
4496   }
4497   if (!validateDPP(Inst, Operands)) {
4498     return false;
4499   }
4500   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4501   if (!validateMIMGD16(Inst)) {
4502     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4503       "d16 modifier is not supported on this GPU");
4504     return false;
4505   }
4506   if (!validateMIMGDim(Inst)) {
4507     Error(IDLoc, "dim modifier is required on this GPU");
4508     return false;
4509   }
4510   if (!validateMIMGMSAA(Inst)) {
4511     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4512           "invalid dim; must be MSAA type");
4513     return false;
4514   }
4515   if (auto ErrMsg = validateMIMGDataSize(Inst)) {
4516     Error(IDLoc, *ErrMsg);
4517     return false;
4518   }
4519   if (!validateMIMGAddrSize(Inst)) {
4520     Error(IDLoc,
4521       "image address size does not match dim and a16");
4522     return false;
4523   }
4524   if (!validateMIMGAtomicDMask(Inst)) {
4525     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4526       "invalid atomic image dmask");
4527     return false;
4528   }
4529   if (!validateMIMGGatherDMask(Inst)) {
4530     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4531       "invalid image_gather dmask: only one bit must be set");
4532     return false;
4533   }
4534   if (!validateMovrels(Inst, Operands)) {
4535     return false;
4536   }
4537   if (!validateFlatOffset(Inst, Operands)) {
4538     return false;
4539   }
4540   if (!validateSMEMOffset(Inst, Operands)) {
4541     return false;
4542   }
4543   if (!validateMAIAccWrite(Inst, Operands)) {
4544     return false;
4545   }
4546   if (!validateMFMA(Inst, Operands)) {
4547     return false;
4548   }
4549   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4550     return false;
4551   }
4552 
4553   if (!validateAGPRLdSt(Inst)) {
4554     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4555     ? "invalid register class: data and dst should be all VGPR or AGPR"
4556     : "invalid register class: agpr loads and stores not supported on this GPU"
4557     );
4558     return false;
4559   }
4560   if (!validateVGPRAlign(Inst)) {
4561     Error(IDLoc,
4562       "invalid register class: vgpr tuples must be 64 bit aligned");
4563     return false;
4564   }
4565   if (!validateGWS(Inst, Operands)) {
4566     return false;
4567   }
4568 
4569   if (!validateBLGP(Inst, Operands)) {
4570     return false;
4571   }
4572 
4573   if (!validateDivScale(Inst)) {
4574     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4575     return false;
4576   }
4580 
4581   if (!validateFlatLdsDMA(Inst, Operands, IDLoc)) {
4582     return false;
4583   }
4584 
4585   return true;
4586 }
4587 
4588 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4589                                             const FeatureBitset &FBS,
4590                                             unsigned VariantID = 0);
4591 
4592 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4593                                 const FeatureBitset &AvailableFeatures,
4594                                 unsigned VariantID);
4595 
4596 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4597                                        const FeatureBitset &FBS) {
4598   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4599 }
4600 
4601 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4602                                        const FeatureBitset &FBS,
4603                                        ArrayRef<unsigned> Variants) {
4604   for (auto Variant : Variants) {
4605     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4606       return true;
4607   }
4608 
4609   return false;
4610 }
4611 
4612 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4613                                                   const SMLoc &IDLoc) {
4614   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4615 
4616   // Check if requested instruction variant is supported.
4617   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4618     return false;
4619 
4620   // This instruction is not supported.
4621   // Clear any other pending errors because they are no longer relevant.
4622   getParser().clearPendingErrors();
4623 
4624   // Requested instruction variant is not supported.
4625   // Check if any other variants are supported.
4626   StringRef VariantName = getMatchedVariantName();
4627   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4628     return Error(IDLoc,
4629                  Twine(VariantName,
4630                        " variant of this instruction is not supported"));
4631   }
4632 
4633   // Finally check if this instruction is supported on any other GPU.
4634   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4635     return Error(IDLoc, "instruction not supported on this GPU");
4636   }
4637 
4638   // Instruction not supported on any GPU. Probably a typo.
4639   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4640   return Error(IDLoc, "invalid instruction" + Suggestion);
4641 }
4642 
4643 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4644                                               OperandVector &Operands,
4645                                               MCStreamer &Out,
4646                                               uint64_t &ErrorInfo,
4647                                               bool MatchingInlineAsm) {
4648   MCInst Inst;
4649   unsigned Result = Match_Success;
4650   for (auto Variant : getMatchedVariants()) {
4651     uint64_t EI;
4652     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4653                                   Variant);
    // We order match statuses from least to most specific and use the most
    // specific status as the result:
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4657     if ((R == Match_Success) ||
4658         (R == Match_PreferE32) ||
4659         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4660         (R == Match_InvalidOperand && Result != Match_MissingFeature
4661                                    && Result != Match_PreferE32) ||
4662         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4663                                    && Result != Match_MissingFeature
4664                                    && Result != Match_PreferE32)) {
4665       Result = R;
4666       ErrorInfo = EI;
4667     }
4668     if (R == Match_Success)
4669       break;
4670   }
4671 
4672   if (Result == Match_Success) {
4673     if (!validateInstruction(Inst, IDLoc, Operands)) {
4674       return true;
4675     }
4676     Inst.setLoc(IDLoc);
4677     Out.emitInstruction(Inst, getSTI());
4678     return false;
4679   }
4680 
4681   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4682   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4683     return true;
4684   }
4685 
4686   switch (Result) {
4687   default: break;
4688   case Match_MissingFeature:
4689     // It has been verified that the specified instruction
4690     // mnemonic is valid. A match was found but it requires
4691     // features which are not supported on this GPU.
4692     return Error(IDLoc, "operands are not valid for this GPU or mode");
4693 
4694   case Match_InvalidOperand: {
4695     SMLoc ErrorLoc = IDLoc;
4696     if (ErrorInfo != ~0ULL) {
4697       if (ErrorInfo >= Operands.size()) {
4698         return Error(IDLoc, "too few operands for instruction");
4699       }
4700       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4701       if (ErrorLoc == SMLoc())
4702         ErrorLoc = IDLoc;
4703     }
4704     return Error(ErrorLoc, "invalid operand for instruction");
4705   }
4706 
4707   case Match_PreferE32:
4708     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4709                         "should be encoded as e32");
4710   case Match_MnemonicFail:
4711     llvm_unreachable("Invalid instructions should have been handled already");
4712   }
4713   llvm_unreachable("Implement any new match types added!");
4714 }
4715 
4716 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4717   int64_t Tmp = -1;
4718   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4719     return true;
4720   }
4721   if (getParser().parseAbsoluteExpression(Tmp)) {
4722     return true;
4723   }
4724   Ret = static_cast<uint32_t>(Tmp);
4725   return false;
4726 }
4727 
4728 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4729                                                uint32_t &Minor) {
4730   if (ParseAsAbsoluteExpression(Major))
4731     return TokError("invalid major version");
4732 
4733   if (!trySkipToken(AsmToken::Comma))
4734     return TokError("minor version number required, comma expected");
4735 
4736   if (ParseAsAbsoluteExpression(Minor))
4737     return TokError("invalid minor version");
4738 
4739   return false;
4740 }
4741 
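/// Parse the .amdgcn_target directive. The quoted target id must match the
/// target id the assembler was configured with, e.g. (illustrative):
///   .amdgcn_target "amdgcn-amd-amdhsa--gfx90a"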
4742 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4743   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4744     return TokError("directive only supported for amdgcn architecture");
4745 
4746   std::string TargetIDDirective;
4747   SMLoc TargetStart = getTok().getLoc();
4748   if (getParser().parseEscapedString(TargetIDDirective))
4749     return true;
4750 
4751   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4752   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4753     return getParser().Error(TargetRange.Start,
4754         (Twine(".amdgcn_target directive's target id ") +
4755          Twine(TargetIDDirective) +
4756          Twine(" does not match the specified target id ") +
4757          Twine(getTargetStreamer().getTargetID()->toString())).str());
4758 
4759   return false;
4760 }
4761 
4762 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4763   return Error(Range.Start, "value out of range", Range);
4764 }
4765 
4766 bool AMDGPUAsmParser::calculateGPRBlocks(
4767     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4768     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4769     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4770     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4771   // TODO(scott.linder): These calculations are duplicated from
4772   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4773   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4774 
4775   unsigned NumVGPRs = NextFreeVGPR;
4776   unsigned NumSGPRs = NextFreeSGPR;
4777 
4778   if (Version.Major >= 10)
4779     NumSGPRs = 0;
4780   else {
4781     unsigned MaxAddressableNumSGPRs =
4782         IsaInfo::getAddressableNumSGPRs(&getSTI());
4783 
4784     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4785         NumSGPRs > MaxAddressableNumSGPRs)
4786       return OutOfRangeError(SGPRRange);
4787 
4788     NumSGPRs +=
4789         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4790 
4791     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4792         NumSGPRs > MaxAddressableNumSGPRs)
4793       return OutOfRangeError(SGPRRange);
4794 
4795     if (Features.test(FeatureSGPRInitBug))
4796       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4797   }
4798 
4799   VGPRBlocks =
4800       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4801   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4802 
4803   return false;
4804 }
4805 
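/// Parse an .amdhsa_kernel block. The body is a sequence of .amdhsa_* key
/// directives, each taking an absolute expression, and is terminated by
/// .end_amdhsa_kernel. A minimal, illustrative example:
///   .amdhsa_kernel my_kernel
///     .amdhsa_next_free_vgpr 8
///     .amdhsa_next_free_sgpr 16
///   .end_amdhsa_kernel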
4806 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4807   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4808     return TokError("directive only supported for amdgcn architecture");
4809 
4810   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4811     return TokError("directive only supported for amdhsa OS");
4812 
4813   StringRef KernelName;
4814   if (getParser().parseIdentifier(KernelName))
4815     return true;
4816 
4817   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4818 
4819   StringSet<> Seen;
4820 
4821   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4822 
4823   SMRange VGPRRange;
4824   uint64_t NextFreeVGPR = 0;
4825   uint64_t AccumOffset = 0;
4826   uint64_t SharedVGPRCount = 0;
4827   SMRange SGPRRange;
4828   uint64_t NextFreeSGPR = 0;
4829 
4830   // Count the user SGPRs implied by enabled .amdhsa_user_sgpr_* directives.
4831   unsigned ImpliedUserSGPRCount = 0;
4832 
4833   // Track if the asm explicitly contains the directive for the user SGPR
4834   // count.
4835   Optional<unsigned> ExplicitUserSGPRCount;
4836   bool ReserveVCC = true;
4837   bool ReserveFlatScr = true;
4838   Optional<bool> EnableWavefrontSize32;
4839 
4840   while (true) {
4841     while (trySkipToken(AsmToken::EndOfStatement));
4842 
4843     StringRef ID;
4844     SMRange IDRange = getTok().getLocRange();
4845     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4846       return true;
4847 
4848     if (ID == ".end_amdhsa_kernel")
4849       break;
4850 
4851     if (Seen.find(ID) != Seen.end())
4852       return TokError(".amdhsa_ directives cannot be repeated");
4853     Seen.insert(ID);
4854 
4855     SMLoc ValStart = getLoc();
4856     int64_t IVal;
4857     if (getParser().parseAbsoluteExpression(IVal))
4858       return true;
4859     SMLoc ValEnd = getLoc();
4860     SMRange ValRange = SMRange(ValStart, ValEnd);
4861 
4862     if (IVal < 0)
4863       return OutOfRangeError(ValRange);
4864 
4865     uint64_t Val = IVal;
4866 
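// Range-check VALUE against the bit width of ENTRY and, if it fits, store it
// into the corresponding bits of FIELD.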
4867 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4868   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4869     return OutOfRangeError(RANGE);                                             \
4870   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4871 
4872     if (ID == ".amdhsa_group_segment_fixed_size") {
4873       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4874         return OutOfRangeError(ValRange);
4875       KD.group_segment_fixed_size = Val;
4876     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4877       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4878         return OutOfRangeError(ValRange);
4879       KD.private_segment_fixed_size = Val;
4880     } else if (ID == ".amdhsa_kernarg_size") {
4881       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4882         return OutOfRangeError(ValRange);
4883       KD.kernarg_size = Val;
4884     } else if (ID == ".amdhsa_user_sgpr_count") {
4885       ExplicitUserSGPRCount = Val;
4886     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4887       if (hasArchitectedFlatScratch())
4888         return Error(IDRange.Start,
4889                      "directive is not supported with architected flat scratch",
4890                      IDRange);
4891       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4892                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4893                        Val, ValRange);
4894       if (Val)
4895         ImpliedUserSGPRCount += 4;
4896     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4897       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4898                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4899                        ValRange);
4900       if (Val)
4901         ImpliedUserSGPRCount += 2;
4902     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4903       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4904                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4905                        ValRange);
4906       if (Val)
4907         ImpliedUserSGPRCount += 2;
4908     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4909       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4910                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4911                        Val, ValRange);
4912       if (Val)
4913         ImpliedUserSGPRCount += 2;
4914     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4915       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4916                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4917                        ValRange);
4918       if (Val)
4919         ImpliedUserSGPRCount += 2;
4920     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4921       if (hasArchitectedFlatScratch())
4922         return Error(IDRange.Start,
4923                      "directive is not supported with architected flat scratch",
4924                      IDRange);
4925       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4926                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4927                        ValRange);
4928       if (Val)
4929         ImpliedUserSGPRCount += 2;
4930     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4931       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4932                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4933                        Val, ValRange);
4934       if (Val)
4935         ImpliedUserSGPRCount += 1;
4936     } else if (ID == ".amdhsa_wavefront_size32") {
4937       if (IVersion.Major < 10)
4938         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4939       EnableWavefrontSize32 = Val;
4940       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4941                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4942                        Val, ValRange);
4943     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4944       if (hasArchitectedFlatScratch())
4945         return Error(IDRange.Start,
4946                      "directive is not supported with architected flat scratch",
4947                      IDRange);
4948       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4949                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4950     } else if (ID == ".amdhsa_enable_private_segment") {
4951       if (!hasArchitectedFlatScratch())
4952         return Error(
4953             IDRange.Start,
4954             "directive is not supported without architected flat scratch",
4955             IDRange);
4956       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4957                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4958     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4959       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4960                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4961                        ValRange);
4962     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4963       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4964                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4965                        ValRange);
4966     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4967       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4968                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4969                        ValRange);
4970     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4971       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4972                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4973                        ValRange);
4974     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4975       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4976                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4977                        ValRange);
4978     } else if (ID == ".amdhsa_next_free_vgpr") {
4979       VGPRRange = ValRange;
4980       NextFreeVGPR = Val;
4981     } else if (ID == ".amdhsa_next_free_sgpr") {
4982       SGPRRange = ValRange;
4983       NextFreeSGPR = Val;
4984     } else if (ID == ".amdhsa_accum_offset") {
4985       if (!isGFX90A())
4986         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4987       AccumOffset = Val;
4988     } else if (ID == ".amdhsa_reserve_vcc") {
4989       if (!isUInt<1>(Val))
4990         return OutOfRangeError(ValRange);
4991       ReserveVCC = Val;
4992     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4993       if (IVersion.Major < 7)
4994         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4995       if (hasArchitectedFlatScratch())
4996         return Error(IDRange.Start,
4997                      "directive is not supported with architected flat scratch",
4998                      IDRange);
4999       if (!isUInt<1>(Val))
5000         return OutOfRangeError(ValRange);
5001       ReserveFlatScr = Val;
5002     } else if (ID == ".amdhsa_reserve_xnack_mask") {
5003       if (IVersion.Major < 8)
5004         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5005       if (!isUInt<1>(Val))
5006         return OutOfRangeError(ValRange);
5007       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5008         return getParser().Error(IDRange.Start,
5009             ".amdhsa_reserve_xnack_mask does not match target id", IDRange);
5010     } else if (ID == ".amdhsa_float_round_mode_32") {
5011       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5012                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
5013     } else if (ID == ".amdhsa_float_round_mode_16_64") {
5014       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5015                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
5016     } else if (ID == ".amdhsa_float_denorm_mode_32") {
5017       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5018                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
5019     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5020       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5021                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
5022                        ValRange);
5023     } else if (ID == ".amdhsa_dx10_clamp") {
5024       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5025                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
5026     } else if (ID == ".amdhsa_ieee_mode") {
5027       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
5028                        Val, ValRange);
5029     } else if (ID == ".amdhsa_fp16_overflow") {
5030       if (IVersion.Major < 9)
5031         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5032       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
5033                        ValRange);
5034     } else if (ID == ".amdhsa_tg_split") {
5035       if (!isGFX90A())
5036         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5037       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
5038                        ValRange);
5039     } else if (ID == ".amdhsa_workgroup_processor_mode") {
5040       if (IVersion.Major < 10)
5041         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5042       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
5043                        ValRange);
5044     } else if (ID == ".amdhsa_memory_ordered") {
5045       if (IVersion.Major < 10)
5046         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5047       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
5048                        ValRange);
5049     } else if (ID == ".amdhsa_forward_progress") {
5050       if (IVersion.Major < 10)
5051         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5052       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
5053                        ValRange);
5054     } else if (ID == ".amdhsa_shared_vgpr_count") {
5055       if (IVersion.Major < 10)
5056         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5057       SharedVGPRCount = Val;
5058       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5059                        COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val,
5060                        ValRange);
5061     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5062       PARSE_BITS_ENTRY(
5063           KD.compute_pgm_rsrc2,
5064           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
5065           ValRange);
5066     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5067       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5068                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5069                        Val, ValRange);
5070     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5071       PARSE_BITS_ENTRY(
5072           KD.compute_pgm_rsrc2,
5073           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
5074           ValRange);
5075     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5076       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5077                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5078                        Val, ValRange);
5079     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5080       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5081                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5082                        Val, ValRange);
5083     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5084       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5085                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5086                        Val, ValRange);
5087     } else if (ID == ".amdhsa_exception_int_div_zero") {
5088       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5089                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5090                        Val, ValRange);
5091     } else {
5092       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5093     }
5094 
5095 #undef PARSE_BITS_ENTRY
5096   }
5097 
5098   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
5099     return TokError(".amdhsa_next_free_vgpr directive is required");
5100 
5101   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
5102     return TokError(".amdhsa_next_free_sgpr directive is required");
5103 
5104   unsigned VGPRBlocks;
5105   unsigned SGPRBlocks;
5106   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5107                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5108                          EnableWavefrontSize32, NextFreeVGPR,
5109                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5110                          SGPRBlocks))
5111     return true;
5112 
5113   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5114           VGPRBlocks))
5115     return OutOfRangeError(VGPRRange);
5116   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5117                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
5118 
5119   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5120           SGPRBlocks))
5121     return OutOfRangeError(SGPRRange);
5122   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5123                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
5124                   SGPRBlocks);
5125 
5126   if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5127     return TokError("amdgpu_user_sgpr_count smaller than implied by "
5128                     "enabled user SGPRs");
5129 
5130   unsigned UserSGPRCount =
5131       ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5132 
5133   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5134     return TokError("too many user SGPRs enabled");
5135   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
5136                   UserSGPRCount);
5137 
5138   if (isGFX90A()) {
5139     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
5140       return TokError(".amdhsa_accum_offset directive is required");
5141     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5142       return TokError("accum_offset should be in range [4..256] in "
5143                       "increments of 4");
5144     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5145       return TokError("accum_offset exceeds total VGPR allocation");
5146     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5147                     (AccumOffset / 4 - 1));
5148   }
5149 
5150   if (IVersion.Major == 10) {
5151     // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
5152     if (SharedVGPRCount && EnableWavefrontSize32) {
5153       return TokError("shared_vgpr_count directive not valid on "
5154                       "wavefront size 32");
5155     }
5156     if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5157       return TokError("shared_vgpr_count*2 + "
5158                       "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5159                       "exceed 63");
5160     }
5161   }
5162 
5163   getTargetStreamer().EmitAmdhsaKernelDescriptor(
5164       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
5165       ReserveFlatScr);
5166   return false;
5167 }
5168 
5169 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
5170   uint32_t Major;
5171   uint32_t Minor;
5172 
5173   if (ParseDirectiveMajorMinor(Major, Minor))
5174     return true;
5175 
5176   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
5177   return false;
5178 }
5179 
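/// Parse the .hsa_code_object_isa directive. With no arguments the ISA version
/// of the targeted GPU is used; otherwise the expected form is (illustrative):
///   .hsa_code_object_isa 7, 0, 0, "AMD", "AMDGPU"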
5180 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
5181   uint32_t Major;
5182   uint32_t Minor;
5183   uint32_t Stepping;
5184   StringRef VendorName;
5185   StringRef ArchName;
5186 
5187   // If this directive has no arguments, then use the ISA version for the
5188   // targeted GPU.
5189   if (isToken(AsmToken::EndOfStatement)) {
5190     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5191     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
5192                                                         ISA.Stepping,
5193                                                         "AMD", "AMDGPU");
5194     return false;
5195   }
5196 
5197   if (ParseDirectiveMajorMinor(Major, Minor))
5198     return true;
5199 
5200   if (!trySkipToken(AsmToken::Comma))
5201     return TokError("stepping version number required, comma expected");
5202 
5203   if (ParseAsAbsoluteExpression(Stepping))
5204     return TokError("invalid stepping version");
5205 
5206   if (!trySkipToken(AsmToken::Comma))
5207     return TokError("vendor name required, comma expected");
5208 
5209   if (!parseString(VendorName, "invalid vendor name"))
5210     return true;
5211 
5212   if (!trySkipToken(AsmToken::Comma))
5213     return TokError("arch name required, comma expected");
5214 
5215   if (!parseString(ArchName, "invalid arch name"))
5216     return true;
5217 
5218   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
5219                                                       VendorName, ArchName);
5220   return false;
5221 }
5222 
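// Parse a single "<key> = <value>" entry of an .amd_kernel_code_t block and
// check target-specific constraints (wavefront size, WGP mode, etc.).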
5223 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5224                                                amd_kernel_code_t &Header) {
5225   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5226   // assembly for backwards compatibility.
5227   if (ID == "max_scratch_backing_memory_byte_size") {
5228     Parser.eatToEndOfStatement();
5229     return false;
5230   }
5231 
5232   SmallString<40> ErrStr;
5233   raw_svector_ostream Err(ErrStr);
5234   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5235     return TokError(Err.str());
5236   }
5237   Lex();
5238 
5239   if (ID == "enable_wavefront_size32") {
5240     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5241       if (!isGFX10Plus())
5242         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5243       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5244         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5245     } else {
5246       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5247         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5248     }
5249   }
5250 
5251   if (ID == "wavefront_size") {
5252     if (Header.wavefront_size == 5) {
5253       if (!isGFX10Plus())
5254         return TokError("wavefront_size=5 is only allowed on GFX10+");
5255       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5256         return TokError("wavefront_size=5 requires +WavefrontSize32");
5257     } else if (Header.wavefront_size == 6) {
5258       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5259         return TokError("wavefront_size=6 requires +WavefrontSize64");
5260     }
5261   }
5262 
5263   if (ID == "enable_wgp_mode") {
5264     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5265         !isGFX10Plus())
5266       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5267   }
5268 
5269   if (ID == "enable_mem_ordered") {
5270     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5271         !isGFX10Plus())
5272       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5273   }
5274 
5275   if (ID == "enable_fwd_progress") {
5276     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5277         !isGFX10Plus())
5278       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5279   }
5280 
5281   return false;
5282 }
5283 
5284 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5285   amd_kernel_code_t Header;
5286   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5287 
5288   while (true) {
5289     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
5290     // will set the current token to EndOfStatement.
5291     while (trySkipToken(AsmToken::EndOfStatement));
5292 
5293     StringRef ID;
5294     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5295       return true;
5296 
5297     if (ID == ".end_amd_kernel_code_t")
5298       break;
5299 
5300     if (ParseAMDKernelCodeTValue(ID, Header))
5301       return true;
5302   }
5303 
5304   getTargetStreamer().EmitAMDKernelCodeT(Header);
5305 
5306   return false;
5307 }
5308 
5309 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5310   StringRef KernelName;
5311   if (!parseId(KernelName, "expected symbol name"))
5312     return true;
5313 
5314   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5315                                            ELF::STT_AMDGPU_HSA_KERNEL);
5316 
5317   KernelScope.initialize(getContext());
5318   return false;
5319 }
5320 
5321 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5322   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5323     return Error(getLoc(),
5324                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5325                  "architectures");
5326   }
5327 
5328   auto TargetIDDirective = getLexer().getTok().getStringContents();
5329   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5330     return Error(getParser().getTok().getLoc(), "target id must match options");
5331 
5332   getTargetStreamer().EmitISAVersion();
5333   Lex();
5334 
5335   return false;
5336 }
5337 
5338 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5339   const char *AssemblerDirectiveBegin;
5340   const char *AssemblerDirectiveEnd;
5341   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5342       isHsaAbiVersion3AndAbove(&getSTI())
5343           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5344                             HSAMD::V3::AssemblerDirectiveEnd)
5345           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5346                             HSAMD::AssemblerDirectiveEnd);
5347 
5348   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5349     return Error(getLoc(),
5350                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5351                  "not available on non-amdhsa OSes")).str());
5352   }
5353 
5354   std::string HSAMetadataString;
5355   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5356                           HSAMetadataString))
5357     return true;
5358 
5359   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5360     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5361       return Error(getLoc(), "invalid HSA metadata");
5362   } else {
5363     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5364       return Error(getLoc(), "invalid HSA metadata");
5365   }
5366 
5367   return false;
5368 }
5369 
5370 /// Common code to parse out a block of text (typically YAML) between start and
5371 /// end directives.
5372 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5373                                           const char *AssemblerDirectiveEnd,
5374                                           std::string &CollectString) {
5375 
5376   raw_string_ostream CollectStream(CollectString);
5377 
5378   getLexer().setSkipSpace(false);
5379 
5380   bool FoundEnd = false;
5381   while (!isToken(AsmToken::Eof)) {
5382     while (isToken(AsmToken::Space)) {
5383       CollectStream << getTokenStr();
5384       Lex();
5385     }
5386 
5387     if (trySkipId(AssemblerDirectiveEnd)) {
5388       FoundEnd = true;
5389       break;
5390     }
5391 
5392     CollectStream << Parser.parseStringToEndOfStatement()
5393                   << getContext().getAsmInfo()->getSeparatorString();
5394 
5395     Parser.eatToEndOfStatement();
5396   }
5397 
5398   getLexer().setSkipSpace(true);
5399 
5400   if (isToken(AsmToken::Eof) && !FoundEnd) {
5401     return TokError(Twine("expected directive ") +
5402                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5403   }
5404 
5405   CollectStream.flush();
5406   return false;
5407 }
5408 
5409 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5410 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5411   std::string String;
5412   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5413                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5414     return true;
5415 
5416   auto PALMetadata = getTargetStreamer().getPALMetadata();
5417   if (!PALMetadata->setFromString(String))
5418     return Error(getLoc(), "invalid PAL metadata");
5419   return false;
5420 }
5421 
5422 /// Parse the assembler directive for old linear-format PAL metadata.
5423 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5424   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5425     return Error(getLoc(),
5426                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5427                  "not available on non-amdpal OSes")).str());
5428   }
5429 
5430   auto PALMetadata = getTargetStreamer().getPALMetadata();
5431   PALMetadata->setLegacy();
5432   for (;;) {
5433     uint32_t Key, Value;
5434     if (ParseAsAbsoluteExpression(Key)) {
5435       return TokError(Twine("invalid value in ") +
5436                       Twine(PALMD::AssemblerDirective));
5437     }
5438     if (!trySkipToken(AsmToken::Comma)) {
5439       return TokError(Twine("expected an even number of values in ") +
5440                       Twine(PALMD::AssemblerDirective));
5441     }
5442     if (ParseAsAbsoluteExpression(Value)) {
5443       return TokError(Twine("invalid value in ") +
5444                       Twine(PALMD::AssemblerDirective));
5445     }
5446     PALMetadata->setRegister(Key, Value);
5447     if (!trySkipToken(AsmToken::Comma))
5448       break;
5449   }
5450   return false;
5451 }
5452 
5453 /// ParseDirectiveAMDGPULDS
5454 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
5455 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5456   if (getParser().checkForValidSection())
5457     return true;
5458 
5459   StringRef Name;
5460   SMLoc NameLoc = getLoc();
5461   if (getParser().parseIdentifier(Name))
5462     return TokError("expected identifier in directive");
5463 
5464   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5465   if (parseToken(AsmToken::Comma, "expected ','"))
5466     return true;
5467 
5468   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5469 
5470   int64_t Size;
5471   SMLoc SizeLoc = getLoc();
5472   if (getParser().parseAbsoluteExpression(Size))
5473     return true;
5474   if (Size < 0)
5475     return Error(SizeLoc, "size must be non-negative");
5476   if (Size > LocalMemorySize)
5477     return Error(SizeLoc, "size is too large");
5478 
5479   int64_t Alignment = 4;
5480   if (trySkipToken(AsmToken::Comma)) {
5481     SMLoc AlignLoc = getLoc();
5482     if (getParser().parseAbsoluteExpression(Alignment))
5483       return true;
5484     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5485       return Error(AlignLoc, "alignment must be a power of two");
5486 
5487     // Alignment larger than the size of LDS is possible in theory, as long
5488     // as the linker manages to place the symbol at address 0, but we do want
5489     // to make sure the alignment fits nicely into a 32-bit integer.
5490     if (Alignment >= 1u << 31)
5491       return Error(AlignLoc, "alignment is too large");
5492   }
5493 
5494   if (parseEOL())
5495     return true;
5496 
5497   Symbol->redefineIfPossible();
5498   if (!Symbol->isUndefined())
5499     return Error(NameLoc, "invalid symbol redefinition");
5500 
5501   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5502   return false;
5503 }
5504 
5505 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5506   StringRef IDVal = DirectiveID.getString();
5507 
5508   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5509     if (IDVal == ".amdhsa_kernel")
5510      return ParseDirectiveAMDHSAKernel();
5511 
5512     // TODO: Restructure/combine with PAL metadata directive.
5513     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5514       return ParseDirectiveHSAMetadata();
5515   } else {
5516     if (IDVal == ".hsa_code_object_version")
5517       return ParseDirectiveHSACodeObjectVersion();
5518 
5519     if (IDVal == ".hsa_code_object_isa")
5520       return ParseDirectiveHSACodeObjectISA();
5521 
5522     if (IDVal == ".amd_kernel_code_t")
5523       return ParseDirectiveAMDKernelCodeT();
5524 
5525     if (IDVal == ".amdgpu_hsa_kernel")
5526       return ParseDirectiveAMDGPUHsaKernel();
5527 
5528     if (IDVal == ".amd_amdgpu_isa")
5529       return ParseDirectiveISAVersion();
5530 
5531     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5532       return ParseDirectiveHSAMetadata();
5533   }
5534 
5535   if (IDVal == ".amdgcn_target")
5536     return ParseDirectiveAMDGCNTarget();
5537 
5538   if (IDVal == ".amdgpu_lds")
5539     return ParseDirectiveAMDGPULDS();
5540 
5541   if (IDVal == PALMD::AssemblerDirectiveBegin)
5542     return ParseDirectivePALMetadataBegin();
5543 
5544   if (IDVal == PALMD::AssemblerDirective)
5545     return ParseDirectivePALMetadata();
5546 
5547   return true;
5548 }
5549 
5550 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5551                                            unsigned RegNo) {
5552 
5553   if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
5554     return isGFX9Plus();
5555 
5556   // GFX10 has 2 more SGPRs: 104 and 105.
5557   if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
5558     return hasSGPR104_SGPR105();
5559 
5560   switch (RegNo) {
5561   case AMDGPU::SRC_SHARED_BASE:
5562   case AMDGPU::SRC_SHARED_LIMIT:
5563   case AMDGPU::SRC_PRIVATE_BASE:
5564   case AMDGPU::SRC_PRIVATE_LIMIT:
5565   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5566     return isGFX9Plus();
5567   case AMDGPU::TBA:
5568   case AMDGPU::TBA_LO:
5569   case AMDGPU::TBA_HI:
5570   case AMDGPU::TMA:
5571   case AMDGPU::TMA_LO:
5572   case AMDGPU::TMA_HI:
5573     return !isGFX9Plus();
5574   case AMDGPU::XNACK_MASK:
5575   case AMDGPU::XNACK_MASK_LO:
5576   case AMDGPU::XNACK_MASK_HI:
5577     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5578   case AMDGPU::SGPR_NULL:
5579     return isGFX10Plus();
5580   default:
5581     break;
5582   }
5583 
5584   if (isCI())
5585     return true;
5586 
5587   if (isSI() || isGFX10Plus()) {
5588     // No flat_scr on SI.
5589     // On GFX10 flat scratch is not a valid register operand and can only be
5590     // accessed with s_setreg/s_getreg.
5591     switch (RegNo) {
5592     case AMDGPU::FLAT_SCR:
5593     case AMDGPU::FLAT_SCR_LO:
5594     case AMDGPU::FLAT_SCR_HI:
5595       return false;
5596     default:
5597       return true;
5598     }
5599   }
5600 
5601   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5602   // SI/CI have.
5603   if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
5604     return hasSGPR102_SGPR103();
5605 
5606   return true;
5607 }
5608 
5609 OperandMatchResultTy
5610 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5611                               OperandMode Mode) {
5612   // Try to parse with a custom parser
5613   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5614 
5615   // If we successfully parsed the operand or if there was an error parsing,
5616   // we are done.
5617   //
5618   // If we are parsing after we reach EndOfStatement then this means we
5619   // are appending default values to the Operands list. This is only done
5620   // by a custom parser, so we shouldn't continue on to the generic parsing.
5621   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5622       isToken(AsmToken::EndOfStatement))
5623     return ResTy;
5624 
5625   SMLoc RBraceLoc;
5626   SMLoc LBraceLoc = getLoc();
5627   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5628     unsigned Prefix = Operands.size();
5629 
5630     for (;;) {
5631       auto Loc = getLoc();
5632       ResTy = parseReg(Operands);
5633       if (ResTy == MatchOperand_NoMatch)
5634         Error(Loc, "expected a register");
5635       if (ResTy != MatchOperand_Success)
5636         return MatchOperand_ParseFail;
5637 
5638       RBraceLoc = getLoc();
5639       if (trySkipToken(AsmToken::RBrac))
5640         break;
5641 
5642       if (!skipToken(AsmToken::Comma,
5643                      "expected a comma or a closing square bracket")) {
5644         return MatchOperand_ParseFail;
5645       }
5646     }
5647 
5648     if (Operands.size() - Prefix > 1) {
5649       Operands.insert(Operands.begin() + Prefix,
5650                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5651       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5652     }
5653 
5654     return MatchOperand_Success;
5655   }
5656 
5657   return parseRegOrImm(Operands);
5658 }
5659 
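// Strip a trailing encoding-selection suffix from the mnemonic and record the
// corresponding forced encoding, e.g. "v_add_f32_e64" forces the 64-bit
// encoding and "v_mov_b32_sdwa" forces SDWA; the suffix is removed from the
// returned mnemonic.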
5660 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5661   // Clear any forced encodings from the previous instruction.
5662   setForcedEncodingSize(0);
5663   setForcedDPP(false);
5664   setForcedSDWA(false);
5665 
5666   if (Name.endswith("_e64")) {
5667     setForcedEncodingSize(64);
5668     return Name.substr(0, Name.size() - 4);
5669   } else if (Name.endswith("_e32")) {
5670     setForcedEncodingSize(32);
5671     return Name.substr(0, Name.size() - 4);
5672   } else if (Name.endswith("_dpp")) {
5673     setForcedDPP(true);
5674     return Name.substr(0, Name.size() - 4);
5675   } else if (Name.endswith("_sdwa")) {
5676     setForcedSDWA(true);
5677     return Name.substr(0, Name.size() - 5);
5678   }
5679   return Name;
5680 }
5681 
5682 static void applyMnemonicAliases(StringRef &Mnemonic,
5683                                  const FeatureBitset &Features,
5684                                  unsigned VariantID);
5685 
5686 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5687                                        StringRef Name,
5688                                        SMLoc NameLoc, OperandVector &Operands) {
5689   // Add the instruction mnemonic
5690   Name = parseMnemonicSuffix(Name);
5691 
5692   // If the target architecture uses MnemonicAlias, call it here to parse
5693   // operands correctly.
5694   applyMnemonicAliases(Name, getAvailableFeatures(), 0);
5695 
5696   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5697 
5698   bool IsMIMG = Name.startswith("image_");
5699 
5700   while (!trySkipToken(AsmToken::EndOfStatement)) {
5701     OperandMode Mode = OperandMode_Default;
5702     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5703       Mode = OperandMode_NSA;
5704     CPolSeen = 0;
5705     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5706 
5707     if (Res != MatchOperand_Success) {
5708       checkUnsupportedInstruction(Name, NameLoc);
5709       if (!Parser.hasPendingError()) {
5710         // FIXME: use real operand location rather than the current location.
5711         StringRef Msg =
5712           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5713                                             "not a valid operand.";
5714         Error(getLoc(), Msg);
5715       }
5716       while (!trySkipToken(AsmToken::EndOfStatement)) {
5717         lex();
5718       }
5719       return true;
5720     }
5721 
5722     // Eat the comma or space if there is one.
5723     trySkipToken(AsmToken::Comma);
5724   }
5725 
5726   return false;
5727 }
5728 
5729 //===----------------------------------------------------------------------===//
5730 // Utility functions
5731 //===----------------------------------------------------------------------===//
5732 
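// Parse an integer operand written as <Prefix>:<expression>, e.g.
// (illustrative) "offset:16". Returns MatchOperand_NoMatch if the prefix is
// absent.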
5733 OperandMatchResultTy
5734 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5735 
5736   if (!trySkipId(Prefix, AsmToken::Colon))
5737     return MatchOperand_NoMatch;
5738 
5739   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5740 }
5741 
5742 OperandMatchResultTy
5743 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5744                                     AMDGPUOperand::ImmTy ImmTy,
5745                                     bool (*ConvertResult)(int64_t&)) {
5746   SMLoc S = getLoc();
5747   int64_t Value = 0;
5748 
5749   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5750   if (Res != MatchOperand_Success)
5751     return Res;
5752 
5753   if (ConvertResult && !ConvertResult(Value)) {
5754     Error(S, "invalid " + StringRef(Prefix) + " value.");
5755   }
5756 
5757   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5758   return MatchOperand_Success;
5759 }
5760 
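// Parse an operand written as <Prefix>:[b0,b1,...], where each element must
// be 0 or 1 and at most 4 elements are accepted. The bits are packed into a
// single immediate, e.g. (illustrative) "op_sel:[0,1,0,0]".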
5761 OperandMatchResultTy
5762 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5763                                              OperandVector &Operands,
5764                                              AMDGPUOperand::ImmTy ImmTy,
5765                                              bool (*ConvertResult)(int64_t&)) {
5766   SMLoc S = getLoc();
5767   if (!trySkipId(Prefix, AsmToken::Colon))
5768     return MatchOperand_NoMatch;
5769 
5770   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5771     return MatchOperand_ParseFail;
5772 
5773   unsigned Val = 0;
5774   const unsigned MaxSize = 4;
5775 
5776   // FIXME: How to verify the number of elements matches the number of src
5777   // operands?
5778   for (int I = 0; ; ++I) {
5779     int64_t Op;
5780     SMLoc Loc = getLoc();
5781     if (!parseExpr(Op))
5782       return MatchOperand_ParseFail;
5783 
5784     if (Op != 0 && Op != 1) {
5785       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5786       return MatchOperand_ParseFail;
5787     }
5788 
5789     Val |= (Op << I);
5790 
5791     if (trySkipToken(AsmToken::RBrac))
5792       break;
5793 
5794     if (I + 1 == MaxSize) {
5795       Error(getLoc(), "expected a closing square bracket");
5796       return MatchOperand_ParseFail;
5797     }
5798 
5799     if (!skipToken(AsmToken::Comma, "expected a comma"))
5800       return MatchOperand_ParseFail;
5801   }
5802 
5803   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5804   return MatchOperand_Success;
5805 }
5806 
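// Parse a named bit modifier: the bare name sets the bit and the "no"-prefixed
// form clears it, e.g. "gds" yields 1 and "nogds" yields 0.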
5807 OperandMatchResultTy
5808 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5809                                AMDGPUOperand::ImmTy ImmTy) {
5810   int64_t Bit;
5811   SMLoc S = getLoc();
5812 
5813   if (trySkipId(Name)) {
5814     Bit = 1;
5815   } else if (trySkipId("no", Name)) {
5816     Bit = 0;
5817   } else {
5818     return MatchOperand_NoMatch;
5819   }
5820 
5821   if (Name == "r128" && !hasMIMG_R128()) {
5822     Error(S, "r128 modifier is not supported on this GPU");
5823     return MatchOperand_ParseFail;
5824   }
5825   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5826     Error(S, "a16 modifier is not supported on this GPU");
5827     return MatchOperand_ParseFail;
5828   }
5829 
5830   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5831     ImmTy = AMDGPUOperand::ImmTyR128A16;
5832 
5833   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5834   return MatchOperand_Success;
5835 }
5836 
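// Parse a cache policy modifier. On most targets these are glc/slc/dlc/scc and
// their "no"-prefixed forms; on gfx940, instructions other than s_* use
// sc0/sc1/nt instead. Repeated modifiers on one instruction are rejected via
// CPolSeen.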
5837 OperandMatchResultTy
5838 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5839   unsigned CPolOn = 0;
5840   unsigned CPolOff = 0;
5841   SMLoc S = getLoc();
5842 
5843   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5844   if (isGFX940() && !Mnemo.startswith("s_")) {
5845     if (trySkipId("sc0"))
5846       CPolOn = AMDGPU::CPol::SC0;
5847     else if (trySkipId("nosc0"))
5848       CPolOff = AMDGPU::CPol::SC0;
5849     else if (trySkipId("nt"))
5850       CPolOn = AMDGPU::CPol::NT;
5851     else if (trySkipId("nont"))
5852       CPolOff = AMDGPU::CPol::NT;
5853     else if (trySkipId("sc1"))
5854       CPolOn = AMDGPU::CPol::SC1;
5855     else if (trySkipId("nosc1"))
5856       CPolOff = AMDGPU::CPol::SC1;
5857     else
5858       return MatchOperand_NoMatch;
5859   }
5860   else if (trySkipId("glc"))
5861     CPolOn = AMDGPU::CPol::GLC;
5862   else if (trySkipId("noglc"))
5863     CPolOff = AMDGPU::CPol::GLC;
5864   else if (trySkipId("slc"))
5865     CPolOn = AMDGPU::CPol::SLC;
5866   else if (trySkipId("noslc"))
5867     CPolOff = AMDGPU::CPol::SLC;
5868   else if (trySkipId("dlc"))
5869     CPolOn = AMDGPU::CPol::DLC;
5870   else if (trySkipId("nodlc"))
5871     CPolOff = AMDGPU::CPol::DLC;
5872   else if (trySkipId("scc"))
5873     CPolOn = AMDGPU::CPol::SCC;
5874   else if (trySkipId("noscc"))
5875     CPolOff = AMDGPU::CPol::SCC;
5876   else
5877     return MatchOperand_NoMatch;
5878 
5879   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5880     Error(S, "dlc modifier is not supported on this GPU");
5881     return MatchOperand_ParseFail;
5882   }
5883 
5884   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5885     Error(S, "scc modifier is not supported on this GPU");
5886     return MatchOperand_ParseFail;
5887   }
5888 
5889   if (CPolSeen & (CPolOn | CPolOff)) {
5890     Error(S, "duplicate cache policy modifier");
5891     return MatchOperand_ParseFail;
5892   }
5893 
5894   CPolSeen |= (CPolOn | CPolOff);
5895 
5896   for (unsigned I = 1; I != Operands.size(); ++I) {
5897     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5898     if (Op.isCPol()) {
5899       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5900       return MatchOperand_Success;
5901     }
5902   }
5903 
5904   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5905                                               AMDGPUOperand::ImmTyCPol));
5906 
5907   return MatchOperand_Success;
5908 }
5909 
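// Append the immediate recorded for ImmT in OptionalIdx to Inst; if the
// operand was not present in the source, append Default instead.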
5910 static void addOptionalImmOperand(
5911   MCInst& Inst, const OperandVector& Operands,
5912   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5913   AMDGPUOperand::ImmTy ImmT,
5914   int64_t Default = 0) {
5915   auto i = OptionalIdx.find(ImmT);
5916   if (i != OptionalIdx.end()) {
5917     unsigned Idx = i->second;
5918     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5919   } else {
5920     Inst.addOperand(MCOperand::createImm(Default));
5921   }
5922 }
5923 
5924 OperandMatchResultTy
5925 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5926                                        StringRef &Value,
5927                                        SMLoc &StringLoc) {
5928   if (!trySkipId(Prefix, AsmToken::Colon))
5929     return MatchOperand_NoMatch;
5930 
5931   StringLoc = getLoc();
5932   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5933                                                   : MatchOperand_ParseFail;
5934 }
5935 
5936 //===----------------------------------------------------------------------===//
5937 // MTBUF format
5938 //===----------------------------------------------------------------------===//
5939 
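// Try to parse a "<Pref>:<value>" field. Returns false only on a hard error
// (parse failure or an out-of-range value); returns true both when the field
// was parsed into Fmt and when the prefix was simply absent.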
5940 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5941                                   int64_t MaxVal,
5942                                   int64_t &Fmt) {
5943   int64_t Val;
5944   SMLoc Loc = getLoc();
5945 
5946   auto Res = parseIntWithPrefix(Pref, Val);
5947   if (Res == MatchOperand_ParseFail)
5948     return false;
5949   if (Res == MatchOperand_NoMatch)
5950     return true;
5951 
5952   if (Val < 0 || Val > MaxVal) {
5953     Error(Loc, Twine("out of range ", StringRef(Pref)));
5954     return false;
5955   }
5956 
5957   Fmt = Val;
5958   return true;
5959 }
5960 
5961 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5962 // values to live in a joint format operand in the MCInst encoding.
5963 OperandMatchResultTy
5964 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5965   using namespace llvm::AMDGPU::MTBUFFormat;
5966 
5967   int64_t Dfmt = DFMT_UNDEF;
5968   int64_t Nfmt = NFMT_UNDEF;
5969 
5970   // dfmt and nfmt can appear in either order, and each is optional.
5971   for (int I = 0; I < 2; ++I) {
5972     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5973       return MatchOperand_ParseFail;
5974 
5975     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5976       return MatchOperand_ParseFail;
5977     }
5978     // Skip optional comma between dfmt/nfmt
5979     // but guard against 2 commas following each other.
5980     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5981         !peekToken().is(AsmToken::Comma)) {
5982       trySkipToken(AsmToken::Comma);
5983     }
5984   }
5985 
5986   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5987     return MatchOperand_NoMatch;
5988 
5989   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5990   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5991 
5992   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5993   return MatchOperand_Success;
5994 }
5995 
5996 OperandMatchResultTy
5997 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5998   using namespace llvm::AMDGPU::MTBUFFormat;
5999 
6000   int64_t Fmt = UFMT_UNDEF;
6001 
6002   if (!tryParseFmt("format", UFMT_MAX, Fmt))
6003     return MatchOperand_ParseFail;
6004 
6005   if (Fmt == UFMT_UNDEF)
6006     return MatchOperand_NoMatch;
6007 
6008   Format = Fmt;
6009   return MatchOperand_Success;
6010 }
6011 
6012 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6013                                     int64_t &Nfmt,
6014                                     StringRef FormatStr,
6015                                     SMLoc Loc) {
6016   using namespace llvm::AMDGPU::MTBUFFormat;
6017   int64_t Format;
6018 
6019   Format = getDfmt(FormatStr);
6020   if (Format != DFMT_UNDEF) {
6021     Dfmt = Format;
6022     return true;
6023   }
6024 
6025   Format = getNfmt(FormatStr, getSTI());
6026   if (Format != NFMT_UNDEF) {
6027     Nfmt = Format;
6028     return true;
6029   }
6030 
6031   Error(Loc, "unsupported format");
6032   return false;
6033 }
6034 
6035 OperandMatchResultTy
6036 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
6037                                           SMLoc FormatLoc,
6038                                           int64_t &Format) {
6039   using namespace llvm::AMDGPU::MTBUFFormat;
6040 
6041   int64_t Dfmt = DFMT_UNDEF;
6042   int64_t Nfmt = NFMT_UNDEF;
6043   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
6044     return MatchOperand_ParseFail;
6045 
6046   if (trySkipToken(AsmToken::Comma)) {
6047     StringRef Str;
6048     SMLoc Loc = getLoc();
6049     if (!parseId(Str, "expected a format string") ||
6050         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
6051       return MatchOperand_ParseFail;
6052     }
6053     if (Dfmt == DFMT_UNDEF) {
6054       Error(Loc, "duplicate numeric format");
6055       return MatchOperand_ParseFail;
6056     } else if (Nfmt == NFMT_UNDEF) {
6057       Error(Loc, "duplicate data format");
6058       return MatchOperand_ParseFail;
6059     }
6060   }
6061 
6062   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6063   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6064 
6065   if (isGFX10Plus()) {
6066     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
6067     if (Ufmt == UFMT_UNDEF) {
6068       Error(FormatLoc, "unsupported format");
6069       return MatchOperand_ParseFail;
6070     }
6071     Format = Ufmt;
6072   } else {
6073     Format = encodeDfmtNfmt(Dfmt, Nfmt);
6074   }
6075 
6076   return MatchOperand_Success;
6077 }
6078 
6079 OperandMatchResultTy
6080 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6081                                             SMLoc Loc,
6082                                             int64_t &Format) {
6083   using namespace llvm::AMDGPU::MTBUFFormat;
6084 
6085   auto Id = getUnifiedFormat(FormatStr, getSTI());
6086   if (Id == UFMT_UNDEF)
6087     return MatchOperand_NoMatch;
6088 
6089   if (!isGFX10Plus()) {
6090     Error(Loc, "unified format is not supported on this GPU");
6091     return MatchOperand_ParseFail;
6092   }
6093 
6094   Format = Id;
6095   return MatchOperand_Success;
6096 }
6097 
6098 OperandMatchResultTy
6099 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6100   using namespace llvm::AMDGPU::MTBUFFormat;
6101   SMLoc Loc = getLoc();
6102 
6103   if (!parseExpr(Format))
6104     return MatchOperand_ParseFail;
6105   if (!isValidFormatEncoding(Format, getSTI())) {
6106     Error(Loc, "out of range format");
6107     return MatchOperand_ParseFail;
6108   }
6109 
6110   return MatchOperand_Success;
6111 }
6112 
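// Parse a "format:" operand, either numeric, e.g. (illustrative) "format:22",
// or symbolic with the format name(s) enclosed in square brackets.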
6113 OperandMatchResultTy
6114 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6115   using namespace llvm::AMDGPU::MTBUFFormat;
6116 
6117   if (!trySkipId("format", AsmToken::Colon))
6118     return MatchOperand_NoMatch;
6119 
6120   if (trySkipToken(AsmToken::LBrac)) {
6121     StringRef FormatStr;
6122     SMLoc Loc = getLoc();
6123     if (!parseId(FormatStr, "expected a format string"))
6124       return MatchOperand_ParseFail;
6125 
6126     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6127     if (Res == MatchOperand_NoMatch)
6128       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6129     if (Res != MatchOperand_Success)
6130       return Res;
6131 
6132     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6133       return MatchOperand_ParseFail;
6134 
6135     return MatchOperand_Success;
6136   }
6137 
6138   return parseNumericFormat(Format);
6139 }
6140 
6141 OperandMatchResultTy
6142 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
6143   using namespace llvm::AMDGPU::MTBUFFormat;
6144 
6145   int64_t Format = getDefaultFormatEncoding(getSTI());
6146   OperandMatchResultTy Res;
6147   SMLoc Loc = getLoc();
6148 
6149   // Parse legacy format syntax.
6150   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
6151   if (Res == MatchOperand_ParseFail)
6152     return Res;
6153 
6154   bool FormatFound = (Res == MatchOperand_Success);
6155 
6156   Operands.push_back(
6157     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6158 
6159   if (FormatFound)
6160     trySkipToken(AsmToken::Comma);
6161 
6162   if (isToken(AsmToken::EndOfStatement)) {
6163     // We are expecting an soffset operand,
6164     // but let the matcher handle the error.
6165     return MatchOperand_Success;
6166   }
6167 
6168   // Parse soffset.
6169   Res = parseRegOrImm(Operands);
6170   if (Res != MatchOperand_Success)
6171     return Res;
6172 
6173   trySkipToken(AsmToken::Comma);
6174 
6175   if (!FormatFound) {
6176     Res = parseSymbolicOrNumericFormat(Format);
6177     if (Res == MatchOperand_ParseFail)
6178       return Res;
6179     if (Res == MatchOperand_Success) {
6180       auto Size = Operands.size();
6181       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
6182       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6183       Op.setImm(Format);
6184     }
6185     return MatchOperand_Success;
6186   }
6187 
6188   if (isId("format") && peekToken().is(AsmToken::Colon)) {
6189     Error(getLoc(), "duplicate format");
6190     return MatchOperand_ParseFail;
6191   }
6192   return MatchOperand_Success;
6193 }
6194 
6195 //===----------------------------------------------------------------------===//
6196 // ds
6197 //===----------------------------------------------------------------------===//
6198 
6199 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
6200                                     const OperandVector &Operands) {
6201   OptionalImmIndexMap OptionalIdx;
6202 
6203   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6204     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6205 
6206     // Add the register arguments
6207     if (Op.isReg()) {
6208       Op.addRegOperands(Inst, 1);
6209       continue;
6210     }
6211 
6212     // Handle optional arguments
6213     OptionalIdx[Op.getImmTy()] = i;
6214   }
6215 
6216   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
6217   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
6218   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6219 
6220   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6221 }
6222 
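// Convert parsed DS operands into an MCInst. For the ds_swizzle_b32 opcodes
// the offset operand carries a swizzle pattern, so it is looked up as
// ImmTySwizzle rather than ImmTyOffset; an implicit m0 operand is appended
// at the end.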
6223 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
6224                                 bool IsGdsHardcoded) {
6225   OptionalImmIndexMap OptionalIdx;
6226 
6227   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6228     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6229 
6230     // Add the register arguments
6231     if (Op.isReg()) {
6232       Op.addRegOperands(Inst, 1);
6233       continue;
6234     }
6235 
6236     if (Op.isToken() && Op.getToken() == "gds") {
6237       IsGdsHardcoded = true;
6238       continue;
6239     }
6240 
6241     // Handle optional arguments
6242     OptionalIdx[Op.getImmTy()] = i;
6243   }
6244 
6245   AMDGPUOperand::ImmTy OffsetType =
6246     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
6247      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
6248      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
6249                                                       AMDGPUOperand::ImmTyOffset;
6250 
6251   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
6252 
6253   if (!IsGdsHardcoded) {
6254     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6255   }
6256   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6257 }
6258 
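// Convert parsed EXP operands into an MCInst. Sources written as 'off'
// become NoRegister, and the trailing 'en' operand is computed here: one
// enable bit per live source in the normal case, or two bits per live
// source when 'compr' is specified.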
6259 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
6260   OptionalImmIndexMap OptionalIdx;
6261 
6262   unsigned OperandIdx[4];
6263   unsigned EnMask = 0;
6264   int SrcIdx = 0;
6265 
6266   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6267     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6268 
6269     // Add the register arguments
6270     if (Op.isReg()) {
6271       assert(SrcIdx < 4);
6272       OperandIdx[SrcIdx] = Inst.size();
6273       Op.addRegOperands(Inst, 1);
6274       ++SrcIdx;
6275       continue;
6276     }
6277 
6278     if (Op.isOff()) {
6279       assert(SrcIdx < 4);
6280       OperandIdx[SrcIdx] = Inst.size();
6281       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
6282       ++SrcIdx;
6283       continue;
6284     }
6285 
6286     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
6287       Op.addImmOperands(Inst, 1);
6288       continue;
6289     }
6290 
6291     if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
6292       continue;
6293 
6294     // Handle optional arguments
6295     OptionalIdx[Op.getImmTy()] = i;
6296   }
6297 
6298   assert(SrcIdx == 4);
6299 
6300   bool Compr = false;
6301   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
6302     Compr = true;
6303     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
6304     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
6305     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
6306   }
6307 
6308   for (auto i = 0; i < SrcIdx; ++i) {
6309     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
6310       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
6311     }
6312   }
6313 
6314   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
6315   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
6316 
6317   Inst.addOperand(MCOperand::createImm(EnMask));
6318 }
6319 
6320 //===----------------------------------------------------------------------===//
6321 // s_waitcnt
6322 //===----------------------------------------------------------------------===//
6323 
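// Encode CntVal into the counter field of IntVal using the provided
// encode/decode callbacks. Returns true on failure, i.e. when the value does
// not survive a round trip through encode/decode and saturation was not
// requested. With Saturate set, an out-of-range value is clamped to the
// field's maximum instead.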
6324 static bool
6325 encodeCnt(
6326   const AMDGPU::IsaVersion ISA,
6327   int64_t &IntVal,
6328   int64_t CntVal,
6329   bool Saturate,
6330   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6331   unsigned (*decode)(const IsaVersion &Version, unsigned))
6332 {
6333   bool Failed = false;
6334 
6335   IntVal = encode(ISA, IntVal, CntVal);
6336   if (CntVal != decode(ISA, IntVal)) {
6337     if (Saturate) {
6338       IntVal = encode(ISA, IntVal, -1);
6339     } else {
6340       Failed = true;
6341     }
6342   }
6343   return Failed;
6344 }
6345 
6346 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6347 
6348   SMLoc CntLoc = getLoc();
6349   StringRef CntName = getTokenStr();
6350 
6351   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6352       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6353     return false;
6354 
6355   int64_t CntVal;
6356   SMLoc ValLoc = getLoc();
6357   if (!parseExpr(CntVal))
6358     return false;
6359 
6360   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6361 
6362   bool Failed = true;
6363   bool Sat = CntName.endswith("_sat");
6364 
6365   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6366     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6367   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6368     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6369   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6370     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6371   } else {
6372     Error(CntLoc, "invalid counter name " + CntName);
6373     return false;
6374   }
6375 
6376   if (Failed) {
6377     Error(ValLoc, "value is too large for " + CntName);
6378     return false;
6379   }
6380 
6381   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6382     return false;
6383 
6384   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6385     if (isToken(AsmToken::EndOfStatement)) {
6386       Error(getLoc(), "expected a counter name");
6387       return false;
6388     }
6389   }
6390 
6391   return true;
6392 }
6393 
6394 OperandMatchResultTy
6395 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
6396   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6397   int64_t Waitcnt = getWaitcntBitMask(ISA);
6398   SMLoc S = getLoc();
6399 
6400   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6401     while (!isToken(AsmToken::EndOfStatement)) {
6402       if (!parseCnt(Waitcnt))
6403         return MatchOperand_ParseFail;
6404     }
6405   } else {
6406     if (!parseExpr(Waitcnt))
6407       return MatchOperand_ParseFail;
6408   }
6409 
6410   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6411   return MatchOperand_Success;
6412 }
6413 
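// Parse one "field(VALUE)" term of an s_delay_alu operand and OR the encoded
// value into Delay. Field positions follow the shifts used below: instid0
// starts at bit 0, instskip at bit 4, and instid1 at bit 7.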
6414 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
6415   SMLoc FieldLoc = getLoc();
6416   StringRef FieldName = getTokenStr();
6417   if (!skipToken(AsmToken::Identifier, "expected a field name") ||
6418       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6419     return false;
6420 
6421   SMLoc ValueLoc = getLoc();
6422   StringRef ValueName = getTokenStr();
6423   if (!skipToken(AsmToken::Identifier, "expected a value name") ||
6424       !skipToken(AsmToken::RParen, "expected a right parenthesis"))
6425     return false;
6426 
6427   unsigned Shift;
6428   if (FieldName == "instid0") {
6429     Shift = 0;
6430   } else if (FieldName == "instskip") {
6431     Shift = 4;
6432   } else if (FieldName == "instid1") {
6433     Shift = 7;
6434   } else {
6435     Error(FieldLoc, "invalid field name " + FieldName);
6436     return false;
6437   }
6438 
6439   int Value;
6440   if (Shift == 4) {
6441     // Parse values for instskip.
6442     Value = StringSwitch<int>(ValueName)
6443                 .Case("SAME", 0)
6444                 .Case("NEXT", 1)
6445                 .Case("SKIP_1", 2)
6446                 .Case("SKIP_2", 3)
6447                 .Case("SKIP_3", 4)
6448                 .Case("SKIP_4", 5)
6449                 .Default(-1);
6450   } else {
6451     // Parse values for instid0 and instid1.
6452     Value = StringSwitch<int>(ValueName)
6453                 .Case("NO_DEP", 0)
6454                 .Case("VALU_DEP_1", 1)
6455                 .Case("VALU_DEP_2", 2)
6456                 .Case("VALU_DEP_3", 3)
6457                 .Case("VALU_DEP_4", 4)
6458                 .Case("TRANS32_DEP_1", 5)
6459                 .Case("TRANS32_DEP_2", 6)
6460                 .Case("TRANS32_DEP_3", 7)
6461                 .Case("FMA_ACCUM_CYCLE_1", 8)
6462                 .Case("SALU_CYCLE_1", 9)
6463                 .Case("SALU_CYCLE_2", 10)
6464                 .Case("SALU_CYCLE_3", 11)
6465                 .Default(-1);
6466   }
6467   if (Value < 0) {
6468     Error(ValueLoc, "invalid value name " + ValueName);
6469     return false;
6470   }
6471 
6472   Delay |= Value << Shift;
6473   return true;
6474 }
6475 
6476 OperandMatchResultTy
6477 AMDGPUAsmParser::parseSDelayAluOps(OperandVector &Operands) {
6478   int64_t Delay = 0;
6479   SMLoc S = getLoc();
6480 
6481   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6482     do {
6483       if (!parseDelay(Delay))
6484         return MatchOperand_ParseFail;
6485     } while (trySkipToken(AsmToken::Pipe));
6486   } else {
6487     if (!parseExpr(Delay))
6488       return MatchOperand_ParseFail;
6489   }
6490 
6491   Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
6492   return MatchOperand_Success;
6493 }
6494 
6495 bool
6496 AMDGPUOperand::isSWaitCnt() const {
6497   return isImm();
6498 }
6499 
6500 bool AMDGPUOperand::isSDelayAlu() const { return isImm(); }
6501 
6502 //===----------------------------------------------------------------------===//
6503 // DepCtr
6504 //===----------------------------------------------------------------------===//
6505 
6506 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
6507                                   StringRef DepCtrName) {
6508   switch (ErrorId) {
6509   case OPR_ID_UNKNOWN:
6510     Error(Loc, Twine("invalid counter name ", DepCtrName));
6511     return;
6512   case OPR_ID_UNSUPPORTED:
6513     Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
6514     return;
6515   case OPR_ID_DUPLICATE:
6516     Error(Loc, Twine("duplicate counter name ", DepCtrName));
6517     return;
6518   case OPR_VAL_INVALID:
6519     Error(Loc, Twine("invalid value for ", DepCtrName));
6520     return;
6521   default:
6522     assert(false);
6523   }
6524 }
6525 
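// Parse one "counter(VALUE)" term of a depctr operand. The encoded field is
// merged into DepCtr, and UsedOprMask tracks which fields have already been
// specified so that duplicates can be diagnosed by encodeDepCtr.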
6526 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
6527 
6528   using namespace llvm::AMDGPU::DepCtr;
6529 
6530   SMLoc DepCtrLoc = getLoc();
6531   StringRef DepCtrName = getTokenStr();
6532 
6533   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6534       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6535     return false;
6536 
6537   int64_t ExprVal;
6538   if (!parseExpr(ExprVal))
6539     return false;
6540 
6541   unsigned PrevOprMask = UsedOprMask;
6542   int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
6543 
6544   if (CntVal < 0) {
6545     depCtrError(DepCtrLoc, CntVal, DepCtrName);
6546     return false;
6547   }
6548 
6549   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6550     return false;
6551 
6552   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6553     if (isToken(AsmToken::EndOfStatement)) {
6554       Error(getLoc(), "expected a counter name");
6555       return false;
6556     }
6557   }
6558 
6559   unsigned CntValMask = PrevOprMask ^ UsedOprMask;
6560   DepCtr = (DepCtr & ~CntValMask) | CntVal;
6561   return true;
6562 }
6563 
6564 OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) {
6565   using namespace llvm::AMDGPU::DepCtr;
6566 
6567   int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
6568   SMLoc Loc = getLoc();
6569 
6570   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6571     unsigned UsedOprMask = 0;
6572     while (!isToken(AsmToken::EndOfStatement)) {
6573       if (!parseDepCtr(DepCtr, UsedOprMask))
6574         return MatchOperand_ParseFail;
6575     }
6576   } else {
6577     if (!parseExpr(DepCtr))
6578       return MatchOperand_ParseFail;
6579   }
6580 
6581   Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
6582   return MatchOperand_Success;
6583 }
6584 
6585 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
6586 
6587 //===----------------------------------------------------------------------===//
6588 // hwreg
6589 //===----------------------------------------------------------------------===//
6590 
6591 bool
6592 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6593                                 OperandInfoTy &Offset,
6594                                 OperandInfoTy &Width) {
6595   using namespace llvm::AMDGPU::Hwreg;
6596 
6597   // The register may be specified by name or using a numeric code
6598   HwReg.Loc = getLoc();
6599   if (isToken(AsmToken::Identifier) &&
6600       (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6601     HwReg.IsSymbolic = true;
6602     lex(); // skip register name
6603   } else if (!parseExpr(HwReg.Id, "a register name")) {
6604     return false;
6605   }
6606 
6607   if (trySkipToken(AsmToken::RParen))
6608     return true;
6609 
6610   // parse optional params
6611   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6612     return false;
6613 
6614   Offset.Loc = getLoc();
6615   if (!parseExpr(Offset.Id))
6616     return false;
6617 
6618   if (!skipToken(AsmToken::Comma, "expected a comma"))
6619     return false;
6620 
6621   Width.Loc = getLoc();
6622   return parseExpr(Width.Id) &&
6623          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6624 }
6625 
6626 bool
6627 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6628                                const OperandInfoTy &Offset,
6629                                const OperandInfoTy &Width) {
6630 
6631   using namespace llvm::AMDGPU::Hwreg;
6632 
6633   if (HwReg.IsSymbolic) {
6634     if (HwReg.Id == OPR_ID_UNSUPPORTED) {
6635       Error(HwReg.Loc,
6636             "specified hardware register is not supported on this GPU");
6637       return false;
6638     }
6639   } else {
6640     if (!isValidHwreg(HwReg.Id)) {
6641       Error(HwReg.Loc,
6642             "invalid code of hardware register: only 6-bit values are legal");
6643       return false;
6644     }
6645   }
6646   if (!isValidHwregOffset(Offset.Id)) {
6647     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6648     return false;
6649   }
6650   if (!isValidHwregWidth(Width.Id)) {
6651     Error(Width.Loc,
6652           "invalid bitfield width: only values from 1 to 32 are legal");
6653     return false;
6654   }
6655   return true;
6656 }
6657 
6658 OperandMatchResultTy
6659 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6660   using namespace llvm::AMDGPU::Hwreg;
6661 
6662   int64_t ImmVal = 0;
6663   SMLoc Loc = getLoc();
6664 
6665   if (trySkipId("hwreg", AsmToken::LParen)) {
6666     OperandInfoTy HwReg(OPR_ID_UNKNOWN);
6667     OperandInfoTy Offset(OFFSET_DEFAULT_);
6668     OperandInfoTy Width(WIDTH_DEFAULT_);
6669     if (parseHwregBody(HwReg, Offset, Width) &&
6670         validateHwreg(HwReg, Offset, Width)) {
6671       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6672     } else {
6673       return MatchOperand_ParseFail;
6674     }
6675   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6676     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6677       Error(Loc, "invalid immediate: only 16-bit values are legal");
6678       return MatchOperand_ParseFail;
6679     }
6680   } else {
6681     return MatchOperand_ParseFail;
6682   }
6683 
6684   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6685   return MatchOperand_Success;
6686 }
6687 
6688 bool AMDGPUOperand::isHwreg() const {
6689   return isImmTy(ImmTyHwreg);
6690 }
6691 
6692 //===----------------------------------------------------------------------===//
6693 // sendmsg
6694 //===----------------------------------------------------------------------===//
6695 
6696 bool
6697 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6698                                   OperandInfoTy &Op,
6699                                   OperandInfoTy &Stream) {
6700   using namespace llvm::AMDGPU::SendMsg;
6701 
6702   Msg.Loc = getLoc();
6703   if (isToken(AsmToken::Identifier) &&
6704       (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6705     Msg.IsSymbolic = true;
6706     lex(); // skip message name
6707   } else if (!parseExpr(Msg.Id, "a message name")) {
6708     return false;
6709   }
6710 
6711   if (trySkipToken(AsmToken::Comma)) {
6712     Op.IsDefined = true;
6713     Op.Loc = getLoc();
6714     if (isToken(AsmToken::Identifier) &&
6715         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6716       lex(); // skip operation name
6717     } else if (!parseExpr(Op.Id, "an operation name")) {
6718       return false;
6719     }
6720 
6721     if (trySkipToken(AsmToken::Comma)) {
6722       Stream.IsDefined = true;
6723       Stream.Loc = getLoc();
6724       if (!parseExpr(Stream.Id))
6725         return false;
6726     }
6727   }
6728 
6729   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6730 }
6731 
6732 bool
6733 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6734                                  const OperandInfoTy &Op,
6735                                  const OperandInfoTy &Stream) {
6736   using namespace llvm::AMDGPU::SendMsg;
6737 
6738   // Validation strictness depends on whether the message is specified
6739   // in symbolic or numeric form. In the latter case we only check
6740   // whether the value can be encoded at all.
6741   bool Strict = Msg.IsSymbolic;
6742 
6743   if (Strict) {
6744     if (Msg.Id == OPR_ID_UNSUPPORTED) {
6745       Error(Msg.Loc, "specified message id is not supported on this GPU");
6746       return false;
6747     }
6748   } else {
6749     if (!isValidMsgId(Msg.Id, getSTI())) {
6750       Error(Msg.Loc, "invalid message id");
6751       return false;
6752     }
6753   }
6754   if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) {
6755     if (Op.IsDefined) {
6756       Error(Op.Loc, "message does not support operations");
6757     } else {
6758       Error(Msg.Loc, "missing message operation");
6759     }
6760     return false;
6761   }
6762   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6763     Error(Op.Loc, "invalid operation id");
6764     return false;
6765   }
6766   if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) &&
6767       Stream.IsDefined) {
6768     Error(Stream.Loc, "message operation does not support streams");
6769     return false;
6770   }
6771   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6772     Error(Stream.Loc, "invalid message stream id");
6773     return false;
6774   }
6775   return true;
6776 }
6777 
6778 OperandMatchResultTy
6779 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6780   using namespace llvm::AMDGPU::SendMsg;
6781 
6782   int64_t ImmVal = 0;
6783   SMLoc Loc = getLoc();
6784 
6785   if (trySkipId("sendmsg", AsmToken::LParen)) {
6786     OperandInfoTy Msg(OPR_ID_UNKNOWN);
6787     OperandInfoTy Op(OP_NONE_);
6788     OperandInfoTy Stream(STREAM_ID_NONE_);
6789     if (parseSendMsgBody(Msg, Op, Stream) &&
6790         validateSendMsg(Msg, Op, Stream)) {
6791       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6792     } else {
6793       return MatchOperand_ParseFail;
6794     }
6795   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6796     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6797       Error(Loc, "invalid immediate: only 16-bit values are legal");
6798       return MatchOperand_ParseFail;
6799     }
6800   } else {
6801     return MatchOperand_ParseFail;
6802   }
6803 
6804   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6805   return MatchOperand_Success;
6806 }
6807 
6808 bool AMDGPUOperand::isSendMsg() const {
6809   return isImmTy(ImmTySendMsg);
6810 }
6811 
6812 //===----------------------------------------------------------------------===//
6813 // v_interp
6814 //===----------------------------------------------------------------------===//
6815 
6816 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6817   StringRef Str;
6818   SMLoc S = getLoc();
6819 
6820   if (!parseId(Str))
6821     return MatchOperand_NoMatch;
6822 
6823   int Slot = StringSwitch<int>(Str)
6824     .Case("p10", 0)
6825     .Case("p20", 1)
6826     .Case("p0", 2)
6827     .Default(-1);
6828 
6829   if (Slot == -1) {
6830     Error(S, "invalid interpolation slot");
6831     return MatchOperand_ParseFail;
6832   }
6833 
6834   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6835                                               AMDGPUOperand::ImmTyInterpSlot));
6836   return MatchOperand_Success;
6837 }
6838 
6839 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6840   StringRef Str;
6841   SMLoc S = getLoc();
6842 
6843   if (!parseId(Str))
6844     return MatchOperand_NoMatch;
6845 
6846   if (!Str.startswith("attr")) {
6847     Error(S, "invalid interpolation attribute");
6848     return MatchOperand_ParseFail;
6849   }
6850 
6851   StringRef Chan = Str.take_back(2);
6852   int AttrChan = StringSwitch<int>(Chan)
6853     .Case(".x", 0)
6854     .Case(".y", 1)
6855     .Case(".z", 2)
6856     .Case(".w", 3)
6857     .Default(-1);
6858   if (AttrChan == -1) {
6859     Error(S, "invalid or missing interpolation attribute channel");
6860     return MatchOperand_ParseFail;
6861   }
6862 
6863   Str = Str.drop_back(2).drop_front(4);
6864 
6865   uint8_t Attr;
6866   if (Str.getAsInteger(10, Attr)) {
6867     Error(S, "invalid or missing interpolation attribute number");
6868     return MatchOperand_ParseFail;
6869   }
6870 
6871   if (Attr > 63) {
6872     Error(S, "out of bounds interpolation attribute number");
6873     return MatchOperand_ParseFail;
6874   }
6875 
6876   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6877 
6878   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6879                                               AMDGPUOperand::ImmTyInterpAttr));
6880   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6881                                               AMDGPUOperand::ImmTyAttrChan));
6882   return MatchOperand_Success;
6883 }
6884 
6885 //===----------------------------------------------------------------------===//
6886 // exp
6887 //===----------------------------------------------------------------------===//
6888 
6889 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6890   using namespace llvm::AMDGPU::Exp;
6891 
6892   StringRef Str;
6893   SMLoc S = getLoc();
6894 
6895   if (!parseId(Str))
6896     return MatchOperand_NoMatch;
6897 
6898   unsigned Id = getTgtId(Str);
6899   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6900     Error(S, (Id == ET_INVALID) ?
6901                 "invalid exp target" :
6902                 "exp target is not supported on this GPU");
6903     return MatchOperand_ParseFail;
6904   }
6905 
6906   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6907                                               AMDGPUOperand::ImmTyExpTgt));
6908   return MatchOperand_Success;
6909 }
6910 
6911 //===----------------------------------------------------------------------===//
6912 // parser helpers
6913 //===----------------------------------------------------------------------===//
6914 
6915 bool
6916 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6917   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6918 }
6919 
6920 bool
6921 AMDGPUAsmParser::isId(const StringRef Id) const {
6922   return isId(getToken(), Id);
6923 }
6924 
6925 bool
6926 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6927   return getTokenKind() == Kind;
6928 }
6929 
6930 bool
6931 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6932   if (isId(Id)) {
6933     lex();
6934     return true;
6935   }
6936   return false;
6937 }
6938 
6939 bool
6940 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6941   if (isToken(AsmToken::Identifier)) {
6942     StringRef Tok = getTokenStr();
6943     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6944       lex();
6945       return true;
6946     }
6947   }
6948   return false;
6949 }
6950 
6951 bool
6952 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6953   if (isId(Id) && peekToken().is(Kind)) {
6954     lex();
6955     lex();
6956     return true;
6957   }
6958   return false;
6959 }
6960 
6961 bool
6962 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6963   if (isToken(Kind)) {
6964     lex();
6965     return true;
6966   }
6967   return false;
6968 }
6969 
6970 bool
6971 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6972                            const StringRef ErrMsg) {
6973   if (!trySkipToken(Kind)) {
6974     Error(getLoc(), ErrMsg);
6975     return false;
6976   }
6977   return true;
6978 }
6979 
6980 bool
6981 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6982   SMLoc S = getLoc();
6983 
6984   const MCExpr *Expr;
6985   if (Parser.parseExpression(Expr))
6986     return false;
6987 
6988   if (Expr->evaluateAsAbsolute(Imm))
6989     return true;
6990 
6991   if (Expected.empty()) {
6992     Error(S, "expected absolute expression");
6993   } else {
6994     Error(S, Twine("expected ", Expected) +
6995              Twine(" or an absolute expression"));
6996   }
6997   return false;
6998 }
6999 
7000 bool
7001 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
7002   SMLoc S = getLoc();
7003 
7004   const MCExpr *Expr;
7005   if (Parser.parseExpression(Expr))
7006     return false;
7007 
7008   int64_t IntVal;
7009   if (Expr->evaluateAsAbsolute(IntVal)) {
7010     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
7011   } else {
7012     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
7013   }
7014   return true;
7015 }
7016 
7017 bool
7018 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
7019   if (isToken(AsmToken::String)) {
7020     Val = getToken().getStringContents();
7021     lex();
7022     return true;
7023   } else {
7024     Error(getLoc(), ErrMsg);
7025     return false;
7026   }
7027 }
7028 
7029 bool
7030 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
7031   if (isToken(AsmToken::Identifier)) {
7032     Val = getTokenStr();
7033     lex();
7034     return true;
7035   } else {
7036     if (!ErrMsg.empty())
7037       Error(getLoc(), ErrMsg);
7038     return false;
7039   }
7040 }
7041 
7042 AsmToken
7043 AMDGPUAsmParser::getToken() const {
7044   return Parser.getTok();
7045 }
7046 
7047 AsmToken
7048 AMDGPUAsmParser::peekToken() {
7049   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
7050 }
7051 
7052 void
7053 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
7054   auto TokCount = getLexer().peekTokens(Tokens);
7055 
7056   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
7057     Tokens[Idx] = AsmToken(AsmToken::Error, "");
7058 }
7059 
7060 AsmToken::TokenKind
7061 AMDGPUAsmParser::getTokenKind() const {
7062   return getLexer().getKind();
7063 }
7064 
7065 SMLoc
7066 AMDGPUAsmParser::getLoc() const {
7067   return getToken().getLoc();
7068 }
7069 
7070 StringRef
7071 AMDGPUAsmParser::getTokenStr() const {
7072   return getToken().getString();
7073 }
7074 
7075 void
7076 AMDGPUAsmParser::lex() {
7077   Parser.Lex();
7078 }
7079 
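// Scan Operands from last to first and return the location of the first
// operand that satisfies Test; fall back to the mnemonic's location
// (operand 0) if none matches.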
7080 SMLoc
7081 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
7082                                const OperandVector &Operands) const {
7083   for (unsigned i = Operands.size() - 1; i > 0; --i) {
7084     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7085     if (Test(Op))
7086       return Op.getStartLoc();
7087   }
7088   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
7089 }
7090 
7091 SMLoc
7092 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
7093                            const OperandVector &Operands) const {
7094   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
7095   return getOperandLoc(Test, Operands);
7096 }
7097 
7098 SMLoc
7099 AMDGPUAsmParser::getRegLoc(unsigned Reg,
7100                            const OperandVector &Operands) const {
7101   auto Test = [=](const AMDGPUOperand& Op) {
7102     return Op.isRegKind() && Op.getReg() == Reg;
7103   };
7104   return getOperandLoc(Test, Operands);
7105 }
7106 
7107 SMLoc
7108 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
7109   auto Test = [](const AMDGPUOperand& Op) {
7110     return Op.IsImmKindLiteral() || Op.isExpr();
7111   };
7112   return getOperandLoc(Test, Operands);
7113 }
7114 
7115 SMLoc
7116 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
7117   auto Test = [](const AMDGPUOperand& Op) {
7118     return Op.isImmKindConst();
7119   };
7120   return getOperandLoc(Test, Operands);
7121 }
7122 
7123 //===----------------------------------------------------------------------===//
7124 // swizzle
7125 //===----------------------------------------------------------------------===//
7126 
7127 LLVM_READNONE
7128 static unsigned
7129 encodeBitmaskPerm(const unsigned AndMask,
7130                   const unsigned OrMask,
7131                   const unsigned XorMask) {
7132   using namespace llvm::AMDGPU::Swizzle;
7133 
7134   return BITMASK_PERM_ENC |
7135          (AndMask << BITMASK_AND_SHIFT) |
7136          (OrMask  << BITMASK_OR_SHIFT)  |
7137          (XorMask << BITMASK_XOR_SHIFT);
7138 }
7139 
7140 bool
7141 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
7142                                      const unsigned MinVal,
7143                                      const unsigned MaxVal,
7144                                      const StringRef ErrMsg,
7145                                      SMLoc &Loc) {
7146   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7147     return false;
7148   }
7149   Loc = getLoc();
7150   if (!parseExpr(Op)) {
7151     return false;
7152   }
7153   if (Op < MinVal || Op > MaxVal) {
7154     Error(Loc, ErrMsg);
7155     return false;
7156   }
7157 
7158   return true;
7159 }
7160 
7161 bool
7162 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
7163                                       const unsigned MinVal,
7164                                       const unsigned MaxVal,
7165                                       const StringRef ErrMsg) {
7166   SMLoc Loc;
7167   for (unsigned i = 0; i < OpNum; ++i) {
7168     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
7169       return false;
7170   }
7171 
7172   return true;
7173 }
7174 
7175 bool
7176 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7177   using namespace llvm::AMDGPU::Swizzle;
7178 
7179   int64_t Lane[LANE_NUM];
7180   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7181                            "expected a 2-bit lane id")) {
7182     Imm = QUAD_PERM_ENC;
7183     for (unsigned I = 0; I < LANE_NUM; ++I) {
7184       Imm |= Lane[I] << (LANE_SHIFT * I);
7185     }
7186     return true;
7187   }
7188   return false;
7189 }
7190 
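// BROADCAST is encoded as a bitmask permute: the and_mask clears the
// within-group bits of the lane id, the or_mask supplies the source lane
// within the group, and the xor_mask is left at zero.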
7191 bool
7192 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
7193   using namespace llvm::AMDGPU::Swizzle;
7194 
7195   SMLoc Loc;
7196   int64_t GroupSize;
7197   int64_t LaneIdx;
7198 
7199   if (!parseSwizzleOperand(GroupSize,
7200                            2, 32,
7201                            "group size must be in the interval [2,32]",
7202                            Loc)) {
7203     return false;
7204   }
7205   if (!isPowerOf2_64(GroupSize)) {
7206     Error(Loc, "group size must be a power of two");
7207     return false;
7208   }
7209   if (parseSwizzleOperand(LaneIdx,
7210                           0, GroupSize - 1,
7211                           "lane id must be in the interval [0,group size - 1]",
7212                           Loc)) {
7213     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
7214     return true;
7215   }
7216   return false;
7217 }
7218 
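// REVERSE is encoded as a bitmask permute with xor_mask = GroupSize - 1,
// which mirrors the lane order within each group of GroupSize lanes.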
7219 bool
7220 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
7221   using namespace llvm::AMDGPU::Swizzle;
7222 
7223   SMLoc Loc;
7224   int64_t GroupSize;
7225 
7226   if (!parseSwizzleOperand(GroupSize,
7227                            2, 32,
7228                            "group size must be in the interval [2,32]",
7229                            Loc)) {
7230     return false;
7231   }
7232   if (!isPowerOf2_64(GroupSize)) {
7233     Error(Loc, "group size must be a power of two");
7234     return false;
7235   }
7236 
7237   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
7238   return true;
7239 }
7240 
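// SWAP is encoded as a bitmask permute with xor_mask = GroupSize, which
// exchanges adjacent groups of GroupSize lanes.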
7241 bool
7242 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
7243   using namespace llvm::AMDGPU::Swizzle;
7244 
7245   SMLoc Loc;
7246   int64_t GroupSize;
7247 
7248   if (!parseSwizzleOperand(GroupSize,
7249                            1, 16,
7250                            "group size must be in the interval [1,16]",
7251                            Loc)) {
7252     return false;
7253   }
7254   if (!isPowerOf2_64(GroupSize)) {
7255     Error(Loc, "group size must be a power of two");
7256     return false;
7257   }
7258 
7259   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
7260   return true;
7261 }
7262 
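// Parse the 5-character control string of a BITMASK_PERM swizzle. Each
// character controls one bit of the lane id, most significant bit first:
// '0' forces the bit to 0, '1' forces it to 1, 'p' preserves it, and 'i'
// inverts it.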
7263 bool
7264 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
7265   using namespace llvm::AMDGPU::Swizzle;
7266 
7267   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7268     return false;
7269   }
7270 
7271   StringRef Ctl;
7272   SMLoc StrLoc = getLoc();
7273   if (!parseString(Ctl)) {
7274     return false;
7275   }
7276   if (Ctl.size() != BITMASK_WIDTH) {
7277     Error(StrLoc, "expected a 5-character mask");
7278     return false;
7279   }
7280 
7281   unsigned AndMask = 0;
7282   unsigned OrMask = 0;
7283   unsigned XorMask = 0;
7284 
7285   for (size_t i = 0; i < Ctl.size(); ++i) {
7286     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
7287     switch(Ctl[i]) {
7288     default:
7289       Error(StrLoc, "invalid mask");
7290       return false;
7291     case '0':
7292       break;
7293     case '1':
7294       OrMask |= Mask;
7295       break;
7296     case 'p':
7297       AndMask |= Mask;
7298       break;
7299     case 'i':
7300       AndMask |= Mask;
7301       XorMask |= Mask;
7302       break;
7303     }
7304   }
7305 
7306   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
7307   return true;
7308 }
7309 
7310 bool
7311 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
7312 
7313   SMLoc OffsetLoc = getLoc();
7314 
7315   if (!parseExpr(Imm, "a swizzle macro")) {
7316     return false;
7317   }
7318   if (!isUInt<16>(Imm)) {
7319     Error(OffsetLoc, "expected a 16-bit offset");
7320     return false;
7321   }
7322   return true;
7323 }
7324 
7325 bool
7326 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
7327   using namespace llvm::AMDGPU::Swizzle;
7328 
7329   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
7330 
7331     SMLoc ModeLoc = getLoc();
7332     bool Ok = false;
7333 
7334     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
7335       Ok = parseSwizzleQuadPerm(Imm);
7336     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
7337       Ok = parseSwizzleBitmaskPerm(Imm);
7338     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
7339       Ok = parseSwizzleBroadcast(Imm);
7340     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
7341       Ok = parseSwizzleSwap(Imm);
7342     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
7343       Ok = parseSwizzleReverse(Imm);
7344     } else {
7345       Error(ModeLoc, "expected a swizzle mode");
7346     }
7347 
7348     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
7349   }
7350 
7351   return false;
7352 }
7353 
7354 OperandMatchResultTy
7355 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
7356   SMLoc S = getLoc();
7357   int64_t Imm = 0;
7358 
7359   if (trySkipId("offset")) {
7360 
7361     bool Ok = false;
7362     if (skipToken(AsmToken::Colon, "expected a colon")) {
7363       if (trySkipId("swizzle")) {
7364         Ok = parseSwizzleMacro(Imm);
7365       } else {
7366         Ok = parseSwizzleOffset(Imm);
7367       }
7368     }
7369 
7370     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
7371 
7372     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
7373   } else {
7374     // Swizzle "offset" operand is optional.
7375     // If it is omitted, try parsing other optional operands.
7376     return parseOptionalOpr(Operands);
7377   }
7378 }
7379 
7380 bool
7381 AMDGPUOperand::isSwizzle() const {
7382   return isImmTy(ImmTySwizzle);
7383 }
7384 
7385 //===----------------------------------------------------------------------===//
7386 // VGPR Index Mode
7387 //===----------------------------------------------------------------------===//
7388 
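// Parse the body of a gpr_idx(...) macro: a comma-separated list of VGPR
// index modes collected into a bitmask. Returns OFF for an empty list and
// UNDEF on error (an unknown or duplicate mode, or a malformed list).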
7389 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
7390 
7391   using namespace llvm::AMDGPU::VGPRIndexMode;
7392 
7393   if (trySkipToken(AsmToken::RParen)) {
7394     return OFF;
7395   }
7396 
7397   int64_t Imm = 0;
7398 
7399   while (true) {
7400     unsigned Mode = 0;
7401     SMLoc S = getLoc();
7402 
7403     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
7404       if (trySkipId(IdSymbolic[ModeId])) {
7405         Mode = 1 << ModeId;
7406         break;
7407       }
7408     }
7409 
7410     if (Mode == 0) {
7411       Error(S, (Imm == 0)?
7412                "expected a VGPR index mode or a closing parenthesis" :
7413                "expected a VGPR index mode");
7414       return UNDEF;
7415     }
7416 
7417     if (Imm & Mode) {
7418       Error(S, "duplicate VGPR index mode");
7419       return UNDEF;
7420     }
7421     Imm |= Mode;
7422 
7423     if (trySkipToken(AsmToken::RParen))
7424       break;
7425     if (!skipToken(AsmToken::Comma,
7426                    "expected a comma or a closing parenthesis"))
7427       return UNDEF;
7428   }
7429 
7430   return Imm;
7431 }
7432 
7433 OperandMatchResultTy
7434 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
7435 
7436   using namespace llvm::AMDGPU::VGPRIndexMode;
7437 
7438   int64_t Imm = 0;
7439   SMLoc S = getLoc();
7440 
7441   if (trySkipId("gpr_idx", AsmToken::LParen)) {
7442     Imm = parseGPRIdxMacro();
7443     if (Imm == UNDEF)
7444       return MatchOperand_ParseFail;
7445   } else {
7446     if (getParser().parseAbsoluteExpression(Imm))
7447       return MatchOperand_ParseFail;
7448     if (Imm < 0 || !isUInt<4>(Imm)) {
7449       Error(S, "invalid immediate: only 4-bit values are legal");
7450       return MatchOperand_ParseFail;
7451     }
7452   }
7453 
7454   Operands.push_back(
7455       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
7456   return MatchOperand_Success;
7457 }
7458 
7459 bool AMDGPUOperand::isGPRIdxMode() const {
7460   return isImmTy(ImmTyGprIdxMode);
7461 }
7462 
7463 //===----------------------------------------------------------------------===//
7464 // sopp branch targets
7465 //===----------------------------------------------------------------------===//
7466 
7467 OperandMatchResultTy
7468 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
7469 
7470   // Make sure we are not parsing something
7471   // that looks like a label or an expression but is not.
7472   // This will improve error messages.
7473   if (isRegister() || isModifier())
7474     return MatchOperand_NoMatch;
7475 
7476   if (!parseExpr(Operands))
7477     return MatchOperand_ParseFail;
7478 
7479   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
7480   assert(Opr.isImm() || Opr.isExpr());
7481   SMLoc Loc = Opr.getStartLoc();
7482 
7483   // Currently we do not support arbitrary expressions as branch targets.
7484   // Only labels and absolute expressions are accepted.
7485   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
7486     Error(Loc, "expected an absolute expression or a label");
7487   } else if (Opr.isImm() && !Opr.isS16Imm()) {
7488     Error(Loc, "expected a 16-bit signed jump offset");
7489   }
7490 
7491   return MatchOperand_Success;
7492 }
7493 
7494 //===----------------------------------------------------------------------===//
7495 // Boolean holding registers
7496 //===----------------------------------------------------------------------===//
7497 
7498 OperandMatchResultTy
7499 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
7500   return parseReg(Operands);
7501 }
7502 
7503 //===----------------------------------------------------------------------===//
7504 // mubuf
7505 //===----------------------------------------------------------------------===//
7506 
7507 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
7508   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
7509 }
7510 
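// Convert parsed MUBUF operands into an MCInst. For atomics, the presence of
// 'glc' in the cache policy selects the returning form; otherwise the opcode
// is switched to its no-return variant when one exists. For returning atomics
// the destination register is also added as a tied source operand.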
7511 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
7512                                    const OperandVector &Operands,
7513                                    bool IsAtomic,
7514                                    bool IsLds) {
7515   OptionalImmIndexMap OptionalIdx;
7516   unsigned FirstOperandIdx = 1;
7517   bool IsAtomicReturn = false;
7518 
7519   if (IsAtomic) {
7520     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7521       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7522       if (!Op.isCPol())
7523         continue;
7524       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7525       break;
7526     }
7527 
7528     if (!IsAtomicReturn) {
7529       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7530       if (NewOpc != -1)
7531         Inst.setOpcode(NewOpc);
7532     }
7533 
7534     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7535                       SIInstrFlags::IsAtomicRet;
7536   }
7537 
7538   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7539     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7540 
7541     // Add the register arguments
7542     if (Op.isReg()) {
7543       Op.addRegOperands(Inst, 1);
7544       // Insert a tied src for atomic return dst.
7545       // This cannot be postponed as subsequent calls to
7546       // addImmOperands rely on the correct number of MC operands.
7547       if (IsAtomicReturn && i == FirstOperandIdx)
7548         Op.addRegOperands(Inst, 1);
7549       continue;
7550     }
7551 
7552     // Handle the case where soffset is an immediate
7553     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7554       Op.addImmOperands(Inst, 1);
7555       continue;
7556     }
7557 
7558     // Handle tokens like 'offen' which are sometimes hard-coded into the
7559     // asm string.  There are no MCInst operands for these.
7560     if (Op.isToken()) {
7561       continue;
7562     }
7563     assert(Op.isImm());
7564 
7565     // Handle optional arguments
7566     OptionalIdx[Op.getImmTy()] = i;
7567   }
7568 
7569   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7570   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7571 
7572   if (!IsLds) { // tfe is not legal with lds opcodes
7573     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7574   }
7575   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7576 }
7577 
7578 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7579   OptionalImmIndexMap OptionalIdx;
7580 
7581   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7582     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7583 
7584     // Add the register arguments
7585     if (Op.isReg()) {
7586       Op.addRegOperands(Inst, 1);
7587       continue;
7588     }
7589 
7590     // Handle the case where soffset is an immediate
7591     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7592       Op.addImmOperands(Inst, 1);
7593       continue;
7594     }
7595 
7596     // Handle tokens like 'offen' which are sometimes hard-coded into the
7597     // asm string.  There are no MCInst operands for these.
7598     if (Op.isToken()) {
7599       continue;
7600     }
7601     assert(Op.isImm());
7602 
7603     // Handle optional arguments
7604     OptionalIdx[Op.getImmTy()] = i;
7605   }
7606 
7607   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7608                         AMDGPUOperand::ImmTyOffset);
7609   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7610   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7611   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7612   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7613 }
7614 
7615 //===----------------------------------------------------------------------===//
7616 // mimg
7617 //===----------------------------------------------------------------------===//
7618 
7619 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7620                               bool IsAtomic) {
7621   unsigned I = 1;
7622   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7623   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7624     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7625   }
7626 
7627   if (IsAtomic) {
7628     // Add src, same as dst
7629     assert(Desc.getNumDefs() == 1);
7630     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7631   }
7632 
7633   OptionalImmIndexMap OptionalIdx;
7634 
7635   for (unsigned E = Operands.size(); I != E; ++I) {
7636     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7637 
7638     // Add the register arguments
7639     if (Op.isReg()) {
7640       Op.addRegOperands(Inst, 1);
7641     } else if (Op.isImmModifier()) {
7642       OptionalIdx[Op.getImmTy()] = I;
7643     } else if (!Op.isToken()) {
7644       llvm_unreachable("unexpected operand type");
7645     }
7646   }
7647 
7648   bool IsGFX10Plus = isGFX10Plus();
7649 
7650   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7651   if (IsGFX10Plus)
7652     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7653   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7654   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7655   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7656   if (IsGFX10Plus)
7657     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7658   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7659     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7660   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7661   if (!IsGFX10Plus)
7662     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7663   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7664 }
7665 
7666 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7667   cvtMIMG(Inst, Operands, true);
7668 }
7669 
7670 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7671   OptionalImmIndexMap OptionalIdx;
7672   bool IsAtomicReturn = false;
7673 
7674   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7675     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7676     if (!Op.isCPol())
7677       continue;
7678     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7679     break;
7680   }
7681 
7682   if (!IsAtomicReturn) {
7683     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7684     if (NewOpc != -1)
7685       Inst.setOpcode(NewOpc);
7686   }
7687 
7688   IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7689                     SIInstrFlags::IsAtomicRet;
7690 
7691   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7692     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7693 
7694     // Add the register arguments
7695     if (Op.isReg()) {
7696       Op.addRegOperands(Inst, 1);
7697       if (IsAtomicReturn && i == 1)
7698         Op.addRegOperands(Inst, 1);
7699       continue;
7700     }
7701 
7702     // Handle the case where soffset is an immediate
7703     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7704       Op.addImmOperands(Inst, 1);
7705       continue;
7706     }
7707 
7708     // Handle tokens like 'offen' which are sometimes hard-coded into the
7709     // asm string.  There are no MCInst operands for these.
7710     if (Op.isToken()) {
7711       continue;
7712     }
7713     assert(Op.isImm());
7714 
7715     // Handle optional arguments
7716     OptionalIdx[Op.getImmTy()] = i;
7717   }
7718 
7719   if ((int)Inst.getNumOperands() <=
7720       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7721     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7722   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7723 }
7724 
7725 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7726                                       const OperandVector &Operands) {
7727   for (unsigned I = 1; I < Operands.size(); ++I) {
7728     auto &Operand = (AMDGPUOperand &)*Operands[I];
7729     if (Operand.isReg())
7730       Operand.addRegOperands(Inst, 1);
7731   }
7732 
7733   Inst.addOperand(MCOperand::createImm(1)); // a16
7734 }
7735 
7736 //===----------------------------------------------------------------------===//
7737 // smrd
7738 //===----------------------------------------------------------------------===//
7739 
7740 bool AMDGPUOperand::isSMRDOffset8() const {
7741   return isImm() && isUInt<8>(getImm());
7742 }
7743 
7744 bool AMDGPUOperand::isSMEMOffset() const {
7745   return isImmTy(ImmTyNone) ||
7746          isImmTy(ImmTyOffset); // Offset range is checked later by validator.
7747 }
7748 
7749 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7750   // 32-bit literals are only supported on CI, and we only want to use them
7751   // when the offset does not fit in 8 bits.
7752   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7753 }
7754 
7755 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7756   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7757 }
7758 
7759 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7760   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7761 }
7762 
7763 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7764   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7765 }
7766 
7767 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7768   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7769 }
7770 
7771 //===----------------------------------------------------------------------===//
7772 // vop3
7773 //===----------------------------------------------------------------------===//
7774 
7775 static bool ConvertOmodMul(int64_t &Mul) {
7776   if (Mul != 1 && Mul != 2 && Mul != 4)
7777     return false;
7778 
7779   Mul >>= 1;
7780   return true;
7781 }
7782 
7783 static bool ConvertOmodDiv(int64_t &Div) {
7784   if (Div == 1) {
7785     Div = 0;
7786     return true;
7787   }
7788 
7789   if (Div == 2) {
7790     Div = 3;
7791     return true;
7792   }
7793 
7794   return false;
7795 }
7796 
7797 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7798 // This is intentional and ensures compatibility with sp3.
7799 // See bug 35397 for details.
7800 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7801   if (BoundCtrl == 0 || BoundCtrl == 1) {
7802     BoundCtrl = 1;
7803     return true;
7804   }
7805   return false;
7806 }
7807 
7808 // Note: the order in this table matches the order of operands in AsmString.
7809 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7810   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7811   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7812   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7813   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7814   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7815   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7816   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7817   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7818   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7819   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7820   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7821   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7822   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7823   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7824   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7825   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7826   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7827   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7828   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7829   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7830   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7831   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7832   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7833   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7834   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7835   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7836   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7837   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
7838   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7839   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7840   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7841   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7842   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7843   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7844   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7845   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7846   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7847   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7848   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7849   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7850   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr},
7851   {"wait_vdst", AMDGPUOperand::ImmTyWaitVDST, false, nullptr},
7852   {"wait_exp", AMDGPUOperand::ImmTyWaitEXP, false, nullptr}
7853 };
7854 
7855 void AMDGPUAsmParser::onBeginOfFile() {
7856   if (!getParser().getStreamer().getTargetStreamer() ||
7857       getSTI().getTargetTriple().getArch() == Triple::r600)
7858     return;
7859 
7860   if (!getTargetStreamer().getTargetID())
7861     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7862 
7863   if (isHsaAbiVersion3AndAbove(&getSTI()))
7864     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7865 }
7866 
7867 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7868 
7869   OperandMatchResultTy res = parseOptionalOpr(Operands);
7870 
  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // The current design assumes that all operands after the first optional
  // operand are also optional. However, the implementation of some
  // instructions violates this rule (see e.g. flat/global atomics, which have
  // hardcoded 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands to
  // make sure the autogenerated parser of custom operands never hits hardcoded
  // mandatory operands.
7881 
7882   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7883     if (res != MatchOperand_Success ||
7884         isToken(AsmToken::EndOfStatement))
7885       break;
7886 
7887     trySkipToken(AsmToken::Comma);
7888     res = parseOptionalOpr(Operands);
7889   }
7890 
7891   return res;
7892 }
7893 
7894 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7895   OperandMatchResultTy res;
7896   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7897     // try to parse any optional operand here
7898     if (Op.IsBit) {
7899       res = parseNamedBit(Op.Name, Operands, Op.Type);
7900     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7901       res = parseOModOperand(Operands);
7902     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7903                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7904                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7905       res = parseSDWASel(Operands, Op.Name, Op.Type);
7906     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7907       res = parseSDWADstUnused(Operands);
7908     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7909                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7910                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7911                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7912       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7913                                         Op.ConvertResult);
7914     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7915       res = parseDim(Operands);
7916     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7917       res = parseCPol(Operands);
7918     } else {
7919       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7920       if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) {
7921         res = parseOperandArrayWithPrefix("neg", Operands,
7922                                           AMDGPUOperand::ImmTyBLGP,
7923                                           nullptr);
7924       }
7925     }
7926     if (res != MatchOperand_NoMatch) {
7927       return res;
7928     }
7929   }
7930   return MatchOperand_NoMatch;
7931 }
7932 
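// Parse an output modifier operand, e.g. 'mul:2' or 'div:2'.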
7933 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7934   StringRef Name = getTokenStr();
7935   if (Name == "mul") {
7936     return parseIntWithPrefix("mul", Operands,
7937                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7938   }
7939 
7940   if (Name == "div") {
7941     return parseIntWithPrefix("div", Operands,
7942                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7943   }
7944 
7945   return MatchOperand_NoMatch;
7946 }
7947 
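// Convert a VOP3 instruction that uses op_sel. The op_sel bit just past the
// last source operand is treated as the destination bit and is folded into
// src0_modifiers as DST_OP_SEL.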
7948 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
7949   cvtVOP3P(Inst, Operands);
7950 
7951   int Opc = Inst.getOpcode();
7952 
7953   int SrcNum;
7954   const int Ops[] = { AMDGPU::OpName::src0,
7955                       AMDGPU::OpName::src1,
7956                       AMDGPU::OpName::src2 };
7957   for (SrcNum = 0;
7958        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
7959        ++SrcNum);
7960   assert(SrcNum > 0);
7961 
7962   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7963   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7964 
7965   if ((OpSel & (1 << SrcNum)) != 0) {
7966     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7967     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7968     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
7969   }
7970 }
7971 
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
      // 1. This operand is an input-modifiers operand
  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. It is not the last operand
      && Desc.NumOperands > (OpNum + 1)
      // 3. The next operand has a register class
      && Desc.OpInfo[OpNum + 1].RegClass != -1
      // 4. The next operand is not tied to any other operand
      && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7981 }
7982 
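// Convert a VOP3 interpolation instruction (v_interp_*): interp slot, attr
// and attr_chan operands are added as plain immediates, and the optional
// high/clamp/omod operands are appended when the opcode has them.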
7983 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7984 {
7985   OptionalImmIndexMap OptionalIdx;
7986   unsigned Opc = Inst.getOpcode();
7987 
7988   unsigned I = 1;
7989   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7990   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7991     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7992   }
7993 
7994   for (unsigned E = Operands.size(); I != E; ++I) {
7995     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7996     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7997       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7998     } else if (Op.isInterpSlot() ||
7999                Op.isInterpAttr() ||
8000                Op.isAttrChan()) {
8001       Inst.addOperand(MCOperand::createImm(Op.getImm()));
8002     } else if (Op.isImmModifier()) {
8003       OptionalIdx[Op.getImmTy()] = I;
8004     } else {
8005       llvm_unreachable("unhandled operand type");
8006     }
8007   }
8008 
8009   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
8010     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
8011   }
8012 
8013   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
8014     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8015   }
8016 
8017   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
8018     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8019   }
8020 }
8021 
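// Convert a VINTERP instruction: add the optional clamp, op_sel and wait_exp
// operands, then fold each op_sel bit into the corresponding src*_modifiers
// operand (bit 3 is the destination bit and goes into src0_modifiers as
// DST_OP_SEL).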
8022 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
8023 {
8024   OptionalImmIndexMap OptionalIdx;
8025   unsigned Opc = Inst.getOpcode();
8026 
8027   unsigned I = 1;
8028   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8029   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8030     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8031   }
8032 
8033   for (unsigned E = Operands.size(); I != E; ++I) {
8034     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8035     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8036       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8037     } else if (Op.isImmModifier()) {
8038       OptionalIdx[Op.getImmTy()] = I;
8039     } else {
8040       llvm_unreachable("unhandled operand type");
8041     }
8042   }
8043 
8044   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8045 
8046   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8047   if (OpSelIdx != -1)
8048     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8049 
8050   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
8051 
8052   if (OpSelIdx == -1)
8053     return;
8054 
8055   const int Ops[] = { AMDGPU::OpName::src0,
8056                       AMDGPU::OpName::src1,
8057                       AMDGPU::OpName::src2 };
8058   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8059                          AMDGPU::OpName::src1_modifiers,
8060                          AMDGPU::OpName::src2_modifiers };
8061 
8062   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8063 
8064   for (int J = 0; J < 3; ++J) {
8065     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8066     if (OpIdx == -1)
8067       break;
8068 
8069     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8070     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8071 
8072     if ((OpSel & (1 << J)) != 0)
8073       ModVal |= SISrcMods::OP_SEL_0;
8074     if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
8075         (OpSel & (1 << 3)) != 0)
8076       ModVal |= SISrcMods::DST_OP_SEL;
8077 
8078     Inst.getOperand(ModIdx).setImm(ModVal);
8079   }
8080 }
8081 
8082 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
8083                               OptionalImmIndexMap &OptionalIdx) {
8084   unsigned Opc = Inst.getOpcode();
8085 
8086   unsigned I = 1;
8087   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8088   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8089     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8090   }
8091 
8092   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
8093     // This instruction has src modifiers
8094     for (unsigned E = Operands.size(); I != E; ++I) {
8095       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8096       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8097         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8098       } else if (Op.isImmModifier()) {
8099         OptionalIdx[Op.getImmTy()] = I;
8100       } else if (Op.isRegOrImm()) {
8101         Op.addRegOrImmOperands(Inst, 1);
8102       } else {
8103         llvm_unreachable("unhandled operand type");
8104       }
8105     }
8106   } else {
8107     // No src modifiers
8108     for (unsigned E = Operands.size(); I != E; ++I) {
8109       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8110       if (Op.isMod()) {
8111         OptionalIdx[Op.getImmTy()] = I;
8112       } else {
8113         Op.addRegOrImmOperands(Inst, 1);
8114       }
8115     }
8116   }
8117 
8118   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
8119     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8120   }
8121 
8122   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
8123     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8124   }
8125 
  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // they have a src2 register operand that is tied to the dst operand.
  // We don't allow modifiers for this operand in the assembler, so
  // src2_modifiers should be 0.
8130   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
8131       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
8132       Opc == AMDGPU::V_MAC_F32_e64_vi ||
8133       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
8134       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
8135       Opc == AMDGPU::V_MAC_F16_e64_vi ||
8136       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
8137       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
8138       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
8139       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
8140       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
8141     auto it = Inst.begin();
8142     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
8143     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
8144     ++it;
8145     // Copy the operand to ensure it's not invalidated when Inst grows.
8146     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
8147   }
8148 }
8149 
8150 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
8151   OptionalImmIndexMap OptionalIdx;
8152   cvtVOP3(Inst, Operands, OptionalIdx);
8153 }
8154 
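// Convert a packed (VOP3P) instruction. The op_sel, op_sel_hi, neg_lo and
// neg_hi operands are parsed as separate immediates and then folded into the
// per-source src*_modifiers operands here.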
8155 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
8156                                OptionalImmIndexMap &OptIdx) {
8157   const int Opc = Inst.getOpcode();
8158   const MCInstrDesc &Desc = MII.get(Opc);
8159 
8160   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
8161 
8162   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
8163     assert(!IsPacked);
8164     Inst.addOperand(Inst.getOperand(0));
8165   }
8166 
  // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
  // instruction, and then figure out where to actually put the modifiers.
8169 
8170   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8171   if (OpSelIdx != -1) {
8172     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
8173   }
8174 
8175   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
8176   if (OpSelHiIdx != -1) {
8177     int DefaultVal = IsPacked ? -1 : 0;
8178     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
8179                           DefaultVal);
8180   }
8181 
8182   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
8183   if (NegLoIdx != -1) {
8184     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
8185     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
8186   }
8187 
8188   const int Ops[] = { AMDGPU::OpName::src0,
8189                       AMDGPU::OpName::src1,
8190                       AMDGPU::OpName::src2 };
8191   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8192                          AMDGPU::OpName::src1_modifiers,
8193                          AMDGPU::OpName::src2_modifiers };
8194 
8195   unsigned OpSel = 0;
8196   unsigned OpSelHi = 0;
8197   unsigned NegLo = 0;
8198   unsigned NegHi = 0;
8199 
8200   if (OpSelIdx != -1)
8201     OpSel = Inst.getOperand(OpSelIdx).getImm();
8202 
8203   if (OpSelHiIdx != -1)
8204     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
8205 
8206   if (NegLoIdx != -1) {
8207     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
8208     NegLo = Inst.getOperand(NegLoIdx).getImm();
8209     NegHi = Inst.getOperand(NegHiIdx).getImm();
8210   }
8211 
8212   for (int J = 0; J < 3; ++J) {
8213     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8214     if (OpIdx == -1)
8215       break;
8216 
8217     uint32_t ModVal = 0;
8218 
8219     if ((OpSel & (1 << J)) != 0)
8220       ModVal |= SISrcMods::OP_SEL_0;
8221 
8222     if ((OpSelHi & (1 << J)) != 0)
8223       ModVal |= SISrcMods::OP_SEL_1;
8224 
8225     if ((NegLo & (1 << J)) != 0)
8226       ModVal |= SISrcMods::NEG;
8227 
8228     if ((NegHi & (1 << J)) != 0)
8229       ModVal |= SISrcMods::NEG_HI;
8230 
8231     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8232 
8233     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
8234   }
8235 }
8236 
8237 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8238   OptionalImmIndexMap OptIdx;
8239   cvtVOP3(Inst, Operands, OptIdx);
8240   cvtVOP3P(Inst, Operands, OptIdx);
8241 }
8242 
8243 //===----------------------------------------------------------------------===//
8244 // dpp
8245 //===----------------------------------------------------------------------===//
8246 
8247 bool AMDGPUOperand::isDPP8() const {
8248   return isImmTy(ImmTyDPP8);
8249 }
8250 
8251 bool AMDGPUOperand::isDPPCtrl() const {
8252   using namespace AMDGPU::DPP;
8253 
8254   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8255   if (result) {
8256     int64_t Imm = getImm();
8257     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8258            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
8259            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8260            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
8261            (Imm == DppCtrl::WAVE_SHL1) ||
8262            (Imm == DppCtrl::WAVE_ROL1) ||
8263            (Imm == DppCtrl::WAVE_SHR1) ||
8264            (Imm == DppCtrl::WAVE_ROR1) ||
8265            (Imm == DppCtrl::ROW_MIRROR) ||
8266            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
8267            (Imm == DppCtrl::BCAST15) ||
8268            (Imm == DppCtrl::BCAST31) ||
8269            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8270            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
8271   }
8272   return false;
8273 }
8274 
8275 //===----------------------------------------------------------------------===//
8276 // mAI
8277 //===----------------------------------------------------------------------===//
8278 
8279 bool AMDGPUOperand::isBLGP() const {
8280   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8281 }
8282 
8283 bool AMDGPUOperand::isCBSZ() const {
8284   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
8285 }
8286 
8287 bool AMDGPUOperand::isABID() const {
8288   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
8289 }
8290 
8291 bool AMDGPUOperand::isS16Imm() const {
8292   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8293 }
8294 
8295 bool AMDGPUOperand::isU16Imm() const {
8296   return isImm() && isUInt<16>(getImm());
8297 }
8298 
8299 //===----------------------------------------------------------------------===//
8300 // dim
8301 //===----------------------------------------------------------------------===//
8302 
8303 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
8304   // We want to allow "dim:1D" etc.,
8305   // but the initial 1 is tokenized as an integer.
8306   std::string Token;
8307   if (isToken(AsmToken::Integer)) {
8308     SMLoc Loc = getToken().getEndLoc();
8309     Token = std::string(getTokenStr());
8310     lex();
8311     if (getLoc() != Loc)
8312       return false;
8313   }
8314 
8315   StringRef Suffix;
8316   if (!parseId(Suffix))
8317     return false;
8318   Token += Suffix;
8319 
8320   StringRef DimId = Token;
8321   if (DimId.startswith("SQ_RSRC_IMG_"))
8322     DimId = DimId.drop_front(12);
8323 
8324   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
8325   if (!DimInfo)
8326     return false;
8327 
8328   Encoding = DimInfo->Encoding;
8329   return true;
8330 }
8331 
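// Parse the 'dim:' operand of MIMG instructions, e.g. "dim:SQ_RSRC_IMG_2D"
// or the short form "dim:2D".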
8332 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
8333   if (!isGFX10Plus())
8334     return MatchOperand_NoMatch;
8335 
8336   SMLoc S = getLoc();
8337 
8338   if (!trySkipId("dim", AsmToken::Colon))
8339     return MatchOperand_NoMatch;
8340 
8341   unsigned Encoding;
8342   SMLoc Loc = getLoc();
8343   if (!parseDimId(Encoding)) {
8344     Error(Loc, "invalid dim value");
8345     return MatchOperand_ParseFail;
8346   }
8347 
8348   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
8349                                               AMDGPUOperand::ImmTyDim));
8350   return MatchOperand_Success;
8351 }
8352 
8353 //===----------------------------------------------------------------------===//
8354 // dpp
8355 //===----------------------------------------------------------------------===//
8356 
8357 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
8358   SMLoc S = getLoc();
8359 
8360   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
8361     return MatchOperand_NoMatch;
8362 
8363   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
8364 
8365   int64_t Sels[8];
8366 
8367   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8368     return MatchOperand_ParseFail;
8369 
8370   for (size_t i = 0; i < 8; ++i) {
8371     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8372       return MatchOperand_ParseFail;
8373 
8374     SMLoc Loc = getLoc();
8375     if (getParser().parseAbsoluteExpression(Sels[i]))
8376       return MatchOperand_ParseFail;
8377     if (0 > Sels[i] || 7 < Sels[i]) {
8378       Error(Loc, "expected a 3-bit value");
8379       return MatchOperand_ParseFail;
8380     }
8381   }
8382 
8383   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8384     return MatchOperand_ParseFail;
8385 
8386   unsigned DPP8 = 0;
8387   for (size_t i = 0; i < 8; ++i)
8388     DPP8 |= (Sels[i] << (i * 3));
8389 
8390   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
8391   return MatchOperand_Success;
8392 }
8393 
8394 bool
8395 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
8396                                     const OperandVector &Operands) {
8397   if (Ctrl == "row_newbcast")
8398     return isGFX90A();
8399 
8400   if (Ctrl == "row_share" ||
8401       Ctrl == "row_xmask")
8402     return isGFX10Plus();
8403 
8404   if (Ctrl == "wave_shl" ||
8405       Ctrl == "wave_shr" ||
8406       Ctrl == "wave_rol" ||
8407       Ctrl == "wave_ror" ||
8408       Ctrl == "row_bcast")
8409     return isVI() || isGFX9();
8410 
8411   return Ctrl == "row_mirror" ||
8412          Ctrl == "row_half_mirror" ||
8413          Ctrl == "quad_perm" ||
8414          Ctrl == "row_shl" ||
8415          Ctrl == "row_shr" ||
8416          Ctrl == "row_ror";
8417 }
8418 
8419 int64_t
8420 AMDGPUAsmParser::parseDPPCtrlPerm() {
8421   // quad_perm:[%d,%d,%d,%d]
8422 
8423   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8424     return -1;
8425 
8426   int64_t Val = 0;
8427   for (int i = 0; i < 4; ++i) {
8428     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8429       return -1;
8430 
8431     int64_t Temp;
8432     SMLoc Loc = getLoc();
8433     if (getParser().parseAbsoluteExpression(Temp))
8434       return -1;
8435     if (Temp < 0 || Temp > 3) {
8436       Error(Loc, "expected a 2-bit value");
8437       return -1;
8438     }
8439 
8440     Val += (Temp << i * 2);
8441   }
8442 
8443   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8444     return -1;
8445 
8446   return Val;
8447 }
8448 
8449 int64_t
8450 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
8451   using namespace AMDGPU::DPP;
8452 
8453   // sel:%d
8454 
8455   int64_t Val;
8456   SMLoc Loc = getLoc();
8457 
8458   if (getParser().parseAbsoluteExpression(Val))
8459     return -1;
8460 
8461   struct DppCtrlCheck {
8462     int64_t Ctrl;
8463     int Lo;
8464     int Hi;
8465   };
8466 
8467   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
8468     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
8469     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
8470     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
8471     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
8472     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
8473     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
8474     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
8475     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
8476     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
8477     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
8478     .Default({-1, 0, 0});
8479 
8480   bool Valid;
8481   if (Check.Ctrl == -1) {
8482     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
8483     Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
8484   } else {
8485     Valid = Check.Lo <= Val && Val <= Check.Hi;
8486     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
8487   }
8488 
8489   if (!Valid) {
8490     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
8491     return -1;
8492   }
8493 
8494   return Val;
8495 }
8496 
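// Parse a DPP control operand, e.g. "row_mirror", "quad_perm:[0,1,2,3]" or
// "row_shl:1".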
8497 OperandMatchResultTy
8498 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
8499   using namespace AMDGPU::DPP;
8500 
8501   if (!isToken(AsmToken::Identifier) ||
8502       !isSupportedDPPCtrl(getTokenStr(), Operands))
8503     return MatchOperand_NoMatch;
8504 
8505   SMLoc S = getLoc();
8506   int64_t Val = -1;
8507   StringRef Ctrl;
8508 
8509   parseId(Ctrl);
8510 
8511   if (Ctrl == "row_mirror") {
8512     Val = DppCtrl::ROW_MIRROR;
8513   } else if (Ctrl == "row_half_mirror") {
8514     Val = DppCtrl::ROW_HALF_MIRROR;
8515   } else {
8516     if (skipToken(AsmToken::Colon, "expected a colon")) {
8517       if (Ctrl == "quad_perm") {
8518         Val = parseDPPCtrlPerm();
8519       } else {
8520         Val = parseDPPCtrlSel(Ctrl);
8521       }
8522     }
8523   }
8524 
8525   if (Val == -1)
8526     return MatchOperand_ParseFail;
8527 
8528   Operands.push_back(
8529     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
8530   return MatchOperand_Success;
8531 }
8532 
8533 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
8534   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
8535 }
8536 
8537 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
8538   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
8539 }
8540 
8541 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
8542   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
8543 }
8544 
8545 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
8546   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
8547 }
8548 
8549 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
8550   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
8551 }
8552 
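// Convert a DPP or DPP8 instruction. For DPP8 the 'fi' value is always
// appended; for DPP the row_mask, bank_mask, bound_ctrl and (if the opcode
// has it) fi operands are added with their defaults when omitted.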
8553 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8554   OptionalImmIndexMap OptionalIdx;
8555 
8556   unsigned Opc = Inst.getOpcode();
8557   bool HasModifiers =
8558       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
8559   unsigned I = 1;
8560   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8561   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8562     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8563   }
8564 
8565   int Fi = 0;
8566   for (unsigned E = Operands.size(); I != E; ++I) {
8567     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8568                                             MCOI::TIED_TO);
8569     if (TiedTo != -1) {
8570       assert((unsigned)TiedTo < Inst.getNumOperands());
      // Handle the tied 'old' or 'src2' operand of MAC instructions.
8572       Inst.addOperand(Inst.getOperand(TiedTo));
8573     }
8574     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8575     // Add the register arguments
8576     if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp uses the "vcc" token.
      // Skip it.
8579       continue;
8580     }
8581 
8582     if (IsDPP8) {
8583       if (Op.isDPP8()) {
8584         Op.addImmOperands(Inst, 1);
8585       } else if (HasModifiers &&
8586                  isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8587         Op.addRegWithFPInputModsOperands(Inst, 2);
8588       } else if (Op.isFI()) {
8589         Fi = Op.getImm();
8590       } else if (Op.isReg()) {
8591         Op.addRegOperands(Inst, 1);
8592       } else {
8593         llvm_unreachable("Invalid operand type");
8594       }
8595     } else {
8596       if (HasModifiers &&
8597           isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8598         Op.addRegWithFPInputModsOperands(Inst, 2);
8599       } else if (Op.isReg()) {
8600         Op.addRegOperands(Inst, 1);
8601       } else if (Op.isDPPCtrl()) {
8602         Op.addImmOperands(Inst, 1);
8603       } else if (Op.isImm()) {
8604         // Handle optional arguments
8605         OptionalIdx[Op.getImmTy()] = I;
8606       } else {
8607         llvm_unreachable("Invalid operand type");
8608       }
8609     }
8610   }
8611 
8612   if (IsDPP8) {
8613     using namespace llvm::AMDGPU::DPP;
8614     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8615   } else {
8616     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8617     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8618     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8619     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8620       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8621     }
8622   }
8623 }
8624 
8625 //===----------------------------------------------------------------------===//
8626 // sdwa
8627 //===----------------------------------------------------------------------===//
8628 
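// Parse an SDWA selection operand, e.g. "dst_sel:BYTE_0" or "src0_sel:WORD_1".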
8629 OperandMatchResultTy
8630 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
8631                               AMDGPUOperand::ImmTy Type) {
8632   using namespace llvm::AMDGPU::SDWA;
8633 
8634   SMLoc S = getLoc();
8635   StringRef Value;
8636   OperandMatchResultTy res;
8637 
8638   SMLoc StringLoc;
8639   res = parseStringWithPrefix(Prefix, Value, StringLoc);
8640   if (res != MatchOperand_Success) {
8641     return res;
8642   }
8643 
8644   int64_t Int;
8645   Int = StringSwitch<int64_t>(Value)
8646         .Case("BYTE_0", SdwaSel::BYTE_0)
8647         .Case("BYTE_1", SdwaSel::BYTE_1)
8648         .Case("BYTE_2", SdwaSel::BYTE_2)
8649         .Case("BYTE_3", SdwaSel::BYTE_3)
8650         .Case("WORD_0", SdwaSel::WORD_0)
8651         .Case("WORD_1", SdwaSel::WORD_1)
8652         .Case("DWORD", SdwaSel::DWORD)
8653         .Default(0xffffffff);
8654 
8655   if (Int == 0xffffffff) {
8656     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8657     return MatchOperand_ParseFail;
8658   }
8659 
8660   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8661   return MatchOperand_Success;
8662 }
8663 
8664 OperandMatchResultTy
8665 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8666   using namespace llvm::AMDGPU::SDWA;
8667 
8668   SMLoc S = getLoc();
8669   StringRef Value;
8670   OperandMatchResultTy res;
8671 
8672   SMLoc StringLoc;
8673   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8674   if (res != MatchOperand_Success) {
8675     return res;
8676   }
8677 
8678   int64_t Int;
8679   Int = StringSwitch<int64_t>(Value)
8680         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8681         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8682         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8683         .Default(0xffffffff);
8684 
8685   if (Int == 0xffffffff) {
8686     Error(StringLoc, "invalid dst_unused value");
8687     return MatchOperand_ParseFail;
8688   }
8689 
8690   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8691   return MatchOperand_Success;
8692 }
8693 
8694 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8695   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8696 }
8697 
8698 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8699   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8700 }
8701 
8702 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8703   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8704 }
8705 
8706 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8707   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8708 }
8709 
8710 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8711   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8712 }
8713 
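// Convert an SDWA instruction. Depending on the basic instruction type
// (VOP1, VOP2 or VOPC), the appropriate clamp, omod, dst_sel, dst_unused and
// src*_sel operands are appended, defaulting to DWORD / UNUSED_PRESERVE when
// omitted.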
8714 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8715                               uint64_t BasicInstType,
8716                               bool SkipDstVcc,
8717                               bool SkipSrcVcc) {
8718   using namespace llvm::AMDGPU::SDWA;
8719 
8720   OptionalImmIndexMap OptionalIdx;
8721   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8722   bool SkippedVcc = false;
8723 
8724   unsigned I = 1;
8725   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8726   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8727     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8728   }
8729 
8730   for (unsigned E = Operands.size(); I != E; ++I) {
8731     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8732     if (SkipVcc && !SkippedVcc && Op.isReg() &&
8733         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa uses the "vcc" token as dst.
      // Skip it if it is the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on the previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
8739       if (BasicInstType == SIInstrFlags::VOP2 &&
8740           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8741            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8742         SkippedVcc = true;
8743         continue;
8744       } else if (BasicInstType == SIInstrFlags::VOPC &&
8745                  Inst.getNumOperands() == 0) {
8746         SkippedVcc = true;
8747         continue;
8748       }
8749     }
8750     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8751       Op.addRegOrImmWithInputModsOperands(Inst, 2);
8752     } else if (Op.isImm()) {
8753       // Handle optional arguments
8754       OptionalIdx[Op.getImmTy()] = I;
8755     } else {
8756       llvm_unreachable("Invalid operand type");
8757     }
8758     SkippedVcc = false;
8759   }
8760 
8761   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
8762       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
8763       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments.
8765     switch (BasicInstType) {
8766     case SIInstrFlags::VOP1:
8767       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8768       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8769         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8770       }
8771       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8772       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8773       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8774       break;
8775 
8776     case SIInstrFlags::VOP2:
8777       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8778       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8779         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8780       }
8781       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8782       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8783       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8784       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8785       break;
8786 
8787     case SIInstrFlags::VOPC:
8788       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8789         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8790       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8791       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8792       break;
8793 
8794     default:
8795       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8796     }
8797   }
8798 
  // Special case v_mac_{f16, f32}:
  // it has a src2 register operand that is tied to the dst operand.
8801   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8802       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
8803     auto it = Inst.begin();
8804     std::advance(
8805       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8806     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8807   }
8808 }
8809 
8810 //===----------------------------------------------------------------------===//
8811 // mAI
8812 //===----------------------------------------------------------------------===//
8813 
8814 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8815   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8816 }
8817 
8818 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8819   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8820 }
8821 
8822 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8823   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8824 }
8825 
8826 /// Force static initialization.
8827 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8828   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8829   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8830 }
8831 
8832 #define GET_REGISTER_MATCHER
8833 #define GET_MATCHER_IMPLEMENTATION
8834 #define GET_MNEMONIC_SPELL_CHECKER
8835 #define GET_MNEMONIC_CHECKER
8836 #include "AMDGPUGenAsmMatcher.inc"
8837 
// This function should be defined after the auto-generated include so that
// the MatchClassKind enum is defined.
8840 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8841                                                      unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks if we were given an immediate operand but
  // expected to get the corresponding token.
8846   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8847   switch (Kind) {
8848   case MCK_addr64:
8849     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8850   case MCK_gds:
8851     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8852   case MCK_lds:
8853     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8854   case MCK_idxen:
8855     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8856   case MCK_offen:
8857     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8858   case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token when isToken returns true, and when the
    // name of the expression is not a valid token the match will fail,
    // so we need to handle it here.
8865     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8866   case MCK_SSrcF32:
8867     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8868   case MCK_SoppBrTarget:
8869     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8870   case MCK_VReg32OrOff:
8871     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8872   case MCK_InterpSlot:
8873     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
8874   case MCK_Attr:
8875     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
8876   case MCK_AttrChan:
8877     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
8878   case MCK_ImmSMEMOffset:
8879     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
8880   case MCK_SReg_64:
8881   case MCK_SReg_64_XEXEC:
8882     // Null is defined as a 32-bit register but
8883     // it should also be enabled with 64-bit operands.
8884     // The following code enables it for SReg_64 operands
8885     // used as source and destination. Remaining source
8886     // operands are handled in isInlinableImm.
8887     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
8888   default:
8889     return Match_InvalidOperand;
8890   }
8891 }
8892 
8893 //===----------------------------------------------------------------------===//
8894 // endpgm
8895 //===----------------------------------------------------------------------===//
8896 
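// Parse the optional 16-bit immediate operand of s_endpgm, e.g. "s_endpgm 0".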
8897 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
8898   SMLoc S = getLoc();
8899   int64_t Imm = 0;
8900 
8901   if (!parseExpr(Imm)) {
8902     // The operand is optional, if not present default to 0
8903     Imm = 0;
8904   }
8905 
8906   if (!isUInt<16>(Imm)) {
8907     Error(S, "expected a 16-bit value");
8908     return MatchOperand_ParseFail;
8909   }
8910 
8911   Operands.push_back(
8912       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
8913   return MatchOperand_Success;
8914 }
8915 
8916 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
8917 
8918 //===----------------------------------------------------------------------===//
8919 // LDSDIR
8920 //===----------------------------------------------------------------------===//
8921 
8922 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitVDST() const {
8923   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitVDST);
8924 }
8925 
8926 bool AMDGPUOperand::isWaitVDST() const {
8927   return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
8928 }
8929 
8930 //===----------------------------------------------------------------------===//
8931 // VINTERP
8932 //===----------------------------------------------------------------------===//
8933 
8934 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitEXP() const {
8935   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitEXP);
8936 }
8937 
8938 bool AMDGPUOperand::isWaitEXP() const {
8939   return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());
8940 }
8941