1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/BinaryFormat/ELF.h"
24 #include "llvm/MC/MCAsmInfo.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCExpr.h"
27 #include "llvm/MC/MCInst.h"
28 #include "llvm/MC/MCParser/MCAsmLexer.h"
29 #include "llvm/MC/MCParser/MCAsmParser.h"
30 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
31 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
32 #include "llvm/MC/MCSymbol.h"
33 #include "llvm/MC/TargetRegistry.h"
34 #include "llvm/Support/AMDGPUMetadata.h"
35 #include "llvm/Support/AMDHSAKernelDescriptor.h"
36 #include "llvm/Support/Casting.h"
37 #include "llvm/Support/MachineValueType.h"
38 #include "llvm/Support/MathExtras.h"
39 #include "llvm/Support/TargetParser.h"
40 
41 using namespace llvm;
42 using namespace llvm::AMDGPU;
43 using namespace llvm::amdhsa;
44 
45 namespace {
46 
47 class AMDGPUAsmParser;
48 
49 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
50 
51 //===----------------------------------------------------------------------===//
52 // Operand
53 //===----------------------------------------------------------------------===//
54 
55 class AMDGPUOperand : public MCParsedAsmOperand {
56   enum KindTy {
57     Token,
58     Immediate,
59     Register,
60     Expression
61   } Kind;
62 
63   SMLoc StartLoc, EndLoc;
64   const AMDGPUAsmParser *AsmParser;
65 
66 public:
67   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
68       : Kind(Kind_), AsmParser(AsmParser_) {}
69 
70   using Ptr = std::unique_ptr<AMDGPUOperand>;
71 
72   struct Modifiers {
73     bool Abs = false;
74     bool Neg = false;
75     bool Sext = false;
76 
77     bool hasFPModifiers() const { return Abs || Neg; }
78     bool hasIntModifiers() const { return Sext; }
79     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
80 
81     int64_t getFPModifiersOperand() const {
82       int64_t Operand = 0;
83       Operand |= Abs ? SISrcMods::ABS : 0u;
84       Operand |= Neg ? SISrcMods::NEG : 0u;
85       return Operand;
86     }
87 
88     int64_t getIntModifiersOperand() const {
89       int64_t Operand = 0;
90       Operand |= Sext ? SISrcMods::SEXT : 0u;
91       return Operand;
92     }
93 
94     int64_t getModifiersOperand() const {
95       assert(!(hasFPModifiers() && hasIntModifiers())
96            && "fp and int modifiers should not be used simultaneously");
97       if (hasFPModifiers()) {
98         return getFPModifiersOperand();
99       } else if (hasIntModifiers()) {
100         return getIntModifiersOperand();
101       } else {
102         return 0;
103       }
104     }
105 
106     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
107   };
108 
109   enum ImmTy {
110     ImmTyNone,
111     ImmTyGDS,
112     ImmTyLDS,
113     ImmTyOffen,
114     ImmTyIdxen,
115     ImmTyAddr64,
116     ImmTyOffset,
117     ImmTyInstOffset,
118     ImmTyOffset0,
119     ImmTyOffset1,
120     ImmTyCPol,
121     ImmTySWZ,
122     ImmTyTFE,
123     ImmTyD16,
124     ImmTyClampSI,
125     ImmTyOModSI,
126     ImmTyDPP8,
127     ImmTyDppCtrl,
128     ImmTyDppRowMask,
129     ImmTyDppBankMask,
130     ImmTyDppBoundCtrl,
131     ImmTyDppFi,
132     ImmTySdwaDstSel,
133     ImmTySdwaSrc0Sel,
134     ImmTySdwaSrc1Sel,
135     ImmTySdwaDstUnused,
136     ImmTyDMask,
137     ImmTyDim,
138     ImmTyUNorm,
139     ImmTyDA,
140     ImmTyR128A16,
141     ImmTyA16,
142     ImmTyLWE,
143     ImmTyExpTgt,
144     ImmTyExpCompr,
145     ImmTyExpVM,
146     ImmTyFORMAT,
147     ImmTyHwreg,
148     ImmTyOff,
149     ImmTySendMsg,
150     ImmTyInterpSlot,
151     ImmTyInterpAttr,
152     ImmTyAttrChan,
153     ImmTyOpSel,
154     ImmTyOpSelHi,
155     ImmTyNegLo,
156     ImmTyNegHi,
157     ImmTySwizzle,
158     ImmTyGprIdxMode,
159     ImmTyHigh,
160     ImmTyBLGP,
161     ImmTyCBSZ,
162     ImmTyABID,
163     ImmTyEndpgm,
164     ImmTyWaitVDST,
165     ImmTyWaitEXP,
166   };
167 
168   enum ImmKindTy {
169     ImmKindTyNone,
170     ImmKindTyLiteral,
171     ImmKindTyConst,
172   };
173 
174 private:
175   struct TokOp {
176     const char *Data;
177     unsigned Length;
178   };
179 
180   struct ImmOp {
181     int64_t Val;
182     ImmTy Type;
183     bool IsFPImm;
184     mutable ImmKindTy Kind;
185     Modifiers Mods;
186   };
187 
188   struct RegOp {
189     unsigned RegNo;
190     Modifiers Mods;
191   };
192 
193   union {
194     TokOp Tok;
195     ImmOp Imm;
196     RegOp Reg;
197     const MCExpr *Expr;
198   };
199 
200 public:
201   bool isToken() const override {
202     if (Kind == Token)
203       return true;
204 
205     // When parsing operands, we can't always tell if something was meant to be
206     // a token, like 'gds', or an expression that references a global variable.
207     // In this case, we assume the string is an expression, and if we need to
208     // interpret is a token, then we treat the symbol name as the token.
209     return isSymbolRefExpr();
210   }
211 
212   bool isSymbolRefExpr() const {
213     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
214   }
215 
216   bool isImm() const override {
217     return Kind == Immediate;
218   }
219 
220   void setImmKindNone() const {
221     assert(isImm());
222     Imm.Kind = ImmKindTyNone;
223   }
224 
225   void setImmKindLiteral() const {
226     assert(isImm());
227     Imm.Kind = ImmKindTyLiteral;
228   }
229 
230   void setImmKindConst() const {
231     assert(isImm());
232     Imm.Kind = ImmKindTyConst;
233   }
234 
235   bool IsImmKindLiteral() const {
236     return isImm() && Imm.Kind == ImmKindTyLiteral;
237   }
238 
239   bool isImmKindConst() const {
240     return isImm() && Imm.Kind == ImmKindTyConst;
241   }
242 
243   bool isInlinableImm(MVT type) const;
244   bool isLiteralImm(MVT type) const;
245 
246   bool isRegKind() const {
247     return Kind == Register;
248   }
249 
250   bool isReg() const override {
251     return isRegKind() && !hasModifiers();
252   }
253 
254   bool isRegOrInline(unsigned RCID, MVT type) const {
255     return isRegClass(RCID) || isInlinableImm(type);
256   }
257 
258   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
259     return isRegOrInline(RCID, type) || isLiteralImm(type);
260   }
261 
262   bool isRegOrImmWithInt16InputMods() const {
263     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
264   }
265 
266   bool isRegOrImmWithInt32InputMods() const {
267     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
268   }
269 
270   bool isRegOrImmWithInt64InputMods() const {
271     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
272   }
273 
274   bool isRegOrImmWithFP16InputMods() const {
275     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
276   }
277 
278   bool isRegOrImmWithFP32InputMods() const {
279     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
280   }
281 
282   bool isRegOrImmWithFP64InputMods() const {
283     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
284   }
285 
286   bool isVReg() const {
287     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
288            isRegClass(AMDGPU::VReg_64RegClassID) ||
289            isRegClass(AMDGPU::VReg_96RegClassID) ||
290            isRegClass(AMDGPU::VReg_128RegClassID) ||
291            isRegClass(AMDGPU::VReg_160RegClassID) ||
292            isRegClass(AMDGPU::VReg_192RegClassID) ||
293            isRegClass(AMDGPU::VReg_256RegClassID) ||
294            isRegClass(AMDGPU::VReg_512RegClassID) ||
295            isRegClass(AMDGPU::VReg_1024RegClassID);
296   }
297 
298   bool isVReg32() const {
299     return isRegClass(AMDGPU::VGPR_32RegClassID);
300   }
301 
302   bool isVReg32OrOff() const {
303     return isOff() || isVReg32();
304   }
305 
306   bool isNull() const {
307     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
308   }
309 
310   bool isVRegWithInputMods() const;
311 
312   bool isSDWAOperand(MVT type) const;
313   bool isSDWAFP16Operand() const;
314   bool isSDWAFP32Operand() const;
315   bool isSDWAInt16Operand() const;
316   bool isSDWAInt32Operand() const;
317 
318   bool isImmTy(ImmTy ImmT) const {
319     return isImm() && Imm.Type == ImmT;
320   }
321 
322   bool isImmModifier() const {
323     return isImm() && Imm.Type != ImmTyNone;
324   }
325 
326   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
327   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
328   bool isDMask() const { return isImmTy(ImmTyDMask); }
329   bool isDim() const { return isImmTy(ImmTyDim); }
330   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
331   bool isDA() const { return isImmTy(ImmTyDA); }
332   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
333   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
334   bool isLWE() const { return isImmTy(ImmTyLWE); }
335   bool isOff() const { return isImmTy(ImmTyOff); }
336   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
337   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
338   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
339   bool isOffen() const { return isImmTy(ImmTyOffen); }
340   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
341   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
342   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
343   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
344   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
345 
346   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
347   bool isGDS() const { return isImmTy(ImmTyGDS); }
348   bool isLDS() const { return isImmTy(ImmTyLDS); }
349   bool isCPol() const { return isImmTy(ImmTyCPol); }
350   bool isSWZ() const { return isImmTy(ImmTySWZ); }
351   bool isTFE() const { return isImmTy(ImmTyTFE); }
352   bool isD16() const { return isImmTy(ImmTyD16); }
353   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
354   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
355   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
356   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
357   bool isFI() const { return isImmTy(ImmTyDppFi); }
358   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
359   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
360   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
361   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
362   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
363   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
364   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
365   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
366   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
367   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
368   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
369   bool isHigh() const { return isImmTy(ImmTyHigh); }
370 
371   bool isMod() const {
372     return isClampSI() || isOModSI();
373   }
374 
375   bool isRegOrImm() const {
376     return isReg() || isImm();
377   }
378 
379   bool isRegClass(unsigned RCID) const;
380 
381   bool isInlineValue() const;
382 
383   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
384     return isRegOrInline(RCID, type) && !hasModifiers();
385   }
386 
387   bool isSCSrcB16() const {
388     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
389   }
390 
391   bool isSCSrcV2B16() const {
392     return isSCSrcB16();
393   }
394 
395   bool isSCSrcB32() const {
396     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
397   }
398 
399   bool isSCSrcB64() const {
400     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
401   }
402 
403   bool isBoolReg() const;
404 
405   bool isSCSrcF16() const {
406     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
407   }
408 
409   bool isSCSrcV2F16() const {
410     return isSCSrcF16();
411   }
412 
413   bool isSCSrcF32() const {
414     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
415   }
416 
417   bool isSCSrcF64() const {
418     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
419   }
420 
421   bool isSSrcB32() const {
422     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
423   }
424 
425   bool isSSrcB16() const {
426     return isSCSrcB16() || isLiteralImm(MVT::i16);
427   }
428 
429   bool isSSrcV2B16() const {
430     llvm_unreachable("cannot happen");
431     return isSSrcB16();
432   }
433 
434   bool isSSrcB64() const {
435     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
436     // See isVSrc64().
437     return isSCSrcB64() || isLiteralImm(MVT::i64);
438   }
439 
440   bool isSSrcF32() const {
441     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
442   }
443 
444   bool isSSrcF64() const {
445     return isSCSrcB64() || isLiteralImm(MVT::f64);
446   }
447 
448   bool isSSrcF16() const {
449     return isSCSrcB16() || isLiteralImm(MVT::f16);
450   }
451 
452   bool isSSrcV2F16() const {
453     llvm_unreachable("cannot happen");
454     return isSSrcF16();
455   }
456 
457   bool isSSrcV2FP32() const {
458     llvm_unreachable("cannot happen");
459     return isSSrcF32();
460   }
461 
462   bool isSCSrcV2FP32() const {
463     llvm_unreachable("cannot happen");
464     return isSCSrcF32();
465   }
466 
467   bool isSSrcV2INT32() const {
468     llvm_unreachable("cannot happen");
469     return isSSrcB32();
470   }
471 
472   bool isSCSrcV2INT32() const {
473     llvm_unreachable("cannot happen");
474     return isSCSrcB32();
475   }
476 
477   bool isSSrcOrLdsB32() const {
478     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
479            isLiteralImm(MVT::i32) || isExpr();
480   }
481 
482   bool isVCSrcB32() const {
483     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
484   }
485 
486   bool isVCSrcB64() const {
487     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
488   }
489 
490   bool isVCSrcB16() const {
491     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
492   }
493 
494   bool isVCSrcV2B16() const {
495     return isVCSrcB16();
496   }
497 
498   bool isVCSrcF32() const {
499     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
500   }
501 
502   bool isVCSrcF64() const {
503     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
504   }
505 
506   bool isVCSrcF16() const {
507     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
508   }
509 
510   bool isVCSrcV2F16() const {
511     return isVCSrcF16();
512   }
513 
514   bool isVSrcB32() const {
515     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
516   }
517 
518   bool isVSrcB64() const {
519     return isVCSrcF64() || isLiteralImm(MVT::i64);
520   }
521 
522   bool isVSrcB16() const {
523     return isVCSrcB16() || isLiteralImm(MVT::i16);
524   }
525 
526   bool isVSrcV2B16() const {
527     return isVSrcB16() || isLiteralImm(MVT::v2i16);
528   }
529 
530   bool isVCSrcV2FP32() const {
531     return isVCSrcF64();
532   }
533 
534   bool isVSrcV2FP32() const {
535     return isVSrcF64() || isLiteralImm(MVT::v2f32);
536   }
537 
538   bool isVCSrcV2INT32() const {
539     return isVCSrcB64();
540   }
541 
542   bool isVSrcV2INT32() const {
543     return isVSrcB64() || isLiteralImm(MVT::v2i32);
544   }
545 
546   bool isVSrcF32() const {
547     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
548   }
549 
550   bool isVSrcF64() const {
551     return isVCSrcF64() || isLiteralImm(MVT::f64);
552   }
553 
554   bool isVSrcF16() const {
555     return isVCSrcF16() || isLiteralImm(MVT::f16);
556   }
557 
558   bool isVSrcV2F16() const {
559     return isVSrcF16() || isLiteralImm(MVT::v2f16);
560   }
561 
562   bool isVISrcB32() const {
563     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
564   }
565 
566   bool isVISrcB16() const {
567     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
568   }
569 
570   bool isVISrcV2B16() const {
571     return isVISrcB16();
572   }
573 
574   bool isVISrcF32() const {
575     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
576   }
577 
578   bool isVISrcF16() const {
579     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
580   }
581 
582   bool isVISrcV2F16() const {
583     return isVISrcF16() || isVISrcB32();
584   }
585 
586   bool isVISrc_64B64() const {
587     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
588   }
589 
590   bool isVISrc_64F64() const {
591     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
592   }
593 
594   bool isVISrc_64V2FP32() const {
595     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
596   }
597 
598   bool isVISrc_64V2INT32() const {
599     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
600   }
601 
602   bool isVISrc_256B64() const {
603     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
604   }
605 
606   bool isVISrc_256F64() const {
607     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
608   }
609 
610   bool isVISrc_128B16() const {
611     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
612   }
613 
614   bool isVISrc_128V2B16() const {
615     return isVISrc_128B16();
616   }
617 
618   bool isVISrc_128B32() const {
619     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
620   }
621 
622   bool isVISrc_128F32() const {
623     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
624   }
625 
626   bool isVISrc_256V2FP32() const {
627     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
628   }
629 
630   bool isVISrc_256V2INT32() const {
631     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
632   }
633 
634   bool isVISrc_512B32() const {
635     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
636   }
637 
638   bool isVISrc_512B16() const {
639     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
640   }
641 
642   bool isVISrc_512V2B16() const {
643     return isVISrc_512B16();
644   }
645 
646   bool isVISrc_512F32() const {
647     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
648   }
649 
650   bool isVISrc_512F16() const {
651     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
652   }
653 
654   bool isVISrc_512V2F16() const {
655     return isVISrc_512F16() || isVISrc_512B32();
656   }
657 
658   bool isVISrc_1024B32() const {
659     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
660   }
661 
662   bool isVISrc_1024B16() const {
663     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
664   }
665 
666   bool isVISrc_1024V2B16() const {
667     return isVISrc_1024B16();
668   }
669 
670   bool isVISrc_1024F32() const {
671     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
672   }
673 
674   bool isVISrc_1024F16() const {
675     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
676   }
677 
678   bool isVISrc_1024V2F16() const {
679     return isVISrc_1024F16() || isVISrc_1024B32();
680   }
681 
682   bool isAISrcB32() const {
683     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
684   }
685 
686   bool isAISrcB16() const {
687     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
688   }
689 
690   bool isAISrcV2B16() const {
691     return isAISrcB16();
692   }
693 
694   bool isAISrcF32() const {
695     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
696   }
697 
698   bool isAISrcF16() const {
699     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
700   }
701 
702   bool isAISrcV2F16() const {
703     return isAISrcF16() || isAISrcB32();
704   }
705 
706   bool isAISrc_64B64() const {
707     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
708   }
709 
710   bool isAISrc_64F64() const {
711     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
712   }
713 
714   bool isAISrc_128B32() const {
715     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
716   }
717 
718   bool isAISrc_128B16() const {
719     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
720   }
721 
722   bool isAISrc_128V2B16() const {
723     return isAISrc_128B16();
724   }
725 
726   bool isAISrc_128F32() const {
727     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
728   }
729 
730   bool isAISrc_128F16() const {
731     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
732   }
733 
734   bool isAISrc_128V2F16() const {
735     return isAISrc_128F16() || isAISrc_128B32();
736   }
737 
738   bool isVISrc_128F16() const {
739     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
740   }
741 
742   bool isVISrc_128V2F16() const {
743     return isVISrc_128F16() || isVISrc_128B32();
744   }
745 
746   bool isAISrc_256B64() const {
747     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
748   }
749 
750   bool isAISrc_256F64() const {
751     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
752   }
753 
754   bool isAISrc_512B32() const {
755     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
756   }
757 
758   bool isAISrc_512B16() const {
759     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
760   }
761 
762   bool isAISrc_512V2B16() const {
763     return isAISrc_512B16();
764   }
765 
766   bool isAISrc_512F32() const {
767     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
768   }
769 
770   bool isAISrc_512F16() const {
771     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
772   }
773 
774   bool isAISrc_512V2F16() const {
775     return isAISrc_512F16() || isAISrc_512B32();
776   }
777 
778   bool isAISrc_1024B32() const {
779     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
780   }
781 
782   bool isAISrc_1024B16() const {
783     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
784   }
785 
786   bool isAISrc_1024V2B16() const {
787     return isAISrc_1024B16();
788   }
789 
790   bool isAISrc_1024F32() const {
791     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
792   }
793 
794   bool isAISrc_1024F16() const {
795     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
796   }
797 
798   bool isAISrc_1024V2F16() const {
799     return isAISrc_1024F16() || isAISrc_1024B32();
800   }
801 
802   bool isKImmFP32() const {
803     return isLiteralImm(MVT::f32);
804   }
805 
806   bool isKImmFP16() const {
807     return isLiteralImm(MVT::f16);
808   }
809 
810   bool isMem() const override {
811     return false;
812   }
813 
814   bool isExpr() const {
815     return Kind == Expression;
816   }
817 
818   bool isSoppBrTarget() const {
819     return isExpr() || isImm();
820   }
821 
822   bool isSWaitCnt() const;
823   bool isDepCtr() const;
824   bool isSDelayAlu() const;
825   bool isHwreg() const;
826   bool isSendMsg() const;
827   bool isSwizzle() const;
828   bool isSMRDOffset8() const;
829   bool isSMEMOffset() const;
830   bool isSMRDLiteralOffset() const;
831   bool isDPP8() const;
832   bool isDPPCtrl() const;
833   bool isBLGP() const;
834   bool isCBSZ() const;
835   bool isABID() const;
836   bool isGPRIdxMode() const;
837   bool isS16Imm() const;
838   bool isU16Imm() const;
839   bool isEndpgm() const;
840   bool isWaitVDST() const;
841   bool isWaitEXP() const;
842 
843   StringRef getExpressionAsToken() const {
844     assert(isExpr());
845     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
846     return S->getSymbol().getName();
847   }
848 
849   StringRef getToken() const {
850     assert(isToken());
851 
852     if (Kind == Expression)
853       return getExpressionAsToken();
854 
855     return StringRef(Tok.Data, Tok.Length);
856   }
857 
858   int64_t getImm() const {
859     assert(isImm());
860     return Imm.Val;
861   }
862 
863   void setImm(int64_t Val) {
864     assert(isImm());
865     Imm.Val = Val;
866   }
867 
868   ImmTy getImmTy() const {
869     assert(isImm());
870     return Imm.Type;
871   }
872 
873   unsigned getReg() const override {
874     assert(isRegKind());
875     return Reg.RegNo;
876   }
877 
878   SMLoc getStartLoc() const override {
879     return StartLoc;
880   }
881 
882   SMLoc getEndLoc() const override {
883     return EndLoc;
884   }
885 
886   SMRange getLocRange() const {
887     return SMRange(StartLoc, EndLoc);
888   }
889 
890   Modifiers getModifiers() const {
891     assert(isRegKind() || isImmTy(ImmTyNone));
892     return isRegKind() ? Reg.Mods : Imm.Mods;
893   }
894 
895   void setModifiers(Modifiers Mods) {
896     assert(isRegKind() || isImmTy(ImmTyNone));
897     if (isRegKind())
898       Reg.Mods = Mods;
899     else
900       Imm.Mods = Mods;
901   }
902 
903   bool hasModifiers() const {
904     return getModifiers().hasModifiers();
905   }
906 
907   bool hasFPModifiers() const {
908     return getModifiers().hasFPModifiers();
909   }
910 
911   bool hasIntModifiers() const {
912     return getModifiers().hasIntModifiers();
913   }
914 
915   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
916 
917   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
918 
919   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
920 
921   template <unsigned Bitwidth>
922   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
923 
924   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
925     addKImmFPOperands<16>(Inst, N);
926   }
927 
928   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
929     addKImmFPOperands<32>(Inst, N);
930   }
931 
932   void addRegOperands(MCInst &Inst, unsigned N) const;
933 
934   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
935     addRegOperands(Inst, N);
936   }
937 
938   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
939     if (isRegKind())
940       addRegOperands(Inst, N);
941     else if (isExpr())
942       Inst.addOperand(MCOperand::createExpr(Expr));
943     else
944       addImmOperands(Inst, N);
945   }
946 
947   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
948     Modifiers Mods = getModifiers();
949     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
950     if (isRegKind()) {
951       addRegOperands(Inst, N);
952     } else {
953       addImmOperands(Inst, N, false);
954     }
955   }
956 
957   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
958     assert(!hasIntModifiers());
959     addRegOrImmWithInputModsOperands(Inst, N);
960   }
961 
962   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
963     assert(!hasFPModifiers());
964     addRegOrImmWithInputModsOperands(Inst, N);
965   }
966 
967   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
968     Modifiers Mods = getModifiers();
969     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
970     assert(isRegKind());
971     addRegOperands(Inst, N);
972   }
973 
974   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
975     assert(!hasIntModifiers());
976     addRegWithInputModsOperands(Inst, N);
977   }
978 
979   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
980     assert(!hasFPModifiers());
981     addRegWithInputModsOperands(Inst, N);
982   }
983 
984   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
985     if (isImm())
986       addImmOperands(Inst, N);
987     else {
988       assert(isExpr());
989       Inst.addOperand(MCOperand::createExpr(Expr));
990     }
991   }
992 
993   static void printImmTy(raw_ostream& OS, ImmTy Type) {
994     switch (Type) {
995     case ImmTyNone: OS << "None"; break;
996     case ImmTyGDS: OS << "GDS"; break;
997     case ImmTyLDS: OS << "LDS"; break;
998     case ImmTyOffen: OS << "Offen"; break;
999     case ImmTyIdxen: OS << "Idxen"; break;
1000     case ImmTyAddr64: OS << "Addr64"; break;
1001     case ImmTyOffset: OS << "Offset"; break;
1002     case ImmTyInstOffset: OS << "InstOffset"; break;
1003     case ImmTyOffset0: OS << "Offset0"; break;
1004     case ImmTyOffset1: OS << "Offset1"; break;
1005     case ImmTyCPol: OS << "CPol"; break;
1006     case ImmTySWZ: OS << "SWZ"; break;
1007     case ImmTyTFE: OS << "TFE"; break;
1008     case ImmTyD16: OS << "D16"; break;
1009     case ImmTyFORMAT: OS << "FORMAT"; break;
1010     case ImmTyClampSI: OS << "ClampSI"; break;
1011     case ImmTyOModSI: OS << "OModSI"; break;
1012     case ImmTyDPP8: OS << "DPP8"; break;
1013     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1014     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1015     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1016     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1017     case ImmTyDppFi: OS << "FI"; break;
1018     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1019     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1020     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1021     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1022     case ImmTyDMask: OS << "DMask"; break;
1023     case ImmTyDim: OS << "Dim"; break;
1024     case ImmTyUNorm: OS << "UNorm"; break;
1025     case ImmTyDA: OS << "DA"; break;
1026     case ImmTyR128A16: OS << "R128A16"; break;
1027     case ImmTyA16: OS << "A16"; break;
1028     case ImmTyLWE: OS << "LWE"; break;
1029     case ImmTyOff: OS << "Off"; break;
1030     case ImmTyExpTgt: OS << "ExpTgt"; break;
1031     case ImmTyExpCompr: OS << "ExpCompr"; break;
1032     case ImmTyExpVM: OS << "ExpVM"; break;
1033     case ImmTyHwreg: OS << "Hwreg"; break;
1034     case ImmTySendMsg: OS << "SendMsg"; break;
1035     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1036     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1037     case ImmTyAttrChan: OS << "AttrChan"; break;
1038     case ImmTyOpSel: OS << "OpSel"; break;
1039     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1040     case ImmTyNegLo: OS << "NegLo"; break;
1041     case ImmTyNegHi: OS << "NegHi"; break;
1042     case ImmTySwizzle: OS << "Swizzle"; break;
1043     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1044     case ImmTyHigh: OS << "High"; break;
1045     case ImmTyBLGP: OS << "BLGP"; break;
1046     case ImmTyCBSZ: OS << "CBSZ"; break;
1047     case ImmTyABID: OS << "ABID"; break;
1048     case ImmTyEndpgm: OS << "Endpgm"; break;
1049     case ImmTyWaitVDST: OS << "WaitVDST"; break;
1050     case ImmTyWaitEXP: OS << "WaitEXP"; break;
1051     }
1052   }
1053 
1054   void print(raw_ostream &OS) const override {
1055     switch (Kind) {
1056     case Register:
1057       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1058       break;
1059     case Immediate:
1060       OS << '<' << getImm();
1061       if (getImmTy() != ImmTyNone) {
1062         OS << " type: "; printImmTy(OS, getImmTy());
1063       }
1064       OS << " mods: " << Imm.Mods << '>';
1065       break;
1066     case Token:
1067       OS << '\'' << getToken() << '\'';
1068       break;
1069     case Expression:
1070       OS << "<expr " << *Expr << '>';
1071       break;
1072     }
1073   }
1074 
1075   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1076                                       int64_t Val, SMLoc Loc,
1077                                       ImmTy Type = ImmTyNone,
1078                                       bool IsFPImm = false) {
1079     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1080     Op->Imm.Val = Val;
1081     Op->Imm.IsFPImm = IsFPImm;
1082     Op->Imm.Kind = ImmKindTyNone;
1083     Op->Imm.Type = Type;
1084     Op->Imm.Mods = Modifiers();
1085     Op->StartLoc = Loc;
1086     Op->EndLoc = Loc;
1087     return Op;
1088   }
1089 
1090   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1091                                         StringRef Str, SMLoc Loc,
1092                                         bool HasExplicitEncodingSize = true) {
1093     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1094     Res->Tok.Data = Str.data();
1095     Res->Tok.Length = Str.size();
1096     Res->StartLoc = Loc;
1097     Res->EndLoc = Loc;
1098     return Res;
1099   }
1100 
1101   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1102                                       unsigned RegNo, SMLoc S,
1103                                       SMLoc E) {
1104     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1105     Op->Reg.RegNo = RegNo;
1106     Op->Reg.Mods = Modifiers();
1107     Op->StartLoc = S;
1108     Op->EndLoc = E;
1109     return Op;
1110   }
1111 
1112   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1113                                        const class MCExpr *Expr, SMLoc S) {
1114     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1115     Op->Expr = Expr;
1116     Op->StartLoc = S;
1117     Op->EndLoc = S;
1118     return Op;
1119   }
1120 };
1121 
1122 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1123   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1124   return OS;
1125 }
1126 
1127 //===----------------------------------------------------------------------===//
1128 // AsmParser
1129 //===----------------------------------------------------------------------===//
1130 
1131 // Holds info related to the current kernel, e.g. count of SGPRs used.
1132 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1133 // .amdgpu_hsa_kernel or at EOF.
1134 class KernelScopeInfo {
1135   int SgprIndexUnusedMin = -1;
1136   int VgprIndexUnusedMin = -1;
1137   int AgprIndexUnusedMin = -1;
1138   MCContext *Ctx = nullptr;
1139   MCSubtargetInfo const *MSTI = nullptr;
1140 
1141   void usesSgprAt(int i) {
1142     if (i >= SgprIndexUnusedMin) {
1143       SgprIndexUnusedMin = ++i;
1144       if (Ctx) {
1145         MCSymbol* const Sym =
1146           Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1147         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1148       }
1149     }
1150   }
1151 
1152   void usesVgprAt(int i) {
1153     if (i >= VgprIndexUnusedMin) {
1154       VgprIndexUnusedMin = ++i;
1155       if (Ctx) {
1156         MCSymbol* const Sym =
1157           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1158         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1159                                          VgprIndexUnusedMin);
1160         Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1161       }
1162     }
1163   }
1164 
1165   void usesAgprAt(int i) {
1166     // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
1167     if (!hasMAIInsts(*MSTI))
1168       return;
1169 
1170     if (i >= AgprIndexUnusedMin) {
1171       AgprIndexUnusedMin = ++i;
1172       if (Ctx) {
1173         MCSymbol* const Sym =
1174           Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1175         Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1176 
1177         // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1178         MCSymbol* const vSym =
1179           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1180         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1181                                          VgprIndexUnusedMin);
1182         vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1183       }
1184     }
1185   }
1186 
1187 public:
1188   KernelScopeInfo() = default;
1189 
1190   void initialize(MCContext &Context) {
1191     Ctx = &Context;
1192     MSTI = Ctx->getSubtargetInfo();
1193 
1194     usesSgprAt(SgprIndexUnusedMin = -1);
1195     usesVgprAt(VgprIndexUnusedMin = -1);
1196     if (hasMAIInsts(*MSTI)) {
1197       usesAgprAt(AgprIndexUnusedMin = -1);
1198     }
1199   }
1200 
1201   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1202                     unsigned RegWidth) {
1203     switch (RegKind) {
1204     case IS_SGPR:
1205       usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1206       break;
1207     case IS_AGPR:
1208       usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1209       break;
1210     case IS_VGPR:
1211       usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1212       break;
1213     default:
1214       break;
1215     }
1216   }
1217 };
1218 
1219 class AMDGPUAsmParser : public MCTargetAsmParser {
1220   MCAsmParser &Parser;
1221 
1222   // Number of extra operands parsed after the first optional operand.
1223   // This may be necessary to skip hardcoded mandatory operands.
1224   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1225 
1226   unsigned ForcedEncodingSize = 0;
1227   bool ForcedDPP = false;
1228   bool ForcedSDWA = false;
1229   KernelScopeInfo KernelScope;
1230   unsigned CPolSeen;
1231 
1232   /// @name Auto-generated Match Functions
1233   /// {
1234 
1235 #define GET_ASSEMBLER_HEADER
1236 #include "AMDGPUGenAsmMatcher.inc"
1237 
1238   /// }
1239 
1240 private:
1241   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1242   bool OutOfRangeError(SMRange Range);
1243   /// Calculate VGPR/SGPR blocks required for given target, reserved
1244   /// registers, and user-specified NextFreeXGPR values.
1245   ///
1246   /// \param Features [in] Target features, used for bug corrections.
1247   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1248   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1249   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1250   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1251   /// descriptor field, if valid.
1252   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1253   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1254   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1255   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1256   /// \param VGPRBlocks [out] Result VGPR block count.
1257   /// \param SGPRBlocks [out] Result SGPR block count.
1258   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1259                           bool FlatScrUsed, bool XNACKUsed,
1260                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1261                           SMRange VGPRRange, unsigned NextFreeSGPR,
1262                           SMRange SGPRRange, unsigned &VGPRBlocks,
1263                           unsigned &SGPRBlocks);
1264   bool ParseDirectiveAMDGCNTarget();
1265   bool ParseDirectiveAMDHSAKernel();
1266   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1267   bool ParseDirectiveHSACodeObjectVersion();
1268   bool ParseDirectiveHSACodeObjectISA();
1269   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1270   bool ParseDirectiveAMDKernelCodeT();
1271   // TODO: Possibly make subtargetHasRegister const.
1272   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1273   bool ParseDirectiveAMDGPUHsaKernel();
1274 
1275   bool ParseDirectiveISAVersion();
1276   bool ParseDirectiveHSAMetadata();
1277   bool ParseDirectivePALMetadataBegin();
1278   bool ParseDirectivePALMetadata();
1279   bool ParseDirectiveAMDGPULDS();
1280 
1281   /// Common code to parse out a block of text (typically YAML) between start and
1282   /// end directives.
1283   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1284                            const char *AssemblerDirectiveEnd,
1285                            std::string &CollectString);
1286 
1287   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1288                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1289   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1290                            unsigned &RegNum, unsigned &RegWidth,
1291                            bool RestoreOnFailure = false);
1292   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1293                            unsigned &RegNum, unsigned &RegWidth,
1294                            SmallVectorImpl<AsmToken> &Tokens);
1295   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1296                            unsigned &RegWidth,
1297                            SmallVectorImpl<AsmToken> &Tokens);
1298   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1299                            unsigned &RegWidth,
1300                            SmallVectorImpl<AsmToken> &Tokens);
1301   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1302                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1303   bool ParseRegRange(unsigned& Num, unsigned& Width);
1304   unsigned getRegularReg(RegisterKind RegKind,
1305                          unsigned RegNum,
1306                          unsigned RegWidth,
1307                          SMLoc Loc);
1308 
1309   bool isRegister();
1310   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1311   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1312   void initializeGprCountSymbol(RegisterKind RegKind);
1313   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1314                              unsigned RegWidth);
1315   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1316                     bool IsAtomic, bool IsLds = false);
1317   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1318                  bool IsGdsHardcoded);
1319 
1320 public:
1321   enum AMDGPUMatchResultTy {
1322     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1323   };
1324   enum OperandMode {
1325     OperandMode_Default,
1326     OperandMode_NSA,
1327   };
1328 
1329   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1330 
1331   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1332                const MCInstrInfo &MII,
1333                const MCTargetOptions &Options)
1334       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1335     MCAsmParserExtension::Initialize(Parser);
1336 
1337     if (getFeatureBits().none()) {
1338       // Set default features.
1339       copySTI().ToggleFeature("southern-islands");
1340     }
1341 
1342     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1343 
1344     {
1345       // TODO: make those pre-defined variables read-only.
1346       // Currently there is none suitable machinery in the core llvm-mc for this.
1347       // MCSymbol::isRedefinable is intended for another purpose, and
1348       // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
1349       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1350       MCContext &Ctx = getContext();
1351       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1352         MCSymbol *Sym =
1353             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1354         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1355         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1356         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1357         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1358         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1359       } else {
1360         MCSymbol *Sym =
1361             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1362         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1363         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1364         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1365         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1366         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1367       }
1368       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1369         initializeGprCountSymbol(IS_VGPR);
1370         initializeGprCountSymbol(IS_SGPR);
1371       } else
1372         KernelScope.initialize(getContext());
1373     }
1374   }
1375 
1376   bool hasMIMG_R128() const {
1377     return AMDGPU::hasMIMG_R128(getSTI());
1378   }
1379 
1380   bool hasPackedD16() const {
1381     return AMDGPU::hasPackedD16(getSTI());
1382   }
1383 
1384   bool hasGFX10A16() const {
1385     return AMDGPU::hasGFX10A16(getSTI());
1386   }
1387 
1388   bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1389 
1390   bool isSI() const {
1391     return AMDGPU::isSI(getSTI());
1392   }
1393 
1394   bool isCI() const {
1395     return AMDGPU::isCI(getSTI());
1396   }
1397 
1398   bool isVI() const {
1399     return AMDGPU::isVI(getSTI());
1400   }
1401 
1402   bool isGFX9() const {
1403     return AMDGPU::isGFX9(getSTI());
1404   }
1405 
1406   // TODO: isGFX90A is also true for GFX940. We need to clean it.
1407   bool isGFX90A() const {
1408     return AMDGPU::isGFX90A(getSTI());
1409   }
1410 
1411   bool isGFX940() const {
1412     return AMDGPU::isGFX940(getSTI());
1413   }
1414 
1415   bool isGFX9Plus() const {
1416     return AMDGPU::isGFX9Plus(getSTI());
1417   }
1418 
1419   bool isGFX10() const {
1420     return AMDGPU::isGFX10(getSTI());
1421   }
1422 
1423   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1424 
1425   bool isGFX11() const {
1426     return AMDGPU::isGFX11(getSTI());
1427   }
1428 
1429   bool isGFX11Plus() const {
1430     return AMDGPU::isGFX11Plus(getSTI());
1431   }
1432 
1433   bool isGFX10_BEncoding() const {
1434     return AMDGPU::isGFX10_BEncoding(getSTI());
1435   }
1436 
1437   bool hasInv2PiInlineImm() const {
1438     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1439   }
1440 
1441   bool hasFlatOffsets() const {
1442     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1443   }
1444 
1445   bool hasArchitectedFlatScratch() const {
1446     return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1447   }
1448 
1449   bool hasSGPR102_SGPR103() const {
1450     return !isVI() && !isGFX9();
1451   }
1452 
1453   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1454 
1455   bool hasIntClamp() const {
1456     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1457   }
1458 
1459   AMDGPUTargetStreamer &getTargetStreamer() {
1460     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1461     return static_cast<AMDGPUTargetStreamer &>(TS);
1462   }
1463 
1464   const MCRegisterInfo *getMRI() const {
1465     // We need this const_cast because for some reason getContext() is not const
1466     // in MCAsmParser.
1467     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1468   }
1469 
1470   const MCInstrInfo *getMII() const {
1471     return &MII;
1472   }
1473 
1474   const FeatureBitset &getFeatureBits() const {
1475     return getSTI().getFeatureBits();
1476   }
1477 
1478   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1479   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1480   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1481 
1482   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1483   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1484   bool isForcedDPP() const { return ForcedDPP; }
1485   bool isForcedSDWA() const { return ForcedSDWA; }
1486   ArrayRef<unsigned> getMatchedVariants() const;
1487   StringRef getMatchedVariantName() const;
1488 
1489   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1490   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1491                      bool RestoreOnFailure);
1492   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1493   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1494                                         SMLoc &EndLoc) override;
1495   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1496   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1497                                       unsigned Kind) override;
1498   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1499                                OperandVector &Operands, MCStreamer &Out,
1500                                uint64_t &ErrorInfo,
1501                                bool MatchingInlineAsm) override;
1502   bool ParseDirective(AsmToken DirectiveID) override;
1503   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1504                                     OperandMode Mode = OperandMode_Default);
1505   StringRef parseMnemonicSuffix(StringRef Name);
1506   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1507                         SMLoc NameLoc, OperandVector &Operands) override;
1508   //bool ProcessInstruction(MCInst &Inst);
1509 
1510   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1511 
1512   OperandMatchResultTy
1513   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1514                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1515                      bool (*ConvertResult)(int64_t &) = nullptr);
1516 
1517   OperandMatchResultTy
1518   parseOperandArrayWithPrefix(const char *Prefix,
1519                               OperandVector &Operands,
1520                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1521                               bool (*ConvertResult)(int64_t&) = nullptr);
1522 
1523   OperandMatchResultTy
1524   parseNamedBit(StringRef Name, OperandVector &Operands,
1525                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1526   OperandMatchResultTy parseCPol(OperandVector &Operands);
1527   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1528                                              StringRef &Value,
1529                                              SMLoc &StringLoc);
1530 
1531   bool isModifier();
1532   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1533   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1534   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1535   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1536   bool parseSP3NegModifier();
1537   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1538   OperandMatchResultTy parseReg(OperandVector &Operands);
1539   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1540   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1541   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1542   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1543   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1544   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1545   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1546   OperandMatchResultTy parseUfmt(int64_t &Format);
1547   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1548   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1549   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1550   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1551   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1552   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1553   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1554 
1555   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1556   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1557   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1558   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1559 
1560   bool parseCnt(int64_t &IntVal);
1561   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1562 
1563   bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1564   void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1565   OperandMatchResultTy parseDepCtrOps(OperandVector &Operands);
1566 
1567   bool parseDelay(int64_t &Delay);
1568   OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands);
1569 
1570   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1571 
1572 private:
1573   struct OperandInfoTy {
1574     SMLoc Loc;
1575     int64_t Id;
1576     bool IsSymbolic = false;
1577     bool IsDefined = false;
1578 
1579     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1580   };
1581 
1582   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1583   bool validateSendMsg(const OperandInfoTy &Msg,
1584                        const OperandInfoTy &Op,
1585                        const OperandInfoTy &Stream);
1586 
1587   bool parseHwregBody(OperandInfoTy &HwReg,
1588                       OperandInfoTy &Offset,
1589                       OperandInfoTy &Width);
1590   bool validateHwreg(const OperandInfoTy &HwReg,
1591                      const OperandInfoTy &Offset,
1592                      const OperandInfoTy &Width);
1593 
1594   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1595   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1596   SMLoc getBLGPLoc(const OperandVector &Operands) const;
1597 
1598   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1599                       const OperandVector &Operands) const;
1600   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1601   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1602   SMLoc getLitLoc(const OperandVector &Operands) const;
1603   SMLoc getConstLoc(const OperandVector &Operands) const;
1604 
1605   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1606   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1607   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1608   bool validateSOPLiteral(const MCInst &Inst) const;
1609   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1610   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1611   bool validateIntClampSupported(const MCInst &Inst);
1612   bool validateMIMGAtomicDMask(const MCInst &Inst);
1613   bool validateMIMGGatherDMask(const MCInst &Inst);
1614   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1615   Optional<StringRef> validateMIMGDataSize(const MCInst &Inst);
1616   bool validateMIMGAddrSize(const MCInst &Inst);
1617   bool validateMIMGD16(const MCInst &Inst);
1618   bool validateMIMGDim(const MCInst &Inst);
1619   bool validateMIMGMSAA(const MCInst &Inst);
1620   bool validateOpSel(const MCInst &Inst);
1621   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1622   bool validateVccOperand(unsigned Reg) const;
1623   bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1624   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1625   bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1626   bool validateAGPRLdSt(const MCInst &Inst) const;
1627   bool validateVGPRAlign(const MCInst &Inst) const;
1628   bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1629   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1630   bool validateDivScale(const MCInst &Inst);
1631   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1632                              const SMLoc &IDLoc);
1633   bool validateFlatLdsDMA(const MCInst &Inst, const OperandVector &Operands,
1634                           const SMLoc &IDLoc);
1635   Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1636   unsigned getConstantBusLimit(unsigned Opcode) const;
1637   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1638   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1639   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1640 
1641   bool isSupportedMnemo(StringRef Mnemo,
1642                         const FeatureBitset &FBS);
1643   bool isSupportedMnemo(StringRef Mnemo,
1644                         const FeatureBitset &FBS,
1645                         ArrayRef<unsigned> Variants);
1646   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1647 
1648   bool isId(const StringRef Id) const;
1649   bool isId(const AsmToken &Token, const StringRef Id) const;
1650   bool isToken(const AsmToken::TokenKind Kind) const;
1651   bool trySkipId(const StringRef Id);
1652   bool trySkipId(const StringRef Pref, const StringRef Id);
1653   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1654   bool trySkipToken(const AsmToken::TokenKind Kind);
1655   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1656   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1657   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1658 
1659   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1660   AsmToken::TokenKind getTokenKind() const;
1661   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1662   bool parseExpr(OperandVector &Operands);
1663   StringRef getTokenStr() const;
1664   AsmToken peekToken();
1665   AsmToken getToken() const;
1666   SMLoc getLoc() const;
1667   void lex();
1668 
1669 public:
1670   void onBeginOfFile() override;
1671 
1672   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1673   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1674 
1675   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1676   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1677   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1678   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1679   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1680   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1681 
1682   bool parseSwizzleOperand(int64_t &Op,
1683                            const unsigned MinVal,
1684                            const unsigned MaxVal,
1685                            const StringRef ErrMsg,
1686                            SMLoc &Loc);
1687   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1688                             const unsigned MinVal,
1689                             const unsigned MaxVal,
1690                             const StringRef ErrMsg);
1691   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1692   bool parseSwizzleOffset(int64_t &Imm);
1693   bool parseSwizzleMacro(int64_t &Imm);
1694   bool parseSwizzleQuadPerm(int64_t &Imm);
1695   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1696   bool parseSwizzleBroadcast(int64_t &Imm);
1697   bool parseSwizzleSwap(int64_t &Imm);
1698   bool parseSwizzleReverse(int64_t &Imm);
1699 
1700   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1701   int64_t parseGPRIdxMacro();
1702 
1703   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1704   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1705   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1706   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1707 
1708   AMDGPUOperand::Ptr defaultCPol() const;
1709 
1710   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1711   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1712   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1713   AMDGPUOperand::Ptr defaultFlatOffset() const;
1714 
1715   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1716 
1717   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1718                OptionalImmIndexMap &OptionalIdx);
1719   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1720   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1721   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1722   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1723                 OptionalImmIndexMap &OptionalIdx);
1724 
1725   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1726   void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1727 
1728   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1729                bool IsAtomic = false);
1730   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1731   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1732 
1733   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1734 
1735   bool parseDimId(unsigned &Encoding);
1736   OperandMatchResultTy parseDim(OperandVector &Operands);
1737   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1738   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1739   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1740   int64_t parseDPPCtrlSel(StringRef Ctrl);
1741   int64_t parseDPPCtrlPerm();
1742   AMDGPUOperand::Ptr defaultRowMask() const;
1743   AMDGPUOperand::Ptr defaultBankMask() const;
1744   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1745   AMDGPUOperand::Ptr defaultFI() const;
1746   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1747   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1748 
1749   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1750                                     AMDGPUOperand::ImmTy Type);
1751   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1752   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1753   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1754   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1755   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1756   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1757   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1758                uint64_t BasicInstType,
1759                bool SkipDstVcc = false,
1760                bool SkipSrcVcc = false);
1761 
1762   AMDGPUOperand::Ptr defaultBLGP() const;
1763   AMDGPUOperand::Ptr defaultCBSZ() const;
1764   AMDGPUOperand::Ptr defaultABID() const;
1765 
1766   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1767   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1768 
1769   AMDGPUOperand::Ptr defaultWaitVDST() const;
1770   AMDGPUOperand::Ptr defaultWaitEXP() const;
1771 };
1772 
1773 struct OptionalOperand {
1774   const char *Name;
1775   AMDGPUOperand::ImmTy Type;
1776   bool IsBit;
1777   bool (*ConvertResult)(int64_t&);
1778 };
1779 
1780 } // end anonymous namespace
1781 
1782 // May be called with an integer type of equivalent bitwidth.
1783 static const fltSemantics *getFltSemantics(unsigned Size) {
1784   switch (Size) {
1785   case 4:
1786     return &APFloat::IEEEsingle();
1787   case 8:
1788     return &APFloat::IEEEdouble();
1789   case 2:
1790     return &APFloat::IEEEhalf();
1791   default:
1792     llvm_unreachable("unsupported fp type");
1793   }
1794 }
1795 
1796 static const fltSemantics *getFltSemantics(MVT VT) {
1797   return getFltSemantics(VT.getSizeInBits() / 8);
1798 }
1799 
1800 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1801   switch (OperandType) {
1802   case AMDGPU::OPERAND_REG_IMM_INT32:
1803   case AMDGPU::OPERAND_REG_IMM_FP32:
1804   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1805   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1806   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1807   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1808   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1809   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1810   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1811   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1812   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1813   case AMDGPU::OPERAND_KIMM32:
1814     return &APFloat::IEEEsingle();
1815   case AMDGPU::OPERAND_REG_IMM_INT64:
1816   case AMDGPU::OPERAND_REG_IMM_FP64:
1817   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1818   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1819   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1820     return &APFloat::IEEEdouble();
1821   case AMDGPU::OPERAND_REG_IMM_INT16:
1822   case AMDGPU::OPERAND_REG_IMM_FP16:
1823   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1824   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1825   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1826   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1827   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1828   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1829   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1830   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1831   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1832   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1833   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1834   case AMDGPU::OPERAND_KIMM16:
1835     return &APFloat::IEEEhalf();
1836   default:
1837     llvm_unreachable("unsupported fp type");
1838   }
1839 }
1840 
1841 //===----------------------------------------------------------------------===//
1842 // Operand
1843 //===----------------------------------------------------------------------===//
1844 
1845 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1846   bool Lost;
1847 
1848   // Convert the literal to the floating-point semantics of the target type.
1849   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1850                                                APFloat::rmNearestTiesToEven,
1851                                                &Lost);
1852   // We allow precision loss but not overflow or underflow.
1853   if (Status != APFloat::opOK &&
1854       Lost &&
1855       ((Status & APFloat::opOverflow)  != 0 ||
1856        (Status & APFloat::opUnderflow) != 0)) {
1857     return false;
1858   }
1859 
1860   return true;
1861 }
1862 
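// Check whether Val can be truncated to Size bits without loss, i.e. whether
// it is representable as either an unsigned or a signed Size-bit integer.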
1863 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1864   return isUIntN(Size, Val) || isIntN(Size, Val);
1865 }
1866 
1867 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1868   if (VT.getScalarType() == MVT::i16) {
1869     // FP immediate values are broken.
1870     return isInlinableIntLiteral(Val);
1871   }
1872 
1873   // f16/v2f16 operands work correctly for all values.
1874   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1875 }
1876 
1877 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1878 
1879   // This is a hack to enable named inline values like
1880   // shared_base with both 32-bit and 64-bit operands.
1881   // Note that these values are defined as
1882   // 32-bit operands only.
1883   if (isInlineValue()) {
1884     return true;
1885   }
1886 
1887   if (!isImmTy(ImmTyNone)) {
1888     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1889     return false;
1890   }
1891   // TODO: We should avoid using host float here. It would be better to
1892   // check the float bit values which is what a few other places do.
1893   // We've had bot failures before due to weird NaN support on mips hosts.
1894 
1895   APInt Literal(64, Imm.Val);
1896 
1897   if (Imm.IsFPImm) { // We got fp literal token
1898     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1899       return AMDGPU::isInlinableLiteral64(Imm.Val,
1900                                           AsmParser->hasInv2PiInlineImm());
1901     }
1902 
1903     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1904     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1905       return false;
1906 
1907     if (type.getScalarSizeInBits() == 16) {
1908       return isInlineableLiteralOp16(
1909         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1910         type, AsmParser->hasInv2PiInlineImm());
1911     }
1912 
1913     // Check if single precision literal is inlinable
1914     return AMDGPU::isInlinableLiteral32(
1915       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1916       AsmParser->hasInv2PiInlineImm());
1917   }
1918 
1919   // We got int literal token.
1920   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1921     return AMDGPU::isInlinableLiteral64(Imm.Val,
1922                                         AsmParser->hasInv2PiInlineImm());
1923   }
1924 
1925   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1926     return false;
1927   }
1928 
1929   if (type.getScalarSizeInBits() == 16) {
1930     return isInlineableLiteralOp16(
1931       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1932       type, AsmParser->hasInv2PiInlineImm());
1933   }
1934 
1935   return AMDGPU::isInlinableLiteral32(
1936     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1937     AsmParser->hasInv2PiInlineImm());
1938 }
1939 
1940 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1941   // Check that this immediate can be added as a literal.
1942   if (!isImmTy(ImmTyNone)) {
1943     return false;
1944   }
1945 
1946   if (!Imm.IsFPImm) {
1947     // We got int literal token.
1948 
1949     if (type == MVT::f64 && hasFPModifiers()) {
1950       // Cannot apply fp modifiers to int literals preserving the same semantics
1951       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1952       // disable these cases.
1953       return false;
1954     }
1955 
1956     unsigned Size = type.getSizeInBits();
1957     if (Size == 64)
1958       Size = 32;
1959 
1960     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1961     // types.
1962     return isSafeTruncation(Imm.Val, Size);
1963   }
1964 
1965   // We got fp literal token
1966   if (type == MVT::f64) { // Expected 64-bit fp operand
1967     // We would set the low 32 bits of the literal to zero, but such literals are accepted.
1968     return true;
1969   }
1970 
1971   if (type == MVT::i64) { // Expected 64-bit int operand
1972     // We don't allow fp literals in 64-bit integer instructions. It is
1973     // unclear how we should encode them.
1974     return false;
1975   }
1976 
1977   // We allow fp literals with f16x2 operands assuming that the specified
1978   // literal goes into the lower half and the upper half is zero. We also
1979   // require that the literal may be losslessly converted to f16.
1980   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1981                      (type == MVT::v2i16)? MVT::i16 :
1982                      (type == MVT::v2f32)? MVT::f32 : type;
1983 
1984   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1985   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1986 }
1987 
1988 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1989   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1990 }
1991 
1992 bool AMDGPUOperand::isVRegWithInputMods() const {
1993   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1994          // GFX90A allows DPP on 64-bit operands.
1995          (isRegClass(AMDGPU::VReg_64RegClassID) &&
1996           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1997 }
1998 
1999 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2000   if (AsmParser->isVI())
2001     return isVReg32();
2002   else if (AsmParser->isGFX9Plus())
2003     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2004   else
2005     return false;
2006 }
2007 
2008 bool AMDGPUOperand::isSDWAFP16Operand() const {
2009   return isSDWAOperand(MVT::f16);
2010 }
2011 
2012 bool AMDGPUOperand::isSDWAFP32Operand() const {
2013   return isSDWAOperand(MVT::f32);
2014 }
2015 
2016 bool AMDGPUOperand::isSDWAInt16Operand() const {
2017   return isSDWAOperand(MVT::i16);
2018 }
2019 
2020 bool AMDGPUOperand::isSDWAInt32Operand() const {
2021   return isSDWAOperand(MVT::i32);
2022 }
2023 
2024 bool AMDGPUOperand::isBoolReg() const {
2025   auto FB = AsmParser->getFeatureBits();
2026   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
2027                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
2028 }
2029 
2030 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2031 {
2032   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2033   assert(Size == 2 || Size == 4 || Size == 8);
2034 
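  // Mask selecting the IEEE sign bit of an operand of the given byte size,
  // e.g. bit 31 for a 4-byte (single-precision) value.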
2035   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2036 
2037   if (Imm.Mods.Abs) {
2038     Val &= ~FpSignMask;
2039   }
2040   if (Imm.Mods.Neg) {
2041     Val ^= FpSignMask;
2042   }
2043 
2044   return Val;
2045 }
2046 
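// Add this immediate to Inst. Source operands that may take a literal go
// through addLiteralImmOperand(); all other immediates (e.g. opcode
// modifiers) are added unchanged.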
2047 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2048   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2049                              Inst.getNumOperands())) {
2050     addLiteralImmOperand(Inst, Imm.Val,
2051                          ApplyModifiers &&
2052                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2053   } else {
2054     assert(!isImmTy(ImmTyNone) || !hasModifiers());
2055     Inst.addOperand(MCOperand::createImm(Imm.Val));
2056     setImmKindNone();
2057   }
2058 }
2059 
2060 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2061   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2062   auto OpNum = Inst.getNumOperands();
2063   // Check that this operand accepts literals
2064   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2065 
2066   if (ApplyModifiers) {
2067     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2068     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2069     Val = applyInputFPModifiers(Val, Size);
2070   }
2071 
2072   APInt Literal(64, Val);
2073   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
2074 
2075   if (Imm.IsFPImm) { // We got fp literal token
2076     switch (OpTy) {
2077     case AMDGPU::OPERAND_REG_IMM_INT64:
2078     case AMDGPU::OPERAND_REG_IMM_FP64:
2079     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2080     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2081     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2082       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2083                                        AsmParser->hasInv2PiInlineImm())) {
2084         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2085         setImmKindConst();
2086         return;
2087       }
2088 
2089       // Non-inlineable
2090       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2091         // For fp operands we check if low 32 bits are zeros
2092         if (Literal.getLoBits(32) != 0) {
2093           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2094           "Can't encode literal as exact 64-bit floating-point operand. "
2095           "Low 32-bits will be set to zero");
2096         }
2097 
2098         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2099         setImmKindLiteral();
2100         return;
2101       }
2102 
2103       // We don't allow fp literals in 64-bit integer instructions. It is
2104       // unclear how we should encode them. This case should be checked earlier
2105       // in predicate methods (isLiteralImm())
2106       llvm_unreachable("fp literal in 64-bit integer instruction.");
2107 
2108     case AMDGPU::OPERAND_REG_IMM_INT32:
2109     case AMDGPU::OPERAND_REG_IMM_FP32:
2110     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2111     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2112     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2113     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2114     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2115     case AMDGPU::OPERAND_REG_IMM_INT16:
2116     case AMDGPU::OPERAND_REG_IMM_FP16:
2117     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2118     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2119     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2120     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2121     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2122     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2123     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2124     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2125     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2126     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2127     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2128     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2129     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2130     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2131     case AMDGPU::OPERAND_REG_IMM_V2INT32:
2132     case AMDGPU::OPERAND_KIMM32:
2133     case AMDGPU::OPERAND_KIMM16: {
2134       bool lost;
2135       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2136       // Convert literal to single precision
2137       FPLiteral.convert(*getOpFltSemantics(OpTy),
2138                         APFloat::rmNearestTiesToEven, &lost);
2139       // We allow precision loss but not overflow or underflow. This should be
2140       // checked earlier in isLiteralImm().
2141 
2142       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2143       Inst.addOperand(MCOperand::createImm(ImmVal));
2144       setImmKindLiteral();
2145       return;
2146     }
2147     default:
2148       llvm_unreachable("invalid operand size");
2149     }
2150 
2151     return;
2152   }
2153 
2154   // We got int literal token.
2155   // Only sign extend inline immediates.
2156   switch (OpTy) {
2157   case AMDGPU::OPERAND_REG_IMM_INT32:
2158   case AMDGPU::OPERAND_REG_IMM_FP32:
2159   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2160   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2161   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2162   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2163   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2164   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2165   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2166   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2167   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2168   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2169   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2170     if (isSafeTruncation(Val, 32) &&
2171         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2172                                      AsmParser->hasInv2PiInlineImm())) {
2173       Inst.addOperand(MCOperand::createImm(Val));
2174       setImmKindConst();
2175       return;
2176     }
2177 
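    // Not an inline constant: encode the low 32 bits as a literal.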
2178     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2179     setImmKindLiteral();
2180     return;
2181 
2182   case AMDGPU::OPERAND_REG_IMM_INT64:
2183   case AMDGPU::OPERAND_REG_IMM_FP64:
2184   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2185   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2186   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2187     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2188       Inst.addOperand(MCOperand::createImm(Val));
2189       setImmKindConst();
2190       return;
2191     }
2192 
2193     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2194     setImmKindLiteral();
2195     return;
2196 
2197   case AMDGPU::OPERAND_REG_IMM_INT16:
2198   case AMDGPU::OPERAND_REG_IMM_FP16:
2199   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2200   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2201   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2202   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2203   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2204     if (isSafeTruncation(Val, 16) &&
2205         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2206                                      AsmParser->hasInv2PiInlineImm())) {
2207       Inst.addOperand(MCOperand::createImm(Val));
2208       setImmKindConst();
2209       return;
2210     }
2211 
2212     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2213     setImmKindLiteral();
2214     return;
2215 
2216   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2217   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2218   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2219   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2220     assert(isSafeTruncation(Val, 16));
2221     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2222                                         AsmParser->hasInv2PiInlineImm()));
2223 
2224     Inst.addOperand(MCOperand::createImm(Val));
2225     return;
2226   }
2227   case AMDGPU::OPERAND_KIMM32:
2228     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2229     setImmKindNone();
2230     return;
2231   case AMDGPU::OPERAND_KIMM16:
2232     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2233     setImmKindNone();
2234     return;
2235   default:
2236     llvm_unreachable("invalid operand size");
2237   }
2238 }
2239 
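// Encode this operand as a Bitwidth-bit immediate. Integer literal tokens are
// truncated to Bitwidth bits; FP literal tokens are first converted to the
// IEEE format of that width.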
2240 template <unsigned Bitwidth>
2241 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2242   APInt Literal(64, Imm.Val);
2243   setImmKindNone();
2244 
2245   if (!Imm.IsFPImm) {
2246     // We got int literal token.
2247     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2248     return;
2249   }
2250 
2251   bool Lost;
2252   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2253   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2254                     APFloat::rmNearestTiesToEven, &Lost);
2255   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2256 }
2257 
2258 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2259   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2260 }
2261 
2262 static bool isInlineValue(unsigned Reg) {
2263   switch (Reg) {
2264   case AMDGPU::SRC_SHARED_BASE:
2265   case AMDGPU::SRC_SHARED_LIMIT:
2266   case AMDGPU::SRC_PRIVATE_BASE:
2267   case AMDGPU::SRC_PRIVATE_LIMIT:
2268   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2269     return true;
2270   case AMDGPU::SRC_VCCZ:
2271   case AMDGPU::SRC_EXECZ:
2272   case AMDGPU::SRC_SCC:
2273     return true;
2274   case AMDGPU::SGPR_NULL:
2275     return true;
2276   default:
2277     return false;
2278   }
2279 }
2280 
2281 bool AMDGPUOperand::isInlineValue() const {
2282   return isRegKind() && ::isInlineValue(getReg());
2283 }
2284 
2285 //===----------------------------------------------------------------------===//
2286 // AsmParser
2287 //===----------------------------------------------------------------------===//
2288 
2289 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2290   if (Is == IS_VGPR) {
2291     switch (RegWidth) {
2292       default: return -1;
2293       case 32:
2294         return AMDGPU::VGPR_32RegClassID;
2295       case 64:
2296         return AMDGPU::VReg_64RegClassID;
2297       case 96:
2298         return AMDGPU::VReg_96RegClassID;
2299       case 128:
2300         return AMDGPU::VReg_128RegClassID;
2301       case 160:
2302         return AMDGPU::VReg_160RegClassID;
2303       case 192:
2304         return AMDGPU::VReg_192RegClassID;
2305       case 224:
2306         return AMDGPU::VReg_224RegClassID;
2307       case 256:
2308         return AMDGPU::VReg_256RegClassID;
2309       case 512:
2310         return AMDGPU::VReg_512RegClassID;
2311       case 1024:
2312         return AMDGPU::VReg_1024RegClassID;
2313     }
2314   } else if (Is == IS_TTMP) {
2315     switch (RegWidth) {
2316       default: return -1;
2317       case 32:
2318         return AMDGPU::TTMP_32RegClassID;
2319       case 64:
2320         return AMDGPU::TTMP_64RegClassID;
2321       case 128:
2322         return AMDGPU::TTMP_128RegClassID;
2323       case 256:
2324         return AMDGPU::TTMP_256RegClassID;
2325       case 512:
2326         return AMDGPU::TTMP_512RegClassID;
2327     }
2328   } else if (Is == IS_SGPR) {
2329     switch (RegWidth) {
2330       default: return -1;
2331       case 32:
2332         return AMDGPU::SGPR_32RegClassID;
2333       case 64:
2334         return AMDGPU::SGPR_64RegClassID;
2335       case 96:
2336         return AMDGPU::SGPR_96RegClassID;
2337       case 128:
2338         return AMDGPU::SGPR_128RegClassID;
2339       case 160:
2340         return AMDGPU::SGPR_160RegClassID;
2341       case 192:
2342         return AMDGPU::SGPR_192RegClassID;
2343       case 224:
2344         return AMDGPU::SGPR_224RegClassID;
2345       case 256:
2346         return AMDGPU::SGPR_256RegClassID;
2347       case 512:
2348         return AMDGPU::SGPR_512RegClassID;
2349     }
2350   } else if (Is == IS_AGPR) {
2351     switch (RegWidth) {
2352       default: return -1;
2353       case 32:
2354         return AMDGPU::AGPR_32RegClassID;
2355       case 64:
2356         return AMDGPU::AReg_64RegClassID;
2357       case 96:
2358         return AMDGPU::AReg_96RegClassID;
2359       case 128:
2360         return AMDGPU::AReg_128RegClassID;
2361       case 160:
2362         return AMDGPU::AReg_160RegClassID;
2363       case 192:
2364         return AMDGPU::AReg_192RegClassID;
2365       case 224:
2366         return AMDGPU::AReg_224RegClassID;
2367       case 256:
2368         return AMDGPU::AReg_256RegClassID;
2369       case 512:
2370         return AMDGPU::AReg_512RegClassID;
2371       case 1024:
2372         return AMDGPU::AReg_1024RegClassID;
2373     }
2374   }
2375   return -1;
2376 }
2377 
2378 static unsigned getSpecialRegForName(StringRef RegName) {
2379   return StringSwitch<unsigned>(RegName)
2380     .Case("exec", AMDGPU::EXEC)
2381     .Case("vcc", AMDGPU::VCC)
2382     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2383     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2384     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2385     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2386     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2387     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2388     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2389     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2390     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2391     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2392     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2393     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2394     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2395     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2396     .Case("m0", AMDGPU::M0)
2397     .Case("vccz", AMDGPU::SRC_VCCZ)
2398     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2399     .Case("execz", AMDGPU::SRC_EXECZ)
2400     .Case("src_execz", AMDGPU::SRC_EXECZ)
2401     .Case("scc", AMDGPU::SRC_SCC)
2402     .Case("src_scc", AMDGPU::SRC_SCC)
2403     .Case("tba", AMDGPU::TBA)
2404     .Case("tma", AMDGPU::TMA)
2405     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2406     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2407     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2408     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2409     .Case("vcc_lo", AMDGPU::VCC_LO)
2410     .Case("vcc_hi", AMDGPU::VCC_HI)
2411     .Case("exec_lo", AMDGPU::EXEC_LO)
2412     .Case("exec_hi", AMDGPU::EXEC_HI)
2413     .Case("tma_lo", AMDGPU::TMA_LO)
2414     .Case("tma_hi", AMDGPU::TMA_HI)
2415     .Case("tba_lo", AMDGPU::TBA_LO)
2416     .Case("tba_hi", AMDGPU::TBA_HI)
2417     .Case("pc", AMDGPU::PC_REG)
2418     .Case("null", AMDGPU::SGPR_NULL)
2419     .Default(AMDGPU::NoRegister);
2420 }
2421 
2422 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2423                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2424   auto R = parseRegister();
2425   if (!R) return true;
2426   assert(R->isReg());
2427   RegNo = R->getReg();
2428   StartLoc = R->getStartLoc();
2429   EndLoc = R->getEndLoc();
2430   return false;
2431 }
2432 
2433 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2434                                     SMLoc &EndLoc) {
2435   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2436 }
2437 
2438 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2439                                                        SMLoc &StartLoc,
2440                                                        SMLoc &EndLoc) {
2441   bool Result =
2442       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2443   bool PendingErrors = getParser().hasPendingError();
2444   getParser().clearPendingErrors();
2445   if (PendingErrors)
2446     return MatchOperand_ParseFail;
2447   if (Result)
2448     return MatchOperand_NoMatch;
2449   return MatchOperand_Success;
2450 }
2451 
2452 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2453                                             RegisterKind RegKind, unsigned Reg1,
2454                                             SMLoc Loc) {
2455   switch (RegKind) {
2456   case IS_SPECIAL:
2457     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2458       Reg = AMDGPU::EXEC;
2459       RegWidth = 64;
2460       return true;
2461     }
2462     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2463       Reg = AMDGPU::FLAT_SCR;
2464       RegWidth = 64;
2465       return true;
2466     }
2467     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2468       Reg = AMDGPU::XNACK_MASK;
2469       RegWidth = 64;
2470       return true;
2471     }
2472     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2473       Reg = AMDGPU::VCC;
2474       RegWidth = 64;
2475       return true;
2476     }
2477     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2478       Reg = AMDGPU::TBA;
2479       RegWidth = 64;
2480       return true;
2481     }
2482     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2483       Reg = AMDGPU::TMA;
2484       RegWidth = 64;
2485       return true;
2486     }
2487     Error(Loc, "register does not fit in the list");
2488     return false;
2489   case IS_VGPR:
2490   case IS_SGPR:
2491   case IS_AGPR:
2492   case IS_TTMP:
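    // Regular registers in a list must be consecutive: the next register must
    // immediately follow the dwords already covered by the current group.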
2493     if (Reg1 != Reg + RegWidth / 32) {
2494       Error(Loc, "registers in a list must have consecutive indices");
2495       return false;
2496     }
2497     RegWidth += 32;
2498     return true;
2499   default:
2500     llvm_unreachable("unexpected register kind");
2501   }
2502 }
2503 
2504 struct RegInfo {
2505   StringLiteral Name;
2506   RegisterKind Kind;
2507 };
2508 
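// Prefixes of regular (non-special) register names. getRegularRegInfo()
// matches by prefix, so longer prefixes ("acc") must be listed before shorter
// prefixes they contain ("a").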
2509 static constexpr RegInfo RegularRegisters[] = {
2510   {{"v"},    IS_VGPR},
2511   {{"s"},    IS_SGPR},
2512   {{"ttmp"}, IS_TTMP},
2513   {{"acc"},  IS_AGPR},
2514   {{"a"},    IS_AGPR},
2515 };
2516 
2517 static bool isRegularReg(RegisterKind Kind) {
2518   return Kind == IS_VGPR ||
2519          Kind == IS_SGPR ||
2520          Kind == IS_TTMP ||
2521          Kind == IS_AGPR;
2522 }
2523 
2524 static const RegInfo* getRegularRegInfo(StringRef Str) {
2525   for (const RegInfo &Reg : RegularRegisters)
2526     if (Str.startswith(Reg.Name))
2527       return &Reg;
2528   return nullptr;
2529 }
2530 
2531 static bool getRegNum(StringRef Str, unsigned& Num) {
2532   return !Str.getAsInteger(10, Num);
2533 }
2534 
2535 bool
2536 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2537                             const AsmToken &NextToken) const {
2538 
2539   // A list of consecutive registers: [s0,s1,s2,s3]
2540   if (Token.is(AsmToken::LBrac))
2541     return true;
2542 
2543   if (!Token.is(AsmToken::Identifier))
2544     return false;
2545 
2546   // A single register like s0 or a range of registers like s[0:1]
2547 
2548   StringRef Str = Token.getString();
2549   const RegInfo *Reg = getRegularRegInfo(Str);
2550   if (Reg) {
2551     StringRef RegName = Reg->Name;
2552     StringRef RegSuffix = Str.substr(RegName.size());
2553     if (!RegSuffix.empty()) {
2554       unsigned Num;
2555       // A single register with an index: rXX
2556       if (getRegNum(RegSuffix, Num))
2557         return true;
2558     } else {
2559       // A range of registers: r[XX:YY].
2560       if (NextToken.is(AsmToken::LBrac))
2561         return true;
2562     }
2563   }
2564 
2565   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2566 }
2567 
2568 bool
2569 AMDGPUAsmParser::isRegister()
2570 {
2571   return isRegister(getToken(), peekToken());
2572 }
2573 
2574 unsigned
2575 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2576                                unsigned RegNum,
2577                                unsigned RegWidth,
2578                                SMLoc Loc) {
2579 
2580   assert(isRegularReg(RegKind));
2581 
2582   unsigned AlignSize = 1;
2583   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2584     // SGPR and TTMP registers must be aligned.
2585     // Max required alignment is 4 dwords.
2586     AlignSize = std::min(RegWidth / 32, 4u);
2587   }
2588 
2589   if (RegNum % AlignSize != 0) {
2590     Error(Loc, "invalid register alignment");
2591     return AMDGPU::NoRegister;
2592   }
2593 
2594   unsigned RegIdx = RegNum / AlignSize;
2595   int RCID = getRegClass(RegKind, RegWidth);
2596   if (RCID == -1) {
2597     Error(Loc, "invalid or unsupported register size");
2598     return AMDGPU::NoRegister;
2599   }
2600 
2601   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2602   const MCRegisterClass RC = TRI->getRegClass(RCID);
2603   if (RegIdx >= RC.getNumRegs()) {
2604     Error(Loc, "register index is out of range");
2605     return AMDGPU::NoRegister;
2606   }
2607 
2608   return RC.getRegister(RegIdx);
2609 }
2610 
2611 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2612   int64_t RegLo, RegHi;
2613   if (!skipToken(AsmToken::LBrac, "missing register index"))
2614     return false;
2615 
2616   SMLoc FirstIdxLoc = getLoc();
2617   SMLoc SecondIdxLoc;
2618 
2619   if (!parseExpr(RegLo))
2620     return false;
2621 
2622   if (trySkipToken(AsmToken::Colon)) {
2623     SecondIdxLoc = getLoc();
2624     if (!parseExpr(RegHi))
2625       return false;
2626   } else {
2627     RegHi = RegLo;
2628   }
2629 
2630   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2631     return false;
2632 
2633   if (!isUInt<32>(RegLo)) {
2634     Error(FirstIdxLoc, "invalid register index");
2635     return false;
2636   }
2637 
2638   if (!isUInt<32>(RegHi)) {
2639     Error(SecondIdxLoc, "invalid register index");
2640     return false;
2641   }
2642 
2643   if (RegLo > RegHi) {
2644     Error(FirstIdxLoc, "first register index should not exceed second index");
2645     return false;
2646   }
2647 
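  // Each register index covers one dword, so the range [lo:hi] spans
  // (hi - lo + 1) * 32 bits.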
2648   Num = static_cast<unsigned>(RegLo);
2649   RegWidth = 32 * ((RegHi - RegLo) + 1);
2650   return true;
2651 }
2652 
2653 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2654                                           unsigned &RegNum, unsigned &RegWidth,
2655                                           SmallVectorImpl<AsmToken> &Tokens) {
2656   assert(isToken(AsmToken::Identifier));
2657   unsigned Reg = getSpecialRegForName(getTokenStr());
2658   if (Reg) {
2659     RegNum = 0;
2660     RegWidth = 32;
2661     RegKind = IS_SPECIAL;
2662     Tokens.push_back(getToken());
2663     lex(); // skip register name
2664   }
2665   return Reg;
2666 }
2667 
2668 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2669                                           unsigned &RegNum, unsigned &RegWidth,
2670                                           SmallVectorImpl<AsmToken> &Tokens) {
2671   assert(isToken(AsmToken::Identifier));
2672   StringRef RegName = getTokenStr();
2673   auto Loc = getLoc();
2674 
2675   const RegInfo *RI = getRegularRegInfo(RegName);
2676   if (!RI) {
2677     Error(Loc, "invalid register name");
2678     return AMDGPU::NoRegister;
2679   }
2680 
2681   Tokens.push_back(getToken());
2682   lex(); // skip register name
2683 
2684   RegKind = RI->Kind;
2685   StringRef RegSuffix = RegName.substr(RI->Name.size());
2686   if (!RegSuffix.empty()) {
2687     // Single 32-bit register: vXX.
2688     if (!getRegNum(RegSuffix, RegNum)) {
2689       Error(Loc, "invalid register index");
2690       return AMDGPU::NoRegister;
2691     }
2692     RegWidth = 32;
2693   } else {
2694     // Range of registers: v[XX:YY]. ":YY" is optional.
2695     if (!ParseRegRange(RegNum, RegWidth))
2696       return AMDGPU::NoRegister;
2697   }
2698 
2699   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2700 }
2701 
2702 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2703                                        unsigned &RegWidth,
2704                                        SmallVectorImpl<AsmToken> &Tokens) {
2705   unsigned Reg = AMDGPU::NoRegister;
2706   auto ListLoc = getLoc();
2707 
2708   if (!skipToken(AsmToken::LBrac,
2709                  "expected a register or a list of registers")) {
2710     return AMDGPU::NoRegister;
2711   }
2712 
2713   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2714 
2715   auto Loc = getLoc();
2716   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2717     return AMDGPU::NoRegister;
2718   if (RegWidth != 32) {
2719     Error(Loc, "expected a single 32-bit register");
2720     return AMDGPU::NoRegister;
2721   }
2722 
2723   for (; trySkipToken(AsmToken::Comma); ) {
2724     RegisterKind NextRegKind;
2725     unsigned NextReg, NextRegNum, NextRegWidth;
2726     Loc = getLoc();
2727 
2728     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2729                              NextRegNum, NextRegWidth,
2730                              Tokens)) {
2731       return AMDGPU::NoRegister;
2732     }
2733     if (NextRegWidth != 32) {
2734       Error(Loc, "expected a single 32-bit register");
2735       return AMDGPU::NoRegister;
2736     }
2737     if (NextRegKind != RegKind) {
2738       Error(Loc, "registers in a list must be of the same kind");
2739       return AMDGPU::NoRegister;
2740     }
2741     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2742       return AMDGPU::NoRegister;
2743   }
2744 
2745   if (!skipToken(AsmToken::RBrac,
2746                  "expected a comma or a closing square bracket")) {
2747     return AMDGPU::NoRegister;
2748   }
2749 
2750   if (isRegularReg(RegKind))
2751     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2752 
2753   return Reg;
2754 }
2755 
2756 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2757                                           unsigned &RegNum, unsigned &RegWidth,
2758                                           SmallVectorImpl<AsmToken> &Tokens) {
2759   auto Loc = getLoc();
2760   Reg = AMDGPU::NoRegister;
2761 
2762   if (isToken(AsmToken::Identifier)) {
2763     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2764     if (Reg == AMDGPU::NoRegister)
2765       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2766   } else {
2767     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2768   }
2769 
2770   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2771   if (Reg == AMDGPU::NoRegister) {
2772     assert(Parser.hasPendingError());
2773     return false;
2774   }
2775 
2776   if (!subtargetHasRegister(*TRI, Reg)) {
2777     if (Reg == AMDGPU::SGPR_NULL) {
2778       Error(Loc, "'null' operand is not supported on this GPU");
2779     } else {
2780       Error(Loc, "register not available on this GPU");
2781     }
2782     return false;
2783   }
2784 
2785   return true;
2786 }
2787 
2788 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2789                                           unsigned &RegNum, unsigned &RegWidth,
2790                                           bool RestoreOnFailure /*=false*/) {
2791   Reg = AMDGPU::NoRegister;
2792 
2793   SmallVector<AsmToken, 1> Tokens;
2794   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2795     if (RestoreOnFailure) {
2796       while (!Tokens.empty()) {
2797         getLexer().UnLex(Tokens.pop_back_val());
2798       }
2799     }
2800     return true;
2801   }
2802   return false;
2803 }
2804 
2805 Optional<StringRef>
2806 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2807   switch (RegKind) {
2808   case IS_VGPR:
2809     return StringRef(".amdgcn.next_free_vgpr");
2810   case IS_SGPR:
2811     return StringRef(".amdgcn.next_free_sgpr");
2812   default:
2813     return None;
2814   }
2815 }
2816 
2817 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2818   auto SymbolName = getGprCountSymbolName(RegKind);
2819   assert(SymbolName && "initializing invalid register kind");
2820   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2821   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2822 }
2823 
2824 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2825                                             unsigned DwordRegIndex,
2826                                             unsigned RegWidth) {
2827   // Symbols are only defined for GCN targets
2828   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2829     return true;
2830 
2831   auto SymbolName = getGprCountSymbolName(RegKind);
2832   if (!SymbolName)
2833     return true;
2834   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2835 
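  // Highest dword register index used by this operand; the symbol tracks the
  // next free index, i.e. NewMax + 1.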
2836   int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
2837   int64_t OldCount;
2838 
2839   if (!Sym->isVariable())
2840     return !Error(getLoc(),
2841                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2842   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2843     return !Error(
2844         getLoc(),
2845         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2846 
2847   if (OldCount <= NewMax)
2848     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2849 
2850   return true;
2851 }
2852 
2853 std::unique_ptr<AMDGPUOperand>
2854 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2855   const auto &Tok = getToken();
2856   SMLoc StartLoc = Tok.getLoc();
2857   SMLoc EndLoc = Tok.getEndLoc();
2858   RegisterKind RegKind;
2859   unsigned Reg, RegNum, RegWidth;
2860 
2861   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2862     return nullptr;
2863   }
2864   if (isHsaAbiVersion3AndAbove(&getSTI())) {
2865     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2866       return nullptr;
2867   } else
2868     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2869   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2870 }
2871 
2872 OperandMatchResultTy
2873 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2874   // TODO: add syntactic sugar for 1/(2*PI)
2875 
2876   assert(!isRegister());
2877   assert(!isModifier());
2878 
2879   const auto& Tok = getToken();
2880   const auto& NextTok = peekToken();
2881   bool IsReal = Tok.is(AsmToken::Real);
2882   SMLoc S = getLoc();
2883   bool Negate = false;
2884 
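  // A '-' immediately followed by an FP literal is folded into the literal:
  // consume the sign here and negate the parsed value below.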
2885   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2886     lex();
2887     IsReal = true;
2888     Negate = true;
2889   }
2890 
2891   if (IsReal) {
2892     // Floating-point expressions are not supported;
2893     // only floating-point literals with an optional
2894     // sign are allowed.
2895 
2896     StringRef Num = getTokenStr();
2897     lex();
2898 
2899     APFloat RealVal(APFloat::IEEEdouble());
2900     auto roundMode = APFloat::rmNearestTiesToEven;
2901     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2902       return MatchOperand_ParseFail;
2903     }
2904     if (Negate)
2905       RealVal.changeSign();
2906 
2907     Operands.push_back(
2908       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2909                                AMDGPUOperand::ImmTyNone, true));
2910 
2911     return MatchOperand_Success;
2912 
2913   } else {
2914     int64_t IntVal;
2915     const MCExpr *Expr;
2916     SMLoc S = getLoc();
2917 
2918     if (HasSP3AbsModifier) {
2919       // This is a workaround for handling expressions
2920       // as arguments of the SP3 'abs' modifier, for example:
2921       //     |1.0|
2922       //     |-1|
2923       //     |1+x|
2924       // This syntax is not compatible with the syntax of standard
2925       // MC expressions (due to the trailing '|').
2926       SMLoc EndLoc;
2927       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2928         return MatchOperand_ParseFail;
2929     } else {
2930       if (Parser.parseExpression(Expr))
2931         return MatchOperand_ParseFail;
2932     }
2933 
2934     if (Expr->evaluateAsAbsolute(IntVal)) {
2935       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2936     } else {
2937       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2938     }
2939 
2940     return MatchOperand_Success;
2941   }
2942 
2943   return MatchOperand_NoMatch;
2944 }
2945 
2946 OperandMatchResultTy
2947 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2948   if (!isRegister())
2949     return MatchOperand_NoMatch;
2950 
2951   if (auto R = parseRegister()) {
2952     assert(R->isReg());
2953     Operands.push_back(std::move(R));
2954     return MatchOperand_Success;
2955   }
2956   return MatchOperand_ParseFail;
2957 }
2958 
2959 OperandMatchResultTy
2960 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2961   auto res = parseReg(Operands);
2962   if (res != MatchOperand_NoMatch) {
2963     return res;
2964   } else if (isModifier()) {
2965     return MatchOperand_NoMatch;
2966   } else {
2967     return parseImm(Operands, HasSP3AbsMod);
2968   }
2969 }
2970 
2971 bool
2972 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2973   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2974     const auto &str = Token.getString();
2975     return str == "abs" || str == "neg" || str == "sext";
2976   }
2977   return false;
2978 }
2979 
2980 bool
2981 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2982   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2983 }
2984 
2985 bool
2986 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2987   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2988 }
2989 
2990 bool
2991 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2992   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2993 }
2994 
2995 // Check if this is an operand modifier or an opcode modifier
2996 // which may look like an expression but is not. We should
2997 // avoid parsing these modifiers as expressions. Currently
2998 // recognized sequences are:
2999 //   |...|
3000 //   abs(...)
3001 //   neg(...)
3002 //   sext(...)
3003 //   -reg
3004 //   -|...|
3005 //   -abs(...)
3006 //   name:...
3007 // Note that simple opcode modifiers like 'gds' may be parsed as
3008 // expressions; this is a special case. See getExpressionAsToken.
3009 //
3010 bool
3011 AMDGPUAsmParser::isModifier() {
3012 
3013   AsmToken Tok = getToken();
3014   AsmToken NextToken[2];
3015   peekTokens(NextToken);
3016 
3017   return isOperandModifier(Tok, NextToken[0]) ||
3018          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3019          isOpcodeModifierWithVal(Tok, NextToken[0]);
3020 }
3021 
3022 // Check if the current token is an SP3 'neg' modifier.
3023 // Currently this modifier is allowed in the following context:
3024 //
3025 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3026 // 2. Before an 'abs' modifier: -abs(...)
3027 // 3. Before an SP3 'abs' modifier: -|...|
3028 //
3029 // In all other cases "-" is handled as a part
3030 // of an expression that follows the sign.
3031 //
3032 // Note: When "-" is followed by an integer literal,
3033 // this is interpreted as integer negation rather
3034 // than a floating-point NEG modifier applied to the literal.
3035 // Besides being counter-intuitive, such use of the floating-point
3036 // NEG modifier would have given integer literals different
3037 // meanings with VOP1/2/C and VOP3 encodings,
3038 // for example:
3039 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3040 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3041 // Negative fp literals with a preceding "-" are
3042 // handled likewise for uniformity.
3043 //
3044 bool
3045 AMDGPUAsmParser::parseSP3NegModifier() {
3046 
3047   AsmToken NextToken[2];
3048   peekTokens(NextToken);
3049 
3050   if (isToken(AsmToken::Minus) &&
3051       (isRegister(NextToken[0], NextToken[1]) ||
3052        NextToken[0].is(AsmToken::Pipe) ||
3053        isId(NextToken[0], "abs"))) {
3054     lex();
3055     return true;
3056   }
3057 
3058   return false;
3059 }
3060 
3061 OperandMatchResultTy
3062 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3063                                               bool AllowImm) {
3064   bool Neg, SP3Neg;
3065   bool Abs, SP3Abs;
3066   SMLoc Loc;
3067 
3068   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3069   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
3070     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3071     return MatchOperand_ParseFail;
3072   }
3073 
3074   SP3Neg = parseSP3NegModifier();
3075 
3076   Loc = getLoc();
3077   Neg = trySkipId("neg");
3078   if (Neg && SP3Neg) {
3079     Error(Loc, "expected register or immediate");
3080     return MatchOperand_ParseFail;
3081   }
3082   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3083     return MatchOperand_ParseFail;
3084 
3085   Abs = trySkipId("abs");
3086   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3087     return MatchOperand_ParseFail;
3088 
3089   Loc = getLoc();
3090   SP3Abs = trySkipToken(AsmToken::Pipe);
3091   if (Abs && SP3Abs) {
3092     Error(Loc, "expected register or immediate");
3093     return MatchOperand_ParseFail;
3094   }
3095 
3096   OperandMatchResultTy Res;
3097   if (AllowImm) {
3098     Res = parseRegOrImm(Operands, SP3Abs);
3099   } else {
3100     Res = parseReg(Operands);
3101   }
3102   if (Res != MatchOperand_Success) {
3103     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
3104   }
3105 
3106   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3107     return MatchOperand_ParseFail;
3108   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3109     return MatchOperand_ParseFail;
3110   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3111     return MatchOperand_ParseFail;
3112 
3113   AMDGPUOperand::Modifiers Mods;
3114   Mods.Abs = Abs || SP3Abs;
3115   Mods.Neg = Neg || SP3Neg;
3116 
3117   if (Mods.hasFPModifiers()) {
3118     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3119     if (Op.isExpr()) {
3120       Error(Op.getStartLoc(), "expected an absolute expression");
3121       return MatchOperand_ParseFail;
3122     }
3123     Op.setModifiers(Mods);
3124   }
3125   return MatchOperand_Success;
3126 }
3127 
3128 OperandMatchResultTy
3129 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3130                                                bool AllowImm) {
3131   bool Sext = trySkipId("sext");
3132   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3133     return MatchOperand_ParseFail;
3134 
3135   OperandMatchResultTy Res;
3136   if (AllowImm) {
3137     Res = parseRegOrImm(Operands);
3138   } else {
3139     Res = parseReg(Operands);
3140   }
3141   if (Res != MatchOperand_Success) {
3142     return Sext? MatchOperand_ParseFail : Res;
3143   }
3144 
3145   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3146     return MatchOperand_ParseFail;
3147 
3148   AMDGPUOperand::Modifiers Mods;
3149   Mods.Sext = Sext;
3150 
3151   if (Mods.hasIntModifiers()) {
3152     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3153     if (Op.isExpr()) {
3154       Error(Op.getStartLoc(), "expected an absolute expression");
3155       return MatchOperand_ParseFail;
3156     }
3157     Op.setModifiers(Mods);
3158   }
3159 
3160   return MatchOperand_Success;
3161 }
3162 
3163 OperandMatchResultTy
3164 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3165   return parseRegOrImmWithFPInputMods(Operands, false);
3166 }
3167 
3168 OperandMatchResultTy
3169 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3170   return parseRegOrImmWithIntInputMods(Operands, false);
3171 }
3172 
3173 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3174   auto Loc = getLoc();
3175   if (trySkipId("off")) {
3176     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3177                                                 AMDGPUOperand::ImmTyOff, false));
3178     return MatchOperand_Success;
3179   }
3180 
3181   if (!isRegister())
3182     return MatchOperand_NoMatch;
3183 
3184   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3185   if (Reg) {
3186     Operands.push_back(std::move(Reg));
3187     return MatchOperand_Success;
3188   }
3189 
3190   return MatchOperand_ParseFail;
3191 
3192 }
3193 
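// Informal note: a "forced" encoding comes from an explicit suffix on the
// mnemonic (e.g. v_add_f32_e32 vs v_add_f32_e64, or an _sdwa/_dpp suffix);
// matches whose encoding does not agree with the request are rejected.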
3194 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3195   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3196 
3197   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3198       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3199       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3200       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3201     return Match_InvalidOperand;
3202 
3203   if ((TSFlags & SIInstrFlags::VOP3) &&
3204       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3205       getForcedEncodingSize() != 64)
3206     return Match_PreferE32;
3207 
3208   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3209       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3210     // v_mac_f32/16 allow only dst_sel == DWORD;
3211     auto OpNum =
3212         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3213     const auto &Op = Inst.getOperand(OpNum);
3214     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3215       return Match_InvalidOperand;
3216     }
3217   }
3218 
3219   return Match_Success;
3220 }
3221 
3222 static ArrayRef<unsigned> getAllVariants() {
3223   static const unsigned Variants[] = {
3224     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3225     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3226   };
3227 
3228   return makeArrayRef(Variants);
3229 }
3230 
3231 // What asm variants we should check
3232 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3233   if (getForcedEncodingSize() == 32) {
3234     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3235     return makeArrayRef(Variants);
3236   }
3237 
3238   if (isForcedVOP3()) {
3239     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3240     return makeArrayRef(Variants);
3241   }
3242 
3243   if (isForcedSDWA()) {
3244     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3245                                         AMDGPUAsmVariants::SDWA9};
3246     return makeArrayRef(Variants);
3247   }
3248 
3249   if (isForcedDPP()) {
3250     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3251     return makeArrayRef(Variants);
3252   }
3253 
3254   return getAllVariants();
3255 }
3256 
3257 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3258   if (getForcedEncodingSize() == 32)
3259     return "e32";
3260 
3261   if (isForcedVOP3())
3262     return "e64";
3263 
3264   if (isForcedSDWA())
3265     return "sdwa";
3266 
3267   if (isForcedDPP())
3268     return "dpp";
3269 
3270   return "";
3271 }
3272 
3273 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3274   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3275   const unsigned Num = Desc.getNumImplicitUses();
3276   for (unsigned i = 0; i < Num; ++i) {
3277     unsigned Reg = Desc.ImplicitUses[i];
3278     switch (Reg) {
3279     case AMDGPU::FLAT_SCR:
3280     case AMDGPU::VCC:
3281     case AMDGPU::VCC_LO:
3282     case AMDGPU::VCC_HI:
3283     case AMDGPU::M0:
3284       return Reg;
3285     default:
3286       break;
3287     }
3288   }
3289   return AMDGPU::NoRegister;
3290 }
3291 
// NB: This code is correct only when used to check constant
// bus limitations because GFX7 has no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
3296 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3297                                        unsigned OpIdx) const {
3298   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3299 
3300   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3301     return false;
3302   }
3303 
3304   const MCOperand &MO = Inst.getOperand(OpIdx);
3305 
3306   int64_t Val = MO.getImm();
3307   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3308 
3309   switch (OpSize) { // expected operand size
3310   case 8:
3311     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3312   case 4:
3313     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3314   case 2: {
3315     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3316     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3317         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3318         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3319       return AMDGPU::isInlinableIntLiteral(Val);
3320 
3321     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3322         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3323         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3324       return AMDGPU::isInlinableIntLiteralV216(Val);
3325 
3326     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3327         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3328         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3329       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3330 
3331     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3332   }
3333   default:
3334     llvm_unreachable("invalid operand size");
3335   }
3336 }
3337 
3338 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3339   if (!isGFX10Plus())
3340     return 1;
3341 
3342   switch (Opcode) {
3343   // 64-bit shift instructions can use only one scalar value input
3344   case AMDGPU::V_LSHLREV_B64_e64:
3345   case AMDGPU::V_LSHLREV_B64_gfx10:
3346   case AMDGPU::V_LSHRREV_B64_e64:
3347   case AMDGPU::V_LSHRREV_B64_gfx10:
3348   case AMDGPU::V_ASHRREV_I64_e64:
3349   case AMDGPU::V_ASHRREV_I64_gfx10:
3350   case AMDGPU::V_LSHL_B64_e64:
3351   case AMDGPU::V_LSHR_B64_e64:
3352   case AMDGPU::V_ASHR_I64_e64:
3353     return 1;
3354   default:
3355     return 2;
3356   }
3357 }
3358 
3359 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3360   const MCOperand &MO = Inst.getOperand(OpIdx);
3361   if (MO.isImm()) {
3362     return !isInlineConstant(Inst, OpIdx);
3363   } else if (MO.isReg()) {
3364     auto Reg = MO.getReg();
3365     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3366     auto PReg = mc2PseudoReg(Reg);
3367     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3368   } else {
3369     return true;
3370   }
3371 }
3372 
3373 bool
3374 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3375                                                 const OperandVector &Operands) {
3376   const unsigned Opcode = Inst.getOpcode();
3377   const MCInstrDesc &Desc = MII.get(Opcode);
3378   unsigned LastSGPR = AMDGPU::NoRegister;
3379   unsigned ConstantBusUseCount = 0;
3380   unsigned NumLiterals = 0;
3381   unsigned LiteralSize;
3382 
3383   if (Desc.TSFlags &
3384       (SIInstrFlags::VOPC |
3385        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3386        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3387        SIInstrFlags::SDWA)) {
3388     // Check special imm operands (used by madmk, etc)
3389     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3390       ++NumLiterals;
3391       LiteralSize = 4;
3392     }
3393 
3394     SmallDenseSet<unsigned> SGPRsUsed;
3395     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3396     if (SGPRUsed != AMDGPU::NoRegister) {
3397       SGPRsUsed.insert(SGPRUsed);
3398       ++ConstantBusUseCount;
3399     }
3400 
3401     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3402     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3403     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3404 
3405     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3406 
3407     for (int OpIdx : OpIndices) {
3408       if (OpIdx == -1) break;
3409 
3410       const MCOperand &MO = Inst.getOperand(OpIdx);
3411       if (usesConstantBus(Inst, OpIdx)) {
3412         if (MO.isReg()) {
3413           LastSGPR = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersection like these:
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but are disabled anyway.
          // Note that this code mimics SIInstrInfo::verifyInstruction.
3420           if (!SGPRsUsed.count(LastSGPR)) {
3421             SGPRsUsed.insert(LastSGPR);
3422             ++ConstantBusUseCount;
3423           }
3424         } else { // Expression or a literal
3425 
3426           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3427             continue; // special operand like VINTERP attr_chan
3428 
          // An instruction may use only one literal.
          // This has been validated in a previous step.
          // See validateVOPLiteral.
          // The literal may be used by more than one operand.
          // If all these operands have the same size,
          // the literal counts as one scalar value.
          // Otherwise it counts as 2 scalar values.
          // See "GFX10 Shader Programming", section 3.6.2.3.
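          // For example (illustrative): a 32-bit literal feeding both a
          // 32-bit and a 16-bit source operand has mismatched operand
          // sizes and is therefore counted as 2 scalar values.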
3437 
3438           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3439           if (Size < 4) Size = 4;
3440 
3441           if (NumLiterals == 0) {
3442             NumLiterals = 1;
3443             LiteralSize = Size;
3444           } else if (LiteralSize != Size) {
3445             NumLiterals = 2;
3446           }
3447         }
3448       }
3449     }
3450   }
3451   ConstantBusUseCount += NumLiterals;
3452 
3453   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3454     return true;
3455 
3456   SMLoc LitLoc = getLitLoc(Operands);
3457   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3458   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3459   Error(Loc, "invalid operand (violates constant bus restrictions)");
3460   return false;
3461 }
3462 
3463 bool
3464 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3465                                                  const OperandVector &Operands) {
3466   const unsigned Opcode = Inst.getOpcode();
3467   const MCInstrDesc &Desc = MII.get(Opcode);
3468 
3469   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3470   if (DstIdx == -1 ||
3471       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3472     return true;
3473   }
3474 
3475   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3476 
3477   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3478   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3479   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3480 
3481   assert(DstIdx != -1);
3482   const MCOperand &Dst = Inst.getOperand(DstIdx);
3483   assert(Dst.isReg());
3484 
3485   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3486 
3487   for (int SrcIdx : SrcIndices) {
3488     if (SrcIdx == -1) break;
3489     const MCOperand &Src = Inst.getOperand(SrcIdx);
3490     if (Src.isReg()) {
3491       if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) {
3492         const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3493         Error(getRegLoc(SrcReg, Operands),
3494           "destination must be different than all sources");
3495         return false;
3496       }
3497     }
3498   }
3499 
3500   return true;
3501 }
3502 
3503 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3504 
3505   const unsigned Opc = Inst.getOpcode();
3506   const MCInstrDesc &Desc = MII.get(Opc);
3507 
3508   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3509     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3510     assert(ClampIdx != -1);
3511     return Inst.getOperand(ClampIdx).getImm() == 0;
3512   }
3513 
3514   return true;
3515 }
3516 
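// Check that the MIMG vdata register size matches the number of data
// components implied by dmask (always 4 for gather4), halved for packed
// d16 and extended by one dword when tfe is set. Illustrative example:
// dmask=0x7 selects 3 components; with packed d16 that is 2 dwords, and
// tfe adds one more.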
3517 Optional<StringRef> AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3518 
3519   const unsigned Opc = Inst.getOpcode();
3520   const MCInstrDesc &Desc = MII.get(Opc);
3521 
3522   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3523     return None;
3524 
3525   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3526   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3527   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3528 
3529   assert(VDataIdx != -1);
3530 
3531   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3532     return None;
3533 
3534   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3535   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3536   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3537   if (DMask == 0)
3538     DMask = 1;
3539 
3540   bool isPackedD16 = false;
3541   unsigned DataSize =
3542     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3543   if (hasPackedD16()) {
3544     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3545     isPackedD16 = D16Idx >= 0;
3546     if (isPackedD16 && Inst.getOperand(D16Idx).getImm())
3547       DataSize = (DataSize + 1) / 2;
3548   }
3549 
3550   if ((VDataSize / 4) == DataSize + TFESize)
3551     return None;
3552 
3553   return StringRef(isPackedD16
3554                        ? "image data size does not match dmask, d16 and tfe"
3555                        : "image data size does not match dmask and tfe");
3556 }
3557 
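// Check the MIMG address size. Informal summary: in the NSA form each
// address component is a separate vaddr operand, so the actual size is the
// number of operands between vaddr0 and srsrc; in the legacy form it is the
// size in dwords of the vaddr0 register tuple.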
3558 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3559   const unsigned Opc = Inst.getOpcode();
3560   const MCInstrDesc &Desc = MII.get(Opc);
3561 
3562   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3563     return true;
3564 
3565   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3566 
3567   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3568       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3569   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3570   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3571   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3572   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3573 
3574   assert(VAddr0Idx != -1);
3575   assert(SrsrcIdx != -1);
3576   assert(SrsrcIdx > VAddr0Idx);
3577 
3578   if (DimIdx == -1)
3579     return true; // intersect_ray
3580 
3581   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3582   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3583   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3584   unsigned ActualAddrSize =
3585       IsNSA ? SrsrcIdx - VAddr0Idx
3586             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3587   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3588 
3589   unsigned ExpectedAddrSize =
3590       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3591 
3592   if (!IsNSA) {
3593     if (ExpectedAddrSize > 8)
3594       ExpectedAddrSize = 16;
3595 
3596     // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3597     // This provides backward compatibility for assembly created
3598     // before 160b/192b/224b types were directly supported.
3599     if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3600       return true;
3601   }
3602 
3603   return ActualAddrSize == ExpectedAddrSize;
3604 }
3605 
3606 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3607 
3608   const unsigned Opc = Inst.getOpcode();
3609   const MCInstrDesc &Desc = MII.get(Opc);
3610 
3611   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3612     return true;
3613   if (!Desc.mayLoad() || !Desc.mayStore())
3614     return true; // Not atomic
3615 
3616   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3617   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3618 
  // This is an incomplete check because image_atomic_cmpswap
  // may only use 0x3 and 0xf while other atomic operations
  // may use 0x1 and 0x3. However, these limitations are
  // verified when we check that dmask matches dst size.
3623   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3624 }
3625 
3626 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3627 
3628   const unsigned Opc = Inst.getOpcode();
3629   const MCInstrDesc &Desc = MII.get(Opc);
3630 
3631   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3632     return true;
3633 
3634   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3635   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3636 
3637   // GATHER4 instructions use dmask in a different fashion compared to
3638   // other MIMG instructions. The only useful DMASK values are
3639   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3640   // (red,red,red,red) etc.) The ISA document doesn't mention
3641   // this.
3642   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3643 }
3644 
3645 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3646   const unsigned Opc = Inst.getOpcode();
3647   const MCInstrDesc &Desc = MII.get(Opc);
3648 
3649   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3650     return true;
3651 
3652   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3653   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3654       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3655 
3656   if (!BaseOpcode->MSAA)
3657     return true;
3658 
3659   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3660   assert(DimIdx != -1);
3661 
3662   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3663   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3664 
3665   return DimInfo->MSAA;
3666 }
3667 
3668 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3669 {
3670   switch (Opcode) {
3671   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3672   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3673   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3674     return true;
3675   default:
3676     return false;
3677   }
3678 }
3679 
// movrels* opcodes should only allow VGPRs as src0.
// This is specified in the .td description for vop1/vop3,
// but sdwa is handled differently. See isSDWAOperand.
3683 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3684                                       const OperandVector &Operands) {
3685 
3686   const unsigned Opc = Inst.getOpcode();
3687   const MCInstrDesc &Desc = MII.get(Opc);
3688 
3689   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3690     return true;
3691 
3692   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3693   assert(Src0Idx != -1);
3694 
3695   SMLoc ErrLoc;
3696   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3697   if (Src0.isReg()) {
3698     auto Reg = mc2PseudoReg(Src0.getReg());
3699     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3700     if (!isSGPR(Reg, TRI))
3701       return true;
3702     ErrLoc = getRegLoc(Reg, Operands);
3703   } else {
3704     ErrLoc = getConstLoc(Operands);
3705   }
3706 
3707   Error(ErrLoc, "source operand must be a VGPR");
3708   return false;
3709 }
3710 
3711 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3712                                           const OperandVector &Operands) {
3713 
3714   const unsigned Opc = Inst.getOpcode();
3715 
3716   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3717     return true;
3718 
3719   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3720   assert(Src0Idx != -1);
3721 
3722   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3723   if (!Src0.isReg())
3724     return true;
3725 
3726   auto Reg = mc2PseudoReg(Src0.getReg());
3727   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3728   if (!isGFX90A() && isSGPR(Reg, TRI)) {
3729     Error(getRegLoc(Reg, Operands),
3730           "source operand must be either a VGPR or an inline constant");
3731     return false;
3732   }
3733 
3734   return true;
3735 }
3736 
3737 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3738                                    const OperandVector &Operands) {
3739   const unsigned Opc = Inst.getOpcode();
3740   const MCInstrDesc &Desc = MII.get(Opc);
3741 
3742   if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3743     return true;
3744 
3745   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3746   if (Src2Idx == -1)
3747     return true;
3748 
3749   const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3750   if (!Src2.isReg())
3751     return true;
3752 
3753   MCRegister Src2Reg = Src2.getReg();
3754   MCRegister DstReg = Inst.getOperand(0).getReg();
3755   if (Src2Reg == DstReg)
3756     return true;
3757 
3758   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3759   if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
3760     return true;
3761 
3762   if (TRI->regsOverlap(Src2Reg, DstReg)) {
3763     Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3764           "source 2 operand must not partially overlap with dst");
3765     return false;
3766   }
3767 
3768   return true;
3769 }
3770 
3771 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3772   switch (Inst.getOpcode()) {
3773   default:
3774     return true;
3775   case V_DIV_SCALE_F32_gfx6_gfx7:
3776   case V_DIV_SCALE_F32_vi:
3777   case V_DIV_SCALE_F32_gfx10:
3778   case V_DIV_SCALE_F64_gfx6_gfx7:
3779   case V_DIV_SCALE_F64_vi:
3780   case V_DIV_SCALE_F64_gfx10:
3781     break;
3782   }
3783 
3784   // TODO: Check that src0 = src1 or src2.
3785 
  for (auto Name : {AMDGPU::OpName::src0_modifiers,
                    AMDGPU::OpName::src1_modifiers,
                    AMDGPU::OpName::src2_modifiers}) {
3789     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3790             .getImm() &
3791         SISrcMods::ABS) {
3792       return false;
3793     }
3794   }
3795 
3796   return true;
3797 }
3798 
3799 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3800 
3801   const unsigned Opc = Inst.getOpcode();
3802   const MCInstrDesc &Desc = MII.get(Opc);
3803 
3804   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3805     return true;
3806 
3807   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3808   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3809     if (isCI() || isSI())
3810       return false;
3811   }
3812 
3813   return true;
3814 }
3815 
3816 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3817   const unsigned Opc = Inst.getOpcode();
3818   const MCInstrDesc &Desc = MII.get(Opc);
3819 
3820   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3821     return true;
3822 
3823   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3824   if (DimIdx < 0)
3825     return true;
3826 
3827   long Imm = Inst.getOperand(DimIdx).getImm();
3828   if (Imm < 0 || Imm >= 8)
3829     return false;
3830 
3831   return true;
3832 }
3833 
3834 static bool IsRevOpcode(const unsigned Opcode)
3835 {
3836   switch (Opcode) {
3837   case AMDGPU::V_SUBREV_F32_e32:
3838   case AMDGPU::V_SUBREV_F32_e64:
3839   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3840   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3841   case AMDGPU::V_SUBREV_F32_e32_vi:
3842   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3843   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3844   case AMDGPU::V_SUBREV_F32_e64_vi:
3845 
3846   case AMDGPU::V_SUBREV_CO_U32_e32:
3847   case AMDGPU::V_SUBREV_CO_U32_e64:
3848   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3849   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3850 
3851   case AMDGPU::V_SUBBREV_U32_e32:
3852   case AMDGPU::V_SUBBREV_U32_e64:
3853   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3854   case AMDGPU::V_SUBBREV_U32_e32_vi:
3855   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3856   case AMDGPU::V_SUBBREV_U32_e64_vi:
3857 
3858   case AMDGPU::V_SUBREV_U32_e32:
3859   case AMDGPU::V_SUBREV_U32_e64:
3860   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3861   case AMDGPU::V_SUBREV_U32_e32_vi:
3862   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3863   case AMDGPU::V_SUBREV_U32_e64_vi:
3864 
3865   case AMDGPU::V_SUBREV_F16_e32:
3866   case AMDGPU::V_SUBREV_F16_e64:
3867   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3868   case AMDGPU::V_SUBREV_F16_e32_vi:
3869   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3870   case AMDGPU::V_SUBREV_F16_e64_vi:
3871 
3872   case AMDGPU::V_SUBREV_U16_e32:
3873   case AMDGPU::V_SUBREV_U16_e64:
3874   case AMDGPU::V_SUBREV_U16_e32_vi:
3875   case AMDGPU::V_SUBREV_U16_e64_vi:
3876 
3877   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3878   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3879   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3880 
3881   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3882   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3883 
3884   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3885   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3886 
3887   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3888   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3889 
3890   case AMDGPU::V_LSHRREV_B32_e32:
3891   case AMDGPU::V_LSHRREV_B32_e64:
3892   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3893   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3894   case AMDGPU::V_LSHRREV_B32_e32_vi:
3895   case AMDGPU::V_LSHRREV_B32_e64_vi:
3896   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3897   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3898 
3899   case AMDGPU::V_ASHRREV_I32_e32:
3900   case AMDGPU::V_ASHRREV_I32_e64:
3901   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3902   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3903   case AMDGPU::V_ASHRREV_I32_e32_vi:
3904   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3905   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3906   case AMDGPU::V_ASHRREV_I32_e64_vi:
3907 
3908   case AMDGPU::V_LSHLREV_B32_e32:
3909   case AMDGPU::V_LSHLREV_B32_e64:
3910   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3911   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3912   case AMDGPU::V_LSHLREV_B32_e32_vi:
3913   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3914   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3915   case AMDGPU::V_LSHLREV_B32_e64_vi:
3916 
3917   case AMDGPU::V_LSHLREV_B16_e32:
3918   case AMDGPU::V_LSHLREV_B16_e64:
3919   case AMDGPU::V_LSHLREV_B16_e32_vi:
3920   case AMDGPU::V_LSHLREV_B16_e64_vi:
3921   case AMDGPU::V_LSHLREV_B16_gfx10:
3922 
3923   case AMDGPU::V_LSHRREV_B16_e32:
3924   case AMDGPU::V_LSHRREV_B16_e64:
3925   case AMDGPU::V_LSHRREV_B16_e32_vi:
3926   case AMDGPU::V_LSHRREV_B16_e64_vi:
3927   case AMDGPU::V_LSHRREV_B16_gfx10:
3928 
3929   case AMDGPU::V_ASHRREV_I16_e32:
3930   case AMDGPU::V_ASHRREV_I16_e64:
3931   case AMDGPU::V_ASHRREV_I16_e32_vi:
3932   case AMDGPU::V_ASHRREV_I16_e64_vi:
3933   case AMDGPU::V_ASHRREV_I16_gfx10:
3934 
3935   case AMDGPU::V_LSHLREV_B64_e64:
3936   case AMDGPU::V_LSHLREV_B64_gfx10:
3937   case AMDGPU::V_LSHLREV_B64_vi:
3938 
3939   case AMDGPU::V_LSHRREV_B64_e64:
3940   case AMDGPU::V_LSHRREV_B64_gfx10:
3941   case AMDGPU::V_LSHRREV_B64_vi:
3942 
3943   case AMDGPU::V_ASHRREV_I64_e64:
3944   case AMDGPU::V_ASHRREV_I64_gfx10:
3945   case AMDGPU::V_ASHRREV_I64_vi:
3946 
3947   case AMDGPU::V_PK_LSHLREV_B16:
3948   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3949   case AMDGPU::V_PK_LSHLREV_B16_vi:
3950 
3951   case AMDGPU::V_PK_LSHRREV_B16:
3952   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3953   case AMDGPU::V_PK_LSHRREV_B16_vi:
3954   case AMDGPU::V_PK_ASHRREV_I16:
3955   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3956   case AMDGPU::V_PK_ASHRREV_I16_vi:
3957     return true;
3958   default:
3959     return false;
3960   }
3961 }
3962 
3963 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3964 
3965   using namespace SIInstrFlags;
3966   const unsigned Opcode = Inst.getOpcode();
3967   const MCInstrDesc &Desc = MII.get(Opcode);
3968 
  // The lds_direct register is defined so that it can be used
  // with 9-bit operands only. Ignore encodings which do not accept these.
3971   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3972   if ((Desc.TSFlags & Enc) == 0)
3973     return None;
3974 
3975   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3976     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3977     if (SrcIdx == -1)
3978       break;
3979     const auto &Src = Inst.getOperand(SrcIdx);
3980     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3981 
3982       if (isGFX90A() || isGFX11Plus())
3983         return StringRef("lds_direct is not supported on this GPU");
3984 
3985       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3986         return StringRef("lds_direct cannot be used with this instruction");
3987 
3988       if (SrcName != OpName::src0)
3989         return StringRef("lds_direct may be used as src0 only");
3990     }
3991   }
3992 
3993   return None;
3994 }
3995 
3996 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3997   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3998     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3999     if (Op.isFlatOffset())
4000       return Op.getStartLoc();
4001   }
4002   return getLoc();
4003 }
4004 
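// Validate the FLAT offset modifier. Informal summary: the allowed width is
// queried from AMDGPU::getNumFlatOffsetBits; GLOBAL/SCRATCH offsets are
// checked as signed values and plain FLAT offsets as unsigned, so an
// out-of-range offset: value is diagnosed with the expected bit width.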
4005 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4006                                          const OperandVector &Operands) {
4007   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4008   if ((TSFlags & SIInstrFlags::FLAT) == 0)
4009     return true;
4010 
4011   auto Opcode = Inst.getOpcode();
4012   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4013   assert(OpNum != -1);
4014 
4015   const auto &Op = Inst.getOperand(OpNum);
4016   if (!hasFlatOffsets() && Op.getImm() != 0) {
4017     Error(getFlatOffsetLoc(Operands),
4018           "flat offset modifier is not supported on this GPU");
4019     return false;
4020   }
4021 
  // For GLOBAL and SCRATCH segments the offset is a signed value;
  // for plain FLAT the offset must be positive (the MSB is ignored
  // and forced to zero).
4024   if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
4025     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
4026     if (!isIntN(OffsetSize, Op.getImm())) {
4027       Error(getFlatOffsetLoc(Operands),
4028             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4029       return false;
4030     }
4031   } else {
4032     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
4033     if (!isUIntN(OffsetSize, Op.getImm())) {
4034       Error(getFlatOffsetLoc(Operands),
4035             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4036       return false;
4037     }
4038   }
4039 
4040   return true;
4041 }
4042 
4043 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4044   // Start with second operand because SMEM Offset cannot be dst or src0.
4045   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4046     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4047     if (Op.isSMEMOffset())
4048       return Op.getStartLoc();
4049   }
4050   return getLoc();
4051 }
4052 
4053 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4054                                          const OperandVector &Operands) {
4055   if (isCI() || isSI())
4056     return true;
4057 
4058   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4059   if ((TSFlags & SIInstrFlags::SMRD) == 0)
4060     return true;
4061 
4062   auto Opcode = Inst.getOpcode();
4063   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4064   if (OpNum == -1)
4065     return true;
4066 
4067   const auto &Op = Inst.getOperand(OpNum);
4068   if (!Op.isImm())
4069     return true;
4070 
4071   uint64_t Offset = Op.getImm();
4072   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4073   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4074       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4075     return true;
4076 
4077   Error(getSMEMOffsetLoc(Operands),
4078         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
4079                                "expected a 21-bit signed offset");
4080 
4081   return false;
4082 }
4083 
4084 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4085   unsigned Opcode = Inst.getOpcode();
4086   const MCInstrDesc &Desc = MII.get(Opcode);
4087   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4088     return true;
4089 
4090   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4091   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4092 
4093   const int OpIndices[] = { Src0Idx, Src1Idx };
4094 
4095   unsigned NumExprs = 0;
4096   unsigned NumLiterals = 0;
4097   uint32_t LiteralValue;
4098 
4099   for (int OpIdx : OpIndices) {
4100     if (OpIdx == -1) break;
4101 
4102     const MCOperand &MO = Inst.getOperand(OpIdx);
4103     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4104     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4105       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4106         uint32_t Value = static_cast<uint32_t>(MO.getImm());
4107         if (NumLiterals == 0 || LiteralValue != Value) {
4108           LiteralValue = Value;
4109           ++NumLiterals;
4110         }
4111       } else if (MO.isExpr()) {
4112         ++NumExprs;
4113       }
4114     }
4115   }
4116 
4117   return NumLiterals + NumExprs <= 1;
4118 }
4119 
4120 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4121   const unsigned Opc = Inst.getOpcode();
4122   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
4123       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
4124     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4125     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4126 
4127     if (OpSel & ~3)
4128       return false;
4129   }
4130 
4131   if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) {
4132     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4133     if (OpSelIdx != -1) {
4134       if (Inst.getOperand(OpSelIdx).getImm() != 0)
4135         return false;
4136     }
4137     int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4138     if (OpSelHiIdx != -1) {
4139       if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4140         return false;
4141     }
4142   }
4143 
4144   return true;
4145 }
4146 
4147 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4148                                   const OperandVector &Operands) {
4149   const unsigned Opc = Inst.getOpcode();
4150   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4151   if (DppCtrlIdx < 0)
4152     return true;
4153   unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4154 
4155   if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
4156     // DPP64 is supported for row_newbcast only.
4157     int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4158     if (Src0Idx >= 0 &&
4159         getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
4160       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4161       Error(S, "64 bit dpp only supports row_newbcast");
4162       return false;
4163     }
4164   }
4165 
4166   return true;
4167 }
4168 
4169 // Check if VCC register matches wavefront size
4170 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4171   auto FB = getFeatureBits();
4172   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4173     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4174 }
4175 
// Only one unique literal may be used. A VOP3 literal is only allowed on GFX10+.
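// Illustrative example: reusing the same 32-bit literal value in several
// source operands is accepted (where literals are allowed at all), while
// two different literal values trigger "only one literal operand is allowed".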
4177 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4178                                          const OperandVector &Operands) {
4179   unsigned Opcode = Inst.getOpcode();
4180   const MCInstrDesc &Desc = MII.get(Opcode);
4181   const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
4182   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4183       ImmIdx == -1)
4184     return true;
4185 
4186   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4187   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4188   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
4189 
4190   const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};
4191 
4192   unsigned NumExprs = 0;
4193   unsigned NumLiterals = 0;
4194   uint32_t LiteralValue;
4195 
4196   for (int OpIdx : OpIndices) {
4197     if (OpIdx == -1)
4198       continue;
4199 
4200     const MCOperand &MO = Inst.getOperand(OpIdx);
4201     if (!MO.isImm() && !MO.isExpr())
4202       continue;
4203     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4204       continue;
4205 
4206     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4207         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4208       Error(getConstLoc(Operands),
4209             "inline constants are not allowed for this operand");
4210       return false;
4211     }
4212 
4213     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4214       uint32_t Value = static_cast<uint32_t>(MO.getImm());
4215       if (NumLiterals == 0 || LiteralValue != Value) {
4216         LiteralValue = Value;
4217         ++NumLiterals;
4218       }
4219     } else if (MO.isExpr()) {
4220       ++NumExprs;
4221     }
4222   }
4223   NumLiterals += NumExprs;
4224 
4225   if (!NumLiterals)
4226     return true;
4227 
4228   if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4229     Error(getLitLoc(Operands), "literal operands are not supported");
4230     return false;
4231   }
4232 
4233   if (NumLiterals > 1) {
4234     Error(getLitLoc(Operands), "only one literal operand is allowed");
4235     return false;
4236   }
4237 
4238   return true;
4239 }
4240 
4241 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4242 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4243                          const MCRegisterInfo *MRI) {
4244   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4245   if (OpIdx < 0)
4246     return -1;
4247 
4248   const MCOperand &Op = Inst.getOperand(OpIdx);
4249   if (!Op.isReg())
4250     return -1;
4251 
4252   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4253   auto Reg = Sub ? Sub : Op.getReg();
4254   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4255   return AGPR32.contains(Reg) ? 1 : 0;
4256 }
4257 
4258 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4259   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4260   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4261                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4262                   SIInstrFlags::DS)) == 0)
4263     return true;
4264 
4265   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4266                                                       : AMDGPU::OpName::vdata;
4267 
4268   const MCRegisterInfo *MRI = getMRI();
4269   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4270   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4271 
4272   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4273     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4274     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4275       return false;
4276   }
4277 
4278   auto FB = getFeatureBits();
4279   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4280     if (DataAreg < 0 || DstAreg < 0)
4281       return true;
4282     return DstAreg == DataAreg;
4283   }
4284 
4285   return DstAreg < 1 && DataAreg < 1;
4286 }
4287 
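// On gfx90a, VGPR and AGPR tuples used by wide operands must start at an
// even register. Illustrative example: v[0:1] is accepted while v[1:2] is
// rejected with "vgpr tuples must be 64 bit aligned".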
4288 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4289   auto FB = getFeatureBits();
4290   if (!FB[AMDGPU::FeatureGFX90AInsts])
4291     return true;
4292 
4293   const MCRegisterInfo *MRI = getMRI();
4294   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4295   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4296   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4297     const MCOperand &Op = Inst.getOperand(I);
4298     if (!Op.isReg())
4299       continue;
4300 
4301     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4302     if (!Sub)
4303       continue;
4304 
4305     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4306       return false;
4307     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4308       return false;
4309   }
4310 
4311   return true;
4312 }
4313 
4314 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4315   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4316     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4317     if (Op.isBLGP())
4318       return Op.getStartLoc();
4319   }
4320   return SMLoc();
4321 }
4322 
4323 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4324                                    const OperandVector &Operands) {
4325   unsigned Opc = Inst.getOpcode();
4326   int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4327   if (BlgpIdx == -1)
4328     return true;
4329   SMLoc BLGPLoc = getBLGPLoc(Operands);
4330   if (!BLGPLoc.isValid())
4331     return true;
4332   bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:");
4333   auto FB = getFeatureBits();
4334   bool UsesNeg = false;
4335   if (FB[AMDGPU::FeatureGFX940Insts]) {
4336     switch (Opc) {
4337     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4338     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4339     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4340     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4341       UsesNeg = true;
4342     }
4343   }
4344 
4345   if (IsNeg == UsesNeg)
4346     return true;
4347 
4348   Error(BLGPLoc,
4349         UsesNeg ? "invalid modifier: blgp is not supported"
4350                 : "invalid modifier: neg is not supported");
4351 
4352   return false;
4353 }
4354 
4355 // gfx90a has an undocumented limitation:
4356 // DS_GWS opcodes must use even aligned registers.
4357 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4358                                   const OperandVector &Operands) {
4359   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4360     return true;
4361 
4362   int Opc = Inst.getOpcode();
4363   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4364       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4365     return true;
4366 
4367   const MCRegisterInfo *MRI = getMRI();
4368   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4369   int Data0Pos =
4370       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4371   assert(Data0Pos != -1);
4372   auto Reg = Inst.getOperand(Data0Pos).getReg();
4373   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4374   if (RegIdx & 1) {
4375     SMLoc RegLoc = getRegLoc(Reg, Operands);
4376     Error(RegLoc, "vgpr must be even aligned");
4377     return false;
4378   }
4379 
4380   return true;
4381 }
4382 
4383 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4384                                             const OperandVector &Operands,
4385                                             const SMLoc &IDLoc) {
4386   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4387                                            AMDGPU::OpName::cpol);
4388   if (CPolPos == -1)
4389     return true;
4390 
4391   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4392 
4393   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4394   if (TSFlags & SIInstrFlags::SMRD) {
4395     if (CPol && (isSI() || isCI())) {
4396       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4397       Error(S, "cache policy is not supported for SMRD instructions");
4398       return false;
4399     }
4400     if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4401       Error(IDLoc, "invalid cache policy for SMEM instruction");
4402       return false;
4403     }
4404   }
4405 
4406   if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4407     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4408     StringRef CStr(S.getPointer());
4409     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4410     Error(S, "scc is not supported on this GPU");
4411     return false;
4412   }
4413 
4414   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4415     return true;
4416 
4417   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4418     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4419       Error(IDLoc, isGFX940() ? "instruction must use sc0"
4420                               : "instruction must use glc");
4421       return false;
4422     }
4423   } else {
4424     if (CPol & CPol::GLC) {
4425       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4426       StringRef CStr(S.getPointer());
4427       S = SMLoc::getFromPointer(
4428           &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4429       Error(S, isGFX940() ? "instruction must not use sc0"
4430                           : "instruction must not use glc");
4431       return false;
4432     }
4433   }
4434 
4435   return true;
4436 }
4437 
4438 bool AMDGPUAsmParser::validateFlatLdsDMA(const MCInst &Inst,
4439                                          const OperandVector &Operands,
4440                                          const SMLoc &IDLoc) {
4441   if (isGFX940())
4442     return true;
4443 
4444   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4445   if ((TSFlags & (SIInstrFlags::VALU | SIInstrFlags::FLAT)) !=
4446       (SIInstrFlags::VALU | SIInstrFlags::FLAT))
4447     return true;
4448   // This is FLAT LDS DMA.
4449 
4450   SMLoc S = getImmLoc(AMDGPUOperand::ImmTyLDS, Operands);
4451   StringRef CStr(S.getPointer());
4452   if (!CStr.startswith("lds")) {
    // This is an incorrectly selected LDS DMA version of a FLAT load opcode.
    // The LDS version should have the 'lds' modifier, but it follows optional
    // operands, so its absence is ignored by the matcher.
4456     Error(IDLoc, "invalid operands for instruction");
4457     return false;
4458   }
4459 
4460   return true;
4461 }
4462 
4463 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4464                                           const SMLoc &IDLoc,
4465                                           const OperandVector &Operands) {
4466   if (auto ErrMsg = validateLdsDirect(Inst)) {
4467     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4468     return false;
4469   }
4470   if (!validateSOPLiteral(Inst)) {
4471     Error(getLitLoc(Operands),
4472       "only one literal operand is allowed");
4473     return false;
4474   }
4475   if (!validateVOPLiteral(Inst, Operands)) {
4476     return false;
4477   }
4478   if (!validateConstantBusLimitations(Inst, Operands)) {
4479     return false;
4480   }
4481   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4482     return false;
4483   }
4484   if (!validateIntClampSupported(Inst)) {
4485     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4486       "integer clamping is not supported on this GPU");
4487     return false;
4488   }
4489   if (!validateOpSel(Inst)) {
4490     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4491       "invalid op_sel operand");
4492     return false;
4493   }
4494   if (!validateDPP(Inst, Operands)) {
4495     return false;
4496   }
  // For MUBUF/MTBUF, d16 is a part of the opcode, so there is nothing to validate.
4498   if (!validateMIMGD16(Inst)) {
4499     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4500       "d16 modifier is not supported on this GPU");
4501     return false;
4502   }
4503   if (!validateMIMGDim(Inst)) {
4504     Error(IDLoc, "dim modifier is required on this GPU");
4505     return false;
4506   }
4507   if (!validateMIMGMSAA(Inst)) {
4508     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4509           "invalid dim; must be MSAA type");
4510     return false;
4511   }
4512   if (auto ErrMsg = validateMIMGDataSize(Inst)) {
4513     Error(IDLoc, *ErrMsg);
4514     return false;
4515   }
4516   if (!validateMIMGAddrSize(Inst)) {
4517     Error(IDLoc,
4518       "image address size does not match dim and a16");
4519     return false;
4520   }
4521   if (!validateMIMGAtomicDMask(Inst)) {
4522     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4523       "invalid atomic image dmask");
4524     return false;
4525   }
4526   if (!validateMIMGGatherDMask(Inst)) {
4527     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4528       "invalid image_gather dmask: only one bit must be set");
4529     return false;
4530   }
4531   if (!validateMovrels(Inst, Operands)) {
4532     return false;
4533   }
4534   if (!validateFlatOffset(Inst, Operands)) {
4535     return false;
4536   }
4537   if (!validateSMEMOffset(Inst, Operands)) {
4538     return false;
4539   }
4540   if (!validateMAIAccWrite(Inst, Operands)) {
4541     return false;
4542   }
4543   if (!validateMFMA(Inst, Operands)) {
4544     return false;
4545   }
4546   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4547     return false;
4548   }
4549 
4550   if (!validateAGPRLdSt(Inst)) {
4551     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4552     ? "invalid register class: data and dst should be all VGPR or AGPR"
4553     : "invalid register class: agpr loads and stores not supported on this GPU"
4554     );
4555     return false;
4556   }
4557   if (!validateVGPRAlign(Inst)) {
4558     Error(IDLoc,
4559       "invalid register class: vgpr tuples must be 64 bit aligned");
4560     return false;
4561   }
4562   if (!validateGWS(Inst, Operands)) {
4563     return false;
4564   }
4565 
4566   if (!validateBLGP(Inst, Operands)) {
4567     return false;
4568   }
4569 
4570   if (!validateDivScale(Inst)) {
4571     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4572     return false;
4573   }
4577 
4578   if (!validateFlatLdsDMA(Inst, Operands, IDLoc)) {
4579     return false;
4580   }
4581 
4582   return true;
4583 }
4584 
4585 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4586                                             const FeatureBitset &FBS,
4587                                             unsigned VariantID = 0);
4588 
4589 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4590                                 const FeatureBitset &AvailableFeatures,
4591                                 unsigned VariantID);
4592 
4593 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4594                                        const FeatureBitset &FBS) {
4595   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4596 }
4597 
4598 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4599                                        const FeatureBitset &FBS,
4600                                        ArrayRef<unsigned> Variants) {
4601   for (auto Variant : Variants) {
4602     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4603       return true;
4604   }
4605 
4606   return false;
4607 }
4608 
4609 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4610                                                   const SMLoc &IDLoc) {
4611   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4612 
4613   // Check if requested instruction variant is supported.
4614   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4615     return false;
4616 
4617   // This instruction is not supported.
4618   // Clear any other pending errors because they are no longer relevant.
4619   getParser().clearPendingErrors();
4620 
4621   // Requested instruction variant is not supported.
4622   // Check if any other variants are supported.
4623   StringRef VariantName = getMatchedVariantName();
4624   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4625     return Error(IDLoc,
4626                  Twine(VariantName,
4627                        " variant of this instruction is not supported"));
4628   }
4629 
4630   // Finally check if this instruction is supported on any other GPU.
4631   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4632     return Error(IDLoc, "instruction not supported on this GPU");
4633   }
4634 
4635   // Instruction not supported on any GPU. Probably a typo.
4636   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4637   return Error(IDLoc, "invalid instruction" + Suggestion);
4638 }
4639 
4640 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4641                                               OperandVector &Operands,
4642                                               MCStreamer &Out,
4643                                               uint64_t &ErrorInfo,
4644                                               bool MatchingInlineAsm) {
4645   MCInst Inst;
4646   unsigned Result = Match_Success;
4647   for (auto Variant : getMatchedVariants()) {
4648     uint64_t EI;
4649     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4650                                   Variant);
    // We order match statuses from least to most specific and keep the most
    // specific status as the result:
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4654     if ((R == Match_Success) ||
4655         (R == Match_PreferE32) ||
4656         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4657         (R == Match_InvalidOperand && Result != Match_MissingFeature
4658                                    && Result != Match_PreferE32) ||
4659         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4660                                    && Result != Match_MissingFeature
4661                                    && Result != Match_PreferE32)) {
4662       Result = R;
4663       ErrorInfo = EI;
4664     }
4665     if (R == Match_Success)
4666       break;
4667   }
4668 
4669   if (Result == Match_Success) {
4670     if (!validateInstruction(Inst, IDLoc, Operands)) {
4671       return true;
4672     }
4673     Inst.setLoc(IDLoc);
4674     Out.emitInstruction(Inst, getSTI());
4675     return false;
4676   }
4677 
4678   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4679   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4680     return true;
4681   }
4682 
4683   switch (Result) {
4684   default: break;
4685   case Match_MissingFeature:
4686     // It has been verified that the specified instruction
4687     // mnemonic is valid. A match was found but it requires
4688     // features which are not supported on this GPU.
4689     return Error(IDLoc, "operands are not valid for this GPU or mode");
4690 
4691   case Match_InvalidOperand: {
4692     SMLoc ErrorLoc = IDLoc;
4693     if (ErrorInfo != ~0ULL) {
4694       if (ErrorInfo >= Operands.size()) {
4695         return Error(IDLoc, "too few operands for instruction");
4696       }
4697       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4698       if (ErrorLoc == SMLoc())
4699         ErrorLoc = IDLoc;
4700     }
4701     return Error(ErrorLoc, "invalid operand for instruction");
4702   }
4703 
4704   case Match_PreferE32:
4705     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4706                         "should be encoded as e32");
4707   case Match_MnemonicFail:
4708     llvm_unreachable("Invalid instructions should have been handled already");
4709   }
4710   llvm_unreachable("Implement any new match types added!");
4711 }
4712 
4713 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4714   int64_t Tmp = -1;
4715   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4716     return true;
4717   }
4718   if (getParser().parseAbsoluteExpression(Tmp)) {
4719     return true;
4720   }
4721   Ret = static_cast<uint32_t>(Tmp);
4722   return false;
4723 }
4724 
4725 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4726                                                uint32_t &Minor) {
4727   if (ParseAsAbsoluteExpression(Major))
4728     return TokError("invalid major version");
4729 
4730   if (!trySkipToken(AsmToken::Comma))
4731     return TokError("minor version number required, comma expected");
4732 
4733   if (ParseAsAbsoluteExpression(Minor))
4734     return TokError("invalid minor version");
4735 
4736   return false;
4737 }
4738 
4739 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4740   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4741     return TokError("directive only supported for amdgcn architecture");
4742 
4743   std::string TargetIDDirective;
4744   SMLoc TargetStart = getTok().getLoc();
4745   if (getParser().parseEscapedString(TargetIDDirective))
4746     return true;
4747 
4748   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4749   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4750     return getParser().Error(TargetRange.Start,
4751         (Twine(".amdgcn_target directive's target id ") +
4752          Twine(TargetIDDirective) +
4753          Twine(" does not match the specified target id ") +
4754          Twine(getTargetStreamer().getTargetID()->toString())).str());
4755 
4756   return false;
4757 }
4758 
4759 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4760   return Error(Range.Start, "value out of range", Range);
4761 }
4762 
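// Compute the granulated VGPR and SGPR block counts that are encoded into
// compute_pgm_rsrc1, after validating the raw register counts against the
// subtarget's addressable limits.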
4763 bool AMDGPUAsmParser::calculateGPRBlocks(
4764     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4765     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4766     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4767     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4768   // TODO(scott.linder): These calculations are duplicated from
4769   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4770   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4771 
4772   unsigned NumVGPRs = NextFreeVGPR;
4773   unsigned NumSGPRs = NextFreeSGPR;
4774 
4775   if (Version.Major >= 10)
4776     NumSGPRs = 0;
4777   else {
4778     unsigned MaxAddressableNumSGPRs =
4779         IsaInfo::getAddressableNumSGPRs(&getSTI());
4780 
4781     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4782         NumSGPRs > MaxAddressableNumSGPRs)
4783       return OutOfRangeError(SGPRRange);
4784 
4785     NumSGPRs +=
4786         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4787 
4788     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4789         NumSGPRs > MaxAddressableNumSGPRs)
4790       return OutOfRangeError(SGPRRange);
4791 
4792     if (Features.test(FeatureSGPRInitBug))
4793       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4794   }
4795 
4796   VGPRBlocks =
4797       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4798   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4799 
4800   return false;
4801 }
4802 
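// Parse a .amdhsa_kernel block. A minimal example (illustrative register
// counts):
//
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel
//
// .amdhsa_next_free_vgpr and .amdhsa_next_free_sgpr are required (and
// .amdhsa_accum_offset on gfx90a); the remaining .amdhsa_ directives are
// optional, and none may be repeated.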
4803 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4804   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4805     return TokError("directive only supported for amdgcn architecture");
4806 
4807   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4808     return TokError("directive only supported for amdhsa OS");
4809 
4810   StringRef KernelName;
4811   if (getParser().parseIdentifier(KernelName))
4812     return true;
4813 
4814   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4815 
4816   StringSet<> Seen;
4817 
4818   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4819 
4820   SMRange VGPRRange;
4821   uint64_t NextFreeVGPR = 0;
4822   uint64_t AccumOffset = 0;
4823   uint64_t SharedVGPRCount = 0;
4824   SMRange SGPRRange;
4825   uint64_t NextFreeSGPR = 0;
4826 
4827   // Count the number of user SGPRs implied from the enabled feature bits.
4828   unsigned ImpliedUserSGPRCount = 0;
4829 
4830   // Track if the asm explicitly contains the directive for the user SGPR
4831   // count.
4832   Optional<unsigned> ExplicitUserSGPRCount;
4833   bool ReserveVCC = true;
4834   bool ReserveFlatScr = true;
4835   Optional<bool> EnableWavefrontSize32;
4836 
4837   while (true) {
4838     while (trySkipToken(AsmToken::EndOfStatement));
4839 
4840     StringRef ID;
4841     SMRange IDRange = getTok().getLocRange();
4842     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4843       return true;
4844 
4845     if (ID == ".end_amdhsa_kernel")
4846       break;
4847 
4848     if (Seen.find(ID) != Seen.end())
4849       return TokError(".amdhsa_ directives cannot be repeated");
4850     Seen.insert(ID);
4851 
4852     SMLoc ValStart = getLoc();
4853     int64_t IVal;
4854     if (getParser().parseAbsoluteExpression(IVal))
4855       return true;
4856     SMLoc ValEnd = getLoc();
4857     SMRange ValRange = SMRange(ValStart, ValEnd);
4858 
4859     if (IVal < 0)
4860       return OutOfRangeError(ValRange);
4861 
4862     uint64_t Val = IVal;
4863 
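// Reject values that do not fit in the field's bit width, then write the
// value into the corresponding bits of the kernel descriptor word.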
4864 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4865   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4866     return OutOfRangeError(RANGE);                                             \
4867   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4868 
4869     if (ID == ".amdhsa_group_segment_fixed_size") {
4870       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4871         return OutOfRangeError(ValRange);
4872       KD.group_segment_fixed_size = Val;
4873     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4874       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4875         return OutOfRangeError(ValRange);
4876       KD.private_segment_fixed_size = Val;
4877     } else if (ID == ".amdhsa_kernarg_size") {
4878       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4879         return OutOfRangeError(ValRange);
4880       KD.kernarg_size = Val;
4881     } else if (ID == ".amdhsa_user_sgpr_count") {
4882       ExplicitUserSGPRCount = Val;
4883     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4884       if (hasArchitectedFlatScratch())
4885         return Error(IDRange.Start,
4886                      "directive is not supported with architected flat scratch",
4887                      IDRange);
4888       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4889                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4890                        Val, ValRange);
4891       if (Val)
4892         ImpliedUserSGPRCount += 4;
4893     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4894       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4895                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4896                        ValRange);
4897       if (Val)
4898         ImpliedUserSGPRCount += 2;
4899     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4900       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4901                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4902                        ValRange);
4903       if (Val)
4904         ImpliedUserSGPRCount += 2;
4905     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4906       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4907                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4908                        Val, ValRange);
4909       if (Val)
4910         ImpliedUserSGPRCount += 2;
4911     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4912       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4913                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4914                        ValRange);
4915       if (Val)
4916         ImpliedUserSGPRCount += 2;
4917     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4918       if (hasArchitectedFlatScratch())
4919         return Error(IDRange.Start,
4920                      "directive is not supported with architected flat scratch",
4921                      IDRange);
4922       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4923                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4924                        ValRange);
4925       if (Val)
4926         ImpliedUserSGPRCount += 2;
4927     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4928       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4929                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4930                        Val, ValRange);
4931       if (Val)
4932         ImpliedUserSGPRCount += 1;
4933     } else if (ID == ".amdhsa_wavefront_size32") {
4934       if (IVersion.Major < 10)
4935         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4936       EnableWavefrontSize32 = Val;
4937       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4938                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4939                        Val, ValRange);
4940     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4941       if (hasArchitectedFlatScratch())
4942         return Error(IDRange.Start,
4943                      "directive is not supported with architected flat scratch",
4944                      IDRange);
4945       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4946                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4947     } else if (ID == ".amdhsa_enable_private_segment") {
4948       if (!hasArchitectedFlatScratch())
4949         return Error(
4950             IDRange.Start,
4951             "directive is not supported without architected flat scratch",
4952             IDRange);
4953       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4954                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4955     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4956       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4957                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4958                        ValRange);
4959     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4960       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4961                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4962                        ValRange);
4963     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4964       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4965                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4966                        ValRange);
4967     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4968       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4969                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4970                        ValRange);
4971     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4972       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4973                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4974                        ValRange);
4975     } else if (ID == ".amdhsa_next_free_vgpr") {
4976       VGPRRange = ValRange;
4977       NextFreeVGPR = Val;
4978     } else if (ID == ".amdhsa_next_free_sgpr") {
4979       SGPRRange = ValRange;
4980       NextFreeSGPR = Val;
4981     } else if (ID == ".amdhsa_accum_offset") {
4982       if (!isGFX90A())
4983         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4984       AccumOffset = Val;
4985     } else if (ID == ".amdhsa_reserve_vcc") {
4986       if (!isUInt<1>(Val))
4987         return OutOfRangeError(ValRange);
4988       ReserveVCC = Val;
4989     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4990       if (IVersion.Major < 7)
4991         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4992       if (hasArchitectedFlatScratch())
4993         return Error(IDRange.Start,
4994                      "directive is not supported with architected flat scratch",
4995                      IDRange);
4996       if (!isUInt<1>(Val))
4997         return OutOfRangeError(ValRange);
4998       ReserveFlatScr = Val;
4999     } else if (ID == ".amdhsa_reserve_xnack_mask") {
5000       if (IVersion.Major < 8)
5001         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5002       if (!isUInt<1>(Val))
5003         return OutOfRangeError(ValRange);
5004       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5005         return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
5006                                  IDRange);
5007     } else if (ID == ".amdhsa_float_round_mode_32") {
5008       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5009                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
5010     } else if (ID == ".amdhsa_float_round_mode_16_64") {
5011       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5012                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
5013     } else if (ID == ".amdhsa_float_denorm_mode_32") {
5014       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5015                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
5016     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5017       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5018                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
5019                        ValRange);
5020     } else if (ID == ".amdhsa_dx10_clamp") {
5021       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5022                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
5023     } else if (ID == ".amdhsa_ieee_mode") {
5024       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
5025                        Val, ValRange);
5026     } else if (ID == ".amdhsa_fp16_overflow") {
5027       if (IVersion.Major < 9)
5028         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5029       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
5030                        ValRange);
5031     } else if (ID == ".amdhsa_tg_split") {
5032       if (!isGFX90A())
5033         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5034       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
5035                        Val, ValRange);
5036     } else if (ID == ".amdhsa_workgroup_processor_mode") {
5037       if (IVersion.Major < 10)
5038         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5039       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
5040                        ValRange);
5041     } else if (ID == ".amdhsa_memory_ordered") {
5042       if (IVersion.Major < 10)
5043         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5044       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
5045                        ValRange);
5046     } else if (ID == ".amdhsa_forward_progress") {
5047       if (IVersion.Major < 10)
5048         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5049       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
5050                        ValRange);
5051     } else if (ID == ".amdhsa_shared_vgpr_count") {
5052       if (IVersion.Major < 10)
5053         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5054       SharedVGPRCount = Val;
5055       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5056                        COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val,
5057                        ValRange);
5058     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5059       PARSE_BITS_ENTRY(
5060           KD.compute_pgm_rsrc2,
5061           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
5062           ValRange);
5063     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5064       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5065                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5066                        Val, ValRange);
5067     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5068       PARSE_BITS_ENTRY(
5069           KD.compute_pgm_rsrc2,
5070           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
5071           ValRange);
5072     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5073       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5074                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5075                        Val, ValRange);
5076     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5077       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5078                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5079                        Val, ValRange);
5080     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5081       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5082                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5083                        Val, ValRange);
5084     } else if (ID == ".amdhsa_exception_int_div_zero") {
5085       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5086                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5087                        Val, ValRange);
5088     } else {
5089       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5090     }
5091 
5092 #undef PARSE_BITS_ENTRY
5093   }
5094 
5095   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
5096     return TokError(".amdhsa_next_free_vgpr directive is required");
5097 
5098   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
5099     return TokError(".amdhsa_next_free_sgpr directive is required");
5100 
5101   unsigned VGPRBlocks;
5102   unsigned SGPRBlocks;
5103   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5104                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5105                          EnableWavefrontSize32, NextFreeVGPR,
5106                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5107                          SGPRBlocks))
5108     return true;
5109 
5110   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5111           VGPRBlocks))
5112     return OutOfRangeError(VGPRRange);
5113   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5114                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
5115 
5116   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5117           SGPRBlocks))
5118     return OutOfRangeError(SGPRRange);
5119   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5120                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
5121                   SGPRBlocks);
5122 
5123   if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5124     return TokError(".amdhsa_user_sgpr_count smaller than that implied by "
5125                     "enabled user SGPRs");
5126 
5127   unsigned UserSGPRCount =
5128       ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5129 
5130   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5131     return TokError("too many user SGPRs enabled");
5132   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
5133                   UserSGPRCount);
5134 
5135   if (isGFX90A()) {
5136     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
5137       return TokError(".amdhsa_accum_offset directive is required");
5138     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5139       return TokError("accum_offset should be in range [4..256] in "
5140                       "increments of 4");
5141     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5142       return TokError("accum_offset exceeds total VGPR allocation");
5143     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5144                     (AccumOffset / 4 - 1));
5145   }
5146 
5147   if (IVersion.Major == 10) {
5148     // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
5149     if (SharedVGPRCount && EnableWavefrontSize32) {
5150       return TokError("shared_vgpr_count directive not valid on "
5151                       "wavefront size 32");
5152     }
5153     if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5154       return TokError("shared_vgpr_count*2 + "
5155                       "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5156                       "exceed 63");
5157     }
5158   }
5159 
5160   getTargetStreamer().EmitAmdhsaKernelDescriptor(
5161       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
5162       ReserveFlatScr);
5163   return false;
5164 }
5165 
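// Parse e.g. ".hsa_code_object_version 2,1" (illustrative version numbers)
// and forward the major/minor pair to the target streamer.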
5166 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
5167   uint32_t Major;
5168   uint32_t Minor;
5169 
5170   if (ParseDirectiveMajorMinor(Major, Minor))
5171     return true;
5172 
5173   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
5174   return false;
5175 }
5176 
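// Parse e.g. '.hsa_code_object_isa 7,0,0,"AMD","AMDGPU"' (illustrative
// values); with no arguments, the ISA version of the targeted GPU is used.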
5177 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
5178   uint32_t Major;
5179   uint32_t Minor;
5180   uint32_t Stepping;
5181   StringRef VendorName;
5182   StringRef ArchName;
5183 
5184   // If this directive has no arguments, then use the ISA version for the
5185   // targeted GPU.
5186   if (isToken(AsmToken::EndOfStatement)) {
5187     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5188     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
5189                                                         ISA.Stepping,
5190                                                         "AMD", "AMDGPU");
5191     return false;
5192   }
5193 
5194   if (ParseDirectiveMajorMinor(Major, Minor))
5195     return true;
5196 
5197   if (!trySkipToken(AsmToken::Comma))
5198     return TokError("stepping version number required, comma expected");
5199 
5200   if (ParseAsAbsoluteExpression(Stepping))
5201     return TokError("invalid stepping version");
5202 
5203   if (!trySkipToken(AsmToken::Comma))
5204     return TokError("vendor name required, comma expected");
5205 
5206   if (!parseString(VendorName, "invalid vendor name"))
5207     return true;
5208 
5209   if (!trySkipToken(AsmToken::Comma))
5210     return TokError("arch name required, comma expected");
5211 
5212   if (!parseString(ArchName, "invalid arch name"))
5213     return true;
5214 
5215   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
5216                                                       VendorName, ArchName);
5217   return false;
5218 }
5219 
5220 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5221                                                amd_kernel_code_t &Header) {
5222   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5223   // assembly for backwards compatibility.
5224   if (ID == "max_scratch_backing_memory_byte_size") {
5225     Parser.eatToEndOfStatement();
5226     return false;
5227   }
5228 
5229   SmallString<40> ErrStr;
5230   raw_svector_ostream Err(ErrStr);
5231   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5232     return TokError(Err.str());
5233   }
5234   Lex();
5235 
5236   if (ID == "enable_wavefront_size32") {
5237     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5238       if (!isGFX10Plus())
5239         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5240       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5241         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5242     } else {
5243       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5244         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5245     }
5246   }
5247 
5248   if (ID == "wavefront_size") {
5249     if (Header.wavefront_size == 5) {
5250       if (!isGFX10Plus())
5251         return TokError("wavefront_size=5 is only allowed on GFX10+");
5252       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5253         return TokError("wavefront_size=5 requires +WavefrontSize32");
5254     } else if (Header.wavefront_size == 6) {
5255       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5256         return TokError("wavefront_size=6 requires +WavefrontSize64");
5257     }
5258   }
5259 
5260   if (ID == "enable_wgp_mode") {
5261     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5262         !isGFX10Plus())
5263       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5264   }
5265 
5266   if (ID == "enable_mem_ordered") {
5267     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5268         !isGFX10Plus())
5269       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5270   }
5271 
5272   if (ID == "enable_fwd_progress") {
5273     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5274         !isGFX10Plus())
5275       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5276   }
5277 
5278   return false;
5279 }
5280 
5281 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5282   amd_kernel_code_t Header;
5283   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5284 
5285   while (true) {
5286     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
5287     // will set the current token to EndOfStatement.
5288     while (trySkipToken(AsmToken::EndOfStatement));
5289 
5290     StringRef ID;
5291     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5292       return true;
5293 
5294     if (ID == ".end_amd_kernel_code_t")
5295       break;
5296 
5297     if (ParseAMDKernelCodeTValue(ID, Header))
5298       return true;
5299   }
5300 
5301   getTargetStreamer().EmitAMDKernelCodeT(Header);
5302 
5303   return false;
5304 }
5305 
5306 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5307   StringRef KernelName;
5308   if (!parseId(KernelName, "expected symbol name"))
5309     return true;
5310 
5311   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5312                                            ELF::STT_AMDGPU_HSA_KERNEL);
5313 
5314   KernelScope.initialize(getContext());
5315   return false;
5316 }
5317 
5318 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5319   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5320     return Error(getLoc(),
5321                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5322                  "architectures");
5323   }
5324 
5325   auto TargetIDDirective = getLexer().getTok().getStringContents();
5326   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5327     return Error(getParser().getTok().getLoc(), "target id must match options");
5328 
5329   getTargetStreamer().EmitISAVersion();
5330   Lex();
5331 
5332   return false;
5333 }
5334 
5335 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5336   const char *AssemblerDirectiveBegin;
5337   const char *AssemblerDirectiveEnd;
5338   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5339       isHsaAbiVersion3AndAbove(&getSTI())
5340           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5341                             HSAMD::V3::AssemblerDirectiveEnd)
5342           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5343                             HSAMD::AssemblerDirectiveEnd);
5344 
5345   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5346     return Error(getLoc(),
5347                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5348                  "not available on non-amdhsa OSes")).str());
5349   }
5350 
5351   std::string HSAMetadataString;
5352   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5353                           HSAMetadataString))
5354     return true;
5355 
5356   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5357     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5358       return Error(getLoc(), "invalid HSA metadata");
5359   } else {
5360     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5361       return Error(getLoc(), "invalid HSA metadata");
5362   }
5363 
5364   return false;
5365 }
5366 
5367 /// Common code to parse out a block of text (typically YAML) between start and
5368 /// end directives.
5369 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5370                                           const char *AssemblerDirectiveEnd,
5371                                           std::string &CollectString) {
5372 
5373   raw_string_ostream CollectStream(CollectString);
5374 
5375   getLexer().setSkipSpace(false);
5376 
5377   bool FoundEnd = false;
5378   while (!isToken(AsmToken::Eof)) {
5379     while (isToken(AsmToken::Space)) {
5380       CollectStream << getTokenStr();
5381       Lex();
5382     }
5383 
5384     if (trySkipId(AssemblerDirectiveEnd)) {
5385       FoundEnd = true;
5386       break;
5387     }
5388 
5389     CollectStream << Parser.parseStringToEndOfStatement()
5390                   << getContext().getAsmInfo()->getSeparatorString();
5391 
5392     Parser.eatToEndOfStatement();
5393   }
5394 
5395   getLexer().setSkipSpace(true);
5396 
5397   if (isToken(AsmToken::Eof) && !FoundEnd) {
5398     return TokError(Twine("expected directive ") +
5399                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5400   }
5401 
5402   CollectStream.flush();
5403   return false;
5404 }
5405 
5406 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5407 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5408   std::string String;
5409   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5410                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5411     return true;
5412 
5413   auto PALMetadata = getTargetStreamer().getPALMetadata();
5414   if (!PALMetadata->setFromString(String))
5415     return Error(getLoc(), "invalid PAL metadata");
5416   return false;
5417 }
5418 
5419 /// Parse the assembler directive for old linear-format PAL metadata.
5420 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5421   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5422     return Error(getLoc(),
5423                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5424                  "not available on non-amdpal OSes")).str());
5425   }
5426 
5427   auto PALMetadata = getTargetStreamer().getPALMetadata();
5428   PALMetadata->setLegacy();
5429   for (;;) {
5430     uint32_t Key, Value;
5431     if (ParseAsAbsoluteExpression(Key)) {
5432       return TokError(Twine("invalid value in ") +
5433                       Twine(PALMD::AssemblerDirective));
5434     }
5435     if (!trySkipToken(AsmToken::Comma)) {
5436       return TokError(Twine("expected an even number of values in ") +
5437                       Twine(PALMD::AssemblerDirective));
5438     }
5439     if (ParseAsAbsoluteExpression(Value)) {
5440       return TokError(Twine("invalid value in ") +
5441                       Twine(PALMD::AssemblerDirective));
5442     }
5443     PALMetadata->setRegister(Key, Value);
5444     if (!trySkipToken(AsmToken::Comma))
5445       break;
5446   }
5447   return false;
5448 }
5449 
5450 /// ParseDirectiveAMDGPULDS
5451 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
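///  e.g. ".amdgpu_lds lds_sym, 4096, 16" (illustrative name and values);
///  the alignment defaults to 4 and must be a power of two.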
5452 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5453   if (getParser().checkForValidSection())
5454     return true;
5455 
5456   StringRef Name;
5457   SMLoc NameLoc = getLoc();
5458   if (getParser().parseIdentifier(Name))
5459     return TokError("expected identifier in directive");
5460 
5461   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5462   if (parseToken(AsmToken::Comma, "expected ','"))
5463     return true;
5464 
5465   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5466 
5467   int64_t Size;
5468   SMLoc SizeLoc = getLoc();
5469   if (getParser().parseAbsoluteExpression(Size))
5470     return true;
5471   if (Size < 0)
5472     return Error(SizeLoc, "size must be non-negative");
5473   if (Size > LocalMemorySize)
5474     return Error(SizeLoc, "size is too large");
5475 
5476   int64_t Alignment = 4;
5477   if (trySkipToken(AsmToken::Comma)) {
5478     SMLoc AlignLoc = getLoc();
5479     if (getParser().parseAbsoluteExpression(Alignment))
5480       return true;
5481     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5482       return Error(AlignLoc, "alignment must be a power of two");
5483 
5484     // Alignment larger than the size of LDS is possible in theory, as long
5485     // as the linker manages to place the symbol at address 0, but we do want
5486     // to make sure the alignment fits nicely into a 32-bit integer.
5487     if (Alignment >= 1u << 31)
5488       return Error(AlignLoc, "alignment is too large");
5489   }
5490 
5491   if (parseToken(AsmToken::EndOfStatement,
5492                  "unexpected token in '.amdgpu_lds' directive"))
5493     return true;
5494 
5495   Symbol->redefineIfPossible();
5496   if (!Symbol->isUndefined())
5497     return Error(NameLoc, "invalid symbol redefinition");
5498 
5499   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5500   return false;
5501 }
5502 
5503 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5504   StringRef IDVal = DirectiveID.getString();
5505 
5506   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5507     if (IDVal == ".amdhsa_kernel")
5508      return ParseDirectiveAMDHSAKernel();
5509 
5510     // TODO: Restructure/combine with PAL metadata directive.
5511     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5512       return ParseDirectiveHSAMetadata();
5513   } else {
5514     if (IDVal == ".hsa_code_object_version")
5515       return ParseDirectiveHSACodeObjectVersion();
5516 
5517     if (IDVal == ".hsa_code_object_isa")
5518       return ParseDirectiveHSACodeObjectISA();
5519 
5520     if (IDVal == ".amd_kernel_code_t")
5521       return ParseDirectiveAMDKernelCodeT();
5522 
5523     if (IDVal == ".amdgpu_hsa_kernel")
5524       return ParseDirectiveAMDGPUHsaKernel();
5525 
5526     if (IDVal == ".amd_amdgpu_isa")
5527       return ParseDirectiveISAVersion();
5528 
5529     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5530       return ParseDirectiveHSAMetadata();
5531   }
5532 
5533   if (IDVal == ".amdgcn_target")
5534     return ParseDirectiveAMDGCNTarget();
5535 
5536   if (IDVal == ".amdgpu_lds")
5537     return ParseDirectiveAMDGPULDS();
5538 
5539   if (IDVal == PALMD::AssemblerDirectiveBegin)
5540     return ParseDirectivePALMetadataBegin();
5541 
5542   if (IDVal == PALMD::AssemblerDirective)
5543     return ParseDirectivePALMetadata();
5544 
5545   return true;
5546 }
5547 
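// Return true if RegNo names a register that exists on the current subtarget,
// e.g. ttmp12..ttmp15 and the src_shared_*/src_private_* registers require
// gfx9+, while flat_scratch is not a valid register operand on SI or gfx10+.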
5548 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5549                                            unsigned RegNo) {
5550 
5551   if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
5552     return isGFX9Plus();
5553 
5554   // GFX10 has 2 more SGPRs 104 and 105.
5555   if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
5556     return hasSGPR104_SGPR105();
5557 
5558   switch (RegNo) {
5559   case AMDGPU::SRC_SHARED_BASE:
5560   case AMDGPU::SRC_SHARED_LIMIT:
5561   case AMDGPU::SRC_PRIVATE_BASE:
5562   case AMDGPU::SRC_PRIVATE_LIMIT:
5563   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5564     return isGFX9Plus();
5565   case AMDGPU::TBA:
5566   case AMDGPU::TBA_LO:
5567   case AMDGPU::TBA_HI:
5568   case AMDGPU::TMA:
5569   case AMDGPU::TMA_LO:
5570   case AMDGPU::TMA_HI:
5571     return !isGFX9Plus();
5572   case AMDGPU::XNACK_MASK:
5573   case AMDGPU::XNACK_MASK_LO:
5574   case AMDGPU::XNACK_MASK_HI:
5575     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5576   case AMDGPU::SGPR_NULL:
5577     return isGFX10Plus();
5578   default:
5579     break;
5580   }
5581 
5582   if (isCI())
5583     return true;
5584 
5585   if (isSI() || isGFX10Plus()) {
5586     // No flat_scr on SI.
5587     // On GFX10 flat scratch is not a valid register operand and can only be
5588     // accessed with s_setreg/s_getreg.
5589     switch (RegNo) {
5590     case AMDGPU::FLAT_SCR:
5591     case AMDGPU::FLAT_SCR_LO:
5592     case AMDGPU::FLAT_SCR_HI:
5593       return false;
5594     default:
5595       return true;
5596     }
5597   }
5598 
5599   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5600   // SI/CI have.
5601   if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
5602     return hasSGPR102_SGPR103();
5603 
5604   return true;
5605 }
5606 
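// Parse a single instruction operand. In NSA mode an operand may also be a
// bracketed register list, e.g. "[v4, v5, v6]" (illustrative registers), as
// used for non-sequential-address MIMG operands on gfx10+.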
5607 OperandMatchResultTy
5608 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5609                               OperandMode Mode) {
5610   // Try to parse with a custom parser
5611   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5612 
5613   // If we successfully parsed the operand or if there was an error parsing,
5614   // we are done.
5615   //
5616   // If we are parsing after we reach EndOfStatement then this means we
5617   // are appending default values to the Operands list.  This is only done
5618   // by custom parser, so we shouldn't continue on to the generic parsing.
5619   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5620       isToken(AsmToken::EndOfStatement))
5621     return ResTy;
5622 
5623   SMLoc RBraceLoc;
5624   SMLoc LBraceLoc = getLoc();
5625   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5626     unsigned Prefix = Operands.size();
5627 
5628     for (;;) {
5629       auto Loc = getLoc();
5630       ResTy = parseReg(Operands);
5631       if (ResTy == MatchOperand_NoMatch)
5632         Error(Loc, "expected a register");
5633       if (ResTy != MatchOperand_Success)
5634         return MatchOperand_ParseFail;
5635 
5636       RBraceLoc = getLoc();
5637       if (trySkipToken(AsmToken::RBrac))
5638         break;
5639 
5640       if (!skipToken(AsmToken::Comma,
5641                      "expected a comma or a closing square bracket")) {
5642         return MatchOperand_ParseFail;
5643       }
5644     }
5645 
5646     if (Operands.size() - Prefix > 1) {
5647       Operands.insert(Operands.begin() + Prefix,
5648                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5649       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5650     }
5651 
5652     return MatchOperand_Success;
5653   }
5654 
5655   return parseRegOrImm(Operands);
5656 }
5657 
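// Strip a forced-encoding suffix from the mnemonic and remember it, e.g.
// "v_add_f32_e64" (illustrative mnemonic) forces the 64-bit encoding and is
// matched as "v_add_f32".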
5658 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5659   // Clear any forced encodings from the previous instruction.
5660   setForcedEncodingSize(0);
5661   setForcedDPP(false);
5662   setForcedSDWA(false);
5663 
5664   if (Name.endswith("_e64")) {
5665     setForcedEncodingSize(64);
5666     return Name.substr(0, Name.size() - 4);
5667   } else if (Name.endswith("_e32")) {
5668     setForcedEncodingSize(32);
5669     return Name.substr(0, Name.size() - 4);
5670   } else if (Name.endswith("_dpp")) {
5671     setForcedDPP(true);
5672     return Name.substr(0, Name.size() - 4);
5673   } else if (Name.endswith("_sdwa")) {
5674     setForcedSDWA(true);
5675     return Name.substr(0, Name.size() - 5);
5676   }
5677   return Name;
5678 }
5679 
5680 static void applyMnemonicAliases(StringRef &Mnemonic,
5681                                  const FeatureBitset &Features,
5682                                  unsigned VariantID);
5683 
5684 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5685                                        StringRef Name,
5686                                        SMLoc NameLoc, OperandVector &Operands) {
5687   // Add the instruction mnemonic
5688   Name = parseMnemonicSuffix(Name);
5689 
5690   // If the target architecture uses MnemonicAlias, call it here to parse
5691   // operands correctly.
5692   applyMnemonicAliases(Name, getAvailableFeatures(), 0);
5693 
5694   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5695 
5696   bool IsMIMG = Name.startswith("image_");
5697 
5698   while (!trySkipToken(AsmToken::EndOfStatement)) {
5699     OperandMode Mode = OperandMode_Default;
5700     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5701       Mode = OperandMode_NSA;
5702     CPolSeen = 0;
5703     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5704 
5705     if (Res != MatchOperand_Success) {
5706       checkUnsupportedInstruction(Name, NameLoc);
5707       if (!Parser.hasPendingError()) {
5708         // FIXME: use real operand location rather than the current location.
5709         StringRef Msg =
5710           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5711                                             "not a valid operand.";
5712         Error(getLoc(), Msg);
5713       }
5714       while (!trySkipToken(AsmToken::EndOfStatement)) {
5715         lex();
5716       }
5717       return true;
5718     }
5719 
5720     // Eat the comma or space if there is one.
5721     trySkipToken(AsmToken::Comma);
5722   }
5723 
5724   return false;
5725 }
5726 
5727 //===----------------------------------------------------------------------===//
5728 // Utility functions
5729 //===----------------------------------------------------------------------===//
5730 
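// Parse an operand of the form "<Prefix>:<integer expression>", e.g.
// "offset:16" (illustrative prefix and value).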
5731 OperandMatchResultTy
5732 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5733 
5734   if (!trySkipId(Prefix, AsmToken::Colon))
5735     return MatchOperand_NoMatch;
5736 
5737   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5738 }
5739 
5740 OperandMatchResultTy
5741 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5742                                     AMDGPUOperand::ImmTy ImmTy,
5743                                     bool (*ConvertResult)(int64_t&)) {
5744   SMLoc S = getLoc();
5745   int64_t Value = 0;
5746 
5747   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5748   if (Res != MatchOperand_Success)
5749     return Res;
5750 
5751   if (ConvertResult && !ConvertResult(Value)) {
5752     Error(S, "invalid " + StringRef(Prefix) + " value.");
5753   }
5754 
5755   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5756   return MatchOperand_Success;
5757 }
5758 
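// Parse an operand of the form "<Prefix>:[<bit>,...]" with up to four 0/1
// elements, e.g. "op_sel:[0,1]" (illustrative prefix), packing the bits into
// a single immediate.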
5759 OperandMatchResultTy
5760 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5761                                              OperandVector &Operands,
5762                                              AMDGPUOperand::ImmTy ImmTy,
5763                                              bool (*ConvertResult)(int64_t&)) {
5764   SMLoc S = getLoc();
5765   if (!trySkipId(Prefix, AsmToken::Colon))
5766     return MatchOperand_NoMatch;
5767 
5768   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5769     return MatchOperand_ParseFail;
5770 
5771   unsigned Val = 0;
5772   const unsigned MaxSize = 4;
5773 
5774   // FIXME: How to verify the number of elements matches the number of src
5775   // operands?
5776   for (int I = 0; ; ++I) {
5777     int64_t Op;
5778     SMLoc Loc = getLoc();
5779     if (!parseExpr(Op))
5780       return MatchOperand_ParseFail;
5781 
5782     if (Op != 0 && Op != 1) {
5783       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5784       return MatchOperand_ParseFail;
5785     }
5786 
5787     Val |= (Op << I);
5788 
5789     if (trySkipToken(AsmToken::RBrac))
5790       break;
5791 
5792     if (I + 1 == MaxSize) {
5793       Error(getLoc(), "expected a closing square bracket");
5794       return MatchOperand_ParseFail;
5795     }
5796 
5797     if (!skipToken(AsmToken::Comma, "expected a comma"))
5798       return MatchOperand_ParseFail;
5799   }
5800 
5801   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5802   return MatchOperand_Success;
5803 }
5804 
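// Parse a named single-bit modifier that may be negated with a "no" prefix,
// e.g. "gds" sets the bit and "nogds" clears it (illustrative modifier name).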
5805 OperandMatchResultTy
5806 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5807                                AMDGPUOperand::ImmTy ImmTy) {
5808   int64_t Bit;
5809   SMLoc S = getLoc();
5810 
5811   if (trySkipId(Name)) {
5812     Bit = 1;
5813   } else if (trySkipId("no", Name)) {
5814     Bit = 0;
5815   } else {
5816     return MatchOperand_NoMatch;
5817   }
5818 
5819   if (Name == "r128" && !hasMIMG_R128()) {
5820     Error(S, "r128 modifier is not supported on this GPU");
5821     return MatchOperand_ParseFail;
5822   }
5823   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5824     Error(S, "a16 modifier is not supported on this GPU");
5825     return MatchOperand_ParseFail;
5826   }
5827 
5828   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5829     ImmTy = AMDGPUOperand::ImmTyR128A16;
5830 
5831   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5832   return MatchOperand_Success;
5833 }
5834 
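// Parse a cache policy modifier such as glc/slc/dlc/scc (or sc0/sc1/nt on
// gfx940 for mnemonics not starting with "s_"), including the negated "no"
// forms, and fold it into the single CPol immediate shared by all such
// modifiers on the instruction.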
5835 OperandMatchResultTy
5836 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5837   unsigned CPolOn = 0;
5838   unsigned CPolOff = 0;
5839   SMLoc S = getLoc();
5840 
5841   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5842   if (isGFX940() && !Mnemo.startswith("s_")) {
5843     if (trySkipId("sc0"))
5844       CPolOn = AMDGPU::CPol::SC0;
5845     else if (trySkipId("nosc0"))
5846       CPolOff = AMDGPU::CPol::SC0;
5847     else if (trySkipId("nt"))
5848       CPolOn = AMDGPU::CPol::NT;
5849     else if (trySkipId("nont"))
5850       CPolOff = AMDGPU::CPol::NT;
5851     else if (trySkipId("sc1"))
5852       CPolOn = AMDGPU::CPol::SC1;
5853     else if (trySkipId("nosc1"))
5854       CPolOff = AMDGPU::CPol::SC1;
5855     else
5856       return MatchOperand_NoMatch;
5857   }
5858   else if (trySkipId("glc"))
5859     CPolOn = AMDGPU::CPol::GLC;
5860   else if (trySkipId("noglc"))
5861     CPolOff = AMDGPU::CPol::GLC;
5862   else if (trySkipId("slc"))
5863     CPolOn = AMDGPU::CPol::SLC;
5864   else if (trySkipId("noslc"))
5865     CPolOff = AMDGPU::CPol::SLC;
5866   else if (trySkipId("dlc"))
5867     CPolOn = AMDGPU::CPol::DLC;
5868   else if (trySkipId("nodlc"))
5869     CPolOff = AMDGPU::CPol::DLC;
5870   else if (trySkipId("scc"))
5871     CPolOn = AMDGPU::CPol::SCC;
5872   else if (trySkipId("noscc"))
5873     CPolOff = AMDGPU::CPol::SCC;
5874   else
5875     return MatchOperand_NoMatch;
5876 
5877   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5878     Error(S, "dlc modifier is not supported on this GPU");
5879     return MatchOperand_ParseFail;
5880   }
5881 
5882   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5883     Error(S, "scc modifier is not supported on this GPU");
5884     return MatchOperand_ParseFail;
5885   }
5886 
5887   if (CPolSeen & (CPolOn | CPolOff)) {
5888     Error(S, "duplicate cache policy modifier");
5889     return MatchOperand_ParseFail;
5890   }
5891 
5892   CPolSeen |= (CPolOn | CPolOff);
5893 
5894   for (unsigned I = 1; I != Operands.size(); ++I) {
5895     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5896     if (Op.isCPol()) {
5897       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5898       return MatchOperand_Success;
5899     }
5900   }
5901 
5902   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5903                                               AMDGPUOperand::ImmTyCPol));
5904 
5905   return MatchOperand_Success;
5906 }
5907 
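// Append an optional immediate operand to the MCInst: use the operand
// recorded in OptionalIdx if it was written in the source, otherwise emit
// Default.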
5908 static void addOptionalImmOperand(
5909   MCInst& Inst, const OperandVector& Operands,
5910   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5911   AMDGPUOperand::ImmTy ImmT,
5912   int64_t Default = 0) {
5913   auto i = OptionalIdx.find(ImmT);
5914   if (i != OptionalIdx.end()) {
5915     unsigned Idx = i->second;
5916     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5917   } else {
5918     Inst.addOperand(MCOperand::createImm(Default));
5919   }
5920 }
5921 
5922 OperandMatchResultTy
5923 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5924                                        StringRef &Value,
5925                                        SMLoc &StringLoc) {
5926   if (!trySkipId(Prefix, AsmToken::Colon))
5927     return MatchOperand_NoMatch;
5928 
5929   StringLoc = getLoc();
5930   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5931                                                   : MatchOperand_ParseFail;
5932 }
5933 
5934 //===----------------------------------------------------------------------===//
5935 // MTBUF format
5936 //===----------------------------------------------------------------------===//
5937 
5938 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5939                                   int64_t MaxVal,
5940                                   int64_t &Fmt) {
5941   int64_t Val;
5942   SMLoc Loc = getLoc();
5943 
5944   auto Res = parseIntWithPrefix(Pref, Val);
5945   if (Res == MatchOperand_ParseFail)
5946     return false;
5947   if (Res == MatchOperand_NoMatch)
5948     return true;
5949 
5950   if (Val < 0 || Val > MaxVal) {
5951     Error(Loc, Twine("out of range ", StringRef(Pref)));
5952     return false;
5953   }
5954 
5955   Fmt = Val;
5956   return true;
5957 }
5958 
5959 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5960 // values to live in a joint format operand in the MCInst encoding.
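// Accepts e.g. "dfmt:1, nfmt:2" or "nfmt:2, dfmt:1" (illustrative values);
// either field may be omitted, in which case its default value is used.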
5961 OperandMatchResultTy
5962 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5963   using namespace llvm::AMDGPU::MTBUFFormat;
5964 
5965   int64_t Dfmt = DFMT_UNDEF;
5966   int64_t Nfmt = NFMT_UNDEF;
5967 
5968   // dfmt and nfmt can appear in either order, and each is optional.
5969   for (int I = 0; I < 2; ++I) {
5970     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5971       return MatchOperand_ParseFail;
5972 
5973     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5974       return MatchOperand_ParseFail;
5975     }
5976     // Skip optional comma between dfmt/nfmt
5977     // but guard against 2 commas following each other.
5978     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5979         !peekToken().is(AsmToken::Comma)) {
5980       trySkipToken(AsmToken::Comma);
5981     }
5982   }
5983 
5984   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5985     return MatchOperand_NoMatch;
5986 
5987   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5988   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5989 
5990   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5991   return MatchOperand_Success;
5992 }
5993 
5994 OperandMatchResultTy
5995 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5996   using namespace llvm::AMDGPU::MTBUFFormat;
5997 
5998   int64_t Fmt = UFMT_UNDEF;
5999 
6000   if (!tryParseFmt("format", UFMT_MAX, Fmt))
6001     return MatchOperand_ParseFail;
6002 
6003   if (Fmt == UFMT_UNDEF)
6004     return MatchOperand_NoMatch;
6005 
6006   Format = Fmt;
6007   return MatchOperand_Success;
6008 }
6009 
6010 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6011                                     int64_t &Nfmt,
6012                                     StringRef FormatStr,
6013                                     SMLoc Loc) {
6014   using namespace llvm::AMDGPU::MTBUFFormat;
6015   int64_t Format;
6016 
6017   Format = getDfmt(FormatStr);
6018   if (Format != DFMT_UNDEF) {
6019     Dfmt = Format;
6020     return true;
6021   }
6022 
6023   Format = getNfmt(FormatStr, getSTI());
6024   if (Format != NFMT_UNDEF) {
6025     Nfmt = Format;
6026     return true;
6027   }
6028 
6029   Error(Loc, "unsupported format");
6030   return false;
6031 }
6032 
6033 OperandMatchResultTy
6034 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
6035                                           SMLoc FormatLoc,
6036                                           int64_t &Format) {
6037   using namespace llvm::AMDGPU::MTBUFFormat;
6038 
6039   int64_t Dfmt = DFMT_UNDEF;
6040   int64_t Nfmt = NFMT_UNDEF;
6041   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
6042     return MatchOperand_ParseFail;
6043 
6044   if (trySkipToken(AsmToken::Comma)) {
6045     StringRef Str;
6046     SMLoc Loc = getLoc();
6047     if (!parseId(Str, "expected a format string") ||
6048         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
6049       return MatchOperand_ParseFail;
6050     }
6051     if (Dfmt == DFMT_UNDEF) {
6052       Error(Loc, "duplicate numeric format");
6053       return MatchOperand_ParseFail;
6054     } else if (Nfmt == NFMT_UNDEF) {
6055       Error(Loc, "duplicate data format");
6056       return MatchOperand_ParseFail;
6057     }
6058   }
6059 
6060   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6061   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6062 
6063   if (isGFX10Plus()) {
6064     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
6065     if (Ufmt == UFMT_UNDEF) {
6066       Error(FormatLoc, "unsupported format");
6067       return MatchOperand_ParseFail;
6068     }
6069     Format = Ufmt;
6070   } else {
6071     Format = encodeDfmtNfmt(Dfmt, Nfmt);
6072   }
6073 
6074   return MatchOperand_Success;
6075 }
6076 
6077 OperandMatchResultTy
6078 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6079                                             SMLoc Loc,
6080                                             int64_t &Format) {
6081   using namespace llvm::AMDGPU::MTBUFFormat;
6082 
6083   auto Id = getUnifiedFormat(FormatStr, getSTI());
6084   if (Id == UFMT_UNDEF)
6085     return MatchOperand_NoMatch;
6086 
6087   if (!isGFX10Plus()) {
6088     Error(Loc, "unified format is not supported on this GPU");
6089     return MatchOperand_ParseFail;
6090   }
6091 
6092   Format = Id;
6093   return MatchOperand_Success;
6094 }
6095 
6096 OperandMatchResultTy
6097 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6098   using namespace llvm::AMDGPU::MTBUFFormat;
6099   SMLoc Loc = getLoc();
6100 
6101   if (!parseExpr(Format))
6102     return MatchOperand_ParseFail;
6103   if (!isValidFormatEncoding(Format, getSTI())) {
6104     Error(Loc, "out of range format");
6105     return MatchOperand_ParseFail;
6106   }
6107 
6108   return MatchOperand_Success;
6109 }
6110 
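// Accept either a symbolic format, "format:[<name>{,<name>}]", or a numeric
// one, "format:<integer expression>".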
6111 OperandMatchResultTy
6112 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6113   using namespace llvm::AMDGPU::MTBUFFormat;
6114 
6115   if (!trySkipId("format", AsmToken::Colon))
6116     return MatchOperand_NoMatch;
6117 
6118   if (trySkipToken(AsmToken::LBrac)) {
6119     StringRef FormatStr;
6120     SMLoc Loc = getLoc();
6121     if (!parseId(FormatStr, "expected a format string"))
6122       return MatchOperand_ParseFail;
6123 
6124     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6125     if (Res == MatchOperand_NoMatch)
6126       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6127     if (Res != MatchOperand_Success)
6128       return Res;
6129 
6130     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6131       return MatchOperand_ParseFail;
6132 
6133     return MatchOperand_Success;
6134   }
6135 
6136   return parseNumericFormat(Format);
6137 }
6138 
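// Parse the optional buffer format operand of MTBUF instructions. Symbolic
// forms, e.g. format:[BUF_FMT_32_FLOAT] on GFX10+ (example is illustrative),
// and raw numeric encodings are both accepted; the subtarget's default
// encoding is used when the operand is omitted.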
6139 OperandMatchResultTy
6140 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
6141   using namespace llvm::AMDGPU::MTBUFFormat;
6142 
6143   int64_t Format = getDefaultFormatEncoding(getSTI());
6144   OperandMatchResultTy Res;
6145   SMLoc Loc = getLoc();
6146 
6147   // Parse legacy format syntax.
6148   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
6149   if (Res == MatchOperand_ParseFail)
6150     return Res;
6151 
6152   bool FormatFound = (Res == MatchOperand_Success);
6153 
6154   Operands.push_back(
6155     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6156 
6157   if (FormatFound)
6158     trySkipToken(AsmToken::Comma);
6159 
6160   if (isToken(AsmToken::EndOfStatement)) {
6161     // We are expecting an soffset operand,
6162     // but let the matcher handle the error.
6163     return MatchOperand_Success;
6164   }
6165 
6166   // Parse soffset.
6167   Res = parseRegOrImm(Operands);
6168   if (Res != MatchOperand_Success)
6169     return Res;
6170 
6171   trySkipToken(AsmToken::Comma);
6172 
6173   if (!FormatFound) {
6174     Res = parseSymbolicOrNumericFormat(Format);
6175     if (Res == MatchOperand_ParseFail)
6176       return Res;
6177     if (Res == MatchOperand_Success) {
6178       auto Size = Operands.size();
6179       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
6180       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6181       Op.setImm(Format);
6182     }
6183     return MatchOperand_Success;
6184   }
6185 
6186   if (isId("format") && peekToken().is(AsmToken::Colon)) {
6187     Error(getLoc(), "duplicate format");
6188     return MatchOperand_ParseFail;
6189   }
6190   return MatchOperand_Success;
6191 }
6192 
6193 //===----------------------------------------------------------------------===//
6194 // ds
6195 //===----------------------------------------------------------------------===//
6196 
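// Convert parsed operands of DS instructions that take a pair of offsets,
// e.g. ds_write2_b32 v1, v2, v3 offset0:4 offset1:8 (example is illustrative).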
6197 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
6198                                     const OperandVector &Operands) {
6199   OptionalImmIndexMap OptionalIdx;
6200 
6201   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6202     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6203 
6204     // Add the register arguments
6205     if (Op.isReg()) {
6206       Op.addRegOperands(Inst, 1);
6207       continue;
6208     }
6209 
6210     // Handle optional arguments
6211     OptionalIdx[Op.getImmTy()] = i;
6212   }
6213 
6214   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
6215   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
6216   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6217 
6218   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6219 }
6220 
6221 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
6222                                 bool IsGdsHardcoded) {
6223   OptionalImmIndexMap OptionalIdx;
6224 
6225   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6226     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6227 
6228     // Add the register arguments
6229     if (Op.isReg()) {
6230       Op.addRegOperands(Inst, 1);
6231       continue;
6232     }
6233 
6234     if (Op.isToken() && Op.getToken() == "gds") {
6235       IsGdsHardcoded = true;
6236       continue;
6237     }
6238 
6239     // Handle optional arguments
6240     OptionalIdx[Op.getImmTy()] = i;
6241   }
6242 
6243   AMDGPUOperand::ImmTy OffsetType =
6244     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
6245      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
6246      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
6247                                                       AMDGPUOperand::ImmTyOffset;
6248 
6249   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
6250 
6251   if (!IsGdsHardcoded) {
6252     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6253   }
6254   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6255 }
6256 
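// Convert parsed operands of export instructions. The 'en' bitmask is derived
// from the source operands: a bit is set for each source that is not 'off',
// and bits are set in pairs when the compr modifier is present.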
6257 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
6258   OptionalImmIndexMap OptionalIdx;
6259 
6260   unsigned OperandIdx[4];
6261   unsigned EnMask = 0;
6262   int SrcIdx = 0;
6263 
6264   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6265     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6266 
6267     // Add the register arguments
6268     if (Op.isReg()) {
6269       assert(SrcIdx < 4);
6270       OperandIdx[SrcIdx] = Inst.size();
6271       Op.addRegOperands(Inst, 1);
6272       ++SrcIdx;
6273       continue;
6274     }
6275 
6276     if (Op.isOff()) {
6277       assert(SrcIdx < 4);
6278       OperandIdx[SrcIdx] = Inst.size();
6279       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
6280       ++SrcIdx;
6281       continue;
6282     }
6283 
6284     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
6285       Op.addImmOperands(Inst, 1);
6286       continue;
6287     }
6288 
6289     if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
6290       continue;
6291 
6292     // Handle optional arguments
6293     OptionalIdx[Op.getImmTy()] = i;
6294   }
6295 
6296   assert(SrcIdx == 4);
6297 
6298   bool Compr = false;
6299   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
6300     Compr = true;
6301     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
6302     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
6303     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
6304   }
6305 
6306   for (auto i = 0; i < SrcIdx; ++i) {
6307     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
6308       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
6309     }
6310   }
6311 
6312   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
6313   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
6314 
6315   Inst.addOperand(MCOperand::createImm(EnMask));
6316 }
6317 
6318 //===----------------------------------------------------------------------===//
6319 // s_waitcnt
6320 //===----------------------------------------------------------------------===//
6321 
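// Merge a single counter value into an s_waitcnt bitmask. Returns true on
// failure, i.e. when the value does not fit into the counter field and
// saturation was not requested.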
6322 static bool
6323 encodeCnt(
6324   const AMDGPU::IsaVersion ISA,
6325   int64_t &IntVal,
6326   int64_t CntVal,
6327   bool Saturate,
6328   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6329   unsigned (*decode)(const IsaVersion &Version, unsigned))
6330 {
6331   bool Failed = false;
6332 
6333   IntVal = encode(ISA, IntVal, CntVal);
6334   if (CntVal != decode(ISA, IntVal)) {
6335     if (Saturate) {
6336       IntVal = encode(ISA, IntVal, -1);
6337     } else {
6338       Failed = true;
6339     }
6340   }
6341   return Failed;
6342 }
6343 
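// Parse one counter specification of an s_waitcnt operand, e.g. vmcnt(0) in
// s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) (example is illustrative).
// Counter specifications may optionally be separated by '&' or ','.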
6344 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6345 
6346   SMLoc CntLoc = getLoc();
6347   StringRef CntName = getTokenStr();
6348 
6349   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6350       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6351     return false;
6352 
6353   int64_t CntVal;
6354   SMLoc ValLoc = getLoc();
6355   if (!parseExpr(CntVal))
6356     return false;
6357 
6358   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6359 
6360   bool Failed = true;
6361   bool Sat = CntName.endswith("_sat");
6362 
6363   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6364     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6365   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6366     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6367   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6368     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6369   } else {
6370     Error(CntLoc, "invalid counter name " + CntName);
6371     return false;
6372   }
6373 
6374   if (Failed) {
6375     Error(ValLoc, "too large a value for " + CntName);
6376     return false;
6377   }
6378 
6379   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6380     return false;
6381 
6382   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6383     if (isToken(AsmToken::EndOfStatement)) {
6384       Error(getLoc(), "expected a counter name");
6385       return false;
6386     }
6387   }
6388 
6389   return true;
6390 }
6391 
6392 OperandMatchResultTy
6393 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
6394   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6395   int64_t Waitcnt = getWaitcntBitMask(ISA);
6396   SMLoc S = getLoc();
6397 
6398   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6399     while (!isToken(AsmToken::EndOfStatement)) {
6400       if (!parseCnt(Waitcnt))
6401         return MatchOperand_ParseFail;
6402     }
6403   } else {
6404     if (!parseExpr(Waitcnt))
6405       return MatchOperand_ParseFail;
6406   }
6407 
6408   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6409   return MatchOperand_Success;
6410 }
6411 
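// Parse one field of an s_delay_alu operand, e.g. instid0(VALU_DEP_1) in
// s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
// (example is illustrative). Fields are combined with '|'.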
6412 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
6413   SMLoc FieldLoc = getLoc();
6414   StringRef FieldName = getTokenStr();
6415   if (!skipToken(AsmToken::Identifier, "expected a field name") ||
6416       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6417     return false;
6418 
6419   SMLoc ValueLoc = getLoc();
6420   StringRef ValueName = getTokenStr();
6421   if (!skipToken(AsmToken::Identifier, "expected a value name") ||
6422       !skipToken(AsmToken::RParen, "expected a right parenthesis"))
6423     return false;
6424 
6425   unsigned Shift;
6426   if (FieldName == "instid0") {
6427     Shift = 0;
6428   } else if (FieldName == "instskip") {
6429     Shift = 4;
6430   } else if (FieldName == "instid1") {
6431     Shift = 7;
6432   } else {
6433     Error(FieldLoc, "invalid field name " + FieldName);
6434     return false;
6435   }
6436 
6437   int Value;
6438   if (Shift == 4) {
6439     // Parse values for instskip.
6440     Value = StringSwitch<int>(ValueName)
6441                 .Case("SAME", 0)
6442                 .Case("NEXT", 1)
6443                 .Case("SKIP_1", 2)
6444                 .Case("SKIP_2", 3)
6445                 .Case("SKIP_3", 4)
6446                 .Case("SKIP_4", 5)
6447                 .Default(-1);
6448   } else {
6449     // Parse values for instid0 and instid1.
6450     Value = StringSwitch<int>(ValueName)
6451                 .Case("NO_DEP", 0)
6452                 .Case("VALU_DEP_1", 1)
6453                 .Case("VALU_DEP_2", 2)
6454                 .Case("VALU_DEP_3", 3)
6455                 .Case("VALU_DEP_4", 4)
6456                 .Case("TRANS32_DEP_1", 5)
6457                 .Case("TRANS32_DEP_2", 6)
6458                 .Case("TRANS32_DEP_3", 7)
6459                 .Case("FMA_ACCUM_CYCLE_1", 8)
6460                 .Case("SALU_CYCLE_1", 9)
6461                 .Case("SALU_CYCLE_2", 10)
6462                 .Case("SALU_CYCLE_3", 11)
6463                 .Default(-1);
6464   }
6465   if (Value < 0) {
6466     Error(ValueLoc, "invalid value name " + ValueName);
6467     return false;
6468   }
6469 
6470   Delay |= Value << Shift;
6471   return true;
6472 }
6473 
6474 OperandMatchResultTy
6475 AMDGPUAsmParser::parseSDelayAluOps(OperandVector &Operands) {
6476   int64_t Delay = 0;
6477   SMLoc S = getLoc();
6478 
6479   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6480     do {
6481       if (!parseDelay(Delay))
6482         return MatchOperand_ParseFail;
6483     } while (trySkipToken(AsmToken::Pipe));
6484   } else {
6485     if (!parseExpr(Delay))
6486       return MatchOperand_ParseFail;
6487   }
6488 
6489   Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
6490   return MatchOperand_Success;
6491 }
6492 
6493 bool
6494 AMDGPUOperand::isSWaitCnt() const {
6495   return isImm();
6496 }
6497 
6498 bool AMDGPUOperand::isSDelayAlu() const { return isImm(); }
6499 
6500 //===----------------------------------------------------------------------===//
6501 // DepCtr
6502 //===----------------------------------------------------------------------===//
6503 
6504 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
6505                                   StringRef DepCtrName) {
6506   switch (ErrorId) {
6507   case OPR_ID_UNKNOWN:
6508     Error(Loc, Twine("invalid counter name ", DepCtrName));
6509     return;
6510   case OPR_ID_UNSUPPORTED:
6511     Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
6512     return;
6513   case OPR_ID_DUPLICATE:
6514     Error(Loc, Twine("duplicate counter name ", DepCtrName));
6515     return;
6516   case OPR_VAL_INVALID:
6517     Error(Loc, Twine("invalid value for ", DepCtrName));
6518     return;
6519   default:
6520     assert(false);
6521   }
6522 }
6523 
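// Parse one counter specification of an s_waitcnt_depctr operand, e.g.
// depctr_va_vdst(0) (example is illustrative). UsedOprMask records which
// counter fields have already been seen so that duplicates are diagnosed.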
6524 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
6525 
6526   using namespace llvm::AMDGPU::DepCtr;
6527 
6528   SMLoc DepCtrLoc = getLoc();
6529   StringRef DepCtrName = getTokenStr();
6530 
6531   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6532       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6533     return false;
6534 
6535   int64_t ExprVal;
6536   if (!parseExpr(ExprVal))
6537     return false;
6538 
6539   unsigned PrevOprMask = UsedOprMask;
6540   int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
6541 
6542   if (CntVal < 0) {
6543     depCtrError(DepCtrLoc, CntVal, DepCtrName);
6544     return false;
6545   }
6546 
6547   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6548     return false;
6549 
6550   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6551     if (isToken(AsmToken::EndOfStatement)) {
6552       Error(getLoc(), "expected a counter name");
6553       return false;
6554     }
6555   }
6556 
6557   unsigned CntValMask = PrevOprMask ^ UsedOprMask;
6558   DepCtr = (DepCtr & ~CntValMask) | CntVal;
6559   return true;
6560 }
6561 
6562 OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) {
6563   using namespace llvm::AMDGPU::DepCtr;
6564 
6565   int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
6566   SMLoc Loc = getLoc();
6567 
6568   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6569     unsigned UsedOprMask = 0;
6570     while (!isToken(AsmToken::EndOfStatement)) {
6571       if (!parseDepCtr(DepCtr, UsedOprMask))
6572         return MatchOperand_ParseFail;
6573     }
6574   } else {
6575     if (!parseExpr(DepCtr))
6576       return MatchOperand_ParseFail;
6577   }
6578 
6579   Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
6580   return MatchOperand_Success;
6581 }
6582 
6583 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
6584 
6585 //===----------------------------------------------------------------------===//
6586 // hwreg
6587 //===----------------------------------------------------------------------===//
6588 
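// Parse the body of a hwreg macro, e.g. hwreg(HW_REG_TRAPSTS, 0, 32)
// (example is illustrative). The register may be given by name or by a
// numeric code; the bit offset and field width are optional.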
6589 bool
6590 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6591                                 OperandInfoTy &Offset,
6592                                 OperandInfoTy &Width) {
6593   using namespace llvm::AMDGPU::Hwreg;
6594 
6595   // The register may be specified by name or using a numeric code
6596   HwReg.Loc = getLoc();
6597   if (isToken(AsmToken::Identifier) &&
6598       (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6599     HwReg.IsSymbolic = true;
6600     lex(); // skip register name
6601   } else if (!parseExpr(HwReg.Id, "a register name")) {
6602     return false;
6603   }
6604 
6605   if (trySkipToken(AsmToken::RParen))
6606     return true;
6607 
6608   // parse optional params
6609   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6610     return false;
6611 
6612   Offset.Loc = getLoc();
6613   if (!parseExpr(Offset.Id))
6614     return false;
6615 
6616   if (!skipToken(AsmToken::Comma, "expected a comma"))
6617     return false;
6618 
6619   Width.Loc = getLoc();
6620   return parseExpr(Width.Id) &&
6621          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6622 }
6623 
6624 bool
6625 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6626                                const OperandInfoTy &Offset,
6627                                const OperandInfoTy &Width) {
6628 
6629   using namespace llvm::AMDGPU::Hwreg;
6630 
6631   if (HwReg.IsSymbolic) {
6632     if (HwReg.Id == OPR_ID_UNSUPPORTED) {
6633       Error(HwReg.Loc,
6634             "specified hardware register is not supported on this GPU");
6635       return false;
6636     }
6637   } else {
6638     if (!isValidHwreg(HwReg.Id)) {
6639       Error(HwReg.Loc,
6640             "invalid code of hardware register: only 6-bit values are legal");
6641       return false;
6642     }
6643   }
6644   if (!isValidHwregOffset(Offset.Id)) {
6645     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6646     return false;
6647   }
6648   if (!isValidHwregWidth(Width.Id)) {
6649     Error(Width.Loc,
6650           "invalid bitfield width: only values from 1 to 32 are legal");
6651     return false;
6652   }
6653   return true;
6654 }
6655 
6656 OperandMatchResultTy
6657 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6658   using namespace llvm::AMDGPU::Hwreg;
6659 
6660   int64_t ImmVal = 0;
6661   SMLoc Loc = getLoc();
6662 
6663   if (trySkipId("hwreg", AsmToken::LParen)) {
6664     OperandInfoTy HwReg(OPR_ID_UNKNOWN);
6665     OperandInfoTy Offset(OFFSET_DEFAULT_);
6666     OperandInfoTy Width(WIDTH_DEFAULT_);
6667     if (parseHwregBody(HwReg, Offset, Width) &&
6668         validateHwreg(HwReg, Offset, Width)) {
6669       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6670     } else {
6671       return MatchOperand_ParseFail;
6672     }
6673   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6674     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6675       Error(Loc, "invalid immediate: only 16-bit values are legal");
6676       return MatchOperand_ParseFail;
6677     }
6678   } else {
6679     return MatchOperand_ParseFail;
6680   }
6681 
6682   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6683   return MatchOperand_Success;
6684 }
6685 
6686 bool AMDGPUOperand::isHwreg() const {
6687   return isImmTy(ImmTyHwreg);
6688 }
6689 
6690 //===----------------------------------------------------------------------===//
6691 // sendmsg
6692 //===----------------------------------------------------------------------===//
6693 
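// Parse the body of a sendmsg macro, e.g. sendmsg(MSG_GS, GS_OP_EMIT, 0)
// (example is illustrative). The operation and stream id are optional and
// may be specified symbolically or as expressions.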
6694 bool
6695 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6696                                   OperandInfoTy &Op,
6697                                   OperandInfoTy &Stream) {
6698   using namespace llvm::AMDGPU::SendMsg;
6699 
6700   Msg.Loc = getLoc();
6701   if (isToken(AsmToken::Identifier) &&
6702       (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6703     Msg.IsSymbolic = true;
6704     lex(); // skip message name
6705   } else if (!parseExpr(Msg.Id, "a message name")) {
6706     return false;
6707   }
6708 
6709   if (trySkipToken(AsmToken::Comma)) {
6710     Op.IsDefined = true;
6711     Op.Loc = getLoc();
6712     if (isToken(AsmToken::Identifier) &&
6713         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6714       lex(); // skip operation name
6715     } else if (!parseExpr(Op.Id, "an operation name")) {
6716       return false;
6717     }
6718 
6719     if (trySkipToken(AsmToken::Comma)) {
6720       Stream.IsDefined = true;
6721       Stream.Loc = getLoc();
6722       if (!parseExpr(Stream.Id))
6723         return false;
6724     }
6725   }
6726 
6727   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6728 }
6729 
6730 bool
6731 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6732                                  const OperandInfoTy &Op,
6733                                  const OperandInfoTy &Stream) {
6734   using namespace llvm::AMDGPU::SendMsg;
6735 
6736   // Validation strictness depends on whether the message is specified
6737   // in a symbolic or in a numeric form. In the latter case,
6738   // only the possibility of encoding the value is checked.
6739   bool Strict = Msg.IsSymbolic;
6740 
6741   if (Strict) {
6742     if (Msg.Id == OPR_ID_UNSUPPORTED) {
6743       Error(Msg.Loc, "specified message id is not supported on this GPU");
6744       return false;
6745     }
6746   } else {
6747     if (!isValidMsgId(Msg.Id, getSTI())) {
6748       Error(Msg.Loc, "invalid message id");
6749       return false;
6750     }
6751   }
6752   if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) {
6753     if (Op.IsDefined) {
6754       Error(Op.Loc, "message does not support operations");
6755     } else {
6756       Error(Msg.Loc, "missing message operation");
6757     }
6758     return false;
6759   }
6760   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6761     Error(Op.Loc, "invalid operation id");
6762     return false;
6763   }
6764   if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) &&
6765       Stream.IsDefined) {
6766     Error(Stream.Loc, "message operation does not support streams");
6767     return false;
6768   }
6769   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6770     Error(Stream.Loc, "invalid message stream id");
6771     return false;
6772   }
6773   return true;
6774 }
6775 
6776 OperandMatchResultTy
6777 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6778   using namespace llvm::AMDGPU::SendMsg;
6779 
6780   int64_t ImmVal = 0;
6781   SMLoc Loc = getLoc();
6782 
6783   if (trySkipId("sendmsg", AsmToken::LParen)) {
6784     OperandInfoTy Msg(OPR_ID_UNKNOWN);
6785     OperandInfoTy Op(OP_NONE_);
6786     OperandInfoTy Stream(STREAM_ID_NONE_);
6787     if (parseSendMsgBody(Msg, Op, Stream) &&
6788         validateSendMsg(Msg, Op, Stream)) {
6789       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6790     } else {
6791       return MatchOperand_ParseFail;
6792     }
6793   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6794     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6795       Error(Loc, "invalid immediate: only 16-bit values are legal");
6796       return MatchOperand_ParseFail;
6797     }
6798   } else {
6799     return MatchOperand_ParseFail;
6800   }
6801 
6802   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6803   return MatchOperand_Success;
6804 }
6805 
6806 bool AMDGPUOperand::isSendMsg() const {
6807   return isImmTy(ImmTySendMsg);
6808 }
6809 
6810 //===----------------------------------------------------------------------===//
6811 // v_interp
6812 //===----------------------------------------------------------------------===//
6813 
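// Parse the parameter slot of v_interp instructions: p10, p20 or p0, e.g.
// v_interp_mov_f32 v0, p10, attr0.x (example is illustrative).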
6814 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6815   StringRef Str;
6816   SMLoc S = getLoc();
6817 
6818   if (!parseId(Str))
6819     return MatchOperand_NoMatch;
6820 
6821   int Slot = StringSwitch<int>(Str)
6822     .Case("p10", 0)
6823     .Case("p20", 1)
6824     .Case("p0", 2)
6825     .Default(-1);
6826 
6827   if (Slot == -1) {
6828     Error(S, "invalid interpolation slot");
6829     return MatchOperand_ParseFail;
6830   }
6831 
6832   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6833                                               AMDGPUOperand::ImmTyInterpSlot));
6834   return MatchOperand_Success;
6835 }
6836 
6837 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6838   StringRef Str;
6839   SMLoc S = getLoc();
6840 
6841   if (!parseId(Str))
6842     return MatchOperand_NoMatch;
6843 
6844   if (!Str.startswith("attr")) {
6845     Error(S, "invalid interpolation attribute");
6846     return MatchOperand_ParseFail;
6847   }
6848 
6849   StringRef Chan = Str.take_back(2);
6850   int AttrChan = StringSwitch<int>(Chan)
6851     .Case(".x", 0)
6852     .Case(".y", 1)
6853     .Case(".z", 2)
6854     .Case(".w", 3)
6855     .Default(-1);
6856   if (AttrChan == -1) {
6857     Error(S, "invalid or missing interpolation attribute channel");
6858     return MatchOperand_ParseFail;
6859   }
6860 
6861   Str = Str.drop_back(2).drop_front(4);
6862 
6863   uint8_t Attr;
6864   if (Str.getAsInteger(10, Attr)) {
6865     Error(S, "invalid or missing interpolation attribute number");
6866     return MatchOperand_ParseFail;
6867   }
6868 
6869   if (Attr > 63) {
6870     Error(S, "out of bounds interpolation attribute number");
6871     return MatchOperand_ParseFail;
6872   }
6873 
6874   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6875 
6876   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6877                                               AMDGPUOperand::ImmTyInterpAttr));
6878   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6879                                               AMDGPUOperand::ImmTyAttrChan));
6880   return MatchOperand_Success;
6881 }
6882 
6883 //===----------------------------------------------------------------------===//
6884 // exp
6885 //===----------------------------------------------------------------------===//
6886 
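// Parse the target operand of export instructions, e.g. mrt0 or pos0 in
// exp pos0 v0, v1, v2, v3 done (example is illustrative).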
6887 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6888   using namespace llvm::AMDGPU::Exp;
6889 
6890   StringRef Str;
6891   SMLoc S = getLoc();
6892 
6893   if (!parseId(Str))
6894     return MatchOperand_NoMatch;
6895 
6896   unsigned Id = getTgtId(Str);
6897   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6898     Error(S, (Id == ET_INVALID) ?
6899                 "invalid exp target" :
6900                 "exp target is not supported on this GPU");
6901     return MatchOperand_ParseFail;
6902   }
6903 
6904   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6905                                               AMDGPUOperand::ImmTyExpTgt));
6906   return MatchOperand_Success;
6907 }
6908 
6909 //===----------------------------------------------------------------------===//
6910 // parser helpers
6911 //===----------------------------------------------------------------------===//
6912 
6913 bool
6914 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6915   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6916 }
6917 
6918 bool
6919 AMDGPUAsmParser::isId(const StringRef Id) const {
6920   return isId(getToken(), Id);
6921 }
6922 
6923 bool
6924 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6925   return getTokenKind() == Kind;
6926 }
6927 
6928 bool
6929 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6930   if (isId(Id)) {
6931     lex();
6932     return true;
6933   }
6934   return false;
6935 }
6936 
6937 bool
6938 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6939   if (isToken(AsmToken::Identifier)) {
6940     StringRef Tok = getTokenStr();
6941     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6942       lex();
6943       return true;
6944     }
6945   }
6946   return false;
6947 }
6948 
6949 bool
6950 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6951   if (isId(Id) && peekToken().is(Kind)) {
6952     lex();
6953     lex();
6954     return true;
6955   }
6956   return false;
6957 }
6958 
6959 bool
6960 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6961   if (isToken(Kind)) {
6962     lex();
6963     return true;
6964   }
6965   return false;
6966 }
6967 
6968 bool
6969 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6970                            const StringRef ErrMsg) {
6971   if (!trySkipToken(Kind)) {
6972     Error(getLoc(), ErrMsg);
6973     return false;
6974   }
6975   return true;
6976 }
6977 
6978 bool
6979 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6980   SMLoc S = getLoc();
6981 
6982   const MCExpr *Expr;
6983   if (Parser.parseExpression(Expr))
6984     return false;
6985 
6986   if (Expr->evaluateAsAbsolute(Imm))
6987     return true;
6988 
6989   if (Expected.empty()) {
6990     Error(S, "expected absolute expression");
6991   } else {
6992     Error(S, Twine("expected ", Expected) +
6993              Twine(" or an absolute expression"));
6994   }
6995   return false;
6996 }
6997 
6998 bool
6999 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
7000   SMLoc S = getLoc();
7001 
7002   const MCExpr *Expr;
7003   if (Parser.parseExpression(Expr))
7004     return false;
7005 
7006   int64_t IntVal;
7007   if (Expr->evaluateAsAbsolute(IntVal)) {
7008     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
7009   } else {
7010     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
7011   }
7012   return true;
7013 }
7014 
7015 bool
7016 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
7017   if (isToken(AsmToken::String)) {
7018     Val = getToken().getStringContents();
7019     lex();
7020     return true;
7021   } else {
7022     Error(getLoc(), ErrMsg);
7023     return false;
7024   }
7025 }
7026 
7027 bool
7028 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
7029   if (isToken(AsmToken::Identifier)) {
7030     Val = getTokenStr();
7031     lex();
7032     return true;
7033   } else {
7034     if (!ErrMsg.empty())
7035       Error(getLoc(), ErrMsg);
7036     return false;
7037   }
7038 }
7039 
7040 AsmToken
7041 AMDGPUAsmParser::getToken() const {
7042   return Parser.getTok();
7043 }
7044 
7045 AsmToken
7046 AMDGPUAsmParser::peekToken() {
7047   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
7048 }
7049 
7050 void
7051 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
7052   auto TokCount = getLexer().peekTokens(Tokens);
7053 
7054   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
7055     Tokens[Idx] = AsmToken(AsmToken::Error, "");
7056 }
7057 
7058 AsmToken::TokenKind
7059 AMDGPUAsmParser::getTokenKind() const {
7060   return getLexer().getKind();
7061 }
7062 
7063 SMLoc
7064 AMDGPUAsmParser::getLoc() const {
7065   return getToken().getLoc();
7066 }
7067 
7068 StringRef
7069 AMDGPUAsmParser::getTokenStr() const {
7070   return getToken().getString();
7071 }
7072 
7073 void
7074 AMDGPUAsmParser::lex() {
7075   Parser.Lex();
7076 }
7077 
7078 SMLoc
7079 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
7080                                const OperandVector &Operands) const {
7081   for (unsigned i = Operands.size() - 1; i > 0; --i) {
7082     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7083     if (Test(Op))
7084       return Op.getStartLoc();
7085   }
7086   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
7087 }
7088 
7089 SMLoc
7090 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
7091                            const OperandVector &Operands) const {
7092   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
7093   return getOperandLoc(Test, Operands);
7094 }
7095 
7096 SMLoc
7097 AMDGPUAsmParser::getRegLoc(unsigned Reg,
7098                            const OperandVector &Operands) const {
7099   auto Test = [=](const AMDGPUOperand& Op) {
7100     return Op.isRegKind() && Op.getReg() == Reg;
7101   };
7102   return getOperandLoc(Test, Operands);
7103 }
7104 
7105 SMLoc
7106 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
7107   auto Test = [](const AMDGPUOperand& Op) {
7108     return Op.IsImmKindLiteral() || Op.isExpr();
7109   };
7110   return getOperandLoc(Test, Operands);
7111 }
7112 
7113 SMLoc
7114 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
7115   auto Test = [](const AMDGPUOperand& Op) {
7116     return Op.isImmKindConst();
7117   };
7118   return getOperandLoc(Test, Operands);
7119 }
7120 
7121 //===----------------------------------------------------------------------===//
7122 // swizzle
7123 //===----------------------------------------------------------------------===//
7124 
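// Pack the and/or/xor masks of a BITMASK_PERM swizzle into its immediate
// encoding. The broadcast, swap and reverse swizzle macros below are all
// expressed in terms of this encoding.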
7125 LLVM_READNONE
7126 static unsigned
7127 encodeBitmaskPerm(const unsigned AndMask,
7128                   const unsigned OrMask,
7129                   const unsigned XorMask) {
7130   using namespace llvm::AMDGPU::Swizzle;
7131 
7132   return BITMASK_PERM_ENC |
7133          (AndMask << BITMASK_AND_SHIFT) |
7134          (OrMask  << BITMASK_OR_SHIFT)  |
7135          (XorMask << BITMASK_XOR_SHIFT);
7136 }
7137 
7138 bool
7139 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
7140                                      const unsigned MinVal,
7141                                      const unsigned MaxVal,
7142                                      const StringRef ErrMsg,
7143                                      SMLoc &Loc) {
7144   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7145     return false;
7146   }
7147   Loc = getLoc();
7148   if (!parseExpr(Op)) {
7149     return false;
7150   }
7151   if (Op < MinVal || Op > MaxVal) {
7152     Error(Loc, ErrMsg);
7153     return false;
7154   }
7155 
7156   return true;
7157 }
7158 
7159 bool
7160 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
7161                                       const unsigned MinVal,
7162                                       const unsigned MaxVal,
7163                                       const StringRef ErrMsg) {
7164   SMLoc Loc;
7165   for (unsigned i = 0; i < OpNum; ++i) {
7166     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
7167       return false;
7168   }
7169 
7170   return true;
7171 }
7172 
7173 bool
7174 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7175   using namespace llvm::AMDGPU::Swizzle;
7176 
7177   int64_t Lane[LANE_NUM];
7178   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7179                            "expected a 2-bit lane id")) {
7180     Imm = QUAD_PERM_ENC;
7181     for (unsigned I = 0; I < LANE_NUM; ++I) {
7182       Imm |= Lane[I] << (LANE_SHIFT * I);
7183     }
7184     return true;
7185   }
7186   return false;
7187 }
7188 
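// Parse the arguments of a broadcast swizzle macro, e.g.
// swizzle(BROADCAST, 8, 0) (example is illustrative). Broadcast is encoded
// as a bitmask permutation: the AndMask clears the low log2(group size) bits
// of each lane id and the OrMask selects the broadcast lane.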
7189 bool
7190 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
7191   using namespace llvm::AMDGPU::Swizzle;
7192 
7193   SMLoc Loc;
7194   int64_t GroupSize;
7195   int64_t LaneIdx;
7196 
7197   if (!parseSwizzleOperand(GroupSize,
7198                            2, 32,
7199                            "group size must be in the interval [2,32]",
7200                            Loc)) {
7201     return false;
7202   }
7203   if (!isPowerOf2_64(GroupSize)) {
7204     Error(Loc, "group size must be a power of two");
7205     return false;
7206   }
7207   if (parseSwizzleOperand(LaneIdx,
7208                           0, GroupSize - 1,
7209                           "lane id must be in the interval [0,group size - 1]",
7210                           Loc)) {
7211     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
7212     return true;
7213   }
7214   return false;
7215 }
7216 
7217 bool
7218 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
7219   using namespace llvm::AMDGPU::Swizzle;
7220 
7221   SMLoc Loc;
7222   int64_t GroupSize;
7223 
7224   if (!parseSwizzleOperand(GroupSize,
7225                            2, 32,
7226                            "group size must be in the interval [2,32]",
7227                            Loc)) {
7228     return false;
7229   }
7230   if (!isPowerOf2_64(GroupSize)) {
7231     Error(Loc, "group size must be a power of two");
7232     return false;
7233   }
7234 
7235   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
7236   return true;
7237 }
7238 
7239 bool
7240 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
7241   using namespace llvm::AMDGPU::Swizzle;
7242 
7243   SMLoc Loc;
7244   int64_t GroupSize;
7245 
7246   if (!parseSwizzleOperand(GroupSize,
7247                            1, 16,
7248                            "group size must be in the interval [1,16]",
7249                            Loc)) {
7250     return false;
7251   }
7252   if (!isPowerOf2_64(GroupSize)) {
7253     Error(Loc, "group size must be a power of two");
7254     return false;
7255   }
7256 
7257   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
7258   return true;
7259 }
7260 
7261 bool
7262 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
7263   using namespace llvm::AMDGPU::Swizzle;
7264 
7265   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7266     return false;
7267   }
7268 
7269   StringRef Ctl;
7270   SMLoc StrLoc = getLoc();
7271   if (!parseString(Ctl)) {
7272     return false;
7273   }
7274   if (Ctl.size() != BITMASK_WIDTH) {
7275     Error(StrLoc, "expected a 5-character mask");
7276     return false;
7277   }
7278 
7279   unsigned AndMask = 0;
7280   unsigned OrMask = 0;
7281   unsigned XorMask = 0;
7282 
7283   for (size_t i = 0; i < Ctl.size(); ++i) {
7284     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
7285     switch(Ctl[i]) {
7286     default:
7287       Error(StrLoc, "invalid mask");
7288       return false;
7289     case '0':
7290       break;
7291     case '1':
7292       OrMask |= Mask;
7293       break;
7294     case 'p':
7295       AndMask |= Mask;
7296       break;
7297     case 'i':
7298       AndMask |= Mask;
7299       XorMask |= Mask;
7300       break;
7301     }
7302   }
7303 
7304   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
7305   return true;
7306 }
7307 
7308 bool
7309 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
7310 
7311   SMLoc OffsetLoc = getLoc();
7312 
7313   if (!parseExpr(Imm, "a swizzle macro")) {
7314     return false;
7315   }
7316   if (!isUInt<16>(Imm)) {
7317     Error(OffsetLoc, "expected a 16-bit offset");
7318     return false;
7319   }
7320   return true;
7321 }
7322 
7323 bool
7324 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
7325   using namespace llvm::AMDGPU::Swizzle;
7326 
7327   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
7328 
7329     SMLoc ModeLoc = getLoc();
7330     bool Ok = false;
7331 
7332     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
7333       Ok = parseSwizzleQuadPerm(Imm);
7334     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
7335       Ok = parseSwizzleBitmaskPerm(Imm);
7336     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
7337       Ok = parseSwizzleBroadcast(Imm);
7338     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
7339       Ok = parseSwizzleSwap(Imm);
7340     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
7341       Ok = parseSwizzleReverse(Imm);
7342     } else {
7343       Error(ModeLoc, "expected a swizzle mode");
7344     }
7345 
7346     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
7347   }
7348 
7349   return false;
7350 }
7351 
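// Parse the offset operand of ds_swizzle_b32, either as a plain 16-bit
// offset or as a swizzle macro, e.g.
// ds_swizzle_b32 v8, v2 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
// (example is illustrative).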
7352 OperandMatchResultTy
7353 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
7354   SMLoc S = getLoc();
7355   int64_t Imm = 0;
7356 
7357   if (trySkipId("offset")) {
7358 
7359     bool Ok = false;
7360     if (skipToken(AsmToken::Colon, "expected a colon")) {
7361       if (trySkipId("swizzle")) {
7362         Ok = parseSwizzleMacro(Imm);
7363       } else {
7364         Ok = parseSwizzleOffset(Imm);
7365       }
7366     }
7367 
7368     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
7369 
7370     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
7371   } else {
7372     // Swizzle "offset" operand is optional.
7373     // If it is omitted, try parsing other optional operands.
7374     return parseOptionalOpr(Operands);
7375   }
7376 }
7377 
7378 bool
7379 AMDGPUOperand::isSwizzle() const {
7380   return isImmTy(ImmTySwizzle);
7381 }
7382 
7383 //===----------------------------------------------------------------------===//
7384 // VGPR Index Mode
7385 //===----------------------------------------------------------------------===//
7386 
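// Parse the body of a gpr_idx macro, e.g. gpr_idx(SRC0, DST) as used by
// s_set_gpr_idx_on (example is illustrative). Modes may appear in any order
// but must not repeat.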
7387 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
7388 
7389   using namespace llvm::AMDGPU::VGPRIndexMode;
7390 
7391   if (trySkipToken(AsmToken::RParen)) {
7392     return OFF;
7393   }
7394 
7395   int64_t Imm = 0;
7396 
7397   while (true) {
7398     unsigned Mode = 0;
7399     SMLoc S = getLoc();
7400 
7401     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
7402       if (trySkipId(IdSymbolic[ModeId])) {
7403         Mode = 1 << ModeId;
7404         break;
7405       }
7406     }
7407 
7408     if (Mode == 0) {
7409       Error(S, (Imm == 0)?
7410                "expected a VGPR index mode or a closing parenthesis" :
7411                "expected a VGPR index mode");
7412       return UNDEF;
7413     }
7414 
7415     if (Imm & Mode) {
7416       Error(S, "duplicate VGPR index mode");
7417       return UNDEF;
7418     }
7419     Imm |= Mode;
7420 
7421     if (trySkipToken(AsmToken::RParen))
7422       break;
7423     if (!skipToken(AsmToken::Comma,
7424                    "expected a comma or a closing parenthesis"))
7425       return UNDEF;
7426   }
7427 
7428   return Imm;
7429 }
7430 
7431 OperandMatchResultTy
7432 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
7433 
7434   using namespace llvm::AMDGPU::VGPRIndexMode;
7435 
7436   int64_t Imm = 0;
7437   SMLoc S = getLoc();
7438 
7439   if (trySkipId("gpr_idx", AsmToken::LParen)) {
7440     Imm = parseGPRIdxMacro();
7441     if (Imm == UNDEF)
7442       return MatchOperand_ParseFail;
7443   } else {
7444     if (getParser().parseAbsoluteExpression(Imm))
7445       return MatchOperand_ParseFail;
7446     if (Imm < 0 || !isUInt<4>(Imm)) {
7447       Error(S, "invalid immediate: only 4-bit values are legal");
7448       return MatchOperand_ParseFail;
7449     }
7450   }
7451 
7452   Operands.push_back(
7453       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
7454   return MatchOperand_Success;
7455 }
7456 
7457 bool AMDGPUOperand::isGPRIdxMode() const {
7458   return isImmTy(ImmTyGprIdxMode);
7459 }
7460 
7461 //===----------------------------------------------------------------------===//
7462 // sopp branch targets
7463 //===----------------------------------------------------------------------===//
7464 
7465 OperandMatchResultTy
7466 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
7467 
7468   // Make sure we are not parsing something
7469   // that looks like a label or an expression but is not.
7470   // This will improve error messages.
7471   if (isRegister() || isModifier())
7472     return MatchOperand_NoMatch;
7473 
7474   if (!parseExpr(Operands))
7475     return MatchOperand_ParseFail;
7476 
7477   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
7478   assert(Opr.isImm() || Opr.isExpr());
7479   SMLoc Loc = Opr.getStartLoc();
7480 
7481   // Currently we do not support arbitrary expressions as branch targets.
7482   // Only labels and absolute expressions are accepted.
7483   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
7484     Error(Loc, "expected an absolute expression or a label");
7485   } else if (Opr.isImm() && !Opr.isS16Imm()) {
7486     Error(Loc, "expected a 16-bit signed jump offset");
7487   }
7488 
7489   return MatchOperand_Success;
7490 }
7491 
7492 //===----------------------------------------------------------------------===//
7493 // Boolean holding registers
7494 //===----------------------------------------------------------------------===//
7495 
7496 OperandMatchResultTy
7497 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
7498   return parseReg(Operands);
7499 }
7500 
7501 //===----------------------------------------------------------------------===//
7502 // mubuf
7503 //===----------------------------------------------------------------------===//
7504 
7505 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
7506   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
7507 }
7508 
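// Convert parsed MUBUF operands. For atomics, the opcode is switched to the
// no-return variant unless glc is set, and for the returning form the
// destination register is re-added as a tied source operand.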
7509 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
7510                                    const OperandVector &Operands,
7511                                    bool IsAtomic,
7512                                    bool IsLds) {
7513   OptionalImmIndexMap OptionalIdx;
7514   unsigned FirstOperandIdx = 1;
7515   bool IsAtomicReturn = false;
7516 
7517   if (IsAtomic) {
7518     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7519       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7520       if (!Op.isCPol())
7521         continue;
7522       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7523       break;
7524     }
7525 
7526     if (!IsAtomicReturn) {
7527       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7528       if (NewOpc != -1)
7529         Inst.setOpcode(NewOpc);
7530     }
7531 
7532     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7533                       SIInstrFlags::IsAtomicRet;
7534   }
7535 
7536   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7537     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7538 
7539     // Add the register arguments
7540     if (Op.isReg()) {
7541       Op.addRegOperands(Inst, 1);
7542       // Insert a tied src for atomic return dst.
7543       // This cannot be postponed as subsequent calls to
7544       // addImmOperands rely on the correct number of MC operands.
7545       if (IsAtomicReturn && i == FirstOperandIdx)
7546         Op.addRegOperands(Inst, 1);
7547       continue;
7548     }
7549 
7550     // Handle the case where soffset is an immediate
7551     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7552       Op.addImmOperands(Inst, 1);
7553       continue;
7554     }
7555 
7556     // Handle tokens like 'offen' which are sometimes hard-coded into the
7557     // asm string.  There are no MCInst operands for these.
7558     if (Op.isToken()) {
7559       continue;
7560     }
7561     assert(Op.isImm());
7562 
7563     // Handle optional arguments
7564     OptionalIdx[Op.getImmTy()] = i;
7565   }
7566 
7567   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7568   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7569 
7570   if (!IsLds) { // tfe is not legal with lds opcodes
7571     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7572   }
7573   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7574 }
7575 
7576 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7577   OptionalImmIndexMap OptionalIdx;
7578 
7579   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7580     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7581 
7582     // Add the register arguments
7583     if (Op.isReg()) {
7584       Op.addRegOperands(Inst, 1);
7585       continue;
7586     }
7587 
7588     // Handle the case where soffset is an immediate
7589     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7590       Op.addImmOperands(Inst, 1);
7591       continue;
7592     }
7593 
7594     // Handle tokens like 'offen' which are sometimes hard-coded into the
7595     // asm string.  There are no MCInst operands for these.
7596     if (Op.isToken()) {
7597       continue;
7598     }
7599     assert(Op.isImm());
7600 
7601     // Handle optional arguments
7602     OptionalIdx[Op.getImmTy()] = i;
7603   }
7604 
7605   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7606                         AMDGPUOperand::ImmTyOffset);
7607   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7608   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7609   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7610   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7611 }
7612 
7613 //===----------------------------------------------------------------------===//
7614 // mimg
7615 //===----------------------------------------------------------------------===//
7616 
7617 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7618                               bool IsAtomic) {
7619   unsigned I = 1;
7620   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7621   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7622     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7623   }
7624 
7625   if (IsAtomic) {
7626     // Add src, same as dst
7627     assert(Desc.getNumDefs() == 1);
7628     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7629   }
7630 
7631   OptionalImmIndexMap OptionalIdx;
7632 
7633   for (unsigned E = Operands.size(); I != E; ++I) {
7634     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7635 
7636     // Add the register arguments
7637     if (Op.isReg()) {
7638       Op.addRegOperands(Inst, 1);
7639     } else if (Op.isImmModifier()) {
7640       OptionalIdx[Op.getImmTy()] = I;
7641     } else if (!Op.isToken()) {
7642       llvm_unreachable("unexpected operand type");
7643     }
7644   }
7645 
7646   bool IsGFX10Plus = isGFX10Plus();
7647 
7648   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7649   if (IsGFX10Plus)
7650     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7651   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7652   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7653   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7654   if (IsGFX10Plus)
7655     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7656   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7657     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7658   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7659   if (!IsGFX10Plus)
7660     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7661   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7662 }
7663 
7664 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7665   cvtMIMG(Inst, Operands, true);
7666 }
7667 
7668 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7669   OptionalImmIndexMap OptionalIdx;
7670   bool IsAtomicReturn = false;
7671 
7672   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7673     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7674     if (!Op.isCPol())
7675       continue;
7676     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7677     break;
7678   }
7679 
7680   if (!IsAtomicReturn) {
7681     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7682     if (NewOpc != -1)
7683       Inst.setOpcode(NewOpc);
7684   }
7685 
7686   IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7687                     SIInstrFlags::IsAtomicRet;
7688 
7689   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7690     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7691 
7692     // Add the register arguments
7693     if (Op.isReg()) {
7694       Op.addRegOperands(Inst, 1);
7695       if (IsAtomicReturn && i == 1)
7696         Op.addRegOperands(Inst, 1);
7697       continue;
7698     }
7699 
7700     // Handle the case where soffset is an immediate
7701     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7702       Op.addImmOperands(Inst, 1);
7703       continue;
7704     }
7705 
7706     // Handle tokens like 'offen' which are sometimes hard-coded into the
7707     // asm string.  There are no MCInst operands for these.
7708     if (Op.isToken()) {
7709       continue;
7710     }
7711     assert(Op.isImm());
7712 
7713     // Handle optional arguments
7714     OptionalIdx[Op.getImmTy()] = i;
7715   }
7716 
7717   if ((int)Inst.getNumOperands() <=
7718       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7719     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7720   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7721 }
7722 
7723 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7724                                       const OperandVector &Operands) {
7725   for (unsigned I = 1; I < Operands.size(); ++I) {
7726     auto &Operand = (AMDGPUOperand &)*Operands[I];
7727     if (Operand.isReg())
7728       Operand.addRegOperands(Inst, 1);
7729   }
7730 
7731   Inst.addOperand(MCOperand::createImm(1)); // a16
7732 }
7733 
7734 //===----------------------------------------------------------------------===//
7735 // smrd
7736 //===----------------------------------------------------------------------===//
7737 
7738 bool AMDGPUOperand::isSMRDOffset8() const {
7739   return isImm() && isUInt<8>(getImm());
7740 }
7741 
7742 bool AMDGPUOperand::isSMEMOffset() const {
7743   return isImmTy(ImmTyNone) ||
7744          isImmTy(ImmTyOffset); // Offset range is checked later by validator.
7745 }
7746 
7747 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7748   // 32-bit literals are only supported on CI and we only want to use them
7749   // when the offset does not fit in 8 bits.
7750   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7751 }
7752 
7753 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7754   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7755 }
7756 
7757 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7758   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7759 }
7760 
7761 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7762   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7763 }
7764 
7765 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7766   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7767 }
7768 
7769 //===----------------------------------------------------------------------===//
7770 // vop3
7771 //===----------------------------------------------------------------------===//
7772 
7773 static bool ConvertOmodMul(int64_t &Mul) {
7774   if (Mul != 1 && Mul != 2 && Mul != 4)
7775     return false;
7776 
7777   Mul >>= 1;
7778   return true;
7779 }
7780 
7781 static bool ConvertOmodDiv(int64_t &Div) {
7782   if (Div == 1) {
7783     Div = 0;
7784     return true;
7785   }
7786 
7787   if (Div == 2) {
7788     Div = 3;
7789     return true;
7790   }
7791 
7792   return false;
7793 }
7794 
7795 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7796 // This is intentional and ensures compatibility with sp3.
7797 // See bug 35397 for details.
7798 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7799   if (BoundCtrl == 0 || BoundCtrl == 1) {
7800     BoundCtrl = 1;
7801     return true;
7802   }
7803   return false;
7804 }
7805 
7806 // Note: the order in this table matches the order of operands in AsmString.
7807 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7808   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7809   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7810   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7811   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7812   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7813   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7814   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7815   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7816   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7817   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7818   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7819   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7820   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7821   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7822   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7823   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7824   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7825   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7826   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7827   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7828   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7829   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7830   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7831   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7832   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7833   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7834   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7835   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
7836   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7837   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7838   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7839   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7840   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7841   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7842   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7843   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7844   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7845   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7846   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7847   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7848   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr},
7849   {"wait_vdst", AMDGPUOperand::ImmTyWaitVDST, false, nullptr},
7850   {"wait_exp", AMDGPUOperand::ImmTyWaitEXP, false, nullptr}
7851 };
7852 
7853 void AMDGPUAsmParser::onBeginOfFile() {
7854   if (!getParser().getStreamer().getTargetStreamer() ||
7855       getSTI().getTargetTriple().getArch() == Triple::r600)
7856     return;
7857 
7858   if (!getTargetStreamer().getTargetID())
7859     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7860 
7861   if (isHsaAbiVersion3AndAbove(&getSTI()))
7862     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7863 }
7864 
7865 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7866 
7867   OperandMatchResultTy res = parseOptionalOpr(Operands);
7868 
  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // The current design assumes that all operands after the first optional
  // operand are also optional. However, the implementation of some
  // instructions violates this rule (see e.g. flat/global atomics, which have
  // a hardcoded 'glc' operand).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands to
  // make sure the autogenerated parser of custom operands never hits a
  // hardcoded mandatory operand.
7879 
7880   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7881     if (res != MatchOperand_Success ||
7882         isToken(AsmToken::EndOfStatement))
7883       break;
7884 
7885     trySkipToken(AsmToken::Comma);
7886     res = parseOptionalOpr(Operands);
7887   }
7888 
7889   return res;
7890 }
7891 
7892 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7893   OperandMatchResultTy res;
7894   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7895     // try to parse any optional operand here
7896     if (Op.IsBit) {
7897       res = parseNamedBit(Op.Name, Operands, Op.Type);
7898     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7899       res = parseOModOperand(Operands);
7900     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7901                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7902                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7903       res = parseSDWASel(Operands, Op.Name, Op.Type);
7904     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7905       res = parseSDWADstUnused(Operands);
7906     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7907                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7908                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7909                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7910       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7911                                         Op.ConvertResult);
7912     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7913       res = parseDim(Operands);
7914     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7915       res = parseCPol(Operands);
7916     } else {
7917       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7918       if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) {
7919         res = parseOperandArrayWithPrefix("neg", Operands,
7920                                           AMDGPUOperand::ImmTyBLGP,
7921                                           nullptr);
7922       }
7923     }
7924     if (res != MatchOperand_NoMatch) {
7925       return res;
7926     }
7927   }
7928   return MatchOperand_NoMatch;
7929 }
7930 
7931 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7932   StringRef Name = getTokenStr();
7933   if (Name == "mul") {
7934     return parseIntWithPrefix("mul", Operands,
7935                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7936   }
7937 
7938   if (Name == "div") {
7939     return parseIntWithPrefix("div", Operands,
7940                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7941   }
7942 
7943   return MatchOperand_NoMatch;
7944 }
7945 
7946 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
7947   cvtVOP3P(Inst, Operands);
7948 
7949   int Opc = Inst.getOpcode();
7950 
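  // Count how many source operands (src0..src2) this opcode actually has.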
7951   int SrcNum;
7952   const int Ops[] = { AMDGPU::OpName::src0,
7953                       AMDGPU::OpName::src1,
7954                       AMDGPU::OpName::src2 };
7955   for (SrcNum = 0;
7956        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
7957        ++SrcNum);
7958   assert(SrcNum > 0);
7959 
7960   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7961   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7962 
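  // The bit just past the source bits holds op_sel for the destination;
  // it is recorded in src0_modifiers as DST_OP_SEL.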
7963   if ((OpSel & (1 << SrcNum)) != 0) {
7964     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7965     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7966     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
7967   }
7968 }
7969 
7970 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
      // 1. This operand is an input modifier operand
  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not the last operand
      && Desc.NumOperands > (OpNum + 1)
      // 3. The next operand has a register class
      && Desc.OpInfo[OpNum + 1].RegClass != -1
      // 4. The next register is not tied to any other operand
7978       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7979 }
7980 
7981 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7982 {
7983   OptionalImmIndexMap OptionalIdx;
7984   unsigned Opc = Inst.getOpcode();
7985 
7986   unsigned I = 1;
7987   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7988   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7989     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7990   }
7991 
7992   for (unsigned E = Operands.size(); I != E; ++I) {
7993     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7994     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7995       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7996     } else if (Op.isInterpSlot() ||
7997                Op.isInterpAttr() ||
7998                Op.isAttrChan()) {
7999       Inst.addOperand(MCOperand::createImm(Op.getImm()));
8000     } else if (Op.isImmModifier()) {
8001       OptionalIdx[Op.getImmTy()] = I;
8002     } else {
8003       llvm_unreachable("unhandled operand type");
8004     }
8005   }
8006 
8007   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
8008     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
8009   }
8010 
8011   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
8012     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8013   }
8014 
8015   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
8016     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8017   }
8018 }
8019 
8020 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
8021 {
8022   OptionalImmIndexMap OptionalIdx;
8023   unsigned Opc = Inst.getOpcode();
8024 
8025   unsigned I = 1;
8026   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8027   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8028     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8029   }
8030 
8031   for (unsigned E = Operands.size(); I != E; ++I) {
8032     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8033     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8034       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8035     } else if (Op.isImmModifier()) {
8036       OptionalIdx[Op.getImmTy()] = I;
8037     } else {
8038       llvm_unreachable("unhandled operand type");
8039     }
8040   }
8041 
8042   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8043 
8044   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8045   if (OpSelIdx != -1)
8046     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8047 
8048   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
8049 
8050   if (OpSelIdx == -1)
8051     return;
8052 
8053   const int Ops[] = { AMDGPU::OpName::src0,
8054                       AMDGPU::OpName::src1,
8055                       AMDGPU::OpName::src2 };
8056   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8057                          AMDGPU::OpName::src1_modifiers,
8058                          AMDGPU::OpName::src2_modifiers };
8059 
8060   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8061 
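  // Distribute op_sel bits to the per-source modifier operands. Bit 3 holds
  // the destination op_sel and is recorded in src0_modifiers as DST_OP_SEL.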
8062   for (int J = 0; J < 3; ++J) {
8063     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8064     if (OpIdx == -1)
8065       break;
8066 
8067     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8068     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8069 
8070     if ((OpSel & (1 << J)) != 0)
8071       ModVal |= SISrcMods::OP_SEL_0;
8072     if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
8073         (OpSel & (1 << 3)) != 0)
8074       ModVal |= SISrcMods::DST_OP_SEL;
8075 
8076     Inst.getOperand(ModIdx).setImm(ModVal);
8077   }
8078 }
8079 
8080 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
8081                               OptionalImmIndexMap &OptionalIdx) {
8082   unsigned Opc = Inst.getOpcode();
8083 
8084   unsigned I = 1;
8085   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8086   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8087     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8088   }
8089 
8090   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
8091     // This instruction has src modifiers
8092     for (unsigned E = Operands.size(); I != E; ++I) {
8093       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8094       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8095         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8096       } else if (Op.isImmModifier()) {
8097         OptionalIdx[Op.getImmTy()] = I;
8098       } else if (Op.isRegOrImm()) {
8099         Op.addRegOrImmOperands(Inst, 1);
8100       } else {
8101         llvm_unreachable("unhandled operand type");
8102       }
8103     }
8104   } else {
8105     // No src modifiers
8106     for (unsigned E = Operands.size(); I != E; ++I) {
8107       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8108       if (Op.isMod()) {
8109         OptionalIdx[Op.getImmTy()] = I;
8110       } else {
8111         Op.addRegOrImmOperands(Inst, 1);
8112       }
8113     }
8114   }
8115 
8116   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
8117     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8118   }
8119 
8120   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
8121     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8122   }
8123 
  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // they have a src2 register operand that is tied to the dst operand.
  // We don't allow modifiers for this operand in the assembler, so
  // src2_modifiers should be 0.
8128   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
8129       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
8130       Opc == AMDGPU::V_MAC_F32_e64_vi ||
8131       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
8132       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
8133       Opc == AMDGPU::V_MAC_F16_e64_vi ||
8134       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
8135       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
8136       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
8137       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
8138       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
8139     auto it = Inst.begin();
8140     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
8141     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
8142     ++it;
8143     // Copy the operand to ensure it's not invalidated when Inst grows.
8144     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
8145   }
8146 }
8147 
8148 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
8149   OptionalImmIndexMap OptionalIdx;
8150   cvtVOP3(Inst, Operands, OptionalIdx);
8151 }
8152 
8153 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
8154                                OptionalImmIndexMap &OptIdx) {
8155   const int Opc = Inst.getOpcode();
8156   const MCInstrDesc &Desc = MII.get(Opc);
8157 
8158   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
8159 
8160   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
8161     assert(!IsPacked);
8162     Inst.addOperand(Inst.getOperand(0));
8163   }
8164 
  // FIXME: This is messy. Parse the modifiers as if this were a normal VOP3
  // instruction, and then figure out where to actually put the modifiers.
8167 
8168   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8169   if (OpSelIdx != -1) {
8170     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
8171   }
8172 
8173   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
8174   if (OpSelHiIdx != -1) {
8175     int DefaultVal = IsPacked ? -1 : 0;
8176     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
8177                           DefaultVal);
8178   }
8179 
8180   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
8181   if (NegLoIdx != -1) {
8182     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
8183     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
8184   }
8185 
8186   const int Ops[] = { AMDGPU::OpName::src0,
8187                       AMDGPU::OpName::src1,
8188                       AMDGPU::OpName::src2 };
8189   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8190                          AMDGPU::OpName::src1_modifiers,
8191                          AMDGPU::OpName::src2_modifiers };
8192 
8193   unsigned OpSel = 0;
8194   unsigned OpSelHi = 0;
8195   unsigned NegLo = 0;
8196   unsigned NegHi = 0;
8197 
8198   if (OpSelIdx != -1)
8199     OpSel = Inst.getOperand(OpSelIdx).getImm();
8200 
8201   if (OpSelHiIdx != -1)
8202     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
8203 
8204   if (NegLoIdx != -1) {
8205     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
8206     NegLo = Inst.getOperand(NegLoIdx).getImm();
8207     NegHi = Inst.getOperand(NegHiIdx).getImm();
8208   }
8209 
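  // Fold the op_sel, op_sel_hi, neg_lo and neg_hi bits into the per-source
  // modifier operands.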
8210   for (int J = 0; J < 3; ++J) {
8211     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8212     if (OpIdx == -1)
8213       break;
8214 
8215     uint32_t ModVal = 0;
8216 
8217     if ((OpSel & (1 << J)) != 0)
8218       ModVal |= SISrcMods::OP_SEL_0;
8219 
8220     if ((OpSelHi & (1 << J)) != 0)
8221       ModVal |= SISrcMods::OP_SEL_1;
8222 
8223     if ((NegLo & (1 << J)) != 0)
8224       ModVal |= SISrcMods::NEG;
8225 
8226     if ((NegHi & (1 << J)) != 0)
8227       ModVal |= SISrcMods::NEG_HI;
8228 
8229     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8230 
8231     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
8232   }
8233 }
8234 
8235 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8236   OptionalImmIndexMap OptIdx;
8237   cvtVOP3(Inst, Operands, OptIdx);
8238   cvtVOP3P(Inst, Operands, OptIdx);
8239 }
8240 
8241 //===----------------------------------------------------------------------===//
8242 // dpp
8243 //===----------------------------------------------------------------------===//
8244 
8245 bool AMDGPUOperand::isDPP8() const {
8246   return isImmTy(ImmTyDPP8);
8247 }
8248 
8249 bool AMDGPUOperand::isDPPCtrl() const {
8250   using namespace AMDGPU::DPP;
8251 
8252   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8253   if (result) {
8254     int64_t Imm = getImm();
8255     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8256            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
8257            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8258            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
8259            (Imm == DppCtrl::WAVE_SHL1) ||
8260            (Imm == DppCtrl::WAVE_ROL1) ||
8261            (Imm == DppCtrl::WAVE_SHR1) ||
8262            (Imm == DppCtrl::WAVE_ROR1) ||
8263            (Imm == DppCtrl::ROW_MIRROR) ||
8264            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
8265            (Imm == DppCtrl::BCAST15) ||
8266            (Imm == DppCtrl::BCAST31) ||
8267            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8268            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
8269   }
8270   return false;
8271 }
8272 
8273 //===----------------------------------------------------------------------===//
8274 // mAI
8275 //===----------------------------------------------------------------------===//
8276 
8277 bool AMDGPUOperand::isBLGP() const {
8278   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8279 }
8280 
8281 bool AMDGPUOperand::isCBSZ() const {
8282   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
8283 }
8284 
8285 bool AMDGPUOperand::isABID() const {
8286   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
8287 }
8288 
8289 bool AMDGPUOperand::isS16Imm() const {
8290   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8291 }
8292 
8293 bool AMDGPUOperand::isU16Imm() const {
8294   return isImm() && isUInt<16>(getImm());
8295 }
8296 
8297 //===----------------------------------------------------------------------===//
8298 // dim
8299 //===----------------------------------------------------------------------===//
8300 
8301 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
8302   // We want to allow "dim:1D" etc.,
8303   // but the initial 1 is tokenized as an integer.
8304   std::string Token;
8305   if (isToken(AsmToken::Integer)) {
8306     SMLoc Loc = getToken().getEndLoc();
8307     Token = std::string(getTokenStr());
8308     lex();
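    // The suffix must immediately follow the integer, with no whitespace in
    // between.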
8309     if (getLoc() != Loc)
8310       return false;
8311   }
8312 
8313   StringRef Suffix;
8314   if (!parseId(Suffix))
8315     return false;
8316   Token += Suffix;
8317 
8318   StringRef DimId = Token;
8319   if (DimId.startswith("SQ_RSRC_IMG_"))
8320     DimId = DimId.drop_front(12);
8321 
8322   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
8323   if (!DimInfo)
8324     return false;
8325 
8326   Encoding = DimInfo->Encoding;
8327   return true;
8328 }
8329 
8330 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
8331   if (!isGFX10Plus())
8332     return MatchOperand_NoMatch;
8333 
8334   SMLoc S = getLoc();
8335 
8336   if (!trySkipId("dim", AsmToken::Colon))
8337     return MatchOperand_NoMatch;
8338 
8339   unsigned Encoding;
8340   SMLoc Loc = getLoc();
8341   if (!parseDimId(Encoding)) {
8342     Error(Loc, "invalid dim value");
8343     return MatchOperand_ParseFail;
8344   }
8345 
8346   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
8347                                               AMDGPUOperand::ImmTyDim));
8348   return MatchOperand_Success;
8349 }
8350 
8351 //===----------------------------------------------------------------------===//
8352 // dpp
8353 //===----------------------------------------------------------------------===//
8354 
8355 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
8356   SMLoc S = getLoc();
8357 
8358   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
8359     return MatchOperand_NoMatch;
8360 
8361   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
8362 
8363   int64_t Sels[8];
8364 
8365   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8366     return MatchOperand_ParseFail;
8367 
8368   for (size_t i = 0; i < 8; ++i) {
8369     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8370       return MatchOperand_ParseFail;
8371 
8372     SMLoc Loc = getLoc();
8373     if (getParser().parseAbsoluteExpression(Sels[i]))
8374       return MatchOperand_ParseFail;
8375     if (0 > Sels[i] || 7 < Sels[i]) {
8376       Error(Loc, "expected a 3-bit value");
8377       return MatchOperand_ParseFail;
8378     }
8379   }
8380 
8381   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8382     return MatchOperand_ParseFail;
8383 
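  // Pack the eight 3-bit lane selectors into a single immediate
  // (lane i occupies bits [3*i+2 : 3*i]).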
8384   unsigned DPP8 = 0;
8385   for (size_t i = 0; i < 8; ++i)
8386     DPP8 |= (Sels[i] << (i * 3));
8387 
8388   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
8389   return MatchOperand_Success;
8390 }
8391 
8392 bool
8393 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
8394                                     const OperandVector &Operands) {
8395   if (Ctrl == "row_newbcast")
8396     return isGFX90A();
8397 
8398   if (Ctrl == "row_share" ||
8399       Ctrl == "row_xmask")
8400     return isGFX10Plus();
8401 
8402   if (Ctrl == "wave_shl" ||
8403       Ctrl == "wave_shr" ||
8404       Ctrl == "wave_rol" ||
8405       Ctrl == "wave_ror" ||
8406       Ctrl == "row_bcast")
8407     return isVI() || isGFX9();
8408 
8409   return Ctrl == "row_mirror" ||
8410          Ctrl == "row_half_mirror" ||
8411          Ctrl == "quad_perm" ||
8412          Ctrl == "row_shl" ||
8413          Ctrl == "row_shr" ||
8414          Ctrl == "row_ror";
8415 }
8416 
8417 int64_t
8418 AMDGPUAsmParser::parseDPPCtrlPerm() {
8419   // quad_perm:[%d,%d,%d,%d]
8420 
8421   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8422     return -1;
8423 
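  // Accumulate the four 2-bit lane selectors into an 8-bit quad_perm value.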
8424   int64_t Val = 0;
8425   for (int i = 0; i < 4; ++i) {
8426     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8427       return -1;
8428 
8429     int64_t Temp;
8430     SMLoc Loc = getLoc();
8431     if (getParser().parseAbsoluteExpression(Temp))
8432       return -1;
8433     if (Temp < 0 || Temp > 3) {
8434       Error(Loc, "expected a 2-bit value");
8435       return -1;
8436     }
8437 
8438     Val += (Temp << i * 2);
8439   }
8440 
8441   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8442     return -1;
8443 
8444   return Val;
8445 }
8446 
8447 int64_t
8448 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
8449   using namespace AMDGPU::DPP;
8450 
8451   // sel:%d
8452 
8453   int64_t Val;
8454   SMLoc Loc = getLoc();
8455 
8456   if (getParser().parseAbsoluteExpression(Val))
8457     return -1;
8458 
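  // Valid value range and base encoding for each dpp_ctrl variant; the final
  // encoding is Ctrl | Val, or just Ctrl when only one value is allowed.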
8459   struct DppCtrlCheck {
8460     int64_t Ctrl;
8461     int Lo;
8462     int Hi;
8463   };
8464 
8465   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
8466     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
8467     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
8468     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
8469     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
8470     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
8471     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
8472     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
8473     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
8474     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
8475     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
8476     .Default({-1, 0, 0});
8477 
8478   bool Valid;
8479   if (Check.Ctrl == -1) {
8480     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
8481     Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
8482   } else {
8483     Valid = Check.Lo <= Val && Val <= Check.Hi;
8484     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
8485   }
8486 
8487   if (!Valid) {
8488     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
8489     return -1;
8490   }
8491 
8492   return Val;
8493 }
8494 
8495 OperandMatchResultTy
8496 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
8497   using namespace AMDGPU::DPP;
8498 
8499   if (!isToken(AsmToken::Identifier) ||
8500       !isSupportedDPPCtrl(getTokenStr(), Operands))
8501     return MatchOperand_NoMatch;
8502 
8503   SMLoc S = getLoc();
8504   int64_t Val = -1;
8505   StringRef Ctrl;
8506 
8507   parseId(Ctrl);
8508 
8509   if (Ctrl == "row_mirror") {
8510     Val = DppCtrl::ROW_MIRROR;
8511   } else if (Ctrl == "row_half_mirror") {
8512     Val = DppCtrl::ROW_HALF_MIRROR;
8513   } else {
8514     if (skipToken(AsmToken::Colon, "expected a colon")) {
8515       if (Ctrl == "quad_perm") {
8516         Val = parseDPPCtrlPerm();
8517       } else {
8518         Val = parseDPPCtrlSel(Ctrl);
8519       }
8520     }
8521   }
8522 
8523   if (Val == -1)
8524     return MatchOperand_ParseFail;
8525 
8526   Operands.push_back(
8527     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
8528   return MatchOperand_Success;
8529 }
8530 
8531 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
8532   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
8533 }
8534 
8535 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
8536   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
8537 }
8538 
8539 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
8540   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
8541 }
8542 
8543 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
8544   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
8545 }
8546 
8547 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
8548   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
8549 }
8550 
8551 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8552   OptionalImmIndexMap OptionalIdx;
8553 
8554   unsigned Opc = Inst.getOpcode();
8555   bool HasModifiers =
8556       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
8557   unsigned I = 1;
8558   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8559   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8560     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8561   }
8562 
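  // For dpp8, the optional 'fi' value is captured here and appended as the
  // final operand below.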
8563   int Fi = 0;
8564   for (unsigned E = Operands.size(); I != E; ++I) {
8565     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8566                                             MCOI::TIED_TO);
8567     if (TiedTo != -1) {
8568       assert((unsigned)TiedTo < Inst.getNumOperands());
      // Handle the tied 'old' or src2 operand of MAC instructions.
8570       Inst.addOperand(Inst.getOperand(TiedTo));
8571     }
8572     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8573     // Add the register arguments
8574     if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32, ...) dpp uses the "vcc" token.
8576       // Skip it.
8577       continue;
8578     }
8579 
8580     if (IsDPP8) {
8581       if (Op.isDPP8()) {
8582         Op.addImmOperands(Inst, 1);
8583       } else if (HasModifiers &&
8584                  isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8585         Op.addRegWithFPInputModsOperands(Inst, 2);
8586       } else if (Op.isFI()) {
8587         Fi = Op.getImm();
8588       } else if (Op.isReg()) {
8589         Op.addRegOperands(Inst, 1);
8590       } else {
8591         llvm_unreachable("Invalid operand type");
8592       }
8593     } else {
8594       if (HasModifiers &&
8595           isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8596         Op.addRegWithFPInputModsOperands(Inst, 2);
8597       } else if (Op.isReg()) {
8598         Op.addRegOperands(Inst, 1);
8599       } else if (Op.isDPPCtrl()) {
8600         Op.addImmOperands(Inst, 1);
8601       } else if (Op.isImm()) {
8602         // Handle optional arguments
8603         OptionalIdx[Op.getImmTy()] = I;
8604       } else {
8605         llvm_unreachable("Invalid operand type");
8606       }
8607     }
8608   }
8609 
8610   if (IsDPP8) {
8611     using namespace llvm::AMDGPU::DPP;
8612     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8613   } else {
8614     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8615     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8616     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8617     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8618       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8619     }
8620   }
8621 }
8622 
8623 //===----------------------------------------------------------------------===//
8624 // sdwa
8625 //===----------------------------------------------------------------------===//
8626 
8627 OperandMatchResultTy
8628 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
8629                               AMDGPUOperand::ImmTy Type) {
8630   using namespace llvm::AMDGPU::SDWA;
8631 
8632   SMLoc S = getLoc();
8633   StringRef Value;
8634   OperandMatchResultTy res;
8635 
8636   SMLoc StringLoc;
8637   res = parseStringWithPrefix(Prefix, Value, StringLoc);
8638   if (res != MatchOperand_Success) {
8639     return res;
8640   }
8641 
8642   int64_t Int;
8643   Int = StringSwitch<int64_t>(Value)
8644         .Case("BYTE_0", SdwaSel::BYTE_0)
8645         .Case("BYTE_1", SdwaSel::BYTE_1)
8646         .Case("BYTE_2", SdwaSel::BYTE_2)
8647         .Case("BYTE_3", SdwaSel::BYTE_3)
8648         .Case("WORD_0", SdwaSel::WORD_0)
8649         .Case("WORD_1", SdwaSel::WORD_1)
8650         .Case("DWORD", SdwaSel::DWORD)
8651         .Default(0xffffffff);
8652 
8653   if (Int == 0xffffffff) {
8654     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8655     return MatchOperand_ParseFail;
8656   }
8657 
8658   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8659   return MatchOperand_Success;
8660 }
8661 
8662 OperandMatchResultTy
8663 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8664   using namespace llvm::AMDGPU::SDWA;
8665 
8666   SMLoc S = getLoc();
8667   StringRef Value;
8668   OperandMatchResultTy res;
8669 
8670   SMLoc StringLoc;
8671   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8672   if (res != MatchOperand_Success) {
8673     return res;
8674   }
8675 
8676   int64_t Int;
8677   Int = StringSwitch<int64_t>(Value)
8678         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8679         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8680         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8681         .Default(0xffffffff);
8682 
8683   if (Int == 0xffffffff) {
8684     Error(StringLoc, "invalid dst_unused value");
8685     return MatchOperand_ParseFail;
8686   }
8687 
8688   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8689   return MatchOperand_Success;
8690 }
8691 
8692 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8693   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8694 }
8695 
8696 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8697   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8698 }
8699 
8700 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8701   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8702 }
8703 
8704 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8705   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8706 }
8707 
8708 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8709   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8710 }
8711 
8712 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8713                               uint64_t BasicInstType,
8714                               bool SkipDstVcc,
8715                               bool SkipSrcVcc) {
8716   using namespace llvm::AMDGPU::SDWA;
8717 
8718   OptionalImmIndexMap OptionalIdx;
8719   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8720   bool SkippedVcc = false;
8721 
8722   unsigned I = 1;
8723   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8724   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8725     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8726   }
8727 
8728   for (unsigned E = Operands.size(); I != E; ++I) {
8729     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8730     if (SkipVcc && !SkippedVcc && Op.isReg() &&
8731         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32, ...) sdwa uses the "vcc" token as dst.
      // Skip it if it's the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on the previous iteration.
8736       // Note that src0 and src1 occupy 2 slots each because of modifiers.
8737       if (BasicInstType == SIInstrFlags::VOP2 &&
8738           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8739            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8740         SkippedVcc = true;
8741         continue;
8742       } else if (BasicInstType == SIInstrFlags::VOPC &&
8743                  Inst.getNumOperands() == 0) {
8744         SkippedVcc = true;
8745         continue;
8746       }
8747     }
8748     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8749       Op.addRegOrImmWithInputModsOperands(Inst, 2);
8750     } else if (Op.isImm()) {
8751       // Handle optional arguments
8752       OptionalIdx[Op.getImmTy()] = I;
8753     } else {
8754       llvm_unreachable("Invalid operand type");
8755     }
8756     SkippedVcc = false;
8757   }
8758 
8759   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
8760       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
8761       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments
8763     switch (BasicInstType) {
8764     case SIInstrFlags::VOP1:
8765       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8766       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8767         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8768       }
8769       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8770       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8771       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8772       break;
8773 
8774     case SIInstrFlags::VOP2:
8775       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8776       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8777         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8778       }
8779       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8780       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8781       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8782       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8783       break;
8784 
8785     case SIInstrFlags::VOPC:
8786       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8787         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8788       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8789       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8790       break;
8791 
8792     default:
8793       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8794     }
8795   }
8796 
  // Special case v_mac_{f16, f32}:
  // it has a src2 register operand that is tied to the dst operand.
8799   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8800       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
8801     auto it = Inst.begin();
8802     std::advance(
8803       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8804     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8805   }
8806 }
8807 
8808 //===----------------------------------------------------------------------===//
8809 // mAI
8810 //===----------------------------------------------------------------------===//
8811 
8812 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8813   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8814 }
8815 
8816 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8817   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8818 }
8819 
8820 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8821   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8822 }
8823 
8824 /// Force static initialization.
8825 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8826   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8827   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8828 }
8829 
8830 #define GET_REGISTER_MATCHER
8831 #define GET_MATCHER_IMPLEMENTATION
8832 #define GET_MNEMONIC_SPELL_CHECKER
8833 #define GET_MNEMONIC_CHECKER
8834 #include "AMDGPUGenAsmMatcher.inc"
8835 
// This function should be defined after the auto-generated include so that the
// MatchClassKind enum is defined.
8838 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8839                                                      unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to meet a token and fails to validate
  // the operand. This method checks if we were given an immediate operand but
  // were expected to provide the corresponding token.
8844   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8845   switch (Kind) {
8846   case MCK_addr64:
8847     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8848   case MCK_gds:
8849     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8850   case MCK_lds:
8851     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8852   case MCK_idxen:
8853     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8854   case MCK_offen:
8855     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8856   case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() always tries to match an
    // operand as a token when isToken returns true, and when the name of the
    // expression is not a valid token the match fails, so we need to handle it
    // here.
8863     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8864   case MCK_SSrcF32:
8865     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8866   case MCK_SoppBrTarget:
8867     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8868   case MCK_VReg32OrOff:
8869     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8870   case MCK_InterpSlot:
8871     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
8872   case MCK_Attr:
8873     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
8874   case MCK_AttrChan:
8875     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
8876   case MCK_ImmSMEMOffset:
8877     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
8878   case MCK_SReg_64:
8879   case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register, but it should also be accepted for
    // 64-bit operands. The following code enables it for SReg_64 operands used
    // as source and destination. Remaining source operands are handled in
    // isInlinableImm.
8885     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
8886   default:
8887     return Match_InvalidOperand;
8888   }
8889 }
8890 
8891 //===----------------------------------------------------------------------===//
8892 // endpgm
8893 //===----------------------------------------------------------------------===//
8894 
8895 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
8896   SMLoc S = getLoc();
8897   int64_t Imm = 0;
8898 
8899   if (!parseExpr(Imm)) {
8900     // The operand is optional, if not present default to 0
8901     Imm = 0;
8902   }
8903 
8904   if (!isUInt<16>(Imm)) {
8905     Error(S, "expected a 16-bit value");
8906     return MatchOperand_ParseFail;
8907   }
8908 
8909   Operands.push_back(
8910       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
8911   return MatchOperand_Success;
8912 }
8913 
8914 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
8915 
8916 //===----------------------------------------------------------------------===//
8917 // LDSDIR
8918 //===----------------------------------------------------------------------===//
8919 
8920 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitVDST() const {
8921   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitVDST);
8922 }
8923 
8924 bool AMDGPUOperand::isWaitVDST() const {
8925   return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
8926 }
8927 
8928 //===----------------------------------------------------------------------===//
8929 // VINTERP
8930 //===----------------------------------------------------------------------===//
8931 
8932 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitEXP() const {
8933   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitEXP);
8934 }
8935 
8936 bool AMDGPUOperand::isWaitEXP() const {
8937   return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());
8938 }
8939