1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/BinaryFormat/ELF.h"
24 #include "llvm/MC/MCAsmInfo.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCExpr.h"
27 #include "llvm/MC/MCInst.h"
28 #include "llvm/MC/MCParser/MCAsmLexer.h"
29 #include "llvm/MC/MCParser/MCAsmParser.h"
30 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
31 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
32 #include "llvm/MC/MCSymbol.h"
33 #include "llvm/MC/TargetRegistry.h"
34 #include "llvm/Support/AMDGPUMetadata.h"
35 #include "llvm/Support/AMDHSAKernelDescriptor.h"
36 #include "llvm/Support/Casting.h"
37 #include "llvm/Support/MachineValueType.h"
38 #include "llvm/Support/MathExtras.h"
39 #include "llvm/Support/TargetParser.h"
40 
41 using namespace llvm;
42 using namespace llvm::AMDGPU;
43 using namespace llvm::amdhsa;
44 
45 namespace {
46 
47 class AMDGPUAsmParser;
48 
49 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
50 
51 //===----------------------------------------------------------------------===//
52 // Operand
53 //===----------------------------------------------------------------------===//
54 
55 class AMDGPUOperand : public MCParsedAsmOperand {
56   enum KindTy {
57     Token,
58     Immediate,
59     Register,
60     Expression
61   } Kind;
62 
63   SMLoc StartLoc, EndLoc;
64   const AMDGPUAsmParser *AsmParser;
65 
66 public:
67   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
68       : Kind(Kind_), AsmParser(AsmParser_) {}
69 
70   using Ptr = std::unique_ptr<AMDGPUOperand>;
71 
72   struct Modifiers {
73     bool Abs = false;
74     bool Neg = false;
75     bool Sext = false;
76 
77     bool hasFPModifiers() const { return Abs || Neg; }
78     bool hasIntModifiers() const { return Sext; }
79     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
80 
81     int64_t getFPModifiersOperand() const {
82       int64_t Operand = 0;
83       Operand |= Abs ? SISrcMods::ABS : 0u;
84       Operand |= Neg ? SISrcMods::NEG : 0u;
85       return Operand;
86     }
87 
88     int64_t getIntModifiersOperand() const {
89       int64_t Operand = 0;
90       Operand |= Sext ? SISrcMods::SEXT : 0u;
91       return Operand;
92     }
93 
94     int64_t getModifiersOperand() const {
95       assert(!(hasFPModifiers() && hasIntModifiers())
96            && "fp and int modifiers should not be used simultaneously");
97       if (hasFPModifiers()) {
98         return getFPModifiersOperand();
99       } else if (hasIntModifiers()) {
100         return getIntModifiersOperand();
101       } else {
102         return 0;
103       }
104     }
105 
106     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
107   };
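  // A brief illustrative sketch (operand text assumed): for a source operand
  // written as "-|v0|" the parser sets Abs = Neg = true, so
  // getModifiersOperand() returns SISrcMods::ABS | SISrcMods::NEG, which is
  // later emitted as the *_modifiers immediate preceding the register operand.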
108 
109   enum ImmTy {
110     ImmTyNone,
111     ImmTyGDS,
112     ImmTyLDS,
113     ImmTyOffen,
114     ImmTyIdxen,
115     ImmTyAddr64,
116     ImmTyOffset,
117     ImmTyInstOffset,
118     ImmTyOffset0,
119     ImmTyOffset1,
120     ImmTyCPol,
121     ImmTySWZ,
122     ImmTyTFE,
123     ImmTyD16,
124     ImmTyClampSI,
125     ImmTyOModSI,
126     ImmTyDPP8,
127     ImmTyDppCtrl,
128     ImmTyDppRowMask,
129     ImmTyDppBankMask,
130     ImmTyDppBoundCtrl,
131     ImmTyDppFi,
132     ImmTySdwaDstSel,
133     ImmTySdwaSrc0Sel,
134     ImmTySdwaSrc1Sel,
135     ImmTySdwaDstUnused,
136     ImmTyDMask,
137     ImmTyDim,
138     ImmTyUNorm,
139     ImmTyDA,
140     ImmTyR128A16,
141     ImmTyA16,
142     ImmTyLWE,
143     ImmTyExpTgt,
144     ImmTyExpCompr,
145     ImmTyExpVM,
146     ImmTyFORMAT,
147     ImmTyHwreg,
148     ImmTyOff,
149     ImmTySendMsg,
150     ImmTyInterpSlot,
151     ImmTyInterpAttr,
152     ImmTyAttrChan,
153     ImmTyOpSel,
154     ImmTyOpSelHi,
155     ImmTyNegLo,
156     ImmTyNegHi,
157     ImmTySwizzle,
158     ImmTyGprIdxMode,
159     ImmTyHigh,
160     ImmTyBLGP,
161     ImmTyCBSZ,
162     ImmTyABID,
163     ImmTyEndpgm,
164   };
165 
166   enum ImmKindTy {
167     ImmKindTyNone,
168     ImmKindTyLiteral,
169     ImmKindTyConst,
170   };
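  // ImmKindTy is a (mutable) annotation filled in while operands are added to
  // the MCInst: setImmKindLiteral() marks values that had to be encoded as a
  // 32-bit literal, while setImmKindConst() marks values encoded as inline
  // constants. It is mutable so the const add*Operands() helpers can set it.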
171 
172 private:
173   struct TokOp {
174     const char *Data;
175     unsigned Length;
176   };
177 
178   struct ImmOp {
179     int64_t Val;
180     ImmTy Type;
181     bool IsFPImm;
182     mutable ImmKindTy Kind;
183     Modifiers Mods;
184   };
185 
186   struct RegOp {
187     unsigned RegNo;
188     Modifiers Mods;
189   };
190 
191   union {
192     TokOp Tok;
193     ImmOp Imm;
194     RegOp Reg;
195     const MCExpr *Expr;
196   };
197 
198 public:
199   bool isToken() const override {
200     if (Kind == Token)
201       return true;
202 
203     // When parsing operands, we can't always tell if something was meant to be
204     // a token, like 'gds', or an expression that references a global variable.
205     // In this case, we assume the string is an expression, and if we need to
206     // interpret it as a token, then we treat the symbol name as the token.
207     return isSymbolRefExpr();
208   }
209 
210   bool isSymbolRefExpr() const {
211     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
212   }
213 
214   bool isImm() const override {
215     return Kind == Immediate;
216   }
217 
218   void setImmKindNone() const {
219     assert(isImm());
220     Imm.Kind = ImmKindTyNone;
221   }
222 
223   void setImmKindLiteral() const {
224     assert(isImm());
225     Imm.Kind = ImmKindTyLiteral;
226   }
227 
228   void setImmKindConst() const {
229     assert(isImm());
230     Imm.Kind = ImmKindTyConst;
231   }
232 
233   bool IsImmKindLiteral() const {
234     return isImm() && Imm.Kind == ImmKindTyLiteral;
235   }
236 
237   bool isImmKindConst() const {
238     return isImm() && Imm.Kind == ImmKindTyConst;
239   }
240 
241   bool isInlinableImm(MVT type) const;
242   bool isLiteralImm(MVT type) const;
243 
244   bool isRegKind() const {
245     return Kind == Register;
246   }
247 
248   bool isReg() const override {
249     return isRegKind() && !hasModifiers();
250   }
251 
252   bool isRegOrInline(unsigned RCID, MVT type) const {
253     return isRegClass(RCID) || isInlinableImm(type);
254   }
255 
256   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
257     return isRegOrInline(RCID, type) || isLiteralImm(type);
258   }
259 
260   bool isRegOrImmWithInt16InputMods() const {
261     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
262   }
263 
264   bool isRegOrImmWithInt32InputMods() const {
265     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
266   }
267 
268   bool isRegOrImmWithInt64InputMods() const {
269     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
270   }
271 
272   bool isRegOrImmWithFP16InputMods() const {
273     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
274   }
275 
276   bool isRegOrImmWithFP32InputMods() const {
277     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
278   }
279 
280   bool isRegOrImmWithFP64InputMods() const {
281     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
282   }
283 
284   bool isVReg() const {
285     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
286            isRegClass(AMDGPU::VReg_64RegClassID) ||
287            isRegClass(AMDGPU::VReg_96RegClassID) ||
288            isRegClass(AMDGPU::VReg_128RegClassID) ||
289            isRegClass(AMDGPU::VReg_160RegClassID) ||
290            isRegClass(AMDGPU::VReg_192RegClassID) ||
291            isRegClass(AMDGPU::VReg_256RegClassID) ||
292            isRegClass(AMDGPU::VReg_512RegClassID) ||
293            isRegClass(AMDGPU::VReg_1024RegClassID);
294   }
295 
296   bool isVReg32() const {
297     return isRegClass(AMDGPU::VGPR_32RegClassID);
298   }
299 
300   bool isVReg32OrOff() const {
301     return isOff() || isVReg32();
302   }
303 
304   bool isNull() const {
305     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
306   }
307 
308   bool isVRegWithInputMods() const;
309 
310   bool isSDWAOperand(MVT type) const;
311   bool isSDWAFP16Operand() const;
312   bool isSDWAFP32Operand() const;
313   bool isSDWAInt16Operand() const;
314   bool isSDWAInt32Operand() const;
315 
316   bool isImmTy(ImmTy ImmT) const {
317     return isImm() && Imm.Type == ImmT;
318   }
319 
320   bool isImmModifier() const {
321     return isImm() && Imm.Type != ImmTyNone;
322   }
323 
324   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
325   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
326   bool isDMask() const { return isImmTy(ImmTyDMask); }
327   bool isDim() const { return isImmTy(ImmTyDim); }
328   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
329   bool isDA() const { return isImmTy(ImmTyDA); }
330   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
331   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
332   bool isLWE() const { return isImmTy(ImmTyLWE); }
333   bool isOff() const { return isImmTy(ImmTyOff); }
334   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
335   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
336   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
337   bool isOffen() const { return isImmTy(ImmTyOffen); }
338   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
339   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
340   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
341   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
342   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
343 
344   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
345   bool isGDS() const { return isImmTy(ImmTyGDS); }
346   bool isLDS() const { return isImmTy(ImmTyLDS); }
347   bool isCPol() const { return isImmTy(ImmTyCPol); }
348   bool isSWZ() const { return isImmTy(ImmTySWZ); }
349   bool isTFE() const { return isImmTy(ImmTyTFE); }
350   bool isD16() const { return isImmTy(ImmTyD16); }
351   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
352   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
353   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
354   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
355   bool isFI() const { return isImmTy(ImmTyDppFi); }
356   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
357   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
358   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
359   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
360   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
361   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
362   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
363   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
364   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
365   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
366   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
367   bool isHigh() const { return isImmTy(ImmTyHigh); }
368 
369   bool isMod() const {
370     return isClampSI() || isOModSI();
371   }
372 
373   bool isRegOrImm() const {
374     return isReg() || isImm();
375   }
376 
377   bool isRegClass(unsigned RCID) const;
378 
379   bool isInlineValue() const;
380 
381   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
382     return isRegOrInline(RCID, type) && !hasModifiers();
383   }
384 
385   bool isSCSrcB16() const {
386     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
387   }
388 
389   bool isSCSrcV2B16() const {
390     return isSCSrcB16();
391   }
392 
393   bool isSCSrcB32() const {
394     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
395   }
396 
397   bool isSCSrcB64() const {
398     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
399   }
400 
401   bool isBoolReg() const;
402 
403   bool isSCSrcF16() const {
404     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
405   }
406 
407   bool isSCSrcV2F16() const {
408     return isSCSrcF16();
409   }
410 
411   bool isSCSrcF32() const {
412     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
413   }
414 
415   bool isSCSrcF64() const {
416     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
417   }
418 
419   bool isSSrcB32() const {
420     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
421   }
422 
423   bool isSSrcB16() const {
424     return isSCSrcB16() || isLiteralImm(MVT::i16);
425   }
426 
427   bool isSSrcV2B16() const {
428     llvm_unreachable("cannot happen");
429     return isSSrcB16();
430   }
431 
432   bool isSSrcB64() const {
433     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
434     // See isVSrc64().
435     return isSCSrcB64() || isLiteralImm(MVT::i64);
436   }
437 
438   bool isSSrcF32() const {
439     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
440   }
441 
442   bool isSSrcF64() const {
443     return isSCSrcB64() || isLiteralImm(MVT::f64);
444   }
445 
446   bool isSSrcF16() const {
447     return isSCSrcB16() || isLiteralImm(MVT::f16);
448   }
449 
450   bool isSSrcV2F16() const {
451     llvm_unreachable("cannot happen");
452     return isSSrcF16();
453   }
454 
455   bool isSSrcV2FP32() const {
456     llvm_unreachable("cannot happen");
457     return isSSrcF32();
458   }
459 
460   bool isSCSrcV2FP32() const {
461     llvm_unreachable("cannot happen");
462     return isSCSrcF32();
463   }
464 
465   bool isSSrcV2INT32() const {
466     llvm_unreachable("cannot happen");
467     return isSSrcB32();
468   }
469 
470   bool isSCSrcV2INT32() const {
471     llvm_unreachable("cannot happen");
472     return isSCSrcB32();
473   }
474 
475   bool isSSrcOrLdsB32() const {
476     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
477            isLiteralImm(MVT::i32) || isExpr();
478   }
479 
480   bool isVCSrcB32() const {
481     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
482   }
483 
484   bool isVCSrcB64() const {
485     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
486   }
487 
488   bool isVCSrcB16() const {
489     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
490   }
491 
492   bool isVCSrcV2B16() const {
493     return isVCSrcB16();
494   }
495 
496   bool isVCSrcF32() const {
497     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
498   }
499 
500   bool isVCSrcF64() const {
501     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
502   }
503 
504   bool isVCSrcF16() const {
505     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
506   }
507 
508   bool isVCSrcV2F16() const {
509     return isVCSrcF16();
510   }
511 
512   bool isVSrcB32() const {
513     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
514   }
515 
516   bool isVSrcB64() const {
517     return isVCSrcF64() || isLiteralImm(MVT::i64);
518   }
519 
520   bool isVSrcB16() const {
521     return isVCSrcB16() || isLiteralImm(MVT::i16);
522   }
523 
524   bool isVSrcV2B16() const {
525     return isVSrcB16() || isLiteralImm(MVT::v2i16);
526   }
527 
528   bool isVCSrcV2FP32() const {
529     return isVCSrcF64();
530   }
531 
532   bool isVSrcV2FP32() const {
533     return isVSrcF64() || isLiteralImm(MVT::v2f32);
534   }
535 
536   bool isVCSrcV2INT32() const {
537     return isVCSrcB64();
538   }
539 
540   bool isVSrcV2INT32() const {
541     return isVSrcB64() || isLiteralImm(MVT::v2i32);
542   }
543 
544   bool isVSrcF32() const {
545     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
546   }
547 
548   bool isVSrcF64() const {
549     return isVCSrcF64() || isLiteralImm(MVT::f64);
550   }
551 
552   bool isVSrcF16() const {
553     return isVCSrcF16() || isLiteralImm(MVT::f16);
554   }
555 
556   bool isVSrcV2F16() const {
557     return isVSrcF16() || isLiteralImm(MVT::v2f16);
558   }
559 
560   bool isVISrcB32() const {
561     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
562   }
563 
564   bool isVISrcB16() const {
565     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
566   }
567 
568   bool isVISrcV2B16() const {
569     return isVISrcB16();
570   }
571 
572   bool isVISrcF32() const {
573     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
574   }
575 
576   bool isVISrcF16() const {
577     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
578   }
579 
580   bool isVISrcV2F16() const {
581     return isVISrcF16() || isVISrcB32();
582   }
583 
584   bool isVISrc_64B64() const {
585     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
586   }
587 
588   bool isVISrc_64F64() const {
589     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
590   }
591 
592   bool isVISrc_64V2FP32() const {
593     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
594   }
595 
596   bool isVISrc_64V2INT32() const {
597     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
598   }
599 
600   bool isVISrc_256B64() const {
601     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
602   }
603 
604   bool isVISrc_256F64() const {
605     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
606   }
607 
608   bool isVISrc_128B16() const {
609     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
610   }
611 
612   bool isVISrc_128V2B16() const {
613     return isVISrc_128B16();
614   }
615 
616   bool isVISrc_128B32() const {
617     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
618   }
619 
620   bool isVISrc_128F32() const {
621     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
622   }
623 
624   bool isVISrc_256V2FP32() const {
625     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
626   }
627 
628   bool isVISrc_256V2INT32() const {
629     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
630   }
631 
632   bool isVISrc_512B32() const {
633     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
634   }
635 
636   bool isVISrc_512B16() const {
637     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
638   }
639 
640   bool isVISrc_512V2B16() const {
641     return isVISrc_512B16();
642   }
643 
644   bool isVISrc_512F32() const {
645     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
646   }
647 
648   bool isVISrc_512F16() const {
649     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
650   }
651 
652   bool isVISrc_512V2F16() const {
653     return isVISrc_512F16() || isVISrc_512B32();
654   }
655 
656   bool isVISrc_1024B32() const {
657     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
658   }
659 
660   bool isVISrc_1024B16() const {
661     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
662   }
663 
664   bool isVISrc_1024V2B16() const {
665     return isVISrc_1024B16();
666   }
667 
668   bool isVISrc_1024F32() const {
669     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
670   }
671 
672   bool isVISrc_1024F16() const {
673     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
674   }
675 
676   bool isVISrc_1024V2F16() const {
677     return isVISrc_1024F16() || isVISrc_1024B32();
678   }
679 
680   bool isAISrcB32() const {
681     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
682   }
683 
684   bool isAISrcB16() const {
685     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
686   }
687 
688   bool isAISrcV2B16() const {
689     return isAISrcB16();
690   }
691 
692   bool isAISrcF32() const {
693     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
694   }
695 
696   bool isAISrcF16() const {
697     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
698   }
699 
700   bool isAISrcV2F16() const {
701     return isAISrcF16() || isAISrcB32();
702   }
703 
704   bool isAISrc_64B64() const {
705     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
706   }
707 
708   bool isAISrc_64F64() const {
709     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
710   }
711 
712   bool isAISrc_128B32() const {
713     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
714   }
715 
716   bool isAISrc_128B16() const {
717     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
718   }
719 
720   bool isAISrc_128V2B16() const {
721     return isAISrc_128B16();
722   }
723 
724   bool isAISrc_128F32() const {
725     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
726   }
727 
728   bool isAISrc_128F16() const {
729     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
730   }
731 
732   bool isAISrc_128V2F16() const {
733     return isAISrc_128F16() || isAISrc_128B32();
734   }
735 
736   bool isVISrc_128F16() const {
737     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
738   }
739 
740   bool isVISrc_128V2F16() const {
741     return isVISrc_128F16() || isVISrc_128B32();
742   }
743 
744   bool isAISrc_256B64() const {
745     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
746   }
747 
748   bool isAISrc_256F64() const {
749     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
750   }
751 
752   bool isAISrc_512B32() const {
753     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
754   }
755 
756   bool isAISrc_512B16() const {
757     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
758   }
759 
760   bool isAISrc_512V2B16() const {
761     return isAISrc_512B16();
762   }
763 
764   bool isAISrc_512F32() const {
765     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
766   }
767 
768   bool isAISrc_512F16() const {
769     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
770   }
771 
772   bool isAISrc_512V2F16() const {
773     return isAISrc_512F16() || isAISrc_512B32();
774   }
775 
776   bool isAISrc_1024B32() const {
777     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
778   }
779 
780   bool isAISrc_1024B16() const {
781     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
782   }
783 
784   bool isAISrc_1024V2B16() const {
785     return isAISrc_1024B16();
786   }
787 
788   bool isAISrc_1024F32() const {
789     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
790   }
791 
792   bool isAISrc_1024F16() const {
793     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
794   }
795 
796   bool isAISrc_1024V2F16() const {
797     return isAISrc_1024F16() || isAISrc_1024B32();
798   }
799 
800   bool isKImmFP32() const {
801     return isLiteralImm(MVT::f32);
802   }
803 
804   bool isKImmFP16() const {
805     return isLiteralImm(MVT::f16);
806   }
807 
808   bool isMem() const override {
809     return false;
810   }
811 
812   bool isExpr() const {
813     return Kind == Expression;
814   }
815 
816   bool isSoppBrTarget() const {
817     return isExpr() || isImm();
818   }
819 
820   bool isSWaitCnt() const;
821   bool isDepCtr() const;
822   bool isSDelayAlu() const;
823   bool isHwreg() const;
824   bool isSendMsg() const;
825   bool isSwizzle() const;
826   bool isSMRDOffset8() const;
827   bool isSMEMOffset() const;
828   bool isSMRDLiteralOffset() const;
829   bool isDPP8() const;
830   bool isDPPCtrl() const;
831   bool isBLGP() const;
832   bool isCBSZ() const;
833   bool isABID() const;
834   bool isGPRIdxMode() const;
835   bool isS16Imm() const;
836   bool isU16Imm() const;
837   bool isEndpgm() const;
838 
839   StringRef getExpressionAsToken() const {
840     assert(isExpr());
841     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
842     return S->getSymbol().getName();
843   }
844 
845   StringRef getToken() const {
846     assert(isToken());
847 
848     if (Kind == Expression)
849       return getExpressionAsToken();
850 
851     return StringRef(Tok.Data, Tok.Length);
852   }
853 
854   int64_t getImm() const {
855     assert(isImm());
856     return Imm.Val;
857   }
858 
859   void setImm(int64_t Val) {
860     assert(isImm());
861     Imm.Val = Val;
862   }
863 
864   ImmTy getImmTy() const {
865     assert(isImm());
866     return Imm.Type;
867   }
868 
869   unsigned getReg() const override {
870     assert(isRegKind());
871     return Reg.RegNo;
872   }
873 
874   SMLoc getStartLoc() const override {
875     return StartLoc;
876   }
877 
878   SMLoc getEndLoc() const override {
879     return EndLoc;
880   }
881 
882   SMRange getLocRange() const {
883     return SMRange(StartLoc, EndLoc);
884   }
885 
886   Modifiers getModifiers() const {
887     assert(isRegKind() || isImmTy(ImmTyNone));
888     return isRegKind() ? Reg.Mods : Imm.Mods;
889   }
890 
891   void setModifiers(Modifiers Mods) {
892     assert(isRegKind() || isImmTy(ImmTyNone));
893     if (isRegKind())
894       Reg.Mods = Mods;
895     else
896       Imm.Mods = Mods;
897   }
898 
899   bool hasModifiers() const {
900     return getModifiers().hasModifiers();
901   }
902 
903   bool hasFPModifiers() const {
904     return getModifiers().hasFPModifiers();
905   }
906 
907   bool hasIntModifiers() const {
908     return getModifiers().hasIntModifiers();
909   }
910 
911   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
912 
913   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
914 
915   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
916 
917   template <unsigned Bitwidth>
918   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
919 
920   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
921     addKImmFPOperands<16>(Inst, N);
922   }
923 
924   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
925     addKImmFPOperands<32>(Inst, N);
926   }
927 
928   void addRegOperands(MCInst &Inst, unsigned N) const;
929 
930   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
931     addRegOperands(Inst, N);
932   }
933 
934   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
935     if (isRegKind())
936       addRegOperands(Inst, N);
937     else if (isExpr())
938       Inst.addOperand(MCOperand::createExpr(Expr));
939     else
940       addImmOperands(Inst, N);
941   }
942 
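  // Descriptive note: a VOP3 source written as "-v1" reaches the helper below
  // with Mods.Neg set; it first adds an immediate operand equal to
  // SISrcMods::NEG and then the register operand for v1, matching the
  // src*_modifiers / src* operand pairs expected by the encoder.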
943   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
944     Modifiers Mods = getModifiers();
945     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
946     if (isRegKind()) {
947       addRegOperands(Inst, N);
948     } else {
949       addImmOperands(Inst, N, false);
950     }
951   }
952 
953   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
954     assert(!hasIntModifiers());
955     addRegOrImmWithInputModsOperands(Inst, N);
956   }
957 
958   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
959     assert(!hasFPModifiers());
960     addRegOrImmWithInputModsOperands(Inst, N);
961   }
962 
963   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
964     Modifiers Mods = getModifiers();
965     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
966     assert(isRegKind());
967     addRegOperands(Inst, N);
968   }
969 
970   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
971     assert(!hasIntModifiers());
972     addRegWithInputModsOperands(Inst, N);
973   }
974 
975   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
976     assert(!hasFPModifiers());
977     addRegWithInputModsOperands(Inst, N);
978   }
979 
980   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
981     if (isImm())
982       addImmOperands(Inst, N);
983     else {
984       assert(isExpr());
985       Inst.addOperand(MCOperand::createExpr(Expr));
986     }
987   }
988 
989   static void printImmTy(raw_ostream& OS, ImmTy Type) {
990     switch (Type) {
991     case ImmTyNone: OS << "None"; break;
992     case ImmTyGDS: OS << "GDS"; break;
993     case ImmTyLDS: OS << "LDS"; break;
994     case ImmTyOffen: OS << "Offen"; break;
995     case ImmTyIdxen: OS << "Idxen"; break;
996     case ImmTyAddr64: OS << "Addr64"; break;
997     case ImmTyOffset: OS << "Offset"; break;
998     case ImmTyInstOffset: OS << "InstOffset"; break;
999     case ImmTyOffset0: OS << "Offset0"; break;
1000     case ImmTyOffset1: OS << "Offset1"; break;
1001     case ImmTyCPol: OS << "CPol"; break;
1002     case ImmTySWZ: OS << "SWZ"; break;
1003     case ImmTyTFE: OS << "TFE"; break;
1004     case ImmTyD16: OS << "D16"; break;
1005     case ImmTyFORMAT: OS << "FORMAT"; break;
1006     case ImmTyClampSI: OS << "ClampSI"; break;
1007     case ImmTyOModSI: OS << "OModSI"; break;
1008     case ImmTyDPP8: OS << "DPP8"; break;
1009     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1010     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1011     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1012     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1013     case ImmTyDppFi: OS << "FI"; break;
1014     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1015     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1016     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1017     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1018     case ImmTyDMask: OS << "DMask"; break;
1019     case ImmTyDim: OS << "Dim"; break;
1020     case ImmTyUNorm: OS << "UNorm"; break;
1021     case ImmTyDA: OS << "DA"; break;
1022     case ImmTyR128A16: OS << "R128A16"; break;
1023     case ImmTyA16: OS << "A16"; break;
1024     case ImmTyLWE: OS << "LWE"; break;
1025     case ImmTyOff: OS << "Off"; break;
1026     case ImmTyExpTgt: OS << "ExpTgt"; break;
1027     case ImmTyExpCompr: OS << "ExpCompr"; break;
1028     case ImmTyExpVM: OS << "ExpVM"; break;
1029     case ImmTyHwreg: OS << "Hwreg"; break;
1030     case ImmTySendMsg: OS << "SendMsg"; break;
1031     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1032     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1033     case ImmTyAttrChan: OS << "AttrChan"; break;
1034     case ImmTyOpSel: OS << "OpSel"; break;
1035     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1036     case ImmTyNegLo: OS << "NegLo"; break;
1037     case ImmTyNegHi: OS << "NegHi"; break;
1038     case ImmTySwizzle: OS << "Swizzle"; break;
1039     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1040     case ImmTyHigh: OS << "High"; break;
1041     case ImmTyBLGP: OS << "BLGP"; break;
1042     case ImmTyCBSZ: OS << "CBSZ"; break;
1043     case ImmTyABID: OS << "ABID"; break;
1044     case ImmTyEndpgm: OS << "Endpgm"; break;
1045     }
1046   }
1047 
1048   void print(raw_ostream &OS) const override {
1049     switch (Kind) {
1050     case Register:
1051       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1052       break;
1053     case Immediate:
1054       OS << '<' << getImm();
1055       if (getImmTy() != ImmTyNone) {
1056         OS << " type: "; printImmTy(OS, getImmTy());
1057       }
1058       OS << " mods: " << Imm.Mods << '>';
1059       break;
1060     case Token:
1061       OS << '\'' << getToken() << '\'';
1062       break;
1063     case Expression:
1064       OS << "<expr " << *Expr << '>';
1065       break;
1066     }
1067   }
1068 
1069   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1070                                       int64_t Val, SMLoc Loc,
1071                                       ImmTy Type = ImmTyNone,
1072                                       bool IsFPImm = false) {
1073     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1074     Op->Imm.Val = Val;
1075     Op->Imm.IsFPImm = IsFPImm;
1076     Op->Imm.Kind = ImmKindTyNone;
1077     Op->Imm.Type = Type;
1078     Op->Imm.Mods = Modifiers();
1079     Op->StartLoc = Loc;
1080     Op->EndLoc = Loc;
1081     return Op;
1082   }
1083 
1084   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1085                                         StringRef Str, SMLoc Loc,
1086                                         bool HasExplicitEncodingSize = true) {
1087     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1088     Res->Tok.Data = Str.data();
1089     Res->Tok.Length = Str.size();
1090     Res->StartLoc = Loc;
1091     Res->EndLoc = Loc;
1092     return Res;
1093   }
1094 
1095   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1096                                       unsigned RegNo, SMLoc S,
1097                                       SMLoc E) {
1098     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1099     Op->Reg.RegNo = RegNo;
1100     Op->Reg.Mods = Modifiers();
1101     Op->StartLoc = S;
1102     Op->EndLoc = E;
1103     return Op;
1104   }
1105 
1106   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1107                                        const class MCExpr *Expr, SMLoc S) {
1108     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1109     Op->Expr = Expr;
1110     Op->StartLoc = S;
1111     Op->EndLoc = S;
1112     return Op;
1113   }
1114 };
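
// Usage sketch for the factory methods above (hypothetical call site, literal
// value assumed): a parser routine that recognized "offset:0x70" would do
//   Operands.push_back(AMDGPUOperand::CreateImm(this, 0x70, Loc,
//                                               AMDGPUOperand::ImmTyOffset));
// and the generated matcher later invokes addImmOperands() on that operand to
// materialize the corresponding MCOperand in the MCInst.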
1115 
1116 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1117   OS << "abs:" << Mods.Abs << " neg:" << Mods.Neg << " sext:" << Mods.Sext;
1118   return OS;
1119 }
1120 
1121 //===----------------------------------------------------------------------===//
1122 // AsmParser
1123 //===----------------------------------------------------------------------===//
1124 
1125 // Holds info related to the current kernel, e.g. the count of SGPRs used.
1126 // A kernel scope begins at an .amdgpu_hsa_kernel directive and ends at the
1127 // next .amdgpu_hsa_kernel directive or at EOF.
1128 class KernelScopeInfo {
1129   int SgprIndexUnusedMin = -1;
1130   int VgprIndexUnusedMin = -1;
1131   int AgprIndexUnusedMin = -1;
1132   MCContext *Ctx = nullptr;
1133   MCSubtargetInfo const *MSTI = nullptr;
1134 
1135   void usesSgprAt(int i) {
1136     if (i >= SgprIndexUnusedMin) {
1137       SgprIndexUnusedMin = ++i;
1138       if (Ctx) {
1139         MCSymbol* const Sym =
1140           Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1141         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1142       }
1143     }
1144   }
1145 
1146   void usesVgprAt(int i) {
1147     if (i >= VgprIndexUnusedMin) {
1148       VgprIndexUnusedMin = ++i;
1149       if (Ctx) {
1150         MCSymbol* const Sym =
1151           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1152         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1153                                          VgprIndexUnusedMin);
1154         Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1155       }
1156     }
1157   }
1158 
1159   void usesAgprAt(int i) {
1160     // Instructions using AGPRs will error in AMDGPUAsmParser::MatchAndEmitInstruction, so there is nothing to track here.
1161     if (!hasMAIInsts(*MSTI))
1162       return;
1163 
1164     if (i >= AgprIndexUnusedMin) {
1165       AgprIndexUnusedMin = ++i;
1166       if (Ctx) {
1167         MCSymbol* const Sym =
1168           Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1169         Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1170 
1171         // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1172         MCSymbol* const vSym =
1173           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1174         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1175                                          VgprIndexUnusedMin);
1176         vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1177       }
1178     }
1179   }
1180 
1181 public:
1182   KernelScopeInfo() = default;
1183 
1184   void initialize(MCContext &Context) {
1185     Ctx = &Context;
1186     MSTI = Ctx->getSubtargetInfo();
1187 
1188     usesSgprAt(SgprIndexUnusedMin = -1);
1189     usesVgprAt(VgprIndexUnusedMin = -1);
1190     if (hasMAIInsts(*MSTI)) {
1191       usesAgprAt(AgprIndexUnusedMin = -1);
1192     }
1193   }
1194 
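  // Descriptive example: a reference to s[10:11] results in
  // usesRegister(IS_SGPR, /*DwordRegIndex=*/10, /*RegWidth=*/64), which calls
  // usesSgprAt(11) and raises the .kernel.sgpr_count symbol to 12.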
1195   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1196                     unsigned RegWidth) {
1197     switch (RegKind) {
1198     case IS_SGPR:
1199       usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1200       break;
1201     case IS_AGPR:
1202       usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1203       break;
1204     case IS_VGPR:
1205       usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1206       break;
1207     default:
1208       break;
1209     }
1210   }
1211 };
1212 
1213 class AMDGPUAsmParser : public MCTargetAsmParser {
1214   MCAsmParser &Parser;
1215 
1216   // Number of extra operands parsed after the first optional operand.
1217   // This may be necessary to skip hardcoded mandatory operands.
1218   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1219 
1220   unsigned ForcedEncodingSize = 0;
1221   bool ForcedDPP = false;
1222   bool ForcedSDWA = false;
1223   KernelScopeInfo KernelScope;
1224   unsigned CPolSeen;
1225 
1226   /// @name Auto-generated Match Functions
1227   /// {
1228 
1229 #define GET_ASSEMBLER_HEADER
1230 #include "AMDGPUGenAsmMatcher.inc"
1231 
1232   /// }
1233 
1234 private:
1235   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1236   bool OutOfRangeError(SMRange Range);
1237   /// Calculate the VGPR/SGPR blocks required for the given target, reserved
1238   /// registers, and user-specified NextFreeXGPR values.
1239   ///
1240   /// \param Features [in] Target features, used for bug corrections.
1241   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1242   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1243   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1244   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1245   /// descriptor field, if valid.
1246   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1247   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1248   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1249   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1250   /// \param VGPRBlocks [out] Result VGPR block count.
1251   /// \param SGPRBlocks [out] Result SGPR block count.
1252   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1253                           bool FlatScrUsed, bool XNACKUsed,
1254                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1255                           SMRange VGPRRange, unsigned NextFreeSGPR,
1256                           SMRange SGPRRange, unsigned &VGPRBlocks,
1257                           unsigned &SGPRBlocks);
1258   bool ParseDirectiveAMDGCNTarget();
1259   bool ParseDirectiveAMDHSAKernel();
1260   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1261   bool ParseDirectiveHSACodeObjectVersion();
1262   bool ParseDirectiveHSACodeObjectISA();
1263   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1264   bool ParseDirectiveAMDKernelCodeT();
1265   // TODO: Possibly make subtargetHasRegister const.
1266   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1267   bool ParseDirectiveAMDGPUHsaKernel();
1268 
1269   bool ParseDirectiveISAVersion();
1270   bool ParseDirectiveHSAMetadata();
1271   bool ParseDirectivePALMetadataBegin();
1272   bool ParseDirectivePALMetadata();
1273   bool ParseDirectiveAMDGPULDS();
1274 
1275   /// Common code to parse out a block of text (typically YAML) between start and
1276   /// end directives.
1277   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1278                            const char *AssemblerDirectiveEnd,
1279                            std::string &CollectString);
1280 
1281   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1282                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1283   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1284                            unsigned &RegNum, unsigned &RegWidth,
1285                            bool RestoreOnFailure = false);
1286   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1287                            unsigned &RegNum, unsigned &RegWidth,
1288                            SmallVectorImpl<AsmToken> &Tokens);
1289   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1290                            unsigned &RegWidth,
1291                            SmallVectorImpl<AsmToken> &Tokens);
1292   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1293                            unsigned &RegWidth,
1294                            SmallVectorImpl<AsmToken> &Tokens);
1295   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1296                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1297   bool ParseRegRange(unsigned& Num, unsigned& Width);
1298   unsigned getRegularReg(RegisterKind RegKind,
1299                          unsigned RegNum,
1300                          unsigned RegWidth,
1301                          SMLoc Loc);
1302 
1303   bool isRegister();
1304   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1305   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1306   void initializeGprCountSymbol(RegisterKind RegKind);
1307   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1308                              unsigned RegWidth);
1309   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1310                     bool IsAtomic, bool IsLds = false);
1311   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1312                  bool IsGdsHardcoded);
1313 
1314 public:
1315   enum AMDGPUMatchResultTy {
1316     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1317   };
1318   enum OperandMode {
1319     OperandMode_Default,
1320     OperandMode_NSA,
1321   };
1322 
1323   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1324 
1325   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1326                const MCInstrInfo &MII,
1327                const MCTargetOptions &Options)
1328       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1329     MCAsmParserExtension::Initialize(Parser);
1330 
1331     if (getFeatureBits().none()) {
1332       // Set default features.
1333       copySTI().ToggleFeature("southern-islands");
1334     }
1335 
1336     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1337 
1338     {
1339       // TODO: make these pre-defined variables read-only.
1340       // Currently there is no suitable machinery in core llvm-mc for this.
1341       // MCSymbol::isRedefinable is intended for another purpose, and
1342       // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1343       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1344       MCContext &Ctx = getContext();
1345       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1346         MCSymbol *Sym =
1347             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1348         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1349         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1350         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1351         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1352         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1353       } else {
1354         MCSymbol *Sym =
1355             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1356         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1357         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1358         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1359         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1360         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1361       }
1362       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1363         initializeGprCountSymbol(IS_VGPR);
1364         initializeGprCountSymbol(IS_SGPR);
1365       } else
1366         KernelScope.initialize(getContext());
1367     }
1368   }
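
  // Illustration (actual values depend on the CPU passed to getIsaVersion()):
  // when assembling for gfx90a with code object v3 or above, the constructor
  // above defines .amdgcn.gfx_generation_number = 9, so source like
  //   .if .amdgcn.gfx_generation_number == 9
  // can conditionally assemble GFX9-specific code.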
1369 
1370   bool hasMIMG_R128() const {
1371     return AMDGPU::hasMIMG_R128(getSTI());
1372   }
1373 
1374   bool hasPackedD16() const {
1375     return AMDGPU::hasPackedD16(getSTI());
1376   }
1377 
1378   bool hasGFX10A16() const {
1379     return AMDGPU::hasGFX10A16(getSTI());
1380   }
1381 
1382   bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1383 
1384   bool isSI() const {
1385     return AMDGPU::isSI(getSTI());
1386   }
1387 
1388   bool isCI() const {
1389     return AMDGPU::isCI(getSTI());
1390   }
1391 
1392   bool isVI() const {
1393     return AMDGPU::isVI(getSTI());
1394   }
1395 
1396   bool isGFX9() const {
1397     return AMDGPU::isGFX9(getSTI());
1398   }
1399 
1400   // TODO: isGFX90A is also true for GFX940. This needs to be cleaned up.
1401   bool isGFX90A() const {
1402     return AMDGPU::isGFX90A(getSTI());
1403   }
1404 
1405   bool isGFX940() const {
1406     return AMDGPU::isGFX940(getSTI());
1407   }
1408 
1409   bool isGFX9Plus() const {
1410     return AMDGPU::isGFX9Plus(getSTI());
1411   }
1412 
1413   bool isGFX10() const {
1414     return AMDGPU::isGFX10(getSTI());
1415   }
1416 
1417   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1418 
1419   bool isGFX11() const {
1420     return AMDGPU::isGFX11(getSTI());
1421   }
1422 
1423   bool isGFX11Plus() const {
1424     return AMDGPU::isGFX11Plus(getSTI());
1425   }
1426 
1427   bool isGFX10_BEncoding() const {
1428     return AMDGPU::isGFX10_BEncoding(getSTI());
1429   }
1430 
1431   bool hasInv2PiInlineImm() const {
1432     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1433   }
1434 
1435   bool hasFlatOffsets() const {
1436     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1437   }
1438 
1439   bool hasArchitectedFlatScratch() const {
1440     return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1441   }
1442 
1443   bool hasSGPR102_SGPR103() const {
1444     return !isVI() && !isGFX9();
1445   }
1446 
1447   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1448 
1449   bool hasIntClamp() const {
1450     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1451   }
1452 
1453   AMDGPUTargetStreamer &getTargetStreamer() {
1454     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1455     return static_cast<AMDGPUTargetStreamer &>(TS);
1456   }
1457 
1458   const MCRegisterInfo *getMRI() const {
1459     // We need this const_cast because for some reason getContext() is not const
1460     // in MCAsmParser.
1461     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1462   }
1463 
1464   const MCInstrInfo *getMII() const {
1465     return &MII;
1466   }
1467 
1468   const FeatureBitset &getFeatureBits() const {
1469     return getSTI().getFeatureBits();
1470   }
1471 
1472   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1473   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1474   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1475 
1476   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1477   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1478   bool isForcedDPP() const { return ForcedDPP; }
1479   bool isForcedSDWA() const { return ForcedSDWA; }
1480   ArrayRef<unsigned> getMatchedVariants() const;
1481   StringRef getMatchedVariantName() const;
1482 
1483   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1484   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1485                      bool RestoreOnFailure);
1486   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1487   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1488                                         SMLoc &EndLoc) override;
1489   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1490   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1491                                       unsigned Kind) override;
1492   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1493                                OperandVector &Operands, MCStreamer &Out,
1494                                uint64_t &ErrorInfo,
1495                                bool MatchingInlineAsm) override;
1496   bool ParseDirective(AsmToken DirectiveID) override;
1497   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1498                                     OperandMode Mode = OperandMode_Default);
1499   StringRef parseMnemonicSuffix(StringRef Name);
1500   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1501                         SMLoc NameLoc, OperandVector &Operands) override;
1502   //bool ProcessInstruction(MCInst &Inst);
1503 
1504   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1505 
1506   OperandMatchResultTy
1507   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1508                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1509                      bool (*ConvertResult)(int64_t &) = nullptr);
1510 
1511   OperandMatchResultTy
1512   parseOperandArrayWithPrefix(const char *Prefix,
1513                               OperandVector &Operands,
1514                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1515                               bool (*ConvertResult)(int64_t&) = nullptr);
1516 
1517   OperandMatchResultTy
1518   parseNamedBit(StringRef Name, OperandVector &Operands,
1519                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1520   OperandMatchResultTy parseCPol(OperandVector &Operands);
1521   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1522                                              StringRef &Value,
1523                                              SMLoc &StringLoc);
1524 
1525   bool isModifier();
1526   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1527   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1528   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1529   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1530   bool parseSP3NegModifier();
1531   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1532   OperandMatchResultTy parseReg(OperandVector &Operands);
1533   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1534   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1535   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1536   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1537   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1538   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1539   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1540   OperandMatchResultTy parseUfmt(int64_t &Format);
1541   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1542   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1543   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1544   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1545   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1546   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1547   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1548 
1549   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1550   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1551   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1552   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1553 
1554   bool parseCnt(int64_t &IntVal);
1555   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1556 
1557   bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1558   void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1559   OperandMatchResultTy parseDepCtrOps(OperandVector &Operands);
1560 
1561   bool parseDelay(int64_t &Delay);
1562   OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands);
1563 
1564   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1565 
1566 private:
1567   struct OperandInfoTy {
1568     SMLoc Loc;
1569     int64_t Id;
1570     bool IsSymbolic = false;
1571     bool IsDefined = false;
1572 
1573     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1574   };
1575 
1576   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1577   bool validateSendMsg(const OperandInfoTy &Msg,
1578                        const OperandInfoTy &Op,
1579                        const OperandInfoTy &Stream);
1580 
1581   bool parseHwregBody(OperandInfoTy &HwReg,
1582                       OperandInfoTy &Offset,
1583                       OperandInfoTy &Width);
1584   bool validateHwreg(const OperandInfoTy &HwReg,
1585                      const OperandInfoTy &Offset,
1586                      const OperandInfoTy &Width);
1587 
1588   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1589   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1590   SMLoc getBLGPLoc(const OperandVector &Operands) const;
1591 
1592   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1593                       const OperandVector &Operands) const;
1594   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1595   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1596   SMLoc getLitLoc(const OperandVector &Operands) const;
1597   SMLoc getConstLoc(const OperandVector &Operands) const;
1598 
1599   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1600   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1601   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1602   bool validateSOPLiteral(const MCInst &Inst) const;
1603   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1604   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1605   bool validateIntClampSupported(const MCInst &Inst);
1606   bool validateMIMGAtomicDMask(const MCInst &Inst);
1607   bool validateMIMGGatherDMask(const MCInst &Inst);
1608   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1609   Optional<StringRef> validateMIMGDataSize(const MCInst &Inst);
1610   bool validateMIMGAddrSize(const MCInst &Inst);
1611   bool validateMIMGD16(const MCInst &Inst);
1612   bool validateMIMGDim(const MCInst &Inst);
1613   bool validateMIMGMSAA(const MCInst &Inst);
1614   bool validateOpSel(const MCInst &Inst);
1615   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1616   bool validateVccOperand(unsigned Reg) const;
1617   bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1618   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1619   bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1620   bool validateAGPRLdSt(const MCInst &Inst) const;
1621   bool validateVGPRAlign(const MCInst &Inst) const;
1622   bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1623   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1624   bool validateDivScale(const MCInst &Inst);
1625   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1626                              const SMLoc &IDLoc);
1627   bool validateFlatLdsDMA(const MCInst &Inst, const OperandVector &Operands,
1628                           const SMLoc &IDLoc);
1629   Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1630   unsigned getConstantBusLimit(unsigned Opcode) const;
1631   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1632   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1633   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1634 
1635   bool isSupportedMnemo(StringRef Mnemo,
1636                         const FeatureBitset &FBS);
1637   bool isSupportedMnemo(StringRef Mnemo,
1638                         const FeatureBitset &FBS,
1639                         ArrayRef<unsigned> Variants);
1640   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1641 
1642   bool isId(const StringRef Id) const;
1643   bool isId(const AsmToken &Token, const StringRef Id) const;
1644   bool isToken(const AsmToken::TokenKind Kind) const;
1645   bool trySkipId(const StringRef Id);
1646   bool trySkipId(const StringRef Pref, const StringRef Id);
1647   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1648   bool trySkipToken(const AsmToken::TokenKind Kind);
1649   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1650   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1651   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1652 
1653   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1654   AsmToken::TokenKind getTokenKind() const;
1655   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1656   bool parseExpr(OperandVector &Operands);
1657   StringRef getTokenStr() const;
1658   AsmToken peekToken();
1659   AsmToken getToken() const;
1660   SMLoc getLoc() const;
1661   void lex();
1662 
1663 public:
1664   void onBeginOfFile() override;
1665 
1666   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1667   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1668 
1669   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1670   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1671   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1672   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1673   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1674   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1675 
1676   bool parseSwizzleOperand(int64_t &Op,
1677                            const unsigned MinVal,
1678                            const unsigned MaxVal,
1679                            const StringRef ErrMsg,
1680                            SMLoc &Loc);
1681   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1682                             const unsigned MinVal,
1683                             const unsigned MaxVal,
1684                             const StringRef ErrMsg);
1685   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1686   bool parseSwizzleOffset(int64_t &Imm);
1687   bool parseSwizzleMacro(int64_t &Imm);
1688   bool parseSwizzleQuadPerm(int64_t &Imm);
1689   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1690   bool parseSwizzleBroadcast(int64_t &Imm);
1691   bool parseSwizzleSwap(int64_t &Imm);
1692   bool parseSwizzleReverse(int64_t &Imm);
1693 
1694   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1695   int64_t parseGPRIdxMacro();
1696 
1697   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1698   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1699   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1700   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1701 
1702   AMDGPUOperand::Ptr defaultCPol() const;
1703 
1704   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1705   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1706   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1707   AMDGPUOperand::Ptr defaultFlatOffset() const;
1708 
1709   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1710 
1711   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1712                OptionalImmIndexMap &OptionalIdx);
1713   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1714   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1715   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1716   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1717                 OptionalImmIndexMap &OptionalIdx);
1718 
1719   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1720 
1721   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1722                bool IsAtomic = false);
1723   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1724   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1725 
1726   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1727 
1728   bool parseDimId(unsigned &Encoding);
1729   OperandMatchResultTy parseDim(OperandVector &Operands);
1730   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1731   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1732   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1733   int64_t parseDPPCtrlSel(StringRef Ctrl);
1734   int64_t parseDPPCtrlPerm();
1735   AMDGPUOperand::Ptr defaultRowMask() const;
1736   AMDGPUOperand::Ptr defaultBankMask() const;
1737   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1738   AMDGPUOperand::Ptr defaultFI() const;
1739   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1740   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1741 
1742   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1743                                     AMDGPUOperand::ImmTy Type);
1744   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1745   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1746   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1747   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1748   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1749   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1750   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1751                uint64_t BasicInstType,
1752                bool SkipDstVcc = false,
1753                bool SkipSrcVcc = false);
1754 
1755   AMDGPUOperand::Ptr defaultBLGP() const;
1756   AMDGPUOperand::Ptr defaultCBSZ() const;
1757   AMDGPUOperand::Ptr defaultABID() const;
1758 
1759   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1760   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1761 };
1762 
1763 struct OptionalOperand {
1764   const char *Name;
1765   AMDGPUOperand::ImmTy Type;
1766   bool IsBit;
1767   bool (*ConvertResult)(int64_t&);
1768 };
1769 
1770 } // end anonymous namespace
1771 
// May be called with an integer type of equivalent bitwidth.
1773 static const fltSemantics *getFltSemantics(unsigned Size) {
1774   switch (Size) {
1775   case 4:
1776     return &APFloat::IEEEsingle();
1777   case 8:
1778     return &APFloat::IEEEdouble();
1779   case 2:
1780     return &APFloat::IEEEhalf();
1781   default:
1782     llvm_unreachable("unsupported fp type");
1783   }
1784 }
1785 
1786 static const fltSemantics *getFltSemantics(MVT VT) {
1787   return getFltSemantics(VT.getSizeInBits() / 8);
1788 }
1789 
1790 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1791   switch (OperandType) {
1792   case AMDGPU::OPERAND_REG_IMM_INT32:
1793   case AMDGPU::OPERAND_REG_IMM_FP32:
1794   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1795   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1796   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1797   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1798   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1799   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1800   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1801   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1802   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1803   case AMDGPU::OPERAND_KIMM32:
1804     return &APFloat::IEEEsingle();
1805   case AMDGPU::OPERAND_REG_IMM_INT64:
1806   case AMDGPU::OPERAND_REG_IMM_FP64:
1807   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1808   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1809   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1810     return &APFloat::IEEEdouble();
1811   case AMDGPU::OPERAND_REG_IMM_INT16:
1812   case AMDGPU::OPERAND_REG_IMM_FP16:
1813   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1814   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1815   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1816   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1817   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1818   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1819   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1820   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1821   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1822   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1823   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1824   case AMDGPU::OPERAND_KIMM16:
1825     return &APFloat::IEEEhalf();
1826   default:
1827     llvm_unreachable("unsupported fp type");
1828   }
1829 }
1830 
1831 //===----------------------------------------------------------------------===//
1832 // Operand
1833 //===----------------------------------------------------------------------===//
1834 
1835 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1836   bool Lost;
1837 
  // Convert the literal to the floating-point type corresponding to VT
1839   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1840                                                APFloat::rmNearestTiesToEven,
1841                                                &Lost);
  // We allow precision loss but not overflow or underflow
1843   if (Status != APFloat::opOK &&
1844       Lost &&
1845       ((Status & APFloat::opOverflow)  != 0 ||
1846        (Status & APFloat::opUnderflow) != 0)) {
1847     return false;
1848   }
1849 
1850   return true;
1851 }
1852 
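// Check whether Val fits into Size bits when interpreted either as an unsigned
// or as a signed value; e.g. for Size == 16 any value in [-32768, 65535] is a
// safe truncation.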
1853 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1854   return isUIntN(Size, Val) || isIntN(Size, Val);
1855 }
1856 
1857 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1858   if (VT.getScalarType() == MVT::i16) {
    // FP immediate values are broken for i16 operands; only accept inlinable
    // integer literals.
1860     return isInlinableIntLiteral(Val);
1861   }
1862 
1863   // f16/v2f16 operands work correctly for all values.
1864   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1865 }
1866 
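// Check whether this parsed immediate can be encoded as an inline constant
// for an operand of the given type. Named inline values (e.g. shared_base)
// are always accepted; otherwise only plain immediates are considered.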
1867 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1868 
1869   // This is a hack to enable named inline values like
1870   // shared_base with both 32-bit and 64-bit operands.
1871   // Note that these values are defined as
1872   // 32-bit operands only.
1873   if (isInlineValue()) {
1874     return true;
1875   }
1876 
1877   if (!isImmTy(ImmTyNone)) {
1878     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1879     return false;
1880   }
1881   // TODO: We should avoid using host float here. It would be better to
1882   // check the float bit values which is what a few other places do.
1883   // We've had bot failures before due to weird NaN support on mips hosts.
1884 
1885   APInt Literal(64, Imm.Val);
1886 
1887   if (Imm.IsFPImm) { // We got fp literal token
1888     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1889       return AMDGPU::isInlinableLiteral64(Imm.Val,
1890                                           AsmParser->hasInv2PiInlineImm());
1891     }
1892 
1893     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1894     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1895       return false;
1896 
1897     if (type.getScalarSizeInBits() == 16) {
1898       return isInlineableLiteralOp16(
1899         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1900         type, AsmParser->hasInv2PiInlineImm());
1901     }
1902 
1903     // Check if single precision literal is inlinable
1904     return AMDGPU::isInlinableLiteral32(
1905       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1906       AsmParser->hasInv2PiInlineImm());
1907   }
1908 
1909   // We got int literal token.
1910   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1911     return AMDGPU::isInlinableLiteral64(Imm.Val,
1912                                         AsmParser->hasInv2PiInlineImm());
1913   }
1914 
1915   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1916     return false;
1917   }
1918 
1919   if (type.getScalarSizeInBits() == 16) {
1920     return isInlineableLiteralOp16(
1921       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1922       type, AsmParser->hasInv2PiInlineImm());
1923   }
1924 
1925   return AMDGPU::isInlinableLiteral32(
1926     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1927     AsmParser->hasInv2PiInlineImm());
1928 }
1929 
1930 bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as a literal
1932   if (!isImmTy(ImmTyNone)) {
1933     return false;
1934   }
1935 
1936   if (!Imm.IsFPImm) {
1937     // We got int literal token.
1938 
1939     if (type == MVT::f64 && hasFPModifiers()) {
      // FP modifiers cannot be applied to int literals while preserving the
      // same semantics for VOP1/2/C and VOP3 because of integer truncation.
      // To avoid ambiguity, disable these cases.
1943       return false;
1944     }
1945 
1946     unsigned Size = type.getSizeInBits();
1947     if (Size == 64)
1948       Size = 32;
1949 
1950     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1951     // types.
1952     return isSafeTruncation(Imm.Val, Size);
1953   }
1954 
1955   // We got fp literal token
1956   if (type == MVT::f64) { // Expected 64-bit fp operand
    // Only the high 32 bits of a 64-bit fp literal can be encoded; the low 32
    // bits are set to zero. We still accept such literals.
1958     return true;
1959   }
1960 
1961   if (type == MVT::i64) { // Expected 64-bit int operand
1962     // We don't allow fp literals in 64-bit integer instructions. It is
1963     // unclear how we should encode them.
1964     return false;
1965   }
1966 
  // We allow fp literals with v2f16/v2i16/v2f32 operands assuming that the
  // specified literal goes into the lower half and the upper half is zero.
  // We also require that the literal can be losslessly converted to the
  // element type.
1970   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1971                      (type == MVT::v2i16)? MVT::i16 :
1972                      (type == MVT::v2f32)? MVT::f32 : type;
1973 
1974   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1975   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1976 }
1977 
1978 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1979   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1980 }
1981 
1982 bool AMDGPUOperand::isVRegWithInputMods() const {
1983   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1984          // GFX90A allows DPP on 64-bit operands.
1985          (isRegClass(AMDGPU::VReg_64RegClassID) &&
1986           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1987 }
1988 
1989 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1990   if (AsmParser->isVI())
1991     return isVReg32();
1992   else if (AsmParser->isGFX9Plus())
1993     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1994   else
1995     return false;
1996 }
1997 
1998 bool AMDGPUOperand::isSDWAFP16Operand() const {
1999   return isSDWAOperand(MVT::f16);
2000 }
2001 
2002 bool AMDGPUOperand::isSDWAFP32Operand() const {
2003   return isSDWAOperand(MVT::f32);
2004 }
2005 
2006 bool AMDGPUOperand::isSDWAInt16Operand() const {
2007   return isSDWAOperand(MVT::i16);
2008 }
2009 
2010 bool AMDGPUOperand::isSDWAInt32Operand() const {
2011   return isSDWAOperand(MVT::i32);
2012 }
2013 
2014 bool AMDGPUOperand::isBoolReg() const {
2015   auto FB = AsmParser->getFeatureBits();
2016   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
2017                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
2018 }
2019 
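// Apply parsed 'abs'/'neg' modifiers to the raw bit pattern of an fp literal:
// 'abs' clears the sign bit and 'neg' flips it. For a 4-byte operand the sign
// mask is 0x80000000, for a 2-byte operand it is 0x8000.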
2020 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2021 {
2022   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2023   assert(Size == 2 || Size == 4 || Size == 8);
2024 
2025   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2026 
2027   if (Imm.Mods.Abs) {
2028     Val &= ~FpSignMask;
2029   }
2030   if (Imm.Mods.Neg) {
2031     Val ^= FpSignMask;
2032   }
2033 
2034   return Val;
2035 }
2036 
2037 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2038   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2039                              Inst.getNumOperands())) {
2040     addLiteralImmOperand(Inst, Imm.Val,
2041                          ApplyModifiers &
2042                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2043   } else {
2044     assert(!isImmTy(ImmTyNone) || !hasModifiers());
2045     Inst.addOperand(MCOperand::createImm(Imm.Val));
2046     setImmKindNone();
2047   }
2048 }
2049 
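// Add an immediate for a src operand that accepts literals. Depending on the
// operand type the value is emitted either as an inline constant or as a
// literal; for 64-bit fp operands only the high 32 bits of a non-inlinable
// literal are encoded (the low 32 bits are assumed to be zero), and 16-bit
// operands are truncated to their low 16 bits.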
2050 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2051   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2052   auto OpNum = Inst.getNumOperands();
2053   // Check that this operand accepts literals
2054   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2055 
2056   if (ApplyModifiers) {
2057     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2058     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2059     Val = applyInputFPModifiers(Val, Size);
2060   }
2061 
2062   APInt Literal(64, Val);
2063   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
2064 
2065   if (Imm.IsFPImm) { // We got fp literal token
2066     switch (OpTy) {
2067     case AMDGPU::OPERAND_REG_IMM_INT64:
2068     case AMDGPU::OPERAND_REG_IMM_FP64:
2069     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2070     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2071     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2072       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2073                                        AsmParser->hasInv2PiInlineImm())) {
2074         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2075         setImmKindConst();
2076         return;
2077       }
2078 
2079       // Non-inlineable
2080       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check whether the low 32 bits are zero
2082         if (Literal.getLoBits(32) != 0) {
2083           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2084           "Can't encode literal as exact 64-bit floating-point operand. "
2085           "Low 32-bits will be set to zero");
2086         }
2087 
2088         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2089         setImmKindLiteral();
2090         return;
2091       }
2092 
2093       // We don't allow fp literals in 64-bit integer instructions. It is
2094       // unclear how we should encode them. This case should be checked earlier
2095       // in predicate methods (isLiteralImm())
2096       llvm_unreachable("fp literal in 64-bit integer instruction.");
2097 
2098     case AMDGPU::OPERAND_REG_IMM_INT32:
2099     case AMDGPU::OPERAND_REG_IMM_FP32:
2100     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2101     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2102     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2103     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2104     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2105     case AMDGPU::OPERAND_REG_IMM_INT16:
2106     case AMDGPU::OPERAND_REG_IMM_FP16:
2107     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2108     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2109     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2110     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2111     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2112     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2113     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2114     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2115     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2116     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2117     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2118     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2119     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2120     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2121     case AMDGPU::OPERAND_REG_IMM_V2INT32:
2122     case AMDGPU::OPERAND_KIMM32:
2123     case AMDGPU::OPERAND_KIMM16: {
2124       bool lost;
2125       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert literal to the floating-point type of this operand
2127       FPLiteral.convert(*getOpFltSemantics(OpTy),
2128                         APFloat::rmNearestTiesToEven, &lost);
      // We allow precision loss but not overflow or underflow. This should
      // have been checked earlier in isLiteralImm()
2131 
2132       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2133       Inst.addOperand(MCOperand::createImm(ImmVal));
2134       setImmKindLiteral();
2135       return;
2136     }
2137     default:
2138       llvm_unreachable("invalid operand size");
2139     }
2140 
2141     return;
2142   }
2143 
2144   // We got int literal token.
2145   // Only sign extend inline immediates.
2146   switch (OpTy) {
2147   case AMDGPU::OPERAND_REG_IMM_INT32:
2148   case AMDGPU::OPERAND_REG_IMM_FP32:
2149   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2150   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2151   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2152   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2153   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2154   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2155   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2156   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2157   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2158   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2159   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2160     if (isSafeTruncation(Val, 32) &&
2161         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2162                                      AsmParser->hasInv2PiInlineImm())) {
2163       Inst.addOperand(MCOperand::createImm(Val));
2164       setImmKindConst();
2165       return;
2166     }
2167 
2168     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2169     setImmKindLiteral();
2170     return;
2171 
2172   case AMDGPU::OPERAND_REG_IMM_INT64:
2173   case AMDGPU::OPERAND_REG_IMM_FP64:
2174   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2175   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2176   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2177     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2178       Inst.addOperand(MCOperand::createImm(Val));
2179       setImmKindConst();
2180       return;
2181     }
2182 
2183     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2184     setImmKindLiteral();
2185     return;
2186 
2187   case AMDGPU::OPERAND_REG_IMM_INT16:
2188   case AMDGPU::OPERAND_REG_IMM_FP16:
2189   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2190   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2191   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2192   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2193   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2194     if (isSafeTruncation(Val, 16) &&
2195         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2196                                      AsmParser->hasInv2PiInlineImm())) {
2197       Inst.addOperand(MCOperand::createImm(Val));
2198       setImmKindConst();
2199       return;
2200     }
2201 
2202     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2203     setImmKindLiteral();
2204     return;
2205 
2206   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2207   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2208   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2209   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2210     assert(isSafeTruncation(Val, 16));
2211     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2212                                         AsmParser->hasInv2PiInlineImm()));
2213 
2214     Inst.addOperand(MCOperand::createImm(Val));
2215     return;
2216   }
2217   case AMDGPU::OPERAND_KIMM32:
2218     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2219     setImmKindNone();
2220     return;
2221   case AMDGPU::OPERAND_KIMM16:
2222     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2223     setImmKindNone();
2224     return;
2225   default:
2226     llvm_unreachable("invalid operand size");
2227   }
2228 }
2229 
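// Add a KImm (fixed-width literal) operand of Bitwidth bits. Integer tokens
// are truncated to the low Bitwidth bits; fp tokens are first converted from
// double to the Bitwidth-wide IEEE format and then encoded bit-for-bit.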
2230 template <unsigned Bitwidth>
2231 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2232   APInt Literal(64, Imm.Val);
2233   setImmKindNone();
2234 
2235   if (!Imm.IsFPImm) {
2236     // We got int literal token.
2237     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2238     return;
2239   }
2240 
2241   bool Lost;
2242   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2243   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2244                     APFloat::rmNearestTiesToEven, &Lost);
2245   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2246 }
2247 
2248 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2249   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2250 }
2251 
2252 static bool isInlineValue(unsigned Reg) {
2253   switch (Reg) {
2254   case AMDGPU::SRC_SHARED_BASE:
2255   case AMDGPU::SRC_SHARED_LIMIT:
2256   case AMDGPU::SRC_PRIVATE_BASE:
2257   case AMDGPU::SRC_PRIVATE_LIMIT:
2258   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2259     return true;
2260   case AMDGPU::SRC_VCCZ:
2261   case AMDGPU::SRC_EXECZ:
2262   case AMDGPU::SRC_SCC:
2263     return true;
2264   case AMDGPU::SGPR_NULL:
2265     return true;
2266   default:
2267     return false;
2268   }
2269 }
2270 
2271 bool AMDGPUOperand::isInlineValue() const {
2272   return isRegKind() && ::isInlineValue(getReg());
2273 }
2274 
2275 //===----------------------------------------------------------------------===//
2276 // AsmParser
2277 //===----------------------------------------------------------------------===//
2278 
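// Map a register kind and a width in bits to the corresponding register class
// ID, e.g. (IS_VGPR, 64) -> VReg_64RegClassID. Returns -1 if the width is not
// supported for that kind.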
2279 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2280   if (Is == IS_VGPR) {
2281     switch (RegWidth) {
2282       default: return -1;
2283       case 32:
2284         return AMDGPU::VGPR_32RegClassID;
2285       case 64:
2286         return AMDGPU::VReg_64RegClassID;
2287       case 96:
2288         return AMDGPU::VReg_96RegClassID;
2289       case 128:
2290         return AMDGPU::VReg_128RegClassID;
2291       case 160:
2292         return AMDGPU::VReg_160RegClassID;
2293       case 192:
2294         return AMDGPU::VReg_192RegClassID;
2295       case 224:
2296         return AMDGPU::VReg_224RegClassID;
2297       case 256:
2298         return AMDGPU::VReg_256RegClassID;
2299       case 512:
2300         return AMDGPU::VReg_512RegClassID;
2301       case 1024:
2302         return AMDGPU::VReg_1024RegClassID;
2303     }
2304   } else if (Is == IS_TTMP) {
2305     switch (RegWidth) {
2306       default: return -1;
2307       case 32:
2308         return AMDGPU::TTMP_32RegClassID;
2309       case 64:
2310         return AMDGPU::TTMP_64RegClassID;
2311       case 128:
2312         return AMDGPU::TTMP_128RegClassID;
2313       case 256:
2314         return AMDGPU::TTMP_256RegClassID;
2315       case 512:
2316         return AMDGPU::TTMP_512RegClassID;
2317     }
2318   } else if (Is == IS_SGPR) {
2319     switch (RegWidth) {
2320       default: return -1;
2321       case 32:
2322         return AMDGPU::SGPR_32RegClassID;
2323       case 64:
2324         return AMDGPU::SGPR_64RegClassID;
2325       case 96:
2326         return AMDGPU::SGPR_96RegClassID;
2327       case 128:
2328         return AMDGPU::SGPR_128RegClassID;
2329       case 160:
2330         return AMDGPU::SGPR_160RegClassID;
2331       case 192:
2332         return AMDGPU::SGPR_192RegClassID;
2333       case 224:
2334         return AMDGPU::SGPR_224RegClassID;
2335       case 256:
2336         return AMDGPU::SGPR_256RegClassID;
2337       case 512:
2338         return AMDGPU::SGPR_512RegClassID;
2339     }
2340   } else if (Is == IS_AGPR) {
2341     switch (RegWidth) {
2342       default: return -1;
2343       case 32:
2344         return AMDGPU::AGPR_32RegClassID;
2345       case 64:
2346         return AMDGPU::AReg_64RegClassID;
2347       case 96:
2348         return AMDGPU::AReg_96RegClassID;
2349       case 128:
2350         return AMDGPU::AReg_128RegClassID;
2351       case 160:
2352         return AMDGPU::AReg_160RegClassID;
2353       case 192:
2354         return AMDGPU::AReg_192RegClassID;
2355       case 224:
2356         return AMDGPU::AReg_224RegClassID;
2357       case 256:
2358         return AMDGPU::AReg_256RegClassID;
2359       case 512:
2360         return AMDGPU::AReg_512RegClassID;
2361       case 1024:
2362         return AMDGPU::AReg_1024RegClassID;
2363     }
2364   }
2365   return -1;
2366 }
2367 
2368 static unsigned getSpecialRegForName(StringRef RegName) {
2369   return StringSwitch<unsigned>(RegName)
2370     .Case("exec", AMDGPU::EXEC)
2371     .Case("vcc", AMDGPU::VCC)
2372     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2373     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2374     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2375     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2376     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2377     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2378     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2379     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2380     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2381     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2382     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2383     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2384     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2385     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2386     .Case("m0", AMDGPU::M0)
2387     .Case("vccz", AMDGPU::SRC_VCCZ)
2388     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2389     .Case("execz", AMDGPU::SRC_EXECZ)
2390     .Case("src_execz", AMDGPU::SRC_EXECZ)
2391     .Case("scc", AMDGPU::SRC_SCC)
2392     .Case("src_scc", AMDGPU::SRC_SCC)
2393     .Case("tba", AMDGPU::TBA)
2394     .Case("tma", AMDGPU::TMA)
2395     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2396     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2397     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2398     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2399     .Case("vcc_lo", AMDGPU::VCC_LO)
2400     .Case("vcc_hi", AMDGPU::VCC_HI)
2401     .Case("exec_lo", AMDGPU::EXEC_LO)
2402     .Case("exec_hi", AMDGPU::EXEC_HI)
2403     .Case("tma_lo", AMDGPU::TMA_LO)
2404     .Case("tma_hi", AMDGPU::TMA_HI)
2405     .Case("tba_lo", AMDGPU::TBA_LO)
2406     .Case("tba_hi", AMDGPU::TBA_HI)
2407     .Case("pc", AMDGPU::PC_REG)
2408     .Case("null", AMDGPU::SGPR_NULL)
2409     .Default(AMDGPU::NoRegister);
2410 }
2411 
2412 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2413                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2414   auto R = parseRegister();
2415   if (!R) return true;
2416   assert(R->isReg());
2417   RegNo = R->getReg();
2418   StartLoc = R->getStartLoc();
2419   EndLoc = R->getEndLoc();
2420   return false;
2421 }
2422 
2423 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2424                                     SMLoc &EndLoc) {
2425   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2426 }
2427 
2428 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2429                                                        SMLoc &StartLoc,
2430                                                        SMLoc &EndLoc) {
2431   bool Result =
2432       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2433   bool PendingErrors = getParser().hasPendingError();
2434   getParser().clearPendingErrors();
2435   if (PendingErrors)
2436     return MatchOperand_ParseFail;
2437   if (Result)
2438     return MatchOperand_NoMatch;
2439   return MatchOperand_Success;
2440 }
2441 
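// Extend the register list being built by one more 32-bit register Reg1.
// Regular registers must have consecutive indices (each one grows RegWidth by
// 32), e.g. [s0,s1,s2,s3]. Special registers may only be combined as
// recognized lo/hi pairs, e.g. vcc_lo,vcc_hi -> vcc.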
2442 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2443                                             RegisterKind RegKind, unsigned Reg1,
2444                                             SMLoc Loc) {
2445   switch (RegKind) {
2446   case IS_SPECIAL:
2447     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2448       Reg = AMDGPU::EXEC;
2449       RegWidth = 64;
2450       return true;
2451     }
2452     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2453       Reg = AMDGPU::FLAT_SCR;
2454       RegWidth = 64;
2455       return true;
2456     }
2457     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2458       Reg = AMDGPU::XNACK_MASK;
2459       RegWidth = 64;
2460       return true;
2461     }
2462     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2463       Reg = AMDGPU::VCC;
2464       RegWidth = 64;
2465       return true;
2466     }
2467     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2468       Reg = AMDGPU::TBA;
2469       RegWidth = 64;
2470       return true;
2471     }
2472     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2473       Reg = AMDGPU::TMA;
2474       RegWidth = 64;
2475       return true;
2476     }
2477     Error(Loc, "register does not fit in the list");
2478     return false;
2479   case IS_VGPR:
2480   case IS_SGPR:
2481   case IS_AGPR:
2482   case IS_TTMP:
2483     if (Reg1 != Reg + RegWidth / 32) {
2484       Error(Loc, "registers in a list must have consecutive indices");
2485       return false;
2486     }
2487     RegWidth += 32;
2488     return true;
2489   default:
2490     llvm_unreachable("unexpected register kind");
2491   }
2492 }
2493 
2494 struct RegInfo {
2495   StringLiteral Name;
2496   RegisterKind Kind;
2497 };
2498 
2499 static constexpr RegInfo RegularRegisters[] = {
2500   {{"v"},    IS_VGPR},
2501   {{"s"},    IS_SGPR},
2502   {{"ttmp"}, IS_TTMP},
2503   {{"acc"},  IS_AGPR},
2504   {{"a"},    IS_AGPR},
2505 };
2506 
2507 static bool isRegularReg(RegisterKind Kind) {
2508   return Kind == IS_VGPR ||
2509          Kind == IS_SGPR ||
2510          Kind == IS_TTMP ||
2511          Kind == IS_AGPR;
2512 }
2513 
2514 static const RegInfo* getRegularRegInfo(StringRef Str) {
2515   for (const RegInfo &Reg : RegularRegisters)
2516     if (Str.startswith(Reg.Name))
2517       return &Reg;
2518   return nullptr;
2519 }
2520 
2521 static bool getRegNum(StringRef Str, unsigned& Num) {
2522   return !Str.getAsInteger(10, Num);
2523 }
2524 
2525 bool
2526 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2527                             const AsmToken &NextToken) const {
2528 
2529   // A list of consecutive registers: [s0,s1,s2,s3]
2530   if (Token.is(AsmToken::LBrac))
2531     return true;
2532 
2533   if (!Token.is(AsmToken::Identifier))
2534     return false;
2535 
2536   // A single register like s0 or a range of registers like s[0:1]
2537 
2538   StringRef Str = Token.getString();
2539   const RegInfo *Reg = getRegularRegInfo(Str);
2540   if (Reg) {
2541     StringRef RegName = Reg->Name;
2542     StringRef RegSuffix = Str.substr(RegName.size());
2543     if (!RegSuffix.empty()) {
2544       unsigned Num;
2545       // A single register with an index: rXX
2546       if (getRegNum(RegSuffix, Num))
2547         return true;
2548     } else {
2549       // A range of registers: r[XX:YY].
2550       if (NextToken.is(AsmToken::LBrac))
2551         return true;
2552     }
2553   }
2554 
2555   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2556 }
2557 
2558 bool
2559 AMDGPUAsmParser::isRegister()
2560 {
2561   return isRegister(getToken(), peekToken());
2562 }
2563 
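// Convert a parsed register kind, starting index and width into an actual
// register. SGPRs and TTMPs must be suitably aligned: e.g. s[4:7] (RegNum = 4,
// RegWidth = 128, AlignSize = 4) maps to index 1 of the SGPR_128 class, while
// s[2:5] is rejected with "invalid register alignment".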
2564 unsigned
2565 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2566                                unsigned RegNum,
2567                                unsigned RegWidth,
2568                                SMLoc Loc) {
2569 
2570   assert(isRegularReg(RegKind));
2571 
2572   unsigned AlignSize = 1;
2573   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2574     // SGPR and TTMP registers must be aligned.
2575     // Max required alignment is 4 dwords.
2576     AlignSize = std::min(RegWidth / 32, 4u);
2577   }
2578 
2579   if (RegNum % AlignSize != 0) {
2580     Error(Loc, "invalid register alignment");
2581     return AMDGPU::NoRegister;
2582   }
2583 
2584   unsigned RegIdx = RegNum / AlignSize;
2585   int RCID = getRegClass(RegKind, RegWidth);
2586   if (RCID == -1) {
2587     Error(Loc, "invalid or unsupported register size");
2588     return AMDGPU::NoRegister;
2589   }
2590 
2591   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2592   const MCRegisterClass RC = TRI->getRegClass(RCID);
2593   if (RegIdx >= RC.getNumRegs()) {
2594     Error(Loc, "register index is out of range");
2595     return AMDGPU::NoRegister;
2596   }
2597 
2598   return RC.getRegister(RegIdx);
2599 }
2600 
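// Parse a register index or a range of indices in square brackets, e.g.
// "[2]" yields Num = 2 and RegWidth = 32, while "[0:3]" yields Num = 0 and
// RegWidth = 128.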
2601 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2602   int64_t RegLo, RegHi;
2603   if (!skipToken(AsmToken::LBrac, "missing register index"))
2604     return false;
2605 
2606   SMLoc FirstIdxLoc = getLoc();
2607   SMLoc SecondIdxLoc;
2608 
2609   if (!parseExpr(RegLo))
2610     return false;
2611 
2612   if (trySkipToken(AsmToken::Colon)) {
2613     SecondIdxLoc = getLoc();
2614     if (!parseExpr(RegHi))
2615       return false;
2616   } else {
2617     RegHi = RegLo;
2618   }
2619 
2620   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2621     return false;
2622 
2623   if (!isUInt<32>(RegLo)) {
2624     Error(FirstIdxLoc, "invalid register index");
2625     return false;
2626   }
2627 
2628   if (!isUInt<32>(RegHi)) {
2629     Error(SecondIdxLoc, "invalid register index");
2630     return false;
2631   }
2632 
2633   if (RegLo > RegHi) {
2634     Error(FirstIdxLoc, "first register index should not exceed second index");
2635     return false;
2636   }
2637 
2638   Num = static_cast<unsigned>(RegLo);
2639   RegWidth = 32 * ((RegHi - RegLo) + 1);
2640   return true;
2641 }
2642 
2643 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2644                                           unsigned &RegNum, unsigned &RegWidth,
2645                                           SmallVectorImpl<AsmToken> &Tokens) {
2646   assert(isToken(AsmToken::Identifier));
2647   unsigned Reg = getSpecialRegForName(getTokenStr());
2648   if (Reg) {
2649     RegNum = 0;
2650     RegWidth = 32;
2651     RegKind = IS_SPECIAL;
2652     Tokens.push_back(getToken());
2653     lex(); // skip register name
2654   }
2655   return Reg;
2656 }
2657 
2658 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2659                                           unsigned &RegNum, unsigned &RegWidth,
2660                                           SmallVectorImpl<AsmToken> &Tokens) {
2661   assert(isToken(AsmToken::Identifier));
2662   StringRef RegName = getTokenStr();
2663   auto Loc = getLoc();
2664 
2665   const RegInfo *RI = getRegularRegInfo(RegName);
2666   if (!RI) {
2667     Error(Loc, "invalid register name");
2668     return AMDGPU::NoRegister;
2669   }
2670 
2671   Tokens.push_back(getToken());
2672   lex(); // skip register name
2673 
2674   RegKind = RI->Kind;
2675   StringRef RegSuffix = RegName.substr(RI->Name.size());
2676   if (!RegSuffix.empty()) {
2677     // Single 32-bit register: vXX.
2678     if (!getRegNum(RegSuffix, RegNum)) {
2679       Error(Loc, "invalid register index");
2680       return AMDGPU::NoRegister;
2681     }
2682     RegWidth = 32;
2683   } else {
2684     // Range of registers: v[XX:YY]. ":YY" is optional.
2685     if (!ParseRegRange(RegNum, RegWidth))
2686       return AMDGPU::NoRegister;
2687   }
2688 
2689   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2690 }
2691 
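// Parse a bracketed list of consecutive 32-bit registers of the same kind,
// e.g. [v0,v1,v2,v3], which is equivalent to the range syntax v[0:3].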
2692 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2693                                        unsigned &RegWidth,
2694                                        SmallVectorImpl<AsmToken> &Tokens) {
2695   unsigned Reg = AMDGPU::NoRegister;
2696   auto ListLoc = getLoc();
2697 
2698   if (!skipToken(AsmToken::LBrac,
2699                  "expected a register or a list of registers")) {
2700     return AMDGPU::NoRegister;
2701   }
2702 
2703   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2704 
2705   auto Loc = getLoc();
2706   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2707     return AMDGPU::NoRegister;
2708   if (RegWidth != 32) {
2709     Error(Loc, "expected a single 32-bit register");
2710     return AMDGPU::NoRegister;
2711   }
2712 
2713   for (; trySkipToken(AsmToken::Comma); ) {
2714     RegisterKind NextRegKind;
2715     unsigned NextReg, NextRegNum, NextRegWidth;
2716     Loc = getLoc();
2717 
2718     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2719                              NextRegNum, NextRegWidth,
2720                              Tokens)) {
2721       return AMDGPU::NoRegister;
2722     }
2723     if (NextRegWidth != 32) {
2724       Error(Loc, "expected a single 32-bit register");
2725       return AMDGPU::NoRegister;
2726     }
2727     if (NextRegKind != RegKind) {
2728       Error(Loc, "registers in a list must be of the same kind");
2729       return AMDGPU::NoRegister;
2730     }
2731     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2732       return AMDGPU::NoRegister;
2733   }
2734 
2735   if (!skipToken(AsmToken::RBrac,
2736                  "expected a comma or a closing square bracket")) {
2737     return AMDGPU::NoRegister;
2738   }
2739 
2740   if (isRegularReg(RegKind))
2741     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2742 
2743   return Reg;
2744 }
2745 
2746 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2747                                           unsigned &RegNum, unsigned &RegWidth,
2748                                           SmallVectorImpl<AsmToken> &Tokens) {
2749   auto Loc = getLoc();
2750   Reg = AMDGPU::NoRegister;
2751 
2752   if (isToken(AsmToken::Identifier)) {
2753     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2754     if (Reg == AMDGPU::NoRegister)
2755       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2756   } else {
2757     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2758   }
2759 
2760   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2761   if (Reg == AMDGPU::NoRegister) {
2762     assert(Parser.hasPendingError());
2763     return false;
2764   }
2765 
2766   if (!subtargetHasRegister(*TRI, Reg)) {
2767     if (Reg == AMDGPU::SGPR_NULL) {
2768       Error(Loc, "'null' operand is not supported on this GPU");
2769     } else {
2770       Error(Loc, "register not available on this GPU");
2771     }
2772     return false;
2773   }
2774 
2775   return true;
2776 }
2777 
2778 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2779                                           unsigned &RegNum, unsigned &RegWidth,
2780                                           bool RestoreOnFailure /*=false*/) {
2781   Reg = AMDGPU::NoRegister;
2782 
2783   SmallVector<AsmToken, 1> Tokens;
2784   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2785     if (RestoreOnFailure) {
2786       while (!Tokens.empty()) {
2787         getLexer().UnLex(Tokens.pop_back_val());
2788       }
2789     }
2790     return true;
2791   }
2792   return false;
2793 }
2794 
2795 Optional<StringRef>
2796 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2797   switch (RegKind) {
2798   case IS_VGPR:
2799     return StringRef(".amdgcn.next_free_vgpr");
2800   case IS_SGPR:
2801     return StringRef(".amdgcn.next_free_sgpr");
2802   default:
2803     return None;
2804   }
2805 }
2806 
2807 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2808   auto SymbolName = getGprCountSymbolName(RegKind);
2809   assert(SymbolName && "initializing invalid register kind");
2810   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2811   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2812 }
2813 
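// Track the highest GPR index referenced so far by updating the
// .amdgcn.next_free_{v,s}gpr symbol, e.g. a reference to v[8:11] raises
// .amdgcn.next_free_vgpr to at least 12.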
2814 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2815                                             unsigned DwordRegIndex,
2816                                             unsigned RegWidth) {
2817   // Symbols are only defined for GCN targets
2818   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2819     return true;
2820 
2821   auto SymbolName = getGprCountSymbolName(RegKind);
2822   if (!SymbolName)
2823     return true;
2824   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2825 
2826   int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
2827   int64_t OldCount;
2828 
2829   if (!Sym->isVariable())
2830     return !Error(getLoc(),
2831                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2832   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2833     return !Error(
2834         getLoc(),
2835         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2836 
2837   if (OldCount <= NewMax)
2838     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2839 
2840   return true;
2841 }
2842 
2843 std::unique_ptr<AMDGPUOperand>
2844 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2845   const auto &Tok = getToken();
2846   SMLoc StartLoc = Tok.getLoc();
2847   SMLoc EndLoc = Tok.getEndLoc();
2848   RegisterKind RegKind;
2849   unsigned Reg, RegNum, RegWidth;
2850 
2851   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2852     return nullptr;
2853   }
2854   if (isHsaAbiVersion3AndAbove(&getSTI())) {
2855     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2856       return nullptr;
2857   } else
2858     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2859   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2860 }
2861 
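// Parse an immediate operand: either a floating-point literal with an
// optional leading '-', or an integer/symbolic MC expression. Expressions
// that evaluate to a constant are stored as immediates, others as MCExpr
// operands.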
2862 OperandMatchResultTy
2863 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2864   // TODO: add syntactic sugar for 1/(2*PI)
2865 
2866   assert(!isRegister());
2867   assert(!isModifier());
2868 
2869   const auto& Tok = getToken();
2870   const auto& NextTok = peekToken();
2871   bool IsReal = Tok.is(AsmToken::Real);
2872   SMLoc S = getLoc();
2873   bool Negate = false;
2874 
2875   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2876     lex();
2877     IsReal = true;
2878     Negate = true;
2879   }
2880 
2881   if (IsReal) {
2882     // Floating-point expressions are not supported.
2883     // Can only allow floating-point literals with an
2884     // optional sign.
2885 
2886     StringRef Num = getTokenStr();
2887     lex();
2888 
2889     APFloat RealVal(APFloat::IEEEdouble());
2890     auto roundMode = APFloat::rmNearestTiesToEven;
2891     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2892       return MatchOperand_ParseFail;
2893     }
2894     if (Negate)
2895       RealVal.changeSign();
2896 
2897     Operands.push_back(
2898       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2899                                AMDGPUOperand::ImmTyNone, true));
2900 
2901     return MatchOperand_Success;
2902 
2903   } else {
2904     int64_t IntVal;
2905     const MCExpr *Expr;
2906     SMLoc S = getLoc();
2907 
2908     if (HasSP3AbsModifier) {
      // This is a workaround for handling expressions
      // used as arguments of the SP3 'abs' modifier, for example:
      //     |1.0|
      //     |-1|
      //     |1+x|
      // This syntax is not compatible with the syntax of standard
      // MC expressions (due to the trailing '|').
2916       SMLoc EndLoc;
2917       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2918         return MatchOperand_ParseFail;
2919     } else {
2920       if (Parser.parseExpression(Expr))
2921         return MatchOperand_ParseFail;
2922     }
2923 
2924     if (Expr->evaluateAsAbsolute(IntVal)) {
2925       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2926     } else {
2927       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2928     }
2929 
2930     return MatchOperand_Success;
2931   }
2932 
2933   return MatchOperand_NoMatch;
2934 }
2935 
2936 OperandMatchResultTy
2937 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2938   if (!isRegister())
2939     return MatchOperand_NoMatch;
2940 
2941   if (auto R = parseRegister()) {
2942     assert(R->isReg());
2943     Operands.push_back(std::move(R));
2944     return MatchOperand_Success;
2945   }
2946   return MatchOperand_ParseFail;
2947 }
2948 
2949 OperandMatchResultTy
2950 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2951   auto res = parseReg(Operands);
2952   if (res != MatchOperand_NoMatch) {
2953     return res;
2954   } else if (isModifier()) {
2955     return MatchOperand_NoMatch;
2956   } else {
2957     return parseImm(Operands, HasSP3AbsMod);
2958   }
2959 }
2960 
2961 bool
2962 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2963   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2964     const auto &str = Token.getString();
2965     return str == "abs" || str == "neg" || str == "sext";
2966   }
2967   return false;
2968 }
2969 
2970 bool
2971 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2972   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2973 }
2974 
2975 bool
2976 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2977   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2978 }
2979 
2980 bool
2981 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2982   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2983 }
2984 
// Check if this is an operand modifier or an opcode modifier
// that may look like an expression but is not. We should
// avoid parsing these modifiers as expressions. Currently
// recognized sequences are:
2989 //   |...|
2990 //   abs(...)
2991 //   neg(...)
2992 //   sext(...)
2993 //   -reg
2994 //   -|...|
2995 //   -abs(...)
2996 //   name:...
2997 // Note that simple opcode modifiers like 'gds' may be parsed as
2998 // expressions; this is a special case. See getExpressionAsToken.
2999 //
3000 bool
3001 AMDGPUAsmParser::isModifier() {
3002 
3003   AsmToken Tok = getToken();
3004   AsmToken NextToken[2];
3005   peekTokens(NextToken);
3006 
3007   return isOperandModifier(Tok, NextToken[0]) ||
3008          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3009          isOpcodeModifierWithVal(Tok, NextToken[0]);
3010 }
3011 
3012 // Check if the current token is an SP3 'neg' modifier.
// Currently this modifier is allowed in the following contexts:
3014 //
3015 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3016 // 2. Before an 'abs' modifier: -abs(...)
3017 // 3. Before an SP3 'abs' modifier: -|...|
3018 //
3019 // In all other cases "-" is handled as a part
3020 // of an expression that follows the sign.
3021 //
// Note: When "-" is followed by an integer literal N,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of a floating-point
// NEG modifier would have resulted in different meanings
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with a preceding "-" are
// handled likewise for uniformity.
3033 //
3034 bool
3035 AMDGPUAsmParser::parseSP3NegModifier() {
3036 
3037   AsmToken NextToken[2];
3038   peekTokens(NextToken);
3039 
3040   if (isToken(AsmToken::Minus) &&
3041       (isRegister(NextToken[0], NextToken[1]) ||
3042        NextToken[0].is(AsmToken::Pipe) ||
3043        isId(NextToken[0], "abs"))) {
3044     lex();
3045     return true;
3046   }
3047 
3048   return false;
3049 }
3050 
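// Parse a register or immediate together with optional fp input modifiers.
// Both named and SP3 forms are accepted, e.g. neg(v0), abs(v0), -v1, |v2|,
// and combinations such as -|v0| or -abs(v0). Ambiguous forms like "--1"
// are rejected.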
3051 OperandMatchResultTy
3052 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3053                                               bool AllowImm) {
3054   bool Neg, SP3Neg;
3055   bool Abs, SP3Abs;
3056   SMLoc Loc;
3057 
3058   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3059   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
3060     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3061     return MatchOperand_ParseFail;
3062   }
3063 
3064   SP3Neg = parseSP3NegModifier();
3065 
3066   Loc = getLoc();
3067   Neg = trySkipId("neg");
3068   if (Neg && SP3Neg) {
3069     Error(Loc, "expected register or immediate");
3070     return MatchOperand_ParseFail;
3071   }
3072   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3073     return MatchOperand_ParseFail;
3074 
3075   Abs = trySkipId("abs");
3076   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3077     return MatchOperand_ParseFail;
3078 
3079   Loc = getLoc();
3080   SP3Abs = trySkipToken(AsmToken::Pipe);
3081   if (Abs && SP3Abs) {
3082     Error(Loc, "expected register or immediate");
3083     return MatchOperand_ParseFail;
3084   }
3085 
3086   OperandMatchResultTy Res;
3087   if (AllowImm) {
3088     Res = parseRegOrImm(Operands, SP3Abs);
3089   } else {
3090     Res = parseReg(Operands);
3091   }
3092   if (Res != MatchOperand_Success) {
3093     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
3094   }
3095 
3096   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3097     return MatchOperand_ParseFail;
3098   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3099     return MatchOperand_ParseFail;
3100   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3101     return MatchOperand_ParseFail;
3102 
3103   AMDGPUOperand::Modifiers Mods;
3104   Mods.Abs = Abs || SP3Abs;
3105   Mods.Neg = Neg || SP3Neg;
3106 
3107   if (Mods.hasFPModifiers()) {
3108     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3109     if (Op.isExpr()) {
3110       Error(Op.getStartLoc(), "expected an absolute expression");
3111       return MatchOperand_ParseFail;
3112     }
3113     Op.setModifiers(Mods);
3114   }
3115   return MatchOperand_Success;
3116 }
3117 
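// Parse a register or immediate together with an optional integer input
// modifier, i.e. the 'sext' modifier, e.g. sext(v0).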
3118 OperandMatchResultTy
3119 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3120                                                bool AllowImm) {
3121   bool Sext = trySkipId("sext");
3122   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3123     return MatchOperand_ParseFail;
3124 
3125   OperandMatchResultTy Res;
3126   if (AllowImm) {
3127     Res = parseRegOrImm(Operands);
3128   } else {
3129     Res = parseReg(Operands);
3130   }
3131   if (Res != MatchOperand_Success) {
3132     return Sext? MatchOperand_ParseFail : Res;
3133   }
3134 
3135   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3136     return MatchOperand_ParseFail;
3137 
3138   AMDGPUOperand::Modifiers Mods;
3139   Mods.Sext = Sext;
3140 
3141   if (Mods.hasIntModifiers()) {
3142     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3143     if (Op.isExpr()) {
3144       Error(Op.getStartLoc(), "expected an absolute expression");
3145       return MatchOperand_ParseFail;
3146     }
3147     Op.setModifiers(Mods);
3148   }
3149 
3150   return MatchOperand_Success;
3151 }
3152 
3153 OperandMatchResultTy
3154 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3155   return parseRegOrImmWithFPInputMods(Operands, false);
3156 }
3157 
3158 OperandMatchResultTy
3159 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3160   return parseRegOrImmWithIntInputMods(Operands, false);
3161 }
3162 
3163 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3164   auto Loc = getLoc();
3165   if (trySkipId("off")) {
3166     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3167                                                 AMDGPUOperand::ImmTyOff, false));
3168     return MatchOperand_Success;
3169   }
3170 
3171   if (!isRegister())
3172     return MatchOperand_NoMatch;
3173 
3174   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3175   if (Reg) {
3176     Operands.push_back(std::move(Reg));
3177     return MatchOperand_Success;
3178   }
3179 
3180   return MatchOperand_ParseFail;
3182 }
3183 
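// Reject matches that contradict an encoding forced by the mnemonic suffix.
// For example (illustrative), a mnemonic spelled with an _e64 suffix forces
// the 64-bit (VOP3) encoding, so a candidate opcode without the VOP3 flag is
// reported as an invalid-operand match.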
3184 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3185   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3186 
3187   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3188       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3189       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3190       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3191     return Match_InvalidOperand;
3192 
3193   if ((TSFlags & SIInstrFlags::VOP3) &&
3194       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3195       getForcedEncodingSize() != 64)
3196     return Match_PreferE32;
3197 
3198   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3199       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3200     // v_mac_f32/16 allow only dst_sel == DWORD;
3201     auto OpNum =
3202         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3203     const auto &Op = Inst.getOperand(OpNum);
3204     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3205       return Match_InvalidOperand;
3206     }
3207   }
3208 
3209   return Match_Success;
3210 }
3211 
3212 static ArrayRef<unsigned> getAllVariants() {
3213   static const unsigned Variants[] = {
3214     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3215     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3216   };
3217 
3218   return makeArrayRef(Variants);
3219 }
3220 
3221 // Return the list of asm variants that should be matched against.
3222 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3223   if (getForcedEncodingSize() == 32) {
3224     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3225     return makeArrayRef(Variants);
3226   }
3227 
3228   if (isForcedVOP3()) {
3229     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3230     return makeArrayRef(Variants);
3231   }
3232 
3233   if (isForcedSDWA()) {
3234     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3235                                         AMDGPUAsmVariants::SDWA9};
3236     return makeArrayRef(Variants);
3237   }
3238 
3239   if (isForcedDPP()) {
3240     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3241     return makeArrayRef(Variants);
3242   }
3243 
3244   return getAllVariants();
3245 }
3246 
3247 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3248   if (getForcedEncodingSize() == 32)
3249     return "e32";
3250 
3251   if (isForcedVOP3())
3252     return "e64";
3253 
3254   if (isForcedSDWA())
3255     return "sdwa";
3256 
3257   if (isForcedDPP())
3258     return "dpp";
3259 
3260   return "";
3261 }
3262 
3263 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3264   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3265   const unsigned Num = Desc.getNumImplicitUses();
3266   for (unsigned i = 0; i < Num; ++i) {
3267     unsigned Reg = Desc.ImplicitUses[i];
3268     switch (Reg) {
3269     case AMDGPU::FLAT_SCR:
3270     case AMDGPU::VCC:
3271     case AMDGPU::VCC_LO:
3272     case AMDGPU::VCC_HI:
3273     case AMDGPU::M0:
3274       return Reg;
3275     default:
3276       break;
3277     }
3278   }
3279   return AMDGPU::NoRegister;
3280 }
3281 
3282 // NB: This code is correct only when used to check constant
3283 // bus limitations because GFX7 supports no f16 inline constants.
3284 // Note that there are no cases when a GFX7 opcode violates
3285 // constant bus limitations due to the use of an f16 constant.
3286 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3287                                        unsigned OpIdx) const {
3288   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3289 
3290   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3291     return false;
3292   }
3293 
3294   const MCOperand &MO = Inst.getOperand(OpIdx);
3295 
3296   int64_t Val = MO.getImm();
3297   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3298 
3299   switch (OpSize) { // expected operand size
3300   case 8:
3301     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3302   case 4:
3303     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3304   case 2: {
3305     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3306     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3307         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3308         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3309       return AMDGPU::isInlinableIntLiteral(Val);
3310 
3311     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3312         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3313         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3314       return AMDGPU::isInlinableIntLiteralV216(Val);
3315 
3316     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3317         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3318         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3319       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3320 
3321     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3322   }
3323   default:
3324     llvm_unreachable("invalid operand size");
3325   }
3326 }
3327 
3328 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3329   if (!isGFX10Plus())
3330     return 1;
3331 
3332   switch (Opcode) {
3333   // 64-bit shift instructions can use only one scalar value input
3334   case AMDGPU::V_LSHLREV_B64_e64:
3335   case AMDGPU::V_LSHLREV_B64_gfx10:
3336   case AMDGPU::V_LSHRREV_B64_e64:
3337   case AMDGPU::V_LSHRREV_B64_gfx10:
3338   case AMDGPU::V_ASHRREV_I64_e64:
3339   case AMDGPU::V_ASHRREV_I64_gfx10:
3340   case AMDGPU::V_LSHL_B64_e64:
3341   case AMDGPU::V_LSHR_B64_e64:
3342   case AMDGPU::V_ASHR_I64_e64:
3343     return 1;
3344   default:
3345     return 2;
3346   }
3347 }
3348 
3349 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3350   const MCOperand &MO = Inst.getOperand(OpIdx);
3351   if (MO.isImm()) {
3352     return !isInlineConstant(Inst, OpIdx);
3353   } else if (MO.isReg()) {
3354     auto Reg = MO.getReg();
3355     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3356     auto PReg = mc2PseudoReg(Reg);
3357     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3358   } else {
3359     return true;
3360   }
3361 }
3362 
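// Check how many scalar (constant bus) values an instruction reads against
// the per-target limit. For example (illustrative), where the limit is one,
// "v_add_f32_e64 v0, s0, s1" reads two distinct SGPRs and is rejected, while
// "v_add_f32_e64 v0, s0, v1" is accepted.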
3363 bool
3364 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3365                                                 const OperandVector &Operands) {
3366   const unsigned Opcode = Inst.getOpcode();
3367   const MCInstrDesc &Desc = MII.get(Opcode);
3368   unsigned LastSGPR = AMDGPU::NoRegister;
3369   unsigned ConstantBusUseCount = 0;
3370   unsigned NumLiterals = 0;
3371   unsigned LiteralSize;
3372 
3373   if (Desc.TSFlags &
3374       (SIInstrFlags::VOPC |
3375        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3376        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3377        SIInstrFlags::SDWA)) {
3378     // Check special imm operands (used by madmk, etc)
3379     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3380       ++NumLiterals;
3381       LiteralSize = 4;
3382     }
3383 
3384     SmallDenseSet<unsigned> SGPRsUsed;
3385     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3386     if (SGPRUsed != AMDGPU::NoRegister) {
3387       SGPRsUsed.insert(SGPRUsed);
3388       ++ConstantBusUseCount;
3389     }
3390 
3391     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3392     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3393     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3394 
3395     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3396 
3397     for (int OpIdx : OpIndices) {
3398       if (OpIdx == -1) break;
3399 
3400       const MCOperand &MO = Inst.getOperand(OpIdx);
3401       if (usesConstantBus(Inst, OpIdx)) {
3402         if (MO.isReg()) {
3403           LastSGPR = mc2PseudoReg(MO.getReg());
3404           // Pairs of registers with a partial intersection like these:
3405           //   s0, s[0:1]
3406           //   flat_scratch_lo, flat_scratch
3407           //   flat_scratch_lo, flat_scratch_hi
3408           // are theoretically valid but are disallowed anyway.
3409           // Note that this code mimics SIInstrInfo::verifyInstruction.
3410           if (!SGPRsUsed.count(LastSGPR)) {
3411             SGPRsUsed.insert(LastSGPR);
3412             ++ConstantBusUseCount;
3413           }
3414         } else { // Expression or a literal
3415 
3416           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3417             continue; // special operand like VINTERP attr_chan
3418 
3419           // An instruction may use only one literal.
3420           // This has been validated on the previous step.
3421           // See validateVOPLiteral.
3422           // This literal may be used as more than one operand.
3423           // If all these operands are of the same size,
3424           // this literal counts as one scalar value.
3425           // Otherwise it counts as 2 scalar values.
3426           // See "GFX10 Shader Programming", section 3.6.2.3.
3427 
3428           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3429           if (Size < 4) Size = 4;
3430 
3431           if (NumLiterals == 0) {
3432             NumLiterals = 1;
3433             LiteralSize = Size;
3434           } else if (LiteralSize != Size) {
3435             NumLiterals = 2;
3436           }
3437         }
3438       }
3439     }
3440   }
3441   ConstantBusUseCount += NumLiterals;
3442 
3443   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3444     return true;
3445 
3446   SMLoc LitLoc = getLitLoc(Operands);
3447   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3448   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3449   Error(Loc, "invalid operand (violates constant bus restrictions)");
3450   return false;
3451 }
3452 
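// For opcodes whose vdst carries an EARLY_CLOBBER constraint, the destination
// register must not overlap any source register, even partially; e.g.
// (illustrative) a tuple destination v[0:1] with v1 used as a source is
// rejected.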
3453 bool
3454 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3455                                                  const OperandVector &Operands) {
3456   const unsigned Opcode = Inst.getOpcode();
3457   const MCInstrDesc &Desc = MII.get(Opcode);
3458 
3459   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3460   if (DstIdx == -1 ||
3461       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3462     return true;
3463   }
3464 
3465   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3466 
3467   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3468   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3469   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3470 
3471   assert(DstIdx != -1);
3472   const MCOperand &Dst = Inst.getOperand(DstIdx);
3473   assert(Dst.isReg());
3474 
3475   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3476 
3477   for (int SrcIdx : SrcIndices) {
3478     if (SrcIdx == -1) break;
3479     const MCOperand &Src = Inst.getOperand(SrcIdx);
3480     if (Src.isReg()) {
3481       if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) {
3482         const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3483         Error(getRegLoc(SrcReg, Operands),
3484           "destination must be different than all sources");
3485         return false;
3486       }
3487     }
3488   }
3489 
3490   return true;
3491 }
3492 
3493 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3494 
3495   const unsigned Opc = Inst.getOpcode();
3496   const MCInstrDesc &Desc = MII.get(Opc);
3497 
3498   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3499     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3500     assert(ClampIdx != -1);
3501     return Inst.getOperand(ClampIdx).getImm() == 0;
3502   }
3503 
3504   return true;
3505 }
3506 
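// Check that the vdata operand of a MIMG instruction provides enough dwords
// for the channels enabled by dmask, plus one extra dword when tfe is set
// (packed d16 halves the channel count). For example (illustrative),
// dmask:0x7 enables three channels, so vdata must span three dwords, or four
// with tfe.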
3507 Optional<StringRef> AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3508 
3509   const unsigned Opc = Inst.getOpcode();
3510   const MCInstrDesc &Desc = MII.get(Opc);
3511 
3512   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3513     return None;
3514 
3515   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3516   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3517   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3518 
3519   assert(VDataIdx != -1);
3520 
3521   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3522     return None;
3523 
3524   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3525   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3526   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3527   if (DMask == 0)
3528     DMask = 1;
3529 
3530   bool isPackedD16 = false;
3531   unsigned DataSize =
3532     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3533   if (hasPackedD16()) {
3534     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3535     isPackedD16 = D16Idx >= 0;
3536     if (isPackedD16 && Inst.getOperand(D16Idx).getImm())
3537       DataSize = (DataSize + 1) / 2;
3538   }
3539 
3540   if ((VDataSize / 4) == DataSize + TFESize)
3541     return None;
3542 
3543   return StringRef(isPackedD16
3544                        ? "image data size does not match dmask, d16 and tfe"
3545                        : "image data size does not match dmask and tfe");
3546 }
3547 
3548 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3549   const unsigned Opc = Inst.getOpcode();
3550   const MCInstrDesc &Desc = MII.get(Opc);
3551 
3552   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3553     return true;
3554 
3555   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3556 
3557   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3558       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3559   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3560   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3561   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3562   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3563 
3564   assert(VAddr0Idx != -1);
3565   assert(SrsrcIdx != -1);
3566   assert(SrsrcIdx > VAddr0Idx);
3567 
3568   if (DimIdx == -1)
3569     return true; // intersect_ray
3570 
3571   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3572   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3573   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3574   unsigned ActualAddrSize =
3575       IsNSA ? SrsrcIdx - VAddr0Idx
3576             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3577   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3578 
3579   unsigned ExpectedAddrSize =
3580       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3581 
3582   if (!IsNSA) {
3583     if (ExpectedAddrSize > 8)
3584       ExpectedAddrSize = 16;
3585 
3586     // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3587     // This provides backward compatibility for assembly created
3588     // before 160b/192b/224b types were directly supported.
3589     if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3590       return true;
3591   }
3592 
3593   return ActualAddrSize == ExpectedAddrSize;
3594 }
3595 
3596 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3597 
3598   const unsigned Opc = Inst.getOpcode();
3599   const MCInstrDesc &Desc = MII.get(Opc);
3600 
3601   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3602     return true;
3603   if (!Desc.mayLoad() || !Desc.mayStore())
3604     return true; // Not atomic
3605 
3606   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3607   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3608 
3609   // This is an incomplete check because image_atomic_cmpswap
3610   // may only use 0x3 and 0xf while other atomic operations
3611   // may use 0x1 and 0x3. However these limitations are
3612   // verified when we check that dmask matches dst size.
3613   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3614 }
3615 
3616 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3617 
3618   const unsigned Opc = Inst.getOpcode();
3619   const MCInstrDesc &Desc = MII.get(Opc);
3620 
3621   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3622     return true;
3623 
3624   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3625   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3626 
3627   // GATHER4 instructions use dmask in a different fashion compared to
3628   // other MIMG instructions. The only useful DMASK values are
3629   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3630   // (red,red,red,red) etc.) The ISA document doesn't mention
3631   // this.
3632   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3633 }
3634 
3635 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3636   const unsigned Opc = Inst.getOpcode();
3637   const MCInstrDesc &Desc = MII.get(Opc);
3638 
3639   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3640     return true;
3641 
3642   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3643   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3644       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3645 
3646   if (!BaseOpcode->MSAA)
3647     return true;
3648 
3649   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3650   assert(DimIdx != -1);
3651 
3652   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3653   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3654 
3655   return DimInfo->MSAA;
3656 }
3657 
3658 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3659 {
3660   switch (Opcode) {
3661   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3662   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3663   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3664     return true;
3665   default:
3666     return false;
3667   }
3668 }
3669 
3670 // movrels* opcodes should only allow VGPRs as src0.
3671 // This is specified in the .td description for vop1/vop3,
3672 // but sdwa is handled differently. See isSDWAOperand.
3673 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3674                                       const OperandVector &Operands) {
3675 
3676   const unsigned Opc = Inst.getOpcode();
3677   const MCInstrDesc &Desc = MII.get(Opc);
3678 
3679   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3680     return true;
3681 
3682   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3683   assert(Src0Idx != -1);
3684 
3685   SMLoc ErrLoc;
3686   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3687   if (Src0.isReg()) {
3688     auto Reg = mc2PseudoReg(Src0.getReg());
3689     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3690     if (!isSGPR(Reg, TRI))
3691       return true;
3692     ErrLoc = getRegLoc(Reg, Operands);
3693   } else {
3694     ErrLoc = getConstLoc(Operands);
3695   }
3696 
3697   Error(ErrLoc, "source operand must be a VGPR");
3698   return false;
3699 }
3700 
3701 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3702                                           const OperandVector &Operands) {
3703 
3704   const unsigned Opc = Inst.getOpcode();
3705 
3706   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3707     return true;
3708 
3709   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3710   assert(Src0Idx != -1);
3711 
3712   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3713   if (!Src0.isReg())
3714     return true;
3715 
3716   auto Reg = mc2PseudoReg(Src0.getReg());
3717   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3718   if (!isGFX90A() && isSGPR(Reg, TRI)) {
3719     Error(getRegLoc(Reg, Operands),
3720           "source operand must be either a VGPR or an inline constant");
3721     return false;
3722   }
3723 
3724   return true;
3725 }
3726 
3727 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3728                                    const OperandVector &Operands) {
3729   const unsigned Opc = Inst.getOpcode();
3730   const MCInstrDesc &Desc = MII.get(Opc);
3731 
3732   if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3733     return true;
3734 
3735   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3736   if (Src2Idx == -1)
3737     return true;
3738 
3739   const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3740   if (!Src2.isReg())
3741     return true;
3742 
3743   MCRegister Src2Reg = Src2.getReg();
3744   MCRegister DstReg = Inst.getOperand(0).getReg();
3745   if (Src2Reg == DstReg)
3746     return true;
3747 
3748   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3749   if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
3750     return true;
3751 
3752   if (TRI->regsOverlap(Src2Reg, DstReg)) {
3753     Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3754           "source 2 operand must not partially overlap with dst");
3755     return false;
3756   }
3757 
3758   return true;
3759 }
3760 
3761 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3762   switch (Inst.getOpcode()) {
3763   default:
3764     return true;
3765   case V_DIV_SCALE_F32_gfx6_gfx7:
3766   case V_DIV_SCALE_F32_vi:
3767   case V_DIV_SCALE_F32_gfx10:
3768   case V_DIV_SCALE_F64_gfx6_gfx7:
3769   case V_DIV_SCALE_F64_vi:
3770   case V_DIV_SCALE_F64_gfx10:
3771     break;
3772   }
3773 
3774   // TODO: Check that src0 = src1 or src2.
3775 
3776   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3777                     AMDGPU::OpName::src1_modifiers,
3778                     AMDGPU::OpName::src2_modifiers}) {
3779     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3780             .getImm() &
3781         SISrcMods::ABS) {
3782       return false;
3783     }
3784   }
3785 
3786   return true;
3787 }
3788 
3789 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3790 
3791   const unsigned Opc = Inst.getOpcode();
3792   const MCInstrDesc &Desc = MII.get(Opc);
3793 
3794   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3795     return true;
3796 
3797   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3798   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3799     if (isCI() || isSI())
3800       return false;
3801   }
3802 
3803   return true;
3804 }
3805 
3806 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3807   const unsigned Opc = Inst.getOpcode();
3808   const MCInstrDesc &Desc = MII.get(Opc);
3809 
3810   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3811     return true;
3812 
3813   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3814   if (DimIdx < 0)
3815     return true;
3816 
3817   long Imm = Inst.getOperand(DimIdx).getImm();
3818   if (Imm < 0 || Imm >= 8)
3819     return false;
3820 
3821   return true;
3822 }
3823 
3824 static bool IsRevOpcode(const unsigned Opcode)
3825 {
3826   switch (Opcode) {
3827   case AMDGPU::V_SUBREV_F32_e32:
3828   case AMDGPU::V_SUBREV_F32_e64:
3829   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3830   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3831   case AMDGPU::V_SUBREV_F32_e32_vi:
3832   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3833   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3834   case AMDGPU::V_SUBREV_F32_e64_vi:
3835 
3836   case AMDGPU::V_SUBREV_CO_U32_e32:
3837   case AMDGPU::V_SUBREV_CO_U32_e64:
3838   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3839   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3840 
3841   case AMDGPU::V_SUBBREV_U32_e32:
3842   case AMDGPU::V_SUBBREV_U32_e64:
3843   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3844   case AMDGPU::V_SUBBREV_U32_e32_vi:
3845   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3846   case AMDGPU::V_SUBBREV_U32_e64_vi:
3847 
3848   case AMDGPU::V_SUBREV_U32_e32:
3849   case AMDGPU::V_SUBREV_U32_e64:
3850   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3851   case AMDGPU::V_SUBREV_U32_e32_vi:
3852   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3853   case AMDGPU::V_SUBREV_U32_e64_vi:
3854 
3855   case AMDGPU::V_SUBREV_F16_e32:
3856   case AMDGPU::V_SUBREV_F16_e64:
3857   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3858   case AMDGPU::V_SUBREV_F16_e32_vi:
3859   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3860   case AMDGPU::V_SUBREV_F16_e64_vi:
3861 
3862   case AMDGPU::V_SUBREV_U16_e32:
3863   case AMDGPU::V_SUBREV_U16_e64:
3864   case AMDGPU::V_SUBREV_U16_e32_vi:
3865   case AMDGPU::V_SUBREV_U16_e64_vi:
3866 
3867   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3868   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3869   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3870 
3871   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3872   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3873 
3874   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3875   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3876 
3877   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3878   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3879 
3880   case AMDGPU::V_LSHRREV_B32_e32:
3881   case AMDGPU::V_LSHRREV_B32_e64:
3882   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3883   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3884   case AMDGPU::V_LSHRREV_B32_e32_vi:
3885   case AMDGPU::V_LSHRREV_B32_e64_vi:
3886   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3887   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3888 
3889   case AMDGPU::V_ASHRREV_I32_e32:
3890   case AMDGPU::V_ASHRREV_I32_e64:
3891   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3892   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3893   case AMDGPU::V_ASHRREV_I32_e32_vi:
3894   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3895   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3896   case AMDGPU::V_ASHRREV_I32_e64_vi:
3897 
3898   case AMDGPU::V_LSHLREV_B32_e32:
3899   case AMDGPU::V_LSHLREV_B32_e64:
3900   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3901   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3902   case AMDGPU::V_LSHLREV_B32_e32_vi:
3903   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3904   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3905   case AMDGPU::V_LSHLREV_B32_e64_vi:
3906 
3907   case AMDGPU::V_LSHLREV_B16_e32:
3908   case AMDGPU::V_LSHLREV_B16_e64:
3909   case AMDGPU::V_LSHLREV_B16_e32_vi:
3910   case AMDGPU::V_LSHLREV_B16_e64_vi:
3911   case AMDGPU::V_LSHLREV_B16_gfx10:
3912 
3913   case AMDGPU::V_LSHRREV_B16_e32:
3914   case AMDGPU::V_LSHRREV_B16_e64:
3915   case AMDGPU::V_LSHRREV_B16_e32_vi:
3916   case AMDGPU::V_LSHRREV_B16_e64_vi:
3917   case AMDGPU::V_LSHRREV_B16_gfx10:
3918 
3919   case AMDGPU::V_ASHRREV_I16_e32:
3920   case AMDGPU::V_ASHRREV_I16_e64:
3921   case AMDGPU::V_ASHRREV_I16_e32_vi:
3922   case AMDGPU::V_ASHRREV_I16_e64_vi:
3923   case AMDGPU::V_ASHRREV_I16_gfx10:
3924 
3925   case AMDGPU::V_LSHLREV_B64_e64:
3926   case AMDGPU::V_LSHLREV_B64_gfx10:
3927   case AMDGPU::V_LSHLREV_B64_vi:
3928 
3929   case AMDGPU::V_LSHRREV_B64_e64:
3930   case AMDGPU::V_LSHRREV_B64_gfx10:
3931   case AMDGPU::V_LSHRREV_B64_vi:
3932 
3933   case AMDGPU::V_ASHRREV_I64_e64:
3934   case AMDGPU::V_ASHRREV_I64_gfx10:
3935   case AMDGPU::V_ASHRREV_I64_vi:
3936 
3937   case AMDGPU::V_PK_LSHLREV_B16:
3938   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3939   case AMDGPU::V_PK_LSHLREV_B16_vi:
3940 
3941   case AMDGPU::V_PK_LSHRREV_B16:
3942   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3943   case AMDGPU::V_PK_LSHRREV_B16_vi:
3944   case AMDGPU::V_PK_ASHRREV_I16:
3945   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3946   case AMDGPU::V_PK_ASHRREV_I16_vi:
3947     return true;
3948   default:
3949     return false;
3950   }
3951 }
3952 
3953 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3954 
3955   using namespace SIInstrFlags;
3956   const unsigned Opcode = Inst.getOpcode();
3957   const MCInstrDesc &Desc = MII.get(Opcode);
3958 
3959   // The lds_direct register is defined so that it can be used
3960   // with 9-bit operands only. Ignore encodings that do not accept such operands.
3961   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3962   if ((Desc.TSFlags & Enc) == 0)
3963     return None;
3964 
3965   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3966     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3967     if (SrcIdx == -1)
3968       break;
3969     const auto &Src = Inst.getOperand(SrcIdx);
3970     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3971 
3972       if (isGFX90A())
3973         return StringRef("lds_direct is not supported on this GPU");
3974 
3975       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3976         return StringRef("lds_direct cannot be used with this instruction");
3977 
3978       if (SrcName != OpName::src0)
3979         return StringRef("lds_direct may be used as src0 only");
3980     }
3981   }
3982 
3983   return None;
3984 }
3985 
3986 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3987   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3988     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3989     if (Op.isFlatOffset())
3990       return Op.getStartLoc();
3991   }
3992   return getLoc();
3993 }
3994 
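// Validate the immediate offset of FLAT-family instructions. GLOBAL and
// SCRATCH accesses take a signed offset, while plain FLAT accesses take an
// unsigned offset; the allowed bit width depends on the subtarget. For
// example (illustrative), a negative "offset:" value is accepted only for
// GLOBAL/SCRATCH forms.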
3995 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3996                                          const OperandVector &Operands) {
3997   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3998   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3999     return true;
4000 
4001   auto Opcode = Inst.getOpcode();
4002   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4003   assert(OpNum != -1);
4004 
4005   const auto &Op = Inst.getOperand(OpNum);
4006   if (!hasFlatOffsets() && Op.getImm() != 0) {
4007     Error(getFlatOffsetLoc(Operands),
4008           "flat offset modifier is not supported on this GPU");
4009     return false;
4010   }
4011 
4012   // GLOBAL and SCRATCH instructions accept a signed offset;
4013   // for plain FLAT the offset must be positive (MSB is ignored and forced to zero).
4014   if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
4015     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
4016     if (!isIntN(OffsetSize, Op.getImm())) {
4017       Error(getFlatOffsetLoc(Operands),
4018             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4019       return false;
4020     }
4021   } else {
4022     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
4023     if (!isUIntN(OffsetSize, Op.getImm())) {
4024       Error(getFlatOffsetLoc(Operands),
4025             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4026       return false;
4027     }
4028   }
4029 
4030   return true;
4031 }
4032 
4033 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4034   // Start with second operand because SMEM Offset cannot be dst or src0.
4035   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4036     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4037     if (Op.isSMEMOffset())
4038       return Op.getStartLoc();
4039   }
4040   return getLoc();
4041 }
4042 
4043 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4044                                          const OperandVector &Operands) {
4045   if (isCI() || isSI())
4046     return true;
4047 
4048   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4049   if ((TSFlags & SIInstrFlags::SMRD) == 0)
4050     return true;
4051 
4052   auto Opcode = Inst.getOpcode();
4053   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4054   if (OpNum == -1)
4055     return true;
4056 
4057   const auto &Op = Inst.getOperand(OpNum);
4058   if (!Op.isImm())
4059     return true;
4060 
4061   uint64_t Offset = Op.getImm();
4062   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4063   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4064       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4065     return true;
4066 
4067   Error(getSMEMOffsetLoc(Operands),
4068         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
4069                                "expected a 21-bit signed offset");
4070 
4071   return false;
4072 }
4073 
4074 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4075   unsigned Opcode = Inst.getOpcode();
4076   const MCInstrDesc &Desc = MII.get(Opcode);
4077   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4078     return true;
4079 
4080   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4081   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4082 
4083   const int OpIndices[] = { Src0Idx, Src1Idx };
4084 
4085   unsigned NumExprs = 0;
4086   unsigned NumLiterals = 0;
4087   uint32_t LiteralValue;
4088 
4089   for (int OpIdx : OpIndices) {
4090     if (OpIdx == -1) break;
4091 
4092     const MCOperand &MO = Inst.getOperand(OpIdx);
4093     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4094     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4095       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4096         uint32_t Value = static_cast<uint32_t>(MO.getImm());
4097         if (NumLiterals == 0 || LiteralValue != Value) {
4098           LiteralValue = Value;
4099           ++NumLiterals;
4100         }
4101       } else if (MO.isExpr()) {
4102         ++NumExprs;
4103       }
4104     }
4105   }
4106 
4107   return NumLiterals + NumExprs <= 1;
4108 }
4109 
4110 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4111   const unsigned Opc = Inst.getOpcode();
4112   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
4113       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
4114     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4115     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4116 
4117     if (OpSel & ~3)
4118       return false;
4119   }
4120 
4121   if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) {
4122     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4123     if (OpSelIdx != -1) {
4124       if (Inst.getOperand(OpSelIdx).getImm() != 0)
4125         return false;
4126     }
4127     int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4128     if (OpSelHiIdx != -1) {
4129       if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4130         return false;
4131     }
4132   }
4133 
4134   return true;
4135 }
4136 
4137 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4138                                   const OperandVector &Operands) {
4139   const unsigned Opc = Inst.getOpcode();
4140   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4141   if (DppCtrlIdx < 0)
4142     return true;
4143   unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4144 
4145   if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
4146     // DPP64 is supported for row_newbcast only.
4147     int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4148     if (Src0Idx >= 0 &&
4149         getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
4150       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4151       Error(S, "64 bit dpp only supports row_newbcast");
4152       return false;
4153     }
4154   }
4155 
4156   return true;
4157 }
4158 
4159 // Check if VCC register matches wavefront size
4160 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4161   auto FB = getFeatureBits();
4162   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4163     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4164 }
4165 
4166 // Only one unique literal may be used. A VOP3 literal is allowed only on GFX10+.
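// For example (illustrative), "v_fma_f32 v0, v1, v2, 0x42f60000" carries a
// VOP3 literal and is rejected on targets without the VOP3Literal feature,
// and any instruction naming two different literal values is rejected with
// "only one literal operand is allowed".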
4167 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4168                                          const OperandVector &Operands) {
4169   unsigned Opcode = Inst.getOpcode();
4170   const MCInstrDesc &Desc = MII.get(Opcode);
4171   const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
4172   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4173       ImmIdx == -1)
4174     return true;
4175 
4176   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4177   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4178   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
4179 
4180   const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};
4181 
4182   unsigned NumExprs = 0;
4183   unsigned NumLiterals = 0;
4184   uint32_t LiteralValue;
4185 
4186   for (int OpIdx : OpIndices) {
4187     if (OpIdx == -1)
4188       continue;
4189 
4190     const MCOperand &MO = Inst.getOperand(OpIdx);
4191     if (!MO.isImm() && !MO.isExpr())
4192       continue;
4193     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4194       continue;
4195 
4196     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4197         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4198       Error(getConstLoc(Operands),
4199             "inline constants are not allowed for this operand");
4200       return false;
4201     }
4202 
4203     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4204       uint32_t Value = static_cast<uint32_t>(MO.getImm());
4205       if (NumLiterals == 0 || LiteralValue != Value) {
4206         LiteralValue = Value;
4207         ++NumLiterals;
4208       }
4209     } else if (MO.isExpr()) {
4210       ++NumExprs;
4211     }
4212   }
4213   NumLiterals += NumExprs;
4214 
4215   if (!NumLiterals)
4216     return true;
4217 
4218   if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4219     Error(getLitLoc(Operands), "literal operands are not supported");
4220     return false;
4221   }
4222 
4223   if (NumLiterals > 1) {
4224     Error(getLitLoc(Operands), "only one literal operand is allowed");
4225     return false;
4226   }
4227 
4228   return true;
4229 }
4230 
4231 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4232 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4233                          const MCRegisterInfo *MRI) {
4234   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4235   if (OpIdx < 0)
4236     return -1;
4237 
4238   const MCOperand &Op = Inst.getOperand(OpIdx);
4239   if (!Op.isReg())
4240     return -1;
4241 
4242   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4243   auto Reg = Sub ? Sub : Op.getReg();
4244   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4245   return AGPR32.contains(Reg) ? 1 : 0;
4246 }
4247 
4248 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4249   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4250   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4251                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4252                   SIInstrFlags::DS)) == 0)
4253     return true;
4254 
4255   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4256                                                       : AMDGPU::OpName::vdata;
4257 
4258   const MCRegisterInfo *MRI = getMRI();
4259   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4260   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4261 
4262   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4263     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4264     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4265       return false;
4266   }
4267 
4268   auto FB = getFeatureBits();
4269   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4270     if (DataAreg < 0 || DstAreg < 0)
4271       return true;
4272     return DstAreg == DataAreg;
4273   }
4274 
4275   return DstAreg < 1 && DataAreg < 1;
4276 }
4277 
4278 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4279   auto FB = getFeatureBits();
4280   if (!FB[AMDGPU::FeatureGFX90AInsts])
4281     return true;
4282 
4283   const MCRegisterInfo *MRI = getMRI();
4284   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4285   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4286   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4287     const MCOperand &Op = Inst.getOperand(I);
4288     if (!Op.isReg())
4289       continue;
4290 
4291     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4292     if (!Sub)
4293       continue;
4294 
4295     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4296       return false;
4297     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4298       return false;
4299   }
4300 
4301   return true;
4302 }
4303 
4304 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4305   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4306     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4307     if (Op.isBLGP())
4308       return Op.getStartLoc();
4309   }
4310   return SMLoc();
4311 }
4312 
4313 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4314                                    const OperandVector &Operands) {
4315   unsigned Opc = Inst.getOpcode();
4316   int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4317   if (BlgpIdx == -1)
4318     return true;
4319   SMLoc BLGPLoc = getBLGPLoc(Operands);
4320   if (!BLGPLoc.isValid())
4321     return true;
4322   bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:");
4323   auto FB = getFeatureBits();
4324   bool UsesNeg = false;
4325   if (FB[AMDGPU::FeatureGFX940Insts]) {
4326     switch (Opc) {
4327     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4328     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4329     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4330     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4331       UsesNeg = true;
4332     }
4333   }
4334 
4335   if (IsNeg == UsesNeg)
4336     return true;
4337 
4338   Error(BLGPLoc,
4339         UsesNeg ? "invalid modifier: blgp is not supported"
4340                 : "invalid modifier: neg is not supported");
4341 
4342   return false;
4343 }
4344 
4345 // gfx90a has an undocumented limitation:
4346 // DS_GWS opcodes must use even-aligned registers.
4347 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4348                                   const OperandVector &Operands) {
4349   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4350     return true;
4351 
4352   int Opc = Inst.getOpcode();
4353   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4354       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4355     return true;
4356 
4357   const MCRegisterInfo *MRI = getMRI();
4358   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4359   int Data0Pos =
4360       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4361   assert(Data0Pos != -1);
4362   auto Reg = Inst.getOperand(Data0Pos).getReg();
4363   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4364   if (RegIdx & 1) {
4365     SMLoc RegLoc = getRegLoc(Reg, Operands);
4366     Error(RegLoc, "vgpr must be even aligned");
4367     return false;
4368   }
4369 
4370   return true;
4371 }
4372 
4373 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4374                                             const OperandVector &Operands,
4375                                             const SMLoc &IDLoc) {
4376   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4377                                            AMDGPU::OpName::cpol);
4378   if (CPolPos == -1)
4379     return true;
4380 
4381   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4382 
4383   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4384   if (TSFlags & SIInstrFlags::SMRD) {
4385     if (CPol && (isSI() || isCI())) {
4386       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4387       Error(S, "cache policy is not supported for SMRD instructions");
4388       return false;
4389     }
4390     if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4391       Error(IDLoc, "invalid cache policy for SMEM instruction");
4392       return false;
4393     }
4394   }
4395 
4396   if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4397     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4398     StringRef CStr(S.getPointer());
4399     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4400     Error(S, "scc is not supported on this GPU");
4401     return false;
4402   }
4403 
4404   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4405     return true;
4406 
4407   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4408     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4409       Error(IDLoc, isGFX940() ? "instruction must use sc0"
4410                               : "instruction must use glc");
4411       return false;
4412     }
4413   } else {
4414     if (CPol & CPol::GLC) {
4415       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4416       StringRef CStr(S.getPointer());
4417       S = SMLoc::getFromPointer(
4418           &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4419       Error(S, isGFX940() ? "instruction must not use sc0"
4420                           : "instruction must not use glc");
4421       return false;
4422     }
4423   }
4424 
4425   return true;
4426 }
4427 
4428 bool AMDGPUAsmParser::validateFlatLdsDMA(const MCInst &Inst,
4429                                          const OperandVector &Operands,
4430                                          const SMLoc &IDLoc) {
4431   if (isGFX940())
4432     return true;
4433 
4434   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4435   if ((TSFlags & (SIInstrFlags::VALU | SIInstrFlags::FLAT)) !=
4436       (SIInstrFlags::VALU | SIInstrFlags::FLAT))
4437     return true;
4438   // This is FLAT LDS DMA.
4439 
4440   SMLoc S = getImmLoc(AMDGPUOperand::ImmTyLDS, Operands);
4441   StringRef CStr(S.getPointer());
4442   if (!CStr.startswith("lds")) {
4443     // This is an incorrectly selected LDS DMA version of a FLAT load opcode.
4444     // The LDS version should have the 'lds' modifier, but it follows optional
4445     // operands, so its absence is ignored by the matcher.
4446     Error(IDLoc, "invalid operands for instruction");
4447     return false;
4448   }
4449 
4450   return true;
4451 }
4452 
4453 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4454                                           const SMLoc &IDLoc,
4455                                           const OperandVector &Operands) {
4456   if (auto ErrMsg = validateLdsDirect(Inst)) {
4457     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4458     return false;
4459   }
4460   if (!validateSOPLiteral(Inst)) {
4461     Error(getLitLoc(Operands),
4462       "only one literal operand is allowed");
4463     return false;
4464   }
4465   if (!validateVOPLiteral(Inst, Operands)) {
4466     return false;
4467   }
4468   if (!validateConstantBusLimitations(Inst, Operands)) {
4469     return false;
4470   }
4471   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4472     return false;
4473   }
4474   if (!validateIntClampSupported(Inst)) {
4475     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4476       "integer clamping is not supported on this GPU");
4477     return false;
4478   }
4479   if (!validateOpSel(Inst)) {
4480     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4481       "invalid op_sel operand");
4482     return false;
4483   }
4484   if (!validateDPP(Inst, Operands)) {
4485     return false;
4486   }
4487   // For MUBUF/MTBUF, d16 is part of the opcode, so there is nothing to validate.
4488   if (!validateMIMGD16(Inst)) {
4489     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4490       "d16 modifier is not supported on this GPU");
4491     return false;
4492   }
4493   if (!validateMIMGDim(Inst)) {
4494     Error(IDLoc, "dim modifier is required on this GPU");
4495     return false;
4496   }
4497   if (!validateMIMGMSAA(Inst)) {
4498     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4499           "invalid dim; must be MSAA type");
4500     return false;
4501   }
4502   if (auto ErrMsg = validateMIMGDataSize(Inst)) {
4503     Error(IDLoc, *ErrMsg);
4504     return false;
4505   }
4506   if (!validateMIMGAddrSize(Inst)) {
4507     Error(IDLoc,
4508       "image address size does not match dim and a16");
4509     return false;
4510   }
4511   if (!validateMIMGAtomicDMask(Inst)) {
4512     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4513       "invalid atomic image dmask");
4514     return false;
4515   }
4516   if (!validateMIMGGatherDMask(Inst)) {
4517     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4518       "invalid image_gather dmask: only one bit must be set");
4519     return false;
4520   }
4521   if (!validateMovrels(Inst, Operands)) {
4522     return false;
4523   }
4524   if (!validateFlatOffset(Inst, Operands)) {
4525     return false;
4526   }
4527   if (!validateSMEMOffset(Inst, Operands)) {
4528     return false;
4529   }
4530   if (!validateMAIAccWrite(Inst, Operands)) {
4531     return false;
4532   }
4533   if (!validateMFMA(Inst, Operands)) {
4534     return false;
4535   }
4536   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4537     return false;
4538   }
4539 
4540   if (!validateAGPRLdSt(Inst)) {
4541     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4542     ? "invalid register class: data and dst should be all VGPR or AGPR"
4543     : "invalid register class: agpr loads and stores not supported on this GPU"
4544     );
4545     return false;
4546   }
4547   if (!validateVGPRAlign(Inst)) {
4548     Error(IDLoc,
4549       "invalid register class: vgpr tuples must be 64 bit aligned");
4550     return false;
4551   }
4552   if (!validateGWS(Inst, Operands)) {
4553     return false;
4554   }
4555 
4556   if (!validateBLGP(Inst, Operands)) {
4557     return false;
4558   }
4559 
4560   if (!validateDivScale(Inst)) {
4561     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4562     return false;
4563   }
4567 
4568   if (!validateFlatLdsDMA(Inst, Operands, IDLoc)) {
4569     return false;
4570   }
4571 
4572   return true;
4573 }
4574 
4575 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4576                                             const FeatureBitset &FBS,
4577                                             unsigned VariantID = 0);
4578 
4579 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4580                                 const FeatureBitset &AvailableFeatures,
4581                                 unsigned VariantID);
4582 
4583 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4584                                        const FeatureBitset &FBS) {
4585   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4586 }
4587 
4588 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4589                                        const FeatureBitset &FBS,
4590                                        ArrayRef<unsigned> Variants) {
4591   for (auto Variant : Variants) {
4592     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4593       return true;
4594   }
4595 
4596   return false;
4597 }
4598 
4599 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4600                                                   const SMLoc &IDLoc) {
4601   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4602 
4603   // Check if requested instruction variant is supported.
4604   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4605     return false;
4606 
4607   // This instruction is not supported.
4608   // Clear any other pending errors because they are no longer relevant.
4609   getParser().clearPendingErrors();
4610 
4611   // Requested instruction variant is not supported.
4612   // Check if any other variants are supported.
4613   StringRef VariantName = getMatchedVariantName();
4614   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4615     return Error(IDLoc,
4616                  Twine(VariantName,
4617                        " variant of this instruction is not supported"));
4618   }
4619 
4620   // Finally check if this instruction is supported on any other GPU.
4621   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4622     return Error(IDLoc, "instruction not supported on this GPU");
4623   }
4624 
4625   // Instruction not supported on any GPU. Probably a typo.
4626   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4627   return Error(IDLoc, "invalid instruction" + Suggestion);
4628 }
4629 
4630 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4631                                               OperandVector &Operands,
4632                                               MCStreamer &Out,
4633                                               uint64_t &ErrorInfo,
4634                                               bool MatchingInlineAsm) {
4635   MCInst Inst;
4636   unsigned Result = Match_Success;
4637   for (auto Variant : getMatchedVariants()) {
4638     uint64_t EI;
4639     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4640                                   Variant);
4641     // Match statuses are ordered from least to most specific; the most specific
4642     // status found is used as the result:
4643     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4644     if ((R == Match_Success) ||
4645         (R == Match_PreferE32) ||
4646         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4647         (R == Match_InvalidOperand && Result != Match_MissingFeature
4648                                    && Result != Match_PreferE32) ||
4649         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4650                                    && Result != Match_MissingFeature
4651                                    && Result != Match_PreferE32)) {
4652       Result = R;
4653       ErrorInfo = EI;
4654     }
4655     if (R == Match_Success)
4656       break;
4657   }
4658 
4659   if (Result == Match_Success) {
4660     if (!validateInstruction(Inst, IDLoc, Operands)) {
4661       return true;
4662     }
4663     Inst.setLoc(IDLoc);
4664     Out.emitInstruction(Inst, getSTI());
4665     return false;
4666   }
4667 
4668   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4669   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4670     return true;
4671   }
4672 
4673   switch (Result) {
4674   default: break;
4675   case Match_MissingFeature:
4676     // It has been verified that the specified instruction
4677     // mnemonic is valid. A match was found but it requires
4678     // features which are not supported on this GPU.
4679     return Error(IDLoc, "operands are not valid for this GPU or mode");
4680 
4681   case Match_InvalidOperand: {
4682     SMLoc ErrorLoc = IDLoc;
4683     if (ErrorInfo != ~0ULL) {
4684       if (ErrorInfo >= Operands.size()) {
4685         return Error(IDLoc, "too few operands for instruction");
4686       }
4687       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4688       if (ErrorLoc == SMLoc())
4689         ErrorLoc = IDLoc;
4690     }
4691     return Error(ErrorLoc, "invalid operand for instruction");
4692   }
4693 
4694   case Match_PreferE32:
4695     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4696                         "should be encoded as e32");
4697   case Match_MnemonicFail:
4698     llvm_unreachable("Invalid instructions should have been handled already");
4699   }
4700   llvm_unreachable("Implement any new match types added!");
4701 }
4702 
4703 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4704   int64_t Tmp = -1;
4705   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4706     return true;
4707   }
4708   if (getParser().parseAbsoluteExpression(Tmp)) {
4709     return true;
4710   }
4711   Ret = static_cast<uint32_t>(Tmp);
4712   return false;
4713 }
4714 
4715 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4716                                                uint32_t &Minor) {
4717   if (ParseAsAbsoluteExpression(Major))
4718     return TokError("invalid major version");
4719 
4720   if (!trySkipToken(AsmToken::Comma))
4721     return TokError("minor version number required, comma expected");
4722 
4723   if (ParseAsAbsoluteExpression(Minor))
4724     return TokError("invalid minor version");
4725 
4726   return false;
4727 }
4728 
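// Illustrative example of the directive handled below (the target string is a
// placeholder; the actual value must match the target id in effect):
//   .amdgcn_target "amdgcn-amd-amdhsa--gfx90a:xnack+"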
4729 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4730   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4731     return TokError("directive only supported for amdgcn architecture");
4732 
4733   std::string TargetIDDirective;
4734   SMLoc TargetStart = getTok().getLoc();
4735   if (getParser().parseEscapedString(TargetIDDirective))
4736     return true;
4737 
4738   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4739   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4740     return getParser().Error(TargetRange.Start,
4741         (Twine(".amdgcn_target directive's target id ") +
4742          Twine(TargetIDDirective) +
4743          Twine(" does not match the specified target id ") +
4744          Twine(getTargetStreamer().getTargetID()->toString())).str());
4745 
4746   return false;
4747 }
4748 
4749 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4750   return Error(Range.Start, "value out of range", Range);
4751 }
4752 
4753 bool AMDGPUAsmParser::calculateGPRBlocks(
4754     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4755     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4756     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4757     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4758   // TODO(scott.linder): These calculations are duplicated from
4759   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4760   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4761 
4762   unsigned NumVGPRs = NextFreeVGPR;
4763   unsigned NumSGPRs = NextFreeSGPR;
4764 
4765   if (Version.Major >= 10)
4766     NumSGPRs = 0;
4767   else {
4768     unsigned MaxAddressableNumSGPRs =
4769         IsaInfo::getAddressableNumSGPRs(&getSTI());
4770 
4771     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4772         NumSGPRs > MaxAddressableNumSGPRs)
4773       return OutOfRangeError(SGPRRange);
4774 
4775     NumSGPRs +=
4776         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4777 
4778     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4779         NumSGPRs > MaxAddressableNumSGPRs)
4780       return OutOfRangeError(SGPRRange);
4781 
4782     if (Features.test(FeatureSGPRInitBug))
4783       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4784   }
4785 
4786   VGPRBlocks =
4787       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4788   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4789 
4790   return false;
4791 }
4792 
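// Parses a .amdhsa_kernel block. A minimal, illustrative example (kernel name
// and register counts are placeholders):
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel
// .amdhsa_next_free_vgpr and .amdhsa_next_free_sgpr are required (and
// .amdhsa_accum_offset on gfx90a); other fields default to the values in the
// default kernel descriptor.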
4793 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4794   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4795     return TokError("directive only supported for amdgcn architecture");
4796 
4797   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4798     return TokError("directive only supported for amdhsa OS");
4799 
4800   StringRef KernelName;
4801   if (getParser().parseIdentifier(KernelName))
4802     return true;
4803 
4804   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4805 
4806   StringSet<> Seen;
4807 
4808   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4809 
4810   SMRange VGPRRange;
4811   uint64_t NextFreeVGPR = 0;
4812   uint64_t AccumOffset = 0;
4813   uint64_t SharedVGPRCount = 0;
4814   SMRange SGPRRange;
4815   uint64_t NextFreeSGPR = 0;
4816 
4817   // Count the number of user SGPRs implied from the enabled feature bits.
4818   unsigned ImpliedUserSGPRCount = 0;
4819 
4820   // Track if the asm explicitly contains the directive for the user SGPR
4821   // count.
4822   Optional<unsigned> ExplicitUserSGPRCount;
4823   bool ReserveVCC = true;
4824   bool ReserveFlatScr = true;
4825   Optional<bool> EnableWavefrontSize32;
4826 
4827   while (true) {
4828     while (trySkipToken(AsmToken::EndOfStatement));
4829 
4830     StringRef ID;
4831     SMRange IDRange = getTok().getLocRange();
4832     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4833       return true;
4834 
4835     if (ID == ".end_amdhsa_kernel")
4836       break;
4837 
4838     if (Seen.find(ID) != Seen.end())
4839       return TokError(".amdhsa_ directives cannot be repeated");
4840     Seen.insert(ID);
4841 
4842     SMLoc ValStart = getLoc();
4843     int64_t IVal;
4844     if (getParser().parseAbsoluteExpression(IVal))
4845       return true;
4846     SMLoc ValEnd = getLoc();
4847     SMRange ValRange = SMRange(ValStart, ValEnd);
4848 
4849     if (IVal < 0)
4850       return OutOfRangeError(ValRange);
4851 
4852     uint64_t Val = IVal;
4853 
4854 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4855   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4856     return OutOfRangeError(RANGE);                                             \
4857   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4858 
4859     if (ID == ".amdhsa_group_segment_fixed_size") {
4860       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4861         return OutOfRangeError(ValRange);
4862       KD.group_segment_fixed_size = Val;
4863     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4864       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4865         return OutOfRangeError(ValRange);
4866       KD.private_segment_fixed_size = Val;
4867     } else if (ID == ".amdhsa_kernarg_size") {
4868       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4869         return OutOfRangeError(ValRange);
4870       KD.kernarg_size = Val;
4871     } else if (ID == ".amdhsa_user_sgpr_count") {
4872       ExplicitUserSGPRCount = Val;
4873     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4874       if (hasArchitectedFlatScratch())
4875         return Error(IDRange.Start,
4876                      "directive is not supported with architected flat scratch",
4877                      IDRange);
4878       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4879                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4880                        Val, ValRange);
4881       if (Val)
4882         ImpliedUserSGPRCount += 4;
4883     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4884       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4885                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4886                        ValRange);
4887       if (Val)
4888         ImpliedUserSGPRCount += 2;
4889     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4890       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4891                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4892                        ValRange);
4893       if (Val)
4894         ImpliedUserSGPRCount += 2;
4895     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4896       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4897                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4898                        Val, ValRange);
4899       if (Val)
4900         ImpliedUserSGPRCount += 2;
4901     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4902       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4903                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4904                        ValRange);
4905       if (Val)
4906         ImpliedUserSGPRCount += 2;
4907     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4908       if (hasArchitectedFlatScratch())
4909         return Error(IDRange.Start,
4910                      "directive is not supported with architected flat scratch",
4911                      IDRange);
4912       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4913                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4914                        ValRange);
4915       if (Val)
4916         ImpliedUserSGPRCount += 2;
4917     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4918       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4919                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4920                        Val, ValRange);
4921       if (Val)
4922         ImpliedUserSGPRCount += 1;
4923     } else if (ID == ".amdhsa_wavefront_size32") {
4924       if (IVersion.Major < 10)
4925         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4926       EnableWavefrontSize32 = Val;
4927       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4928                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4929                        Val, ValRange);
4930     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4931       if (hasArchitectedFlatScratch())
4932         return Error(IDRange.Start,
4933                      "directive is not supported with architected flat scratch",
4934                      IDRange);
4935       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4936                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4937     } else if (ID == ".amdhsa_enable_private_segment") {
4938       if (!hasArchitectedFlatScratch())
4939         return Error(
4940             IDRange.Start,
4941             "directive is not supported without architected flat scratch",
4942             IDRange);
4943       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4944                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4945     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4946       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4947                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4948                        ValRange);
4949     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4950       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4951                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4952                        ValRange);
4953     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4954       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4955                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4956                        ValRange);
4957     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4958       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4959                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4960                        ValRange);
4961     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4962       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4963                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4964                        ValRange);
4965     } else if (ID == ".amdhsa_next_free_vgpr") {
4966       VGPRRange = ValRange;
4967       NextFreeVGPR = Val;
4968     } else if (ID == ".amdhsa_next_free_sgpr") {
4969       SGPRRange = ValRange;
4970       NextFreeSGPR = Val;
4971     } else if (ID == ".amdhsa_accum_offset") {
4972       if (!isGFX90A())
4973         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4974       AccumOffset = Val;
4975     } else if (ID == ".amdhsa_reserve_vcc") {
4976       if (!isUInt<1>(Val))
4977         return OutOfRangeError(ValRange);
4978       ReserveVCC = Val;
4979     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4980       if (IVersion.Major < 7)
4981         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4982       if (hasArchitectedFlatScratch())
4983         return Error(IDRange.Start,
4984                      "directive is not supported with architected flat scratch",
4985                      IDRange);
4986       if (!isUInt<1>(Val))
4987         return OutOfRangeError(ValRange);
4988       ReserveFlatScr = Val;
4989     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4990       if (IVersion.Major < 8)
4991         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4992       if (!isUInt<1>(Val))
4993         return OutOfRangeError(ValRange);
4994       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
4995         return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
4996                                  IDRange);
4997     } else if (ID == ".amdhsa_float_round_mode_32") {
4998       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4999                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
5000     } else if (ID == ".amdhsa_float_round_mode_16_64") {
5001       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5002                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
5003     } else if (ID == ".amdhsa_float_denorm_mode_32") {
5004       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5005                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
5006     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5007       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5008                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
5009                        ValRange);
5010     } else if (ID == ".amdhsa_dx10_clamp") {
5011       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5012                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
5013     } else if (ID == ".amdhsa_ieee_mode") {
5014       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
5015                        Val, ValRange);
5016     } else if (ID == ".amdhsa_fp16_overflow") {
5017       if (IVersion.Major < 9)
5018         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5019       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
5020                        ValRange);
5021     } else if (ID == ".amdhsa_tg_split") {
5022       if (!isGFX90A())
5023         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5024       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
5025                        ValRange);
5026     } else if (ID == ".amdhsa_workgroup_processor_mode") {
5027       if (IVersion.Major < 10)
5028         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5029       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
5030                        ValRange);
5031     } else if (ID == ".amdhsa_memory_ordered") {
5032       if (IVersion.Major < 10)
5033         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5034       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
5035                        ValRange);
5036     } else if (ID == ".amdhsa_forward_progress") {
5037       if (IVersion.Major < 10)
5038         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5039       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
5040                        ValRange);
5041     } else if (ID == ".amdhsa_shared_vgpr_count") {
5042       if (IVersion.Major < 10)
5043         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5044       SharedVGPRCount = Val;
5045       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5046                        COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val,
5047                        ValRange);
5048     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5049       PARSE_BITS_ENTRY(
5050           KD.compute_pgm_rsrc2,
5051           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
5052           ValRange);
5053     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5054       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5055                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5056                        Val, ValRange);
5057     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5058       PARSE_BITS_ENTRY(
5059           KD.compute_pgm_rsrc2,
5060           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
5061           ValRange);
5062     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5063       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5064                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5065                        Val, ValRange);
5066     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5067       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5068                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5069                        Val, ValRange);
5070     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5071       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5072                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5073                        Val, ValRange);
5074     } else if (ID == ".amdhsa_exception_int_div_zero") {
5075       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5076                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5077                        Val, ValRange);
5078     } else {
5079       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5080     }
5081 
5082 #undef PARSE_BITS_ENTRY
5083   }
5084 
5085   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
5086     return TokError(".amdhsa_next_free_vgpr directive is required");
5087 
5088   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
5089     return TokError(".amdhsa_next_free_sgpr directive is required");
5090 
5091   unsigned VGPRBlocks;
5092   unsigned SGPRBlocks;
5093   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5094                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5095                          EnableWavefrontSize32, NextFreeVGPR,
5096                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5097                          SGPRBlocks))
5098     return true;
5099 
5100   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5101           VGPRBlocks))
5102     return OutOfRangeError(VGPRRange);
5103   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5104                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
5105 
5106   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5107           SGPRBlocks))
5108     return OutOfRangeError(SGPRRange);
5109   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5110                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
5111                   SGPRBlocks);
5112 
5113   if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5114     return TokError(".amdhsa_user_sgpr_count smaller than implied by "
5115                     "enabled user SGPRs");
5116 
5117   unsigned UserSGPRCount =
5118       ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5119 
5120   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5121     return TokError("too many user SGPRs enabled");
5122   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
5123                   UserSGPRCount);
5124 
5125   if (isGFX90A()) {
5126     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
5127       return TokError(".amdhsa_accum_offset directive is required");
5128     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5129       return TokError("accum_offset should be in range [4..256] in "
5130                       "increments of 4");
5131     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5132       return TokError("accum_offset exceeds total VGPR allocation");
5133     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5134                     (AccumOffset / 4 - 1));
5135   }
5136 
5137   if (IVersion.Major == 10) {
5138     // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
5139     if (SharedVGPRCount && EnableWavefrontSize32) {
5140       return TokError("shared_vgpr_count directive not valid on "
5141                       "wavefront size 32");
5142     }
5143     if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5144       return TokError("shared_vgpr_count*2 + "
5145                       "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5146                       "exceed 63");
5147     }
5148   }
5149 
5150   getTargetStreamer().EmitAmdhsaKernelDescriptor(
5151       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
5152       ReserveFlatScr);
5153   return false;
5154 }
5155 
5156 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
5157   uint32_t Major;
5158   uint32_t Minor;
5159 
5160   if (ParseDirectiveMajorMinor(Major, Minor))
5161     return true;
5162 
5163   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
5164   return false;
5165 }
5166 
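// Parses .hsa_code_object_isa. With no arguments, the ISA version of the
// targeted GPU is used; otherwise the full form is (values are illustrative):
//   .hsa_code_object_isa 7, 0, 0, "AMD", "AMDGPU"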
5167 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
5168   uint32_t Major;
5169   uint32_t Minor;
5170   uint32_t Stepping;
5171   StringRef VendorName;
5172   StringRef ArchName;
5173 
5174   // If this directive has no arguments, then use the ISA version for the
5175   // targeted GPU.
5176   if (isToken(AsmToken::EndOfStatement)) {
5177     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5178     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
5179                                                         ISA.Stepping,
5180                                                         "AMD", "AMDGPU");
5181     return false;
5182   }
5183 
5184   if (ParseDirectiveMajorMinor(Major, Minor))
5185     return true;
5186 
5187   if (!trySkipToken(AsmToken::Comma))
5188     return TokError("stepping version number required, comma expected");
5189 
5190   if (ParseAsAbsoluteExpression(Stepping))
5191     return TokError("invalid stepping version");
5192 
5193   if (!trySkipToken(AsmToken::Comma))
5194     return TokError("vendor name required, comma expected");
5195 
5196   if (!parseString(VendorName, "invalid vendor name"))
5197     return true;
5198 
5199   if (!trySkipToken(AsmToken::Comma))
5200     return TokError("arch name required, comma expected");
5201 
5202   if (!parseString(ArchName, "invalid arch name"))
5203     return true;
5204 
5205   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
5206                                                       VendorName, ArchName);
5207   return false;
5208 }
5209 
5210 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5211                                                amd_kernel_code_t &Header) {
5212   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5213   // assembly for backwards compatibility.
5214   if (ID == "max_scratch_backing_memory_byte_size") {
5215     Parser.eatToEndOfStatement();
5216     return false;
5217   }
5218 
5219   SmallString<40> ErrStr;
5220   raw_svector_ostream Err(ErrStr);
5221   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5222     return TokError(Err.str());
5223   }
5224   Lex();
5225 
5226   if (ID == "enable_wavefront_size32") {
5227     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5228       if (!isGFX10Plus())
5229         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5230       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5231         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5232     } else {
5233       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5234         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5235     }
5236   }
5237 
5238   if (ID == "wavefront_size") {
5239     if (Header.wavefront_size == 5) {
5240       if (!isGFX10Plus())
5241         return TokError("wavefront_size=5 is only allowed on GFX10+");
5242       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5243         return TokError("wavefront_size=5 requires +WavefrontSize32");
5244     } else if (Header.wavefront_size == 6) {
5245       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5246         return TokError("wavefront_size=6 requires +WavefrontSize64");
5247     }
5248   }
5249 
5250   if (ID == "enable_wgp_mode") {
5251     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5252         !isGFX10Plus())
5253       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5254   }
5255 
5256   if (ID == "enable_mem_ordered") {
5257     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5258         !isGFX10Plus())
5259       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5260   }
5261 
5262   if (ID == "enable_fwd_progress") {
5263     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5264         !isGFX10Plus())
5265       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5266   }
5267 
5268   return false;
5269 }
5270 
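// Parses an .amd_kernel_code_t block, e.g. (field values are illustrative):
//   .amd_kernel_code_t
//     wavefront_size = 6
//     enable_wgp_mode = 0
//   .end_amd_kernel_code_t
// Errors from parseAmdKernelCodeField are reported via TokError.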
5271 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5272   amd_kernel_code_t Header;
5273   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5274 
5275   while (true) {
5276     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
5277     // will set the current token to EndOfStatement.
5278     while(trySkipToken(AsmToken::EndOfStatement));
5279 
5280     StringRef ID;
5281     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5282       return true;
5283 
5284     if (ID == ".end_amd_kernel_code_t")
5285       break;
5286 
5287     if (ParseAMDKernelCodeTValue(ID, Header))
5288       return true;
5289   }
5290 
5291   getTargetStreamer().EmitAMDKernelCodeT(Header);
5292 
5293   return false;
5294 }
5295 
5296 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5297   StringRef KernelName;
5298   if (!parseId(KernelName, "expected symbol name"))
5299     return true;
5300 
5301   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5302                                            ELF::STT_AMDGPU_HSA_KERNEL);
5303 
5304   KernelScope.initialize(getContext());
5305   return false;
5306 }
5307 
5308 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5309   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5310     return Error(getLoc(),
5311                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5312                  "architectures");
5313   }
5314 
5315   auto TargetIDDirective = getLexer().getTok().getStringContents();
5316   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5317     return Error(getParser().getTok().getLoc(), "target id must match options");
5318 
5319   getTargetStreamer().EmitISAVersion();
5320   Lex();
5321 
5322   return false;
5323 }
5324 
5325 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5326   const char *AssemblerDirectiveBegin;
5327   const char *AssemblerDirectiveEnd;
5328   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5329       isHsaAbiVersion3AndAbove(&getSTI())
5330           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5331                             HSAMD::V3::AssemblerDirectiveEnd)
5332           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5333                             HSAMD::AssemblerDirectiveEnd);
5334 
5335   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5336     return Error(getLoc(),
5337                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5338                  "not available on non-amdhsa OSes")).str());
5339   }
5340 
5341   std::string HSAMetadataString;
5342   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5343                           HSAMetadataString))
5344     return true;
5345 
5346   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5347     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5348       return Error(getLoc(), "invalid HSA metadata");
5349   } else {
5350     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5351       return Error(getLoc(), "invalid HSA metadata");
5352   }
5353 
5354   return false;
5355 }
5356 
5357 /// Common code to parse out a block of text (typically YAML) between start and
5358 /// end directives.
5359 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5360                                           const char *AssemblerDirectiveEnd,
5361                                           std::string &CollectString) {
5362 
5363   raw_string_ostream CollectStream(CollectString);
5364 
5365   getLexer().setSkipSpace(false);
5366 
5367   bool FoundEnd = false;
5368   while (!isToken(AsmToken::Eof)) {
5369     while (isToken(AsmToken::Space)) {
5370       CollectStream << getTokenStr();
5371       Lex();
5372     }
5373 
5374     if (trySkipId(AssemblerDirectiveEnd)) {
5375       FoundEnd = true;
5376       break;
5377     }
5378 
5379     CollectStream << Parser.parseStringToEndOfStatement()
5380                   << getContext().getAsmInfo()->getSeparatorString();
5381 
5382     Parser.eatToEndOfStatement();
5383   }
5384 
5385   getLexer().setSkipSpace(true);
5386 
5387   if (isToken(AsmToken::Eof) && !FoundEnd) {
5388     return TokError(Twine("expected directive ") +
5389                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5390   }
5391 
5392   CollectStream.flush();
5393   return false;
5394 }
5395 
5396 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5397 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5398   std::string String;
5399   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5400                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5401     return true;
5402 
5403   auto PALMetadata = getTargetStreamer().getPALMetadata();
5404   if (!PALMetadata->setFromString(String))
5405     return Error(getLoc(), "invalid PAL metadata");
5406   return false;
5407 }
5408 
5409 /// Parse the assembler directive for old linear-format PAL metadata.
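/// The legacy form is a comma-separated list of register/value pairs, e.g.
/// (values are illustrative): 0x2c0a, 0x0, 0x2c0b, 0x42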
5410 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5411   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5412     return Error(getLoc(),
5413                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5414                  "not available on non-amdpal OSes")).str());
5415   }
5416 
5417   auto PALMetadata = getTargetStreamer().getPALMetadata();
5418   PALMetadata->setLegacy();
5419   for (;;) {
5420     uint32_t Key, Value;
5421     if (ParseAsAbsoluteExpression(Key)) {
5422       return TokError(Twine("invalid value in ") +
5423                       Twine(PALMD::AssemblerDirective));
5424     }
5425     if (!trySkipToken(AsmToken::Comma)) {
5426       return TokError(Twine("expected an even number of values in ") +
5427                       Twine(PALMD::AssemblerDirective));
5428     }
5429     if (ParseAsAbsoluteExpression(Value)) {
5430       return TokError(Twine("invalid value in ") +
5431                       Twine(PALMD::AssemblerDirective));
5432     }
5433     PALMetadata->setRegister(Key, Value);
5434     if (!trySkipToken(AsmToken::Comma))
5435       break;
5436   }
5437   return false;
5438 }
5439 
5440 /// ParseDirectiveAMDGPULDS
5441 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
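///  e.g. (symbol, size, and alignment are illustrative):
///    .amdgpu_lds lds_sym, 4096, 16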
5442 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5443   if (getParser().checkForValidSection())
5444     return true;
5445 
5446   StringRef Name;
5447   SMLoc NameLoc = getLoc();
5448   if (getParser().parseIdentifier(Name))
5449     return TokError("expected identifier in directive");
5450 
5451   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5452   if (parseToken(AsmToken::Comma, "expected ','"))
5453     return true;
5454 
5455   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5456 
5457   int64_t Size;
5458   SMLoc SizeLoc = getLoc();
5459   if (getParser().parseAbsoluteExpression(Size))
5460     return true;
5461   if (Size < 0)
5462     return Error(SizeLoc, "size must be non-negative");
5463   if (Size > LocalMemorySize)
5464     return Error(SizeLoc, "size is too large");
5465 
5466   int64_t Alignment = 4;
5467   if (trySkipToken(AsmToken::Comma)) {
5468     SMLoc AlignLoc = getLoc();
5469     if (getParser().parseAbsoluteExpression(Alignment))
5470       return true;
5471     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5472       return Error(AlignLoc, "alignment must be a power of two");
5473 
5474     // Alignment larger than the size of LDS is possible in theory, as long
5475     // as the linker manages to place the symbol at address 0, but we do want
5476     // to make sure the alignment fits nicely into a 32-bit integer.
5477     if (Alignment >= 1u << 31)
5478       return Error(AlignLoc, "alignment is too large");
5479   }
5480 
5481   if (parseToken(AsmToken::EndOfStatement,
5482                  "unexpected token in '.amdgpu_lds' directive"))
5483     return true;
5484 
5485   Symbol->redefineIfPossible();
5486   if (!Symbol->isUndefined())
5487     return Error(NameLoc, "invalid symbol redefinition");
5488 
5489   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5490   return false;
5491 }
5492 
5493 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5494   StringRef IDVal = DirectiveID.getString();
5495 
5496   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5497     if (IDVal == ".amdhsa_kernel")
5498      return ParseDirectiveAMDHSAKernel();
5499 
5500     // TODO: Restructure/combine with PAL metadata directive.
5501     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5502       return ParseDirectiveHSAMetadata();
5503   } else {
5504     if (IDVal == ".hsa_code_object_version")
5505       return ParseDirectiveHSACodeObjectVersion();
5506 
5507     if (IDVal == ".hsa_code_object_isa")
5508       return ParseDirectiveHSACodeObjectISA();
5509 
5510     if (IDVal == ".amd_kernel_code_t")
5511       return ParseDirectiveAMDKernelCodeT();
5512 
5513     if (IDVal == ".amdgpu_hsa_kernel")
5514       return ParseDirectiveAMDGPUHsaKernel();
5515 
5516     if (IDVal == ".amd_amdgpu_isa")
5517       return ParseDirectiveISAVersion();
5518 
5519     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5520       return ParseDirectiveHSAMetadata();
5521   }
5522 
5523   if (IDVal == ".amdgcn_target")
5524     return ParseDirectiveAMDGCNTarget();
5525 
5526   if (IDVal == ".amdgpu_lds")
5527     return ParseDirectiveAMDGPULDS();
5528 
5529   if (IDVal == PALMD::AssemblerDirectiveBegin)
5530     return ParseDirectivePALMetadataBegin();
5531 
5532   if (IDVal == PALMD::AssemblerDirective)
5533     return ParseDirectivePALMetadata();
5534 
5535   return true;
5536 }
5537 
5538 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5539                                            unsigned RegNo) {
5540 
5541   if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
5542     return isGFX9Plus();
5543 
5544   // GFX10 has 2 more SGPRs: 104 and 105.
5545   if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
5546     return hasSGPR104_SGPR105();
5547 
5548   switch (RegNo) {
5549   case AMDGPU::SRC_SHARED_BASE:
5550   case AMDGPU::SRC_SHARED_LIMIT:
5551   case AMDGPU::SRC_PRIVATE_BASE:
5552   case AMDGPU::SRC_PRIVATE_LIMIT:
5553   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5554     return isGFX9Plus();
5555   case AMDGPU::TBA:
5556   case AMDGPU::TBA_LO:
5557   case AMDGPU::TBA_HI:
5558   case AMDGPU::TMA:
5559   case AMDGPU::TMA_LO:
5560   case AMDGPU::TMA_HI:
5561     return !isGFX9Plus();
5562   case AMDGPU::XNACK_MASK:
5563   case AMDGPU::XNACK_MASK_LO:
5564   case AMDGPU::XNACK_MASK_HI:
5565     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5566   case AMDGPU::SGPR_NULL:
5567     return isGFX10Plus();
5568   default:
5569     break;
5570   }
5571 
5572   if (isCI())
5573     return true;
5574 
5575   if (isSI() || isGFX10Plus()) {
5576     // No flat_scr on SI.
5577     // On GFX10 flat scratch is not a valid register operand and can only be
5578     // accessed with s_setreg/s_getreg.
5579     switch (RegNo) {
5580     case AMDGPU::FLAT_SCR:
5581     case AMDGPU::FLAT_SCR_LO:
5582     case AMDGPU::FLAT_SCR_HI:
5583       return false;
5584     default:
5585       return true;
5586     }
5587   }
5588 
5589   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5590   // SI/CI have.
5591   if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
5592     return hasSGPR102_SGPR103();
5593 
5594   return true;
5595 }
5596 
5597 OperandMatchResultTy
5598 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5599                               OperandMode Mode) {
5600   // Try to parse with a custom parser
5601   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5602 
5603   // If we successfully parsed the operand or if there was an error parsing,
5604   // we are done.
5605   //
5606   // If we are parsing after we reach EndOfStatement then this means we
5607   // are appending default values to the Operands list.  This is only done
5608   // by a custom parser, so we shouldn't continue on to the generic parsing.
5609   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5610       isToken(AsmToken::EndOfStatement))
5611     return ResTy;
5612 
5613   SMLoc RBraceLoc;
5614   SMLoc LBraceLoc = getLoc();
5615   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5616     unsigned Prefix = Operands.size();
5617 
5618     for (;;) {
5619       auto Loc = getLoc();
5620       ResTy = parseReg(Operands);
5621       if (ResTy == MatchOperand_NoMatch)
5622         Error(Loc, "expected a register");
5623       if (ResTy != MatchOperand_Success)
5624         return MatchOperand_ParseFail;
5625 
5626       RBraceLoc = getLoc();
5627       if (trySkipToken(AsmToken::RBrac))
5628         break;
5629 
5630       if (!skipToken(AsmToken::Comma,
5631                      "expected a comma or a closing square bracket")) {
5632         return MatchOperand_ParseFail;
5633       }
5634     }
5635 
5636     if (Operands.size() - Prefix > 1) {
5637       Operands.insert(Operands.begin() + Prefix,
5638                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5639       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5640     }
5641 
5642     return MatchOperand_Success;
5643   }
5644 
5645   return parseRegOrImm(Operands);
5646 }
5647 
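// Strips a forced-encoding suffix from the mnemonic and records it; e.g.
// (mnemonic is illustrative) "v_add_f32_e64" is parsed as "v_add_f32" with a
// forced 64-bit encoding, and "_e32", "_dpp", and "_sdwa" are handled likewise.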
5648 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5649   // Clear any forced encodings from the previous instruction.
5650   setForcedEncodingSize(0);
5651   setForcedDPP(false);
5652   setForcedSDWA(false);
5653 
5654   if (Name.endswith("_e64")) {
5655     setForcedEncodingSize(64);
5656     return Name.substr(0, Name.size() - 4);
5657   } else if (Name.endswith("_e32")) {
5658     setForcedEncodingSize(32);
5659     return Name.substr(0, Name.size() - 4);
5660   } else if (Name.endswith("_dpp")) {
5661     setForcedDPP(true);
5662     return Name.substr(0, Name.size() - 4);
5663   } else if (Name.endswith("_sdwa")) {
5664     setForcedSDWA(true);
5665     return Name.substr(0, Name.size() - 5);
5666   }
5667   return Name;
5668 }
5669 
5670 static void applyMnemonicAliases(StringRef &Mnemonic,
5671                                  const FeatureBitset &Features,
5672                                  unsigned VariantID);
5673 
5674 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5675                                        StringRef Name,
5676                                        SMLoc NameLoc, OperandVector &Operands) {
5677   // Add the instruction mnemonic
5678   Name = parseMnemonicSuffix(Name);
5679 
5680   // If the target architecture uses MnemonicAlias, call it here to parse
5681   // operands correctly.
5682   applyMnemonicAliases(Name, getAvailableFeatures(), 0);
5683 
5684   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5685 
5686   bool IsMIMG = Name.startswith("image_");
5687 
5688   while (!trySkipToken(AsmToken::EndOfStatement)) {
5689     OperandMode Mode = OperandMode_Default;
5690     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5691       Mode = OperandMode_NSA;
5692     CPolSeen = 0;
5693     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5694 
5695     if (Res != MatchOperand_Success) {
5696       checkUnsupportedInstruction(Name, NameLoc);
5697       if (!Parser.hasPendingError()) {
5698         // FIXME: use real operand location rather than the current location.
5699         StringRef Msg =
5700           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5701                                             "not a valid operand.";
5702         Error(getLoc(), Msg);
5703       }
5704       while (!trySkipToken(AsmToken::EndOfStatement)) {
5705         lex();
5706       }
5707       return true;
5708     }
5709 
5710     // Eat the comma or space if there is one.
5711     trySkipToken(AsmToken::Comma);
5712   }
5713 
5714   return false;
5715 }
5716 
5717 //===----------------------------------------------------------------------===//
5718 // Utility functions
5719 //===----------------------------------------------------------------------===//
5720 
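// Parses an integer operand written as "<prefix>:<expr>", e.g. (illustrative)
// "offset:4095". Returns MatchOperand_NoMatch if the prefix is absent.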
5721 OperandMatchResultTy
5722 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5723 
5724   if (!trySkipId(Prefix, AsmToken::Colon))
5725     return MatchOperand_NoMatch;
5726 
5727   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5728 }
5729 
5730 OperandMatchResultTy
5731 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5732                                     AMDGPUOperand::ImmTy ImmTy,
5733                                     bool (*ConvertResult)(int64_t&)) {
5734   SMLoc S = getLoc();
5735   int64_t Value = 0;
5736 
5737   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5738   if (Res != MatchOperand_Success)
5739     return Res;
5740 
5741   if (ConvertResult && !ConvertResult(Value)) {
5742     Error(S, "invalid " + StringRef(Prefix) + " value.");
5743   }
5744 
5745   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5746   return MatchOperand_Success;
5747 }
5748 
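// Parses a bit-array operand written as "<prefix>:[b0,b1,...]" with up to four
// 0/1 elements, e.g. (illustrative) "op_sel:[0,1]". The bits are packed into a
// single immediate, with element I going to bit I.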
5749 OperandMatchResultTy
5750 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5751                                              OperandVector &Operands,
5752                                              AMDGPUOperand::ImmTy ImmTy,
5753                                              bool (*ConvertResult)(int64_t&)) {
5754   SMLoc S = getLoc();
5755   if (!trySkipId(Prefix, AsmToken::Colon))
5756     return MatchOperand_NoMatch;
5757 
5758   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5759     return MatchOperand_ParseFail;
5760 
5761   unsigned Val = 0;
5762   const unsigned MaxSize = 4;
5763 
5764   // FIXME: How to verify the number of elements matches the number of src
5765   // operands?
5766   for (int I = 0; ; ++I) {
5767     int64_t Op;
5768     SMLoc Loc = getLoc();
5769     if (!parseExpr(Op))
5770       return MatchOperand_ParseFail;
5771 
5772     if (Op != 0 && Op != 1) {
5773       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5774       return MatchOperand_ParseFail;
5775     }
5776 
5777     Val |= (Op << I);
5778 
5779     if (trySkipToken(AsmToken::RBrac))
5780       break;
5781 
5782     if (I + 1 == MaxSize) {
5783       Error(getLoc(), "expected a closing square bracket");
5784       return MatchOperand_ParseFail;
5785     }
5786 
5787     if (!skipToken(AsmToken::Comma, "expected a comma"))
5788       return MatchOperand_ParseFail;
5789   }
5790 
5791   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5792   return MatchOperand_Success;
5793 }
5794 
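// Parses a named single-bit modifier: "<name>" sets the bit and "no<name>"
// clears it, e.g. (illustrative) "gds" / "nogds".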
5795 OperandMatchResultTy
5796 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5797                                AMDGPUOperand::ImmTy ImmTy) {
5798   int64_t Bit;
5799   SMLoc S = getLoc();
5800 
5801   if (trySkipId(Name)) {
5802     Bit = 1;
5803   } else if (trySkipId("no", Name)) {
5804     Bit = 0;
5805   } else {
5806     return MatchOperand_NoMatch;
5807   }
5808 
5809   if (Name == "r128" && !hasMIMG_R128()) {
5810     Error(S, "r128 modifier is not supported on this GPU");
5811     return MatchOperand_ParseFail;
5812   }
5813   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5814     Error(S, "a16 modifier is not supported on this GPU");
5815     return MatchOperand_ParseFail;
5816   }
5817 
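  // On GFX9 the a16 modifier occupies the combined r128/a16 operand, so remap
  // the immediate type (assumption based on the ImmTyR128A16 naming).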
5818   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5819     ImmTy = AMDGPUOperand::ImmTyR128A16;
5820 
5821   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5822   return MatchOperand_Success;
5823 }
5824 
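// Parses cache-policy modifiers (glc/slc/dlc/scc and their "no" forms; sc0,
// sc1, and nt on gfx940 for non-"s_" mnemonics), e.g. (illustrative):
//   flat_load_dword v0, v[2:3] glc slc
// All modifiers seen on one instruction are merged into a single CPol operand.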
5825 OperandMatchResultTy
5826 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5827   unsigned CPolOn = 0;
5828   unsigned CPolOff = 0;
5829   SMLoc S = getLoc();
5830 
5831   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5832   if (isGFX940() && !Mnemo.startswith("s_")) {
5833     if (trySkipId("sc0"))
5834       CPolOn = AMDGPU::CPol::SC0;
5835     else if (trySkipId("nosc0"))
5836       CPolOff = AMDGPU::CPol::SC0;
5837     else if (trySkipId("nt"))
5838       CPolOn = AMDGPU::CPol::NT;
5839     else if (trySkipId("nont"))
5840       CPolOff = AMDGPU::CPol::NT;
5841     else if (trySkipId("sc1"))
5842       CPolOn = AMDGPU::CPol::SC1;
5843     else if (trySkipId("nosc1"))
5844       CPolOff = AMDGPU::CPol::SC1;
5845     else
5846       return MatchOperand_NoMatch;
5847   }
5848   else if (trySkipId("glc"))
5849     CPolOn = AMDGPU::CPol::GLC;
5850   else if (trySkipId("noglc"))
5851     CPolOff = AMDGPU::CPol::GLC;
5852   else if (trySkipId("slc"))
5853     CPolOn = AMDGPU::CPol::SLC;
5854   else if (trySkipId("noslc"))
5855     CPolOff = AMDGPU::CPol::SLC;
5856   else if (trySkipId("dlc"))
5857     CPolOn = AMDGPU::CPol::DLC;
5858   else if (trySkipId("nodlc"))
5859     CPolOff = AMDGPU::CPol::DLC;
5860   else if (trySkipId("scc"))
5861     CPolOn = AMDGPU::CPol::SCC;
5862   else if (trySkipId("noscc"))
5863     CPolOff = AMDGPU::CPol::SCC;
5864   else
5865     return MatchOperand_NoMatch;
5866 
5867   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5868     Error(S, "dlc modifier is not supported on this GPU");
5869     return MatchOperand_ParseFail;
5870   }
5871 
5872   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5873     Error(S, "scc modifier is not supported on this GPU");
5874     return MatchOperand_ParseFail;
5875   }
5876 
5877   if (CPolSeen & (CPolOn | CPolOff)) {
5878     Error(S, "duplicate cache policy modifier");
5879     return MatchOperand_ParseFail;
5880   }
5881 
5882   CPolSeen |= (CPolOn | CPolOff);
5883 
5884   for (unsigned I = 1; I != Operands.size(); ++I) {
5885     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5886     if (Op.isCPol()) {
5887       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5888       return MatchOperand_Success;
5889     }
5890   }
5891 
5892   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5893                                               AMDGPUOperand::ImmTyCPol));
5894 
5895   return MatchOperand_Success;
5896 }
5897 
5898 static void addOptionalImmOperand(
5899   MCInst& Inst, const OperandVector& Operands,
5900   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5901   AMDGPUOperand::ImmTy ImmT,
5902   int64_t Default = 0) {
5903   auto i = OptionalIdx.find(ImmT);
5904   if (i != OptionalIdx.end()) {
5905     unsigned Idx = i->second;
5906     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5907   } else {
5908     Inst.addOperand(MCOperand::createImm(Default));
5909   }
5910 }
5911 
5912 OperandMatchResultTy
5913 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5914                                        StringRef &Value,
5915                                        SMLoc &StringLoc) {
5916   if (!trySkipId(Prefix, AsmToken::Colon))
5917     return MatchOperand_NoMatch;
5918 
5919   StringLoc = getLoc();
5920   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5921                                                   : MatchOperand_ParseFail;
5922 }
5923 
5924 //===----------------------------------------------------------------------===//
5925 // MTBUF format
5926 //===----------------------------------------------------------------------===//
5927 
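// A buffer format operand in this section may be written as numeric dfmt/nfmt
// fields, as a numeric "format:" value, or as symbolic names in "format:[...]";
// e.g. (illustrative):
//   dfmt:4, nfmt:7
//   format:[BUF_DATA_FORMAT_32, BUF_NUM_FORMAT_FLOAT]
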
5928 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5929                                   int64_t MaxVal,
5930                                   int64_t &Fmt) {
5931   int64_t Val;
5932   SMLoc Loc = getLoc();
5933 
5934   auto Res = parseIntWithPrefix(Pref, Val);
5935   if (Res == MatchOperand_ParseFail)
5936     return false;
5937   if (Res == MatchOperand_NoMatch)
5938     return true;
5939 
5940   if (Val < 0 || Val > MaxVal) {
5941     Error(Loc, Twine("out of range ", StringRef(Pref)));
5942     return false;
5943   }
5944 
5945   Fmt = Val;
5946   return true;
5947 }
5948 
5949 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5950 // values to live in a joint format operand in the MCInst encoding.
5951 OperandMatchResultTy
5952 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5953   using namespace llvm::AMDGPU::MTBUFFormat;
5954 
5955   int64_t Dfmt = DFMT_UNDEF;
5956   int64_t Nfmt = NFMT_UNDEF;
5957 
5958   // dfmt and nfmt can appear in either order, and each is optional.
5959   for (int I = 0; I < 2; ++I) {
5960     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5961       return MatchOperand_ParseFail;
5962 
5963     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5964       return MatchOperand_ParseFail;
5965     }
5966     // Skip optional comma between dfmt/nfmt
5967     // but guard against 2 commas following each other.
5968     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5969         !peekToken().is(AsmToken::Comma)) {
5970       trySkipToken(AsmToken::Comma);
5971     }
5972   }
5973 
5974   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5975     return MatchOperand_NoMatch;
5976 
5977   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5978   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5979 
5980   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5981   return MatchOperand_Success;
5982 }
5983 
5984 OperandMatchResultTy
5985 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5986   using namespace llvm::AMDGPU::MTBUFFormat;
5987 
5988   int64_t Fmt = UFMT_UNDEF;
5989 
5990   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5991     return MatchOperand_ParseFail;
5992 
5993   if (Fmt == UFMT_UNDEF)
5994     return MatchOperand_NoMatch;
5995 
5996   Format = Fmt;
5997   return MatchOperand_Success;
5998 }
5999 
6000 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6001                                     int64_t &Nfmt,
6002                                     StringRef FormatStr,
6003                                     SMLoc Loc) {
6004   using namespace llvm::AMDGPU::MTBUFFormat;
6005   int64_t Format;
6006 
6007   Format = getDfmt(FormatStr);
6008   if (Format != DFMT_UNDEF) {
6009     Dfmt = Format;
6010     return true;
6011   }
6012 
6013   Format = getNfmt(FormatStr, getSTI());
6014   if (Format != NFMT_UNDEF) {
6015     Nfmt = Format;
6016     return true;
6017   }
6018 
6019   Error(Loc, "unsupported format");
6020   return false;
6021 }
6022 
6023 OperandMatchResultTy
6024 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
6025                                           SMLoc FormatLoc,
6026                                           int64_t &Format) {
6027   using namespace llvm::AMDGPU::MTBUFFormat;
6028 
6029   int64_t Dfmt = DFMT_UNDEF;
6030   int64_t Nfmt = NFMT_UNDEF;
6031   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
6032     return MatchOperand_ParseFail;
6033 
6034   if (trySkipToken(AsmToken::Comma)) {
6035     StringRef Str;
6036     SMLoc Loc = getLoc();
6037     if (!parseId(Str, "expected a format string") ||
6038         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
6039       return MatchOperand_ParseFail;
6040     }
6041     if (Dfmt == DFMT_UNDEF) {
6042       Error(Loc, "duplicate numeric format");
6043       return MatchOperand_ParseFail;
6044     } else if (Nfmt == NFMT_UNDEF) {
6045       Error(Loc, "duplicate data format");
6046       return MatchOperand_ParseFail;
6047     }
6048   }
6049 
6050   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6051   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6052 
6053   if (isGFX10Plus()) {
6054     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
6055     if (Ufmt == UFMT_UNDEF) {
6056       Error(FormatLoc, "unsupported format");
6057       return MatchOperand_ParseFail;
6058     }
6059     Format = Ufmt;
6060   } else {
6061     Format = encodeDfmtNfmt(Dfmt, Nfmt);
6062   }
6063 
6064   return MatchOperand_Success;
6065 }
6066 
6067 OperandMatchResultTy
6068 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6069                                             SMLoc Loc,
6070                                             int64_t &Format) {
6071   using namespace llvm::AMDGPU::MTBUFFormat;
6072 
6073   auto Id = getUnifiedFormat(FormatStr, getSTI());
6074   if (Id == UFMT_UNDEF)
6075     return MatchOperand_NoMatch;
6076 
6077   if (!isGFX10Plus()) {
6078     Error(Loc, "unified format is not supported on this GPU");
6079     return MatchOperand_ParseFail;
6080   }
6081 
6082   Format = Id;
6083   return MatchOperand_Success;
6084 }
6085 
6086 OperandMatchResultTy
6087 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6088   using namespace llvm::AMDGPU::MTBUFFormat;
6089   SMLoc Loc = getLoc();
6090 
6091   if (!parseExpr(Format))
6092     return MatchOperand_ParseFail;
6093   if (!isValidFormatEncoding(Format, getSTI())) {
6094     Error(Loc, "out of range format");
6095     return MatchOperand_ParseFail;
6096   }
6097 
6098   return MatchOperand_Success;
6099 }
6100 
6101 OperandMatchResultTy
6102 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6103   using namespace llvm::AMDGPU::MTBUFFormat;
6104 
6105   if (!trySkipId("format", AsmToken::Colon))
6106     return MatchOperand_NoMatch;
6107 
6108   if (trySkipToken(AsmToken::LBrac)) {
6109     StringRef FormatStr;
6110     SMLoc Loc = getLoc();
6111     if (!parseId(FormatStr, "expected a format string"))
6112       return MatchOperand_ParseFail;
6113 
6114     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6115     if (Res == MatchOperand_NoMatch)
6116       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6117     if (Res != MatchOperand_Success)
6118       return Res;
6119 
6120     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6121       return MatchOperand_ParseFail;
6122 
6123     return MatchOperand_Success;
6124   }
6125 
6126   return parseNumericFormat(Format);
6127 }
6128 
6129 OperandMatchResultTy
6130 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
6131   using namespace llvm::AMDGPU::MTBUFFormat;
6132 
6133   int64_t Format = getDefaultFormatEncoding(getSTI());
6134   OperandMatchResultTy Res;
6135   SMLoc Loc = getLoc();
6136 
6137   // Parse legacy format syntax.
6138   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
6139   if (Res == MatchOperand_ParseFail)
6140     return Res;
6141 
6142   bool FormatFound = (Res == MatchOperand_Success);
6143 
6144   Operands.push_back(
6145     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6146 
6147   if (FormatFound)
6148     trySkipToken(AsmToken::Comma);
6149 
6150   if (isToken(AsmToken::EndOfStatement)) {
6151     // We are expecting an soffset operand,
6152     // but let the matcher handle the error.
6153     return MatchOperand_Success;
6154   }
6155 
6156   // Parse soffset.
6157   Res = parseRegOrImm(Operands);
6158   if (Res != MatchOperand_Success)
6159     return Res;
6160 
6161   trySkipToken(AsmToken::Comma);
6162 
6163   if (!FormatFound) {
6164     Res = parseSymbolicOrNumericFormat(Format);
6165     if (Res == MatchOperand_ParseFail)
6166       return Res;
6167     if (Res == MatchOperand_Success) {
6168       auto Size = Operands.size();
6169       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
6170       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6171       Op.setImm(Format);
6172     }
6173     return MatchOperand_Success;
6174   }
6175 
6176   if (isId("format") && peekToken().is(AsmToken::Colon)) {
6177     Error(getLoc(), "duplicate format");
6178     return MatchOperand_ParseFail;
6179   }
6180   return MatchOperand_Success;
6181 }
6182 
6183 //===----------------------------------------------------------------------===//
6184 // ds
6185 //===----------------------------------------------------------------------===//
6186 
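// Convert parsed operands for DS instructions that take a pair of 8-bit
// offsets (e.g. ds_write2_b32 with offset0:/offset1:) into MCInst operands,
// including the optional gds modifier, and append the m0 register operand.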
6187 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
6188                                     const OperandVector &Operands) {
6189   OptionalImmIndexMap OptionalIdx;
6190 
6191   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6192     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6193 
6194     // Add the register arguments
6195     if (Op.isReg()) {
6196       Op.addRegOperands(Inst, 1);
6197       continue;
6198     }
6199 
6200     // Handle optional arguments
6201     OptionalIdx[Op.getImmTy()] = i;
6202   }
6203 
6204   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
6205   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
6206   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6207 
6208   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6209 }
6210 
6211 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
6212                                 bool IsGdsHardcoded) {
6213   OptionalImmIndexMap OptionalIdx;
6214 
6215   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6216     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6217 
6218     // Add the register arguments
6219     if (Op.isReg()) {
6220       Op.addRegOperands(Inst, 1);
6221       continue;
6222     }
6223 
6224     if (Op.isToken() && Op.getToken() == "gds") {
6225       IsGdsHardcoded = true;
6226       continue;
6227     }
6228 
6229     // Handle optional arguments
6230     OptionalIdx[Op.getImmTy()] = i;
6231   }
6232 
6233   AMDGPUOperand::ImmTy OffsetType =
6234     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
6235      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
6236      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
6237                                                       AMDGPUOperand::ImmTyOffset;
6238 
6239   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
6240 
6241   if (!IsGdsHardcoded) {
6242     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6243   }
6244   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6245 }
6246 
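// Convert parsed 'exp' operands. The enable mask (en) is computed from which
// of the four sources are real registers rather than 'off'; when 'compr' is
// specified, the sources are treated as packed pairs.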
6247 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
6248   OptionalImmIndexMap OptionalIdx;
6249 
6250   unsigned OperandIdx[4];
6251   unsigned EnMask = 0;
6252   int SrcIdx = 0;
6253 
6254   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6255     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6256 
6257     // Add the register arguments
6258     if (Op.isReg()) {
6259       assert(SrcIdx < 4);
6260       OperandIdx[SrcIdx] = Inst.size();
6261       Op.addRegOperands(Inst, 1);
6262       ++SrcIdx;
6263       continue;
6264     }
6265 
6266     if (Op.isOff()) {
6267       assert(SrcIdx < 4);
6268       OperandIdx[SrcIdx] = Inst.size();
6269       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
6270       ++SrcIdx;
6271       continue;
6272     }
6273 
6274     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
6275       Op.addImmOperands(Inst, 1);
6276       continue;
6277     }
6278 
6279     if (Op.isToken() && Op.getToken() == "done")
6280       continue;
6281 
6282     // Handle optional arguments
6283     OptionalIdx[Op.getImmTy()] = i;
6284   }
6285 
6286   assert(SrcIdx == 4);
6287 
6288   bool Compr = false;
6289   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
6290     Compr = true;
6291     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
6292     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
6293     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
6294   }
6295 
6296   for (auto i = 0; i < SrcIdx; ++i) {
6297     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
6298       EnMask |= Compr ? (0x3 << i * 2) : (0x1 << i);
6299     }
6300   }
6301 
6302   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
6303   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
6304 
6305   Inst.addOperand(MCOperand::createImm(EnMask));
6306 }
6307 
6308 //===----------------------------------------------------------------------===//
6309 // s_waitcnt
6310 //===----------------------------------------------------------------------===//
6311 
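// Encode CntVal into the relevant field of IntVal using the given encode and
// decode callbacks. Returns true on failure, i.e. when the value does not fit
// in the field and saturation was not requested.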
6312 static bool
6313 encodeCnt(
6314   const AMDGPU::IsaVersion ISA,
6315   int64_t &IntVal,
6316   int64_t CntVal,
6317   bool Saturate,
6318   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6319   unsigned (*decode)(const IsaVersion &Version, unsigned))
6320 {
6321   bool Failed = false;
6322 
6323   IntVal = encode(ISA, IntVal, CntVal);
6324   if (CntVal != decode(ISA, IntVal)) {
6325     if (Saturate) {
6326       IntVal = encode(ISA, IntVal, -1);
6327     } else {
6328       Failed = true;
6329     }
6330   }
6331   return Failed;
6332 }
6333 
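// Parse a single counter specification of the form <name>(<value>),
// e.g. vmcnt(0), and merge its encoding into IntVal. A "_sat" suffix on the
// counter name requests saturation instead of an error on overflow.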
6334 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6335 
6336   SMLoc CntLoc = getLoc();
6337   StringRef CntName = getTokenStr();
6338 
6339   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6340       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6341     return false;
6342 
6343   int64_t CntVal;
6344   SMLoc ValLoc = getLoc();
6345   if (!parseExpr(CntVal))
6346     return false;
6347 
6348   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6349 
6350   bool Failed = true;
6351   bool Sat = CntName.endswith("_sat");
6352 
6353   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6354     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6355   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6356     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6357   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6358     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6359   } else {
6360     Error(CntLoc, "invalid counter name " + CntName);
6361     return false;
6362   }
6363 
6364   if (Failed) {
6365     Error(ValLoc, "too large value for " + CntName);
6366     return false;
6367   }
6368 
6369   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6370     return false;
6371 
6372   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6373     if (isToken(AsmToken::EndOfStatement)) {
6374       Error(getLoc(), "expected a counter name");
6375       return false;
6376     }
6377   }
6378 
6379   return true;
6380 }
6381 
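// Parse an s_waitcnt operand: either an absolute expression or a list of
// counter specifications such as vmcnt(0) expcnt(0) lgkmcnt(0), optionally
// separated by '&' or ','.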
6382 OperandMatchResultTy
6383 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
6384   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6385   int64_t Waitcnt = getWaitcntBitMask(ISA);
6386   SMLoc S = getLoc();
6387 
6388   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6389     while (!isToken(AsmToken::EndOfStatement)) {
6390       if (!parseCnt(Waitcnt))
6391         return MatchOperand_ParseFail;
6392     }
6393   } else {
6394     if (!parseExpr(Waitcnt))
6395       return MatchOperand_ParseFail;
6396   }
6397 
6398   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6399   return MatchOperand_Success;
6400 }
6401 
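// Parse a single s_delay_alu field of the form <name>(<value>),
// e.g. instid0(VALU_DEP_1), and merge it into Delay.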
6402 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
6403   SMLoc FieldLoc = getLoc();
6404   StringRef FieldName = getTokenStr();
6405   if (!skipToken(AsmToken::Identifier, "expected a field name") ||
6406       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6407     return false;
6408 
6409   SMLoc ValueLoc = getLoc();
6410   StringRef ValueName = getTokenStr();
6411   if (!skipToken(AsmToken::Identifier, "expected a value name") ||
6412       !skipToken(AsmToken::RParen, "expected a right parenthesis"))
6413     return false;
6414 
6415   unsigned Shift;
6416   if (FieldName == "instid0") {
6417     Shift = 0;
6418   } else if (FieldName == "instskip") {
6419     Shift = 4;
6420   } else if (FieldName == "instid1") {
6421     Shift = 7;
6422   } else {
6423     Error(FieldLoc, "invalid field name " + FieldName);
6424     return false;
6425   }
6426 
6427   int Value;
6428   if (Shift == 4) {
6429     // Parse values for instskip.
6430     Value = StringSwitch<int>(ValueName)
6431                 .Case("SAME", 0)
6432                 .Case("NEXT", 1)
6433                 .Case("SKIP_1", 2)
6434                 .Case("SKIP_2", 3)
6435                 .Case("SKIP_3", 4)
6436                 .Case("SKIP_4", 5)
6437                 .Default(-1);
6438   } else {
6439     // Parse values for instid0 and instid1.
6440     Value = StringSwitch<int>(ValueName)
6441                 .Case("NO_DEP", 0)
6442                 .Case("VALU_DEP_1", 1)
6443                 .Case("VALU_DEP_2", 2)
6444                 .Case("VALU_DEP_3", 3)
6445                 .Case("VALU_DEP_4", 4)
6446                 .Case("TRANS32_DEP_1", 5)
6447                 .Case("TRANS32_DEP_2", 6)
6448                 .Case("TRANS32_DEP_3", 7)
6449                 .Case("FMA_ACCUM_CYCLE_1", 8)
6450                 .Case("SALU_CYCLE_1", 9)
6451                 .Case("SALU_CYCLE_2", 10)
6452                 .Case("SALU_CYCLE_3", 11)
6453                 .Default(-1);
6454   }
6455   if (Value < 0) {
6456     Error(ValueLoc, "invalid value name " + ValueName);
6457     return false;
6458   }
6459 
6460   Delay |= Value << Shift;
6461   return true;
6462 }
6463 
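// Parse an s_delay_alu operand: either an absolute expression or a list of
// fields separated by '|', e.g. instid0(VALU_DEP_1) | instskip(NEXT).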
6464 OperandMatchResultTy
6465 AMDGPUAsmParser::parseSDelayAluOps(OperandVector &Operands) {
6466   int64_t Delay = 0;
6467   SMLoc S = getLoc();
6468 
6469   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6470     do {
6471       if (!parseDelay(Delay))
6472         return MatchOperand_ParseFail;
6473     } while (trySkipToken(AsmToken::Pipe));
6474   } else {
6475     if (!parseExpr(Delay))
6476       return MatchOperand_ParseFail;
6477   }
6478 
6479   Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
6480   return MatchOperand_Success;
6481 }
6482 
6483 bool
6484 AMDGPUOperand::isSWaitCnt() const {
6485   return isImm();
6486 }
6487 
6488 bool AMDGPUOperand::isSDelayAlu() const { return isImm(); }
6489 
6490 //===----------------------------------------------------------------------===//
6491 // DepCtr
6492 //===----------------------------------------------------------------------===//
6493 
6494 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
6495                                   StringRef DepCtrName) {
6496   switch (ErrorId) {
6497   case OPR_ID_UNKNOWN:
6498     Error(Loc, Twine("invalid counter name ", DepCtrName));
6499     return;
6500   case OPR_ID_UNSUPPORTED:
6501     Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
6502     return;
6503   case OPR_ID_DUPLICATE:
6504     Error(Loc, Twine("duplicate counter name ", DepCtrName));
6505     return;
6506   case OPR_VAL_INVALID:
6507     Error(Loc, Twine("invalid value for ", DepCtrName));
6508     return;
6509   default:
6510     assert(false);
6511   }
6512 }
6513 
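// Parse a single depctr counter of the form <name>(<value>) and merge its
// encoding into DepCtr. UsedOprMask records which fields have already been
// specified so that duplicates can be diagnosed.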
6514 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
6515 
6516   using namespace llvm::AMDGPU::DepCtr;
6517 
6518   SMLoc DepCtrLoc = getLoc();
6519   StringRef DepCtrName = getTokenStr();
6520 
6521   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6522       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6523     return false;
6524 
6525   int64_t ExprVal;
6526   if (!parseExpr(ExprVal))
6527     return false;
6528 
6529   unsigned PrevOprMask = UsedOprMask;
6530   int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
6531 
6532   if (CntVal < 0) {
6533     depCtrError(DepCtrLoc, CntVal, DepCtrName);
6534     return false;
6535   }
6536 
6537   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6538     return false;
6539 
6540   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6541     if (isToken(AsmToken::EndOfStatement)) {
6542       Error(getLoc(), "expected a counter name");
6543       return false;
6544     }
6545   }
6546 
6547   unsigned CntValMask = PrevOprMask ^ UsedOprMask;
6548   DepCtr = (DepCtr & ~CntValMask) | CntVal;
6549   return true;
6550 }
6551 
6552 OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) {
6553   using namespace llvm::AMDGPU::DepCtr;
6554 
6555   int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
6556   SMLoc Loc = getLoc();
6557 
6558   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6559     unsigned UsedOprMask = 0;
6560     while (!isToken(AsmToken::EndOfStatement)) {
6561       if (!parseDepCtr(DepCtr, UsedOprMask))
6562         return MatchOperand_ParseFail;
6563     }
6564   } else {
6565     if (!parseExpr(DepCtr))
6566       return MatchOperand_ParseFail;
6567   }
6568 
6569   Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
6570   return MatchOperand_Success;
6571 }
6572 
6573 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
6574 
6575 //===----------------------------------------------------------------------===//
6576 // hwreg
6577 //===----------------------------------------------------------------------===//
6578 
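// Parse the body of a hwreg(<id> [, <offset>, <width>]) operand. The offset
// and width are optional; when omitted, the caller supplies OFFSET_DEFAULT_
// and WIDTH_DEFAULT_.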
6579 bool
6580 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6581                                 OperandInfoTy &Offset,
6582                                 OperandInfoTy &Width) {
6583   using namespace llvm::AMDGPU::Hwreg;
6584 
6585   // The register may be specified by name or using a numeric code
6586   HwReg.Loc = getLoc();
6587   if (isToken(AsmToken::Identifier) &&
6588       (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6589     HwReg.IsSymbolic = true;
6590     lex(); // skip register name
6591   } else if (!parseExpr(HwReg.Id, "a register name")) {
6592     return false;
6593   }
6594 
6595   if (trySkipToken(AsmToken::RParen))
6596     return true;
6597 
6598   // Parse optional parameters.
6599   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6600     return false;
6601 
6602   Offset.Loc = getLoc();
6603   if (!parseExpr(Offset.Id))
6604     return false;
6605 
6606   if (!skipToken(AsmToken::Comma, "expected a comma"))
6607     return false;
6608 
6609   Width.Loc = getLoc();
6610   return parseExpr(Width.Id) &&
6611          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6612 }
6613 
6614 bool
6615 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6616                                const OperandInfoTy &Offset,
6617                                const OperandInfoTy &Width) {
6618 
6619   using namespace llvm::AMDGPU::Hwreg;
6620 
6621   if (HwReg.IsSymbolic) {
6622     if (HwReg.Id == OPR_ID_UNSUPPORTED) {
6623       Error(HwReg.Loc,
6624             "specified hardware register is not supported on this GPU");
6625       return false;
6626     }
6627   } else {
6628     if (!isValidHwreg(HwReg.Id)) {
6629       Error(HwReg.Loc,
6630             "invalid code of hardware register: only 6-bit values are legal");
6631       return false;
6632     }
6633   }
6634   if (!isValidHwregOffset(Offset.Id)) {
6635     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6636     return false;
6637   }
6638   if (!isValidHwregWidth(Width.Id)) {
6639     Error(Width.Loc,
6640           "invalid bitfield width: only values from 1 to 32 are legal");
6641     return false;
6642   }
6643   return true;
6644 }
6645 
6646 OperandMatchResultTy
6647 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6648   using namespace llvm::AMDGPU::Hwreg;
6649 
6650   int64_t ImmVal = 0;
6651   SMLoc Loc = getLoc();
6652 
6653   if (trySkipId("hwreg", AsmToken::LParen)) {
6654     OperandInfoTy HwReg(OPR_ID_UNKNOWN);
6655     OperandInfoTy Offset(OFFSET_DEFAULT_);
6656     OperandInfoTy Width(WIDTH_DEFAULT_);
6657     if (parseHwregBody(HwReg, Offset, Width) &&
6658         validateHwreg(HwReg, Offset, Width)) {
6659       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6660     } else {
6661       return MatchOperand_ParseFail;
6662     }
6663   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6664     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6665       Error(Loc, "invalid immediate: only 16-bit values are legal");
6666       return MatchOperand_ParseFail;
6667     }
6668   } else {
6669     return MatchOperand_ParseFail;
6670   }
6671 
6672   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6673   return MatchOperand_Success;
6674 }
6675 
6676 bool AMDGPUOperand::isHwreg() const {
6677   return isImmTy(ImmTyHwreg);
6678 }
6679 
6680 //===----------------------------------------------------------------------===//
6681 // sendmsg
6682 //===----------------------------------------------------------------------===//
6683 
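// Parse the body of a sendmsg(<msg> [, <op> [, <stream>]]) operand. The
// message and operation may be given by name or as absolute expressions.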
6684 bool
6685 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6686                                   OperandInfoTy &Op,
6687                                   OperandInfoTy &Stream) {
6688   using namespace llvm::AMDGPU::SendMsg;
6689 
6690   Msg.Loc = getLoc();
6691   if (isToken(AsmToken::Identifier) &&
6692       (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6693     Msg.IsSymbolic = true;
6694     lex(); // skip message name
6695   } else if (!parseExpr(Msg.Id, "a message name")) {
6696     return false;
6697   }
6698 
6699   if (trySkipToken(AsmToken::Comma)) {
6700     Op.IsDefined = true;
6701     Op.Loc = getLoc();
6702     if (isToken(AsmToken::Identifier) &&
6703         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6704       lex(); // skip operation name
6705     } else if (!parseExpr(Op.Id, "an operation name")) {
6706       return false;
6707     }
6708 
6709     if (trySkipToken(AsmToken::Comma)) {
6710       Stream.IsDefined = true;
6711       Stream.Loc = getLoc();
6712       if (!parseExpr(Stream.Id))
6713         return false;
6714     }
6715   }
6716 
6717   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6718 }
6719 
6720 bool
6721 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6722                                  const OperandInfoTy &Op,
6723                                  const OperandInfoTy &Stream) {
6724   using namespace llvm::AMDGPU::SendMsg;
6725 
6726   // Validation strictness depends on whether the message is specified
6727   // in symbolic or in numeric form. In the latter case we only check
6728   // that the value can be encoded.
6729   bool Strict = Msg.IsSymbolic;
6730 
6731   if (Strict) {
6732     if (Msg.Id == OPR_ID_UNSUPPORTED) {
6733       Error(Msg.Loc, "specified message id is not supported on this GPU");
6734       return false;
6735     }
6736   } else {
6737     if (!isValidMsgId(Msg.Id, getSTI())) {
6738       Error(Msg.Loc, "invalid message id");
6739       return false;
6740     }
6741   }
6742   if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) {
6743     if (Op.IsDefined) {
6744       Error(Op.Loc, "message does not support operations");
6745     } else {
6746       Error(Msg.Loc, "missing message operation");
6747     }
6748     return false;
6749   }
6750   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6751     Error(Op.Loc, "invalid operation id");
6752     return false;
6753   }
6754   if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) &&
6755       Stream.IsDefined) {
6756     Error(Stream.Loc, "message operation does not support streams");
6757     return false;
6758   }
6759   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6760     Error(Stream.Loc, "invalid message stream id");
6761     return false;
6762   }
6763   return true;
6764 }
6765 
6766 OperandMatchResultTy
6767 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6768   using namespace llvm::AMDGPU::SendMsg;
6769 
6770   int64_t ImmVal = 0;
6771   SMLoc Loc = getLoc();
6772 
6773   if (trySkipId("sendmsg", AsmToken::LParen)) {
6774     OperandInfoTy Msg(OPR_ID_UNKNOWN);
6775     OperandInfoTy Op(OP_NONE_);
6776     OperandInfoTy Stream(STREAM_ID_NONE_);
6777     if (parseSendMsgBody(Msg, Op, Stream) &&
6778         validateSendMsg(Msg, Op, Stream)) {
6779       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6780     } else {
6781       return MatchOperand_ParseFail;
6782     }
6783   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6784     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6785       Error(Loc, "invalid immediate: only 16-bit values are legal");
6786       return MatchOperand_ParseFail;
6787     }
6788   } else {
6789     return MatchOperand_ParseFail;
6790   }
6791 
6792   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6793   return MatchOperand_Success;
6794 }
6795 
6796 bool AMDGPUOperand::isSendMsg() const {
6797   return isImmTy(ImmTySendMsg);
6798 }
6799 
6800 //===----------------------------------------------------------------------===//
6801 // v_interp
6802 //===----------------------------------------------------------------------===//
6803 
6804 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6805   StringRef Str;
6806   SMLoc S = getLoc();
6807 
6808   if (!parseId(Str))
6809     return MatchOperand_NoMatch;
6810 
6811   int Slot = StringSwitch<int>(Str)
6812     .Case("p10", 0)
6813     .Case("p20", 1)
6814     .Case("p0", 2)
6815     .Default(-1);
6816 
6817   if (Slot == -1) {
6818     Error(S, "invalid interpolation slot");
6819     return MatchOperand_ParseFail;
6820   }
6821 
6822   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6823                                               AMDGPUOperand::ImmTyInterpSlot));
6824   return MatchOperand_Success;
6825 }
6826 
6827 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6828   StringRef Str;
6829   SMLoc S = getLoc();
6830 
6831   if (!parseId(Str))
6832     return MatchOperand_NoMatch;
6833 
6834   if (!Str.startswith("attr")) {
6835     Error(S, "invalid interpolation attribute");
6836     return MatchOperand_ParseFail;
6837   }
6838 
6839   StringRef Chan = Str.take_back(2);
6840   int AttrChan = StringSwitch<int>(Chan)
6841     .Case(".x", 0)
6842     .Case(".y", 1)
6843     .Case(".z", 2)
6844     .Case(".w", 3)
6845     .Default(-1);
6846   if (AttrChan == -1) {
6847     Error(S, "invalid or missing interpolation attribute channel");
6848     return MatchOperand_ParseFail;
6849   }
6850 
6851   Str = Str.drop_back(2).drop_front(4);
6852 
6853   uint8_t Attr;
6854   if (Str.getAsInteger(10, Attr)) {
6855     Error(S, "invalid or missing interpolation attribute number");
6856     return MatchOperand_ParseFail;
6857   }
6858 
6859   if (Attr > 63) {
6860     Error(S, "out of bounds interpolation attribute number");
6861     return MatchOperand_ParseFail;
6862   }
6863 
6864   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6865 
6866   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6867                                               AMDGPUOperand::ImmTyInterpAttr));
6868   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6869                                               AMDGPUOperand::ImmTyAttrChan));
6870   return MatchOperand_Success;
6871 }
6872 
6873 //===----------------------------------------------------------------------===//
6874 // exp
6875 //===----------------------------------------------------------------------===//
6876 
6877 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6878   using namespace llvm::AMDGPU::Exp;
6879 
6880   StringRef Str;
6881   SMLoc S = getLoc();
6882 
6883   if (!parseId(Str))
6884     return MatchOperand_NoMatch;
6885 
6886   unsigned Id = getTgtId(Str);
6887   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6888     Error(S, (Id == ET_INVALID) ?
6889                 "invalid exp target" :
6890                 "exp target is not supported on this GPU");
6891     return MatchOperand_ParseFail;
6892   }
6893 
6894   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6895                                               AMDGPUOperand::ImmTyExpTgt));
6896   return MatchOperand_Success;
6897 }
6898 
6899 //===----------------------------------------------------------------------===//
6900 // parser helpers
6901 //===----------------------------------------------------------------------===//
6902 
6903 bool
6904 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6905   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6906 }
6907 
6908 bool
6909 AMDGPUAsmParser::isId(const StringRef Id) const {
6910   return isId(getToken(), Id);
6911 }
6912 
6913 bool
6914 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6915   return getTokenKind() == Kind;
6916 }
6917 
6918 bool
6919 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6920   if (isId(Id)) {
6921     lex();
6922     return true;
6923   }
6924   return false;
6925 }
6926 
6927 bool
6928 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6929   if (isToken(AsmToken::Identifier)) {
6930     StringRef Tok = getTokenStr();
6931     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6932       lex();
6933       return true;
6934     }
6935   }
6936   return false;
6937 }
6938 
6939 bool
6940 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6941   if (isId(Id) && peekToken().is(Kind)) {
6942     lex();
6943     lex();
6944     return true;
6945   }
6946   return false;
6947 }
6948 
6949 bool
6950 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6951   if (isToken(Kind)) {
6952     lex();
6953     return true;
6954   }
6955   return false;
6956 }
6957 
6958 bool
6959 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6960                            const StringRef ErrMsg) {
6961   if (!trySkipToken(Kind)) {
6962     Error(getLoc(), ErrMsg);
6963     return false;
6964   }
6965   return true;
6966 }
6967 
6968 bool
6969 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6970   SMLoc S = getLoc();
6971 
6972   const MCExpr *Expr;
6973   if (Parser.parseExpression(Expr))
6974     return false;
6975 
6976   if (Expr->evaluateAsAbsolute(Imm))
6977     return true;
6978 
6979   if (Expected.empty()) {
6980     Error(S, "expected absolute expression");
6981   } else {
6982     Error(S, Twine("expected ", Expected) +
6983              Twine(" or an absolute expression"));
6984   }
6985   return false;
6986 }
6987 
6988 bool
6989 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6990   SMLoc S = getLoc();
6991 
6992   const MCExpr *Expr;
6993   if (Parser.parseExpression(Expr))
6994     return false;
6995 
6996   int64_t IntVal;
6997   if (Expr->evaluateAsAbsolute(IntVal)) {
6998     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6999   } else {
7000     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
7001   }
7002   return true;
7003 }
7004 
7005 bool
7006 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
7007   if (isToken(AsmToken::String)) {
7008     Val = getToken().getStringContents();
7009     lex();
7010     return true;
7011   } else {
7012     Error(getLoc(), ErrMsg);
7013     return false;
7014   }
7015 }
7016 
7017 bool
7018 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
7019   if (isToken(AsmToken::Identifier)) {
7020     Val = getTokenStr();
7021     lex();
7022     return true;
7023   } else {
7024     if (!ErrMsg.empty())
7025       Error(getLoc(), ErrMsg);
7026     return false;
7027   }
7028 }
7029 
7030 AsmToken
7031 AMDGPUAsmParser::getToken() const {
7032   return Parser.getTok();
7033 }
7034 
7035 AsmToken
7036 AMDGPUAsmParser::peekToken() {
7037   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
7038 }
7039 
7040 void
7041 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
7042   auto TokCount = getLexer().peekTokens(Tokens);
7043 
7044   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
7045     Tokens[Idx] = AsmToken(AsmToken::Error, "");
7046 }
7047 
7048 AsmToken::TokenKind
7049 AMDGPUAsmParser::getTokenKind() const {
7050   return getLexer().getKind();
7051 }
7052 
7053 SMLoc
7054 AMDGPUAsmParser::getLoc() const {
7055   return getToken().getLoc();
7056 }
7057 
7058 StringRef
7059 AMDGPUAsmParser::getTokenStr() const {
7060   return getToken().getString();
7061 }
7062 
7063 void
7064 AMDGPUAsmParser::lex() {
7065   Parser.Lex();
7066 }
7067 
7068 SMLoc
7069 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
7070                                const OperandVector &Operands) const {
7071   for (unsigned i = Operands.size() - 1; i > 0; --i) {
7072     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7073     if (Test(Op))
7074       return Op.getStartLoc();
7075   }
7076   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
7077 }
7078 
7079 SMLoc
7080 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
7081                            const OperandVector &Operands) const {
7082   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
7083   return getOperandLoc(Test, Operands);
7084 }
7085 
7086 SMLoc
7087 AMDGPUAsmParser::getRegLoc(unsigned Reg,
7088                            const OperandVector &Operands) const {
7089   auto Test = [=](const AMDGPUOperand& Op) {
7090     return Op.isRegKind() && Op.getReg() == Reg;
7091   };
7092   return getOperandLoc(Test, Operands);
7093 }
7094 
7095 SMLoc
7096 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
7097   auto Test = [](const AMDGPUOperand& Op) {
7098     return Op.IsImmKindLiteral() || Op.isExpr();
7099   };
7100   return getOperandLoc(Test, Operands);
7101 }
7102 
7103 SMLoc
7104 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
7105   auto Test = [](const AMDGPUOperand& Op) {
7106     return Op.isImmKindConst();
7107   };
7108   return getOperandLoc(Test, Operands);
7109 }
7110 
7111 //===----------------------------------------------------------------------===//
7112 // swizzle
7113 //===----------------------------------------------------------------------===//
7114 
7115 LLVM_READNONE
7116 static unsigned
7117 encodeBitmaskPerm(const unsigned AndMask,
7118                   const unsigned OrMask,
7119                   const unsigned XorMask) {
7120   using namespace llvm::AMDGPU::Swizzle;
7121 
7122   return BITMASK_PERM_ENC |
7123          (AndMask << BITMASK_AND_SHIFT) |
7124          (OrMask  << BITMASK_OR_SHIFT)  |
7125          (XorMask << BITMASK_XOR_SHIFT);
7126 }
7127 
7128 bool
7129 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
7130                                      const unsigned MinVal,
7131                                      const unsigned MaxVal,
7132                                      const StringRef ErrMsg,
7133                                      SMLoc &Loc) {
7134   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7135     return false;
7136   }
7137   Loc = getLoc();
7138   if (!parseExpr(Op)) {
7139     return false;
7140   }
7141   if (Op < MinVal || Op > MaxVal) {
7142     Error(Loc, ErrMsg);
7143     return false;
7144   }
7145 
7146   return true;
7147 }
7148 
7149 bool
7150 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
7151                                       const unsigned MinVal,
7152                                       const unsigned MaxVal,
7153                                       const StringRef ErrMsg) {
7154   SMLoc Loc;
7155   for (unsigned i = 0; i < OpNum; ++i) {
7156     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
7157       return false;
7158   }
7159 
7160   return true;
7161 }
7162 
7163 bool
7164 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7165   using namespace llvm::AMDGPU::Swizzle;
7166 
7167   int64_t Lane[LANE_NUM];
7168   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7169                            "expected a 2-bit lane id")) {
7170     Imm = QUAD_PERM_ENC;
7171     for (unsigned I = 0; I < LANE_NUM; ++I) {
7172       Imm |= Lane[I] << (LANE_SHIFT * I);
7173     }
7174     return true;
7175   }
7176   return false;
7177 }
7178 
7179 bool
7180 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
7181   using namespace llvm::AMDGPU::Swizzle;
7182 
7183   SMLoc Loc;
7184   int64_t GroupSize;
7185   int64_t LaneIdx;
7186 
7187   if (!parseSwizzleOperand(GroupSize,
7188                            2, 32,
7189                            "group size must be in the interval [2,32]",
7190                            Loc)) {
7191     return false;
7192   }
7193   if (!isPowerOf2_64(GroupSize)) {
7194     Error(Loc, "group size must be a power of two");
7195     return false;
7196   }
7197   if (parseSwizzleOperand(LaneIdx,
7198                           0, GroupSize - 1,
7199                           "lane id must be in the interval [0,group size - 1]",
7200                           Loc)) {
7201     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
7202     return true;
7203   }
7204   return false;
7205 }
7206 
7207 bool
7208 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
7209   using namespace llvm::AMDGPU::Swizzle;
7210 
7211   SMLoc Loc;
7212   int64_t GroupSize;
7213 
7214   if (!parseSwizzleOperand(GroupSize,
7215                            2, 32,
7216                            "group size must be in the interval [2,32]",
7217                            Loc)) {
7218     return false;
7219   }
7220   if (!isPowerOf2_64(GroupSize)) {
7221     Error(Loc, "group size must be a power of two");
7222     return false;
7223   }
7224 
7225   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
7226   return true;
7227 }
7228 
7229 bool
7230 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
7231   using namespace llvm::AMDGPU::Swizzle;
7232 
7233   SMLoc Loc;
7234   int64_t GroupSize;
7235 
7236   if (!parseSwizzleOperand(GroupSize,
7237                            1, 16,
7238                            "group size must be in the interval [1,16]",
7239                            Loc)) {
7240     return false;
7241   }
7242   if (!isPowerOf2_64(GroupSize)) {
7243     Error(Loc, "group size must be a power of two");
7244     return false;
7245   }
7246 
7247   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
7248   return true;
7249 }
7250 
7251 bool
7252 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
7253   using namespace llvm::AMDGPU::Swizzle;
7254 
7255   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7256     return false;
7257   }
7258 
7259   StringRef Ctl;
7260   SMLoc StrLoc = getLoc();
7261   if (!parseString(Ctl)) {
7262     return false;
7263   }
7264   if (Ctl.size() != BITMASK_WIDTH) {
7265     Error(StrLoc, "expected a 5-character mask");
7266     return false;
7267   }
7268 
7269   unsigned AndMask = 0;
7270   unsigned OrMask = 0;
7271   unsigned XorMask = 0;
7272 
7273   for (size_t i = 0; i < Ctl.size(); ++i) {
7274     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
7275     switch(Ctl[i]) {
7276     default:
7277       Error(StrLoc, "invalid mask");
7278       return false;
7279     case '0':
7280       break;
7281     case '1':
7282       OrMask |= Mask;
7283       break;
7284     case 'p':
7285       AndMask |= Mask;
7286       break;
7287     case 'i':
7288       AndMask |= Mask;
7289       XorMask |= Mask;
7290       break;
7291     }
7292   }
7293 
7294   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
7295   return true;
7296 }
7297 
7298 bool
7299 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
7300 
7301   SMLoc OffsetLoc = getLoc();
7302 
7303   if (!parseExpr(Imm, "a swizzle macro")) {
7304     return false;
7305   }
7306   if (!isUInt<16>(Imm)) {
7307     Error(OffsetLoc, "expected a 16-bit offset");
7308     return false;
7309   }
7310   return true;
7311 }
7312 
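// Parse one of the swizzle macros, e.g. swizzle(QUAD_PERM, 0, 1, 2, 3), and
// encode it into Imm.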
7313 bool
7314 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
7315   using namespace llvm::AMDGPU::Swizzle;
7316 
7317   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
7318 
7319     SMLoc ModeLoc = getLoc();
7320     bool Ok = false;
7321 
7322     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
7323       Ok = parseSwizzleQuadPerm(Imm);
7324     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
7325       Ok = parseSwizzleBitmaskPerm(Imm);
7326     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
7327       Ok = parseSwizzleBroadcast(Imm);
7328     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
7329       Ok = parseSwizzleSwap(Imm);
7330     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
7331       Ok = parseSwizzleReverse(Imm);
7332     } else {
7333       Error(ModeLoc, "expected a swizzle mode");
7334     }
7335 
7336     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
7337   }
7338 
7339   return false;
7340 }
7341 
7342 OperandMatchResultTy
7343 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
7344   SMLoc S = getLoc();
7345   int64_t Imm = 0;
7346 
7347   if (trySkipId("offset")) {
7348 
7349     bool Ok = false;
7350     if (skipToken(AsmToken::Colon, "expected a colon")) {
7351       if (trySkipId("swizzle")) {
7352         Ok = parseSwizzleMacro(Imm);
7353       } else {
7354         Ok = parseSwizzleOffset(Imm);
7355       }
7356     }
7357 
7358     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
7359 
7360     return Ok ? MatchOperand_Success : MatchOperand_ParseFail;
7361   } else {
7362     // Swizzle "offset" operand is optional.
7363     // If it is omitted, try parsing other optional operands.
7364     return parseOptionalOpr(Operands);
7365   }
7366 }
7367 
7368 bool
7369 AMDGPUOperand::isSwizzle() const {
7370   return isImmTy(ImmTySwizzle);
7371 }
7372 
7373 //===----------------------------------------------------------------------===//
7374 // VGPR Index Mode
7375 //===----------------------------------------------------------------------===//
7376 
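// Parse the body of a gpr_idx(...) macro: a comma-separated list of VGPR index
// modes, each of which may appear at most once. Returns the mode bitmask, OFF
// for an empty list, or UNDEF on error.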
7377 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
7378 
7379   using namespace llvm::AMDGPU::VGPRIndexMode;
7380 
7381   if (trySkipToken(AsmToken::RParen)) {
7382     return OFF;
7383   }
7384 
7385   int64_t Imm = 0;
7386 
7387   while (true) {
7388     unsigned Mode = 0;
7389     SMLoc S = getLoc();
7390 
7391     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
7392       if (trySkipId(IdSymbolic[ModeId])) {
7393         Mode = 1 << ModeId;
7394         break;
7395       }
7396     }
7397 
7398     if (Mode == 0) {
7399       Error(S, (Imm == 0) ?
7400                "expected a VGPR index mode or a closing parenthesis" :
7401                "expected a VGPR index mode");
7402       return UNDEF;
7403     }
7404 
7405     if (Imm & Mode) {
7406       Error(S, "duplicate VGPR index mode");
7407       return UNDEF;
7408     }
7409     Imm |= Mode;
7410 
7411     if (trySkipToken(AsmToken::RParen))
7412       break;
7413     if (!skipToken(AsmToken::Comma,
7414                    "expected a comma or a closing parenthesis"))
7415       return UNDEF;
7416   }
7417 
7418   return Imm;
7419 }
7420 
7421 OperandMatchResultTy
7422 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
7423 
7424   using namespace llvm::AMDGPU::VGPRIndexMode;
7425 
7426   int64_t Imm = 0;
7427   SMLoc S = getLoc();
7428 
7429   if (trySkipId("gpr_idx", AsmToken::LParen)) {
7430     Imm = parseGPRIdxMacro();
7431     if (Imm == UNDEF)
7432       return MatchOperand_ParseFail;
7433   } else {
7434     if (getParser().parseAbsoluteExpression(Imm))
7435       return MatchOperand_ParseFail;
7436     if (Imm < 0 || !isUInt<4>(Imm)) {
7437       Error(S, "invalid immediate: only 4-bit values are legal");
7438       return MatchOperand_ParseFail;
7439     }
7440   }
7441 
7442   Operands.push_back(
7443       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
7444   return MatchOperand_Success;
7445 }
7446 
7447 bool AMDGPUOperand::isGPRIdxMode() const {
7448   return isImmTy(ImmTyGprIdxMode);
7449 }
7450 
7451 //===----------------------------------------------------------------------===//
7452 // sopp branch targets
7453 //===----------------------------------------------------------------------===//
7454 
7455 OperandMatchResultTy
7456 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
7457 
7458   // Make sure we are not parsing something
7459   // that looks like a label or an expression but is not.
7460   // This will improve error messages.
7461   if (isRegister() || isModifier())
7462     return MatchOperand_NoMatch;
7463 
7464   if (!parseExpr(Operands))
7465     return MatchOperand_ParseFail;
7466 
7467   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
7468   assert(Opr.isImm() || Opr.isExpr());
7469   SMLoc Loc = Opr.getStartLoc();
7470 
7471   // Currently we do not support arbitrary expressions as branch targets.
7472   // Only labels and absolute expressions are accepted.
7473   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
7474     Error(Loc, "expected an absolute expression or a label");
7475   } else if (Opr.isImm() && !Opr.isS16Imm()) {
7476     Error(Loc, "expected a 16-bit signed jump offset");
7477   }
7478 
7479   return MatchOperand_Success;
7480 }
7481 
7482 //===----------------------------------------------------------------------===//
7483 // Boolean holding registers
7484 //===----------------------------------------------------------------------===//
7485 
7486 OperandMatchResultTy
7487 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
7488   return parseReg(Operands);
7489 }
7490 
7491 //===----------------------------------------------------------------------===//
7492 // mubuf
7493 //===----------------------------------------------------------------------===//
7494 
7495 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
7496   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
7497 }
7498 
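// Convert parsed MUBUF operands into MCInst operands. For atomics, the
// no-return opcode variant is selected unless the GLC bit is set, and a tied
// source operand is inserted for the atomic return destination.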
7499 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
7500                                    const OperandVector &Operands,
7501                                    bool IsAtomic,
7502                                    bool IsLds) {
7503   OptionalImmIndexMap OptionalIdx;
7504   unsigned FirstOperandIdx = 1;
7505   bool IsAtomicReturn = false;
7506 
7507   if (IsAtomic) {
7508     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7509       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7510       if (!Op.isCPol())
7511         continue;
7512       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7513       break;
7514     }
7515 
7516     if (!IsAtomicReturn) {
7517       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7518       if (NewOpc != -1)
7519         Inst.setOpcode(NewOpc);
7520     }
7521 
7522     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7523                       SIInstrFlags::IsAtomicRet;
7524   }
7525 
7526   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7527     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7528 
7529     // Add the register arguments
7530     if (Op.isReg()) {
7531       Op.addRegOperands(Inst, 1);
7532       // Insert a tied src for atomic return dst.
7533       // This cannot be postponed as subsequent calls to
7534       // addImmOperands rely on correct number of MC operands.
7535       if (IsAtomicReturn && i == FirstOperandIdx)
7536         Op.addRegOperands(Inst, 1);
7537       continue;
7538     }
7539 
7540     // Handle the case where soffset is an immediate
7541     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7542       Op.addImmOperands(Inst, 1);
7543       continue;
7544     }
7545 
7546     // Handle tokens like 'offen' which are sometimes hard-coded into the
7547     // asm string.  There are no MCInst operands for these.
7548     if (Op.isToken()) {
7549       continue;
7550     }
7551     assert(Op.isImm());
7552 
7553     // Handle optional arguments
7554     OptionalIdx[Op.getImmTy()] = i;
7555   }
7556 
7557   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7558   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7559 
7560   if (!IsLds) { // tfe is not legal with lds opcodes
7561     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7562   }
7563   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7564 }
7565 
7566 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7567   OptionalImmIndexMap OptionalIdx;
7568 
7569   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7570     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7571 
7572     // Add the register arguments
7573     if (Op.isReg()) {
7574       Op.addRegOperands(Inst, 1);
7575       continue;
7576     }
7577 
7578     // Handle the case where soffset is an immediate
7579     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7580       Op.addImmOperands(Inst, 1);
7581       continue;
7582     }
7583 
7584     // Handle tokens like 'offen' which are sometimes hard-coded into the
7585     // asm string.  There are no MCInst operands for these.
7586     if (Op.isToken()) {
7587       continue;
7588     }
7589     assert(Op.isImm());
7590 
7591     // Handle optional arguments
7592     OptionalIdx[Op.getImmTy()] = i;
7593   }
7594 
7595   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7596                         AMDGPUOperand::ImmTyOffset);
7597   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7598   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7599   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7600   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7601 }
7602 
7603 //===----------------------------------------------------------------------===//
7604 // mimg
7605 //===----------------------------------------------------------------------===//
7606 
7607 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7608                               bool IsAtomic) {
7609   unsigned I = 1;
7610   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7611   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7612     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7613   }
7614 
7615   if (IsAtomic) {
7616     // Add src, same as dst
7617     assert(Desc.getNumDefs() == 1);
7618     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7619   }
7620 
7621   OptionalImmIndexMap OptionalIdx;
7622 
7623   for (unsigned E = Operands.size(); I != E; ++I) {
7624     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7625 
7626     // Add the register arguments
7627     if (Op.isReg()) {
7628       Op.addRegOperands(Inst, 1);
7629     } else if (Op.isImmModifier()) {
7630       OptionalIdx[Op.getImmTy()] = I;
7631     } else if (!Op.isToken()) {
7632       llvm_unreachable("unexpected operand type");
7633     }
7634   }
7635 
7636   bool IsGFX10Plus = isGFX10Plus();
7637 
7638   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7639   if (IsGFX10Plus)
7640     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7641   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7642   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7643   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7644   if (IsGFX10Plus)
7645     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7646   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7647     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7648   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7649   if (!IsGFX10Plus)
7650     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7651   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7652 }
7653 
7654 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7655   cvtMIMG(Inst, Operands, true);
7656 }
7657 
7658 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7659   OptionalImmIndexMap OptionalIdx;
7660   bool IsAtomicReturn = false;
7661 
7662   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7663     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7664     if (!Op.isCPol())
7665       continue;
7666     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7667     break;
7668   }
7669 
7670   if (!IsAtomicReturn) {
7671     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7672     if (NewOpc != -1)
7673       Inst.setOpcode(NewOpc);
7674   }
7675 
7676   IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7677                     SIInstrFlags::IsAtomicRet;
7678 
7679   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7680     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7681 
7682     // Add the register arguments
7683     if (Op.isReg()) {
7684       Op.addRegOperands(Inst, 1);
7685       if (IsAtomicReturn && i == 1)
7686         Op.addRegOperands(Inst, 1);
7687       continue;
7688     }
7689 
7690     // Handle the case where soffset is an immediate
7691     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7692       Op.addImmOperands(Inst, 1);
7693       continue;
7694     }
7695 
7696     // Handle tokens like 'offen' which are sometimes hard-coded into the
7697     // asm string.  There are no MCInst operands for these.
7698     if (Op.isToken()) {
7699       continue;
7700     }
7701     assert(Op.isImm());
7702 
7703     // Handle optional arguments
7704     OptionalIdx[Op.getImmTy()] = i;
7705   }
7706 
7707   if ((int)Inst.getNumOperands() <=
7708       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7709     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7710   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7711 }
7712 
7713 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7714                                       const OperandVector &Operands) {
7715   for (unsigned I = 1; I < Operands.size(); ++I) {
7716     auto &Operand = (AMDGPUOperand &)*Operands[I];
7717     if (Operand.isReg())
7718       Operand.addRegOperands(Inst, 1);
7719   }
7720 
7721   Inst.addOperand(MCOperand::createImm(1)); // a16
7722 }
7723 
7724 //===----------------------------------------------------------------------===//
7725 // smrd
7726 //===----------------------------------------------------------------------===//
7727 
7728 bool AMDGPUOperand::isSMRDOffset8() const {
7729   return isImm() && isUInt<8>(getImm());
7730 }
7731 
7732 bool AMDGPUOperand::isSMEMOffset() const {
7733   return isImm(); // Offset range is checked later by validator.
7734 }
7735 
7736 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7737   // 32-bit literals are only supported on CI, and we only want to use them
7738   // when the offset is wider than 8 bits.
7739   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7740 }
7741 
7742 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7743   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7744 }
7745 
7746 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7747   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7748 }
7749 
7750 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7751   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7752 }
7753 
7754 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7755   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7756 }
7757 
7758 //===----------------------------------------------------------------------===//
7759 // vop3
7760 //===----------------------------------------------------------------------===//
7761 
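// Map an omod multiplier value (1, 2 or 4) to its hardware encoding (0, 1 or 2).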
7762 static bool ConvertOmodMul(int64_t &Mul) {
7763   if (Mul != 1 && Mul != 2 && Mul != 4)
7764     return false;
7765 
7766   Mul >>= 1;
7767   return true;
7768 }
7769 
7770 static bool ConvertOmodDiv(int64_t &Div) {
7771   if (Div == 1) {
7772     Div = 0;
7773     return true;
7774   }
7775 
7776   if (Div == 2) {
7777     Div = 3;
7778     return true;
7779   }
7780 
7781   return false;
7782 }
7783 
7784 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7785 // This is intentional and ensures compatibility with sp3.
7786 // See bug 35397 for details.
7787 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7788   if (BoundCtrl == 0 || BoundCtrl == 1) {
7789     BoundCtrl = 1;
7790     return true;
7791   }
7792   return false;
7793 }
7794 
7795 // Note: the order in this table matches the order of operands in AsmString.
7796 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7797   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7798   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7799   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7800   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7801   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7802   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7803   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7804   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7805   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7806   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7807   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7808   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7809   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7810   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7811   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7812   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7813   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7814   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7815   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7816   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7817   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7818   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7819   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7820   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7821   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7822   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7823   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7824   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
7825   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7826   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7827   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7828   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7829   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7830   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7831   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7832   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7833   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7834   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7835   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7836   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7837   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7838 };
7839 
7840 void AMDGPUAsmParser::onBeginOfFile() {
7841   if (!getParser().getStreamer().getTargetStreamer() ||
7842       getSTI().getTargetTriple().getArch() == Triple::r600)
7843     return;
7844 
7845   if (!getTargetStreamer().getTargetID())
7846     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7847 
7848   if (isHsaAbiVersion3AndAbove(&getSTI()))
7849     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7850 }
7851 
7852 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7853 
7854   OperandMatchResultTy res = parseOptionalOpr(Operands);
7855 
  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // The current design assumes that all operands after the first optional
  // operand are also optional. However, the implementation of some
  // instructions violates this rule (see e.g. flat/global atomics, which
  // have hardcoded 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure the autogenerated parser of custom operands never hits a
  // hardcoded mandatory operand.
7866 
7867   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7868     if (res != MatchOperand_Success ||
7869         isToken(AsmToken::EndOfStatement))
7870       break;
7871 
7872     trySkipToken(AsmToken::Comma);
7873     res = parseOptionalOpr(Operands);
7874   }
7875 
7876   return res;
7877 }
7878 
7879 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7880   OperandMatchResultTy res;
7881   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
    // Try to parse any optional operand here.
7883     if (Op.IsBit) {
7884       res = parseNamedBit(Op.Name, Operands, Op.Type);
7885     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7886       res = parseOModOperand(Operands);
7887     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7888                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7889                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7890       res = parseSDWASel(Operands, Op.Name, Op.Type);
7891     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7892       res = parseSDWADstUnused(Operands);
7893     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7894                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7895                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7896                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7897       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7898                                         Op.ConvertResult);
7899     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7900       res = parseDim(Operands);
7901     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7902       res = parseCPol(Operands);
7903     } else {
7904       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
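      // If "blgp:N" did not match, also accept the alternative "neg:[...]"
      // array spelling for the same operand.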
7905       if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) {
7906         res = parseOperandArrayWithPrefix("neg", Operands,
7907                                           AMDGPUOperand::ImmTyBLGP,
7908                                           nullptr);
7909       }
7910     }
7911     if (res != MatchOperand_NoMatch) {
7912       return res;
7913     }
7914   }
7915   return MatchOperand_NoMatch;
7916 }
7917 
7918 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7919   StringRef Name = getTokenStr();
7920   if (Name == "mul") {
7921     return parseIntWithPrefix("mul", Operands,
7922                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7923   }
7924 
7925   if (Name == "div") {
7926     return parseIntWithPrefix("div", Operands,
7927                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7928   }
7929 
7930   return MatchOperand_NoMatch;
7931 }
7932 
7933 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
7934   cvtVOP3P(Inst, Operands);
7935 
7936   int Opc = Inst.getOpcode();
7937 
7938   int SrcNum;
7939   const int Ops[] = { AMDGPU::OpName::src0,
7940                       AMDGPU::OpName::src1,
7941                       AMDGPU::OpName::src2 };
7942   for (SrcNum = 0;
7943        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
7944        ++SrcNum);
7945   assert(SrcNum > 0);
7946 
7947   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7948   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7949 
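  // If op_sel has the bit just past the last source set, it selects the high
  // half of the destination; record this in src0_modifiers as DST_OP_SEL.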
7950   if ((OpSel & (1 << SrcNum)) != 0) {
7951     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7952     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7953     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
7954   }
7955 }
7956 
7957 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
      // 1. This operand is an input modifier.
  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not the last operand.
      && Desc.NumOperands > (OpNum + 1)
      // 3. The next operand has a register class.
      && Desc.OpInfo[OpNum + 1].RegClass != -1
      // 4. The next register is not tied to any other operand.
      && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7966 }
7967 
7968 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7969 {
7970   OptionalImmIndexMap OptionalIdx;
7971   unsigned Opc = Inst.getOpcode();
7972 
7973   unsigned I = 1;
7974   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7975   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7976     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7977   }
7978 
7979   for (unsigned E = Operands.size(); I != E; ++I) {
7980     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7981     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7982       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7983     } else if (Op.isInterpSlot() ||
7984                Op.isInterpAttr() ||
7985                Op.isAttrChan()) {
7986       Inst.addOperand(MCOperand::createImm(Op.getImm()));
7987     } else if (Op.isImmModifier()) {
7988       OptionalIdx[Op.getImmTy()] = I;
7989     } else {
7990       llvm_unreachable("unhandled operand type");
7991     }
7992   }
7993 
7994   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
7995     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
7996   }
7997 
7998   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7999     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8000   }
8001 
8002   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
8003     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8004   }
8005 }
8006 
8007 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
8008                               OptionalImmIndexMap &OptionalIdx) {
8009   unsigned Opc = Inst.getOpcode();
8010 
8011   unsigned I = 1;
8012   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8013   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8014     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8015   }
8016 
8017   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
8018     // This instruction has src modifiers
8019     for (unsigned E = Operands.size(); I != E; ++I) {
8020       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8021       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8022         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8023       } else if (Op.isImmModifier()) {
8024         OptionalIdx[Op.getImmTy()] = I;
8025       } else if (Op.isRegOrImm()) {
8026         Op.addRegOrImmOperands(Inst, 1);
8027       } else {
8028         llvm_unreachable("unhandled operand type");
8029       }
8030     }
8031   } else {
8032     // No src modifiers
8033     for (unsigned E = Operands.size(); I != E; ++I) {
8034       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8035       if (Op.isMod()) {
8036         OptionalIdx[Op.getImmTy()] = I;
8037       } else {
8038         Op.addRegOrImmOperands(Inst, 1);
8039       }
8040     }
8041   }
8042 
8043   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
8044     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8045   }
8046 
8047   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
8048     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8049   }
8050 
  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // they have a src2 register operand that is tied to the dst operand.
  // The assembler does not allow modifiers for this operand, so src2_modifiers
  // should be 0.
8055   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
8056       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
8057       Opc == AMDGPU::V_MAC_F32_e64_vi ||
8058       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
8059       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
8060       Opc == AMDGPU::V_MAC_F16_e64_vi ||
8061       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
8062       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
8063       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
8064       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
8065       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
8066     auto it = Inst.begin();
8067     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
8068     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
8069     ++it;
8070     // Copy the operand to ensure it's not invalidated when Inst grows.
8071     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
8072   }
8073 }
8074 
8075 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
8076   OptionalImmIndexMap OptionalIdx;
8077   cvtVOP3(Inst, Operands, OptionalIdx);
8078 }
8079 
8080 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
8081                                OptionalImmIndexMap &OptIdx) {
8082   const int Opc = Inst.getOpcode();
8083   const MCInstrDesc &Desc = MII.get(Opc);
8084 
8085   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
8086 
8087   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
8088     assert(!IsPacked);
8089     Inst.addOperand(Inst.getOperand(0));
8090   }
8091 
  // FIXME: This is messy. Parse the modifiers as if this were a normal VOP3
  // instruction, and then figure out where to actually put the modifiers.
8094 
8095   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8096   if (OpSelIdx != -1) {
8097     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
8098   }
8099 
8100   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
8101   if (OpSelHiIdx != -1) {
8102     int DefaultVal = IsPacked ? -1 : 0;
8103     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
8104                           DefaultVal);
8105   }
8106 
8107   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
8108   if (NegLoIdx != -1) {
8109     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
8110     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
8111   }
8112 
8113   const int Ops[] = { AMDGPU::OpName::src0,
8114                       AMDGPU::OpName::src1,
8115                       AMDGPU::OpName::src2 };
8116   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8117                          AMDGPU::OpName::src1_modifiers,
8118                          AMDGPU::OpName::src2_modifiers };
8119 
8120   unsigned OpSel = 0;
8121   unsigned OpSelHi = 0;
8122   unsigned NegLo = 0;
8123   unsigned NegHi = 0;
8124 
8125   if (OpSelIdx != -1)
8126     OpSel = Inst.getOperand(OpSelIdx).getImm();
8127 
8128   if (OpSelHiIdx != -1)
8129     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
8130 
8131   if (NegLoIdx != -1) {
8132     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
8133     NegLo = Inst.getOperand(NegLoIdx).getImm();
8134     NegHi = Inst.getOperand(NegHiIdx).getImm();
8135   }
8136 
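  // Distribute the parsed op_sel/op_sel_hi/neg_lo/neg_hi bit masks into the
  // per-source modifier operands (src0/src1/src2_modifiers).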
8137   for (int J = 0; J < 3; ++J) {
8138     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8139     if (OpIdx == -1)
8140       break;
8141 
8142     uint32_t ModVal = 0;
8143 
8144     if ((OpSel & (1 << J)) != 0)
8145       ModVal |= SISrcMods::OP_SEL_0;
8146 
8147     if ((OpSelHi & (1 << J)) != 0)
8148       ModVal |= SISrcMods::OP_SEL_1;
8149 
8150     if ((NegLo & (1 << J)) != 0)
8151       ModVal |= SISrcMods::NEG;
8152 
8153     if ((NegHi & (1 << J)) != 0)
8154       ModVal |= SISrcMods::NEG_HI;
8155 
8156     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8157 
8158     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
8159   }
8160 }
8161 
8162 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8163   OptionalImmIndexMap OptIdx;
8164   cvtVOP3(Inst, Operands, OptIdx);
8165   cvtVOP3P(Inst, Operands, OptIdx);
8166 }
8167 
8168 //===----------------------------------------------------------------------===//
8169 // dpp
8170 //===----------------------------------------------------------------------===//
8171 
8172 bool AMDGPUOperand::isDPP8() const {
8173   return isImmTy(ImmTyDPP8);
8174 }
8175 
8176 bool AMDGPUOperand::isDPPCtrl() const {
8177   using namespace AMDGPU::DPP;
8178 
8179   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8180   if (result) {
8181     int64_t Imm = getImm();
8182     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8183            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
8184            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8185            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
8186            (Imm == DppCtrl::WAVE_SHL1) ||
8187            (Imm == DppCtrl::WAVE_ROL1) ||
8188            (Imm == DppCtrl::WAVE_SHR1) ||
8189            (Imm == DppCtrl::WAVE_ROR1) ||
8190            (Imm == DppCtrl::ROW_MIRROR) ||
8191            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
8192            (Imm == DppCtrl::BCAST15) ||
8193            (Imm == DppCtrl::BCAST31) ||
8194            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8195            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
8196   }
8197   return false;
8198 }
8199 
8200 //===----------------------------------------------------------------------===//
8201 // mAI
8202 //===----------------------------------------------------------------------===//
8203 
8204 bool AMDGPUOperand::isBLGP() const {
8205   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8206 }
8207 
8208 bool AMDGPUOperand::isCBSZ() const {
8209   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
8210 }
8211 
8212 bool AMDGPUOperand::isABID() const {
8213   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
8214 }
8215 
8216 bool AMDGPUOperand::isS16Imm() const {
8217   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8218 }
8219 
8220 bool AMDGPUOperand::isU16Imm() const {
8221   return isImm() && isUInt<16>(getImm());
8222 }
8223 
8224 //===----------------------------------------------------------------------===//
8225 // dim
8226 //===----------------------------------------------------------------------===//
8227 
8228 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
8229   // We want to allow "dim:1D" etc.,
8230   // but the initial 1 is tokenized as an integer.
8231   std::string Token;
8232   if (isToken(AsmToken::Integer)) {
8233     SMLoc Loc = getToken().getEndLoc();
8234     Token = std::string(getTokenStr());
8235     lex();
8236     if (getLoc() != Loc)
8237       return false;
8238   }
8239 
8240   StringRef Suffix;
8241   if (!parseId(Suffix))
8242     return false;
8243   Token += Suffix;
8244 
8245   StringRef DimId = Token;
8246   if (DimId.startswith("SQ_RSRC_IMG_"))
8247     DimId = DimId.drop_front(12);
8248 
8249   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
8250   if (!DimInfo)
8251     return false;
8252 
8253   Encoding = DimInfo->Encoding;
8254   return true;
8255 }
8256 
8257 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
8258   if (!isGFX10Plus())
8259     return MatchOperand_NoMatch;
8260 
8261   SMLoc S = getLoc();
8262 
8263   if (!trySkipId("dim", AsmToken::Colon))
8264     return MatchOperand_NoMatch;
8265 
8266   unsigned Encoding;
8267   SMLoc Loc = getLoc();
8268   if (!parseDimId(Encoding)) {
8269     Error(Loc, "invalid dim value");
8270     return MatchOperand_ParseFail;
8271   }
8272 
8273   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
8274                                               AMDGPUOperand::ImmTyDim));
8275   return MatchOperand_Success;
8276 }
8277 
8278 //===----------------------------------------------------------------------===//
8279 // dpp
8280 //===----------------------------------------------------------------------===//
8281 
8282 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
8283   SMLoc S = getLoc();
8284 
8285   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
8286     return MatchOperand_NoMatch;
8287 
8288   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
8289 
8290   int64_t Sels[8];
8291 
8292   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8293     return MatchOperand_ParseFail;
8294 
8295   for (size_t i = 0; i < 8; ++i) {
8296     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8297       return MatchOperand_ParseFail;
8298 
8299     SMLoc Loc = getLoc();
8300     if (getParser().parseAbsoluteExpression(Sels[i]))
8301       return MatchOperand_ParseFail;
8302     if (0 > Sels[i] || 7 < Sels[i]) {
8303       Error(Loc, "expected a 3-bit value");
8304       return MatchOperand_ParseFail;
8305     }
8306   }
8307 
8308   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8309     return MatchOperand_ParseFail;
8310 
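  // Pack the eight 3-bit lane selectors into a single dpp8 immediate.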
8311   unsigned DPP8 = 0;
8312   for (size_t i = 0; i < 8; ++i)
8313     DPP8 |= (Sels[i] << (i * 3));
8314 
8315   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
8316   return MatchOperand_Success;
8317 }
8318 
8319 bool
8320 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
8321                                     const OperandVector &Operands) {
8322   if (Ctrl == "row_newbcast")
8323     return isGFX90A();
8324 
8325   if (Ctrl == "row_share" ||
8326       Ctrl == "row_xmask")
8327     return isGFX10Plus();
8328 
8329   if (Ctrl == "wave_shl" ||
8330       Ctrl == "wave_shr" ||
8331       Ctrl == "wave_rol" ||
8332       Ctrl == "wave_ror" ||
8333       Ctrl == "row_bcast")
8334     return isVI() || isGFX9();
8335 
8336   return Ctrl == "row_mirror" ||
8337          Ctrl == "row_half_mirror" ||
8338          Ctrl == "quad_perm" ||
8339          Ctrl == "row_shl" ||
8340          Ctrl == "row_shr" ||
8341          Ctrl == "row_ror";
8342 }
8343 
8344 int64_t
8345 AMDGPUAsmParser::parseDPPCtrlPerm() {
8346   // quad_perm:[%d,%d,%d,%d]
8347 
8348   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8349     return -1;
8350 
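  // Each of the four lane selects is 2 bits; pack them into an 8-bit value.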
8351   int64_t Val = 0;
8352   for (int i = 0; i < 4; ++i) {
8353     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8354       return -1;
8355 
8356     int64_t Temp;
8357     SMLoc Loc = getLoc();
8358     if (getParser().parseAbsoluteExpression(Temp))
8359       return -1;
8360     if (Temp < 0 || Temp > 3) {
8361       Error(Loc, "expected a 2-bit value");
8362       return -1;
8363     }
8364 
8365     Val += (Temp << i * 2);
8366   }
8367 
8368   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8369     return -1;
8370 
8371   return Val;
8372 }
8373 
8374 int64_t
8375 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
8376   using namespace AMDGPU::DPP;
8377 
8378   // sel:%d
8379 
8380   int64_t Val;
8381   SMLoc Loc = getLoc();
8382 
8383   if (getParser().parseAbsoluteExpression(Val))
8384     return -1;
8385 
8386   struct DppCtrlCheck {
8387     int64_t Ctrl;
8388     int Lo;
8389     int Hi;
8390   };
8391 
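  // Map the control name to its base encoding and the allowed range of the
  // value that follows the colon.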
8392   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
8393     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
8394     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
8395     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
8396     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
8397     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
8398     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
8399     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
8400     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
8401     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
8402     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
8403     .Default({-1, 0, 0});
8404 
8405   bool Valid;
8406   if (Check.Ctrl == -1) {
8407     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
    Val = (Val == 15) ? DppCtrl::BCAST15 : DppCtrl::BCAST31;
8409   } else {
8410     Valid = Check.Lo <= Val && Val <= Check.Hi;
8411     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
8412   }
8413 
8414   if (!Valid) {
8415     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
8416     return -1;
8417   }
8418 
8419   return Val;
8420 }
8421 
8422 OperandMatchResultTy
8423 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
8424   using namespace AMDGPU::DPP;
8425 
8426   if (!isToken(AsmToken::Identifier) ||
8427       !isSupportedDPPCtrl(getTokenStr(), Operands))
8428     return MatchOperand_NoMatch;
8429 
8430   SMLoc S = getLoc();
8431   int64_t Val = -1;
8432   StringRef Ctrl;
8433 
8434   parseId(Ctrl);
8435 
8436   if (Ctrl == "row_mirror") {
8437     Val = DppCtrl::ROW_MIRROR;
8438   } else if (Ctrl == "row_half_mirror") {
8439     Val = DppCtrl::ROW_HALF_MIRROR;
8440   } else {
8441     if (skipToken(AsmToken::Colon, "expected a colon")) {
8442       if (Ctrl == "quad_perm") {
8443         Val = parseDPPCtrlPerm();
8444       } else {
8445         Val = parseDPPCtrlSel(Ctrl);
8446       }
8447     }
8448   }
8449 
8450   if (Val == -1)
8451     return MatchOperand_ParseFail;
8452 
8453   Operands.push_back(
8454     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
8455   return MatchOperand_Success;
8456 }
8457 
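// When row_mask is omitted it defaults to 0xf, i.e. all rows enabled.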
8458 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
8459   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
8460 }
8461 
8462 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
8463   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
8464 }
8465 
8466 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
8467   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
8468 }
8469 
8470 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
8471   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
8472 }
8473 
8474 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
8475   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
8476 }
8477 
8478 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8479   OptionalImmIndexMap OptionalIdx;
8480 
8481   unsigned Opc = Inst.getOpcode();
8482   bool HasModifiers =
8483       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
8484   unsigned I = 1;
8485   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8486   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8487     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8488   }
8489 
8490   int Fi = 0;
8491   for (unsigned E = Operands.size(); I != E; ++I) {
8492     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8493                                             MCOI::TIED_TO);
8494     if (TiedTo != -1) {
8495       assert((unsigned)TiedTo < Inst.getNumOperands());
      // Handle the tied 'old' or src2 operand for MAC instructions.
8497       Inst.addOperand(Inst.getOperand(TiedTo));
8498     }
8499     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8500     // Add the register arguments
8501     if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32, ...) dpp uses the "vcc" token.
      // Skip it.
8504       continue;
8505     }
8506 
8507     if (IsDPP8) {
8508       if (Op.isDPP8()) {
8509         Op.addImmOperands(Inst, 1);
8510       } else if (HasModifiers &&
8511                  isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8512         Op.addRegWithFPInputModsOperands(Inst, 2);
8513       } else if (Op.isFI()) {
8514         Fi = Op.getImm();
8515       } else if (Op.isReg()) {
8516         Op.addRegOperands(Inst, 1);
8517       } else {
8518         llvm_unreachable("Invalid operand type");
8519       }
8520     } else {
8521       if (HasModifiers &&
8522           isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8523         Op.addRegWithFPInputModsOperands(Inst, 2);
8524       } else if (Op.isReg()) {
8525         Op.addRegOperands(Inst, 1);
8526       } else if (Op.isDPPCtrl()) {
8527         Op.addImmOperands(Inst, 1);
8528       } else if (Op.isImm()) {
8529         // Handle optional arguments
8530         OptionalIdx[Op.getImmTy()] = I;
8531       } else {
8532         llvm_unreachable("Invalid operand type");
8533       }
8534     }
8535   }
8536 
8537   if (IsDPP8) {
8538     using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
8540   } else {
8541     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8542     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8543     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8544     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8545       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8546     }
8547   }
8548 }
8549 
8550 //===----------------------------------------------------------------------===//
8551 // sdwa
8552 //===----------------------------------------------------------------------===//
8553 
8554 OperandMatchResultTy
8555 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
8556                               AMDGPUOperand::ImmTy Type) {
8557   using namespace llvm::AMDGPU::SDWA;
8558 
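  // Syntax: <prefix>:<sel>, e.g. "dst_sel:WORD_1" or "src0_sel:BYTE_3".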
8559   SMLoc S = getLoc();
8560   StringRef Value;
8561   OperandMatchResultTy res;
8562 
8563   SMLoc StringLoc;
8564   res = parseStringWithPrefix(Prefix, Value, StringLoc);
8565   if (res != MatchOperand_Success) {
8566     return res;
8567   }
8568 
8569   int64_t Int;
8570   Int = StringSwitch<int64_t>(Value)
8571         .Case("BYTE_0", SdwaSel::BYTE_0)
8572         .Case("BYTE_1", SdwaSel::BYTE_1)
8573         .Case("BYTE_2", SdwaSel::BYTE_2)
8574         .Case("BYTE_3", SdwaSel::BYTE_3)
8575         .Case("WORD_0", SdwaSel::WORD_0)
8576         .Case("WORD_1", SdwaSel::WORD_1)
8577         .Case("DWORD", SdwaSel::DWORD)
8578         .Default(0xffffffff);
8579 
8580   if (Int == 0xffffffff) {
8581     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8582     return MatchOperand_ParseFail;
8583   }
8584 
8585   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8586   return MatchOperand_Success;
8587 }
8588 
8589 OperandMatchResultTy
8590 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8591   using namespace llvm::AMDGPU::SDWA;
8592 
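  // Syntax: dst_unused:UNUSED_PAD, dst_unused:UNUSED_SEXT or
  // dst_unused:UNUSED_PRESERVE.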
8593   SMLoc S = getLoc();
8594   StringRef Value;
8595   OperandMatchResultTy res;
8596 
8597   SMLoc StringLoc;
8598   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8599   if (res != MatchOperand_Success) {
8600     return res;
8601   }
8602 
8603   int64_t Int;
8604   Int = StringSwitch<int64_t>(Value)
8605         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8606         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8607         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8608         .Default(0xffffffff);
8609 
8610   if (Int == 0xffffffff) {
8611     Error(StringLoc, "invalid dst_unused value");
8612     return MatchOperand_ParseFail;
8613   }
8614 
8615   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8616   return MatchOperand_Success;
8617 }
8618 
8619 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8620   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8621 }
8622 
8623 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8624   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8625 }
8626 
8627 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8628   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8629 }
8630 
8631 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8632   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8633 }
8634 
8635 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8636   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8637 }
8638 
8639 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8640                               uint64_t BasicInstType,
8641                               bool SkipDstVcc,
8642                               bool SkipSrcVcc) {
8643   using namespace llvm::AMDGPU::SDWA;
8644 
8645   OptionalImmIndexMap OptionalIdx;
8646   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8647   bool SkippedVcc = false;
8648 
8649   unsigned I = 1;
8650   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8651   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8652     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8653   }
8654 
8655   for (unsigned E = Operands.size(); I != E; ++I) {
8656     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8657     if (SkipVcc && !SkippedVcc && Op.isReg() &&
8658         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32, ...) sdwa uses the "vcc" token as dst.
      // Skip it if it is the 2nd operand (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or the 4th operand (v_addc_u32_sdwa v1, vcc, v2, v3, vcc).
      // Skip VCC only if we did not skip it on the previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
8664       if (BasicInstType == SIInstrFlags::VOP2 &&
8665           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8666            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8667         SkippedVcc = true;
8668         continue;
8669       } else if (BasicInstType == SIInstrFlags::VOPC &&
8670                  Inst.getNumOperands() == 0) {
8671         SkippedVcc = true;
8672         continue;
8673       }
8674     }
8675     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8676       Op.addRegOrImmWithInputModsOperands(Inst, 2);
8677     } else if (Op.isImm()) {
8678       // Handle optional arguments
8679       OptionalIdx[Op.getImmTy()] = I;
8680     } else {
8681       llvm_unreachable("Invalid operand type");
8682     }
8683     SkippedVcc = false;
8684   }
8685 
8686   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
8687       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
8688       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // V_NOP_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments.
8690     switch (BasicInstType) {
8691     case SIInstrFlags::VOP1:
8692       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8693       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8694         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8695       }
8696       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8697       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8698       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8699       break;
8700 
8701     case SIInstrFlags::VOP2:
8702       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8703       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8704         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8705       }
8706       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8707       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8708       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8709       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8710       break;
8711 
8712     case SIInstrFlags::VOPC:
8713       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8714         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8715       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8716       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8717       break;
8718 
8719     default:
8720       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8721     }
8722   }
8723 
  // Special case v_mac_{f16, f32}:
  // they have a src2 register operand that is tied to the dst operand.
8726   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8727       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
8728     auto it = Inst.begin();
8729     std::advance(
8730       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8731     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8732   }
8733 }
8734 
8735 //===----------------------------------------------------------------------===//
8736 // mAI
8737 //===----------------------------------------------------------------------===//
8738 
8739 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8740   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8741 }
8742 
8743 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8744   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8745 }
8746 
8747 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8748   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8749 }
8750 
8751 /// Force static initialization.
8752 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8753   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8754   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8755 }
8756 
8757 #define GET_REGISTER_MATCHER
8758 #define GET_MATCHER_IMPLEMENTATION
8759 #define GET_MNEMONIC_SPELL_CHECKER
8760 #define GET_MNEMONIC_CHECKER
8761 #include "AMDGPUGenAsmMatcher.inc"
8762 
// This function should be defined after the auto-generated include so that
// the MatchClassKind enum is defined.
8765 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8766                                                      unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to find a token and fails to validate
  // the operand. This method checks whether we were given an immediate operand
  // where the corresponding token was expected.
8771   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8772   switch (Kind) {
8773   case MCK_addr64:
8774     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8775   case MCK_gds:
8776     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8777   case MCK_lds:
8778     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8779   case MCK_idxen:
8780     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8781   case MCK_offen:
8782     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8783   case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() always tries to match
    // an operand as a token when isToken returns true, and when the name of
    // the expression is not a valid token, the match fails, so we need to
    // handle it here.
8790     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8791   case MCK_SSrcF32:
8792     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8793   case MCK_SoppBrTarget:
8794     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8795   case MCK_VReg32OrOff:
8796     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8797   case MCK_InterpSlot:
8798     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
8799   case MCK_Attr:
8800     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
8801   case MCK_AttrChan:
8802     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
8803   case MCK_ImmSMEMOffset:
8804     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
8805   case MCK_SReg_64:
8806   case MCK_SReg_64_XEXEC:
8807     // Null is defined as a 32-bit register but
8808     // it should also be enabled with 64-bit operands.
8809     // The following code enables it for SReg_64 operands
8810     // used as source and destination. Remaining source
8811     // operands are handled in isInlinableImm.
8812     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
8813   default:
8814     return Match_InvalidOperand;
8815   }
8816 }
8817 
8818 //===----------------------------------------------------------------------===//
8819 // endpgm
8820 //===----------------------------------------------------------------------===//
8821 
8822 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
8823   SMLoc S = getLoc();
8824   int64_t Imm = 0;
8825 
8826   if (!parseExpr(Imm)) {
8827     // The operand is optional, if not present default to 0
8828     Imm = 0;
8829   }
8830 
8831   if (!isUInt<16>(Imm)) {
8832     Error(S, "expected a 16-bit value");
8833     return MatchOperand_ParseFail;
8834   }
8835 
8836   Operands.push_back(
8837       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
8838   return MatchOperand_Success;
8839 }
8840 
8841 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
8842