1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/BinaryFormat/ELF.h"
24 #include "llvm/MC/MCAsmInfo.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCExpr.h"
27 #include "llvm/MC/MCInst.h"
28 #include "llvm/MC/MCParser/MCAsmLexer.h"
29 #include "llvm/MC/MCParser/MCAsmParser.h"
30 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
31 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
32 #include "llvm/MC/MCSymbol.h"
33 #include "llvm/MC/TargetRegistry.h"
34 #include "llvm/Support/AMDGPUMetadata.h"
35 #include "llvm/Support/AMDHSAKernelDescriptor.h"
36 #include "llvm/Support/Casting.h"
37 #include "llvm/Support/MachineValueType.h"
38 #include "llvm/Support/MathExtras.h"
39 #include "llvm/Support/TargetParser.h"
40 
41 using namespace llvm;
42 using namespace llvm::AMDGPU;
43 using namespace llvm::amdhsa;
44 
45 namespace {
46 
47 class AMDGPUAsmParser;
48 
49 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
50 
51 //===----------------------------------------------------------------------===//
52 // Operand
53 //===----------------------------------------------------------------------===//
54 
55 class AMDGPUOperand : public MCParsedAsmOperand {
56   enum KindTy {
57     Token,
58     Immediate,
59     Register,
60     Expression
61   } Kind;
62 
63   SMLoc StartLoc, EndLoc;
64   const AMDGPUAsmParser *AsmParser;
65 
66 public:
67   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
68       : Kind(Kind_), AsmParser(AsmParser_) {}
69 
70   using Ptr = std::unique_ptr<AMDGPUOperand>;
71 
72   struct Modifiers {
73     bool Abs = false;
74     bool Neg = false;
75     bool Sext = false;
76 
77     bool hasFPModifiers() const { return Abs || Neg; }
78     bool hasIntModifiers() const { return Sext; }
79     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
80 
81     int64_t getFPModifiersOperand() const {
82       int64_t Operand = 0;
83       Operand |= Abs ? SISrcMods::ABS : 0u;
84       Operand |= Neg ? SISrcMods::NEG : 0u;
85       return Operand;
86     }
87 
88     int64_t getIntModifiersOperand() const {
89       int64_t Operand = 0;
90       Operand |= Sext ? SISrcMods::SEXT : 0u;
91       return Operand;
92     }
93 
94     int64_t getModifiersOperand() const {
95       assert(!(hasFPModifiers() && hasIntModifiers())
96            && "fp and int modifiers should not be used simultaneously");
97       if (hasFPModifiers()) {
98         return getFPModifiersOperand();
99       } else if (hasIntModifiers()) {
100         return getIntModifiersOperand();
101       } else {
102         return 0;
103       }
104     }
105 
106     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
107   };
108 
109   enum ImmTy {
110     ImmTyNone,
111     ImmTyGDS,
112     ImmTyLDS,
113     ImmTyOffen,
114     ImmTyIdxen,
115     ImmTyAddr64,
116     ImmTyOffset,
117     ImmTyInstOffset,
118     ImmTyOffset0,
119     ImmTyOffset1,
120     ImmTyCPol,
121     ImmTySWZ,
122     ImmTyTFE,
123     ImmTyD16,
124     ImmTyClampSI,
125     ImmTyOModSI,
126     ImmTyDPP8,
127     ImmTyDppCtrl,
128     ImmTyDppRowMask,
129     ImmTyDppBankMask,
130     ImmTyDppBoundCtrl,
131     ImmTyDppFi,
132     ImmTySdwaDstSel,
133     ImmTySdwaSrc0Sel,
134     ImmTySdwaSrc1Sel,
135     ImmTySdwaDstUnused,
136     ImmTyDMask,
137     ImmTyDim,
138     ImmTyUNorm,
139     ImmTyDA,
140     ImmTyR128A16,
141     ImmTyA16,
142     ImmTyLWE,
143     ImmTyExpTgt,
144     ImmTyExpCompr,
145     ImmTyExpVM,
146     ImmTyFORMAT,
147     ImmTyHwreg,
148     ImmTyOff,
149     ImmTySendMsg,
150     ImmTyInterpSlot,
151     ImmTyInterpAttr,
152     ImmTyAttrChan,
153     ImmTyOpSel,
154     ImmTyOpSelHi,
155     ImmTyNegLo,
156     ImmTyNegHi,
157     ImmTySwizzle,
158     ImmTyGprIdxMode,
159     ImmTyHigh,
160     ImmTyBLGP,
161     ImmTyCBSZ,
162     ImmTyABID,
163     ImmTyEndpgm,
164     ImmTyWaitVDST,
165     ImmTyWaitEXP,
166   };
167 
168   enum ImmKindTy {
169     ImmKindTyNone,
170     ImmKindTyLiteral,
171     ImmKindTyConst,
172   };
173 
174 private:
175   struct TokOp {
176     const char *Data;
177     unsigned Length;
178   };
179 
180   struct ImmOp {
181     int64_t Val;
182     ImmTy Type;
183     bool IsFPImm;
184     mutable ImmKindTy Kind;
185     Modifiers Mods;
186   };
187 
188   struct RegOp {
189     unsigned RegNo;
190     Modifiers Mods;
191   };
192 
193   union {
194     TokOp Tok;
195     ImmOp Imm;
196     RegOp Reg;
197     const MCExpr *Expr;
198   };
199 
200 public:
201   bool isToken() const override {
202     if (Kind == Token)
203       return true;
204 
205     // When parsing operands, we can't always tell if something was meant to be
206     // a token, like 'gds', or an expression that references a global variable.
207     // In this case, we assume the string is an expression, and if we need to
208     // interpret is a token, then we treat the symbol name as the token.
209     return isSymbolRefExpr();
210   }
211 
212   bool isSymbolRefExpr() const {
213     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
214   }
215 
216   bool isImm() const override {
217     return Kind == Immediate;
218   }
219 
220   void setImmKindNone() const {
221     assert(isImm());
222     Imm.Kind = ImmKindTyNone;
223   }
224 
225   void setImmKindLiteral() const {
226     assert(isImm());
227     Imm.Kind = ImmKindTyLiteral;
228   }
229 
230   void setImmKindConst() const {
231     assert(isImm());
232     Imm.Kind = ImmKindTyConst;
233   }
234 
235   bool IsImmKindLiteral() const {
236     return isImm() && Imm.Kind == ImmKindTyLiteral;
237   }
238 
239   bool isImmKindConst() const {
240     return isImm() && Imm.Kind == ImmKindTyConst;
241   }
242 
243   bool isInlinableImm(MVT type) const;
244   bool isLiteralImm(MVT type) const;
245 
246   bool isRegKind() const {
247     return Kind == Register;
248   }
249 
250   bool isReg() const override {
251     return isRegKind() && !hasModifiers();
252   }
253 
254   bool isRegOrInline(unsigned RCID, MVT type) const {
255     return isRegClass(RCID) || isInlinableImm(type);
256   }
257 
258   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
259     return isRegOrInline(RCID, type) || isLiteralImm(type);
260   }
261 
262   bool isRegOrImmWithInt16InputMods() const {
263     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
264   }
265 
266   bool isRegOrImmWithInt32InputMods() const {
267     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
268   }
269 
270   bool isRegOrImmWithInt64InputMods() const {
271     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
272   }
273 
274   bool isRegOrImmWithFP16InputMods() const {
275     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
276   }
277 
278   bool isRegOrImmWithFP32InputMods() const {
279     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
280   }
281 
282   bool isRegOrImmWithFP64InputMods() const {
283     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
284   }
285 
286   bool isVReg() const {
287     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
288            isRegClass(AMDGPU::VReg_64RegClassID) ||
289            isRegClass(AMDGPU::VReg_96RegClassID) ||
290            isRegClass(AMDGPU::VReg_128RegClassID) ||
291            isRegClass(AMDGPU::VReg_160RegClassID) ||
292            isRegClass(AMDGPU::VReg_192RegClassID) ||
293            isRegClass(AMDGPU::VReg_256RegClassID) ||
294            isRegClass(AMDGPU::VReg_512RegClassID) ||
295            isRegClass(AMDGPU::VReg_1024RegClassID);
296   }
297 
298   bool isVReg32() const {
299     return isRegClass(AMDGPU::VGPR_32RegClassID);
300   }
301 
302   bool isVReg32OrOff() const {
303     return isOff() || isVReg32();
304   }
305 
306   bool isNull() const {
307     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
308   }
309 
310   bool isVRegWithInputMods() const;
311 
312   bool isSDWAOperand(MVT type) const;
313   bool isSDWAFP16Operand() const;
314   bool isSDWAFP32Operand() const;
315   bool isSDWAInt16Operand() const;
316   bool isSDWAInt32Operand() const;
317 
318   bool isImmTy(ImmTy ImmT) const {
319     return isImm() && Imm.Type == ImmT;
320   }
321 
322   bool isImmModifier() const {
323     return isImm() && Imm.Type != ImmTyNone;
324   }
325 
326   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
327   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
328   bool isDMask() const { return isImmTy(ImmTyDMask); }
329   bool isDim() const { return isImmTy(ImmTyDim); }
330   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
331   bool isDA() const { return isImmTy(ImmTyDA); }
332   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
333   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
334   bool isLWE() const { return isImmTy(ImmTyLWE); }
335   bool isOff() const { return isImmTy(ImmTyOff); }
336   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
337   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
338   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
339   bool isOffen() const { return isImmTy(ImmTyOffen); }
340   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
341   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
342   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
343   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
344   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
345 
346   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
347   bool isGDS() const { return isImmTy(ImmTyGDS); }
348   bool isLDS() const { return isImmTy(ImmTyLDS); }
349   bool isCPol() const { return isImmTy(ImmTyCPol); }
350   bool isSWZ() const { return isImmTy(ImmTySWZ); }
351   bool isTFE() const { return isImmTy(ImmTyTFE); }
352   bool isD16() const { return isImmTy(ImmTyD16); }
353   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
354   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
355   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
356   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
357   bool isFI() const { return isImmTy(ImmTyDppFi); }
358   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
359   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
360   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
361   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
362   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
363   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
364   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
365   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
366   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
367   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
368   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
369   bool isHigh() const { return isImmTy(ImmTyHigh); }
370 
371   bool isMod() const {
372     return isClampSI() || isOModSI();
373   }
374 
375   bool isRegOrImm() const {
376     return isReg() || isImm();
377   }
378 
379   bool isRegClass(unsigned RCID) const;
380 
381   bool isInlineValue() const;
382 
383   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
384     return isRegOrInline(RCID, type) && !hasModifiers();
385   }
386 
387   bool isSCSrcB16() const {
388     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
389   }
390 
391   bool isSCSrcV2B16() const {
392     return isSCSrcB16();
393   }
394 
395   bool isSCSrcB32() const {
396     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
397   }
398 
399   bool isSCSrcB64() const {
400     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
401   }
402 
403   bool isBoolReg() const;
404 
405   bool isSCSrcF16() const {
406     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
407   }
408 
409   bool isSCSrcV2F16() const {
410     return isSCSrcF16();
411   }
412 
413   bool isSCSrcF32() const {
414     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
415   }
416 
417   bool isSCSrcF64() const {
418     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
419   }
420 
421   bool isSSrcB32() const {
422     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
423   }
424 
425   bool isSSrcB16() const {
426     return isSCSrcB16() || isLiteralImm(MVT::i16);
427   }
428 
429   bool isSSrcV2B16() const {
430     llvm_unreachable("cannot happen");
431     return isSSrcB16();
432   }
433 
434   bool isSSrcB64() const {
435     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
436     // See isVSrc64().
437     return isSCSrcB64() || isLiteralImm(MVT::i64);
438   }
439 
440   bool isSSrcF32() const {
441     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
442   }
443 
444   bool isSSrcF64() const {
445     return isSCSrcB64() || isLiteralImm(MVT::f64);
446   }
447 
448   bool isSSrcF16() const {
449     return isSCSrcB16() || isLiteralImm(MVT::f16);
450   }
451 
452   bool isSSrcV2F16() const {
453     llvm_unreachable("cannot happen");
454     return isSSrcF16();
455   }
456 
457   bool isSSrcV2FP32() const {
458     llvm_unreachable("cannot happen");
459     return isSSrcF32();
460   }
461 
462   bool isSCSrcV2FP32() const {
463     llvm_unreachable("cannot happen");
464     return isSCSrcF32();
465   }
466 
467   bool isSSrcV2INT32() const {
468     llvm_unreachable("cannot happen");
469     return isSSrcB32();
470   }
471 
472   bool isSCSrcV2INT32() const {
473     llvm_unreachable("cannot happen");
474     return isSCSrcB32();
475   }
476 
477   bool isSSrcOrLdsB32() const {
478     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
479            isLiteralImm(MVT::i32) || isExpr();
480   }
481 
482   bool isVCSrcB32() const {
483     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
484   }
485 
486   bool isVCSrcB64() const {
487     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
488   }
489 
490   bool isVCSrcB16() const {
491     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
492   }
493 
494   bool isVCSrcV2B16() const {
495     return isVCSrcB16();
496   }
497 
498   bool isVCSrcF32() const {
499     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
500   }
501 
502   bool isVCSrcF64() const {
503     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
504   }
505 
506   bool isVCSrcF16() const {
507     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
508   }
509 
510   bool isVCSrcV2F16() const {
511     return isVCSrcF16();
512   }
513 
514   bool isVSrcB32() const {
515     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
516   }
517 
518   bool isVSrcB64() const {
519     return isVCSrcF64() || isLiteralImm(MVT::i64);
520   }
521 
522   bool isVSrcB16() const {
523     return isVCSrcB16() || isLiteralImm(MVT::i16);
524   }
525 
526   bool isVSrcV2B16() const {
527     return isVSrcB16() || isLiteralImm(MVT::v2i16);
528   }
529 
530   bool isVCSrcV2FP32() const {
531     return isVCSrcF64();
532   }
533 
534   bool isVSrcV2FP32() const {
535     return isVSrcF64() || isLiteralImm(MVT::v2f32);
536   }
537 
538   bool isVCSrcV2INT32() const {
539     return isVCSrcB64();
540   }
541 
542   bool isVSrcV2INT32() const {
543     return isVSrcB64() || isLiteralImm(MVT::v2i32);
544   }
545 
546   bool isVSrcF32() const {
547     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
548   }
549 
550   bool isVSrcF64() const {
551     return isVCSrcF64() || isLiteralImm(MVT::f64);
552   }
553 
554   bool isVSrcF16() const {
555     return isVCSrcF16() || isLiteralImm(MVT::f16);
556   }
557 
558   bool isVSrcV2F16() const {
559     return isVSrcF16() || isLiteralImm(MVT::v2f16);
560   }
561 
562   bool isVISrcB32() const {
563     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
564   }
565 
566   bool isVISrcB16() const {
567     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
568   }
569 
570   bool isVISrcV2B16() const {
571     return isVISrcB16();
572   }
573 
574   bool isVISrcF32() const {
575     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
576   }
577 
578   bool isVISrcF16() const {
579     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
580   }
581 
582   bool isVISrcV2F16() const {
583     return isVISrcF16() || isVISrcB32();
584   }
585 
586   bool isVISrc_64B64() const {
587     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
588   }
589 
590   bool isVISrc_64F64() const {
591     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
592   }
593 
594   bool isVISrc_64V2FP32() const {
595     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
596   }
597 
598   bool isVISrc_64V2INT32() const {
599     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
600   }
601 
602   bool isVISrc_256B64() const {
603     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
604   }
605 
606   bool isVISrc_256F64() const {
607     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
608   }
609 
610   bool isVISrc_128B16() const {
611     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
612   }
613 
614   bool isVISrc_128V2B16() const {
615     return isVISrc_128B16();
616   }
617 
618   bool isVISrc_128B32() const {
619     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
620   }
621 
622   bool isVISrc_128F32() const {
623     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
624   }
625 
626   bool isVISrc_256V2FP32() const {
627     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
628   }
629 
630   bool isVISrc_256V2INT32() const {
631     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
632   }
633 
634   bool isVISrc_512B32() const {
635     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
636   }
637 
638   bool isVISrc_512B16() const {
639     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
640   }
641 
642   bool isVISrc_512V2B16() const {
643     return isVISrc_512B16();
644   }
645 
646   bool isVISrc_512F32() const {
647     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
648   }
649 
650   bool isVISrc_512F16() const {
651     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
652   }
653 
654   bool isVISrc_512V2F16() const {
655     return isVISrc_512F16() || isVISrc_512B32();
656   }
657 
658   bool isVISrc_1024B32() const {
659     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
660   }
661 
662   bool isVISrc_1024B16() const {
663     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
664   }
665 
666   bool isVISrc_1024V2B16() const {
667     return isVISrc_1024B16();
668   }
669 
670   bool isVISrc_1024F32() const {
671     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
672   }
673 
674   bool isVISrc_1024F16() const {
675     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
676   }
677 
678   bool isVISrc_1024V2F16() const {
679     return isVISrc_1024F16() || isVISrc_1024B32();
680   }
681 
682   bool isAISrcB32() const {
683     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
684   }
685 
686   bool isAISrcB16() const {
687     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
688   }
689 
690   bool isAISrcV2B16() const {
691     return isAISrcB16();
692   }
693 
694   bool isAISrcF32() const {
695     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
696   }
697 
698   bool isAISrcF16() const {
699     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
700   }
701 
702   bool isAISrcV2F16() const {
703     return isAISrcF16() || isAISrcB32();
704   }
705 
706   bool isAISrc_64B64() const {
707     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
708   }
709 
710   bool isAISrc_64F64() const {
711     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
712   }
713 
714   bool isAISrc_128B32() const {
715     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
716   }
717 
718   bool isAISrc_128B16() const {
719     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
720   }
721 
722   bool isAISrc_128V2B16() const {
723     return isAISrc_128B16();
724   }
725 
726   bool isAISrc_128F32() const {
727     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
728   }
729 
730   bool isAISrc_128F16() const {
731     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
732   }
733 
734   bool isAISrc_128V2F16() const {
735     return isAISrc_128F16() || isAISrc_128B32();
736   }
737 
738   bool isVISrc_128F16() const {
739     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
740   }
741 
742   bool isVISrc_128V2F16() const {
743     return isVISrc_128F16() || isVISrc_128B32();
744   }
745 
746   bool isAISrc_256B64() const {
747     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
748   }
749 
750   bool isAISrc_256F64() const {
751     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
752   }
753 
754   bool isAISrc_512B32() const {
755     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
756   }
757 
758   bool isAISrc_512B16() const {
759     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
760   }
761 
762   bool isAISrc_512V2B16() const {
763     return isAISrc_512B16();
764   }
765 
766   bool isAISrc_512F32() const {
767     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
768   }
769 
770   bool isAISrc_512F16() const {
771     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
772   }
773 
774   bool isAISrc_512V2F16() const {
775     return isAISrc_512F16() || isAISrc_512B32();
776   }
777 
778   bool isAISrc_1024B32() const {
779     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
780   }
781 
782   bool isAISrc_1024B16() const {
783     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
784   }
785 
786   bool isAISrc_1024V2B16() const {
787     return isAISrc_1024B16();
788   }
789 
790   bool isAISrc_1024F32() const {
791     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
792   }
793 
794   bool isAISrc_1024F16() const {
795     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
796   }
797 
798   bool isAISrc_1024V2F16() const {
799     return isAISrc_1024F16() || isAISrc_1024B32();
800   }
801 
802   bool isKImmFP32() const {
803     return isLiteralImm(MVT::f32);
804   }
805 
806   bool isKImmFP16() const {
807     return isLiteralImm(MVT::f16);
808   }
809 
810   bool isMem() const override {
811     return false;
812   }
813 
814   bool isExpr() const {
815     return Kind == Expression;
816   }
817 
818   bool isSoppBrTarget() const {
819     return isExpr() || isImm();
820   }
821 
822   bool isSWaitCnt() const;
823   bool isDepCtr() const;
824   bool isSDelayAlu() const;
825   bool isHwreg() const;
826   bool isSendMsg() const;
827   bool isSwizzle() const;
828   bool isSMRDOffset8() const;
829   bool isSMEMOffset() const;
830   bool isSMRDLiteralOffset() const;
831   bool isDPP8() const;
832   bool isDPPCtrl() const;
833   bool isBLGP() const;
834   bool isCBSZ() const;
835   bool isABID() const;
836   bool isGPRIdxMode() const;
837   bool isS16Imm() const;
838   bool isU16Imm() const;
839   bool isEndpgm() const;
840   bool isWaitVDST() const;
841   bool isWaitEXP() const;
842 
843   StringRef getExpressionAsToken() const {
844     assert(isExpr());
845     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
846     return S->getSymbol().getName();
847   }
848 
849   StringRef getToken() const {
850     assert(isToken());
851 
852     if (Kind == Expression)
853       return getExpressionAsToken();
854 
855     return StringRef(Tok.Data, Tok.Length);
856   }
857 
858   int64_t getImm() const {
859     assert(isImm());
860     return Imm.Val;
861   }
862 
863   void setImm(int64_t Val) {
864     assert(isImm());
865     Imm.Val = Val;
866   }
867 
868   ImmTy getImmTy() const {
869     assert(isImm());
870     return Imm.Type;
871   }
872 
873   unsigned getReg() const override {
874     assert(isRegKind());
875     return Reg.RegNo;
876   }
877 
878   SMLoc getStartLoc() const override {
879     return StartLoc;
880   }
881 
882   SMLoc getEndLoc() const override {
883     return EndLoc;
884   }
885 
886   SMRange getLocRange() const {
887     return SMRange(StartLoc, EndLoc);
888   }
889 
890   Modifiers getModifiers() const {
891     assert(isRegKind() || isImmTy(ImmTyNone));
892     return isRegKind() ? Reg.Mods : Imm.Mods;
893   }
894 
895   void setModifiers(Modifiers Mods) {
896     assert(isRegKind() || isImmTy(ImmTyNone));
897     if (isRegKind())
898       Reg.Mods = Mods;
899     else
900       Imm.Mods = Mods;
901   }
902 
903   bool hasModifiers() const {
904     return getModifiers().hasModifiers();
905   }
906 
907   bool hasFPModifiers() const {
908     return getModifiers().hasFPModifiers();
909   }
910 
911   bool hasIntModifiers() const {
912     return getModifiers().hasIntModifiers();
913   }
914 
915   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
916 
917   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
918 
919   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
920 
921   template <unsigned Bitwidth>
922   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
923 
924   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
925     addKImmFPOperands<16>(Inst, N);
926   }
927 
928   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
929     addKImmFPOperands<32>(Inst, N);
930   }
931 
932   void addRegOperands(MCInst &Inst, unsigned N) const;
933 
934   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
935     addRegOperands(Inst, N);
936   }
937 
938   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
939     if (isRegKind())
940       addRegOperands(Inst, N);
941     else if (isExpr())
942       Inst.addOperand(MCOperand::createExpr(Expr));
943     else
944       addImmOperands(Inst, N);
945   }
946 
947   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
948     Modifiers Mods = getModifiers();
949     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
950     if (isRegKind()) {
951       addRegOperands(Inst, N);
952     } else {
953       addImmOperands(Inst, N, false);
954     }
955   }
956 
957   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
958     assert(!hasIntModifiers());
959     addRegOrImmWithInputModsOperands(Inst, N);
960   }
961 
962   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
963     assert(!hasFPModifiers());
964     addRegOrImmWithInputModsOperands(Inst, N);
965   }
966 
967   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
968     Modifiers Mods = getModifiers();
969     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
970     assert(isRegKind());
971     addRegOperands(Inst, N);
972   }
973 
974   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
975     assert(!hasIntModifiers());
976     addRegWithInputModsOperands(Inst, N);
977   }
978 
979   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
980     assert(!hasFPModifiers());
981     addRegWithInputModsOperands(Inst, N);
982   }
983 
984   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
985     if (isImm())
986       addImmOperands(Inst, N);
987     else {
988       assert(isExpr());
989       Inst.addOperand(MCOperand::createExpr(Expr));
990     }
991   }
992 
993   static void printImmTy(raw_ostream& OS, ImmTy Type) {
994     switch (Type) {
995     case ImmTyNone: OS << "None"; break;
996     case ImmTyGDS: OS << "GDS"; break;
997     case ImmTyLDS: OS << "LDS"; break;
998     case ImmTyOffen: OS << "Offen"; break;
999     case ImmTyIdxen: OS << "Idxen"; break;
1000     case ImmTyAddr64: OS << "Addr64"; break;
1001     case ImmTyOffset: OS << "Offset"; break;
1002     case ImmTyInstOffset: OS << "InstOffset"; break;
1003     case ImmTyOffset0: OS << "Offset0"; break;
1004     case ImmTyOffset1: OS << "Offset1"; break;
1005     case ImmTyCPol: OS << "CPol"; break;
1006     case ImmTySWZ: OS << "SWZ"; break;
1007     case ImmTyTFE: OS << "TFE"; break;
1008     case ImmTyD16: OS << "D16"; break;
1009     case ImmTyFORMAT: OS << "FORMAT"; break;
1010     case ImmTyClampSI: OS << "ClampSI"; break;
1011     case ImmTyOModSI: OS << "OModSI"; break;
1012     case ImmTyDPP8: OS << "DPP8"; break;
1013     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1014     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1015     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1016     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1017     case ImmTyDppFi: OS << "FI"; break;
1018     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1019     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1020     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1021     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1022     case ImmTyDMask: OS << "DMask"; break;
1023     case ImmTyDim: OS << "Dim"; break;
1024     case ImmTyUNorm: OS << "UNorm"; break;
1025     case ImmTyDA: OS << "DA"; break;
1026     case ImmTyR128A16: OS << "R128A16"; break;
1027     case ImmTyA16: OS << "A16"; break;
1028     case ImmTyLWE: OS << "LWE"; break;
1029     case ImmTyOff: OS << "Off"; break;
1030     case ImmTyExpTgt: OS << "ExpTgt"; break;
1031     case ImmTyExpCompr: OS << "ExpCompr"; break;
1032     case ImmTyExpVM: OS << "ExpVM"; break;
1033     case ImmTyHwreg: OS << "Hwreg"; break;
1034     case ImmTySendMsg: OS << "SendMsg"; break;
1035     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1036     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1037     case ImmTyAttrChan: OS << "AttrChan"; break;
1038     case ImmTyOpSel: OS << "OpSel"; break;
1039     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1040     case ImmTyNegLo: OS << "NegLo"; break;
1041     case ImmTyNegHi: OS << "NegHi"; break;
1042     case ImmTySwizzle: OS << "Swizzle"; break;
1043     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1044     case ImmTyHigh: OS << "High"; break;
1045     case ImmTyBLGP: OS << "BLGP"; break;
1046     case ImmTyCBSZ: OS << "CBSZ"; break;
1047     case ImmTyABID: OS << "ABID"; break;
1048     case ImmTyEndpgm: OS << "Endpgm"; break;
1049     case ImmTyWaitVDST: OS << "WaitVDST"; break;
1050     case ImmTyWaitEXP: OS << "WaitEXP"; break;
1051     }
1052   }
1053 
1054   void print(raw_ostream &OS) const override {
1055     switch (Kind) {
1056     case Register:
1057       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1058       break;
1059     case Immediate:
1060       OS << '<' << getImm();
1061       if (getImmTy() != ImmTyNone) {
1062         OS << " type: "; printImmTy(OS, getImmTy());
1063       }
1064       OS << " mods: " << Imm.Mods << '>';
1065       break;
1066     case Token:
1067       OS << '\'' << getToken() << '\'';
1068       break;
1069     case Expression:
1070       OS << "<expr " << *Expr << '>';
1071       break;
1072     }
1073   }
1074 
1075   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1076                                       int64_t Val, SMLoc Loc,
1077                                       ImmTy Type = ImmTyNone,
1078                                       bool IsFPImm = false) {
1079     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1080     Op->Imm.Val = Val;
1081     Op->Imm.IsFPImm = IsFPImm;
1082     Op->Imm.Kind = ImmKindTyNone;
1083     Op->Imm.Type = Type;
1084     Op->Imm.Mods = Modifiers();
1085     Op->StartLoc = Loc;
1086     Op->EndLoc = Loc;
1087     return Op;
1088   }
1089 
1090   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1091                                         StringRef Str, SMLoc Loc,
1092                                         bool HasExplicitEncodingSize = true) {
1093     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1094     Res->Tok.Data = Str.data();
1095     Res->Tok.Length = Str.size();
1096     Res->StartLoc = Loc;
1097     Res->EndLoc = Loc;
1098     return Res;
1099   }
1100 
1101   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1102                                       unsigned RegNo, SMLoc S,
1103                                       SMLoc E) {
1104     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1105     Op->Reg.RegNo = RegNo;
1106     Op->Reg.Mods = Modifiers();
1107     Op->StartLoc = S;
1108     Op->EndLoc = E;
1109     return Op;
1110   }
1111 
1112   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1113                                        const class MCExpr *Expr, SMLoc S) {
1114     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1115     Op->Expr = Expr;
1116     Op->StartLoc = S;
1117     Op->EndLoc = S;
1118     return Op;
1119   }
1120 };
1121 
1122 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1123   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1124   return OS;
1125 }
1126 
1127 //===----------------------------------------------------------------------===//
1128 // AsmParser
1129 //===----------------------------------------------------------------------===//
1130 
1131 // Holds info related to the current kernel, e.g. count of SGPRs used.
1132 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1133 // .amdgpu_hsa_kernel or at EOF.
1134 class KernelScopeInfo {
1135   int SgprIndexUnusedMin = -1;
1136   int VgprIndexUnusedMin = -1;
1137   int AgprIndexUnusedMin = -1;
1138   MCContext *Ctx = nullptr;
1139   MCSubtargetInfo const *MSTI = nullptr;
1140 
1141   void usesSgprAt(int i) {
1142     if (i >= SgprIndexUnusedMin) {
1143       SgprIndexUnusedMin = ++i;
1144       if (Ctx) {
1145         MCSymbol* const Sym =
1146           Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1147         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1148       }
1149     }
1150   }
1151 
1152   void usesVgprAt(int i) {
1153     if (i >= VgprIndexUnusedMin) {
1154       VgprIndexUnusedMin = ++i;
1155       if (Ctx) {
1156         MCSymbol* const Sym =
1157           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1158         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1159                                          VgprIndexUnusedMin);
1160         Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1161       }
1162     }
1163   }
1164 
1165   void usesAgprAt(int i) {
1166     // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
1167     if (!hasMAIInsts(*MSTI))
1168       return;
1169 
1170     if (i >= AgprIndexUnusedMin) {
1171       AgprIndexUnusedMin = ++i;
1172       if (Ctx) {
1173         MCSymbol* const Sym =
1174           Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1175         Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1176 
1177         // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1178         MCSymbol* const vSym =
1179           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1180         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1181                                          VgprIndexUnusedMin);
1182         vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1183       }
1184     }
1185   }
1186 
1187 public:
1188   KernelScopeInfo() = default;
1189 
1190   void initialize(MCContext &Context) {
1191     Ctx = &Context;
1192     MSTI = Ctx->getSubtargetInfo();
1193 
1194     usesSgprAt(SgprIndexUnusedMin = -1);
1195     usesVgprAt(VgprIndexUnusedMin = -1);
1196     if (hasMAIInsts(*MSTI)) {
1197       usesAgprAt(AgprIndexUnusedMin = -1);
1198     }
1199   }
1200 
1201   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1202                     unsigned RegWidth) {
1203     switch (RegKind) {
1204     case IS_SGPR:
1205       usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1206       break;
1207     case IS_AGPR:
1208       usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1209       break;
1210     case IS_VGPR:
1211       usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1212       break;
1213     default:
1214       break;
1215     }
1216   }
1217 };
1218 
1219 class AMDGPUAsmParser : public MCTargetAsmParser {
1220   MCAsmParser &Parser;
1221 
1222   // Number of extra operands parsed after the first optional operand.
1223   // This may be necessary to skip hardcoded mandatory operands.
1224   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1225 
1226   unsigned ForcedEncodingSize = 0;
1227   bool ForcedDPP = false;
1228   bool ForcedSDWA = false;
1229   KernelScopeInfo KernelScope;
1230   unsigned CPolSeen;
1231 
1232   /// @name Auto-generated Match Functions
1233   /// {
1234 
1235 #define GET_ASSEMBLER_HEADER
1236 #include "AMDGPUGenAsmMatcher.inc"
1237 
1238   /// }
1239 
1240 private:
1241   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1242   bool OutOfRangeError(SMRange Range);
1243   /// Calculate VGPR/SGPR blocks required for given target, reserved
1244   /// registers, and user-specified NextFreeXGPR values.
1245   ///
1246   /// \param Features [in] Target features, used for bug corrections.
1247   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1248   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1249   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1250   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1251   /// descriptor field, if valid.
1252   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1253   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1254   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1255   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1256   /// \param VGPRBlocks [out] Result VGPR block count.
1257   /// \param SGPRBlocks [out] Result SGPR block count.
1258   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1259                           bool FlatScrUsed, bool XNACKUsed,
1260                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1261                           SMRange VGPRRange, unsigned NextFreeSGPR,
1262                           SMRange SGPRRange, unsigned &VGPRBlocks,
1263                           unsigned &SGPRBlocks);
1264   bool ParseDirectiveAMDGCNTarget();
1265   bool ParseDirectiveAMDHSAKernel();
1266   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1267   bool ParseDirectiveHSACodeObjectVersion();
1268   bool ParseDirectiveHSACodeObjectISA();
1269   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1270   bool ParseDirectiveAMDKernelCodeT();
1271   // TODO: Possibly make subtargetHasRegister const.
1272   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1273   bool ParseDirectiveAMDGPUHsaKernel();
1274 
1275   bool ParseDirectiveISAVersion();
1276   bool ParseDirectiveHSAMetadata();
1277   bool ParseDirectivePALMetadataBegin();
1278   bool ParseDirectivePALMetadata();
1279   bool ParseDirectiveAMDGPULDS();
1280 
1281   /// Common code to parse out a block of text (typically YAML) between start and
1282   /// end directives.
1283   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1284                            const char *AssemblerDirectiveEnd,
1285                            std::string &CollectString);
1286 
1287   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1288                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1289   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1290                            unsigned &RegNum, unsigned &RegWidth,
1291                            bool RestoreOnFailure = false);
1292   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1293                            unsigned &RegNum, unsigned &RegWidth,
1294                            SmallVectorImpl<AsmToken> &Tokens);
1295   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1296                            unsigned &RegWidth,
1297                            SmallVectorImpl<AsmToken> &Tokens);
1298   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1299                            unsigned &RegWidth,
1300                            SmallVectorImpl<AsmToken> &Tokens);
1301   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1302                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1303   bool ParseRegRange(unsigned& Num, unsigned& Width);
1304   unsigned getRegularReg(RegisterKind RegKind,
1305                          unsigned RegNum,
1306                          unsigned RegWidth,
1307                          SMLoc Loc);
1308 
1309   bool isRegister();
1310   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1311   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1312   void initializeGprCountSymbol(RegisterKind RegKind);
1313   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1314                              unsigned RegWidth);
1315   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1316                     bool IsAtomic, bool IsLds = false);
1317   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1318                  bool IsGdsHardcoded);
1319 
1320 public:
1321   enum AMDGPUMatchResultTy {
1322     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1323   };
1324   enum OperandMode {
1325     OperandMode_Default,
1326     OperandMode_NSA,
1327   };
1328 
1329   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1330 
1331   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1332                const MCInstrInfo &MII,
1333                const MCTargetOptions &Options)
1334       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1335     MCAsmParserExtension::Initialize(Parser);
1336 
1337     if (getFeatureBits().none()) {
1338       // Set default features.
1339       copySTI().ToggleFeature("southern-islands");
1340     }
1341 
1342     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1343 
1344     {
1345       // TODO: make those pre-defined variables read-only.
1346       // Currently there is none suitable machinery in the core llvm-mc for this.
1347       // MCSymbol::isRedefinable is intended for another purpose, and
1348       // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
1349       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1350       MCContext &Ctx = getContext();
1351       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1352         MCSymbol *Sym =
1353             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1354         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1355         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1356         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1357         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1358         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1359       } else {
1360         MCSymbol *Sym =
1361             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1362         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1363         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1364         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1365         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1366         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1367       }
1368       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1369         initializeGprCountSymbol(IS_VGPR);
1370         initializeGprCountSymbol(IS_SGPR);
1371       } else
1372         KernelScope.initialize(getContext());
1373     }
1374   }
1375 
1376   bool hasMIMG_R128() const {
1377     return AMDGPU::hasMIMG_R128(getSTI());
1378   }
1379 
1380   bool hasPackedD16() const {
1381     return AMDGPU::hasPackedD16(getSTI());
1382   }
1383 
1384   bool hasGFX10A16() const {
1385     return AMDGPU::hasGFX10A16(getSTI());
1386   }
1387 
1388   bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1389 
1390   bool isSI() const {
1391     return AMDGPU::isSI(getSTI());
1392   }
1393 
1394   bool isCI() const {
1395     return AMDGPU::isCI(getSTI());
1396   }
1397 
1398   bool isVI() const {
1399     return AMDGPU::isVI(getSTI());
1400   }
1401 
1402   bool isGFX9() const {
1403     return AMDGPU::isGFX9(getSTI());
1404   }
1405 
1406   // TODO: isGFX90A is also true for GFX940. We need to clean it.
1407   bool isGFX90A() const {
1408     return AMDGPU::isGFX90A(getSTI());
1409   }
1410 
1411   bool isGFX940() const {
1412     return AMDGPU::isGFX940(getSTI());
1413   }
1414 
1415   bool isGFX9Plus() const {
1416     return AMDGPU::isGFX9Plus(getSTI());
1417   }
1418 
1419   bool isGFX10() const {
1420     return AMDGPU::isGFX10(getSTI());
1421   }
1422 
1423   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1424 
1425   bool isGFX11() const {
1426     return AMDGPU::isGFX11(getSTI());
1427   }
1428 
1429   bool isGFX11Plus() const {
1430     return AMDGPU::isGFX11Plus(getSTI());
1431   }
1432 
1433   bool isGFX10_BEncoding() const {
1434     return AMDGPU::isGFX10_BEncoding(getSTI());
1435   }
1436 
1437   bool hasInv2PiInlineImm() const {
1438     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1439   }
1440 
1441   bool hasFlatOffsets() const {
1442     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1443   }
1444 
1445   bool hasArchitectedFlatScratch() const {
1446     return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1447   }
1448 
1449   bool hasSGPR102_SGPR103() const {
1450     return !isVI() && !isGFX9();
1451   }
1452 
1453   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1454 
1455   bool hasIntClamp() const {
1456     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1457   }
1458 
1459   AMDGPUTargetStreamer &getTargetStreamer() {
1460     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1461     return static_cast<AMDGPUTargetStreamer &>(TS);
1462   }
1463 
1464   const MCRegisterInfo *getMRI() const {
1465     // We need this const_cast because for some reason getContext() is not const
1466     // in MCAsmParser.
1467     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1468   }
1469 
1470   const MCInstrInfo *getMII() const {
1471     return &MII;
1472   }
1473 
1474   const FeatureBitset &getFeatureBits() const {
1475     return getSTI().getFeatureBits();
1476   }
1477 
1478   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1479   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1480   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1481 
1482   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1483   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1484   bool isForcedDPP() const { return ForcedDPP; }
1485   bool isForcedSDWA() const { return ForcedSDWA; }
1486   ArrayRef<unsigned> getMatchedVariants() const;
1487   StringRef getMatchedVariantName() const;
1488 
1489   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1490   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1491                      bool RestoreOnFailure);
1492   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1493   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1494                                         SMLoc &EndLoc) override;
1495   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1496   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1497                                       unsigned Kind) override;
1498   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1499                                OperandVector &Operands, MCStreamer &Out,
1500                                uint64_t &ErrorInfo,
1501                                bool MatchingInlineAsm) override;
1502   bool ParseDirective(AsmToken DirectiveID) override;
1503   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1504                                     OperandMode Mode = OperandMode_Default);
1505   StringRef parseMnemonicSuffix(StringRef Name);
1506   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1507                         SMLoc NameLoc, OperandVector &Operands) override;
1508   //bool ProcessInstruction(MCInst &Inst);
1509 
1510   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1511 
1512   OperandMatchResultTy
1513   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1514                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1515                      bool (*ConvertResult)(int64_t &) = nullptr);
1516 
1517   OperandMatchResultTy
1518   parseOperandArrayWithPrefix(const char *Prefix,
1519                               OperandVector &Operands,
1520                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1521                               bool (*ConvertResult)(int64_t&) = nullptr);
1522 
1523   OperandMatchResultTy
1524   parseNamedBit(StringRef Name, OperandVector &Operands,
1525                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1526   OperandMatchResultTy parseCPol(OperandVector &Operands);
1527   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1528                                              StringRef &Value,
1529                                              SMLoc &StringLoc);
1530 
1531   bool isModifier();
1532   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1533   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1534   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1535   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1536   bool parseSP3NegModifier();
1537   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1538   OperandMatchResultTy parseReg(OperandVector &Operands);
1539   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1540   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1541   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1542   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1543   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1544   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1545   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1546   OperandMatchResultTy parseUfmt(int64_t &Format);
1547   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1548   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1549   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1550   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1551   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1552   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1553   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1554 
1555   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1556   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1557   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1558   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1559 
1560   bool parseCnt(int64_t &IntVal);
1561   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1562 
1563   bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1564   void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1565   OperandMatchResultTy parseDepCtrOps(OperandVector &Operands);
1566 
1567   bool parseDelay(int64_t &Delay);
1568   OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands);
1569 
1570   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1571 
1572 private:
1573   struct OperandInfoTy {
1574     SMLoc Loc;
1575     int64_t Id;
1576     bool IsSymbolic = false;
1577     bool IsDefined = false;
1578 
1579     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1580   };
1581 
1582   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1583   bool validateSendMsg(const OperandInfoTy &Msg,
1584                        const OperandInfoTy &Op,
1585                        const OperandInfoTy &Stream);
1586 
1587   bool parseHwregBody(OperandInfoTy &HwReg,
1588                       OperandInfoTy &Offset,
1589                       OperandInfoTy &Width);
1590   bool validateHwreg(const OperandInfoTy &HwReg,
1591                      const OperandInfoTy &Offset,
1592                      const OperandInfoTy &Width);
1593 
1594   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1595   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1596   SMLoc getBLGPLoc(const OperandVector &Operands) const;
1597 
1598   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1599                       const OperandVector &Operands) const;
1600   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1601   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1602   SMLoc getLitLoc(const OperandVector &Operands) const;
1603   SMLoc getConstLoc(const OperandVector &Operands) const;
1604 
1605   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1606   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1607   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1608   bool validateSOPLiteral(const MCInst &Inst) const;
1609   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1610   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1611   bool validateIntClampSupported(const MCInst &Inst);
1612   bool validateMIMGAtomicDMask(const MCInst &Inst);
1613   bool validateMIMGGatherDMask(const MCInst &Inst);
1614   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1615   Optional<StringRef> validateMIMGDataSize(const MCInst &Inst);
1616   bool validateMIMGAddrSize(const MCInst &Inst);
1617   bool validateMIMGD16(const MCInst &Inst);
1618   bool validateMIMGDim(const MCInst &Inst);
1619   bool validateMIMGMSAA(const MCInst &Inst);
1620   bool validateOpSel(const MCInst &Inst);
1621   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1622   bool validateVccOperand(unsigned Reg) const;
1623   bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1624   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1625   bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1626   bool validateAGPRLdSt(const MCInst &Inst) const;
1627   bool validateVGPRAlign(const MCInst &Inst) const;
1628   bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1629   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1630   bool validateDivScale(const MCInst &Inst);
1631   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1632                              const SMLoc &IDLoc);
1633   bool validateFlatLdsDMA(const MCInst &Inst, const OperandVector &Operands,
1634                           const SMLoc &IDLoc);
1635   Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1636   unsigned getConstantBusLimit(unsigned Opcode) const;
1637   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1638   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1639   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1640 
1641   bool isSupportedMnemo(StringRef Mnemo,
1642                         const FeatureBitset &FBS);
1643   bool isSupportedMnemo(StringRef Mnemo,
1644                         const FeatureBitset &FBS,
1645                         ArrayRef<unsigned> Variants);
1646   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1647 
1648   bool isId(const StringRef Id) const;
1649   bool isId(const AsmToken &Token, const StringRef Id) const;
1650   bool isToken(const AsmToken::TokenKind Kind) const;
1651   bool trySkipId(const StringRef Id);
1652   bool trySkipId(const StringRef Pref, const StringRef Id);
1653   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1654   bool trySkipToken(const AsmToken::TokenKind Kind);
1655   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1656   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1657   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1658 
1659   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1660   AsmToken::TokenKind getTokenKind() const;
1661   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1662   bool parseExpr(OperandVector &Operands);
1663   StringRef getTokenStr() const;
1664   AsmToken peekToken();
1665   AsmToken getToken() const;
1666   SMLoc getLoc() const;
1667   void lex();
1668 
1669 public:
1670   void onBeginOfFile() override;
1671 
1672   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1673   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1674 
1675   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1676   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1677   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1678   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1679   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1680   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1681 
1682   bool parseSwizzleOperand(int64_t &Op,
1683                            const unsigned MinVal,
1684                            const unsigned MaxVal,
1685                            const StringRef ErrMsg,
1686                            SMLoc &Loc);
1687   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1688                             const unsigned MinVal,
1689                             const unsigned MaxVal,
1690                             const StringRef ErrMsg);
1691   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1692   bool parseSwizzleOffset(int64_t &Imm);
1693   bool parseSwizzleMacro(int64_t &Imm);
1694   bool parseSwizzleQuadPerm(int64_t &Imm);
1695   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1696   bool parseSwizzleBroadcast(int64_t &Imm);
1697   bool parseSwizzleSwap(int64_t &Imm);
1698   bool parseSwizzleReverse(int64_t &Imm);
1699 
1700   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1701   int64_t parseGPRIdxMacro();
1702 
1703   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1704   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1705   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1706   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1707 
1708   AMDGPUOperand::Ptr defaultCPol() const;
1709 
1710   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1711   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1712   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1713   AMDGPUOperand::Ptr defaultFlatOffset() const;
1714 
1715   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1716 
1717   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1718                OptionalImmIndexMap &OptionalIdx);
1719   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1720   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1721   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1722   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1723                 OptionalImmIndexMap &OptionalIdx);
1724 
1725   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1726   void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1727 
1728   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1729                bool IsAtomic = false);
1730   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1731   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1732 
1733   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1734 
1735   bool parseDimId(unsigned &Encoding);
1736   OperandMatchResultTy parseDim(OperandVector &Operands);
1737   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1738   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1739   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1740   int64_t parseDPPCtrlSel(StringRef Ctrl);
1741   int64_t parseDPPCtrlPerm();
1742   AMDGPUOperand::Ptr defaultRowMask() const;
1743   AMDGPUOperand::Ptr defaultBankMask() const;
1744   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1745   AMDGPUOperand::Ptr defaultFI() const;
1746   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1747   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1748 
1749   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1750                                     AMDGPUOperand::ImmTy Type);
1751   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1752   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1753   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1754   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1755   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1756   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1757   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1758                uint64_t BasicInstType,
1759                bool SkipDstVcc = false,
1760                bool SkipSrcVcc = false);
1761 
1762   AMDGPUOperand::Ptr defaultBLGP() const;
1763   AMDGPUOperand::Ptr defaultCBSZ() const;
1764   AMDGPUOperand::Ptr defaultABID() const;
1765 
1766   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1767   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1768 
1769   AMDGPUOperand::Ptr defaultWaitVDST() const;
1770   AMDGPUOperand::Ptr defaultWaitEXP() const;
1771 };
1772 
1773 struct OptionalOperand {
1774   const char *Name;
1775   AMDGPUOperand::ImmTy Type;
1776   bool IsBit;
1777   bool (*ConvertResult)(int64_t&);
1778 };
1779 
1780 } // end anonymous namespace
1781 
1782 // May be called with an integer type of equivalent bitwidth.
1783 static const fltSemantics *getFltSemantics(unsigned Size) {
1784   switch (Size) {
1785   case 4:
1786     return &APFloat::IEEEsingle();
1787   case 8:
1788     return &APFloat::IEEEdouble();
1789   case 2:
1790     return &APFloat::IEEEhalf();
1791   default:
1792     llvm_unreachable("unsupported fp type");
1793   }
1794 }
1795 
1796 static const fltSemantics *getFltSemantics(MVT VT) {
1797   return getFltSemantics(VT.getSizeInBits() / 8);
1798 }
1799 
1800 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1801   switch (OperandType) {
1802   case AMDGPU::OPERAND_REG_IMM_INT32:
1803   case AMDGPU::OPERAND_REG_IMM_FP32:
1804   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1805   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1806   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1807   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1808   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1809   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1810   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1811   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1812   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1813   case AMDGPU::OPERAND_KIMM32:
1814     return &APFloat::IEEEsingle();
1815   case AMDGPU::OPERAND_REG_IMM_INT64:
1816   case AMDGPU::OPERAND_REG_IMM_FP64:
1817   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1818   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1819   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1820     return &APFloat::IEEEdouble();
1821   case AMDGPU::OPERAND_REG_IMM_INT16:
1822   case AMDGPU::OPERAND_REG_IMM_FP16:
1823   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1824   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1825   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1826   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1827   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1828   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1829   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1830   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1831   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1832   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1833   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1834   case AMDGPU::OPERAND_KIMM16:
1835     return &APFloat::IEEEhalf();
1836   default:
1837     llvm_unreachable("unsupported fp type");
1838   }
1839 }
1840 
1841 //===----------------------------------------------------------------------===//
1842 // Operand
1843 //===----------------------------------------------------------------------===//
1844 
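// Checks whether an FP literal can be used with an operand of the given type:
// plain precision loss is tolerated, overflow and underflow are not. As an
// illustration (not from the original source), 0.1 converted to f16 is merely
// inexact and would be accepted, while 1.0e10 overflows f16 and would be
// rejected.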
1845 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1846   bool Lost;
1847 
1848   // Convert the literal to the semantics of the target type.
1849   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1850                                                APFloat::rmNearestTiesToEven,
1851                                                &Lost);
1852   // We allow precision loss but not overflow or underflow
1853   if (Status != APFloat::opOK &&
1854       Lost &&
1855       ((Status & APFloat::opOverflow)  != 0 ||
1856        (Status & APFloat::opUnderflow) != 0)) {
1857     return false;
1858   }
1859 
1860   return true;
1861 }
1862 
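// Checks whether Val fits into Size bits as either an unsigned or a signed
// value. Illustrative cases:
//   isSafeTruncation(0xFFFF, 16)  -> true  (fits as unsigned 16-bit)
//   isSafeTruncation(-1, 16)      -> true  (fits as signed 16-bit)
//   isSafeTruncation(0x1FFFF, 16) -> false (fits as neither)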
1863 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1864   return isUIntN(Size, Val) || isIntN(Size, Val);
1865 }
1866 
1867 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1868   if (VT.getScalarType() == MVT::i16) {
1869     // FP immediate values are broken.
1870     return isInlinableIntLiteral(Val);
1871   }
1872 
1873   // f16/v2f16 operands work correctly for all values.
1874   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1875 }
1876 
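// Decides whether this parsed immediate can be encoded as an inline constant
// rather than a 32-bit literal. As a rough sketch (the authoritative checks
// live in AMDGPUBaseInfo), integers in [-16, 64] and a handful of floats such
// as 0.5, 1.0, 2.0, 4.0 and their negations are inlinable; 1/(2*pi) also
// requires the Inv2Pi feature. For example:
//   v_add_f32 v0, 64, v1   // inline constant
//   v_add_f32 v0, 65, v1   // requires a 32-bit literal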
1877 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1878 
1879   // This is a hack to enable named inline values like
1880   // shared_base with both 32-bit and 64-bit operands.
1881   // Note that these values are defined as
1882   // 32-bit operands only.
1883   if (isInlineValue()) {
1884     return true;
1885   }
1886 
1887   if (!isImmTy(ImmTyNone)) {
1888     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1889     return false;
1890   }
1891   // TODO: We should avoid using host float here. It would be better to
1892   // check the float bit values, which is what a few other places do.
1893   // We've had bot failures before due to weird NaN support on mips hosts.
1894 
1895   APInt Literal(64, Imm.Val);
1896 
1897   if (Imm.IsFPImm) { // We got fp literal token
1898     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1899       return AMDGPU::isInlinableLiteral64(Imm.Val,
1900                                           AsmParser->hasInv2PiInlineImm());
1901     }
1902 
1903     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1904     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1905       return false;
1906 
1907     if (type.getScalarSizeInBits() == 16) {
1908       return isInlineableLiteralOp16(
1909         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1910         type, AsmParser->hasInv2PiInlineImm());
1911     }
1912 
1913     // Check if single precision literal is inlinable
1914     return AMDGPU::isInlinableLiteral32(
1915       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1916       AsmParser->hasInv2PiInlineImm());
1917   }
1918 
1919   // We got int literal token.
1920   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1921     return AMDGPU::isInlinableLiteral64(Imm.Val,
1922                                         AsmParser->hasInv2PiInlineImm());
1923   }
1924 
1925   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1926     return false;
1927   }
1928 
1929   if (type.getScalarSizeInBits() == 16) {
1930     return isInlineableLiteralOp16(
1931       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1932       type, AsmParser->hasInv2PiInlineImm());
1933   }
1934 
1935   return AMDGPU::isInlinableLiteral32(
1936     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1937     AsmParser->hasInv2PiInlineImm());
1938 }
1939 
1940 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1941   // Check that this immediate can be added as literal
1942   if (!isImmTy(ImmTyNone)) {
1943     return false;
1944   }
1945 
1946   if (!Imm.IsFPImm) {
1947     // We got int literal token.
1948 
1949     if (type == MVT::f64 && hasFPModifiers()) {
1950       // FP modifiers cannot be applied to int literals while preserving the
1951       // same semantics for VOP1/2/C and VOP3 because of integer truncation.
1952       // To avoid ambiguity, disable these cases.
1953       return false;
1954     }
1955 
1956     unsigned Size = type.getSizeInBits();
1957     if (Size == 64)
1958       Size = 32;
1959 
1960     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1961     // types.
1962     return isSafeTruncation(Imm.Val, Size);
1963   }
1964 
1965   // We got fp literal token
1966   if (type == MVT::f64) { // Expected 64-bit fp operand
1967     // We would set the low 32 bits of the literal to zeroes, but we accept such literals
1968     return true;
1969   }
1970 
1971   if (type == MVT::i64) { // Expected 64-bit int operand
1972     // We don't allow fp literals in 64-bit integer instructions. It is
1973     // unclear how we should encode them.
1974     return false;
1975   }
1976 
1977   // We allow fp literals with f16x2 operands assuming that the specified
1978   // literal goes into the lower half and the upper half is zero. We also
1979   // require that the literal may be losslessly converted to f16.
1980   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1981                      (type == MVT::v2i16)? MVT::i16 :
1982                      (type == MVT::v2f32)? MVT::f32 : type;
1983 
1984   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1985   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1986 }
1987 
1988 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1989   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1990 }
1991 
1992 bool AMDGPUOperand::isVRegWithInputMods() const {
1993   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1994          // GFX90A allows DPP on 64-bit operands.
1995          (isRegClass(AMDGPU::VReg_64RegClassID) &&
1996           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1997 }
1998 
1999 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2000   if (AsmParser->isVI())
2001     return isVReg32();
2002   else if (AsmParser->isGFX9Plus())
2003     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2004   else
2005     return false;
2006 }
2007 
2008 bool AMDGPUOperand::isSDWAFP16Operand() const {
2009   return isSDWAOperand(MVT::f16);
2010 }
2011 
2012 bool AMDGPUOperand::isSDWAFP32Operand() const {
2013   return isSDWAOperand(MVT::f32);
2014 }
2015 
2016 bool AMDGPUOperand::isSDWAInt16Operand() const {
2017   return isSDWAOperand(MVT::i16);
2018 }
2019 
2020 bool AMDGPUOperand::isSDWAInt32Operand() const {
2021   return isSDWAOperand(MVT::i32);
2022 }
2023 
2024 bool AMDGPUOperand::isBoolReg() const {
2025   auto FB = AsmParser->getFeatureBits();
2026   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
2027                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
2028 }
2029 
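// Applies parsed 'abs'/'neg' modifiers directly to the bit pattern of an FP
// literal: 'abs' clears the sign bit, 'neg' flips it. For example, with a
// 4-byte operand, abs applied to 0xBF800000 (-1.0f) yields 0x3F800000 (1.0f).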
2030 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2031 {
2032   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2033   assert(Size == 2 || Size == 4 || Size == 8);
2034 
2035   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2036 
2037   if (Imm.Mods.Abs) {
2038     Val &= ~FpSignMask;
2039   }
2040   if (Imm.Mods.Neg) {
2041     Val ^= FpSignMask;
2042   }
2043 
2044   return Val;
2045 }
2046 
2047 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2048   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2049                              Inst.getNumOperands())) {
2050     addLiteralImmOperand(Inst, Imm.Val,
2051                          ApplyModifiers &
2052                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2053   } else {
2054     assert(!isImmTy(ImmTyNone) || !hasModifiers());
2055     Inst.addOperand(MCOperand::createImm(Imm.Val));
2056     setImmKindNone();
2057   }
2058 }
2059 
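// Emits the literal into the MCInst, choosing between an inline-constant
// encoding and a 32-bit literal based on the operand type. For a 64-bit FP
// operand, a non-inlinable double keeps only its high 32 bits; e.g. 1.5
// (0x3FF8000000000000) would be emitted as the literal 0x3FF80000.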
2060 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2061   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2062   auto OpNum = Inst.getNumOperands();
2063   // Check that this operand accepts literals
2064   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2065 
2066   if (ApplyModifiers) {
2067     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2068     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2069     Val = applyInputFPModifiers(Val, Size);
2070   }
2071 
2072   APInt Literal(64, Val);
2073   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
2074 
2075   if (Imm.IsFPImm) { // We got fp literal token
2076     switch (OpTy) {
2077     case AMDGPU::OPERAND_REG_IMM_INT64:
2078     case AMDGPU::OPERAND_REG_IMM_FP64:
2079     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2080     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2081     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2082       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2083                                        AsmParser->hasInv2PiInlineImm())) {
2084         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2085         setImmKindConst();
2086         return;
2087       }
2088 
2089       // Non-inlineable
2090       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2091         // For fp operands we check if low 32 bits are zeros
2092         if (Literal.getLoBits(32) != 0) {
2093           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2094           "Can't encode literal as exact 64-bit floating-point operand. "
2095           "Low 32-bits will be set to zero");
2096         }
2097 
2098         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2099         setImmKindLiteral();
2100         return;
2101       }
2102 
2103       // We don't allow fp literals in 64-bit integer instructions. It is
2104       // unclear how we should encode them. This case should be checked earlier
2105       // in predicate methods (isLiteralImm())
2106       llvm_unreachable("fp literal in 64-bit integer instruction.");
2107 
2108     case AMDGPU::OPERAND_REG_IMM_INT32:
2109     case AMDGPU::OPERAND_REG_IMM_FP32:
2110     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2111     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2112     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2113     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2114     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2115     case AMDGPU::OPERAND_REG_IMM_INT16:
2116     case AMDGPU::OPERAND_REG_IMM_FP16:
2117     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2118     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2119     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2120     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2121     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2122     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2123     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2124     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2125     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2126     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2127     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2128     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2129     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2130     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2131     case AMDGPU::OPERAND_REG_IMM_V2INT32:
2132     case AMDGPU::OPERAND_KIMM32:
2133     case AMDGPU::OPERAND_KIMM16: {
2134       bool lost;
2135       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2136       // Convert the literal to the operand's floating-point semantics.
2137       FPLiteral.convert(*getOpFltSemantics(OpTy),
2138                         APFloat::rmNearestTiesToEven, &lost);
2139       // We allow precision loss but not overflow or underflow. This should be
2140       // checked earlier in isLiteralImm()
2141 
2142       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2143       Inst.addOperand(MCOperand::createImm(ImmVal));
2144       setImmKindLiteral();
2145       return;
2146     }
2147     default:
2148       llvm_unreachable("invalid operand size");
2149     }
2150 
2151     return;
2152   }
2153 
2154   // We got int literal token.
2155   // Only sign extend inline immediates.
2156   switch (OpTy) {
2157   case AMDGPU::OPERAND_REG_IMM_INT32:
2158   case AMDGPU::OPERAND_REG_IMM_FP32:
2159   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2160   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2161   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2162   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2163   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2164   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2165   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2166   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2167   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2168   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2169   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2170     if (isSafeTruncation(Val, 32) &&
2171         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2172                                      AsmParser->hasInv2PiInlineImm())) {
2173       Inst.addOperand(MCOperand::createImm(Val));
2174       setImmKindConst();
2175       return;
2176     }
2177 
2178     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2179     setImmKindLiteral();
2180     return;
2181 
2182   case AMDGPU::OPERAND_REG_IMM_INT64:
2183   case AMDGPU::OPERAND_REG_IMM_FP64:
2184   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2185   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2186   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2187     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2188       Inst.addOperand(MCOperand::createImm(Val));
2189       setImmKindConst();
2190       return;
2191     }
2192 
2193     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2194     setImmKindLiteral();
2195     return;
2196 
2197   case AMDGPU::OPERAND_REG_IMM_INT16:
2198   case AMDGPU::OPERAND_REG_IMM_FP16:
2199   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2200   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2201   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2202   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2203   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2204     if (isSafeTruncation(Val, 16) &&
2205         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2206                                      AsmParser->hasInv2PiInlineImm())) {
2207       Inst.addOperand(MCOperand::createImm(Val));
2208       setImmKindConst();
2209       return;
2210     }
2211 
2212     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2213     setImmKindLiteral();
2214     return;
2215 
2216   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2217   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2218   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2219   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2220     assert(isSafeTruncation(Val, 16));
2221     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2222                                         AsmParser->hasInv2PiInlineImm()));
2223 
2224     Inst.addOperand(MCOperand::createImm(Val));
2225     return;
2226   }
2227   case AMDGPU::OPERAND_KIMM32:
2228     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2229     setImmKindNone();
2230     return;
2231   case AMDGPU::OPERAND_KIMM16:
2232     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2233     setImmKindNone();
2234     return;
2235   default:
2236     llvm_unreachable("invalid operand size");
2237   }
2238 }
2239 
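// Adds a KIMM (fixed-width immediate) operand. An FP literal is converted to
// the requested width first, so with Bitwidth == 16 the token "1.0" would be
// emitted as 0x3C00, the IEEE half-precision encoding.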
2240 template <unsigned Bitwidth>
2241 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2242   APInt Literal(64, Imm.Val);
2243   setImmKindNone();
2244 
2245   if (!Imm.IsFPImm) {
2246     // We got int literal token.
2247     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2248     return;
2249   }
2250 
2251   bool Lost;
2252   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2253   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2254                     APFloat::rmNearestTiesToEven, &Lost);
2255   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2256 }
2257 
2258 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2259   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2260 }
2261 
2262 static bool isInlineValue(unsigned Reg) {
2263   switch (Reg) {
2264   case AMDGPU::SRC_SHARED_BASE:
2265   case AMDGPU::SRC_SHARED_LIMIT:
2266   case AMDGPU::SRC_PRIVATE_BASE:
2267   case AMDGPU::SRC_PRIVATE_LIMIT:
2268   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2269     return true;
2270   case AMDGPU::SRC_VCCZ:
2271   case AMDGPU::SRC_EXECZ:
2272   case AMDGPU::SRC_SCC:
2273     return true;
2274   case AMDGPU::SGPR_NULL:
2275     return true;
2276   default:
2277     return false;
2278   }
2279 }
2280 
2281 bool AMDGPUOperand::isInlineValue() const {
2282   return isRegKind() && ::isInlineValue(getReg());
2283 }
2284 
2285 //===----------------------------------------------------------------------===//
2286 // AsmParser
2287 //===----------------------------------------------------------------------===//
2288 
2289 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2290   if (Is == IS_VGPR) {
2291     switch (RegWidth) {
2292       default: return -1;
2293       case 32:
2294         return AMDGPU::VGPR_32RegClassID;
2295       case 64:
2296         return AMDGPU::VReg_64RegClassID;
2297       case 96:
2298         return AMDGPU::VReg_96RegClassID;
2299       case 128:
2300         return AMDGPU::VReg_128RegClassID;
2301       case 160:
2302         return AMDGPU::VReg_160RegClassID;
2303       case 192:
2304         return AMDGPU::VReg_192RegClassID;
2305       case 224:
2306         return AMDGPU::VReg_224RegClassID;
2307       case 256:
2308         return AMDGPU::VReg_256RegClassID;
2309       case 512:
2310         return AMDGPU::VReg_512RegClassID;
2311       case 1024:
2312         return AMDGPU::VReg_1024RegClassID;
2313     }
2314   } else if (Is == IS_TTMP) {
2315     switch (RegWidth) {
2316       default: return -1;
2317       case 32:
2318         return AMDGPU::TTMP_32RegClassID;
2319       case 64:
2320         return AMDGPU::TTMP_64RegClassID;
2321       case 128:
2322         return AMDGPU::TTMP_128RegClassID;
2323       case 256:
2324         return AMDGPU::TTMP_256RegClassID;
2325       case 512:
2326         return AMDGPU::TTMP_512RegClassID;
2327     }
2328   } else if (Is == IS_SGPR) {
2329     switch (RegWidth) {
2330       default: return -1;
2331       case 32:
2332         return AMDGPU::SGPR_32RegClassID;
2333       case 64:
2334         return AMDGPU::SGPR_64RegClassID;
2335       case 96:
2336         return AMDGPU::SGPR_96RegClassID;
2337       case 128:
2338         return AMDGPU::SGPR_128RegClassID;
2339       case 160:
2340         return AMDGPU::SGPR_160RegClassID;
2341       case 192:
2342         return AMDGPU::SGPR_192RegClassID;
2343       case 224:
2344         return AMDGPU::SGPR_224RegClassID;
2345       case 256:
2346         return AMDGPU::SGPR_256RegClassID;
2347       case 512:
2348         return AMDGPU::SGPR_512RegClassID;
2349     }
2350   } else if (Is == IS_AGPR) {
2351     switch (RegWidth) {
2352       default: return -1;
2353       case 32:
2354         return AMDGPU::AGPR_32RegClassID;
2355       case 64:
2356         return AMDGPU::AReg_64RegClassID;
2357       case 96:
2358         return AMDGPU::AReg_96RegClassID;
2359       case 128:
2360         return AMDGPU::AReg_128RegClassID;
2361       case 160:
2362         return AMDGPU::AReg_160RegClassID;
2363       case 192:
2364         return AMDGPU::AReg_192RegClassID;
2365       case 224:
2366         return AMDGPU::AReg_224RegClassID;
2367       case 256:
2368         return AMDGPU::AReg_256RegClassID;
2369       case 512:
2370         return AMDGPU::AReg_512RegClassID;
2371       case 1024:
2372         return AMDGPU::AReg_1024RegClassID;
2373     }
2374   }
2375   return -1;
2376 }
2377 
2378 static unsigned getSpecialRegForName(StringRef RegName) {
2379   return StringSwitch<unsigned>(RegName)
2380     .Case("exec", AMDGPU::EXEC)
2381     .Case("vcc", AMDGPU::VCC)
2382     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2383     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2384     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2385     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2386     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2387     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2388     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2389     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2390     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2391     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2392     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2393     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2394     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2395     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2396     .Case("m0", AMDGPU::M0)
2397     .Case("vccz", AMDGPU::SRC_VCCZ)
2398     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2399     .Case("execz", AMDGPU::SRC_EXECZ)
2400     .Case("src_execz", AMDGPU::SRC_EXECZ)
2401     .Case("scc", AMDGPU::SRC_SCC)
2402     .Case("src_scc", AMDGPU::SRC_SCC)
2403     .Case("tba", AMDGPU::TBA)
2404     .Case("tma", AMDGPU::TMA)
2405     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2406     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2407     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2408     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2409     .Case("vcc_lo", AMDGPU::VCC_LO)
2410     .Case("vcc_hi", AMDGPU::VCC_HI)
2411     .Case("exec_lo", AMDGPU::EXEC_LO)
2412     .Case("exec_hi", AMDGPU::EXEC_HI)
2413     .Case("tma_lo", AMDGPU::TMA_LO)
2414     .Case("tma_hi", AMDGPU::TMA_HI)
2415     .Case("tba_lo", AMDGPU::TBA_LO)
2416     .Case("tba_hi", AMDGPU::TBA_HI)
2417     .Case("pc", AMDGPU::PC_REG)
2418     .Case("null", AMDGPU::SGPR_NULL)
2419     .Default(AMDGPU::NoRegister);
2420 }
2421 
2422 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2423                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2424   auto R = parseRegister();
2425   if (!R) return true;
2426   assert(R->isReg());
2427   RegNo = R->getReg();
2428   StartLoc = R->getStartLoc();
2429   EndLoc = R->getEndLoc();
2430   return false;
2431 }
2432 
2433 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2434                                     SMLoc &EndLoc) {
2435   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2436 }
2437 
2438 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2439                                                        SMLoc &StartLoc,
2440                                                        SMLoc &EndLoc) {
2441   bool Result =
2442       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2443   bool PendingErrors = getParser().hasPendingError();
2444   getParser().clearPendingErrors();
2445   if (PendingErrors)
2446     return MatchOperand_ParseFail;
2447   if (Result)
2448     return MatchOperand_NoMatch;
2449   return MatchOperand_Success;
2450 }
2451 
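// Extends a register list with the next parsed register. Special register
// halves are merged (e.g. [exec_lo,exec_hi] becomes exec); regular registers
// must have consecutive indices. Sketch of accepted and rejected lists:
//   [s0,s1,s2,s3]   // ok, grows into a 128-bit SGPR tuple
//   [s0,s2]         // error: indices are not consecutive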
2452 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2453                                             RegisterKind RegKind, unsigned Reg1,
2454                                             SMLoc Loc) {
2455   switch (RegKind) {
2456   case IS_SPECIAL:
2457     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2458       Reg = AMDGPU::EXEC;
2459       RegWidth = 64;
2460       return true;
2461     }
2462     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2463       Reg = AMDGPU::FLAT_SCR;
2464       RegWidth = 64;
2465       return true;
2466     }
2467     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2468       Reg = AMDGPU::XNACK_MASK;
2469       RegWidth = 64;
2470       return true;
2471     }
2472     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2473       Reg = AMDGPU::VCC;
2474       RegWidth = 64;
2475       return true;
2476     }
2477     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2478       Reg = AMDGPU::TBA;
2479       RegWidth = 64;
2480       return true;
2481     }
2482     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2483       Reg = AMDGPU::TMA;
2484       RegWidth = 64;
2485       return true;
2486     }
2487     Error(Loc, "register does not fit in the list");
2488     return false;
2489   case IS_VGPR:
2490   case IS_SGPR:
2491   case IS_AGPR:
2492   case IS_TTMP:
2493     if (Reg1 != Reg + RegWidth / 32) {
2494       Error(Loc, "registers in a list must have consecutive indices");
2495       return false;
2496     }
2497     RegWidth += 32;
2498     return true;
2499   default:
2500     llvm_unreachable("unexpected register kind");
2501   }
2502 }
2503 
2504 struct RegInfo {
2505   StringLiteral Name;
2506   RegisterKind Kind;
2507 };
2508 
2509 static constexpr RegInfo RegularRegisters[] = {
2510   {{"v"},    IS_VGPR},
2511   {{"s"},    IS_SGPR},
2512   {{"ttmp"}, IS_TTMP},
2513   {{"acc"},  IS_AGPR},
2514   {{"a"},    IS_AGPR},
2515 };
2516 
2517 static bool isRegularReg(RegisterKind Kind) {
2518   return Kind == IS_VGPR ||
2519          Kind == IS_SGPR ||
2520          Kind == IS_TTMP ||
2521          Kind == IS_AGPR;
2522 }
2523 
2524 static const RegInfo* getRegularRegInfo(StringRef Str) {
2525   for (const RegInfo &Reg : RegularRegisters)
2526     if (Str.startswith(Reg.Name))
2527       return &Reg;
2528   return nullptr;
2529 }
2530 
2531 static bool getRegNum(StringRef Str, unsigned& Num) {
2532   return !Str.getAsInteger(10, Num);
2533 }
2534 
2535 bool
2536 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2537                             const AsmToken &NextToken) const {
2538 
2539   // A list of consecutive registers: [s0,s1,s2,s3]
2540   if (Token.is(AsmToken::LBrac))
2541     return true;
2542 
2543   if (!Token.is(AsmToken::Identifier))
2544     return false;
2545 
2546   // A single register like s0 or a range of registers like s[0:1]
2547 
2548   StringRef Str = Token.getString();
2549   const RegInfo *Reg = getRegularRegInfo(Str);
2550   if (Reg) {
2551     StringRef RegName = Reg->Name;
2552     StringRef RegSuffix = Str.substr(RegName.size());
2553     if (!RegSuffix.empty()) {
2554       unsigned Num;
2555       // A single register with an index: rXX
2556       if (getRegNum(RegSuffix, Num))
2557         return true;
2558     } else {
2559       // A range of registers: r[XX:YY].
2560       if (NextToken.is(AsmToken::LBrac))
2561         return true;
2562     }
2563   }
2564 
2565   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2566 }
2567 
2568 bool
2569 AMDGPUAsmParser::isRegister()
2570 {
2571   return isRegister(getToken(), peekToken());
2572 }
2573 
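// Maps a parsed regular register (kind, first index, width) to an MC register.
// SGPR and TTMP tuples must start at an index aligned to their size, capped at
// 4 dwords. Illustrative cases:
//   s[2:3]  -> ok     (64-bit tuple, first index is a multiple of 2)
//   s[1:2]  -> error  (invalid register alignment)
//   v[1:2]  -> ok     (no such alignment requirement for VGPRs here)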
2574 unsigned
2575 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2576                                unsigned RegNum,
2577                                unsigned RegWidth,
2578                                SMLoc Loc) {
2579 
2580   assert(isRegularReg(RegKind));
2581 
2582   unsigned AlignSize = 1;
2583   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2584     // SGPR and TTMP registers must be aligned.
2585     // Max required alignment is 4 dwords.
2586     AlignSize = std::min(RegWidth / 32, 4u);
2587   }
2588 
2589   if (RegNum % AlignSize != 0) {
2590     Error(Loc, "invalid register alignment");
2591     return AMDGPU::NoRegister;
2592   }
2593 
2594   unsigned RegIdx = RegNum / AlignSize;
2595   int RCID = getRegClass(RegKind, RegWidth);
2596   if (RCID == -1) {
2597     Error(Loc, "invalid or unsupported register size");
2598     return AMDGPU::NoRegister;
2599   }
2600 
2601   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2602   const MCRegisterClass RC = TRI->getRegClass(RCID);
2603   if (RegIdx >= RC.getNumRegs()) {
2604     Error(Loc, "register index is out of range");
2605     return AMDGPU::NoRegister;
2606   }
2607 
2608   return RC.getRegister(RegIdx);
2609 }
2610 
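// Parses the bracketed index part of a register range, e.g. the "[0:3]" in
// "s[0:3]", producing the first index and the width in bits (128 in this
// case). A single index such as "[5]" is also accepted and yields 32 bits.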
2611 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2612   int64_t RegLo, RegHi;
2613   if (!skipToken(AsmToken::LBrac, "missing register index"))
2614     return false;
2615 
2616   SMLoc FirstIdxLoc = getLoc();
2617   SMLoc SecondIdxLoc;
2618 
2619   if (!parseExpr(RegLo))
2620     return false;
2621 
2622   if (trySkipToken(AsmToken::Colon)) {
2623     SecondIdxLoc = getLoc();
2624     if (!parseExpr(RegHi))
2625       return false;
2626   } else {
2627     RegHi = RegLo;
2628   }
2629 
2630   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2631     return false;
2632 
2633   if (!isUInt<32>(RegLo)) {
2634     Error(FirstIdxLoc, "invalid register index");
2635     return false;
2636   }
2637 
2638   if (!isUInt<32>(RegHi)) {
2639     Error(SecondIdxLoc, "invalid register index");
2640     return false;
2641   }
2642 
2643   if (RegLo > RegHi) {
2644     Error(FirstIdxLoc, "first register index should not exceed second index");
2645     return false;
2646   }
2647 
2648   Num = static_cast<unsigned>(RegLo);
2649   RegWidth = 32 * ((RegHi - RegLo) + 1);
2650   return true;
2651 }
2652 
2653 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2654                                           unsigned &RegNum, unsigned &RegWidth,
2655                                           SmallVectorImpl<AsmToken> &Tokens) {
2656   assert(isToken(AsmToken::Identifier));
2657   unsigned Reg = getSpecialRegForName(getTokenStr());
2658   if (Reg) {
2659     RegNum = 0;
2660     RegWidth = 32;
2661     RegKind = IS_SPECIAL;
2662     Tokens.push_back(getToken());
2663     lex(); // skip register name
2664   }
2665   return Reg;
2666 }
2667 
2668 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2669                                           unsigned &RegNum, unsigned &RegWidth,
2670                                           SmallVectorImpl<AsmToken> &Tokens) {
2671   assert(isToken(AsmToken::Identifier));
2672   StringRef RegName = getTokenStr();
2673   auto Loc = getLoc();
2674 
2675   const RegInfo *RI = getRegularRegInfo(RegName);
2676   if (!RI) {
2677     Error(Loc, "invalid register name");
2678     return AMDGPU::NoRegister;
2679   }
2680 
2681   Tokens.push_back(getToken());
2682   lex(); // skip register name
2683 
2684   RegKind = RI->Kind;
2685   StringRef RegSuffix = RegName.substr(RI->Name.size());
2686   if (!RegSuffix.empty()) {
2687     // Single 32-bit register: vXX.
2688     if (!getRegNum(RegSuffix, RegNum)) {
2689       Error(Loc, "invalid register index");
2690       return AMDGPU::NoRegister;
2691     }
2692     RegWidth = 32;
2693   } else {
2694     // Range of registers: v[XX:YY]. ":YY" is optional.
2695     if (!ParseRegRange(RegNum, RegWidth))
2696       return AMDGPU::NoRegister;
2697   }
2698 
2699   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2700 }
2701 
2702 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2703                                        unsigned &RegWidth,
2704                                        SmallVectorImpl<AsmToken> &Tokens) {
2705   unsigned Reg = AMDGPU::NoRegister;
2706   auto ListLoc = getLoc();
2707 
2708   if (!skipToken(AsmToken::LBrac,
2709                  "expected a register or a list of registers")) {
2710     return AMDGPU::NoRegister;
2711   }
2712 
2713   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2714 
2715   auto Loc = getLoc();
2716   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2717     return AMDGPU::NoRegister;
2718   if (RegWidth != 32) {
2719     Error(Loc, "expected a single 32-bit register");
2720     return AMDGPU::NoRegister;
2721   }
2722 
2723   for (; trySkipToken(AsmToken::Comma); ) {
2724     RegisterKind NextRegKind;
2725     unsigned NextReg, NextRegNum, NextRegWidth;
2726     Loc = getLoc();
2727 
2728     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2729                              NextRegNum, NextRegWidth,
2730                              Tokens)) {
2731       return AMDGPU::NoRegister;
2732     }
2733     if (NextRegWidth != 32) {
2734       Error(Loc, "expected a single 32-bit register");
2735       return AMDGPU::NoRegister;
2736     }
2737     if (NextRegKind != RegKind) {
2738       Error(Loc, "registers in a list must be of the same kind");
2739       return AMDGPU::NoRegister;
2740     }
2741     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2742       return AMDGPU::NoRegister;
2743   }
2744 
2745   if (!skipToken(AsmToken::RBrac,
2746                  "expected a comma or a closing square bracket")) {
2747     return AMDGPU::NoRegister;
2748   }
2749 
2750   if (isRegularReg(RegKind))
2751     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2752 
2753   return Reg;
2754 }
2755 
2756 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2757                                           unsigned &RegNum, unsigned &RegWidth,
2758                                           SmallVectorImpl<AsmToken> &Tokens) {
2759   auto Loc = getLoc();
2760   Reg = AMDGPU::NoRegister;
2761 
2762   if (isToken(AsmToken::Identifier)) {
2763     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2764     if (Reg == AMDGPU::NoRegister)
2765       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2766   } else {
2767     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2768   }
2769 
2770   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2771   if (Reg == AMDGPU::NoRegister) {
2772     assert(Parser.hasPendingError());
2773     return false;
2774   }
2775 
2776   if (!subtargetHasRegister(*TRI, Reg)) {
2777     if (Reg == AMDGPU::SGPR_NULL) {
2778       Error(Loc, "'null' operand is not supported on this GPU");
2779     } else {
2780       Error(Loc, "register not available on this GPU");
2781     }
2782     return false;
2783   }
2784 
2785   return true;
2786 }
2787 
2788 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2789                                           unsigned &RegNum, unsigned &RegWidth,
2790                                           bool RestoreOnFailure /*=false*/) {
2791   Reg = AMDGPU::NoRegister;
2792 
2793   SmallVector<AsmToken, 1> Tokens;
2794   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2795     if (RestoreOnFailure) {
2796       while (!Tokens.empty()) {
2797         getLexer().UnLex(Tokens.pop_back_val());
2798       }
2799     }
2800     return true;
2801   }
2802   return false;
2803 }
2804 
2805 Optional<StringRef>
2806 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2807   switch (RegKind) {
2808   case IS_VGPR:
2809     return StringRef(".amdgcn.next_free_vgpr");
2810   case IS_SGPR:
2811     return StringRef(".amdgcn.next_free_sgpr");
2812   default:
2813     return None;
2814   }
2815 }
2816 
2817 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2818   auto SymbolName = getGprCountSymbolName(RegKind);
2819   assert(SymbolName && "initializing invalid register kind");
2820   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2821   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2822 }
2823 
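// Bumps the .amdgcn.next_free_{v,s}gpr symbol so it stays one past the highest
// register index used so far. For instance, after a use of v[4:7] the VGPR
// symbol would be raised to at least 8 (index 4 plus 4 dwords).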
2824 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2825                                             unsigned DwordRegIndex,
2826                                             unsigned RegWidth) {
2827   // Symbols are only defined for GCN targets
2828   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2829     return true;
2830 
2831   auto SymbolName = getGprCountSymbolName(RegKind);
2832   if (!SymbolName)
2833     return true;
2834   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2835 
2836   int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
2837   int64_t OldCount;
2838 
2839   if (!Sym->isVariable())
2840     return !Error(getLoc(),
2841                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2842   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2843     return !Error(
2844         getLoc(),
2845         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2846 
2847   if (OldCount <= NewMax)
2848     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2849 
2850   return true;
2851 }
2852 
2853 std::unique_ptr<AMDGPUOperand>
2854 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2855   const auto &Tok = getToken();
2856   SMLoc StartLoc = Tok.getLoc();
2857   SMLoc EndLoc = Tok.getEndLoc();
2858   RegisterKind RegKind;
2859   unsigned Reg, RegNum, RegWidth;
2860 
2861   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2862     return nullptr;
2863   }
2864   if (isHsaAbiVersion3AndAbove(&getSTI())) {
2865     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2866       return nullptr;
2867   } else
2868     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2869   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2870 }
2871 
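// Parses a bare immediate operand: an optionally negated floating-point
// literal (stored as its 64-bit bit pattern) or an integer expression.
// Examples of forms this is expected to accept:
//   0.5       // fp literal, IsFPImm = true
//   -2.0      // fp literal with the sign folded into the value
//   0x1234    // integer literal
//   2+2       // absolute MC expression, evaluated to 4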
2872 OperandMatchResultTy
2873 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2874   // TODO: add syntactic sugar for 1/(2*PI)
2875 
2876   assert(!isRegister());
2877   assert(!isModifier());
2878 
2879   const auto& Tok = getToken();
2880   const auto& NextTok = peekToken();
2881   bool IsReal = Tok.is(AsmToken::Real);
2882   SMLoc S = getLoc();
2883   bool Negate = false;
2884 
2885   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2886     lex();
2887     IsReal = true;
2888     Negate = true;
2889   }
2890 
2891   if (IsReal) {
2892     // Floating-point expressions are not supported.
2893     // We can only allow floating-point literals with an
2894     // optional sign.
2895 
2896     StringRef Num = getTokenStr();
2897     lex();
2898 
2899     APFloat RealVal(APFloat::IEEEdouble());
2900     auto roundMode = APFloat::rmNearestTiesToEven;
2901     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2902       return MatchOperand_ParseFail;
2903     }
2904     if (Negate)
2905       RealVal.changeSign();
2906 
2907     Operands.push_back(
2908       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2909                                AMDGPUOperand::ImmTyNone, true));
2910 
2911     return MatchOperand_Success;
2912 
2913   } else {
2914     int64_t IntVal;
2915     const MCExpr *Expr;
2916     SMLoc S = getLoc();
2917 
2918     if (HasSP3AbsModifier) {
2919       // This is a workaround for handling expressions
2920       // as arguments of the SP3 'abs' modifier, for example:
2921       //     |1.0|
2922       //     |-1|
2923       //     |1+x|
2924       // This syntax is not compatible with the syntax of standard
2925       // MC expressions (due to the trailing '|').
2926       SMLoc EndLoc;
2927       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2928         return MatchOperand_ParseFail;
2929     } else {
2930       if (Parser.parseExpression(Expr))
2931         return MatchOperand_ParseFail;
2932     }
2933 
2934     if (Expr->evaluateAsAbsolute(IntVal)) {
2935       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2936     } else {
2937       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2938     }
2939 
2940     return MatchOperand_Success;
2941   }
2942 
2943   return MatchOperand_NoMatch;
2944 }
2945 
2946 OperandMatchResultTy
2947 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2948   if (!isRegister())
2949     return MatchOperand_NoMatch;
2950 
2951   if (auto R = parseRegister()) {
2952     assert(R->isReg());
2953     Operands.push_back(std::move(R));
2954     return MatchOperand_Success;
2955   }
2956   return MatchOperand_ParseFail;
2957 }
2958 
2959 OperandMatchResultTy
2960 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2961   auto res = parseReg(Operands);
2962   if (res != MatchOperand_NoMatch) {
2963     return res;
2964   } else if (isModifier()) {
2965     return MatchOperand_NoMatch;
2966   } else {
2967     return parseImm(Operands, HasSP3AbsMod);
2968   }
2969 }
2970 
2971 bool
2972 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2973   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2974     const auto &str = Token.getString();
2975     return str == "abs" || str == "neg" || str == "sext";
2976   }
2977   return false;
2978 }
2979 
2980 bool
2981 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2982   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2983 }
2984 
2985 bool
2986 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2987   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2988 }
2989 
2990 bool
2991 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2992   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2993 }
2994 
2995 // Check if this is an operand modifier or an opcode modifier
2996 // which may look like an expression but is not. We should
2997 // avoid parsing these modifiers as expressions. Currently
2998 // recognized sequences are:
2999 //   |...|
3000 //   abs(...)
3001 //   neg(...)
3002 //   sext(...)
3003 //   -reg
3004 //   -|...|
3005 //   -abs(...)
3006 //   name:...
3007 // Note that simple opcode modifiers like 'gds' may be parsed as
3008 // expressions; this is a special case. See getExpressionAsToken.
3009 //
3010 bool
3011 AMDGPUAsmParser::isModifier() {
3012 
3013   AsmToken Tok = getToken();
3014   AsmToken NextToken[2];
3015   peekTokens(NextToken);
3016 
3017   return isOperandModifier(Tok, NextToken[0]) ||
3018          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3019          isOpcodeModifierWithVal(Tok, NextToken[0]);
3020 }
3021 
3022 // Check if the current token is an SP3 'neg' modifier.
3023 // Currently this modifier is allowed in the following context:
3024 //
3025 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3026 // 2. Before an 'abs' modifier: -abs(...)
3027 // 3. Before an SP3 'abs' modifier: -|...|
3028 //
3029 // In all other cases "-" is handled as a part
3030 // of an expression that follows the sign.
3031 //
3032 // Note: When "-" is followed by an integer literal,
3033 // this is interpreted as integer negation rather
3034 // than a floating-point NEG modifier applied to the literal.
3035 // Besides being counter-intuitive, such use of a floating-point
3036 // NEG modifier would have resulted in different meanings
3037 // of integer literals used with VOP1/2/C and VOP3,
3038 // for example:
3039 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3040 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3041 // Negative fp literals with a preceding "-" are
3042 // handled likewise for uniformity.
3043 //
3044 bool
3045 AMDGPUAsmParser::parseSP3NegModifier() {
3046 
3047   AsmToken NextToken[2];
3048   peekTokens(NextToken);
3049 
3050   if (isToken(AsmToken::Minus) &&
3051       (isRegister(NextToken[0], NextToken[1]) ||
3052        NextToken[0].is(AsmToken::Pipe) ||
3053        isId(NextToken[0], "abs"))) {
3054     lex();
3055     return true;
3056   }
3057 
3058   return false;
3059 }
3060 
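// Parses a source operand together with optional FP input modifiers, accepting
// both the named and the SP3 spellings. A few forms this is expected to handle
// (operand names are illustrative):
//   abs(v0)   neg(v1)   -v2   |v3|   -|v4|   neg(abs(v5))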
3061 OperandMatchResultTy
3062 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3063                                               bool AllowImm) {
3064   bool Neg, SP3Neg;
3065   bool Abs, SP3Abs;
3066   SMLoc Loc;
3067 
3068   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3069   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
3070     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3071     return MatchOperand_ParseFail;
3072   }
3073 
3074   SP3Neg = parseSP3NegModifier();
3075 
3076   Loc = getLoc();
3077   Neg = trySkipId("neg");
3078   if (Neg && SP3Neg) {
3079     Error(Loc, "expected register or immediate");
3080     return MatchOperand_ParseFail;
3081   }
3082   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3083     return MatchOperand_ParseFail;
3084 
3085   Abs = trySkipId("abs");
3086   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3087     return MatchOperand_ParseFail;
3088 
3089   Loc = getLoc();
3090   SP3Abs = trySkipToken(AsmToken::Pipe);
3091   if (Abs && SP3Abs) {
3092     Error(Loc, "expected register or immediate");
3093     return MatchOperand_ParseFail;
3094   }
3095 
3096   OperandMatchResultTy Res;
3097   if (AllowImm) {
3098     Res = parseRegOrImm(Operands, SP3Abs);
3099   } else {
3100     Res = parseReg(Operands);
3101   }
3102   if (Res != MatchOperand_Success) {
3103     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
3104   }
3105 
3106   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3107     return MatchOperand_ParseFail;
3108   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3109     return MatchOperand_ParseFail;
3110   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3111     return MatchOperand_ParseFail;
3112 
3113   AMDGPUOperand::Modifiers Mods;
3114   Mods.Abs = Abs || SP3Abs;
3115   Mods.Neg = Neg || SP3Neg;
3116 
3117   if (Mods.hasFPModifiers()) {
3118     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3119     if (Op.isExpr()) {
3120       Error(Op.getStartLoc(), "expected an absolute expression");
3121       return MatchOperand_ParseFail;
3122     }
3123     Op.setModifiers(Mods);
3124   }
3125   return MatchOperand_Success;
3126 }
3127 
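// Same idea for the integer input modifier: an operand optionally wrapped in
// sext(...), e.g. "sext(v0)" or "sext(-4)" (illustrative operands).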
3128 OperandMatchResultTy
3129 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3130                                                bool AllowImm) {
3131   bool Sext = trySkipId("sext");
3132   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3133     return MatchOperand_ParseFail;
3134 
3135   OperandMatchResultTy Res;
3136   if (AllowImm) {
3137     Res = parseRegOrImm(Operands);
3138   } else {
3139     Res = parseReg(Operands);
3140   }
3141   if (Res != MatchOperand_Success) {
3142     return Sext? MatchOperand_ParseFail : Res;
3143   }
3144 
3145   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3146     return MatchOperand_ParseFail;
3147 
3148   AMDGPUOperand::Modifiers Mods;
3149   Mods.Sext = Sext;
3150 
3151   if (Mods.hasIntModifiers()) {
3152     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3153     if (Op.isExpr()) {
3154       Error(Op.getStartLoc(), "expected an absolute expression");
3155       return MatchOperand_ParseFail;
3156     }
3157     Op.setModifiers(Mods);
3158   }
3159 
3160   return MatchOperand_Success;
3161 }
3162 
3163 OperandMatchResultTy
3164 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3165   return parseRegOrImmWithFPInputMods(Operands, false);
3166 }
3167 
3168 OperandMatchResultTy
3169 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3170   return parseRegOrImmWithIntInputMods(Operands, false);
3171 }
3172 
3173 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3174   auto Loc = getLoc();
3175   if (trySkipId("off")) {
3176     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3177                                                 AMDGPUOperand::ImmTyOff, false));
3178     return MatchOperand_Success;
3179   }
3180 
3181   if (!isRegister())
3182     return MatchOperand_NoMatch;
3183 
3184   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3185   if (Reg) {
3186     Operands.push_back(std::move(Reg));
3187     return MatchOperand_Success;
3188   }
3189 
  return MatchOperand_ParseFail;
}
3193 
3194 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3195   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3196 
3197   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3198       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3199       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3200       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3201     return Match_InvalidOperand;
3202 
3203   if ((TSFlags & SIInstrFlags::VOP3) &&
3204       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3205       getForcedEncodingSize() != 64)
3206     return Match_PreferE32;
3207 
3208   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3209       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    // v_mac_f32/16 allow only dst_sel == DWORD.
3211     auto OpNum =
3212         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3213     const auto &Op = Inst.getOperand(OpNum);
3214     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3215       return Match_InvalidOperand;
3216     }
3217   }
3218 
3219   return Match_Success;
3220 }
3221 
3222 static ArrayRef<unsigned> getAllVariants() {
3223   static const unsigned Variants[] = {
3224     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3225     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3226   };
3227 
3228   return makeArrayRef(Variants);
3229 }
3230 
// Which asm variants we should check.
3232 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3233   if (getForcedEncodingSize() == 32) {
3234     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3235     return makeArrayRef(Variants);
3236   }
3237 
3238   if (isForcedVOP3()) {
3239     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3240     return makeArrayRef(Variants);
3241   }
3242 
3243   if (isForcedSDWA()) {
3244     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3245                                         AMDGPUAsmVariants::SDWA9};
3246     return makeArrayRef(Variants);
3247   }
3248 
3249   if (isForcedDPP()) {
3250     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3251     return makeArrayRef(Variants);
3252   }
3253 
3254   return getAllVariants();
3255 }
3256 
3257 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3258   if (getForcedEncodingSize() == 32)
3259     return "e32";
3260 
3261   if (isForcedVOP3())
3262     return "e64";
3263 
3264   if (isForcedSDWA())
3265     return "sdwa";
3266 
3267   if (isForcedDPP())
3268     return "dpp";
3269 
3270   return "";
3271 }
3272 
3273 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3274   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3275   const unsigned Num = Desc.getNumImplicitUses();
3276   for (unsigned i = 0; i < Num; ++i) {
3277     unsigned Reg = Desc.ImplicitUses[i];
3278     switch (Reg) {
3279     case AMDGPU::FLAT_SCR:
3280     case AMDGPU::VCC:
3281     case AMDGPU::VCC_LO:
3282     case AMDGPU::VCC_HI:
3283     case AMDGPU::M0:
3284       return Reg;
3285     default:
3286       break;
3287     }
3288   }
3289   return AMDGPU::NoRegister;
3290 }
3291 
3292 // NB: This code is correct only when used to check constant
// bus limitations because GFX7 does not support f16 inline constants.
3294 // Note that there are no cases when a GFX7 opcode violates
3295 // constant bus limitations due to the use of an f16 constant.
3296 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3297                                        unsigned OpIdx) const {
3298   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3299 
3300   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3301     return false;
3302   }
3303 
3304   const MCOperand &MO = Inst.getOperand(OpIdx);
3305 
3306   int64_t Val = MO.getImm();
3307   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3308 
3309   switch (OpSize) { // expected operand size
3310   case 8:
3311     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3312   case 4:
3313     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3314   case 2: {
3315     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3316     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3317         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3318         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3319       return AMDGPU::isInlinableIntLiteral(Val);
3320 
3321     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3322         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3323         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3324       return AMDGPU::isInlinableIntLiteralV216(Val);
3325 
3326     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3327         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3328         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3329       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3330 
3331     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3332   }
3333   default:
3334     llvm_unreachable("invalid operand size");
3335   }
3336 }
3337 
3338 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3339   if (!isGFX10Plus())
3340     return 1;
3341 
3342   switch (Opcode) {
3343   // 64-bit shift instructions can use only one scalar value input
3344   case AMDGPU::V_LSHLREV_B64_e64:
3345   case AMDGPU::V_LSHLREV_B64_gfx10:
3346   case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3347   case AMDGPU::V_LSHRREV_B64_e64:
3348   case AMDGPU::V_LSHRREV_B64_gfx10:
3349   case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3350   case AMDGPU::V_ASHRREV_I64_e64:
3351   case AMDGPU::V_ASHRREV_I64_gfx10:
3352   case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3353   case AMDGPU::V_LSHL_B64_e64:
3354   case AMDGPU::V_LSHR_B64_e64:
3355   case AMDGPU::V_ASHR_I64_e64:
3356     return 1;
3357   default:
3358     return 2;
3359   }
3360 }
3361 
3362 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3363   const MCOperand &MO = Inst.getOperand(OpIdx);
3364   if (MO.isImm()) {
3365     return !isInlineConstant(Inst, OpIdx);
3366   } else if (MO.isReg()) {
3367     auto Reg = MO.getReg();
3368     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3369     auto PReg = mc2PseudoReg(Reg);
3370     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3371   } else {
3372     return true;
3373   }
3374 }
3375 
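// Check that a VALU instruction does not read more scalar values (SGPRs
// and/or literal constants) than the constant bus allows. For example,
// "v_add_f32_e64 v0, s0, s1" reads two different SGPRs and is rejected
// on targets where the limit is one (see getConstantBusLimit).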
3376 bool
3377 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3378                                                 const OperandVector &Operands) {
3379   const unsigned Opcode = Inst.getOpcode();
3380   const MCInstrDesc &Desc = MII.get(Opcode);
3381   unsigned LastSGPR = AMDGPU::NoRegister;
3382   unsigned ConstantBusUseCount = 0;
3383   unsigned NumLiterals = 0;
3384   unsigned LiteralSize;
3385 
3386   if (Desc.TSFlags &
3387       (SIInstrFlags::VOPC |
3388        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3389        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3390        SIInstrFlags::SDWA)) {
3391     // Check special imm operands (used by madmk, etc)
3392     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3393       ++NumLiterals;
3394       LiteralSize = 4;
3395     }
3396 
3397     SmallDenseSet<unsigned> SGPRsUsed;
3398     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3399     if (SGPRUsed != AMDGPU::NoRegister) {
3400       SGPRsUsed.insert(SGPRUsed);
3401       ++ConstantBusUseCount;
3402     }
3403 
3404     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3405     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3406     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3407 
3408     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3409 
3410     for (int OpIdx : OpIndices) {
3411       if (OpIdx == -1) break;
3412 
3413       const MCOperand &MO = Inst.getOperand(OpIdx);
3414       if (usesConstantBus(Inst, OpIdx)) {
3415         if (MO.isReg()) {
3416           LastSGPR = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersection like these:
3418           //   s0, s[0:1]
3419           //   flat_scratch_lo, flat_scratch
3420           //   flat_scratch_lo, flat_scratch_hi
3421           // are theoretically valid but they are disabled anyway.
3422           // Note that this code mimics SIInstrInfo::verifyInstruction
3423           if (!SGPRsUsed.count(LastSGPR)) {
3424             SGPRsUsed.insert(LastSGPR);
3425             ++ConstantBusUseCount;
3426           }
3427         } else { // Expression or a literal
3428 
3429           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3430             continue; // special operand like VINTERP attr_chan
3431 
3432           // An instruction may use only one literal.
          // This has been validated in a previous step.
3434           // See validateVOPLiteral.
3435           // This literal may be used as more than one operand.
3436           // If all these operands are of the same size,
3437           // this literal counts as one scalar value.
3438           // Otherwise it counts as 2 scalar values.
3439           // See "GFX10 Shader Programming", section 3.6.2.3.
3440 
3441           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3442           if (Size < 4) Size = 4;
3443 
3444           if (NumLiterals == 0) {
3445             NumLiterals = 1;
3446             LiteralSize = Size;
3447           } else if (LiteralSize != Size) {
3448             NumLiterals = 2;
3449           }
3450         }
3451       }
3452     }
3453   }
3454   ConstantBusUseCount += NumLiterals;
3455 
3456   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3457     return true;
3458 
3459   SMLoc LitLoc = getLitLoc(Operands);
3460   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3461   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3462   Error(Loc, "invalid operand (violates constant bus restrictions)");
3463   return false;
3464 }
3465 
3466 bool
3467 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3468                                                  const OperandVector &Operands) {
3469   const unsigned Opcode = Inst.getOpcode();
3470   const MCInstrDesc &Desc = MII.get(Opcode);
3471 
3472   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3473   if (DstIdx == -1 ||
3474       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3475     return true;
3476   }
3477 
3478   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3479 
3480   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3481   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3482   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3483 
3484   assert(DstIdx != -1);
3485   const MCOperand &Dst = Inst.getOperand(DstIdx);
3486   assert(Dst.isReg());
3487 
3488   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3489 
3490   for (int SrcIdx : SrcIndices) {
3491     if (SrcIdx == -1) break;
3492     const MCOperand &Src = Inst.getOperand(SrcIdx);
3493     if (Src.isReg()) {
3494       if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) {
3495         const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3496         Error(getRegLoc(SrcReg, Operands),
3497           "destination must be different than all sources");
3498         return false;
3499       }
3500     }
3501   }
3502 
3503   return true;
3504 }
3505 
3506 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3507 
3508   const unsigned Opc = Inst.getOpcode();
3509   const MCInstrDesc &Desc = MII.get(Opc);
3510 
3511   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3512     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3513     assert(ClampIdx != -1);
3514     return Inst.getOperand(ClampIdx).getImm() == 0;
3515   }
3516 
3517   return true;
3518 }
3519 
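// Check that the size of the image data operand (vdata) matches the number
// of components implied by dmask, tfe and packed d16. For example,
// dmask:0x7 implies three data registers, plus one more if tfe is enabled;
// packed d16 halves the count (rounded up).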
3520 Optional<StringRef> AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3521 
3522   const unsigned Opc = Inst.getOpcode();
3523   const MCInstrDesc &Desc = MII.get(Opc);
3524 
3525   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3526     return None;
3527 
3528   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3529   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3530   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3531 
3532   assert(VDataIdx != -1);
3533 
3534   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3535     return None;
3536 
3537   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3538   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3539   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3540   if (DMask == 0)
3541     DMask = 1;
3542 
3543   bool isPackedD16 = false;
3544   unsigned DataSize =
3545     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3546   if (hasPackedD16()) {
3547     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3548     isPackedD16 = D16Idx >= 0;
3549     if (isPackedD16 && Inst.getOperand(D16Idx).getImm())
3550       DataSize = (DataSize + 1) / 2;
3551   }
3552 
3553   if ((VDataSize / 4) == DataSize + TFESize)
3554     return None;
3555 
3556   return StringRef(isPackedD16
3557                        ? "image data size does not match dmask, d16 and tfe"
3558                        : "image data size does not match dmask and tfe");
3559 }
3560 
3561 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3562   const unsigned Opc = Inst.getOpcode();
3563   const MCInstrDesc &Desc = MII.get(Opc);
3564 
3565   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3566     return true;
3567 
3568   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3569 
3570   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3571       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3572   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3573   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3574   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3575   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3576 
3577   assert(VAddr0Idx != -1);
3578   assert(SrsrcIdx != -1);
3579   assert(SrsrcIdx > VAddr0Idx);
3580 
3581   if (DimIdx == -1)
3582     return true; // intersect_ray
3583 
3584   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3585   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3586   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3587   unsigned ActualAddrSize =
3588       IsNSA ? SrsrcIdx - VAddr0Idx
3589             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3590   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3591 
3592   unsigned ExpectedAddrSize =
3593       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3594 
3595   if (!IsNSA) {
3596     if (ExpectedAddrSize > 8)
3597       ExpectedAddrSize = 16;
3598 
3599     // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3600     // This provides backward compatibility for assembly created
3601     // before 160b/192b/224b types were directly supported.
3602     if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3603       return true;
3604   }
3605 
3606   return ActualAddrSize == ExpectedAddrSize;
3607 }
3608 
3609 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3610 
3611   const unsigned Opc = Inst.getOpcode();
3612   const MCInstrDesc &Desc = MII.get(Opc);
3613 
3614   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3615     return true;
3616   if (!Desc.mayLoad() || !Desc.mayStore())
3617     return true; // Not atomic
3618 
3619   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3620   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3621 
3622   // This is an incomplete check because image_atomic_cmpswap
3623   // may only use 0x3 and 0xf while other atomic operations
3624   // may use 0x1 and 0x3. However these limitations are
3625   // verified when we check that dmask matches dst size.
3626   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3627 }
3628 
3629 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3630 
3631   const unsigned Opc = Inst.getOpcode();
3632   const MCInstrDesc &Desc = MII.get(Opc);
3633 
3634   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3635     return true;
3636 
3637   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3638   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3639 
3640   // GATHER4 instructions use dmask in a different fashion compared to
3641   // other MIMG instructions. The only useful DMASK values are
3642   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3643   // (red,red,red,red) etc.) The ISA document doesn't mention
3644   // this.
3645   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3646 }
3647 
3648 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3649   const unsigned Opc = Inst.getOpcode();
3650   const MCInstrDesc &Desc = MII.get(Opc);
3651 
3652   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3653     return true;
3654 
3655   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3656   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3657       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3658 
3659   if (!BaseOpcode->MSAA)
3660     return true;
3661 
3662   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3663   assert(DimIdx != -1);
3664 
3665   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3666   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3667 
3668   return DimInfo->MSAA;
3669 }
3670 
3671 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3672 {
3673   switch (Opcode) {
3674   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3675   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3676   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3677     return true;
3678   default:
3679     return false;
3680   }
3681 }
3682 
// movrels* opcodes should only allow VGPRs as src0.
// This is specified in the .td description for vop1/vop3,
// but SDWA is handled differently. See isSDWAOperand.
3686 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3687                                       const OperandVector &Operands) {
3688 
3689   const unsigned Opc = Inst.getOpcode();
3690   const MCInstrDesc &Desc = MII.get(Opc);
3691 
3692   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3693     return true;
3694 
3695   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3696   assert(Src0Idx != -1);
3697 
3698   SMLoc ErrLoc;
3699   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3700   if (Src0.isReg()) {
3701     auto Reg = mc2PseudoReg(Src0.getReg());
3702     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3703     if (!isSGPR(Reg, TRI))
3704       return true;
3705     ErrLoc = getRegLoc(Reg, Operands);
3706   } else {
3707     ErrLoc = getConstLoc(Operands);
3708   }
3709 
3710   Error(ErrLoc, "source operand must be a VGPR");
3711   return false;
3712 }
3713 
3714 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3715                                           const OperandVector &Operands) {
3716 
3717   const unsigned Opc = Inst.getOpcode();
3718 
3719   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3720     return true;
3721 
3722   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3723   assert(Src0Idx != -1);
3724 
3725   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3726   if (!Src0.isReg())
3727     return true;
3728 
3729   auto Reg = mc2PseudoReg(Src0.getReg());
3730   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3731   if (!isGFX90A() && isSGPR(Reg, TRI)) {
3732     Error(getRegLoc(Reg, Operands),
3733           "source operand must be either a VGPR or an inline constant");
3734     return false;
3735   }
3736 
3737   return true;
3738 }
3739 
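// For MFMA results wider than 128 bits, src2 (the accumulator input) must
// either be the same register as dst or not overlap it at all. A sketch of
// an invalid case: dst = a[0:15] with src2 = a[2:17] partially overlaps
// and is rejected.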
3740 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3741                                    const OperandVector &Operands) {
3742   const unsigned Opc = Inst.getOpcode();
3743   const MCInstrDesc &Desc = MII.get(Opc);
3744 
3745   if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3746     return true;
3747 
3748   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3749   if (Src2Idx == -1)
3750     return true;
3751 
3752   const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3753   if (!Src2.isReg())
3754     return true;
3755 
3756   MCRegister Src2Reg = Src2.getReg();
3757   MCRegister DstReg = Inst.getOperand(0).getReg();
3758   if (Src2Reg == DstReg)
3759     return true;
3760 
3761   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3762   if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
3763     return true;
3764 
3765   if (TRI->regsOverlap(Src2Reg, DstReg)) {
3766     Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3767           "source 2 operand must not partially overlap with dst");
3768     return false;
3769   }
3770 
3771   return true;
3772 }
3773 
3774 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3775   switch (Inst.getOpcode()) {
3776   default:
3777     return true;
3778   case V_DIV_SCALE_F32_gfx6_gfx7:
3779   case V_DIV_SCALE_F32_vi:
3780   case V_DIV_SCALE_F32_gfx10:
3781   case V_DIV_SCALE_F64_gfx6_gfx7:
3782   case V_DIV_SCALE_F64_vi:
3783   case V_DIV_SCALE_F64_gfx10:
3784     break;
3785   }
3786 
3787   // TODO: Check that src0 = src1 or src2.
3788 
  for (auto Name : {AMDGPU::OpName::src0_modifiers,
                    AMDGPU::OpName::src1_modifiers,
                    AMDGPU::OpName::src2_modifiers}) {
3792     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3793             .getImm() &
3794         SISrcMods::ABS) {
3795       return false;
3796     }
3797   }
3798 
3799   return true;
3800 }
3801 
3802 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3803 
3804   const unsigned Opc = Inst.getOpcode();
3805   const MCInstrDesc &Desc = MII.get(Opc);
3806 
3807   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3808     return true;
3809 
3810   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3811   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3812     if (isCI() || isSI())
3813       return false;
3814   }
3815 
3816   return true;
3817 }
3818 
3819 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3820   const unsigned Opc = Inst.getOpcode();
3821   const MCInstrDesc &Desc = MII.get(Opc);
3822 
3823   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3824     return true;
3825 
3826   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3827   if (DimIdx < 0)
3828     return true;
3829 
3830   long Imm = Inst.getOperand(DimIdx).getImm();
3831   if (Imm < 0 || Imm >= 8)
3832     return false;
3833 
3834   return true;
3835 }
3836 
3837 static bool IsRevOpcode(const unsigned Opcode)
3838 {
3839   switch (Opcode) {
3840   case AMDGPU::V_SUBREV_F32_e32:
3841   case AMDGPU::V_SUBREV_F32_e64:
3842   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3843   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3844   case AMDGPU::V_SUBREV_F32_e32_vi:
3845   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3846   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3847   case AMDGPU::V_SUBREV_F32_e64_vi:
3848 
3849   case AMDGPU::V_SUBREV_CO_U32_e32:
3850   case AMDGPU::V_SUBREV_CO_U32_e64:
3851   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3852   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3853 
3854   case AMDGPU::V_SUBBREV_U32_e32:
3855   case AMDGPU::V_SUBBREV_U32_e64:
3856   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3857   case AMDGPU::V_SUBBREV_U32_e32_vi:
3858   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3859   case AMDGPU::V_SUBBREV_U32_e64_vi:
3860 
3861   case AMDGPU::V_SUBREV_U32_e32:
3862   case AMDGPU::V_SUBREV_U32_e64:
3863   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3864   case AMDGPU::V_SUBREV_U32_e32_vi:
3865   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3866   case AMDGPU::V_SUBREV_U32_e64_vi:
3867 
3868   case AMDGPU::V_SUBREV_F16_e32:
3869   case AMDGPU::V_SUBREV_F16_e64:
3870   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3871   case AMDGPU::V_SUBREV_F16_e32_vi:
3872   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3873   case AMDGPU::V_SUBREV_F16_e64_vi:
3874 
3875   case AMDGPU::V_SUBREV_U16_e32:
3876   case AMDGPU::V_SUBREV_U16_e64:
3877   case AMDGPU::V_SUBREV_U16_e32_vi:
3878   case AMDGPU::V_SUBREV_U16_e64_vi:
3879 
3880   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3881   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3882   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3883 
3884   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3885   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3886 
3887   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3888   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3889 
3890   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3891   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3892 
3893   case AMDGPU::V_LSHRREV_B32_e32:
3894   case AMDGPU::V_LSHRREV_B32_e64:
3895   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3896   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3897   case AMDGPU::V_LSHRREV_B32_e32_vi:
3898   case AMDGPU::V_LSHRREV_B32_e64_vi:
3899   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3900   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3901 
3902   case AMDGPU::V_ASHRREV_I32_e32:
3903   case AMDGPU::V_ASHRREV_I32_e64:
3904   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3905   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3906   case AMDGPU::V_ASHRREV_I32_e32_vi:
3907   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3908   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3909   case AMDGPU::V_ASHRREV_I32_e64_vi:
3910 
3911   case AMDGPU::V_LSHLREV_B32_e32:
3912   case AMDGPU::V_LSHLREV_B32_e64:
3913   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3914   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3915   case AMDGPU::V_LSHLREV_B32_e32_vi:
3916   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3917   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3918   case AMDGPU::V_LSHLREV_B32_e64_vi:
3919 
3920   case AMDGPU::V_LSHLREV_B16_e32:
3921   case AMDGPU::V_LSHLREV_B16_e64:
3922   case AMDGPU::V_LSHLREV_B16_e32_vi:
3923   case AMDGPU::V_LSHLREV_B16_e64_vi:
3924   case AMDGPU::V_LSHLREV_B16_gfx10:
3925 
3926   case AMDGPU::V_LSHRREV_B16_e32:
3927   case AMDGPU::V_LSHRREV_B16_e64:
3928   case AMDGPU::V_LSHRREV_B16_e32_vi:
3929   case AMDGPU::V_LSHRREV_B16_e64_vi:
3930   case AMDGPU::V_LSHRREV_B16_gfx10:
3931 
3932   case AMDGPU::V_ASHRREV_I16_e32:
3933   case AMDGPU::V_ASHRREV_I16_e64:
3934   case AMDGPU::V_ASHRREV_I16_e32_vi:
3935   case AMDGPU::V_ASHRREV_I16_e64_vi:
3936   case AMDGPU::V_ASHRREV_I16_gfx10:
3937 
3938   case AMDGPU::V_LSHLREV_B64_e64:
3939   case AMDGPU::V_LSHLREV_B64_gfx10:
3940   case AMDGPU::V_LSHLREV_B64_vi:
3941 
3942   case AMDGPU::V_LSHRREV_B64_e64:
3943   case AMDGPU::V_LSHRREV_B64_gfx10:
3944   case AMDGPU::V_LSHRREV_B64_vi:
3945 
3946   case AMDGPU::V_ASHRREV_I64_e64:
3947   case AMDGPU::V_ASHRREV_I64_gfx10:
3948   case AMDGPU::V_ASHRREV_I64_vi:
3949 
3950   case AMDGPU::V_PK_LSHLREV_B16:
3951   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3952   case AMDGPU::V_PK_LSHLREV_B16_vi:
3953 
3954   case AMDGPU::V_PK_LSHRREV_B16:
3955   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3956   case AMDGPU::V_PK_LSHRREV_B16_vi:
3957   case AMDGPU::V_PK_ASHRREV_I16:
3958   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3959   case AMDGPU::V_PK_ASHRREV_I16_vi:
3960     return true;
3961   default:
3962     return false;
3963   }
3964 }
3965 
3966 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3967 
3968   using namespace SIInstrFlags;
3969   const unsigned Opcode = Inst.getOpcode();
3970   const MCInstrDesc &Desc = MII.get(Opcode);
3971 
  // The lds_direct register is defined so that it can be used
  // with 9-bit operands only. Ignore encodings that do not accept these.
3974   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3975   if ((Desc.TSFlags & Enc) == 0)
3976     return None;
3977 
3978   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3979     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3980     if (SrcIdx == -1)
3981       break;
3982     const auto &Src = Inst.getOperand(SrcIdx);
3983     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3984 
3985       if (isGFX90A() || isGFX11Plus())
3986         return StringRef("lds_direct is not supported on this GPU");
3987 
3988       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3989         return StringRef("lds_direct cannot be used with this instruction");
3990 
3991       if (SrcName != OpName::src0)
3992         return StringRef("lds_direct may be used as src0 only");
3993     }
3994   }
3995 
3996   return None;
3997 }
3998 
3999 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4000   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4001     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4002     if (Op.isFlatOffset())
4003       return Op.getStartLoc();
4004   }
4005   return getLoc();
4006 }
4007 
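// Validate the offset modifier of FLAT family instructions. For example,
// "flat_load_dword v0, v[0:1] offset:8" is rejected on targets without
// flat offset support, and out-of-range offsets are reported using the
// width expected for the segment (signed for global/scratch, unsigned for
// plain flat).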
4008 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4009                                          const OperandVector &Operands) {
4010   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4011   if ((TSFlags & SIInstrFlags::FLAT) == 0)
4012     return true;
4013 
4014   auto Opcode = Inst.getOpcode();
4015   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4016   assert(OpNum != -1);
4017 
4018   const auto &Op = Inst.getOperand(OpNum);
4019   if (!hasFlatOffsets() && Op.getImm() != 0) {
4020     Error(getFlatOffsetLoc(Operands),
4021           "flat offset modifier is not supported on this GPU");
4022     return false;
4023   }
4024 
  // For the plain FLAT segment the offset must be positive;
  // the MSB is ignored and forced to zero.
4027   if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
4028     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
4029     if (!isIntN(OffsetSize, Op.getImm())) {
4030       Error(getFlatOffsetLoc(Operands),
4031             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4032       return false;
4033     }
4034   } else {
4035     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
4036     if (!isUIntN(OffsetSize, Op.getImm())) {
4037       Error(getFlatOffsetLoc(Operands),
4038             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4039       return false;
4040     }
4041   }
4042 
4043   return true;
4044 }
4045 
4046 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4047   // Start with second operand because SMEM Offset cannot be dst or src0.
4048   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4049     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4050     if (Op.isSMEMOffset())
4051       return Op.getStartLoc();
4052   }
4053   return getLoc();
4054 }
4055 
4056 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4057                                          const OperandVector &Operands) {
4058   if (isCI() || isSI())
4059     return true;
4060 
4061   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4062   if ((TSFlags & SIInstrFlags::SMRD) == 0)
4063     return true;
4064 
4065   auto Opcode = Inst.getOpcode();
4066   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4067   if (OpNum == -1)
4068     return true;
4069 
4070   const auto &Op = Inst.getOperand(OpNum);
4071   if (!Op.isImm())
4072     return true;
4073 
4074   uint64_t Offset = Op.getImm();
4075   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4076   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4077       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4078     return true;
4079 
4080   Error(getSMEMOffsetLoc(Operands),
4081         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
4082                                "expected a 21-bit signed offset");
4083 
4084   return false;
4085 }
4086 
4087 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4088   unsigned Opcode = Inst.getOpcode();
4089   const MCInstrDesc &Desc = MII.get(Opcode);
4090   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4091     return true;
4092 
4093   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4094   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4095 
4096   const int OpIndices[] = { Src0Idx, Src1Idx };
4097 
4098   unsigned NumExprs = 0;
4099   unsigned NumLiterals = 0;
4100   uint32_t LiteralValue;
4101 
4102   for (int OpIdx : OpIndices) {
4103     if (OpIdx == -1) break;
4104 
4105     const MCOperand &MO = Inst.getOperand(OpIdx);
4106     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4107     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4108       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4109         uint32_t Value = static_cast<uint32_t>(MO.getImm());
4110         if (NumLiterals == 0 || LiteralValue != Value) {
4111           LiteralValue = Value;
4112           ++NumLiterals;
4113         }
4114       } else if (MO.isExpr()) {
4115         ++NumExprs;
4116       }
4117     }
4118   }
4119 
4120   return NumLiterals + NumExprs <= 1;
4121 }
4122 
4123 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4124   const unsigned Opc = Inst.getOpcode();
4125   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
4126       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
4127     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4128     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4129 
4130     if (OpSel & ~3)
4131       return false;
4132   }
4133 
4134   if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) {
4135     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4136     if (OpSelIdx != -1) {
4137       if (Inst.getOperand(OpSelIdx).getImm() != 0)
4138         return false;
4139     }
4140     int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4141     if (OpSelHiIdx != -1) {
4142       if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4143         return false;
4144     }
4145   }
4146 
4147   return true;
4148 }
4149 
4150 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4151                                   const OperandVector &Operands) {
4152   const unsigned Opc = Inst.getOpcode();
4153   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4154   if (DppCtrlIdx < 0)
4155     return true;
4156   unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4157 
4158   if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
4159     // DPP64 is supported for row_newbcast only.
4160     int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4161     if (Src0Idx >= 0 &&
4162         getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
4163       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4164       Error(S, "64 bit dpp only supports row_newbcast");
4165       return false;
4166     }
4167   }
4168 
4169   return true;
4170 }
4171 
4172 // Check if VCC register matches wavefront size
4173 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4174   auto FB = getFeatureBits();
4175   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4176     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4177 }
4178 
// Only one unique literal can be used. A VOP3 literal is allowed only on GFX10+.
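// For example, "v_add_f32_e32 v0, 0x3f000000, v1" uses a single 32-bit
// literal and is accepted, while the VOP3 form
// "v_add_f32_e64 v0, 0x3f000000, v1" additionally requires
// FeatureVOP3Literal (GFX10+).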
4180 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4181                                          const OperandVector &Operands) {
4182   unsigned Opcode = Inst.getOpcode();
4183   const MCInstrDesc &Desc = MII.get(Opcode);
4184   const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
4185   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4186       ImmIdx == -1)
4187     return true;
4188 
4189   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4190   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4191   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
4192 
4193   const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};
4194 
4195   unsigned NumExprs = 0;
4196   unsigned NumLiterals = 0;
4197   uint32_t LiteralValue;
4198 
4199   for (int OpIdx : OpIndices) {
4200     if (OpIdx == -1)
4201       continue;
4202 
4203     const MCOperand &MO = Inst.getOperand(OpIdx);
4204     if (!MO.isImm() && !MO.isExpr())
4205       continue;
4206     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4207       continue;
4208 
4209     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4210         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4211       Error(getConstLoc(Operands),
4212             "inline constants are not allowed for this operand");
4213       return false;
4214     }
4215 
4216     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4217       uint32_t Value = static_cast<uint32_t>(MO.getImm());
4218       if (NumLiterals == 0 || LiteralValue != Value) {
4219         LiteralValue = Value;
4220         ++NumLiterals;
4221       }
4222     } else if (MO.isExpr()) {
4223       ++NumExprs;
4224     }
4225   }
4226   NumLiterals += NumExprs;
4227 
4228   if (!NumLiterals)
4229     return true;
4230 
4231   if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4232     Error(getLitLoc(Operands), "literal operands are not supported");
4233     return false;
4234   }
4235 
4236   if (NumLiterals > 1) {
4237     Error(getLitLoc(Operands), "only one literal operand is allowed");
4238     return false;
4239   }
4240 
4241   return true;
4242 }
4243 
4244 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4245 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4246                          const MCRegisterInfo *MRI) {
4247   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4248   if (OpIdx < 0)
4249     return -1;
4250 
4251   const MCOperand &Op = Inst.getOperand(OpIdx);
4252   if (!Op.isReg())
4253     return -1;
4254 
4255   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4256   auto Reg = Sub ? Sub : Op.getReg();
4257   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4258   return AGPR32.contains(Reg) ? 1 : 0;
4259 }
4260 
4261 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4262   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4263   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4264                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4265                   SIInstrFlags::DS)) == 0)
4266     return true;
4267 
4268   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4269                                                       : AMDGPU::OpName::vdata;
4270 
4271   const MCRegisterInfo *MRI = getMRI();
4272   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4273   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4274 
4275   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4276     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4277     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4278       return false;
4279   }
4280 
4281   auto FB = getFeatureBits();
4282   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4283     if (DataAreg < 0 || DstAreg < 0)
4284       return true;
4285     return DstAreg == DataAreg;
4286   }
4287 
4288   return DstAreg < 1 && DataAreg < 1;
4289 }
4290 
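// On gfx90a, VGPR and AGPR tuples must start at an even-numbered register.
// For example, a 64-bit operand placed in v[1:2] is rejected because the
// tuple begins at an odd register.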
4291 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4292   auto FB = getFeatureBits();
4293   if (!FB[AMDGPU::FeatureGFX90AInsts])
4294     return true;
4295 
4296   const MCRegisterInfo *MRI = getMRI();
4297   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4298   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4299   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4300     const MCOperand &Op = Inst.getOperand(I);
4301     if (!Op.isReg())
4302       continue;
4303 
4304     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4305     if (!Sub)
4306       continue;
4307 
4308     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4309       return false;
4310     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4311       return false;
4312   }
4313 
4314   return true;
4315 }
4316 
4317 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4318   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4319     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4320     if (Op.isBLGP())
4321       return Op.getStartLoc();
4322   }
4323   return SMLoc();
4324 }
4325 
4326 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4327                                    const OperandVector &Operands) {
4328   unsigned Opc = Inst.getOpcode();
4329   int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4330   if (BlgpIdx == -1)
4331     return true;
4332   SMLoc BLGPLoc = getBLGPLoc(Operands);
4333   if (!BLGPLoc.isValid())
4334     return true;
4335   bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:");
4336   auto FB = getFeatureBits();
4337   bool UsesNeg = false;
4338   if (FB[AMDGPU::FeatureGFX940Insts]) {
4339     switch (Opc) {
4340     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4341     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4342     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4343     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4344       UsesNeg = true;
4345     }
4346   }
4347 
4348   if (IsNeg == UsesNeg)
4349     return true;
4350 
4351   Error(BLGPLoc,
4352         UsesNeg ? "invalid modifier: blgp is not supported"
4353                 : "invalid modifier: neg is not supported");
4354 
4355   return false;
4356 }
4357 
4358 // gfx90a has an undocumented limitation:
4359 // DS_GWS opcodes must use even aligned registers.
4360 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4361                                   const OperandVector &Operands) {
4362   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4363     return true;
4364 
4365   int Opc = Inst.getOpcode();
4366   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4367       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4368     return true;
4369 
4370   const MCRegisterInfo *MRI = getMRI();
4371   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4372   int Data0Pos =
4373       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4374   assert(Data0Pos != -1);
4375   auto Reg = Inst.getOperand(Data0Pos).getReg();
4376   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4377   if (RegIdx & 1) {
4378     SMLoc RegLoc = getRegLoc(Reg, Operands);
4379     Error(RegLoc, "vgpr must be even aligned");
4380     return false;
4381   }
4382 
4383   return true;
4384 }
4385 
4386 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4387                                             const OperandVector &Operands,
4388                                             const SMLoc &IDLoc) {
4389   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4390                                            AMDGPU::OpName::cpol);
4391   if (CPolPos == -1)
4392     return true;
4393 
4394   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4395 
4396   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4397   if (TSFlags & SIInstrFlags::SMRD) {
4398     if (CPol && (isSI() || isCI())) {
4399       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4400       Error(S, "cache policy is not supported for SMRD instructions");
4401       return false;
4402     }
4403     if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4404       Error(IDLoc, "invalid cache policy for SMEM instruction");
4405       return false;
4406     }
4407   }
4408 
4409   if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4410     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4411     StringRef CStr(S.getPointer());
4412     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4413     Error(S, "scc is not supported on this GPU");
4414     return false;
4415   }
4416 
4417   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4418     return true;
4419 
4420   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4421     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4422       Error(IDLoc, isGFX940() ? "instruction must use sc0"
4423                               : "instruction must use glc");
4424       return false;
4425     }
4426   } else {
4427     if (CPol & CPol::GLC) {
4428       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4429       StringRef CStr(S.getPointer());
4430       S = SMLoc::getFromPointer(
4431           &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4432       Error(S, isGFX940() ? "instruction must not use sc0"
4433                           : "instruction must not use glc");
4434       return false;
4435     }
4436   }
4437 
4438   return true;
4439 }
4440 
4441 bool AMDGPUAsmParser::validateFlatLdsDMA(const MCInst &Inst,
4442                                          const OperandVector &Operands,
4443                                          const SMLoc &IDLoc) {
4444   if (isGFX940())
4445     return true;
4446 
4447   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4448   if ((TSFlags & (SIInstrFlags::VALU | SIInstrFlags::FLAT)) !=
4449       (SIInstrFlags::VALU | SIInstrFlags::FLAT))
4450     return true;
4451   // This is FLAT LDS DMA.
4452 
4453   SMLoc S = getImmLoc(AMDGPUOperand::ImmTyLDS, Operands);
4454   StringRef CStr(S.getPointer());
4455   if (!CStr.startswith("lds")) {
    // This is an incorrectly matched LDS DMA version of a FLAT load opcode.
    // The LDS version should have an 'lds' modifier, but since it follows the
    // optional operands its absence is ignored by the matcher.
4459     Error(IDLoc, "invalid operands for instruction");
4460     return false;
4461   }
4462 
4463   return true;
4464 }
4465 
4466 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4467                                           const SMLoc &IDLoc,
4468                                           const OperandVector &Operands) {
4469   if (auto ErrMsg = validateLdsDirect(Inst)) {
4470     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4471     return false;
4472   }
4473   if (!validateSOPLiteral(Inst)) {
4474     Error(getLitLoc(Operands),
4475       "only one literal operand is allowed");
4476     return false;
4477   }
4478   if (!validateVOPLiteral(Inst, Operands)) {
4479     return false;
4480   }
4481   if (!validateConstantBusLimitations(Inst, Operands)) {
4482     return false;
4483   }
4484   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4485     return false;
4486   }
4487   if (!validateIntClampSupported(Inst)) {
4488     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4489       "integer clamping is not supported on this GPU");
4490     return false;
4491   }
4492   if (!validateOpSel(Inst)) {
4493     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4494       "invalid op_sel operand");
4495     return false;
4496   }
4497   if (!validateDPP(Inst, Operands)) {
4498     return false;
4499   }
4500   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4501   if (!validateMIMGD16(Inst)) {
4502     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4503       "d16 modifier is not supported on this GPU");
4504     return false;
4505   }
4506   if (!validateMIMGDim(Inst)) {
4507     Error(IDLoc, "dim modifier is required on this GPU");
4508     return false;
4509   }
4510   if (!validateMIMGMSAA(Inst)) {
4511     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4512           "invalid dim; must be MSAA type");
4513     return false;
4514   }
4515   if (auto ErrMsg = validateMIMGDataSize(Inst)) {
4516     Error(IDLoc, *ErrMsg);
4517     return false;
4518   }
4519   if (!validateMIMGAddrSize(Inst)) {
4520     Error(IDLoc,
4521       "image address size does not match dim and a16");
4522     return false;
4523   }
4524   if (!validateMIMGAtomicDMask(Inst)) {
4525     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4526       "invalid atomic image dmask");
4527     return false;
4528   }
4529   if (!validateMIMGGatherDMask(Inst)) {
4530     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4531       "invalid image_gather dmask: only one bit must be set");
4532     return false;
4533   }
4534   if (!validateMovrels(Inst, Operands)) {
4535     return false;
4536   }
4537   if (!validateFlatOffset(Inst, Operands)) {
4538     return false;
4539   }
4540   if (!validateSMEMOffset(Inst, Operands)) {
4541     return false;
4542   }
4543   if (!validateMAIAccWrite(Inst, Operands)) {
4544     return false;
4545   }
4546   if (!validateMFMA(Inst, Operands)) {
4547     return false;
4548   }
4549   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4550     return false;
4551   }
4552 
4553   if (!validateAGPRLdSt(Inst)) {
4554     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4555     ? "invalid register class: data and dst should be all VGPR or AGPR"
4556     : "invalid register class: agpr loads and stores not supported on this GPU"
4557     );
4558     return false;
4559   }
4560   if (!validateVGPRAlign(Inst)) {
4561     Error(IDLoc,
4562       "invalid register class: vgpr tuples must be 64 bit aligned");
4563     return false;
4564   }
4565   if (!validateGWS(Inst, Operands)) {
4566     return false;
4567   }
4568 
4569   if (!validateBLGP(Inst, Operands)) {
4570     return false;
4571   }
4572 
4573   if (!validateDivScale(Inst)) {
4574     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4575     return false;
4576   }
4577   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4578     return false;
4579   }
4580 
4581   if (!validateFlatLdsDMA(Inst, Operands, IDLoc)) {
4582     return false;
4583   }
4584 
4585   return true;
4586 }
4587 
4588 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4589                                             const FeatureBitset &FBS,
4590                                             unsigned VariantID = 0);
4591 
4592 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4593                                 const FeatureBitset &AvailableFeatures,
4594                                 unsigned VariantID);
4595 
4596 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4597                                        const FeatureBitset &FBS) {
4598   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4599 }
4600 
4601 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4602                                        const FeatureBitset &FBS,
4603                                        ArrayRef<unsigned> Variants) {
4604   for (auto Variant : Variants) {
4605     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4606       return true;
4607   }
4608 
4609   return false;
4610 }
4611 
4612 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4613                                                   const SMLoc &IDLoc) {
4614   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4615 
4616   // Check if requested instruction variant is supported.
4617   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4618     return false;
4619 
4620   // This instruction is not supported.
4621   // Clear any other pending errors because they are no longer relevant.
4622   getParser().clearPendingErrors();
4623 
4624   // Requested instruction variant is not supported.
4625   // Check if any other variants are supported.
4626   StringRef VariantName = getMatchedVariantName();
4627   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4628     return Error(IDLoc,
4629                  Twine(VariantName,
4630                        " variant of this instruction is not supported"));
4631   }
4632 
4633   // Finally check if this instruction is supported on any other GPU.
4634   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4635     return Error(IDLoc, "instruction not supported on this GPU");
4636   }
4637 
4638   // Instruction not supported on any GPU. Probably a typo.
4639   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4640   return Error(IDLoc, "invalid instruction" + Suggestion);
4641 }
4642 
4643 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4644                                               OperandVector &Operands,
4645                                               MCStreamer &Out,
4646                                               uint64_t &ErrorInfo,
4647                                               bool MatchingInlineAsm) {
4648   MCInst Inst;
4649   unsigned Result = Match_Success;
4650   for (auto Variant : getMatchedVariants()) {
4651     uint64_t EI;
4652     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4653                                   Variant);
    // We order match statuses from least to most specific and use the most
    // specific status as the result:
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4657     if ((R == Match_Success) ||
4658         (R == Match_PreferE32) ||
4659         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4660         (R == Match_InvalidOperand && Result != Match_MissingFeature
4661                                    && Result != Match_PreferE32) ||
4662         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4663                                    && Result != Match_MissingFeature
4664                                    && Result != Match_PreferE32)) {
4665       Result = R;
4666       ErrorInfo = EI;
4667     }
4668     if (R == Match_Success)
4669       break;
4670   }
4671 
4672   if (Result == Match_Success) {
4673     if (!validateInstruction(Inst, IDLoc, Operands)) {
4674       return true;
4675     }
4676     Inst.setLoc(IDLoc);
4677     Out.emitInstruction(Inst, getSTI());
4678     return false;
4679   }
4680 
4681   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4682   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4683     return true;
4684   }
4685 
4686   switch (Result) {
4687   default: break;
4688   case Match_MissingFeature:
4689     // It has been verified that the specified instruction
4690     // mnemonic is valid. A match was found but it requires
4691     // features which are not supported on this GPU.
4692     return Error(IDLoc, "operands are not valid for this GPU or mode");
4693 
4694   case Match_InvalidOperand: {
4695     SMLoc ErrorLoc = IDLoc;
4696     if (ErrorInfo != ~0ULL) {
4697       if (ErrorInfo >= Operands.size()) {
4698         return Error(IDLoc, "too few operands for instruction");
4699       }
4700       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4701       if (ErrorLoc == SMLoc())
4702         ErrorLoc = IDLoc;
4703     }
4704     return Error(ErrorLoc, "invalid operand for instruction");
4705   }
4706 
4707   case Match_PreferE32:
4708     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4709                         "should be encoded as e32");
4710   case Match_MnemonicFail:
4711     llvm_unreachable("Invalid instructions should have been handled already");
4712   }
4713   llvm_unreachable("Implement any new match types added!");
4714 }
4715 
4716 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4717   int64_t Tmp = -1;
4718   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4719     return true;
4720   }
4721   if (getParser().parseAbsoluteExpression(Tmp)) {
4722     return true;
4723   }
4724   Ret = static_cast<uint32_t>(Tmp);
4725   return false;
4726 }
4727 
4728 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4729                                                uint32_t &Minor) {
4730   if (ParseAsAbsoluteExpression(Major))
4731     return TokError("invalid major version");
4732 
4733   if (!trySkipToken(AsmToken::Comma))
4734     return TokError("minor version number required, comma expected");
4735 
4736   if (ParseAsAbsoluteExpression(Minor))
4737     return TokError("invalid minor version");
4738 
4739   return false;
4740 }
4741 
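// Parses the .amdgcn_target directive. Its operand is a quoted target ID
// string which must match the target ID the streamer was configured with,
// e.g. (the exact string depends on the configured target and is purely
// illustrative here):
//
//   .amdgcn_target "amdgcn-amd-amdhsa--gfx90a"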
4742 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4743   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4744     return TokError("directive only supported for amdgcn architecture");
4745 
4746   std::string TargetIDDirective;
4747   SMLoc TargetStart = getTok().getLoc();
4748   if (getParser().parseEscapedString(TargetIDDirective))
4749     return true;
4750 
4751   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4752   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4753     return getParser().Error(TargetRange.Start,
4754         (Twine(".amdgcn_target directive's target id ") +
4755          Twine(TargetIDDirective) +
4756          Twine(" does not match the specified target id ") +
4757          Twine(getTargetStreamer().getTargetID()->toString())).str());
4758 
4759   return false;
4760 }
4761 
4762 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4763   return Error(Range.Start, "value out of range", Range);
4764 }
4765 
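// Computes the granulated VGPR/SGPR block counts that get encoded into
// COMPUTE_PGM_RSRC1. As a rough illustration only (the real allocation
// granules and any encoding bias come from IsaInfo): with a VGPR granule of
// 4, a kernel whose highest used VGPR is v8 (NextFreeVGPR = 9) occupies
// ceil(9 / 4) = 3 granules.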
4766 bool AMDGPUAsmParser::calculateGPRBlocks(
4767     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4768     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4769     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4770     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4771   // TODO(scott.linder): These calculations are duplicated from
4772   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4773   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4774 
4775   unsigned NumVGPRs = NextFreeVGPR;
4776   unsigned NumSGPRs = NextFreeSGPR;
4777 
4778   if (Version.Major >= 10)
4779     NumSGPRs = 0;
4780   else {
4781     unsigned MaxAddressableNumSGPRs =
4782         IsaInfo::getAddressableNumSGPRs(&getSTI());
4783 
4784     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4785         NumSGPRs > MaxAddressableNumSGPRs)
4786       return OutOfRangeError(SGPRRange);
4787 
4788     NumSGPRs +=
4789         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4790 
4791     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4792         NumSGPRs > MaxAddressableNumSGPRs)
4793       return OutOfRangeError(SGPRRange);
4794 
4795     if (Features.test(FeatureSGPRInitBug))
4796       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4797   }
4798 
4799   VGPRBlocks =
4800       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4801   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4802 
4803   return false;
4804 }
4805 
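// Parses an .amdhsa_kernel block into a kernel descriptor. A minimal sketch
// of the input this accepts (names and values are illustrative; only
// .amdhsa_next_free_vgpr and .amdhsa_next_free_sgpr are mandatory, plus
// .amdhsa_accum_offset on gfx90a):
//
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//     .amdhsa_user_sgpr_kernarg_segment_ptr 1
//   .end_amdhsa_kernel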
4806 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4807   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4808     return TokError("directive only supported for amdgcn architecture");
4809 
4810   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4811     return TokError("directive only supported for amdhsa OS");
4812 
4813   StringRef KernelName;
4814   if (getParser().parseIdentifier(KernelName))
4815     return true;
4816 
4817   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4818 
4819   StringSet<> Seen;
4820 
4821   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4822 
4823   SMRange VGPRRange;
4824   uint64_t NextFreeVGPR = 0;
4825   uint64_t AccumOffset = 0;
4826   uint64_t SharedVGPRCount = 0;
4827   SMRange SGPRRange;
4828   uint64_t NextFreeSGPR = 0;
4829 
4830   // Count the number of user SGPRs implied from the enabled feature bits.
4831   unsigned ImpliedUserSGPRCount = 0;
4832 
4833   // Track if the asm explicitly contains the directive for the user SGPR
4834   // count.
4835   Optional<unsigned> ExplicitUserSGPRCount;
4836   bool ReserveVCC = true;
4837   bool ReserveFlatScr = true;
4838   Optional<bool> EnableWavefrontSize32;
4839 
4840   while (true) {
4841     while (trySkipToken(AsmToken::EndOfStatement));
4842 
4843     StringRef ID;
4844     SMRange IDRange = getTok().getLocRange();
4845     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4846       return true;
4847 
4848     if (ID == ".end_amdhsa_kernel")
4849       break;
4850 
4851     if (Seen.find(ID) != Seen.end())
4852       return TokError(".amdhsa_ directives cannot be repeated");
4853     Seen.insert(ID);
4854 
4855     SMLoc ValStart = getLoc();
4856     int64_t IVal;
4857     if (getParser().parseAbsoluteExpression(IVal))
4858       return true;
4859     SMLoc ValEnd = getLoc();
4860     SMRange ValRange = SMRange(ValStart, ValEnd);
4861 
4862     if (IVal < 0)
4863       return OutOfRangeError(ValRange);
4864 
4865     uint64_t Val = IVal;
4866 
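// Helper for the directive handlers below: checks that VALUE fits in the bit
// width of the descriptor field ENTRY (reporting an out-of-range error on
// RANGE otherwise) and then packs it into FIELD via AMDHSA_BITS_SET.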
4867 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4868   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4869     return OutOfRangeError(RANGE);                                             \
4870   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4871 
4872     if (ID == ".amdhsa_group_segment_fixed_size") {
4873       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4874         return OutOfRangeError(ValRange);
4875       KD.group_segment_fixed_size = Val;
4876     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4877       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4878         return OutOfRangeError(ValRange);
4879       KD.private_segment_fixed_size = Val;
4880     } else if (ID == ".amdhsa_kernarg_size") {
4881       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4882         return OutOfRangeError(ValRange);
4883       KD.kernarg_size = Val;
4884     } else if (ID == ".amdhsa_user_sgpr_count") {
4885       ExplicitUserSGPRCount = Val;
4886     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4887       if (hasArchitectedFlatScratch())
4888         return Error(IDRange.Start,
4889                      "directive is not supported with architected flat scratch",
4890                      IDRange);
4891       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4892                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4893                        Val, ValRange);
4894       if (Val)
4895         ImpliedUserSGPRCount += 4;
4896     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4897       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4898                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4899                        ValRange);
4900       if (Val)
4901         ImpliedUserSGPRCount += 2;
4902     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4903       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4904                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4905                        ValRange);
4906       if (Val)
4907         ImpliedUserSGPRCount += 2;
4908     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4909       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4910                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4911                        Val, ValRange);
4912       if (Val)
4913         ImpliedUserSGPRCount += 2;
4914     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4915       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4916                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4917                        ValRange);
4918       if (Val)
4919         ImpliedUserSGPRCount += 2;
4920     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4921       if (hasArchitectedFlatScratch())
4922         return Error(IDRange.Start,
4923                      "directive is not supported with architected flat scratch",
4924                      IDRange);
4925       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4926                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4927                        ValRange);
4928       if (Val)
4929         ImpliedUserSGPRCount += 2;
4930     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4931       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4932                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4933                        Val, ValRange);
4934       if (Val)
4935         ImpliedUserSGPRCount += 1;
4936     } else if (ID == ".amdhsa_wavefront_size32") {
4937       if (IVersion.Major < 10)
4938         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4939       EnableWavefrontSize32 = Val;
4940       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4941                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4942                        Val, ValRange);
4943     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4944       if (hasArchitectedFlatScratch())
4945         return Error(IDRange.Start,
4946                      "directive is not supported with architected flat scratch",
4947                      IDRange);
4948       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4949                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4950     } else if (ID == ".amdhsa_enable_private_segment") {
4951       if (!hasArchitectedFlatScratch())
4952         return Error(
4953             IDRange.Start,
4954             "directive is not supported without architected flat scratch",
4955             IDRange);
4956       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4957                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4958     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4959       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4960                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4961                        ValRange);
4962     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4963       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4964                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4965                        ValRange);
4966     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4967       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4968                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4969                        ValRange);
4970     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4971       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4972                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4973                        ValRange);
4974     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4975       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4976                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4977                        ValRange);
4978     } else if (ID == ".amdhsa_next_free_vgpr") {
4979       VGPRRange = ValRange;
4980       NextFreeVGPR = Val;
4981     } else if (ID == ".amdhsa_next_free_sgpr") {
4982       SGPRRange = ValRange;
4983       NextFreeSGPR = Val;
4984     } else if (ID == ".amdhsa_accum_offset") {
4985       if (!isGFX90A())
4986         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4987       AccumOffset = Val;
4988     } else if (ID == ".amdhsa_reserve_vcc") {
4989       if (!isUInt<1>(Val))
4990         return OutOfRangeError(ValRange);
4991       ReserveVCC = Val;
4992     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4993       if (IVersion.Major < 7)
4994         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4995       if (hasArchitectedFlatScratch())
4996         return Error(IDRange.Start,
4997                      "directive is not supported with architected flat scratch",
4998                      IDRange);
4999       if (!isUInt<1>(Val))
5000         return OutOfRangeError(ValRange);
5001       ReserveFlatScr = Val;
5002     } else if (ID == ".amdhsa_reserve_xnack_mask") {
5003       if (IVersion.Major < 8)
5004         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5005       if (!isUInt<1>(Val))
5006         return OutOfRangeError(ValRange);
5007       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
        return getParser().Error(IDRange.Start,
                                 ".amdhsa_reserve_xnack_mask does not match "
                                 "target id",
                                 IDRange);
5010     } else if (ID == ".amdhsa_float_round_mode_32") {
5011       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5012                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
5013     } else if (ID == ".amdhsa_float_round_mode_16_64") {
5014       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5015                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
5016     } else if (ID == ".amdhsa_float_denorm_mode_32") {
5017       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5018                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
5019     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5020       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5021                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
5022                        ValRange);
5023     } else if (ID == ".amdhsa_dx10_clamp") {
5024       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5025                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
5026     } else if (ID == ".amdhsa_ieee_mode") {
5027       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
5028                        Val, ValRange);
5029     } else if (ID == ".amdhsa_fp16_overflow") {
5030       if (IVersion.Major < 9)
5031         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5032       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
5033                        ValRange);
5034     } else if (ID == ".amdhsa_tg_split") {
5035       if (!isGFX90A())
5036         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5037       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
5038                        ValRange);
5039     } else if (ID == ".amdhsa_workgroup_processor_mode") {
5040       if (IVersion.Major < 10)
5041         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5042       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
5043                        ValRange);
5044     } else if (ID == ".amdhsa_memory_ordered") {
5045       if (IVersion.Major < 10)
5046         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5047       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
5048                        ValRange);
5049     } else if (ID == ".amdhsa_forward_progress") {
5050       if (IVersion.Major < 10)
5051         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5052       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
5053                        ValRange);
5054     } else if (ID == ".amdhsa_shared_vgpr_count") {
5055       if (IVersion.Major < 10)
5056         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5057       SharedVGPRCount = Val;
5058       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5059                        COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val,
5060                        ValRange);
5061     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5062       PARSE_BITS_ENTRY(
5063           KD.compute_pgm_rsrc2,
5064           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
5065           ValRange);
5066     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5067       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5068                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5069                        Val, ValRange);
5070     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5071       PARSE_BITS_ENTRY(
5072           KD.compute_pgm_rsrc2,
5073           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
5074           ValRange);
5075     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5076       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5077                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5078                        Val, ValRange);
5079     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5080       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5081                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5082                        Val, ValRange);
5083     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5084       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5085                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5086                        Val, ValRange);
5087     } else if (ID == ".amdhsa_exception_int_div_zero") {
5088       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5089                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5090                        Val, ValRange);
5091     } else {
5092       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5093     }
5094 
5095 #undef PARSE_BITS_ENTRY
5096   }
5097 
5098   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
5099     return TokError(".amdhsa_next_free_vgpr directive is required");
5100 
5101   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
5102     return TokError(".amdhsa_next_free_sgpr directive is required");
5103 
5104   unsigned VGPRBlocks;
5105   unsigned SGPRBlocks;
5106   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5107                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5108                          EnableWavefrontSize32, NextFreeVGPR,
5109                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5110                          SGPRBlocks))
5111     return true;
5112 
5113   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5114           VGPRBlocks))
5115     return OutOfRangeError(VGPRRange);
5116   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5117                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
5118 
5119   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5120           SGPRBlocks))
5121     return OutOfRangeError(SGPRRange);
5122   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5123                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
5124                   SGPRBlocks);
5125 
5126   if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
    return TokError(".amdhsa_user_sgpr_count smaller than the count implied "
                    "by enabled user SGPRs");
5129 
5130   unsigned UserSGPRCount =
5131       ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5132 
5133   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5134     return TokError("too many user SGPRs enabled");
5135   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
5136                   UserSGPRCount);
5137 
5138   if (isGFX90A()) {
5139     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
5140       return TokError(".amdhsa_accum_offset directive is required");
5141     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5142       return TokError("accum_offset should be in range [4..256] in "
5143                       "increments of 4");
5144     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5145       return TokError("accum_offset exceeds total VGPR allocation");
5146     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5147                     (AccumOffset / 4 - 1));
5148   }
5149 
5150   if (IVersion.Major == 10) {
    // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
5152     if (SharedVGPRCount && EnableWavefrontSize32) {
5153       return TokError("shared_vgpr_count directive not valid on "
5154                       "wavefront size 32");
5155     }
5156     if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5157       return TokError("shared_vgpr_count*2 + "
5158                       "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
                      "exceed 63");
5160     }
5161   }
5162 
5163   getTargetStreamer().EmitAmdhsaKernelDescriptor(
5164       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
5165       ReserveFlatScr);
5166   return false;
5167 }
5168 
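// Parses the legacy code object version directive, e.g. (illustrative
// version numbers):
//
//   .hsa_code_object_version 2,1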
5169 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
5170   uint32_t Major;
5171   uint32_t Minor;
5172 
5173   if (ParseDirectiveMajorMinor(Major, Minor))
5174     return true;
5175 
5176   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
5177   return false;
5178 }
5179 
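// Parses the legacy ISA directive. With no arguments the ISA version of the
// targeted GPU is used; otherwise the expected form is (values are
// illustrative):
//
//   .hsa_code_object_isa 8,0,3,"AMD","AMDGPU"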
5180 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
5181   uint32_t Major;
5182   uint32_t Minor;
5183   uint32_t Stepping;
5184   StringRef VendorName;
5185   StringRef ArchName;
5186 
5187   // If this directive has no arguments, then use the ISA version for the
5188   // targeted GPU.
5189   if (isToken(AsmToken::EndOfStatement)) {
5190     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5191     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
5192                                                         ISA.Stepping,
5193                                                         "AMD", "AMDGPU");
5194     return false;
5195   }
5196 
5197   if (ParseDirectiveMajorMinor(Major, Minor))
5198     return true;
5199 
5200   if (!trySkipToken(AsmToken::Comma))
5201     return TokError("stepping version number required, comma expected");
5202 
5203   if (ParseAsAbsoluteExpression(Stepping))
5204     return TokError("invalid stepping version");
5205 
5206   if (!trySkipToken(AsmToken::Comma))
5207     return TokError("vendor name required, comma expected");
5208 
5209   if (!parseString(VendorName, "invalid vendor name"))
5210     return true;
5211 
5212   if (!trySkipToken(AsmToken::Comma))
5213     return TokError("arch name required, comma expected");
5214 
5215   if (!parseString(ArchName, "invalid arch name"))
5216     return true;
5217 
5218   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
5219                                                       VendorName, ArchName);
5220   return false;
5221 }
5222 
5223 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5224                                                amd_kernel_code_t &Header) {
5225   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5226   // assembly for backwards compatibility.
5227   if (ID == "max_scratch_backing_memory_byte_size") {
5228     Parser.eatToEndOfStatement();
5229     return false;
5230   }
5231 
5232   SmallString<40> ErrStr;
5233   raw_svector_ostream Err(ErrStr);
5234   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5235     return TokError(Err.str());
5236   }
5237   Lex();
5238 
5239   if (ID == "enable_wavefront_size32") {
5240     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5241       if (!isGFX10Plus())
5242         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5243       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5244         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5245     } else {
5246       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5247         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5248     }
5249   }
5250 
5251   if (ID == "wavefront_size") {
5252     if (Header.wavefront_size == 5) {
5253       if (!isGFX10Plus())
5254         return TokError("wavefront_size=5 is only allowed on GFX10+");
5255       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5256         return TokError("wavefront_size=5 requires +WavefrontSize32");
5257     } else if (Header.wavefront_size == 6) {
5258       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5259         return TokError("wavefront_size=6 requires +WavefrontSize64");
5260     }
5261   }
5262 
5263   if (ID == "enable_wgp_mode") {
5264     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5265         !isGFX10Plus())
5266       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5267   }
5268 
5269   if (ID == "enable_mem_ordered") {
5270     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5271         !isGFX10Plus())
5272       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5273   }
5274 
5275   if (ID == "enable_fwd_progress") {
5276     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5277         !isGFX10Plus())
5278       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5279   }
5280 
5281   return false;
5282 }
5283 
5284 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5285   amd_kernel_code_t Header;
5286   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5287 
5288   while (true) {
5289     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
5290     // will set the current token to EndOfStatement.
5291     while(trySkipToken(AsmToken::EndOfStatement));
5292 
5293     StringRef ID;
5294     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5295       return true;
5296 
5297     if (ID == ".end_amd_kernel_code_t")
5298       break;
5299 
5300     if (ParseAMDKernelCodeTValue(ID, Header))
5301       return true;
5302   }
5303 
5304   getTargetStreamer().EmitAMDKernelCodeT(Header);
5305 
5306   return false;
5307 }
5308 
5309 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5310   StringRef KernelName;
5311   if (!parseId(KernelName, "expected symbol name"))
5312     return true;
5313 
5314   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5315                                            ELF::STT_AMDGPU_HSA_KERNEL);
5316 
5317   KernelScope.initialize(getContext());
5318   return false;
5319 }
5320 
5321 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5322   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5323     return Error(getLoc(),
5324                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5325                  "architectures");
5326   }
5327 
5328   auto TargetIDDirective = getLexer().getTok().getStringContents();
5329   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5330     return Error(getParser().getTok().getLoc(), "target id must match options");
5331 
5332   getTargetStreamer().EmitISAVersion();
5333   Lex();
5334 
5335   return false;
5336 }
5337 
5338 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5339   const char *AssemblerDirectiveBegin;
5340   const char *AssemblerDirectiveEnd;
5341   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5342       isHsaAbiVersion3AndAbove(&getSTI())
5343           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5344                             HSAMD::V3::AssemblerDirectiveEnd)
5345           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5346                             HSAMD::AssemblerDirectiveEnd);
5347 
5348   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5349     return Error(getLoc(),
5350                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5351                  "not available on non-amdhsa OSes")).str());
5352   }
5353 
5354   std::string HSAMetadataString;
5355   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5356                           HSAMetadataString))
5357     return true;
5358 
5359   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5360     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5361       return Error(getLoc(), "invalid HSA metadata");
5362   } else {
5363     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5364       return Error(getLoc(), "invalid HSA metadata");
5365   }
5366 
5367   return false;
5368 }
5369 
5370 /// Common code to parse out a block of text (typically YAML) between start and
5371 /// end directives.
5372 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5373                                           const char *AssemblerDirectiveEnd,
5374                                           std::string &CollectString) {
5375 
5376   raw_string_ostream CollectStream(CollectString);
5377 
5378   getLexer().setSkipSpace(false);
5379 
5380   bool FoundEnd = false;
5381   while (!isToken(AsmToken::Eof)) {
5382     while (isToken(AsmToken::Space)) {
5383       CollectStream << getTokenStr();
5384       Lex();
5385     }
5386 
5387     if (trySkipId(AssemblerDirectiveEnd)) {
5388       FoundEnd = true;
5389       break;
5390     }
5391 
5392     CollectStream << Parser.parseStringToEndOfStatement()
5393                   << getContext().getAsmInfo()->getSeparatorString();
5394 
5395     Parser.eatToEndOfStatement();
5396   }
5397 
5398   getLexer().setSkipSpace(true);
5399 
5400   if (isToken(AsmToken::Eof) && !FoundEnd) {
5401     return TokError(Twine("expected directive ") +
5402                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5403   }
5404 
5405   CollectStream.flush();
5406   return false;
5407 }
5408 
5409 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5410 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5411   std::string String;
5412   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5413                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5414     return true;
5415 
5416   auto PALMetadata = getTargetStreamer().getPALMetadata();
5417   if (!PALMetadata->setFromString(String))
5418     return Error(getLoc(), "invalid PAL metadata");
5419   return false;
5420 }
5421 
5422 /// Parse the assembler directive for old linear-format PAL metadata.
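/// The directive's operands are an even-length, comma-separated list of
/// integer key/value pairs; each pair is forwarded to the PAL metadata as a
/// register write via setRegister(Key, Value).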
5423 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5424   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5425     return Error(getLoc(),
5426                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5427                  "not available on non-amdpal OSes")).str());
5428   }
5429 
5430   auto PALMetadata = getTargetStreamer().getPALMetadata();
5431   PALMetadata->setLegacy();
5432   for (;;) {
5433     uint32_t Key, Value;
5434     if (ParseAsAbsoluteExpression(Key)) {
5435       return TokError(Twine("invalid value in ") +
5436                       Twine(PALMD::AssemblerDirective));
5437     }
5438     if (!trySkipToken(AsmToken::Comma)) {
5439       return TokError(Twine("expected an even number of values in ") +
5440                       Twine(PALMD::AssemblerDirective));
5441     }
5442     if (ParseAsAbsoluteExpression(Value)) {
5443       return TokError(Twine("invalid value in ") +
5444                       Twine(PALMD::AssemblerDirective));
5445     }
5446     PALMetadata->setRegister(Key, Value);
5447     if (!trySkipToken(AsmToken::Comma))
5448       break;
5449   }
5450   return false;
5451 }
5452 
5453 /// ParseDirectiveAMDGPULDS
5454 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
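///
/// For example (illustrative symbol, size, and alignment):
///  .amdgpu_lds my_lds_var, 4096, 16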
5455 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5456   if (getParser().checkForValidSection())
5457     return true;
5458 
5459   StringRef Name;
5460   SMLoc NameLoc = getLoc();
5461   if (getParser().parseIdentifier(Name))
5462     return TokError("expected identifier in directive");
5463 
5464   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5465   if (parseToken(AsmToken::Comma, "expected ','"))
5466     return true;
5467 
5468   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5469 
5470   int64_t Size;
5471   SMLoc SizeLoc = getLoc();
5472   if (getParser().parseAbsoluteExpression(Size))
5473     return true;
5474   if (Size < 0)
5475     return Error(SizeLoc, "size must be non-negative");
5476   if (Size > LocalMemorySize)
5477     return Error(SizeLoc, "size is too large");
5478 
5479   int64_t Alignment = 4;
5480   if (trySkipToken(AsmToken::Comma)) {
5481     SMLoc AlignLoc = getLoc();
5482     if (getParser().parseAbsoluteExpression(Alignment))
5483       return true;
5484     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5485       return Error(AlignLoc, "alignment must be a power of two");
5486 
5487     // Alignment larger than the size of LDS is possible in theory, as long
    // as the linker manages to place the symbol at address 0, but we do want
5489     // to make sure the alignment fits nicely into a 32-bit integer.
5490     if (Alignment >= 1u << 31)
5491       return Error(AlignLoc, "alignment is too large");
5492   }
5493 
5494   if (parseToken(AsmToken::EndOfStatement,
5495                  "unexpected token in '.amdgpu_lds' directive"))
5496     return true;
5497 
5498   Symbol->redefineIfPossible();
5499   if (!Symbol->isUndefined())
5500     return Error(NameLoc, "invalid symbol redefinition");
5501 
5502   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5503   return false;
5504 }
5505 
5506 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5507   StringRef IDVal = DirectiveID.getString();
5508 
5509   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5510     if (IDVal == ".amdhsa_kernel")
5511      return ParseDirectiveAMDHSAKernel();
5512 
5513     // TODO: Restructure/combine with PAL metadata directive.
5514     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5515       return ParseDirectiveHSAMetadata();
5516   } else {
5517     if (IDVal == ".hsa_code_object_version")
5518       return ParseDirectiveHSACodeObjectVersion();
5519 
5520     if (IDVal == ".hsa_code_object_isa")
5521       return ParseDirectiveHSACodeObjectISA();
5522 
5523     if (IDVal == ".amd_kernel_code_t")
5524       return ParseDirectiveAMDKernelCodeT();
5525 
5526     if (IDVal == ".amdgpu_hsa_kernel")
5527       return ParseDirectiveAMDGPUHsaKernel();
5528 
5529     if (IDVal == ".amd_amdgpu_isa")
5530       return ParseDirectiveISAVersion();
5531 
5532     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5533       return ParseDirectiveHSAMetadata();
5534   }
5535 
5536   if (IDVal == ".amdgcn_target")
5537     return ParseDirectiveAMDGCNTarget();
5538 
5539   if (IDVal == ".amdgpu_lds")
5540     return ParseDirectiveAMDGPULDS();
5541 
5542   if (IDVal == PALMD::AssemblerDirectiveBegin)
5543     return ParseDirectivePALMetadataBegin();
5544 
5545   if (IDVal == PALMD::AssemblerDirective)
5546     return ParseDirectivePALMetadata();
5547 
5548   return true;
5549 }
5550 
5551 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5552                                            unsigned RegNo) {
5553 
5554   if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
5555     return isGFX9Plus();
5556 
  // GFX10 has 2 more SGPRs, 104 and 105.
5558   if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
5559     return hasSGPR104_SGPR105();
5560 
5561   switch (RegNo) {
5562   case AMDGPU::SRC_SHARED_BASE:
5563   case AMDGPU::SRC_SHARED_LIMIT:
5564   case AMDGPU::SRC_PRIVATE_BASE:
5565   case AMDGPU::SRC_PRIVATE_LIMIT:
5566   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5567     return isGFX9Plus();
5568   case AMDGPU::TBA:
5569   case AMDGPU::TBA_LO:
5570   case AMDGPU::TBA_HI:
5571   case AMDGPU::TMA:
5572   case AMDGPU::TMA_LO:
5573   case AMDGPU::TMA_HI:
5574     return !isGFX9Plus();
5575   case AMDGPU::XNACK_MASK:
5576   case AMDGPU::XNACK_MASK_LO:
5577   case AMDGPU::XNACK_MASK_HI:
    return (isVI() || isGFX9()) &&
           getTargetStreamer().getTargetID()->isXnackSupported();
5579   case AMDGPU::SGPR_NULL:
5580     return isGFX10Plus();
5581   default:
5582     break;
5583   }
5584 
5585   if (isCI())
5586     return true;
5587 
5588   if (isSI() || isGFX10Plus()) {
5589     // No flat_scr on SI.
5590     // On GFX10 flat scratch is not a valid register operand and can only be
5591     // accessed with s_setreg/s_getreg.
5592     switch (RegNo) {
5593     case AMDGPU::FLAT_SCR:
5594     case AMDGPU::FLAT_SCR_LO:
5595     case AMDGPU::FLAT_SCR_HI:
5596       return false;
5597     default:
5598       return true;
5599     }
5600   }
5601 
5602   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5603   // SI/CI have.
5604   if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
5605     return hasSGPR102_SGPR103();
5606 
5607   return true;
5608 }
5609 
5610 OperandMatchResultTy
5611 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5612                               OperandMode Mode) {
5613   // Try to parse with a custom parser
5614   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5615 
  // If we successfully parsed the operand or if there was an error parsing,
  // we are done.
  //
  // If we are parsing after we reach EndOfStatement then this means we
  // are appending default values to the Operands list. This is only done
  // by a custom parser, so we shouldn't continue on to the generic parsing.
5622   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5623       isToken(AsmToken::EndOfStatement))
5624     return ResTy;
5625 
5626   SMLoc RBraceLoc;
5627   SMLoc LBraceLoc = getLoc();
5628   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5629     unsigned Prefix = Operands.size();
5630 
5631     for (;;) {
5632       auto Loc = getLoc();
5633       ResTy = parseReg(Operands);
5634       if (ResTy == MatchOperand_NoMatch)
5635         Error(Loc, "expected a register");
5636       if (ResTy != MatchOperand_Success)
5637         return MatchOperand_ParseFail;
5638 
5639       RBraceLoc = getLoc();
5640       if (trySkipToken(AsmToken::RBrac))
5641         break;
5642 
5643       if (!skipToken(AsmToken::Comma,
5644                      "expected a comma or a closing square bracket")) {
5645         return MatchOperand_ParseFail;
5646       }
5647     }
5648 
5649     if (Operands.size() - Prefix > 1) {
5650       Operands.insert(Operands.begin() + Prefix,
5651                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5652       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5653     }
5654 
5655     return MatchOperand_Success;
5656   }
5657 
5658   return parseRegOrImm(Operands);
5659 }
5660 
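// Strips a forced-encoding suffix from the mnemonic and records it. For
// example (illustrative mnemonic), "v_add_f32_e64" forces the 64-bit
// encoding and is matched as "v_add_f32"; "_e32", "_dpp" and "_sdwa" are
// handled the same way.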
5661 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5662   // Clear any forced encodings from the previous instruction.
5663   setForcedEncodingSize(0);
5664   setForcedDPP(false);
5665   setForcedSDWA(false);
5666 
5667   if (Name.endswith("_e64")) {
5668     setForcedEncodingSize(64);
5669     return Name.substr(0, Name.size() - 4);
5670   } else if (Name.endswith("_e32")) {
5671     setForcedEncodingSize(32);
5672     return Name.substr(0, Name.size() - 4);
5673   } else if (Name.endswith("_dpp")) {
5674     setForcedDPP(true);
5675     return Name.substr(0, Name.size() - 4);
5676   } else if (Name.endswith("_sdwa")) {
5677     setForcedSDWA(true);
5678     return Name.substr(0, Name.size() - 5);
5679   }
5680   return Name;
5681 }
5682 
5683 static void applyMnemonicAliases(StringRef &Mnemonic,
5684                                  const FeatureBitset &Features,
5685                                  unsigned VariantID);
5686 
5687 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5688                                        StringRef Name,
5689                                        SMLoc NameLoc, OperandVector &Operands) {
5690   // Add the instruction mnemonic
5691   Name = parseMnemonicSuffix(Name);
5692 
5693   // If the target architecture uses MnemonicAlias, call it here to parse
5694   // operands correctly.
5695   applyMnemonicAliases(Name, getAvailableFeatures(), 0);
5696 
5697   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5698 
5699   bool IsMIMG = Name.startswith("image_");
5700 
5701   while (!trySkipToken(AsmToken::EndOfStatement)) {
5702     OperandMode Mode = OperandMode_Default;
5703     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5704       Mode = OperandMode_NSA;
5705     CPolSeen = 0;
5706     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5707 
5708     if (Res != MatchOperand_Success) {
5709       checkUnsupportedInstruction(Name, NameLoc);
5710       if (!Parser.hasPendingError()) {
5711         // FIXME: use real operand location rather than the current location.
5712         StringRef Msg =
5713           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5714                                             "not a valid operand.";
5715         Error(getLoc(), Msg);
5716       }
5717       while (!trySkipToken(AsmToken::EndOfStatement)) {
5718         lex();
5719       }
5720       return true;
5721     }
5722 
5723     // Eat the comma or space if there is one.
5724     trySkipToken(AsmToken::Comma);
5725   }
5726 
5727   return false;
5728 }
5729 
5730 //===----------------------------------------------------------------------===//
5731 // Utility functions
5732 //===----------------------------------------------------------------------===//
5733 
5734 OperandMatchResultTy
5735 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5736 
5737   if (!trySkipId(Prefix, AsmToken::Colon))
5738     return MatchOperand_NoMatch;
5739 
5740   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5741 }
5742 
5743 OperandMatchResultTy
5744 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5745                                     AMDGPUOperand::ImmTy ImmTy,
5746                                     bool (*ConvertResult)(int64_t&)) {
5747   SMLoc S = getLoc();
5748   int64_t Value = 0;
5749 
5750   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5751   if (Res != MatchOperand_Success)
5752     return Res;
5753 
5754   if (ConvertResult && !ConvertResult(Value)) {
5755     Error(S, "invalid " + StringRef(Prefix) + " value.");
5756   }
5757 
5758   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5759   return MatchOperand_Success;
5760 }
5761 
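// Parses a prefixed array of 0/1 flags into a bitmask. For example
// (illustrative operand name), "op_sel:[0,0,1,1]" yields the immediate
// value 0b1100.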
5762 OperandMatchResultTy
5763 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5764                                              OperandVector &Operands,
5765                                              AMDGPUOperand::ImmTy ImmTy,
5766                                              bool (*ConvertResult)(int64_t&)) {
5767   SMLoc S = getLoc();
5768   if (!trySkipId(Prefix, AsmToken::Colon))
5769     return MatchOperand_NoMatch;
5770 
5771   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5772     return MatchOperand_ParseFail;
5773 
5774   unsigned Val = 0;
5775   const unsigned MaxSize = 4;
5776 
5777   // FIXME: How to verify the number of elements matches the number of src
5778   // operands?
5779   for (int I = 0; ; ++I) {
5780     int64_t Op;
5781     SMLoc Loc = getLoc();
5782     if (!parseExpr(Op))
5783       return MatchOperand_ParseFail;
5784 
5785     if (Op != 0 && Op != 1) {
5786       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5787       return MatchOperand_ParseFail;
5788     }
5789 
5790     Val |= (Op << I);
5791 
5792     if (trySkipToken(AsmToken::RBrac))
5793       break;
5794 
5795     if (I + 1 == MaxSize) {
5796       Error(getLoc(), "expected a closing square bracket");
5797       return MatchOperand_ParseFail;
5798     }
5799 
5800     if (!skipToken(AsmToken::Comma, "expected a comma"))
5801       return MatchOperand_ParseFail;
5802   }
5803 
5804   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5805   return MatchOperand_Success;
5806 }
5807 
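// Parses a named boolean modifier: the bare name sets the bit and a "no"
// prefix clears it, e.g. "gds" vs. "nogds" (which names are accepted depends
// on the caller).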
5808 OperandMatchResultTy
5809 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5810                                AMDGPUOperand::ImmTy ImmTy) {
5811   int64_t Bit;
5812   SMLoc S = getLoc();
5813 
5814   if (trySkipId(Name)) {
5815     Bit = 1;
5816   } else if (trySkipId("no", Name)) {
5817     Bit = 0;
5818   } else {
5819     return MatchOperand_NoMatch;
5820   }
5821 
5822   if (Name == "r128" && !hasMIMG_R128()) {
5823     Error(S, "r128 modifier is not supported on this GPU");
5824     return MatchOperand_ParseFail;
5825   }
5826   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5827     Error(S, "a16 modifier is not supported on this GPU");
5828     return MatchOperand_ParseFail;
5829   }
5830 
5831   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5832     ImmTy = AMDGPUOperand::ImmTyR128A16;
5833 
5834   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5835   return MatchOperand_Success;
5836 }
5837 
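// Parses cache policy modifiers ("glc"/"noglc", "slc"/"noslc", "dlc"/"nodlc",
// "scc"/"noscc", or the sc0/sc1/nt family on gfx940) and accumulates them
// into a single CPol immediate. For example (illustrative instruction),
// "buffer_load_dword v0, off, s[0:3], 0 glc slc" sets both the GLC and SLC
// bits.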
5838 OperandMatchResultTy
5839 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5840   unsigned CPolOn = 0;
5841   unsigned CPolOff = 0;
5842   SMLoc S = getLoc();
5843 
5844   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5845   if (isGFX940() && !Mnemo.startswith("s_")) {
5846     if (trySkipId("sc0"))
5847       CPolOn = AMDGPU::CPol::SC0;
5848     else if (trySkipId("nosc0"))
5849       CPolOff = AMDGPU::CPol::SC0;
5850     else if (trySkipId("nt"))
5851       CPolOn = AMDGPU::CPol::NT;
5852     else if (trySkipId("nont"))
5853       CPolOff = AMDGPU::CPol::NT;
5854     else if (trySkipId("sc1"))
5855       CPolOn = AMDGPU::CPol::SC1;
5856     else if (trySkipId("nosc1"))
5857       CPolOff = AMDGPU::CPol::SC1;
5858     else
5859       return MatchOperand_NoMatch;
  } else if (trySkipId("glc"))
5862     CPolOn = AMDGPU::CPol::GLC;
5863   else if (trySkipId("noglc"))
5864     CPolOff = AMDGPU::CPol::GLC;
5865   else if (trySkipId("slc"))
5866     CPolOn = AMDGPU::CPol::SLC;
5867   else if (trySkipId("noslc"))
5868     CPolOff = AMDGPU::CPol::SLC;
5869   else if (trySkipId("dlc"))
5870     CPolOn = AMDGPU::CPol::DLC;
5871   else if (trySkipId("nodlc"))
5872     CPolOff = AMDGPU::CPol::DLC;
5873   else if (trySkipId("scc"))
5874     CPolOn = AMDGPU::CPol::SCC;
5875   else if (trySkipId("noscc"))
5876     CPolOff = AMDGPU::CPol::SCC;
5877   else
5878     return MatchOperand_NoMatch;
5879 
5880   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5881     Error(S, "dlc modifier is not supported on this GPU");
5882     return MatchOperand_ParseFail;
5883   }
5884 
5885   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5886     Error(S, "scc modifier is not supported on this GPU");
5887     return MatchOperand_ParseFail;
5888   }
5889 
5890   if (CPolSeen & (CPolOn | CPolOff)) {
5891     Error(S, "duplicate cache policy modifier");
5892     return MatchOperand_ParseFail;
5893   }
5894 
5895   CPolSeen |= (CPolOn | CPolOff);
5896 
5897   for (unsigned I = 1; I != Operands.size(); ++I) {
5898     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5899     if (Op.isCPol()) {
5900       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5901       return MatchOperand_Success;
5902     }
5903   }
5904 
5905   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5906                                               AMDGPUOperand::ImmTyCPol));
5907 
5908   return MatchOperand_Success;
5909 }
5910 
5911 static void addOptionalImmOperand(
5912   MCInst& Inst, const OperandVector& Operands,
5913   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5914   AMDGPUOperand::ImmTy ImmT,
5915   int64_t Default = 0) {
5916   auto i = OptionalIdx.find(ImmT);
5917   if (i != OptionalIdx.end()) {
5918     unsigned Idx = i->second;
5919     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5920   } else {
5921     Inst.addOperand(MCOperand::createImm(Default));
5922   }
5923 }
5924 
5925 OperandMatchResultTy
5926 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5927                                        StringRef &Value,
5928                                        SMLoc &StringLoc) {
5929   if (!trySkipId(Prefix, AsmToken::Colon))
5930     return MatchOperand_NoMatch;
5931 
5932   StringLoc = getLoc();
5933   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5934                                                   : MatchOperand_ParseFail;
5935 }
5936 
5937 //===----------------------------------------------------------------------===//
5938 // MTBUF format
5939 //===----------------------------------------------------------------------===//
5940 
5941 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5942                                   int64_t MaxVal,
5943                                   int64_t &Fmt) {
5944   int64_t Val;
5945   SMLoc Loc = getLoc();
5946 
5947   auto Res = parseIntWithPrefix(Pref, Val);
5948   if (Res == MatchOperand_ParseFail)
5949     return false;
5950   if (Res == MatchOperand_NoMatch)
5951     return true;
5952 
5953   if (Val < 0 || Val > MaxVal) {
5954     Error(Loc, Twine("out of range ", StringRef(Pref)));
5955     return false;
5956   }
5957 
5958   Fmt = Val;
5959   return true;
5960 }
5961 
5962 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5963 // values to live in a joint format operand in the MCInst encoding.
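// For example (illustrative field values), "dfmt:1, nfmt:2" and
// "nfmt:2, dfmt:1" encode to the same joint value; either field may be
// omitted, in which case its default is used.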
5964 OperandMatchResultTy
5965 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5966   using namespace llvm::AMDGPU::MTBUFFormat;
5967 
5968   int64_t Dfmt = DFMT_UNDEF;
5969   int64_t Nfmt = NFMT_UNDEF;
5970 
5971   // dfmt and nfmt can appear in either order, and each is optional.
5972   for (int I = 0; I < 2; ++I) {
5973     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5974       return MatchOperand_ParseFail;
5975 
5976     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5977       return MatchOperand_ParseFail;
5978     }
5979     // Skip optional comma between dfmt/nfmt
5980     // but guard against 2 commas following each other.
5981     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5982         !peekToken().is(AsmToken::Comma)) {
5983       trySkipToken(AsmToken::Comma);
5984     }
5985   }
5986 
5987   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5988     return MatchOperand_NoMatch;
5989 
5990   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5991   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5992 
5993   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5994   return MatchOperand_Success;
5995 }
5996 
5997 OperandMatchResultTy
5998 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5999   using namespace llvm::AMDGPU::MTBUFFormat;
6000 
6001   int64_t Fmt = UFMT_UNDEF;
6002 
6003   if (!tryParseFmt("format", UFMT_MAX, Fmt))
6004     return MatchOperand_ParseFail;
6005 
6006   if (Fmt == UFMT_UNDEF)
6007     return MatchOperand_NoMatch;
6008 
6009   Format = Fmt;
6010   return MatchOperand_Success;
6011 }
6012 
6013 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6014                                     int64_t &Nfmt,
6015                                     StringRef FormatStr,
6016                                     SMLoc Loc) {
6017   using namespace llvm::AMDGPU::MTBUFFormat;
6018   int64_t Format;
6019 
6020   Format = getDfmt(FormatStr);
6021   if (Format != DFMT_UNDEF) {
6022     Dfmt = Format;
6023     return true;
6024   }
6025 
6026   Format = getNfmt(FormatStr, getSTI());
6027   if (Format != NFMT_UNDEF) {
6028     Nfmt = Format;
6029     return true;
6030   }
6031 
6032   Error(Loc, "unsupported format");
6033   return false;
6034 }
6035 
6036 OperandMatchResultTy
6037 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
6038                                           SMLoc FormatLoc,
6039                                           int64_t &Format) {
6040   using namespace llvm::AMDGPU::MTBUFFormat;
6041 
6042   int64_t Dfmt = DFMT_UNDEF;
6043   int64_t Nfmt = NFMT_UNDEF;
6044   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
6045     return MatchOperand_ParseFail;
6046 
6047   if (trySkipToken(AsmToken::Comma)) {
6048     StringRef Str;
6049     SMLoc Loc = getLoc();
6050     if (!parseId(Str, "expected a format string") ||
6051         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
6052       return MatchOperand_ParseFail;
6053     }
6054     if (Dfmt == DFMT_UNDEF) {
6055       Error(Loc, "duplicate numeric format");
6056       return MatchOperand_ParseFail;
6057     } else if (Nfmt == NFMT_UNDEF) {
6058       Error(Loc, "duplicate data format");
6059       return MatchOperand_ParseFail;
6060     }
6061   }
6062 
6063   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6064   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6065 
6066   if (isGFX10Plus()) {
6067     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
6068     if (Ufmt == UFMT_UNDEF) {
6069       Error(FormatLoc, "unsupported format");
6070       return MatchOperand_ParseFail;
6071     }
6072     Format = Ufmt;
6073   } else {
6074     Format = encodeDfmtNfmt(Dfmt, Nfmt);
6075   }
6076 
6077   return MatchOperand_Success;
6078 }
6079 
6080 OperandMatchResultTy
6081 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6082                                             SMLoc Loc,
6083                                             int64_t &Format) {
6084   using namespace llvm::AMDGPU::MTBUFFormat;
6085 
6086   auto Id = getUnifiedFormat(FormatStr, getSTI());
6087   if (Id == UFMT_UNDEF)
6088     return MatchOperand_NoMatch;
6089 
6090   if (!isGFX10Plus()) {
6091     Error(Loc, "unified format is not supported on this GPU");
6092     return MatchOperand_ParseFail;
6093   }
6094 
6095   Format = Id;
6096   return MatchOperand_Success;
6097 }
6098 
6099 OperandMatchResultTy
6100 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6101   using namespace llvm::AMDGPU::MTBUFFormat;
6102   SMLoc Loc = getLoc();
6103 
6104   if (!parseExpr(Format))
6105     return MatchOperand_ParseFail;
6106   if (!isValidFormatEncoding(Format, getSTI())) {
6107     Error(Loc, "out of range format");
6108     return MatchOperand_ParseFail;
6109   }
6110 
6111   return MatchOperand_Success;
6112 }
6113 
6114 OperandMatchResultTy
6115 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6116   using namespace llvm::AMDGPU::MTBUFFormat;
6117 
6118   if (!trySkipId("format", AsmToken::Colon))
6119     return MatchOperand_NoMatch;
6120 
6121   if (trySkipToken(AsmToken::LBrac)) {
6122     StringRef FormatStr;
6123     SMLoc Loc = getLoc();
6124     if (!parseId(FormatStr, "expected a format string"))
6125       return MatchOperand_ParseFail;
6126 
6127     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6128     if (Res == MatchOperand_NoMatch)
6129       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6130     if (Res != MatchOperand_Success)
6131       return Res;
6132 
6133     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6134       return MatchOperand_ParseFail;
6135 
6136     return MatchOperand_Success;
6137   }
6138 
6139   return parseNumericFormat(Format);
6140 }
6141 
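// Parse the MTBUF format operand together with the following soffset operand.
// Legacy dfmt/nfmt (or ufmt) syntax precedes soffset; the 'format:' syntax
// follows it, in which case the placeholder format operand pushed earlier is
// updated in place.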
6142 OperandMatchResultTy
6143 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
6144   using namespace llvm::AMDGPU::MTBUFFormat;
6145 
6146   int64_t Format = getDefaultFormatEncoding(getSTI());
6147   OperandMatchResultTy Res;
6148   SMLoc Loc = getLoc();
6149 
6150   // Parse legacy format syntax.
6151   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
6152   if (Res == MatchOperand_ParseFail)
6153     return Res;
6154 
6155   bool FormatFound = (Res == MatchOperand_Success);
6156 
6157   Operands.push_back(
6158     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6159 
6160   if (FormatFound)
6161     trySkipToken(AsmToken::Comma);
6162 
6163   if (isToken(AsmToken::EndOfStatement)) {
6164     // We are expecting an soffset operand,
6165     // but let the matcher handle the error.
6166     return MatchOperand_Success;
6167   }
6168 
6169   // Parse soffset.
6170   Res = parseRegOrImm(Operands);
6171   if (Res != MatchOperand_Success)
6172     return Res;
6173 
6174   trySkipToken(AsmToken::Comma);
6175 
6176   if (!FormatFound) {
6177     Res = parseSymbolicOrNumericFormat(Format);
6178     if (Res == MatchOperand_ParseFail)
6179       return Res;
6180     if (Res == MatchOperand_Success) {
6181       auto Size = Operands.size();
6182       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
6183       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6184       Op.setImm(Format);
6185     }
6186     return MatchOperand_Success;
6187   }
6188 
6189   if (isId("format") && peekToken().is(AsmToken::Colon)) {
6190     Error(getLoc(), "duplicate format");
6191     return MatchOperand_ParseFail;
6192   }
6193   return MatchOperand_Success;
6194 }
6195 
6196 //===----------------------------------------------------------------------===//
6197 // ds
6198 //===----------------------------------------------------------------------===//
6199 
6200 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
6201                                     const OperandVector &Operands) {
6202   OptionalImmIndexMap OptionalIdx;
6203 
6204   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6205     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6206 
6207     // Add the register arguments
6208     if (Op.isReg()) {
6209       Op.addRegOperands(Inst, 1);
6210       continue;
6211     }
6212 
6213     // Handle optional arguments
6214     OptionalIdx[Op.getImmTy()] = i;
6215   }
6216 
6217   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
6218   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
6219   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6220 
6221   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6222 }
6223 
6224 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
6225                                 bool IsGdsHardcoded) {
6226   OptionalImmIndexMap OptionalIdx;
6227 
6228   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6229     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6230 
6231     // Add the register arguments
6232     if (Op.isReg()) {
6233       Op.addRegOperands(Inst, 1);
6234       continue;
6235     }
6236 
6237     if (Op.isToken() && Op.getToken() == "gds") {
6238       IsGdsHardcoded = true;
6239       continue;
6240     }
6241 
6242     // Handle optional arguments
6243     OptionalIdx[Op.getImmTy()] = i;
6244   }
6245 
6246   AMDGPUOperand::ImmTy OffsetType =
6247     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
6248      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
6249      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
6250                                                       AMDGPUOperand::ImmTyOffset;
6251 
6252   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
6253 
6254   if (!IsGdsHardcoded) {
6255     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6256   }
6257   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6258 }
6259 
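// Convert an 'exp' instruction. Up to four sources are collected, each either
// a register or 'off', and the enable mask (en) is derived from which sources
// are present. With 'compr' the sources are remapped onto src0/src1 and each
// present source enables a pair of bits in the mask.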
6260 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
6261   OptionalImmIndexMap OptionalIdx;
6262 
6263   unsigned OperandIdx[4];
6264   unsigned EnMask = 0;
6265   int SrcIdx = 0;
6266 
6267   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6268     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6269 
6270     // Add the register arguments
6271     if (Op.isReg()) {
6272       assert(SrcIdx < 4);
6273       OperandIdx[SrcIdx] = Inst.size();
6274       Op.addRegOperands(Inst, 1);
6275       ++SrcIdx;
6276       continue;
6277     }
6278 
6279     if (Op.isOff()) {
6280       assert(SrcIdx < 4);
6281       OperandIdx[SrcIdx] = Inst.size();
6282       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
6283       ++SrcIdx;
6284       continue;
6285     }
6286 
6287     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
6288       Op.addImmOperands(Inst, 1);
6289       continue;
6290     }
6291 
6292     if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
6293       continue;
6294 
6295     // Handle optional arguments
6296     OptionalIdx[Op.getImmTy()] = i;
6297   }
6298 
6299   assert(SrcIdx == 4);
6300 
6301   bool Compr = false;
6302   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
6303     Compr = true;
6304     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
6305     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
6306     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
6307   }
6308 
6309   for (auto i = 0; i < SrcIdx; ++i) {
6310     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
6311       EnMask |= Compr ? (0x3 << (i * 2)) : (0x1 << i);
6312     }
6313   }
6314 
6315   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
6316   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
6317 
6318   Inst.addOperand(MCOperand::createImm(EnMask));
6319 }
6320 
6321 //===----------------------------------------------------------------------===//
6322 // s_waitcnt
6323 //===----------------------------------------------------------------------===//
6324 
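// Pack a single counter value into an s_waitcnt bitmask. Returns true on
// failure, i.e. when the value does not survive an encode/decode round trip
// (it is out of range) and saturation was not requested.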
6325 static bool
6326 encodeCnt(
6327   const AMDGPU::IsaVersion ISA,
6328   int64_t &IntVal,
6329   int64_t CntVal,
6330   bool Saturate,
6331   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6332   unsigned (*decode)(const IsaVersion &Version, unsigned))
6333 {
6334   bool Failed = false;
6335 
6336   IntVal = encode(ISA, IntVal, CntVal);
6337   if (CntVal != decode(ISA, IntVal)) {
6338     if (Saturate) {
6339       IntVal = encode(ISA, IntVal, -1);
6340     } else {
6341       Failed = true;
6342     }
6343   }
6344   return Failed;
6345 }
6346 
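// Parse one 'name(value)' term of an s_waitcnt operand and merge it into
// IntVal. A '_sat' suffix on the counter name clamps an out-of-range value
// instead of reporting an error.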
6347 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6348 
6349   SMLoc CntLoc = getLoc();
6350   StringRef CntName = getTokenStr();
6351 
6352   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6353       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6354     return false;
6355 
6356   int64_t CntVal;
6357   SMLoc ValLoc = getLoc();
6358   if (!parseExpr(CntVal))
6359     return false;
6360 
6361   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6362 
6363   bool Failed = true;
6364   bool Sat = CntName.endswith("_sat");
6365 
6366   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6367     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6368   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6369     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6370   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6371     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6372   } else {
6373     Error(CntLoc, "invalid counter name " + CntName);
6374     return false;
6375   }
6376 
6377   if (Failed) {
6378     Error(ValLoc, "too large value for " + CntName);
6379     return false;
6380   }
6381 
6382   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6383     return false;
6384 
6385   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6386     if (isToken(AsmToken::EndOfStatement)) {
6387       Error(getLoc(), "expected a counter name");
6388       return false;
6389     }
6390   }
6391 
6392   return true;
6393 }
6394 
6395 OperandMatchResultTy
6396 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
6397   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6398   int64_t Waitcnt = getWaitcntBitMask(ISA);
6399   SMLoc S = getLoc();
6400 
6401   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6402     while (!isToken(AsmToken::EndOfStatement)) {
6403       if (!parseCnt(Waitcnt))
6404         return MatchOperand_ParseFail;
6405     }
6406   } else {
6407     if (!parseExpr(Waitcnt))
6408       return MatchOperand_ParseFail;
6409   }
6410 
6411   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6412   return MatchOperand_Success;
6413 }
6414 
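// Parse one 'field(value)' term of an s_delay_alu operand (instid0, instid1
// or instskip) and OR the encoded value into Delay at the field's bit offset.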
6415 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
6416   SMLoc FieldLoc = getLoc();
6417   StringRef FieldName = getTokenStr();
6418   if (!skipToken(AsmToken::Identifier, "expected a field name") ||
6419       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6420     return false;
6421 
6422   SMLoc ValueLoc = getLoc();
6423   StringRef ValueName = getTokenStr();
6424   if (!skipToken(AsmToken::Identifier, "expected a value name") ||
6425       !skipToken(AsmToken::RParen, "expected a right parenthesis"))
6426     return false;
6427 
6428   unsigned Shift;
6429   if (FieldName == "instid0") {
6430     Shift = 0;
6431   } else if (FieldName == "instskip") {
6432     Shift = 4;
6433   } else if (FieldName == "instid1") {
6434     Shift = 7;
6435   } else {
6436     Error(FieldLoc, "invalid field name " + FieldName);
6437     return false;
6438   }
6439 
6440   int Value;
6441   if (Shift == 4) {
6442     // Parse values for instskip.
6443     Value = StringSwitch<int>(ValueName)
6444                 .Case("SAME", 0)
6445                 .Case("NEXT", 1)
6446                 .Case("SKIP_1", 2)
6447                 .Case("SKIP_2", 3)
6448                 .Case("SKIP_3", 4)
6449                 .Case("SKIP_4", 5)
6450                 .Default(-1);
6451   } else {
6452     // Parse values for instid0 and instid1.
6453     Value = StringSwitch<int>(ValueName)
6454                 .Case("NO_DEP", 0)
6455                 .Case("VALU_DEP_1", 1)
6456                 .Case("VALU_DEP_2", 2)
6457                 .Case("VALU_DEP_3", 3)
6458                 .Case("VALU_DEP_4", 4)
6459                 .Case("TRANS32_DEP_1", 5)
6460                 .Case("TRANS32_DEP_2", 6)
6461                 .Case("TRANS32_DEP_3", 7)
6462                 .Case("FMA_ACCUM_CYCLE_1", 8)
6463                 .Case("SALU_CYCLE_1", 9)
6464                 .Case("SALU_CYCLE_2", 10)
6465                 .Case("SALU_CYCLE_3", 11)
6466                 .Default(-1);
6467   }
6468   if (Value < 0) {
6469     Error(ValueLoc, "invalid value name " + ValueName);
6470     return false;
6471   }
6472 
6473   Delay |= Value << Shift;
6474   return true;
6475 }
6476 
6477 OperandMatchResultTy
6478 AMDGPUAsmParser::parseSDelayAluOps(OperandVector &Operands) {
6479   int64_t Delay = 0;
6480   SMLoc S = getLoc();
6481 
6482   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6483     do {
6484       if (!parseDelay(Delay))
6485         return MatchOperand_ParseFail;
6486     } while (trySkipToken(AsmToken::Pipe));
6487   } else {
6488     if (!parseExpr(Delay))
6489       return MatchOperand_ParseFail;
6490   }
6491 
6492   Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
6493   return MatchOperand_Success;
6494 }
6495 
6496 bool
6497 AMDGPUOperand::isSWaitCnt() const {
6498   return isImm();
6499 }
6500 
6501 bool AMDGPUOperand::isSDelayAlu() const { return isImm(); }
6502 
6503 //===----------------------------------------------------------------------===//
6504 // DepCtr
6505 //===----------------------------------------------------------------------===//
6506 
6507 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
6508                                   StringRef DepCtrName) {
6509   switch (ErrorId) {
6510   case OPR_ID_UNKNOWN:
6511     Error(Loc, Twine("invalid counter name ", DepCtrName));
6512     return;
6513   case OPR_ID_UNSUPPORTED:
6514     Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
6515     return;
6516   case OPR_ID_DUPLICATE:
6517     Error(Loc, Twine("duplicate counter name ", DepCtrName));
6518     return;
6519   case OPR_VAL_INVALID:
6520     Error(Loc, Twine("invalid value for ", DepCtrName));
6521     return;
6522   default:
6523     assert(false);
6524   }
6525 }
6526 
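// Parse one 'name(value)' term of an s_waitcnt_depctr operand. UsedOprMask
// records which counters have been seen so far so duplicates can be
// diagnosed; only the bits belonging to the current counter are updated in
// DepCtr.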
6527 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
6528 
6529   using namespace llvm::AMDGPU::DepCtr;
6530 
6531   SMLoc DepCtrLoc = getLoc();
6532   StringRef DepCtrName = getTokenStr();
6533 
6534   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6535       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6536     return false;
6537 
6538   int64_t ExprVal;
6539   if (!parseExpr(ExprVal))
6540     return false;
6541 
6542   unsigned PrevOprMask = UsedOprMask;
6543   int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
6544 
6545   if (CntVal < 0) {
6546     depCtrError(DepCtrLoc, CntVal, DepCtrName);
6547     return false;
6548   }
6549 
6550   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6551     return false;
6552 
6553   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6554     if (isToken(AsmToken::EndOfStatement)) {
6555       Error(getLoc(), "expected a counter name");
6556       return false;
6557     }
6558   }
6559 
6560   unsigned CntValMask = PrevOprMask ^ UsedOprMask;
6561   DepCtr = (DepCtr & ~CntValMask) | CntVal;
6562   return true;
6563 }
6564 
6565 OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) {
6566   using namespace llvm::AMDGPU::DepCtr;
6567 
6568   int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
6569   SMLoc Loc = getLoc();
6570 
6571   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6572     unsigned UsedOprMask = 0;
6573     while (!isToken(AsmToken::EndOfStatement)) {
6574       if (!parseDepCtr(DepCtr, UsedOprMask))
6575         return MatchOperand_ParseFail;
6576     }
6577   } else {
6578     if (!parseExpr(DepCtr))
6579       return MatchOperand_ParseFail;
6580   }
6581 
6582   Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
6583   return MatchOperand_Success;
6584 }
6585 
6586 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
6587 
6588 //===----------------------------------------------------------------------===//
6589 // hwreg
6590 //===----------------------------------------------------------------------===//
6591 
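// Parse the body of a hwreg(reg[, offset, width]) operand. The register may
// be given by symbolic name or by a numeric expression; offset and width are
// optional and keep the defaults supplied by the caller when omitted.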
6592 bool
6593 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6594                                 OperandInfoTy &Offset,
6595                                 OperandInfoTy &Width) {
6596   using namespace llvm::AMDGPU::Hwreg;
6597 
6598   // The register may be specified by name or using a numeric code
6599   HwReg.Loc = getLoc();
6600   if (isToken(AsmToken::Identifier) &&
6601       (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6602     HwReg.IsSymbolic = true;
6603     lex(); // skip register name
6604   } else if (!parseExpr(HwReg.Id, "a register name")) {
6605     return false;
6606   }
6607 
6608   if (trySkipToken(AsmToken::RParen))
6609     return true;
6610 
6611   // parse optional params
6612   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6613     return false;
6614 
6615   Offset.Loc = getLoc();
6616   if (!parseExpr(Offset.Id))
6617     return false;
6618 
6619   if (!skipToken(AsmToken::Comma, "expected a comma"))
6620     return false;
6621 
6622   Width.Loc = getLoc();
6623   return parseExpr(Width.Id) &&
6624          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6625 }
6626 
6627 bool
6628 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6629                                const OperandInfoTy &Offset,
6630                                const OperandInfoTy &Width) {
6631 
6632   using namespace llvm::AMDGPU::Hwreg;
6633 
6634   if (HwReg.IsSymbolic) {
6635     if (HwReg.Id == OPR_ID_UNSUPPORTED) {
6636       Error(HwReg.Loc,
6637             "specified hardware register is not supported on this GPU");
6638       return false;
6639     }
6640   } else {
6641     if (!isValidHwreg(HwReg.Id)) {
6642       Error(HwReg.Loc,
6643             "invalid code of hardware register: only 6-bit values are legal");
6644       return false;
6645     }
6646   }
6647   if (!isValidHwregOffset(Offset.Id)) {
6648     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6649     return false;
6650   }
6651   if (!isValidHwregWidth(Width.Id)) {
6652     Error(Width.Loc,
6653           "invalid bitfield width: only values from 1 to 32 are legal");
6654     return false;
6655   }
6656   return true;
6657 }
6658 
6659 OperandMatchResultTy
6660 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6661   using namespace llvm::AMDGPU::Hwreg;
6662 
6663   int64_t ImmVal = 0;
6664   SMLoc Loc = getLoc();
6665 
6666   if (trySkipId("hwreg", AsmToken::LParen)) {
6667     OperandInfoTy HwReg(OPR_ID_UNKNOWN);
6668     OperandInfoTy Offset(OFFSET_DEFAULT_);
6669     OperandInfoTy Width(WIDTH_DEFAULT_);
6670     if (parseHwregBody(HwReg, Offset, Width) &&
6671         validateHwreg(HwReg, Offset, Width)) {
6672       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6673     } else {
6674       return MatchOperand_ParseFail;
6675     }
6676   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6677     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6678       Error(Loc, "invalid immediate: only 16-bit values are legal");
6679       return MatchOperand_ParseFail;
6680     }
6681   } else {
6682     return MatchOperand_ParseFail;
6683   }
6684 
6685   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6686   return MatchOperand_Success;
6687 }
6688 
6689 bool AMDGPUOperand::isHwreg() const {
6690   return isImmTy(ImmTyHwreg);
6691 }
6692 
6693 //===----------------------------------------------------------------------===//
6694 // sendmsg
6695 //===----------------------------------------------------------------------===//
6696 
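// Parse the body of a sendmsg(msg[, op[, stream]]) operand. Message and
// operation may be given symbolically or as expressions; the closing
// parenthesis is consumed here.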
6697 bool
6698 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6699                                   OperandInfoTy &Op,
6700                                   OperandInfoTy &Stream) {
6701   using namespace llvm::AMDGPU::SendMsg;
6702 
6703   Msg.Loc = getLoc();
6704   if (isToken(AsmToken::Identifier) &&
6705       (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6706     Msg.IsSymbolic = true;
6707     lex(); // skip message name
6708   } else if (!parseExpr(Msg.Id, "a message name")) {
6709     return false;
6710   }
6711 
6712   if (trySkipToken(AsmToken::Comma)) {
6713     Op.IsDefined = true;
6714     Op.Loc = getLoc();
6715     if (isToken(AsmToken::Identifier) &&
6716         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6717       lex(); // skip operation name
6718     } else if (!parseExpr(Op.Id, "an operation name")) {
6719       return false;
6720     }
6721 
6722     if (trySkipToken(AsmToken::Comma)) {
6723       Stream.IsDefined = true;
6724       Stream.Loc = getLoc();
6725       if (!parseExpr(Stream.Id))
6726         return false;
6727     }
6728   }
6729 
6730   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6731 }
6732 
6733 bool
6734 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6735                                  const OperandInfoTy &Op,
6736                                  const OperandInfoTy &Stream) {
6737   using namespace llvm::AMDGPU::SendMsg;
6738 
6739   // Validation strictness depends on whether the message is specified
6740   // in symbolic or in numeric form. In the latter case we only check
6741   // that the value can be encoded.
6742   bool Strict = Msg.IsSymbolic;
6743 
6744   if (Strict) {
6745     if (Msg.Id == OPR_ID_UNSUPPORTED) {
6746       Error(Msg.Loc, "specified message id is not supported on this GPU");
6747       return false;
6748     }
6749   } else {
6750     if (!isValidMsgId(Msg.Id, getSTI())) {
6751       Error(Msg.Loc, "invalid message id");
6752       return false;
6753     }
6754   }
6755   if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) {
6756     if (Op.IsDefined) {
6757       Error(Op.Loc, "message does not support operations");
6758     } else {
6759       Error(Msg.Loc, "missing message operation");
6760     }
6761     return false;
6762   }
6763   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6764     Error(Op.Loc, "invalid operation id");
6765     return false;
6766   }
6767   if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) &&
6768       Stream.IsDefined) {
6769     Error(Stream.Loc, "message operation does not support streams");
6770     return false;
6771   }
6772   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6773     Error(Stream.Loc, "invalid message stream id");
6774     return false;
6775   }
6776   return true;
6777 }
6778 
6779 OperandMatchResultTy
6780 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6781   using namespace llvm::AMDGPU::SendMsg;
6782 
6783   int64_t ImmVal = 0;
6784   SMLoc Loc = getLoc();
6785 
6786   if (trySkipId("sendmsg", AsmToken::LParen)) {
6787     OperandInfoTy Msg(OPR_ID_UNKNOWN);
6788     OperandInfoTy Op(OP_NONE_);
6789     OperandInfoTy Stream(STREAM_ID_NONE_);
6790     if (parseSendMsgBody(Msg, Op, Stream) &&
6791         validateSendMsg(Msg, Op, Stream)) {
6792       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6793     } else {
6794       return MatchOperand_ParseFail;
6795     }
6796   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6797     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6798       Error(Loc, "invalid immediate: only 16-bit values are legal");
6799       return MatchOperand_ParseFail;
6800     }
6801   } else {
6802     return MatchOperand_ParseFail;
6803   }
6804 
6805   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6806   return MatchOperand_Success;
6807 }
6808 
6809 bool AMDGPUOperand::isSendMsg() const {
6810   return isImmTy(ImmTySendMsg);
6811 }
6812 
6813 //===----------------------------------------------------------------------===//
6814 // v_interp
6815 //===----------------------------------------------------------------------===//
6816 
6817 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6818   StringRef Str;
6819   SMLoc S = getLoc();
6820 
6821   if (!parseId(Str))
6822     return MatchOperand_NoMatch;
6823 
6824   int Slot = StringSwitch<int>(Str)
6825     .Case("p10", 0)
6826     .Case("p20", 1)
6827     .Case("p0", 2)
6828     .Default(-1);
6829 
6830   if (Slot == -1) {
6831     Error(S, "invalid interpolation slot");
6832     return MatchOperand_ParseFail;
6833   }
6834 
6835   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6836                                               AMDGPUOperand::ImmTyInterpSlot));
6837   return MatchOperand_Success;
6838 }
6839 
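// Parse an interpolation attribute of the form attr<N>.<chan>, where <chan>
// is one of x, y, z or w, and split it into separate attribute and channel
// operands.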
6840 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6841   StringRef Str;
6842   SMLoc S = getLoc();
6843 
6844   if (!parseId(Str))
6845     return MatchOperand_NoMatch;
6846 
6847   if (!Str.startswith("attr")) {
6848     Error(S, "invalid interpolation attribute");
6849     return MatchOperand_ParseFail;
6850   }
6851 
6852   StringRef Chan = Str.take_back(2);
6853   int AttrChan = StringSwitch<int>(Chan)
6854     .Case(".x", 0)
6855     .Case(".y", 1)
6856     .Case(".z", 2)
6857     .Case(".w", 3)
6858     .Default(-1);
6859   if (AttrChan == -1) {
6860     Error(S, "invalid or missing interpolation attribute channel");
6861     return MatchOperand_ParseFail;
6862   }
6863 
6864   Str = Str.drop_back(2).drop_front(4);
6865 
6866   uint8_t Attr;
6867   if (Str.getAsInteger(10, Attr)) {
6868     Error(S, "invalid or missing interpolation attribute number");
6869     return MatchOperand_ParseFail;
6870   }
6871 
6872   if (Attr > 63) {
6873     Error(S, "out of bounds interpolation attribute number");
6874     return MatchOperand_ParseFail;
6875   }
6876 
6877   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6878 
6879   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6880                                               AMDGPUOperand::ImmTyInterpAttr));
6881   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6882                                               AMDGPUOperand::ImmTyAttrChan));
6883   return MatchOperand_Success;
6884 }
6885 
6886 //===----------------------------------------------------------------------===//
6887 // exp
6888 //===----------------------------------------------------------------------===//
6889 
6890 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6891   using namespace llvm::AMDGPU::Exp;
6892 
6893   StringRef Str;
6894   SMLoc S = getLoc();
6895 
6896   if (!parseId(Str))
6897     return MatchOperand_NoMatch;
6898 
6899   unsigned Id = getTgtId(Str);
6900   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6901     Error(S, (Id == ET_INVALID) ?
6902                 "invalid exp target" :
6903                 "exp target is not supported on this GPU");
6904     return MatchOperand_ParseFail;
6905   }
6906 
6907   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6908                                               AMDGPUOperand::ImmTyExpTgt));
6909   return MatchOperand_Success;
6910 }
6911 
6912 //===----------------------------------------------------------------------===//
6913 // parser helpers
6914 //===----------------------------------------------------------------------===//
6915 
6916 bool
6917 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6918   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6919 }
6920 
6921 bool
6922 AMDGPUAsmParser::isId(const StringRef Id) const {
6923   return isId(getToken(), Id);
6924 }
6925 
6926 bool
6927 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6928   return getTokenKind() == Kind;
6929 }
6930 
6931 bool
6932 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6933   if (isId(Id)) {
6934     lex();
6935     return true;
6936   }
6937   return false;
6938 }
6939 
6940 bool
6941 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6942   if (isToken(AsmToken::Identifier)) {
6943     StringRef Tok = getTokenStr();
6944     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6945       lex();
6946       return true;
6947     }
6948   }
6949   return false;
6950 }
6951 
6952 bool
6953 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6954   if (isId(Id) && peekToken().is(Kind)) {
6955     lex();
6956     lex();
6957     return true;
6958   }
6959   return false;
6960 }
6961 
6962 bool
6963 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6964   if (isToken(Kind)) {
6965     lex();
6966     return true;
6967   }
6968   return false;
6969 }
6970 
6971 bool
6972 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6973                            const StringRef ErrMsg) {
6974   if (!trySkipToken(Kind)) {
6975     Error(getLoc(), ErrMsg);
6976     return false;
6977   }
6978   return true;
6979 }
6980 
6981 bool
6982 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6983   SMLoc S = getLoc();
6984 
6985   const MCExpr *Expr;
6986   if (Parser.parseExpression(Expr))
6987     return false;
6988 
6989   if (Expr->evaluateAsAbsolute(Imm))
6990     return true;
6991 
6992   if (Expected.empty()) {
6993     Error(S, "expected absolute expression");
6994   } else {
6995     Error(S, Twine("expected ", Expected) +
6996              Twine(" or an absolute expression"));
6997   }
6998   return false;
6999 }
7000 
7001 bool
7002 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
7003   SMLoc S = getLoc();
7004 
7005   const MCExpr *Expr;
7006   if (Parser.parseExpression(Expr))
7007     return false;
7008 
7009   int64_t IntVal;
7010   if (Expr->evaluateAsAbsolute(IntVal)) {
7011     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
7012   } else {
7013     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
7014   }
7015   return true;
7016 }
7017 
7018 bool
7019 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
7020   if (isToken(AsmToken::String)) {
7021     Val = getToken().getStringContents();
7022     lex();
7023     return true;
7024   } else {
7025     Error(getLoc(), ErrMsg);
7026     return false;
7027   }
7028 }
7029 
7030 bool
7031 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
7032   if (isToken(AsmToken::Identifier)) {
7033     Val = getTokenStr();
7034     lex();
7035     return true;
7036   } else {
7037     if (!ErrMsg.empty())
7038       Error(getLoc(), ErrMsg);
7039     return false;
7040   }
7041 }
7042 
7043 AsmToken
7044 AMDGPUAsmParser::getToken() const {
7045   return Parser.getTok();
7046 }
7047 
7048 AsmToken
7049 AMDGPUAsmParser::peekToken() {
7050   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
7051 }
7052 
7053 void
7054 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
7055   auto TokCount = getLexer().peekTokens(Tokens);
7056 
7057   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
7058     Tokens[Idx] = AsmToken(AsmToken::Error, "");
7059 }
7060 
7061 AsmToken::TokenKind
7062 AMDGPUAsmParser::getTokenKind() const {
7063   return getLexer().getKind();
7064 }
7065 
7066 SMLoc
7067 AMDGPUAsmParser::getLoc() const {
7068   return getToken().getLoc();
7069 }
7070 
7071 StringRef
7072 AMDGPUAsmParser::getTokenStr() const {
7073   return getToken().getString();
7074 }
7075 
7076 void
7077 AMDGPUAsmParser::lex() {
7078   Parser.Lex();
7079 }
7080 
7081 SMLoc
7082 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
7083                                const OperandVector &Operands) const {
7084   for (unsigned i = Operands.size() - 1; i > 0; --i) {
7085     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7086     if (Test(Op))
7087       return Op.getStartLoc();
7088   }
7089   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
7090 }
7091 
7092 SMLoc
7093 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
7094                            const OperandVector &Operands) const {
7095   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
7096   return getOperandLoc(Test, Operands);
7097 }
7098 
7099 SMLoc
7100 AMDGPUAsmParser::getRegLoc(unsigned Reg,
7101                            const OperandVector &Operands) const {
7102   auto Test = [=](const AMDGPUOperand& Op) {
7103     return Op.isRegKind() && Op.getReg() == Reg;
7104   };
7105   return getOperandLoc(Test, Operands);
7106 }
7107 
7108 SMLoc
7109 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
7110   auto Test = [](const AMDGPUOperand& Op) {
7111     return Op.IsImmKindLiteral() || Op.isExpr();
7112   };
7113   return getOperandLoc(Test, Operands);
7114 }
7115 
7116 SMLoc
7117 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
7118   auto Test = [](const AMDGPUOperand& Op) {
7119     return Op.isImmKindConst();
7120   };
7121   return getOperandLoc(Test, Operands);
7122 }
7123 
7124 //===----------------------------------------------------------------------===//
7125 // swizzle
7126 //===----------------------------------------------------------------------===//
7127 
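// All swizzle macro modes except QUAD_PERM (BROADCAST, SWAP, REVERSE and
// BITMASK_PERM itself) are encoded as a BITMASK_PERM with suitable and/or/xor
// masks applied to the lane id.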
7128 LLVM_READNONE
7129 static unsigned
7130 encodeBitmaskPerm(const unsigned AndMask,
7131                   const unsigned OrMask,
7132                   const unsigned XorMask) {
7133   using namespace llvm::AMDGPU::Swizzle;
7134 
7135   return BITMASK_PERM_ENC |
7136          (AndMask << BITMASK_AND_SHIFT) |
7137          (OrMask  << BITMASK_OR_SHIFT)  |
7138          (XorMask << BITMASK_XOR_SHIFT);
7139 }
7140 
7141 bool
7142 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
7143                                      const unsigned MinVal,
7144                                      const unsigned MaxVal,
7145                                      const StringRef ErrMsg,
7146                                      SMLoc &Loc) {
7147   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7148     return false;
7149   }
7150   Loc = getLoc();
7151   if (!parseExpr(Op)) {
7152     return false;
7153   }
7154   if (Op < MinVal || Op > MaxVal) {
7155     Error(Loc, ErrMsg);
7156     return false;
7157   }
7158 
7159   return true;
7160 }
7161 
7162 bool
7163 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
7164                                       const unsigned MinVal,
7165                                       const unsigned MaxVal,
7166                                       const StringRef ErrMsg) {
7167   SMLoc Loc;
7168   for (unsigned i = 0; i < OpNum; ++i) {
7169     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
7170       return false;
7171   }
7172 
7173   return true;
7174 }
7175 
7176 bool
7177 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7178   using namespace llvm::AMDGPU::Swizzle;
7179 
7180   int64_t Lane[LANE_NUM];
7181   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7182                            "expected a 2-bit lane id")) {
7183     Imm = QUAD_PERM_ENC;
7184     for (unsigned I = 0; I < LANE_NUM; ++I) {
7185       Imm |= Lane[I] << (LANE_SHIFT * I);
7186     }
7187     return true;
7188   }
7189   return false;
7190 }
7191 
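// Parse swizzle(BROADCAST, group_size, lane_id). The and-mask clears the low
// log2(group_size) bits of the lane id and the or-mask substitutes the
// selected lane, so every lane in a group reads from that lane.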
7192 bool
7193 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
7194   using namespace llvm::AMDGPU::Swizzle;
7195 
7196   SMLoc Loc;
7197   int64_t GroupSize;
7198   int64_t LaneIdx;
7199 
7200   if (!parseSwizzleOperand(GroupSize,
7201                            2, 32,
7202                            "group size must be in the interval [2,32]",
7203                            Loc)) {
7204     return false;
7205   }
7206   if (!isPowerOf2_64(GroupSize)) {
7207     Error(Loc, "group size must be a power of two");
7208     return false;
7209   }
7210   if (parseSwizzleOperand(LaneIdx,
7211                           0, GroupSize - 1,
7212                           "lane id must be in the interval [0,group size - 1]",
7213                           Loc)) {
7214     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
7215     return true;
7216   }
7217   return false;
7218 }
7219 
7220 bool
7221 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
7222   using namespace llvm::AMDGPU::Swizzle;
7223 
7224   SMLoc Loc;
7225   int64_t GroupSize;
7226 
7227   if (!parseSwizzleOperand(GroupSize,
7228                            2, 32,
7229                            "group size must be in the interval [2,32]",
7230                            Loc)) {
7231     return false;
7232   }
7233   if (!isPowerOf2_64(GroupSize)) {
7234     Error(Loc, "group size must be a power of two");
7235     return false;
7236   }
7237 
7238   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
7239   return true;
7240 }
7241 
7242 bool
7243 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
7244   using namespace llvm::AMDGPU::Swizzle;
7245 
7246   SMLoc Loc;
7247   int64_t GroupSize;
7248 
7249   if (!parseSwizzleOperand(GroupSize,
7250                            1, 16,
7251                            "group size must be in the interval [1,16]",
7252                            Loc)) {
7253     return false;
7254   }
7255   if (!isPowerOf2_64(GroupSize)) {
7256     Error(Loc, "group size must be a power of two");
7257     return false;
7258   }
7259 
7260   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
7261   return true;
7262 }
7263 
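// Parse swizzle(BITMASK_PERM, "mask"). Each character of the 5-character mask
// controls one lane id bit, most significant first: '0' forces the bit to 0,
// '1' forces it to 1, 'p' preserves it and 'i' inverts it.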
7264 bool
7265 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
7266   using namespace llvm::AMDGPU::Swizzle;
7267 
7268   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7269     return false;
7270   }
7271 
7272   StringRef Ctl;
7273   SMLoc StrLoc = getLoc();
7274   if (!parseString(Ctl)) {
7275     return false;
7276   }
7277   if (Ctl.size() != BITMASK_WIDTH) {
7278     Error(StrLoc, "expected a 5-character mask");
7279     return false;
7280   }
7281 
7282   unsigned AndMask = 0;
7283   unsigned OrMask = 0;
7284   unsigned XorMask = 0;
7285 
7286   for (size_t i = 0; i < Ctl.size(); ++i) {
7287     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
7288     switch(Ctl[i]) {
7289     default:
7290       Error(StrLoc, "invalid mask");
7291       return false;
7292     case '0':
7293       break;
7294     case '1':
7295       OrMask |= Mask;
7296       break;
7297     case 'p':
7298       AndMask |= Mask;
7299       break;
7300     case 'i':
7301       AndMask |= Mask;
7302       XorMask |= Mask;
7303       break;
7304     }
7305   }
7306 
7307   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
7308   return true;
7309 }
7310 
7311 bool
7312 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
7313 
7314   SMLoc OffsetLoc = getLoc();
7315 
7316   if (!parseExpr(Imm, "a swizzle macro")) {
7317     return false;
7318   }
7319   if (!isUInt<16>(Imm)) {
7320     Error(OffsetLoc, "expected a 16-bit offset");
7321     return false;
7322   }
7323   return true;
7324 }
7325 
7326 bool
7327 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
7328   using namespace llvm::AMDGPU::Swizzle;
7329 
7330   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
7331 
7332     SMLoc ModeLoc = getLoc();
7333     bool Ok = false;
7334 
7335     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
7336       Ok = parseSwizzleQuadPerm(Imm);
7337     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
7338       Ok = parseSwizzleBitmaskPerm(Imm);
7339     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
7340       Ok = parseSwizzleBroadcast(Imm);
7341     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
7342       Ok = parseSwizzleSwap(Imm);
7343     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
7344       Ok = parseSwizzleReverse(Imm);
7345     } else {
7346       Error(ModeLoc, "expected a swizzle mode");
7347     }
7348 
7349     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
7350   }
7351 
7352   return false;
7353 }
7354 
7355 OperandMatchResultTy
7356 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
7357   SMLoc S = getLoc();
7358   int64_t Imm = 0;
7359 
7360   if (trySkipId("offset")) {
7361 
7362     bool Ok = false;
7363     if (skipToken(AsmToken::Colon, "expected a colon")) {
7364       if (trySkipId("swizzle")) {
7365         Ok = parseSwizzleMacro(Imm);
7366       } else {
7367         Ok = parseSwizzleOffset(Imm);
7368       }
7369     }
7370 
7371     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
7372 
7373     return Ok ? MatchOperand_Success : MatchOperand_ParseFail;
7374   } else {
7375     // Swizzle "offset" operand is optional.
7376     // If it is omitted, try parsing other optional operands.
7377     return parseOptionalOpr(Operands);
7378   }
7379 }
7380 
7381 bool
7382 AMDGPUOperand::isSwizzle() const {
7383   return isImmTy(ImmTySwizzle);
7384 }
7385 
7386 //===----------------------------------------------------------------------===//
7387 // VGPR Index Mode
7388 //===----------------------------------------------------------------------===//
7389 
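// Parse the comma-separated list of VGPR index modes following 'gpr_idx('
// up to the closing parenthesis. Each mode may appear at most once; the
// result is the mode bitmask, or UNDEF on error.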
7390 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
7391 
7392   using namespace llvm::AMDGPU::VGPRIndexMode;
7393 
7394   if (trySkipToken(AsmToken::RParen)) {
7395     return OFF;
7396   }
7397 
7398   int64_t Imm = 0;
7399 
7400   while (true) {
7401     unsigned Mode = 0;
7402     SMLoc S = getLoc();
7403 
7404     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
7405       if (trySkipId(IdSymbolic[ModeId])) {
7406         Mode = 1 << ModeId;
7407         break;
7408       }
7409     }
7410 
7411     if (Mode == 0) {
7412       Error(S, (Imm == 0) ?
7413                "expected a VGPR index mode or a closing parenthesis" :
7414                "expected a VGPR index mode");
7415       return UNDEF;
7416     }
7417 
7418     if (Imm & Mode) {
7419       Error(S, "duplicate VGPR index mode");
7420       return UNDEF;
7421     }
7422     Imm |= Mode;
7423 
7424     if (trySkipToken(AsmToken::RParen))
7425       break;
7426     if (!skipToken(AsmToken::Comma,
7427                    "expected a comma or a closing parenthesis"))
7428       return UNDEF;
7429   }
7430 
7431   return Imm;
7432 }
7433 
7434 OperandMatchResultTy
7435 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
7436 
7437   using namespace llvm::AMDGPU::VGPRIndexMode;
7438 
7439   int64_t Imm = 0;
7440   SMLoc S = getLoc();
7441 
7442   if (trySkipId("gpr_idx", AsmToken::LParen)) {
7443     Imm = parseGPRIdxMacro();
7444     if (Imm == UNDEF)
7445       return MatchOperand_ParseFail;
7446   } else {
7447     if (getParser().parseAbsoluteExpression(Imm))
7448       return MatchOperand_ParseFail;
7449     if (Imm < 0 || !isUInt<4>(Imm)) {
7450       Error(S, "invalid immediate: only 4-bit values are legal");
7451       return MatchOperand_ParseFail;
7452     }
7453   }
7454 
7455   Operands.push_back(
7456       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
7457   return MatchOperand_Success;
7458 }
7459 
7460 bool AMDGPUOperand::isGPRIdxMode() const {
7461   return isImmTy(ImmTyGprIdxMode);
7462 }
7463 
7464 //===----------------------------------------------------------------------===//
7465 // sopp branch targets
7466 //===----------------------------------------------------------------------===//
7467 
7468 OperandMatchResultTy
7469 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
7470 
7471   // Make sure we are not parsing something
7472   // that looks like a label or an expression but is not.
7473   // This will improve error messages.
7474   if (isRegister() || isModifier())
7475     return MatchOperand_NoMatch;
7476 
7477   if (!parseExpr(Operands))
7478     return MatchOperand_ParseFail;
7479 
7480   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
7481   assert(Opr.isImm() || Opr.isExpr());
7482   SMLoc Loc = Opr.getStartLoc();
7483 
7484   // Currently we do not support arbitrary expressions as branch targets.
7485   // Only labels and absolute expressions are accepted.
7486   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
7487     Error(Loc, "expected an absolute expression or a label");
7488   } else if (Opr.isImm() && !Opr.isS16Imm()) {
7489     Error(Loc, "expected a 16-bit signed jump offset");
7490   }
7491 
7492   return MatchOperand_Success;
7493 }
7494 
7495 //===----------------------------------------------------------------------===//
7496 // Boolean holding registers
7497 //===----------------------------------------------------------------------===//
7498 
7499 OperandMatchResultTy
7500 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
7501   return parseReg(Operands);
7502 }
7503 
7504 //===----------------------------------------------------------------------===//
7505 // mubuf
7506 //===----------------------------------------------------------------------===//
7507 
7508 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
7509   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
7510 }
7511 
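// Convert a MUBUF instruction. For atomics the opcode is switched to its
// no-return variant unless glc is set, and for returning atomics the dst
// register is added a second time as the tied data source.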
7512 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
7513                                    const OperandVector &Operands,
7514                                    bool IsAtomic,
7515                                    bool IsLds) {
7516   OptionalImmIndexMap OptionalIdx;
7517   unsigned FirstOperandIdx = 1;
7518   bool IsAtomicReturn = false;
7519 
7520   if (IsAtomic) {
7521     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7522       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7523       if (!Op.isCPol())
7524         continue;
7525       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7526       break;
7527     }
7528 
7529     if (!IsAtomicReturn) {
7530       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7531       if (NewOpc != -1)
7532         Inst.setOpcode(NewOpc);
7533     }
7534 
7535     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7536                       SIInstrFlags::IsAtomicRet;
7537   }
7538 
7539   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7540     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7541 
7542     // Add the register arguments
7543     if (Op.isReg()) {
7544       Op.addRegOperands(Inst, 1);
7545       // Insert a tied src for atomic return dst.
7546       // This cannot be postponed as subsequent calls to
7547       // addImmOperands rely on the correct number of MC operands.
7548       if (IsAtomicReturn && i == FirstOperandIdx)
7549         Op.addRegOperands(Inst, 1);
7550       continue;
7551     }
7552 
7553     // Handle the case where soffset is an immediate
7554     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7555       Op.addImmOperands(Inst, 1);
7556       continue;
7557     }
7558 
7559     // Handle tokens like 'offen' which are sometimes hard-coded into the
7560     // asm string.  There are no MCInst operands for these.
7561     if (Op.isToken()) {
7562       continue;
7563     }
7564     assert(Op.isImm());
7565 
7566     // Handle optional arguments
7567     OptionalIdx[Op.getImmTy()] = i;
7568   }
7569 
7570   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7571   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7572 
7573   if (!IsLds) { // tfe is not legal with lds opcodes
7574     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7575   }
7576   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7577 }
7578 
7579 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7580   OptionalImmIndexMap OptionalIdx;
7581 
7582   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7583     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7584 
7585     // Add the register arguments
7586     if (Op.isReg()) {
7587       Op.addRegOperands(Inst, 1);
7588       continue;
7589     }
7590 
7591     // Handle the case where soffset is an immediate
7592     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7593       Op.addImmOperands(Inst, 1);
7594       continue;
7595     }
7596 
7597     // Handle tokens like 'offen' which are sometimes hard-coded into the
7598     // asm string.  There are no MCInst operands for these.
7599     if (Op.isToken()) {
7600       continue;
7601     }
7602     assert(Op.isImm());
7603 
7604     // Handle optional arguments
7605     OptionalIdx[Op.getImmTy()] = i;
7606   }
7607 
7608   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7609                         AMDGPUOperand::ImmTyOffset);
7610   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7611   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7612   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7613   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7614 }
7615 
7616 //===----------------------------------------------------------------------===//
7617 // mimg
7618 //===----------------------------------------------------------------------===//
7619 
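// Convert a MIMG instruction: defs and register sources are added in order,
// then named modifiers are appended in the fixed order expected by the
// MCInst. For atomics the single def is duplicated as the tied data source.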
7620 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7621                               bool IsAtomic) {
7622   unsigned I = 1;
7623   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7624   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7625     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7626   }
7627 
7628   if (IsAtomic) {
7629     // Add src, same as dst
7630     assert(Desc.getNumDefs() == 1);
7631     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7632   }
7633 
7634   OptionalImmIndexMap OptionalIdx;
7635 
7636   for (unsigned E = Operands.size(); I != E; ++I) {
7637     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7638 
7639     // Add the register arguments
7640     if (Op.isReg()) {
7641       Op.addRegOperands(Inst, 1);
7642     } else if (Op.isImmModifier()) {
7643       OptionalIdx[Op.getImmTy()] = I;
7644     } else if (!Op.isToken()) {
7645       llvm_unreachable("unexpected operand type");
7646     }
7647   }
7648 
7649   bool IsGFX10Plus = isGFX10Plus();
7650 
7651   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7652   if (IsGFX10Plus)
7653     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7654   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7655   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7656   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7657   if (IsGFX10Plus)
7658     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7659   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7660     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7661   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7662   if (!IsGFX10Plus)
7663     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7664   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7665 }
7666 
7667 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7668   cvtMIMG(Inst, Operands, true);
7669 }
7670 
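// Convert an SMEM atomic. As for MUBUF atomics, the opcode is switched to the
// no-return form unless glc is set, and the dst is duplicated as a tied
// source for returning atomics.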
7671 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7672   OptionalImmIndexMap OptionalIdx;
7673   bool IsAtomicReturn = false;
7674 
7675   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7676     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7677     if (!Op.isCPol())
7678       continue;
7679     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7680     break;
7681   }
7682 
7683   if (!IsAtomicReturn) {
7684     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7685     if (NewOpc != -1)
7686       Inst.setOpcode(NewOpc);
7687   }
7688 
7689   IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7690                     SIInstrFlags::IsAtomicRet;
7691 
7692   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7693     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7694 
7695     // Add the register arguments
7696     if (Op.isReg()) {
7697       Op.addRegOperands(Inst, 1);
7698       if (IsAtomicReturn && i == 1)
7699         Op.addRegOperands(Inst, 1);
7700       continue;
7701     }
7702 
7703     // Handle the case where soffset is an immediate
7704     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7705       Op.addImmOperands(Inst, 1);
7706       continue;
7707     }
7708 
7709     // Handle tokens like 'offen' which are sometimes hard-coded into the
7710     // asm string.  There are no MCInst operands for these.
7711     if (Op.isToken()) {
7712       continue;
7713     }
7714     assert(Op.isImm());
7715 
7716     // Handle optional arguments
7717     OptionalIdx[Op.getImmTy()] = i;
7718   }
7719 
7720   if ((int)Inst.getNumOperands() <=
7721       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7722     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7723   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7724 }
7725 
7726 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7727                                       const OperandVector &Operands) {
7728   for (unsigned I = 1; I < Operands.size(); ++I) {
7729     auto &Operand = (AMDGPUOperand &)*Operands[I];
7730     if (Operand.isReg())
7731       Operand.addRegOperands(Inst, 1);
7732   }
7733 
7734   Inst.addOperand(MCOperand::createImm(1)); // a16
7735 }
7736 
7737 //===----------------------------------------------------------------------===//
7738 // smrd
7739 //===----------------------------------------------------------------------===//
7740 
7741 bool AMDGPUOperand::isSMRDOffset8() const {
7742   return isImm() && isUInt<8>(getImm());
7743 }
7744 
7745 bool AMDGPUOperand::isSMEMOffset() const {
7746   return isImmTy(ImmTyNone) ||
7747          isImmTy(ImmTyOffset); // Offset range is checked later by validator.
7748 }
7749 
7750 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7751   // 32-bit literals are only supported on CI, and we only want to use them
7752   // when the offset does not fit in 8 bits.
7753   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7754 }
7755 
7756 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7757   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7758 }
7759 
7760 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7761   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7762 }
7763 
7764 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7765   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7766 }
7767 
7768 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7769   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7770 }
7771 
7772 //===----------------------------------------------------------------------===//
7773 // vop3
7774 //===----------------------------------------------------------------------===//
7775 
7776 static bool ConvertOmodMul(int64_t &Mul) {
7777   if (Mul != 1 && Mul != 2 && Mul != 4)
7778     return false;
7779 
7780   Mul >>= 1;
7781   return true;
7782 }
7783 
7784 static bool ConvertOmodDiv(int64_t &Div) {
7785   if (Div == 1) {
7786     Div = 0;
7787     return true;
7788   }
7789 
7790   if (Div == 2) {
7791     Div = 3;
7792     return true;
7793   }
7794 
7795   return false;
7796 }
7797 
7798 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7799 // This is intentional and ensures compatibility with sp3.
7800 // See bug 35397 for details.
7801 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7802   if (BoundCtrl == 0 || BoundCtrl == 1) {
7803     BoundCtrl = 1;
7804     return true;
7805   }
7806   return false;
7807 }
7808 
7809 // Note: the order in this table matches the order of operands in AsmString.
7810 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7811   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7812   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7813   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7814   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7815   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7816   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7817   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7818   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7819   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7820   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7821   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7822   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7823   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7824   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7825   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7826   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7827   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7828   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7829   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7830   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7831   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7832   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7833   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7834   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7835   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7836   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7837   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7838   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
7839   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7840   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7841   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7842   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7843   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7844   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7845   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7846   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7847   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7848   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7849   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7850   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7851   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr},
7852   {"wait_vdst", AMDGPUOperand::ImmTyWaitVDST, false, nullptr},
7853   {"wait_exp", AMDGPUOperand::ImmTyWaitEXP, false, nullptr}
7854 };
7855 
7856 void AMDGPUAsmParser::onBeginOfFile() {
7857   if (!getParser().getStreamer().getTargetStreamer() ||
7858       getSTI().getTargetTriple().getArch() == Triple::r600)
7859     return;
7860 
7861   if (!getTargetStreamer().getTargetID())
7862     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7863 
7864   if (isHsaAbiVersion3AndAbove(&getSTI()))
7865     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7866 }
7867 
7868 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7869 
7870   OperandMatchResultTy res = parseOptionalOpr(Operands);
7871 
7872   // This is a hack to enable hardcoded mandatory operands which follow
7873   // optional operands.
7874   //
7875   // The current design assumes that all operands after the first optional
7876   // operand are also optional. However, the implementation of some instructions
7877   // violates this rule (see e.g. flat/global atomics, which have hardcoded 'glc' operands).
7878   //
7879   // To alleviate this problem, we have to (implicitly) parse extra operands
7880   // to make sure the autogenerated parser of custom operands never hits hardcoded
7881   // mandatory operands.
7882 
7883   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7884     if (res != MatchOperand_Success ||
7885         isToken(AsmToken::EndOfStatement))
7886       break;
7887 
7888     trySkipToken(AsmToken::Comma);
7889     res = parseOptionalOpr(Operands);
7890   }
7891 
7892   return res;
7893 }
7894 
7895 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7896   OperandMatchResultTy res;
7897   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7898     // try to parse any optional operand here
7899     if (Op.IsBit) {
7900       res = parseNamedBit(Op.Name, Operands, Op.Type);
7901     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7902       res = parseOModOperand(Operands);
7903     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7904                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7905                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7906       res = parseSDWASel(Operands, Op.Name, Op.Type);
7907     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7908       res = parseSDWADstUnused(Operands);
7909     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7910                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7911                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7912                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7913       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7914                                         Op.ConvertResult);
7915     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7916       res = parseDim(Operands);
7917     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7918       res = parseCPol(Operands);
7919     } else {
7920       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7921       if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) {
7922         res = parseOperandArrayWithPrefix("neg", Operands,
7923                                           AMDGPUOperand::ImmTyBLGP,
7924                                           nullptr);
7925       }
7926     }
7927     if (res != MatchOperand_NoMatch) {
7928       return res;
7929     }
7930   }
7931   return MatchOperand_NoMatch;
7932 }
7933 
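// Parse the VOP3 output modifier, written in the assembly as "mul:<n>" or
// "div:<n>" (e.g. "v_add_f32_e64 v0, v1, v2 mul:2"; illustrative example).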
7934 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7935   StringRef Name = getTokenStr();
7936   if (Name == "mul") {
7937     return parseIntWithPrefix("mul", Operands,
7938                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7939   }
7940 
7941   if (Name == "div") {
7942     return parseIntWithPrefix("div", Operands,
7943                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7944   }
7945 
7946   return MatchOperand_NoMatch;
7947 }
7948 
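// For VOP3 opsel instructions, the op_sel bit that follows the source bits
// selects the destination half; cvtVOP3OpSel folds that bit into
// src0_modifiers as DST_OP_SEL after the common VOP3P conversion.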
7949 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
7950   cvtVOP3P(Inst, Operands);
7951 
7952   int Opc = Inst.getOpcode();
7953 
7954   int SrcNum;
7955   const int Ops[] = { AMDGPU::OpName::src0,
7956                       AMDGPU::OpName::src1,
7957                       AMDGPU::OpName::src2 };
7958   for (SrcNum = 0;
7959        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
7960        ++SrcNum);
7961   assert(SrcNum > 0);
7962 
7963   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7964   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7965 
7966   if ((OpSel & (1 << SrcNum)) != 0) {
7967     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7968     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7969     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
7970   }
7971 }
7972 
7973 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
7974       // 1. This operand is an input modifiers operand.
7975   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
7976       // 2. This is not the last operand.
7977       && Desc.NumOperands > (OpNum + 1)
7978       // 3. The next operand has a register class.
7979       && Desc.OpInfo[OpNum + 1].RegClass != -1
7980       // 4. The next operand is not tied to any other operand.
7981       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7982 }
7983 
7984 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7985 {
7986   OptionalImmIndexMap OptionalIdx;
7987   unsigned Opc = Inst.getOpcode();
7988 
7989   unsigned I = 1;
7990   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7991   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7992     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7993   }
7994 
7995   for (unsigned E = Operands.size(); I != E; ++I) {
7996     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7997     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7998       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7999     } else if (Op.isInterpSlot() ||
8000                Op.isInterpAttr() ||
8001                Op.isAttrChan()) {
8002       Inst.addOperand(MCOperand::createImm(Op.getImm()));
8003     } else if (Op.isImmModifier()) {
8004       OptionalIdx[Op.getImmTy()] = I;
8005     } else {
8006       llvm_unreachable("unhandled operand type");
8007     }
8008   }
8009 
8010   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
8011     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
8012   }
8013 
8014   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
8015     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8016   }
8017 
8018   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
8019     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8020   }
8021 }
8022 
8023 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
8024 {
8025   OptionalImmIndexMap OptionalIdx;
8026   unsigned Opc = Inst.getOpcode();
8027 
8028   unsigned I = 1;
8029   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8030   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8031     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8032   }
8033 
8034   for (unsigned E = Operands.size(); I != E; ++I) {
8035     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8036     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8037       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8038     } else if (Op.isImmModifier()) {
8039       OptionalIdx[Op.getImmTy()] = I;
8040     } else {
8041       llvm_unreachable("unhandled operand type");
8042     }
8043   }
8044 
8045   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8046 
8047   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8048   if (OpSelIdx != -1)
8049     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8050 
8051   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
8052 
8053   if (OpSelIdx == -1)
8054     return;
8055 
8056   const int Ops[] = { AMDGPU::OpName::src0,
8057                       AMDGPU::OpName::src1,
8058                       AMDGPU::OpName::src2 };
8059   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8060                          AMDGPU::OpName::src1_modifiers,
8061                          AMDGPU::OpName::src2_modifiers };
8062 
8063   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8064 
8065   for (int J = 0; J < 3; ++J) {
8066     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8067     if (OpIdx == -1)
8068       break;
8069 
8070     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8071     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8072 
8073     if ((OpSel & (1 << J)) != 0)
8074       ModVal |= SISrcMods::OP_SEL_0;
8075     if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
8076         (OpSel & (1 << 3)) != 0)
8077       ModVal |= SISrcMods::DST_OP_SEL;
8078 
8079     Inst.getOperand(ModIdx).setImm(ModVal);
8080   }
8081 }
8082 
8083 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
8084                               OptionalImmIndexMap &OptionalIdx) {
8085   unsigned Opc = Inst.getOpcode();
8086 
8087   unsigned I = 1;
8088   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8089   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8090     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8091   }
8092 
8093   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
8094     // This instruction has src modifiers
8095     for (unsigned E = Operands.size(); I != E; ++I) {
8096       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8097       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8098         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8099       } else if (Op.isImmModifier()) {
8100         OptionalIdx[Op.getImmTy()] = I;
8101       } else if (Op.isRegOrImm()) {
8102         Op.addRegOrImmOperands(Inst, 1);
8103       } else {
8104         llvm_unreachable("unhandled operand type");
8105       }
8106     }
8107   } else {
8108     // No src modifiers
8109     for (unsigned E = Operands.size(); I != E; ++I) {
8110       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8111       if (Op.isMod()) {
8112         OptionalIdx[Op.getImmTy()] = I;
8113       } else {
8114         Op.addRegOrImmOperands(Inst, 1);
8115       }
8116     }
8117   }
8118 
8119   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
8120     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8121   }
8122 
8123   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
8124     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8125   }
8126 
8127   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
8128   // they have a src2 register operand that is tied to the dst operand.
8129   // We don't allow modifiers for this operand in the assembler, so src2_modifiers
8130   // should be 0.
8131   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
8132       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
8133       Opc == AMDGPU::V_MAC_F32_e64_vi ||
8134       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
8135       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
8136       Opc == AMDGPU::V_MAC_F16_e64_vi ||
8137       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
8138       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
8139       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
8140       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
8141       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
8142     auto it = Inst.begin();
8143     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
8144     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
8145     ++it;
8146     // Copy the operand to ensure it's not invalidated when Inst grows.
8147     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
8148   }
8149 }
8150 
8151 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
8152   OptionalImmIndexMap OptionalIdx;
8153   cvtVOP3(Inst, Operands, OptionalIdx);
8154 }
8155 
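// For VOP3P, op_sel/op_sel_hi/neg_lo/neg_hi are parsed as per-source bitmasks;
// they are decomposed below and folded into the corresponding srcN_modifiers
// operands as OP_SEL_0/OP_SEL_1/NEG/NEG_HI bits.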
8156 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
8157                                OptionalImmIndexMap &OptIdx) {
8158   const int Opc = Inst.getOpcode();
8159   const MCInstrDesc &Desc = MII.get(Opc);
8160 
8161   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
8162 
8163   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
8164     assert(!IsPacked);
8165     Inst.addOperand(Inst.getOperand(0));
8166   }
8167 
8168   // FIXME: This is messy. Parse the modifiers as if this were a normal VOP3
8169   // instruction, and then figure out where to actually put the modifiers.
8170 
8171   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8172   if (OpSelIdx != -1) {
8173     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
8174   }
8175 
8176   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
8177   if (OpSelHiIdx != -1) {
8178     int DefaultVal = IsPacked ? -1 : 0;
8179     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
8180                           DefaultVal);
8181   }
8182 
8183   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
8184   if (NegLoIdx != -1) {
8185     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
8186     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
8187   }
8188 
8189   const int Ops[] = { AMDGPU::OpName::src0,
8190                       AMDGPU::OpName::src1,
8191                       AMDGPU::OpName::src2 };
8192   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8193                          AMDGPU::OpName::src1_modifiers,
8194                          AMDGPU::OpName::src2_modifiers };
8195 
8196   unsigned OpSel = 0;
8197   unsigned OpSelHi = 0;
8198   unsigned NegLo = 0;
8199   unsigned NegHi = 0;
8200 
8201   if (OpSelIdx != -1)
8202     OpSel = Inst.getOperand(OpSelIdx).getImm();
8203 
8204   if (OpSelHiIdx != -1)
8205     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
8206 
8207   if (NegLoIdx != -1) {
8208     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
8209     NegLo = Inst.getOperand(NegLoIdx).getImm();
8210     NegHi = Inst.getOperand(NegHiIdx).getImm();
8211   }
8212 
8213   for (int J = 0; J < 3; ++J) {
8214     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8215     if (OpIdx == -1)
8216       break;
8217 
8218     uint32_t ModVal = 0;
8219 
8220     if ((OpSel & (1 << J)) != 0)
8221       ModVal |= SISrcMods::OP_SEL_0;
8222 
8223     if ((OpSelHi & (1 << J)) != 0)
8224       ModVal |= SISrcMods::OP_SEL_1;
8225 
8226     if ((NegLo & (1 << J)) != 0)
8227       ModVal |= SISrcMods::NEG;
8228 
8229     if ((NegHi & (1 << J)) != 0)
8230       ModVal |= SISrcMods::NEG_HI;
8231 
8232     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8233 
8234     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
8235   }
8236 }
8237 
8238 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8239   OptionalImmIndexMap OptIdx;
8240   cvtVOP3(Inst, Operands, OptIdx);
8241   cvtVOP3P(Inst, Operands, OptIdx);
8242 }
8243 
8244 //===----------------------------------------------------------------------===//
8245 // dpp
8246 //===----------------------------------------------------------------------===//
8247 
8248 bool AMDGPUOperand::isDPP8() const {
8249   return isImmTy(ImmTyDPP8);
8250 }
8251 
8252 bool AMDGPUOperand::isDPPCtrl() const {
8253   using namespace AMDGPU::DPP;
8254 
8255   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8256   if (result) {
8257     int64_t Imm = getImm();
8258     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8259            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
8260            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8261            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
8262            (Imm == DppCtrl::WAVE_SHL1) ||
8263            (Imm == DppCtrl::WAVE_ROL1) ||
8264            (Imm == DppCtrl::WAVE_SHR1) ||
8265            (Imm == DppCtrl::WAVE_ROR1) ||
8266            (Imm == DppCtrl::ROW_MIRROR) ||
8267            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
8268            (Imm == DppCtrl::BCAST15) ||
8269            (Imm == DppCtrl::BCAST31) ||
8270            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8271            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
8272   }
8273   return false;
8274 }
8275 
8276 //===----------------------------------------------------------------------===//
8277 // mAI
8278 //===----------------------------------------------------------------------===//
8279 
8280 bool AMDGPUOperand::isBLGP() const {
8281   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8282 }
8283 
8284 bool AMDGPUOperand::isCBSZ() const {
8285   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
8286 }
8287 
8288 bool AMDGPUOperand::isABID() const {
8289   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
8290 }
8291 
8292 bool AMDGPUOperand::isS16Imm() const {
8293   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8294 }
8295 
8296 bool AMDGPUOperand::isU16Imm() const {
8297   return isImm() && isUInt<16>(getImm());
8298 }
8299 
8300 //===----------------------------------------------------------------------===//
8301 // dim
8302 //===----------------------------------------------------------------------===//
8303 
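// Accepted dim values are MIMG dim asm suffixes such as "1D", "2D", "3D" or
// "CUBE", optionally prefixed with "SQ_RSRC_IMG_" (e.g. "dim:SQ_RSRC_IMG_2D");
// the full set is defined by getMIMGDimInfoByAsmSuffix.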
8304 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
8305   // We want to allow "dim:1D" etc.,
8306   // but the initial 1 is tokenized as an integer.
8307   std::string Token;
8308   if (isToken(AsmToken::Integer)) {
8309     SMLoc Loc = getToken().getEndLoc();
8310     Token = std::string(getTokenStr());
8311     lex();
8312     if (getLoc() != Loc)
8313       return false;
8314   }
8315 
8316   StringRef Suffix;
8317   if (!parseId(Suffix))
8318     return false;
8319   Token += Suffix;
8320 
8321   StringRef DimId = Token;
8322   if (DimId.startswith("SQ_RSRC_IMG_"))
8323     DimId = DimId.drop_front(12);
8324 
8325   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
8326   if (!DimInfo)
8327     return false;
8328 
8329   Encoding = DimInfo->Encoding;
8330   return true;
8331 }
8332 
8333 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
8334   if (!isGFX10Plus())
8335     return MatchOperand_NoMatch;
8336 
8337   SMLoc S = getLoc();
8338 
8339   if (!trySkipId("dim", AsmToken::Colon))
8340     return MatchOperand_NoMatch;
8341 
8342   unsigned Encoding;
8343   SMLoc Loc = getLoc();
8344   if (!parseDimId(Encoding)) {
8345     Error(Loc, "invalid dim value");
8346     return MatchOperand_ParseFail;
8347   }
8348 
8349   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
8350                                               AMDGPUOperand::ImmTyDim));
8351   return MatchOperand_Success;
8352 }
8353 
8354 //===----------------------------------------------------------------------===//
8355 // dpp
8356 //===----------------------------------------------------------------------===//
8357 
8358 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
8359   SMLoc S = getLoc();
8360 
8361   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
8362     return MatchOperand_NoMatch;
8363 
8364   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
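  // e.g. "dpp8:[7,6,5,4,3,2,1,0]" (illustrative); each entry selects the
  // source lane within a group of 8 lanes.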
8365 
8366   int64_t Sels[8];
8367 
8368   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8369     return MatchOperand_ParseFail;
8370 
8371   for (size_t i = 0; i < 8; ++i) {
8372     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8373       return MatchOperand_ParseFail;
8374 
8375     SMLoc Loc = getLoc();
8376     if (getParser().parseAbsoluteExpression(Sels[i]))
8377       return MatchOperand_ParseFail;
8378     if (0 > Sels[i] || 7 < Sels[i]) {
8379       Error(Loc, "expected a 3-bit value");
8380       return MatchOperand_ParseFail;
8381     }
8382   }
8383 
8384   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8385     return MatchOperand_ParseFail;
8386 
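  // Pack the eight 3-bit lane selects into a single 24-bit immediate.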
8387   unsigned DPP8 = 0;
8388   for (size_t i = 0; i < 8; ++i)
8389     DPP8 |= (Sels[i] << (i * 3));
8390 
8391   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
8392   return MatchOperand_Success;
8393 }
8394 
8395 bool
8396 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
8397                                     const OperandVector &Operands) {
8398   if (Ctrl == "row_newbcast")
8399     return isGFX90A();
8400 
8401   if (Ctrl == "row_share" ||
8402       Ctrl == "row_xmask")
8403     return isGFX10Plus();
8404 
8405   if (Ctrl == "wave_shl" ||
8406       Ctrl == "wave_shr" ||
8407       Ctrl == "wave_rol" ||
8408       Ctrl == "wave_ror" ||
8409       Ctrl == "row_bcast")
8410     return isVI() || isGFX9();
8411 
8412   return Ctrl == "row_mirror" ||
8413          Ctrl == "row_half_mirror" ||
8414          Ctrl == "quad_perm" ||
8415          Ctrl == "row_shl" ||
8416          Ctrl == "row_shr" ||
8417          Ctrl == "row_ror";
8418 }
8419 
8420 int64_t
8421 AMDGPUAsmParser::parseDPPCtrlPerm() {
8422   // quad_perm:[%d,%d,%d,%d]
8423 
8424   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8425     return -1;
8426 
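  // Pack the four 2-bit lane selects into an 8-bit quad_perm value.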
8427   int64_t Val = 0;
8428   for (int i = 0; i < 4; ++i) {
8429     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8430       return -1;
8431 
8432     int64_t Temp;
8433     SMLoc Loc = getLoc();
8434     if (getParser().parseAbsoluteExpression(Temp))
8435       return -1;
8436     if (Temp < 0 || Temp > 3) {
8437       Error(Loc, "expected a 2-bit value");
8438       return -1;
8439     }
8440 
8441     Val += (Temp << i * 2);
8442   }
8443 
8444   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8445     return -1;
8446 
8447   return Val;
8448 }
8449 
8450 int64_t
8451 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
8452   using namespace AMDGPU::DPP;
8453 
8454   // sel:%d
8455 
8456   int64_t Val;
8457   SMLoc Loc = getLoc();
8458 
8459   if (getParser().parseAbsoluteExpression(Val))
8460     return -1;
8461 
8462   struct DppCtrlCheck {
8463     int64_t Ctrl;
8464     int Lo;
8465     int Hi;
8466   };
8467 
8468   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
8469     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
8470     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
8471     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
8472     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
8473     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
8474     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
8475     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
8476     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
8477     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
8478     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
8479     .Default({-1, 0, 0});
8480 
8481   bool Valid;
8482   if (Check.Ctrl == -1) {
8483     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
8484     Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
8485   } else {
8486     Valid = Check.Lo <= Val && Val <= Check.Hi;
8487     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
8488   }
8489 
8490   if (!Valid) {
8491     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
8492     return -1;
8493   }
8494 
8495   return Val;
8496 }
8497 
8498 OperandMatchResultTy
8499 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
8500   using namespace AMDGPU::DPP;
8501 
8502   if (!isToken(AsmToken::Identifier) ||
8503       !isSupportedDPPCtrl(getTokenStr(), Operands))
8504     return MatchOperand_NoMatch;
8505 
8506   SMLoc S = getLoc();
8507   int64_t Val = -1;
8508   StringRef Ctrl;
8509 
8510   parseId(Ctrl);
8511 
8512   if (Ctrl == "row_mirror") {
8513     Val = DppCtrl::ROW_MIRROR;
8514   } else if (Ctrl == "row_half_mirror") {
8515     Val = DppCtrl::ROW_HALF_MIRROR;
8516   } else {
8517     if (skipToken(AsmToken::Colon, "expected a colon")) {
8518       if (Ctrl == "quad_perm") {
8519         Val = parseDPPCtrlPerm();
8520       } else {
8521         Val = parseDPPCtrlSel(Ctrl);
8522       }
8523     }
8524   }
8525 
8526   if (Val == -1)
8527     return MatchOperand_ParseFail;
8528 
8529   Operands.push_back(
8530     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
8531   return MatchOperand_Success;
8532 }
8533 
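// Defaults used when DPP modifiers are omitted in the source: row_mask and
// bank_mask default to 0xf (all rows/banks enabled), bound_ctrl and fi to 0.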
8534 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
8535   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
8536 }
8537 
8538 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
8539   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
8540 }
8541 
8542 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
8543   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
8544 }
8545 
8546 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
8547   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
8548 }
8549 
8550 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
8551   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
8552 }
8553 
8554 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8555   OptionalImmIndexMap OptionalIdx;
8556 
8557   unsigned Opc = Inst.getOpcode();
8558   bool HasModifiers =
8559       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
8560   unsigned I = 1;
8561   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8562   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8563     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8564   }
8565 
8566   int Fi = 0;
8567   for (unsigned E = Operands.size(); I != E; ++I) {
8568     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8569                                             MCOI::TIED_TO);
8570     if (TiedTo != -1) {
8571       assert((unsigned)TiedTo < Inst.getNumOperands());
8572       // Handle the tied 'old' or 'src2' operand for MAC instructions.
8573       Inst.addOperand(Inst.getOperand(TiedTo));
8574     }
8575     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8576     // Add the register arguments
8577     if (Op.isReg() && validateVccOperand(Op.getReg())) {
8578       // VOP2b (v_add_u32, v_sub_u32, ...) DPP uses the "vcc" token.
8579       // Skip it.
8580       continue;
8581     }
8582 
8583     if (IsDPP8) {
8584       if (Op.isDPP8()) {
8585         Op.addImmOperands(Inst, 1);
8586       } else if (HasModifiers &&
8587                  isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8588         Op.addRegWithFPInputModsOperands(Inst, 2);
8589       } else if (Op.isFI()) {
8590         Fi = Op.getImm();
8591       } else if (Op.isReg()) {
8592         Op.addRegOperands(Inst, 1);
8593       } else {
8594         llvm_unreachable("Invalid operand type");
8595       }
8596     } else {
8597       if (HasModifiers &&
8598           isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8599         Op.addRegWithFPInputModsOperands(Inst, 2);
8600       } else if (Op.isReg()) {
8601         Op.addRegOperands(Inst, 1);
8602       } else if (Op.isDPPCtrl()) {
8603         Op.addImmOperands(Inst, 1);
8604       } else if (Op.isImm()) {
8605         // Handle optional arguments
8606         OptionalIdx[Op.getImmTy()] = I;
8607       } else {
8608         llvm_unreachable("Invalid operand type");
8609       }
8610     }
8611   }
8612 
8613   if (IsDPP8) {
8614     using namespace llvm::AMDGPU::DPP;
8615     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8616   } else {
8617     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8618     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8619     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8620     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8621       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8622     }
8623   }
8624 }
8625 
8626 //===----------------------------------------------------------------------===//
8627 // sdwa
8628 //===----------------------------------------------------------------------===//
8629 
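// Parse an SDWA select such as "dst_sel:WORD_1" or "src0_sel:BYTE_0"; the
// accepted value names are those listed in the StringSwitch below.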
8630 OperandMatchResultTy
8631 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
8632                               AMDGPUOperand::ImmTy Type) {
8633   using namespace llvm::AMDGPU::SDWA;
8634 
8635   SMLoc S = getLoc();
8636   StringRef Value;
8637   OperandMatchResultTy res;
8638 
8639   SMLoc StringLoc;
8640   res = parseStringWithPrefix(Prefix, Value, StringLoc);
8641   if (res != MatchOperand_Success) {
8642     return res;
8643   }
8644 
8645   int64_t Int;
8646   Int = StringSwitch<int64_t>(Value)
8647         .Case("BYTE_0", SdwaSel::BYTE_0)
8648         .Case("BYTE_1", SdwaSel::BYTE_1)
8649         .Case("BYTE_2", SdwaSel::BYTE_2)
8650         .Case("BYTE_3", SdwaSel::BYTE_3)
8651         .Case("WORD_0", SdwaSel::WORD_0)
8652         .Case("WORD_1", SdwaSel::WORD_1)
8653         .Case("DWORD", SdwaSel::DWORD)
8654         .Default(0xffffffff);
8655 
8656   if (Int == 0xffffffff) {
8657     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8658     return MatchOperand_ParseFail;
8659   }
8660 
8661   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8662   return MatchOperand_Success;
8663 }
8664 
8665 OperandMatchResultTy
8666 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8667   using namespace llvm::AMDGPU::SDWA;
8668 
8669   SMLoc S = getLoc();
8670   StringRef Value;
8671   OperandMatchResultTy res;
8672 
8673   SMLoc StringLoc;
8674   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8675   if (res != MatchOperand_Success) {
8676     return res;
8677   }
8678 
8679   int64_t Int;
8680   Int = StringSwitch<int64_t>(Value)
8681         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8682         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8683         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8684         .Default(0xffffffff);
8685 
8686   if (Int == 0xffffffff) {
8687     Error(StringLoc, "invalid dst_unused value");
8688     return MatchOperand_ParseFail;
8689   }
8690 
8691   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8692   return MatchOperand_Success;
8693 }
8694 
8695 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8696   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8697 }
8698 
8699 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8700   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8701 }
8702 
8703 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8704   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8705 }
8706 
8707 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8708   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8709 }
8710 
8711 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8712   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8713 }
8714 
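// Common SDWA conversion. SkipDstVcc/SkipSrcVcc request dropping an explicit
// "vcc" dst or src operand written in the assembly; see the VOP2b/VOPC
// handling in the operand loop below.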
8715 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8716                               uint64_t BasicInstType,
8717                               bool SkipDstVcc,
8718                               bool SkipSrcVcc) {
8719   using namespace llvm::AMDGPU::SDWA;
8720 
8721   OptionalImmIndexMap OptionalIdx;
8722   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8723   bool SkippedVcc = false;
8724 
8725   unsigned I = 1;
8726   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8727   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8728     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8729   }
8730 
8731   for (unsigned E = Operands.size(); I != E; ++I) {
8732     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8733     if (SkipVcc && !SkippedVcc && Op.isReg() &&
8734         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
8735       // VOP2b (v_add_u32, v_sub_u32, ...) SDWA uses the "vcc" token as dst.
8736       // Skip it if it's the 2nd operand (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
8737       // or the 4th operand (v_addc_u32_sdwa v1, vcc, v2, v3, vcc).
8738       // Skip VCC only if we didn't skip it on the previous iteration.
8739       // Note that src0 and src1 occupy 2 slots each because of modifiers.
8740       if (BasicInstType == SIInstrFlags::VOP2 &&
8741           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8742            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8743         SkippedVcc = true;
8744         continue;
8745       } else if (BasicInstType == SIInstrFlags::VOPC &&
8746                  Inst.getNumOperands() == 0) {
8747         SkippedVcc = true;
8748         continue;
8749       }
8750     }
8751     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8752       Op.addRegOrImmWithInputModsOperands(Inst, 2);
8753     } else if (Op.isImm()) {
8754       // Handle optional arguments
8755       OptionalIdx[Op.getImmTy()] = I;
8756     } else {
8757       llvm_unreachable("Invalid operand type");
8758     }
8759     SkippedVcc = false;
8760   }
8761 
8762   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
8763       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
8764       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
8765     // v_nop_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments
8766     switch (BasicInstType) {
8767     case SIInstrFlags::VOP1:
8768       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8769       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8770         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8771       }
8772       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8773       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8774       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8775       break;
8776 
8777     case SIInstrFlags::VOP2:
8778       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8779       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8780         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8781       }
8782       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8783       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8784       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8785       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8786       break;
8787 
8788     case SIInstrFlags::VOPC:
8789       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8790         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8791       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8792       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8793       break;
8794 
8795     default:
8796       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8797     }
8798   }
8799 
8800   // Special case v_mac_{f16, f32}:
8801   // they have a src2 register operand that is tied to the dst operand.
8802   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8803       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
8804     auto it = Inst.begin();
8805     std::advance(
8806       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8807     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8808   }
8809 }
8810 
8811 //===----------------------------------------------------------------------===//
8812 // mAI
8813 //===----------------------------------------------------------------------===//
8814 
8815 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8816   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8817 }
8818 
8819 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8820   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8821 }
8822 
8823 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8824   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8825 }
8826 
8827 /// Force static initialization.
8828 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8829   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8830   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8831 }
8832 
8833 #define GET_REGISTER_MATCHER
8834 #define GET_MATCHER_IMPLEMENTATION
8835 #define GET_MNEMONIC_SPELL_CHECKER
8836 #define GET_MNEMONIC_CHECKER
8837 #include "AMDGPUGenAsmMatcher.inc"
8838 
8839 // This function should be defined after the auto-generated include so that the
8840 // MatchClassKind enum is defined.
8841 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8842                                                      unsigned Kind) {
8843   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
8844   // But MatchInstructionImpl() expects to meet a token and fails to validate
8845   // the operand. This method checks if we are given an immediate operand but
8846   // expect to get the corresponding token.
8847   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8848   switch (Kind) {
8849   case MCK_addr64:
8850     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8851   case MCK_gds:
8852     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8853   case MCK_lds:
8854     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8855   case MCK_idxen:
8856     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8857   case MCK_offen:
8858     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8859   case MCK_SSrcB32:
8860     // When operands have expression values, they will return true for isToken,
8861     // because it is not possible to distinguish between a token and an
8862     // expression at parse time. MatchInstructionImpl() will always try to
8863     // match an operand as a token, when isToken returns true, and when the
8864     // name of the expression is not a valid token, the match will fail,
8865     // so we need to handle it here.
8866     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8867   case MCK_SSrcF32:
8868     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8869   case MCK_SoppBrTarget:
8870     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8871   case MCK_VReg32OrOff:
8872     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8873   case MCK_InterpSlot:
8874     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
8875   case MCK_Attr:
8876     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
8877   case MCK_AttrChan:
8878     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
8879   case MCK_ImmSMEMOffset:
8880     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
8881   case MCK_SReg_64:
8882   case MCK_SReg_64_XEXEC:
8883     // Null is defined as a 32-bit register but
8884     // it should also be enabled with 64-bit operands.
8885     // The following code enables it for SReg_64 operands
8886     // used as source and destination. Remaining source
8887     // operands are handled in isInlinableImm.
8888     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
8889   default:
8890     return Match_InvalidOperand;
8891   }
8892 }
8893 
8894 //===----------------------------------------------------------------------===//
8895 // endpgm
8896 //===----------------------------------------------------------------------===//
8897 
8898 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
8899   SMLoc S = getLoc();
8900   int64_t Imm = 0;
8901 
8902   if (!parseExpr(Imm)) {
8903     // The operand is optional; if not present, default to 0.
8904     Imm = 0;
8905   }
8906 
8907   if (!isUInt<16>(Imm)) {
8908     Error(S, "expected a 16-bit value");
8909     return MatchOperand_ParseFail;
8910   }
8911 
8912   Operands.push_back(
8913       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
8914   return MatchOperand_Success;
8915 }
8916 
8917 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
8918 
8919 //===----------------------------------------------------------------------===//
8920 // LDSDIR
8921 //===----------------------------------------------------------------------===//
8922 
8923 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitVDST() const {
8924   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitVDST);
8925 }
8926 
8927 bool AMDGPUOperand::isWaitVDST() const {
8928   return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
8929 }
8930 
8931 //===----------------------------------------------------------------------===//
8932 // VINTERP
8933 //===----------------------------------------------------------------------===//
8934 
8935 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitEXP() const {
8936   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitEXP);
8937 }
8938 
8939 bool AMDGPUOperand::isWaitEXP() const {
8940   return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());
8941 }
8942