1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/BinaryFormat/ELF.h"
24 #include "llvm/MC/MCAsmInfo.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCExpr.h"
27 #include "llvm/MC/MCInst.h"
28 #include "llvm/MC/MCInstrDesc.h"
29 #include "llvm/MC/MCParser/MCAsmLexer.h"
30 #include "llvm/MC/MCParser/MCAsmParser.h"
31 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
32 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
33 #include "llvm/MC/MCSymbol.h"
34 #include "llvm/MC/TargetRegistry.h"
35 #include "llvm/Support/AMDGPUMetadata.h"
36 #include "llvm/Support/AMDHSAKernelDescriptor.h"
37 #include "llvm/Support/Casting.h"
38 #include "llvm/Support/MachineValueType.h"
39 #include "llvm/Support/MathExtras.h"
40 #include "llvm/Support/TargetParser.h"
41 
42 using namespace llvm;
43 using namespace llvm::AMDGPU;
44 using namespace llvm::amdhsa;
45 
46 namespace {
47 
48 class AMDGPUAsmParser;
49 
50 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
51 
52 //===----------------------------------------------------------------------===//
53 // Operand
54 //===----------------------------------------------------------------------===//
55 
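// An AMDGPUOperand is a single parsed operand: a raw token, an immediate
// (optionally carrying modifiers and a named ImmTy), a register, or an
// MCExpr, together with its source range for diagnostics.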
56 class AMDGPUOperand : public MCParsedAsmOperand {
57   enum KindTy {
58     Token,
59     Immediate,
60     Register,
61     Expression
62   } Kind;
63 
64   SMLoc StartLoc, EndLoc;
65   const AMDGPUAsmParser *AsmParser;
66 
67 public:
68   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
69       : Kind(Kind_), AsmParser(AsmParser_) {}
70 
71   using Ptr = std::unique_ptr<AMDGPUOperand>;
72 
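  // Source operand modifiers. Abs and Neg are floating-point modifiers and
  // Sext is the integer sign-extension modifier; the two groups are mutually
  // exclusive (see getModifiersOperand) and map onto SISrcMods bits.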
73   struct Modifiers {
74     bool Abs = false;
75     bool Neg = false;
76     bool Sext = false;
77 
78     bool hasFPModifiers() const { return Abs || Neg; }
79     bool hasIntModifiers() const { return Sext; }
80     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
81 
82     int64_t getFPModifiersOperand() const {
83       int64_t Operand = 0;
84       Operand |= Abs ? SISrcMods::ABS : 0u;
85       Operand |= Neg ? SISrcMods::NEG : 0u;
86       return Operand;
87     }
88 
89     int64_t getIntModifiersOperand() const {
90       int64_t Operand = 0;
91       Operand |= Sext ? SISrcMods::SEXT : 0u;
92       return Operand;
93     }
94 
95     int64_t getModifiersOperand() const {
96       assert(!(hasFPModifiers() && hasIntModifiers())
97            && "fp and int modifiers should not be used simultaneously");
98       if (hasFPModifiers()) {
99         return getFPModifiersOperand();
100       } else if (hasIntModifiers()) {
101         return getIntModifiersOperand();
102       } else {
103         return 0;
104       }
105     }
106 
107     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
108   };
109 
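  // ImmTy classifies parsed immediates that correspond to named instruction
  // modifiers (offsets, cache-policy bits, DPP/SDWA controls, etc.) so they
  // can be matched to the right operand and printed symbolically.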
110   enum ImmTy {
111     ImmTyNone,
112     ImmTyGDS,
113     ImmTyLDS,
114     ImmTyOffen,
115     ImmTyIdxen,
116     ImmTyAddr64,
117     ImmTyOffset,
118     ImmTyInstOffset,
119     ImmTyOffset0,
120     ImmTyOffset1,
121     ImmTyCPol,
122     ImmTySWZ,
123     ImmTyTFE,
124     ImmTyD16,
125     ImmTyClampSI,
126     ImmTyOModSI,
127     ImmTySdwaDstSel,
128     ImmTySdwaSrc0Sel,
129     ImmTySdwaSrc1Sel,
130     ImmTySdwaDstUnused,
131     ImmTyDMask,
132     ImmTyDim,
133     ImmTyUNorm,
134     ImmTyDA,
135     ImmTyR128A16,
136     ImmTyA16,
137     ImmTyLWE,
138     ImmTyExpTgt,
139     ImmTyExpCompr,
140     ImmTyExpVM,
141     ImmTyFORMAT,
142     ImmTyHwreg,
143     ImmTyOff,
144     ImmTySendMsg,
145     ImmTyInterpSlot,
146     ImmTyInterpAttr,
147     ImmTyAttrChan,
148     ImmTyOpSel,
149     ImmTyOpSelHi,
150     ImmTyNegLo,
151     ImmTyNegHi,
152     ImmTyDPP8,
153     ImmTyDppCtrl,
154     ImmTyDppRowMask,
155     ImmTyDppBankMask,
156     ImmTyDppBoundCtrl,
157     ImmTyDppFi,
158     ImmTySwizzle,
159     ImmTyGprIdxMode,
160     ImmTyHigh,
161     ImmTyBLGP,
162     ImmTyCBSZ,
163     ImmTyABID,
164     ImmTyEndpgm,
165     ImmTyWaitVDST,
166     ImmTyWaitEXP,
167   };
168 
169   enum ImmKindTy {
170     ImmKindTyNone,
171     ImmKindTyLiteral,
172     ImmKindTyConst,
173   };
174 
175 private:
176   struct TokOp {
177     const char *Data;
178     unsigned Length;
179   };
180 
181   struct ImmOp {
182     int64_t Val;
183     ImmTy Type;
184     bool IsFPImm;
185     mutable ImmKindTy Kind;
186     Modifiers Mods;
187   };
188 
189   struct RegOp {
190     unsigned RegNo;
191     Modifiers Mods;
192   };
193 
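  // Exactly one of the following members is active, selected by Kind.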
194   union {
195     TokOp Tok;
196     ImmOp Imm;
197     RegOp Reg;
198     const MCExpr *Expr;
199   };
200 
201 public:
202   bool isToken() const override {
203     if (Kind == Token)
204       return true;
205 
206     // When parsing operands, we can't always tell if something was meant to be
207     // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, we treat the symbol name as the token.
210     return isSymbolRefExpr();
211   }
212 
213   bool isSymbolRefExpr() const {
214     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
215   }
216 
217   bool isImm() const override {
218     return Kind == Immediate;
219   }
220 
221   void setImmKindNone() const {
222     assert(isImm());
223     Imm.Kind = ImmKindTyNone;
224   }
225 
226   void setImmKindLiteral() const {
227     assert(isImm());
228     Imm.Kind = ImmKindTyLiteral;
229   }
230 
231   void setImmKindConst() const {
232     assert(isImm());
233     Imm.Kind = ImmKindTyConst;
234   }
235 
236   bool IsImmKindLiteral() const {
237     return isImm() && Imm.Kind == ImmKindTyLiteral;
238   }
239 
240   bool isImmKindConst() const {
241     return isImm() && Imm.Kind == ImmKindTyConst;
242   }
243 
244   bool isInlinableImm(MVT type) const;
245   bool isLiteralImm(MVT type) const;
246 
247   bool isRegKind() const {
248     return Kind == Register;
249   }
250 
251   bool isReg() const override {
252     return isRegKind() && !hasModifiers();
253   }
254 
255   bool isRegOrInline(unsigned RCID, MVT type) const {
256     return isRegClass(RCID) || isInlinableImm(type);
257   }
258 
259   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
260     return isRegOrInline(RCID, type) || isLiteralImm(type);
261   }
262 
263   bool isRegOrImmWithInt16InputMods() const {
264     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
265   }
266 
267   bool isRegOrImmWithInt32InputMods() const {
268     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
269   }
270 
271   bool isRegOrInlineImmWithInt16InputMods() const {
272     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
273   }
274 
275   bool isRegOrInlineImmWithInt32InputMods() const {
276     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
277   }
278 
279   bool isRegOrImmWithInt64InputMods() const {
280     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
281   }
282 
283   bool isRegOrImmWithFP16InputMods() const {
284     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
285   }
286 
287   bool isRegOrImmWithFP32InputMods() const {
288     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
289   }
290 
291   bool isRegOrImmWithFP64InputMods() const {
292     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
293   }
294 
295   bool isRegOrInlineImmWithFP16InputMods() const {
296     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16);
297   }
298 
299   bool isRegOrInlineImmWithFP32InputMods() const {
300     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
301   }
302 
303 
304   bool isVReg() const {
305     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
306            isRegClass(AMDGPU::VReg_64RegClassID) ||
307            isRegClass(AMDGPU::VReg_96RegClassID) ||
308            isRegClass(AMDGPU::VReg_128RegClassID) ||
309            isRegClass(AMDGPU::VReg_160RegClassID) ||
310            isRegClass(AMDGPU::VReg_192RegClassID) ||
311            isRegClass(AMDGPU::VReg_256RegClassID) ||
312            isRegClass(AMDGPU::VReg_512RegClassID) ||
313            isRegClass(AMDGPU::VReg_1024RegClassID);
314   }
315 
316   bool isVReg32() const {
317     return isRegClass(AMDGPU::VGPR_32RegClassID);
318   }
319 
320   bool isVReg32OrOff() const {
321     return isOff() || isVReg32();
322   }
323 
324   bool isNull() const {
325     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
326   }
327 
328   bool isVRegWithInputMods() const;
329 
330   bool isSDWAOperand(MVT type) const;
331   bool isSDWAFP16Operand() const;
332   bool isSDWAFP32Operand() const;
333   bool isSDWAInt16Operand() const;
334   bool isSDWAInt32Operand() const;
335 
336   bool isImmTy(ImmTy ImmT) const {
337     return isImm() && Imm.Type == ImmT;
338   }
339 
340   bool isImmModifier() const {
341     return isImm() && Imm.Type != ImmTyNone;
342   }
343 
344   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
345   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
346   bool isDMask() const { return isImmTy(ImmTyDMask); }
347   bool isDim() const { return isImmTy(ImmTyDim); }
348   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
349   bool isDA() const { return isImmTy(ImmTyDA); }
350   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
351   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
352   bool isLWE() const { return isImmTy(ImmTyLWE); }
353   bool isOff() const { return isImmTy(ImmTyOff); }
354   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
355   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
356   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
357   bool isOffen() const { return isImmTy(ImmTyOffen); }
358   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
359   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
360   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
361   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
362   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
363 
364   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
365   bool isGDS() const { return isImmTy(ImmTyGDS); }
366   bool isLDS() const { return isImmTy(ImmTyLDS); }
367   bool isCPol() const { return isImmTy(ImmTyCPol); }
368   bool isSWZ() const { return isImmTy(ImmTySWZ); }
369   bool isTFE() const { return isImmTy(ImmTyTFE); }
370   bool isD16() const { return isImmTy(ImmTyD16); }
371   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
372   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
373   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
374   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
375   bool isFI() const { return isImmTy(ImmTyDppFi); }
376   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
377   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
378   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
379   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
380   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
381   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
382   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
383   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
384   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
385   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
386   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
387   bool isHigh() const { return isImmTy(ImmTyHigh); }
388 
389   bool isMod() const {
390     return isClampSI() || isOModSI();
391   }
392 
393   bool isRegOrImm() const {
394     return isReg() || isImm();
395   }
396 
397   bool isRegClass(unsigned RCID) const;
398 
399   bool isInlineValue() const;
400 
401   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
402     return isRegOrInline(RCID, type) && !hasModifiers();
403   }
404 
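  // The is*Src* predicates below are used by the auto-generated asm matcher.
  // Rough naming convention: 'SC'/'VC' accept an SGPR / any VALU-readable
  // register or an inline constant; plain 'S'/'V' additionally accept literal
  // constants; 'VI'/'AI' restrict the register to VGPRs / AGPRs. A trailing
  // B<N>/F<N> gives the integer/float element width and V2 marks packed pairs.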
405   bool isSCSrcB16() const {
406     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
407   }
408 
409   bool isSCSrcV2B16() const {
410     return isSCSrcB16();
411   }
412 
413   bool isSCSrcB32() const {
414     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
415   }
416 
417   bool isSCSrcB64() const {
418     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
419   }
420 
421   bool isBoolReg() const;
422 
423   bool isSCSrcF16() const {
424     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
425   }
426 
427   bool isSCSrcV2F16() const {
428     return isSCSrcF16();
429   }
430 
431   bool isSCSrcF32() const {
432     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
433   }
434 
435   bool isSCSrcF64() const {
436     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
437   }
438 
439   bool isSSrcB32() const {
440     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
441   }
442 
443   bool isSSrcB16() const {
444     return isSCSrcB16() || isLiteralImm(MVT::i16);
445   }
446 
447   bool isSSrcV2B16() const {
448     llvm_unreachable("cannot happen");
449     return isSSrcB16();
450   }
451 
452   bool isSSrcB64() const {
453     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
454     // See isVSrc64().
455     return isSCSrcB64() || isLiteralImm(MVT::i64);
456   }
457 
458   bool isSSrcF32() const {
459     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
460   }
461 
462   bool isSSrcF64() const {
463     return isSCSrcB64() || isLiteralImm(MVT::f64);
464   }
465 
466   bool isSSrcF16() const {
467     return isSCSrcB16() || isLiteralImm(MVT::f16);
468   }
469 
470   bool isSSrcV2F16() const {
471     llvm_unreachable("cannot happen");
472     return isSSrcF16();
473   }
474 
475   bool isSSrcV2FP32() const {
476     llvm_unreachable("cannot happen");
477     return isSSrcF32();
478   }
479 
480   bool isSCSrcV2FP32() const {
481     llvm_unreachable("cannot happen");
482     return isSCSrcF32();
483   }
484 
485   bool isSSrcV2INT32() const {
486     llvm_unreachable("cannot happen");
487     return isSSrcB32();
488   }
489 
490   bool isSCSrcV2INT32() const {
491     llvm_unreachable("cannot happen");
492     return isSCSrcB32();
493   }
494 
495   bool isSSrcOrLdsB32() const {
496     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
497            isLiteralImm(MVT::i32) || isExpr();
498   }
499 
500   bool isVCSrcB32() const {
501     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
502   }
503 
504   bool isVCSrcB64() const {
505     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
506   }
507 
508   bool isVCSrcB16() const {
509     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
510   }
511 
512   bool isVCSrcV2B16() const {
513     return isVCSrcB16();
514   }
515 
516   bool isVCSrcF32() const {
517     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
518   }
519 
520   bool isVCSrcF64() const {
521     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
522   }
523 
524   bool isVCSrcF16() const {
525     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
526   }
527 
528   bool isVCSrcV2F16() const {
529     return isVCSrcF16();
530   }
531 
532   bool isVSrcB32() const {
533     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
534   }
535 
536   bool isVSrcB64() const {
537     return isVCSrcF64() || isLiteralImm(MVT::i64);
538   }
539 
540   bool isVSrcB16() const {
541     return isVCSrcB16() || isLiteralImm(MVT::i16);
542   }
543 
544   bool isVSrcV2B16() const {
545     return isVSrcB16() || isLiteralImm(MVT::v2i16);
546   }
547 
548   bool isVCSrcV2FP32() const {
549     return isVCSrcF64();
550   }
551 
552   bool isVSrcV2FP32() const {
553     return isVSrcF64() || isLiteralImm(MVT::v2f32);
554   }
555 
556   bool isVCSrcV2INT32() const {
557     return isVCSrcB64();
558   }
559 
560   bool isVSrcV2INT32() const {
561     return isVSrcB64() || isLiteralImm(MVT::v2i32);
562   }
563 
564   bool isVSrcF32() const {
565     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
566   }
567 
568   bool isVSrcF64() const {
569     return isVCSrcF64() || isLiteralImm(MVT::f64);
570   }
571 
572   bool isVSrcF16() const {
573     return isVCSrcF16() || isLiteralImm(MVT::f16);
574   }
575 
576   bool isVSrcV2F16() const {
577     return isVSrcF16() || isLiteralImm(MVT::v2f16);
578   }
579 
580   bool isVISrcB32() const {
581     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
582   }
583 
584   bool isVISrcB16() const {
585     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
586   }
587 
588   bool isVISrcV2B16() const {
589     return isVISrcB16();
590   }
591 
592   bool isVISrcF32() const {
593     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
594   }
595 
596   bool isVISrcF16() const {
597     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
598   }
599 
600   bool isVISrcV2F16() const {
601     return isVISrcF16() || isVISrcB32();
602   }
603 
604   bool isVISrc_64B64() const {
605     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
606   }
607 
608   bool isVISrc_64F64() const {
609     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
610   }
611 
612   bool isVISrc_64V2FP32() const {
613     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
614   }
615 
616   bool isVISrc_64V2INT32() const {
617     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
618   }
619 
620   bool isVISrc_256B64() const {
621     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
622   }
623 
624   bool isVISrc_256F64() const {
625     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
626   }
627 
628   bool isVISrc_128B16() const {
629     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
630   }
631 
632   bool isVISrc_128V2B16() const {
633     return isVISrc_128B16();
634   }
635 
636   bool isVISrc_128B32() const {
637     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
638   }
639 
640   bool isVISrc_128F32() const {
641     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
642   }
643 
644   bool isVISrc_256V2FP32() const {
645     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
646   }
647 
648   bool isVISrc_256V2INT32() const {
649     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
650   }
651 
652   bool isVISrc_512B32() const {
653     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
654   }
655 
656   bool isVISrc_512B16() const {
657     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
658   }
659 
660   bool isVISrc_512V2B16() const {
661     return isVISrc_512B16();
662   }
663 
664   bool isVISrc_512F32() const {
665     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
666   }
667 
668   bool isVISrc_512F16() const {
669     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
670   }
671 
672   bool isVISrc_512V2F16() const {
673     return isVISrc_512F16() || isVISrc_512B32();
674   }
675 
676   bool isVISrc_1024B32() const {
677     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
678   }
679 
680   bool isVISrc_1024B16() const {
681     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
682   }
683 
684   bool isVISrc_1024V2B16() const {
685     return isVISrc_1024B16();
686   }
687 
688   bool isVISrc_1024F32() const {
689     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
690   }
691 
692   bool isVISrc_1024F16() const {
693     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
694   }
695 
696   bool isVISrc_1024V2F16() const {
697     return isVISrc_1024F16() || isVISrc_1024B32();
698   }
699 
700   bool isAISrcB32() const {
701     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
702   }
703 
704   bool isAISrcB16() const {
705     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
706   }
707 
708   bool isAISrcV2B16() const {
709     return isAISrcB16();
710   }
711 
712   bool isAISrcF32() const {
713     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
714   }
715 
716   bool isAISrcF16() const {
717     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
718   }
719 
720   bool isAISrcV2F16() const {
721     return isAISrcF16() || isAISrcB32();
722   }
723 
724   bool isAISrc_64B64() const {
725     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
726   }
727 
728   bool isAISrc_64F64() const {
729     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
730   }
731 
732   bool isAISrc_128B32() const {
733     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
734   }
735 
736   bool isAISrc_128B16() const {
737     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
738   }
739 
740   bool isAISrc_128V2B16() const {
741     return isAISrc_128B16();
742   }
743 
744   bool isAISrc_128F32() const {
745     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
746   }
747 
748   bool isAISrc_128F16() const {
749     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
750   }
751 
752   bool isAISrc_128V2F16() const {
753     return isAISrc_128F16() || isAISrc_128B32();
754   }
755 
756   bool isVISrc_128F16() const {
757     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
758   }
759 
760   bool isVISrc_128V2F16() const {
761     return isVISrc_128F16() || isVISrc_128B32();
762   }
763 
764   bool isAISrc_256B64() const {
765     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
766   }
767 
768   bool isAISrc_256F64() const {
769     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
770   }
771 
772   bool isAISrc_512B32() const {
773     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
774   }
775 
776   bool isAISrc_512B16() const {
777     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
778   }
779 
780   bool isAISrc_512V2B16() const {
781     return isAISrc_512B16();
782   }
783 
784   bool isAISrc_512F32() const {
785     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
786   }
787 
788   bool isAISrc_512F16() const {
789     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
790   }
791 
792   bool isAISrc_512V2F16() const {
793     return isAISrc_512F16() || isAISrc_512B32();
794   }
795 
796   bool isAISrc_1024B32() const {
797     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
798   }
799 
800   bool isAISrc_1024B16() const {
801     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
802   }
803 
804   bool isAISrc_1024V2B16() const {
805     return isAISrc_1024B16();
806   }
807 
808   bool isAISrc_1024F32() const {
809     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
810   }
811 
812   bool isAISrc_1024F16() const {
813     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
814   }
815 
816   bool isAISrc_1024V2F16() const {
817     return isAISrc_1024F16() || isAISrc_1024B32();
818   }
819 
820   bool isKImmFP32() const {
821     return isLiteralImm(MVT::f32);
822   }
823 
824   bool isKImmFP16() const {
825     return isLiteralImm(MVT::f16);
826   }
827 
828   bool isMem() const override {
829     return false;
830   }
831 
832   bool isExpr() const {
833     return Kind == Expression;
834   }
835 
836   bool isSoppBrTarget() const {
837     return isExpr() || isImm();
838   }
839 
840   bool isSWaitCnt() const;
841   bool isDepCtr() const;
842   bool isSDelayAlu() const;
843   bool isHwreg() const;
844   bool isSendMsg() const;
845   bool isSwizzle() const;
846   bool isSMRDOffset8() const;
847   bool isSMEMOffset() const;
848   bool isSMRDLiteralOffset() const;
849   bool isDPP8() const;
850   bool isDPPCtrl() const;
851   bool isBLGP() const;
852   bool isCBSZ() const;
853   bool isABID() const;
854   bool isGPRIdxMode() const;
855   bool isS16Imm() const;
856   bool isU16Imm() const;
857   bool isEndpgm() const;
858   bool isWaitVDST() const;
859   bool isWaitEXP() const;
860 
861   StringRef getExpressionAsToken() const {
862     assert(isExpr());
863     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
864     return S->getSymbol().getName();
865   }
866 
867   StringRef getToken() const {
868     assert(isToken());
869 
870     if (Kind == Expression)
871       return getExpressionAsToken();
872 
873     return StringRef(Tok.Data, Tok.Length);
874   }
875 
876   int64_t getImm() const {
877     assert(isImm());
878     return Imm.Val;
879   }
880 
881   void setImm(int64_t Val) {
882     assert(isImm());
883     Imm.Val = Val;
884   }
885 
886   ImmTy getImmTy() const {
887     assert(isImm());
888     return Imm.Type;
889   }
890 
891   unsigned getReg() const override {
892     assert(isRegKind());
893     return Reg.RegNo;
894   }
895 
896   SMLoc getStartLoc() const override {
897     return StartLoc;
898   }
899 
900   SMLoc getEndLoc() const override {
901     return EndLoc;
902   }
903 
904   SMRange getLocRange() const {
905     return SMRange(StartLoc, EndLoc);
906   }
907 
908   Modifiers getModifiers() const {
909     assert(isRegKind() || isImmTy(ImmTyNone));
910     return isRegKind() ? Reg.Mods : Imm.Mods;
911   }
912 
913   void setModifiers(Modifiers Mods) {
914     assert(isRegKind() || isImmTy(ImmTyNone));
915     if (isRegKind())
916       Reg.Mods = Mods;
917     else
918       Imm.Mods = Mods;
919   }
920 
921   bool hasModifiers() const {
922     return getModifiers().hasModifiers();
923   }
924 
925   bool hasFPModifiers() const {
926     return getModifiers().hasFPModifiers();
927   }
928 
929   bool hasIntModifiers() const {
930     return getModifiers().hasIntModifiers();
931   }
932 
933   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
934 
935   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
936 
937   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
938 
939   template <unsigned Bitwidth>
940   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
941 
942   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
943     addKImmFPOperands<16>(Inst, N);
944   }
945 
946   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
947     addKImmFPOperands<32>(Inst, N);
948   }
949 
950   void addRegOperands(MCInst &Inst, unsigned N) const;
951 
952   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
953     addRegOperands(Inst, N);
954   }
955 
956   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
957     if (isRegKind())
958       addRegOperands(Inst, N);
959     else if (isExpr())
960       Inst.addOperand(MCOperand::createExpr(Expr));
961     else
962       addImmOperands(Inst, N);
963   }
964 
965   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
966     Modifiers Mods = getModifiers();
967     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
968     if (isRegKind()) {
969       addRegOperands(Inst, N);
970     } else {
971       addImmOperands(Inst, N, false);
972     }
973   }
974 
975   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
976     assert(!hasIntModifiers());
977     addRegOrImmWithInputModsOperands(Inst, N);
978   }
979 
980   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
981     assert(!hasFPModifiers());
982     addRegOrImmWithInputModsOperands(Inst, N);
983   }
984 
985   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
986     Modifiers Mods = getModifiers();
987     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
988     assert(isRegKind());
989     addRegOperands(Inst, N);
990   }
991 
992   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
993     assert(!hasIntModifiers());
994     addRegWithInputModsOperands(Inst, N);
995   }
996 
997   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
998     assert(!hasFPModifiers());
999     addRegWithInputModsOperands(Inst, N);
1000   }
1001 
1002   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
1003     if (isImm())
1004       addImmOperands(Inst, N);
1005     else {
1006       assert(isExpr());
1007       Inst.addOperand(MCOperand::createExpr(Expr));
1008     }
1009   }
1010 
1011   static void printImmTy(raw_ostream& OS, ImmTy Type) {
1012     switch (Type) {
1013     case ImmTyNone: OS << "None"; break;
1014     case ImmTyGDS: OS << "GDS"; break;
1015     case ImmTyLDS: OS << "LDS"; break;
1016     case ImmTyOffen: OS << "Offen"; break;
1017     case ImmTyIdxen: OS << "Idxen"; break;
1018     case ImmTyAddr64: OS << "Addr64"; break;
1019     case ImmTyOffset: OS << "Offset"; break;
1020     case ImmTyInstOffset: OS << "InstOffset"; break;
1021     case ImmTyOffset0: OS << "Offset0"; break;
1022     case ImmTyOffset1: OS << "Offset1"; break;
1023     case ImmTyCPol: OS << "CPol"; break;
1024     case ImmTySWZ: OS << "SWZ"; break;
1025     case ImmTyTFE: OS << "TFE"; break;
1026     case ImmTyD16: OS << "D16"; break;
1027     case ImmTyFORMAT: OS << "FORMAT"; break;
1028     case ImmTyClampSI: OS << "ClampSI"; break;
1029     case ImmTyOModSI: OS << "OModSI"; break;
1030     case ImmTyDPP8: OS << "DPP8"; break;
1031     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1032     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1033     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1034     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1035     case ImmTyDppFi: OS << "FI"; break;
1036     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1037     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1038     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1039     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1040     case ImmTyDMask: OS << "DMask"; break;
1041     case ImmTyDim: OS << "Dim"; break;
1042     case ImmTyUNorm: OS << "UNorm"; break;
1043     case ImmTyDA: OS << "DA"; break;
1044     case ImmTyR128A16: OS << "R128A16"; break;
1045     case ImmTyA16: OS << "A16"; break;
1046     case ImmTyLWE: OS << "LWE"; break;
1047     case ImmTyOff: OS << "Off"; break;
1048     case ImmTyExpTgt: OS << "ExpTgt"; break;
1049     case ImmTyExpCompr: OS << "ExpCompr"; break;
1050     case ImmTyExpVM: OS << "ExpVM"; break;
1051     case ImmTyHwreg: OS << "Hwreg"; break;
1052     case ImmTySendMsg: OS << "SendMsg"; break;
1053     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1054     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1055     case ImmTyAttrChan: OS << "AttrChan"; break;
1056     case ImmTyOpSel: OS << "OpSel"; break;
1057     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1058     case ImmTyNegLo: OS << "NegLo"; break;
1059     case ImmTyNegHi: OS << "NegHi"; break;
1060     case ImmTySwizzle: OS << "Swizzle"; break;
1061     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1062     case ImmTyHigh: OS << "High"; break;
1063     case ImmTyBLGP: OS << "BLGP"; break;
1064     case ImmTyCBSZ: OS << "CBSZ"; break;
1065     case ImmTyABID: OS << "ABID"; break;
1066     case ImmTyEndpgm: OS << "Endpgm"; break;
1067     case ImmTyWaitVDST: OS << "WaitVDST"; break;
1068     case ImmTyWaitEXP: OS << "WaitEXP"; break;
1069     }
1070   }
1071 
1072   void print(raw_ostream &OS) const override {
1073     switch (Kind) {
1074     case Register:
1075       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1076       break;
1077     case Immediate:
1078       OS << '<' << getImm();
1079       if (getImmTy() != ImmTyNone) {
1080         OS << " type: "; printImmTy(OS, getImmTy());
1081       }
1082       OS << " mods: " << Imm.Mods << '>';
1083       break;
1084     case Token:
1085       OS << '\'' << getToken() << '\'';
1086       break;
1087     case Expression:
1088       OS << "<expr " << *Expr << '>';
1089       break;
1090     }
1091   }
1092 
1093   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1094                                       int64_t Val, SMLoc Loc,
1095                                       ImmTy Type = ImmTyNone,
1096                                       bool IsFPImm = false) {
1097     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1098     Op->Imm.Val = Val;
1099     Op->Imm.IsFPImm = IsFPImm;
1100     Op->Imm.Kind = ImmKindTyNone;
1101     Op->Imm.Type = Type;
1102     Op->Imm.Mods = Modifiers();
1103     Op->StartLoc = Loc;
1104     Op->EndLoc = Loc;
1105     return Op;
1106   }
1107 
1108   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1109                                         StringRef Str, SMLoc Loc,
1110                                         bool HasExplicitEncodingSize = true) {
1111     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1112     Res->Tok.Data = Str.data();
1113     Res->Tok.Length = Str.size();
1114     Res->StartLoc = Loc;
1115     Res->EndLoc = Loc;
1116     return Res;
1117   }
1118 
1119   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1120                                       unsigned RegNo, SMLoc S,
1121                                       SMLoc E) {
1122     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1123     Op->Reg.RegNo = RegNo;
1124     Op->Reg.Mods = Modifiers();
1125     Op->StartLoc = S;
1126     Op->EndLoc = E;
1127     return Op;
1128   }
1129 
1130   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1131                                        const class MCExpr *Expr, SMLoc S) {
1132     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1133     Op->Expr = Expr;
1134     Op->StartLoc = S;
1135     Op->EndLoc = S;
1136     return Op;
1137   }
1138 };
1139 
1140 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1141   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1142   return OS;
1143 }
1144 
1145 //===----------------------------------------------------------------------===//
1146 // AsmParser
1147 //===----------------------------------------------------------------------===//
1148 
// Holds info related to the current kernel, e.g. the count of SGPRs used.
// A kernel scope begins at an .amdgpu_hsa_kernel directive and ends at the
// next .amdgpu_hsa_kernel directive or at end of file.
1152 class KernelScopeInfo {
1153   int SgprIndexUnusedMin = -1;
1154   int VgprIndexUnusedMin = -1;
1155   int AgprIndexUnusedMin = -1;
1156   MCContext *Ctx = nullptr;
1157   MCSubtargetInfo const *MSTI = nullptr;
1158 
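  // Record that SGPR index 'i' is used and publish the running count via the
  // .kernel.sgpr_count symbol. usesVgprAt/usesAgprAt do the same for VGPRs
  // and AGPRs, combining both into .kernel.vgpr_count where required.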
1159   void usesSgprAt(int i) {
1160     if (i >= SgprIndexUnusedMin) {
1161       SgprIndexUnusedMin = ++i;
1162       if (Ctx) {
1163         MCSymbol* const Sym =
1164           Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1165         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1166       }
1167     }
1168   }
1169 
1170   void usesVgprAt(int i) {
1171     if (i >= VgprIndexUnusedMin) {
1172       VgprIndexUnusedMin = ++i;
1173       if (Ctx) {
1174         MCSymbol* const Sym =
1175           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1176         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1177                                          VgprIndexUnusedMin);
1178         Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1179       }
1180     }
1181   }
1182 
1183   void usesAgprAt(int i) {
    // Targets without MAI instructions cannot use AGPRs; such an instruction
    // will be rejected in AMDGPUAsmParser::MatchAndEmitInstruction, so there
    // is no need to track AGPR usage here.
1185     if (!hasMAIInsts(*MSTI))
1186       return;
1187 
1188     if (i >= AgprIndexUnusedMin) {
1189       AgprIndexUnusedMin = ++i;
1190       if (Ctx) {
1191         MCSymbol* const Sym =
1192           Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1193         Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1194 
        // Also update vgpr_count, which depends on agpr_count on gfx908/gfx90a.
1196         MCSymbol* const vSym =
1197           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1198         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1199                                          VgprIndexUnusedMin);
1200         vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1201       }
1202     }
1203   }
1204 
1205 public:
1206   KernelScopeInfo() = default;
1207 
1208   void initialize(MCContext &Context) {
1209     Ctx = &Context;
1210     MSTI = Ctx->getSubtargetInfo();
1211 
1212     usesSgprAt(SgprIndexUnusedMin = -1);
1213     usesVgprAt(VgprIndexUnusedMin = -1);
1214     if (hasMAIInsts(*MSTI)) {
1215       usesAgprAt(AgprIndexUnusedMin = -1);
1216     }
1217   }
1218 
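  // Note a use of the given register range; RegWidth is in bits, so the last
  // dword index touched is DwordRegIndex + ceil(RegWidth / 32) - 1.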
1219   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1220                     unsigned RegWidth) {
1221     switch (RegKind) {
1222     case IS_SGPR:
1223       usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1224       break;
1225     case IS_AGPR:
1226       usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1227       break;
1228     case IS_VGPR:
1229       usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1230       break;
1231     default:
1232       break;
1233     }
1234   }
1235 };
1236 
1237 class AMDGPUAsmParser : public MCTargetAsmParser {
1238   MCAsmParser &Parser;
1239 
1240   // Number of extra operands parsed after the first optional operand.
1241   // This may be necessary to skip hardcoded mandatory operands.
1242   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1243 
1244   unsigned ForcedEncodingSize = 0;
1245   bool ForcedDPP = false;
1246   bool ForcedSDWA = false;
1247   KernelScopeInfo KernelScope;
1248   unsigned CPolSeen;
1249 
1250   /// @name Auto-generated Match Functions
1251   /// {
1252 
1253 #define GET_ASSEMBLER_HEADER
1254 #include "AMDGPUGenAsmMatcher.inc"
1255 
1256   /// }
1257 
1258 private:
1259   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1260   bool OutOfRangeError(SMRange Range);
1261   /// Calculate VGPR/SGPR blocks required for given target, reserved
1262   /// registers, and user-specified NextFreeXGPR values.
1263   ///
1264   /// \param Features [in] Target features, used for bug corrections.
1265   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1266   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1267   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1268   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1269   /// descriptor field, if valid.
1270   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1271   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1272   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1273   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1274   /// \param VGPRBlocks [out] Result VGPR block count.
1275   /// \param SGPRBlocks [out] Result SGPR block count.
1276   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1277                           bool FlatScrUsed, bool XNACKUsed,
1278                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1279                           SMRange VGPRRange, unsigned NextFreeSGPR,
1280                           SMRange SGPRRange, unsigned &VGPRBlocks,
1281                           unsigned &SGPRBlocks);
1282   bool ParseDirectiveAMDGCNTarget();
1283   bool ParseDirectiveAMDHSAKernel();
1284   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1285   bool ParseDirectiveHSACodeObjectVersion();
1286   bool ParseDirectiveHSACodeObjectISA();
1287   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1288   bool ParseDirectiveAMDKernelCodeT();
1289   // TODO: Possibly make subtargetHasRegister const.
1290   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1291   bool ParseDirectiveAMDGPUHsaKernel();
1292 
1293   bool ParseDirectiveISAVersion();
1294   bool ParseDirectiveHSAMetadata();
1295   bool ParseDirectivePALMetadataBegin();
1296   bool ParseDirectivePALMetadata();
1297   bool ParseDirectiveAMDGPULDS();
1298 
1299   /// Common code to parse out a block of text (typically YAML) between start and
1300   /// end directives.
1301   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1302                            const char *AssemblerDirectiveEnd,
1303                            std::string &CollectString);
1304 
1305   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1306                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1307   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1308                            unsigned &RegNum, unsigned &RegWidth,
1309                            bool RestoreOnFailure = false);
1310   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1311                            unsigned &RegNum, unsigned &RegWidth,
1312                            SmallVectorImpl<AsmToken> &Tokens);
1313   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1314                            unsigned &RegWidth,
1315                            SmallVectorImpl<AsmToken> &Tokens);
1316   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1317                            unsigned &RegWidth,
1318                            SmallVectorImpl<AsmToken> &Tokens);
1319   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1320                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1321   bool ParseRegRange(unsigned& Num, unsigned& Width);
1322   unsigned getRegularReg(RegisterKind RegKind,
1323                          unsigned RegNum,
1324                          unsigned RegWidth,
1325                          SMLoc Loc);
1326 
1327   bool isRegister();
1328   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1329   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1330   void initializeGprCountSymbol(RegisterKind RegKind);
1331   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1332                              unsigned RegWidth);
1333   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1334                     bool IsAtomic, bool IsLds = false);
1335   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1336                  bool IsGdsHardcoded);
1337 
1338 public:
1339   enum AMDGPUMatchResultTy {
1340     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1341   };
1342   enum OperandMode {
1343     OperandMode_Default,
1344     OperandMode_NSA,
1345   };
1346 
1347   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1348 
1349   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1350                const MCInstrInfo &MII,
1351                const MCTargetOptions &Options)
1352       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1353     MCAsmParserExtension::Initialize(Parser);
1354 
1355     if (getFeatureBits().none()) {
1356       // Set default features.
1357       copySTI().ToggleFeature("southern-islands");
1358     }
1359 
1360     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1361 
1362     {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1367       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1368       MCContext &Ctx = getContext();
1369       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1370         MCSymbol *Sym =
1371             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1372         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1373         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1374         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1375         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1376         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1377       } else {
1378         MCSymbol *Sym =
1379             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1380         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1381         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1382         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1383         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1384         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1385       }
1386       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1387         initializeGprCountSymbol(IS_VGPR);
1388         initializeGprCountSymbol(IS_SGPR);
1389       } else
1390         KernelScope.initialize(getContext());
1391     }
1392   }
1393 
1394   bool hasMIMG_R128() const {
1395     return AMDGPU::hasMIMG_R128(getSTI());
1396   }
1397 
1398   bool hasPackedD16() const {
1399     return AMDGPU::hasPackedD16(getSTI());
1400   }
1401 
1402   bool hasGFX10A16() const {
1403     return AMDGPU::hasGFX10A16(getSTI());
1404   }
1405 
1406   bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1407 
1408   bool isSI() const {
1409     return AMDGPU::isSI(getSTI());
1410   }
1411 
1412   bool isCI() const {
1413     return AMDGPU::isCI(getSTI());
1414   }
1415 
1416   bool isVI() const {
1417     return AMDGPU::isVI(getSTI());
1418   }
1419 
1420   bool isGFX9() const {
1421     return AMDGPU::isGFX9(getSTI());
1422   }
1423 
  // TODO: isGFX90A is also true for GFX940. This needs to be cleaned up.
1425   bool isGFX90A() const {
1426     return AMDGPU::isGFX90A(getSTI());
1427   }
1428 
1429   bool isGFX940() const {
1430     return AMDGPU::isGFX940(getSTI());
1431   }
1432 
1433   bool isGFX9Plus() const {
1434     return AMDGPU::isGFX9Plus(getSTI());
1435   }
1436 
1437   bool isGFX10() const {
1438     return AMDGPU::isGFX10(getSTI());
1439   }
1440 
1441   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1442 
1443   bool isGFX11() const {
1444     return AMDGPU::isGFX11(getSTI());
1445   }
1446 
1447   bool isGFX11Plus() const {
1448     return AMDGPU::isGFX11Plus(getSTI());
1449   }
1450 
1451   bool isGFX10_BEncoding() const {
1452     return AMDGPU::isGFX10_BEncoding(getSTI());
1453   }
1454 
1455   bool hasInv2PiInlineImm() const {
1456     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1457   }
1458 
1459   bool hasFlatOffsets() const {
1460     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1461   }
1462 
1463   bool hasArchitectedFlatScratch() const {
1464     return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1465   }
1466 
1467   bool hasSGPR102_SGPR103() const {
1468     return !isVI() && !isGFX9();
1469   }
1470 
1471   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1472 
1473   bool hasIntClamp() const {
1474     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1475   }
1476 
1477   AMDGPUTargetStreamer &getTargetStreamer() {
1478     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1479     return static_cast<AMDGPUTargetStreamer &>(TS);
1480   }
1481 
1482   const MCRegisterInfo *getMRI() const {
1483     // We need this const_cast because for some reason getContext() is not const
1484     // in MCAsmParser.
1485     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1486   }
1487 
1488   const MCInstrInfo *getMII() const {
1489     return &MII;
1490   }
1491 
1492   const FeatureBitset &getFeatureBits() const {
1493     return getSTI().getFeatureBits();
1494   }
1495 
1496   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1497   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1498   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1499 
1500   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1501   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1502   bool isForcedDPP() const { return ForcedDPP; }
1503   bool isForcedSDWA() const { return ForcedSDWA; }
1504   ArrayRef<unsigned> getMatchedVariants() const;
1505   StringRef getMatchedVariantName() const;
1506 
1507   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1508   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1509                      bool RestoreOnFailure);
1510   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1511   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1512                                         SMLoc &EndLoc) override;
1513   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1514   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1515                                       unsigned Kind) override;
1516   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1517                                OperandVector &Operands, MCStreamer &Out,
1518                                uint64_t &ErrorInfo,
1519                                bool MatchingInlineAsm) override;
1520   bool ParseDirective(AsmToken DirectiveID) override;
1521   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1522                                     OperandMode Mode = OperandMode_Default);
1523   StringRef parseMnemonicSuffix(StringRef Name);
1524   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1525                         SMLoc NameLoc, OperandVector &Operands) override;
1526   //bool ProcessInstruction(MCInst &Inst);
1527 
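  // Parse an integer value introduced by a named prefix, e.g. 'offset:16'.
  // The overloads below either return the raw value or append it to Operands
  // as an immediate of the given ImmTy, optionally adjusted by ConvertResult.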
1528   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1529 
1530   OperandMatchResultTy
1531   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1532                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1533                      bool (*ConvertResult)(int64_t &) = nullptr);
1534 
1535   OperandMatchResultTy
1536   parseOperandArrayWithPrefix(const char *Prefix,
1537                               OperandVector &Operands,
1538                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1539                               bool (*ConvertResult)(int64_t&) = nullptr);
1540 
1541   OperandMatchResultTy
1542   parseNamedBit(StringRef Name, OperandVector &Operands,
1543                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1544   OperandMatchResultTy parseCPol(OperandVector &Operands);
1545   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1546                                              StringRef &Value,
1547                                              SMLoc &StringLoc);
1548 
1549   bool isModifier();
1550   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1551   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1552   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1553   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1554   bool parseSP3NegModifier();
1555   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1556   OperandMatchResultTy parseReg(OperandVector &Operands);
1557   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1558   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1559   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1560   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1561   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1562   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1563   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1564   OperandMatchResultTy parseUfmt(int64_t &Format);
1565   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1566   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1567   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1568   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1569   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1570   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1571   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1572 
1573   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1574   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1575   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1576   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1577 
1578   bool parseCnt(int64_t &IntVal);
1579   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1580 
1581   bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1582   void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1583   OperandMatchResultTy parseDepCtrOps(OperandVector &Operands);
1584 
1585   bool parseDelay(int64_t &Delay);
1586   OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands);
1587 
1588   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1589 
1590 private:
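  // Describes a single field parsed from a hwreg/sendmsg operand: its value,
  // where it appeared, whether it was given symbolically, and whether it was
  // specified at all.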
1591   struct OperandInfoTy {
1592     SMLoc Loc;
1593     int64_t Id;
1594     bool IsSymbolic = false;
1595     bool IsDefined = false;
1596 
1597     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1598   };
1599 
1600   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1601   bool validateSendMsg(const OperandInfoTy &Msg,
1602                        const OperandInfoTy &Op,
1603                        const OperandInfoTy &Stream);
1604 
1605   bool parseHwregBody(OperandInfoTy &HwReg,
1606                       OperandInfoTy &Offset,
1607                       OperandInfoTy &Width);
1608   bool validateHwreg(const OperandInfoTy &HwReg,
1609                      const OperandInfoTy &Offset,
1610                      const OperandInfoTy &Width);
1611 
1612   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1613   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1614   SMLoc getBLGPLoc(const OperandVector &Operands) const;
1615 
1616   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1617                       const OperandVector &Operands) const;
1618   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1619   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1620   SMLoc getLitLoc(const OperandVector &Operands) const;
1621   SMLoc getConstLoc(const OperandVector &Operands) const;
1622 
1623   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1624   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1625   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1626   bool validateSOPLiteral(const MCInst &Inst) const;
1627   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1628   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1629   bool validateIntClampSupported(const MCInst &Inst);
1630   bool validateMIMGAtomicDMask(const MCInst &Inst);
1631   bool validateMIMGGatherDMask(const MCInst &Inst);
1632   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1633   Optional<StringRef> validateMIMGDataSize(const MCInst &Inst);
1634   bool validateMIMGAddrSize(const MCInst &Inst);
1635   bool validateMIMGD16(const MCInst &Inst);
1636   bool validateMIMGDim(const MCInst &Inst);
1637   bool validateMIMGMSAA(const MCInst &Inst);
1638   bool validateOpSel(const MCInst &Inst);
1639   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1640   bool validateVccOperand(unsigned Reg) const;
1641   bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1642   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1643   bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1644   bool validateAGPRLdSt(const MCInst &Inst) const;
1645   bool validateVGPRAlign(const MCInst &Inst) const;
1646   bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1647   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1648   bool validateDivScale(const MCInst &Inst);
1649   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1650                              const SMLoc &IDLoc);
1651   bool validateFlatLdsDMA(const MCInst &Inst, const OperandVector &Operands,
1652                           const SMLoc &IDLoc);
1653   bool validateExeczVcczOperands(const OperandVector &Operands);
1654   Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1655   unsigned getConstantBusLimit(unsigned Opcode) const;
1656   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1657   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1658   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1659 
1660   bool isSupportedMnemo(StringRef Mnemo,
1661                         const FeatureBitset &FBS);
1662   bool isSupportedMnemo(StringRef Mnemo,
1663                         const FeatureBitset &FBS,
1664                         ArrayRef<unsigned> Variants);
1665   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1666 
1667   bool isId(const StringRef Id) const;
1668   bool isId(const AsmToken &Token, const StringRef Id) const;
1669   bool isToken(const AsmToken::TokenKind Kind) const;
1670   bool trySkipId(const StringRef Id);
1671   bool trySkipId(const StringRef Pref, const StringRef Id);
1672   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1673   bool trySkipToken(const AsmToken::TokenKind Kind);
1674   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1675   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1676   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1677 
1678   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1679   AsmToken::TokenKind getTokenKind() const;
1680   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1681   bool parseExpr(OperandVector &Operands);
1682   StringRef getTokenStr() const;
1683   AsmToken peekToken(bool ShouldSkipSpace = true);
1684   AsmToken getToken() const;
1685   SMLoc getLoc() const;
1686   void lex();
1687 
1688 public:
1689   void onBeginOfFile() override;
1690 
1691   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1692   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1693 
1694   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1695   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1696   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1697   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1698   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1699   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1700 
1701   bool parseSwizzleOperand(int64_t &Op,
1702                            const unsigned MinVal,
1703                            const unsigned MaxVal,
1704                            const StringRef ErrMsg,
1705                            SMLoc &Loc);
1706   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1707                             const unsigned MinVal,
1708                             const unsigned MaxVal,
1709                             const StringRef ErrMsg);
1710   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1711   bool parseSwizzleOffset(int64_t &Imm);
1712   bool parseSwizzleMacro(int64_t &Imm);
1713   bool parseSwizzleQuadPerm(int64_t &Imm);
1714   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1715   bool parseSwizzleBroadcast(int64_t &Imm);
1716   bool parseSwizzleSwap(int64_t &Imm);
1717   bool parseSwizzleReverse(int64_t &Imm);
1718 
1719   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1720   int64_t parseGPRIdxMacro();
1721 
1722   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1723   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1724   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1725   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1726 
1727   AMDGPUOperand::Ptr defaultCPol() const;
1728 
1729   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1730   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1731   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1732   AMDGPUOperand::Ptr defaultFlatOffset() const;
1733 
1734   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1735 
1736   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1737                OptionalImmIndexMap &OptionalIdx);
1738   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1739   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1740   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1741   void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1742   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1743                     OptionalImmIndexMap &OptionalIdx);
1744   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1745                 OptionalImmIndexMap &OptionalIdx);
1746 
1747   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1748   void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1749 
1750   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1751                bool IsAtomic = false);
1752   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1753   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1754 
1755   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1756 
1757   bool parseDimId(unsigned &Encoding);
1758   OperandMatchResultTy parseDim(OperandVector &Operands);
1759   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1760   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1761   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1762   int64_t parseDPPCtrlSel(StringRef Ctrl);
1763   int64_t parseDPPCtrlPerm();
1764   AMDGPUOperand::Ptr defaultRowMask() const;
1765   AMDGPUOperand::Ptr defaultBankMask() const;
1766   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1767   AMDGPUOperand::Ptr defaultFI() const;
1768   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1769   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1770     cvtDPP(Inst, Operands, true);
1771   }
1772   void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1773                   bool IsDPP8 = false);
1774   void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1775     cvtVOP3DPP(Inst, Operands, true);
1776   }
1777 
1778   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1779                                     AMDGPUOperand::ImmTy Type);
1780   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1781   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1782   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1783   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1784   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1785   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1786   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1787                uint64_t BasicInstType,
1788                bool SkipDstVcc = false,
1789                bool SkipSrcVcc = false);
1790 
1791   AMDGPUOperand::Ptr defaultBLGP() const;
1792   AMDGPUOperand::Ptr defaultCBSZ() const;
1793   AMDGPUOperand::Ptr defaultABID() const;
1794 
1795   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1796   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1797 
1798   AMDGPUOperand::Ptr defaultWaitVDST() const;
1799   AMDGPUOperand::Ptr defaultWaitEXP() const;
1800   OperandMatchResultTy parseVOPD(OperandVector &Operands);
1801 };
1802 
1803 struct OptionalOperand {
1804   const char *Name;
1805   AMDGPUOperand::ImmTy Type;
1806   bool IsBit;
1807   bool (*ConvertResult)(int64_t&);
1808 };
1809 
1810 } // end anonymous namespace
1811 
1812 // May be called with integer type with equivalent bitwidth.
1813 static const fltSemantics *getFltSemantics(unsigned Size) {
1814   switch (Size) {
1815   case 4:
1816     return &APFloat::IEEEsingle();
1817   case 8:
1818     return &APFloat::IEEEdouble();
1819   case 2:
1820     return &APFloat::IEEEhalf();
1821   default:
1822     llvm_unreachable("unsupported fp type");
1823   }
1824 }
1825 
1826 static const fltSemantics *getFltSemantics(MVT VT) {
1827   return getFltSemantics(VT.getSizeInBits() / 8);
1828 }
1829 
1830 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1831   switch (OperandType) {
1832   case AMDGPU::OPERAND_REG_IMM_INT32:
1833   case AMDGPU::OPERAND_REG_IMM_FP32:
1834   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1835   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1836   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1837   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1838   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1839   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1840   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1841   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1842   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1843   case AMDGPU::OPERAND_KIMM32:
1844     return &APFloat::IEEEsingle();
1845   case AMDGPU::OPERAND_REG_IMM_INT64:
1846   case AMDGPU::OPERAND_REG_IMM_FP64:
1847   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1848   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1849   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1850     return &APFloat::IEEEdouble();
1851   case AMDGPU::OPERAND_REG_IMM_INT16:
1852   case AMDGPU::OPERAND_REG_IMM_FP16:
1853   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1854   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1855   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1856   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1857   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1858   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1859   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1860   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1861   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1862   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1863   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1864   case AMDGPU::OPERAND_KIMM16:
1865     return &APFloat::IEEEhalf();
1866   default:
1867     llvm_unreachable("unsupported fp type");
1868   }
1869 }
1870 
1871 //===----------------------------------------------------------------------===//
1872 // Operand
1873 //===----------------------------------------------------------------------===//
1874 
1875 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1876   bool Lost;
1877 
1878   // Convert the literal to the operand's floating-point type.
1879   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1880                                                APFloat::rmNearestTiesToEven,
1881                                                &Lost);
1882   // We allow precision loss, but not overflow or underflow.
1883   if (Status != APFloat::opOK &&
1884       Lost &&
1885       ((Status & APFloat::opOverflow)  != 0 ||
1886        (Status & APFloat::opUnderflow) != 0)) {
1887     return false;
1888   }
1889 
1890   return true;
1891 }
1892 
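// Return true if truncating Val to Size bits is lossless, i.e. Val is
// representable in Size bits either as an unsigned or as a signed value.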
1893 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1894   return isUIntN(Size, Val) || isIntN(Size, Val);
1895 }
1896 
1897 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1898   if (VT.getScalarType() == MVT::i16) {
1899     // FP immediate values are broken for i16 operands; only accept integer inline constants.
1900     return isInlinableIntLiteral(Val);
1901   }
1902 
1903   // f16/v2f16 operands work correctly for all values.
1904   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1905 }
1906 
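// Check whether this immediate can be encoded as an inline constant of the
// given operand type on the current subtarget.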
1907 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1908 
1909   // This is a hack to enable named inline values like
1910   // shared_base with both 32-bit and 64-bit operands.
1911   // Note that these values are defined as
1912   // 32-bit operands only.
1913   if (isInlineValue()) {
1914     return true;
1915   }
1916 
1917   if (!isImmTy(ImmTyNone)) {
1918     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1919     return false;
1920   }
1921   // TODO: We should avoid using host floats here. It would be better to
1922   // check the float bit values, as a few other places do.
1923   // We've had bot failures before due to weird NaN support on MIPS hosts.
1924 
1925   APInt Literal(64, Imm.Val);
1926 
1927   if (Imm.IsFPImm) { // We got fp literal token
1928     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1929       return AMDGPU::isInlinableLiteral64(Imm.Val,
1930                                           AsmParser->hasInv2PiInlineImm());
1931     }
1932 
1933     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1934     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1935       return false;
1936 
1937     if (type.getScalarSizeInBits() == 16) {
1938       return isInlineableLiteralOp16(
1939         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1940         type, AsmParser->hasInv2PiInlineImm());
1941     }
1942 
1943     // Check if the single-precision literal is inlinable.
1944     return AMDGPU::isInlinableLiteral32(
1945       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1946       AsmParser->hasInv2PiInlineImm());
1947   }
1948 
1949   // We got int literal token.
1950   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1951     return AMDGPU::isInlinableLiteral64(Imm.Val,
1952                                         AsmParser->hasInv2PiInlineImm());
1953   }
1954 
1955   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1956     return false;
1957   }
1958 
1959   if (type.getScalarSizeInBits() == 16) {
1960     return isInlineableLiteralOp16(
1961       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1962       type, AsmParser->hasInv2PiInlineImm());
1963   }
1964 
1965   return AMDGPU::isInlinableLiteral32(
1966     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1967     AsmParser->hasInv2PiInlineImm());
1968 }
1969 
1970 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1971   // Check that this immediate can be added as a literal.
1972   if (!isImmTy(ImmTyNone)) {
1973     return false;
1974   }
1975 
1976   if (!Imm.IsFPImm) {
1977     // We got int literal token.
1978 
1979     if (type == MVT::f64 && hasFPModifiers()) {
1980       // FP modifiers cannot be applied to int literals while preserving the same
1981       // semantics for VOP1/2/C and VOP3 because of integer truncation. To avoid
1982       // ambiguity, reject these cases.
1983       return false;
1984     }
1985 
1986     unsigned Size = type.getSizeInBits();
1987     if (Size == 64)
1988       Size = 32;
1989 
1990     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1991     // types.
1992     return isSafeTruncation(Imm.Val, Size);
1993   }
1994 
1995   // We got fp literal token
1996   if (type == MVT::f64) { // Expected 64-bit fp operand
1997     // The low 32 bits of such a literal will be set to zero, but we accept it.
1998     return true;
1999   }
2000 
2001   if (type == MVT::i64) { // Expected 64-bit int operand
2002     // We don't allow fp literals in 64-bit integer instructions. It is
2003     // unclear how we should encode them.
2004     return false;
2005   }
2006 
2007   // We allow fp literals with packed 16-bit and 32-bit operands assuming that
2008   // the specified literal goes into the lower half and the upper half is zeroed.
2009   // We also require that the literal is losslessly convertible to the element type.
2010   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
2011                      (type == MVT::v2i16)? MVT::i16 :
2012                      (type == MVT::v2f32)? MVT::f32 : type;
2013 
2014   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2015   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2016 }
2017 
2018 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2019   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2020 }
2021 
2022 bool AMDGPUOperand::isVRegWithInputMods() const {
2023   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2024          // GFX90A allows DPP on 64-bit operands.
2025          (isRegClass(AMDGPU::VReg_64RegClassID) &&
2026           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
2027 }
2028 
2029 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2030   if (AsmParser->isVI())
2031     return isVReg32();
2032   else if (AsmParser->isGFX9Plus())
2033     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2034   else
2035     return false;
2036 }
2037 
2038 bool AMDGPUOperand::isSDWAFP16Operand() const {
2039   return isSDWAOperand(MVT::f16);
2040 }
2041 
2042 bool AMDGPUOperand::isSDWAFP32Operand() const {
2043   return isSDWAOperand(MVT::f32);
2044 }
2045 
2046 bool AMDGPUOperand::isSDWAInt16Operand() const {
2047   return isSDWAOperand(MVT::i16);
2048 }
2049 
2050 bool AMDGPUOperand::isSDWAInt32Operand() const {
2051   return isSDWAOperand(MVT::i32);
2052 }
2053 
2054 bool AMDGPUOperand::isBoolReg() const {
2055   auto FB = AsmParser->getFeatureBits();
2056   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
2057                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
2058 }
2059 
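// Apply the 'abs' and 'neg' input modifiers to the raw bit pattern of an
// immediate: 'abs' clears the sign bit, 'neg' toggles it.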
2060 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2061 {
2062   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2063   assert(Size == 2 || Size == 4 || Size == 8);
2064 
2065   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2066 
2067   if (Imm.Mods.Abs) {
2068     Val &= ~FpSignMask;
2069   }
2070   if (Imm.Mods.Neg) {
2071     Val ^= FpSignMask;
2072   }
2073 
2074   return Val;
2075 }
2076 
2077 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2078   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2079                              Inst.getNumOperands())) {
2080     addLiteralImmOperand(Inst, Imm.Val,
2081                          ApplyModifiers &&
2082                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2083   } else {
2084     assert(!isImmTy(ImmTyNone) || !hasModifiers());
2085     Inst.addOperand(MCOperand::createImm(Imm.Val));
2086     setImmKindNone();
2087   }
2088 }
2089 
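// Add a literal immediate to Inst, encoding it according to the operand type:
// inline constants are emitted as-is, other values are truncated or converted
// to the literal format expected by the instruction.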
2090 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2091   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2092   auto OpNum = Inst.getNumOperands();
2093   // Check that this operand accepts literals
2094   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2095 
2096   if (ApplyModifiers) {
2097     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2098     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2099     Val = applyInputFPModifiers(Val, Size);
2100   }
2101 
2102   APInt Literal(64, Val);
2103   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
2104 
2105   if (Imm.IsFPImm) { // We got fp literal token
2106     switch (OpTy) {
2107     case AMDGPU::OPERAND_REG_IMM_INT64:
2108     case AMDGPU::OPERAND_REG_IMM_FP64:
2109     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2110     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2111     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2112       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2113                                        AsmParser->hasInv2PiInlineImm())) {
2114         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2115         setImmKindConst();
2116         return;
2117       }
2118 
2119       // Non-inlineable
2120       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2121         // For fp operands we check if low 32 bits are zeros
2122         if (Literal.getLoBits(32) != 0) {
2123           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2124           "Can't encode literal as exact 64-bit floating-point operand. "
2125           "Low 32-bits will be set to zero");
2126         }
2127 
2128         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2129         setImmKindLiteral();
2130         return;
2131       }
2132 
2133       // We don't allow fp literals in 64-bit integer instructions. It is
2134       // unclear how we should encode them. This case should be checked earlier
2135       // in predicate methods (isLiteralImm())
2136       llvm_unreachable("fp literal in 64-bit integer instruction.");
2137 
2138     case AMDGPU::OPERAND_REG_IMM_INT32:
2139     case AMDGPU::OPERAND_REG_IMM_FP32:
2140     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2141     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2142     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2143     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2144     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2145     case AMDGPU::OPERAND_REG_IMM_INT16:
2146     case AMDGPU::OPERAND_REG_IMM_FP16:
2147     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2148     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2149     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2150     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2151     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2152     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2153     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2154     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2155     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2156     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2157     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2158     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2159     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2160     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2161     case AMDGPU::OPERAND_REG_IMM_V2INT32:
2162     case AMDGPU::OPERAND_KIMM32:
2163     case AMDGPU::OPERAND_KIMM16: {
2164       bool lost;
2165       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2166       // Convert the literal to the operand's floating-point format.
2167       FPLiteral.convert(*getOpFltSemantics(OpTy),
2168                         APFloat::rmNearestTiesToEven, &lost);
2169       // We allow precision loss, but not overflow or underflow. This should have
2170       // been checked earlier in isLiteralImm().
2171 
2172       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2173       Inst.addOperand(MCOperand::createImm(ImmVal));
2174       setImmKindLiteral();
2175       return;
2176     }
2177     default:
2178       llvm_unreachable("invalid operand size");
2179     }
2180 
2181     return;
2182   }
2183 
2184   // We got int literal token.
2185   // Only sign extend inline immediates.
2186   switch (OpTy) {
2187   case AMDGPU::OPERAND_REG_IMM_INT32:
2188   case AMDGPU::OPERAND_REG_IMM_FP32:
2189   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2190   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2191   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2192   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2193   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2194   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2195   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2196   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2197   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2198   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2199   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2200     if (isSafeTruncation(Val, 32) &&
2201         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2202                                      AsmParser->hasInv2PiInlineImm())) {
2203       Inst.addOperand(MCOperand::createImm(Val));
2204       setImmKindConst();
2205       return;
2206     }
2207 
2208     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2209     setImmKindLiteral();
2210     return;
2211 
2212   case AMDGPU::OPERAND_REG_IMM_INT64:
2213   case AMDGPU::OPERAND_REG_IMM_FP64:
2214   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2215   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2216   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2217     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2218       Inst.addOperand(MCOperand::createImm(Val));
2219       setImmKindConst();
2220       return;
2221     }
2222 
2223     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2224     setImmKindLiteral();
2225     return;
2226 
2227   case AMDGPU::OPERAND_REG_IMM_INT16:
2228   case AMDGPU::OPERAND_REG_IMM_FP16:
2229   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2230   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2231   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2232   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2233   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2234     if (isSafeTruncation(Val, 16) &&
2235         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2236                                      AsmParser->hasInv2PiInlineImm())) {
2237       Inst.addOperand(MCOperand::createImm(Val));
2238       setImmKindConst();
2239       return;
2240     }
2241 
2242     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2243     setImmKindLiteral();
2244     return;
2245 
2246   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2247   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2248   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2249   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2250     assert(isSafeTruncation(Val, 16));
2251     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2252                                         AsmParser->hasInv2PiInlineImm()));
2253 
2254     Inst.addOperand(MCOperand::createImm(Val));
2255     return;
2256   }
2257   case AMDGPU::OPERAND_KIMM32:
2258     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2259     setImmKindNone();
2260     return;
2261   case AMDGPU::OPERAND_KIMM16:
2262     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2263     setImmKindNone();
2264     return;
2265   default:
2266     llvm_unreachable("invalid operand size");
2267   }
2268 }
2269 
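// Add a KImm operand of the given bit width: integer tokens are truncated to
// Bitwidth, fp tokens are converted to the corresponding fp format first.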
2270 template <unsigned Bitwidth>
2271 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2272   APInt Literal(64, Imm.Val);
2273   setImmKindNone();
2274 
2275   if (!Imm.IsFPImm) {
2276     // We got int literal token.
2277     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2278     return;
2279   }
2280 
2281   bool Lost;
2282   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2283   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2284                     APFloat::rmNearestTiesToEven, &Lost);
2285   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2286 }
2287 
2288 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2289   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2290 }
2291 
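// Named special registers (shared_base, vccz, null, etc.) that are treated as
// inline constant values rather than as ordinary register operands.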
2292 static bool isInlineValue(unsigned Reg) {
2293   switch (Reg) {
2294   case AMDGPU::SRC_SHARED_BASE:
2295   case AMDGPU::SRC_SHARED_LIMIT:
2296   case AMDGPU::SRC_PRIVATE_BASE:
2297   case AMDGPU::SRC_PRIVATE_LIMIT:
2298   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2299     return true;
2300   case AMDGPU::SRC_VCCZ:
2301   case AMDGPU::SRC_EXECZ:
2302   case AMDGPU::SRC_SCC:
2303     return true;
2304   case AMDGPU::SGPR_NULL:
2305     return true;
2306   default:
2307     return false;
2308   }
2309 }
2310 
2311 bool AMDGPUOperand::isInlineValue() const {
2312   return isRegKind() && ::isInlineValue(getReg());
2313 }
2314 
2315 //===----------------------------------------------------------------------===//
2316 // AsmParser
2317 //===----------------------------------------------------------------------===//
2318 
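// Map a register kind and a width in bits to the corresponding register class
// ID, or return -1 if no class of that width exists for the given kind.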
2319 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2320   if (Is == IS_VGPR) {
2321     switch (RegWidth) {
2322       default: return -1;
2323       case 32:
2324         return AMDGPU::VGPR_32RegClassID;
2325       case 64:
2326         return AMDGPU::VReg_64RegClassID;
2327       case 96:
2328         return AMDGPU::VReg_96RegClassID;
2329       case 128:
2330         return AMDGPU::VReg_128RegClassID;
2331       case 160:
2332         return AMDGPU::VReg_160RegClassID;
2333       case 192:
2334         return AMDGPU::VReg_192RegClassID;
2335       case 224:
2336         return AMDGPU::VReg_224RegClassID;
2337       case 256:
2338         return AMDGPU::VReg_256RegClassID;
2339       case 512:
2340         return AMDGPU::VReg_512RegClassID;
2341       case 1024:
2342         return AMDGPU::VReg_1024RegClassID;
2343     }
2344   } else if (Is == IS_TTMP) {
2345     switch (RegWidth) {
2346       default: return -1;
2347       case 32:
2348         return AMDGPU::TTMP_32RegClassID;
2349       case 64:
2350         return AMDGPU::TTMP_64RegClassID;
2351       case 128:
2352         return AMDGPU::TTMP_128RegClassID;
2353       case 256:
2354         return AMDGPU::TTMP_256RegClassID;
2355       case 512:
2356         return AMDGPU::TTMP_512RegClassID;
2357     }
2358   } else if (Is == IS_SGPR) {
2359     switch (RegWidth) {
2360       default: return -1;
2361       case 32:
2362         return AMDGPU::SGPR_32RegClassID;
2363       case 64:
2364         return AMDGPU::SGPR_64RegClassID;
2365       case 96:
2366         return AMDGPU::SGPR_96RegClassID;
2367       case 128:
2368         return AMDGPU::SGPR_128RegClassID;
2369       case 160:
2370         return AMDGPU::SGPR_160RegClassID;
2371       case 192:
2372         return AMDGPU::SGPR_192RegClassID;
2373       case 224:
2374         return AMDGPU::SGPR_224RegClassID;
2375       case 256:
2376         return AMDGPU::SGPR_256RegClassID;
2377       case 512:
2378         return AMDGPU::SGPR_512RegClassID;
2379     }
2380   } else if (Is == IS_AGPR) {
2381     switch (RegWidth) {
2382       default: return -1;
2383       case 32:
2384         return AMDGPU::AGPR_32RegClassID;
2385       case 64:
2386         return AMDGPU::AReg_64RegClassID;
2387       case 96:
2388         return AMDGPU::AReg_96RegClassID;
2389       case 128:
2390         return AMDGPU::AReg_128RegClassID;
2391       case 160:
2392         return AMDGPU::AReg_160RegClassID;
2393       case 192:
2394         return AMDGPU::AReg_192RegClassID;
2395       case 224:
2396         return AMDGPU::AReg_224RegClassID;
2397       case 256:
2398         return AMDGPU::AReg_256RegClassID;
2399       case 512:
2400         return AMDGPU::AReg_512RegClassID;
2401       case 1024:
2402         return AMDGPU::AReg_1024RegClassID;
2403     }
2404   }
2405   return -1;
2406 }
2407 
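// Translate the textual name of a special register (exec, vcc, m0, ...) into
// its register number; returns AMDGPU::NoRegister for unknown names.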
2408 static unsigned getSpecialRegForName(StringRef RegName) {
2409   return StringSwitch<unsigned>(RegName)
2410     .Case("exec", AMDGPU::EXEC)
2411     .Case("vcc", AMDGPU::VCC)
2412     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2413     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2414     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2415     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2416     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2417     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2418     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2419     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2420     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2421     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2422     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2423     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2424     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2425     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2426     .Case("m0", AMDGPU::M0)
2427     .Case("vccz", AMDGPU::SRC_VCCZ)
2428     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2429     .Case("execz", AMDGPU::SRC_EXECZ)
2430     .Case("src_execz", AMDGPU::SRC_EXECZ)
2431     .Case("scc", AMDGPU::SRC_SCC)
2432     .Case("src_scc", AMDGPU::SRC_SCC)
2433     .Case("tba", AMDGPU::TBA)
2434     .Case("tma", AMDGPU::TMA)
2435     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2436     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2437     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2438     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2439     .Case("vcc_lo", AMDGPU::VCC_LO)
2440     .Case("vcc_hi", AMDGPU::VCC_HI)
2441     .Case("exec_lo", AMDGPU::EXEC_LO)
2442     .Case("exec_hi", AMDGPU::EXEC_HI)
2443     .Case("tma_lo", AMDGPU::TMA_LO)
2444     .Case("tma_hi", AMDGPU::TMA_HI)
2445     .Case("tba_lo", AMDGPU::TBA_LO)
2446     .Case("tba_hi", AMDGPU::TBA_HI)
2447     .Case("pc", AMDGPU::PC_REG)
2448     .Case("null", AMDGPU::SGPR_NULL)
2449     .Default(AMDGPU::NoRegister);
2450 }
2451 
2452 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2453                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2454   auto R = parseRegister();
2455   if (!R) return true;
2456   assert(R->isReg());
2457   RegNo = R->getReg();
2458   StartLoc = R->getStartLoc();
2459   EndLoc = R->getEndLoc();
2460   return false;
2461 }
2462 
2463 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2464                                     SMLoc &EndLoc) {
2465   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2466 }
2467 
2468 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2469                                                        SMLoc &StartLoc,
2470                                                        SMLoc &EndLoc) {
2471   bool Result =
2472       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2473   bool PendingErrors = getParser().hasPendingError();
2474   getParser().clearPendingErrors();
2475   if (PendingErrors)
2476     return MatchOperand_ParseFail;
2477   if (Result)
2478     return MatchOperand_NoMatch;
2479   return MatchOperand_Success;
2480 }
2481 
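// Append Reg1 to the register list currently described by (Reg, RegWidth).
// Lo/hi halves of special registers are merged into their 64-bit aliases;
// regular registers must have consecutive indices and grow RegWidth by 32.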
2482 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2483                                             RegisterKind RegKind, unsigned Reg1,
2484                                             SMLoc Loc) {
2485   switch (RegKind) {
2486   case IS_SPECIAL:
2487     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2488       Reg = AMDGPU::EXEC;
2489       RegWidth = 64;
2490       return true;
2491     }
2492     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2493       Reg = AMDGPU::FLAT_SCR;
2494       RegWidth = 64;
2495       return true;
2496     }
2497     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2498       Reg = AMDGPU::XNACK_MASK;
2499       RegWidth = 64;
2500       return true;
2501     }
2502     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2503       Reg = AMDGPU::VCC;
2504       RegWidth = 64;
2505       return true;
2506     }
2507     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2508       Reg = AMDGPU::TBA;
2509       RegWidth = 64;
2510       return true;
2511     }
2512     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2513       Reg = AMDGPU::TMA;
2514       RegWidth = 64;
2515       return true;
2516     }
2517     Error(Loc, "register does not fit in the list");
2518     return false;
2519   case IS_VGPR:
2520   case IS_SGPR:
2521   case IS_AGPR:
2522   case IS_TTMP:
2523     if (Reg1 != Reg + RegWidth / 32) {
2524       Error(Loc, "registers in a list must have consecutive indices");
2525       return false;
2526     }
2527     RegWidth += 32;
2528     return true;
2529   default:
2530     llvm_unreachable("unexpected register kind");
2531   }
2532 }
2533 
2534 struct RegInfo {
2535   StringLiteral Name;
2536   RegisterKind Kind;
2537 };
2538 
2539 static constexpr RegInfo RegularRegisters[] = {
2540   {{"v"},    IS_VGPR},
2541   {{"s"},    IS_SGPR},
2542   {{"ttmp"}, IS_TTMP},
2543   {{"acc"},  IS_AGPR},
2544   {{"a"},    IS_AGPR},
2545 };
2546 
2547 static bool isRegularReg(RegisterKind Kind) {
2548   return Kind == IS_VGPR ||
2549          Kind == IS_SGPR ||
2550          Kind == IS_TTMP ||
2551          Kind == IS_AGPR;
2552 }
2553 
2554 static const RegInfo* getRegularRegInfo(StringRef Str) {
2555   for (const RegInfo &Reg : RegularRegisters)
2556     if (Str.startswith(Reg.Name))
2557       return &Reg;
2558   return nullptr;
2559 }
2560 
2561 static bool getRegNum(StringRef Str, unsigned& Num) {
2562   return !Str.getAsInteger(10, Num);
2563 }
2564 
2565 bool
2566 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2567                             const AsmToken &NextToken) const {
2568 
2569   // A list of consecutive registers: [s0,s1,s2,s3]
2570   if (Token.is(AsmToken::LBrac))
2571     return true;
2572 
2573   if (!Token.is(AsmToken::Identifier))
2574     return false;
2575 
2576   // A single register like s0 or a range of registers like s[0:1]
2577 
2578   StringRef Str = Token.getString();
2579   const RegInfo *Reg = getRegularRegInfo(Str);
2580   if (Reg) {
2581     StringRef RegName = Reg->Name;
2582     StringRef RegSuffix = Str.substr(RegName.size());
2583     if (!RegSuffix.empty()) {
2584       unsigned Num;
2585       // A single register with an index: rXX
2586       if (getRegNum(RegSuffix, Num))
2587         return true;
2588     } else {
2589       // A range of registers: r[XX:YY].
2590       if (NextToken.is(AsmToken::LBrac))
2591         return true;
2592     }
2593   }
2594 
2595   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2596 }
2597 
2598 bool
2599 AMDGPUAsmParser::isRegister()
2600 {
2601   return isRegister(getToken(), peekToken());
2602 }
2603 
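// Compute the MC register for a regular (v/s/ttmp/a) register operand from its
// kind, starting index and width, diagnosing alignment and range errors.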
2604 unsigned
2605 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2606                                unsigned RegNum,
2607                                unsigned RegWidth,
2608                                SMLoc Loc) {
2609 
2610   assert(isRegularReg(RegKind));
2611 
2612   unsigned AlignSize = 1;
2613   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2614     // SGPR and TTMP registers must be aligned.
2615     // Max required alignment is 4 dwords.
2616     AlignSize = std::min(RegWidth / 32, 4u);
2617   }
2618 
2619   if (RegNum % AlignSize != 0) {
2620     Error(Loc, "invalid register alignment");
2621     return AMDGPU::NoRegister;
2622   }
2623 
2624   unsigned RegIdx = RegNum / AlignSize;
2625   int RCID = getRegClass(RegKind, RegWidth);
2626   if (RCID == -1) {
2627     Error(Loc, "invalid or unsupported register size");
2628     return AMDGPU::NoRegister;
2629   }
2630 
2631   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2632   const MCRegisterClass RC = TRI->getRegClass(RCID);
2633   if (RegIdx >= RC.getNumRegs()) {
2634     Error(Loc, "register index is out of range");
2635     return AMDGPU::NoRegister;
2636   }
2637 
2638   return RC.getRegister(RegIdx);
2639 }
2640 
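// Parse a bracketed register index or index range, e.g. "[0]" or "[0:3]".
// On success, Num holds the first index and RegWidth the total width in bits
// (32 bits per register).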
2641 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2642   int64_t RegLo, RegHi;
2643   if (!skipToken(AsmToken::LBrac, "missing register index"))
2644     return false;
2645 
2646   SMLoc FirstIdxLoc = getLoc();
2647   SMLoc SecondIdxLoc;
2648 
2649   if (!parseExpr(RegLo))
2650     return false;
2651 
2652   if (trySkipToken(AsmToken::Colon)) {
2653     SecondIdxLoc = getLoc();
2654     if (!parseExpr(RegHi))
2655       return false;
2656   } else {
2657     RegHi = RegLo;
2658   }
2659 
2660   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2661     return false;
2662 
2663   if (!isUInt<32>(RegLo)) {
2664     Error(FirstIdxLoc, "invalid register index");
2665     return false;
2666   }
2667 
2668   if (!isUInt<32>(RegHi)) {
2669     Error(SecondIdxLoc, "invalid register index");
2670     return false;
2671   }
2672 
2673   if (RegLo > RegHi) {
2674     Error(FirstIdxLoc, "first register index should not exceed second index");
2675     return false;
2676   }
2677 
2678   Num = static_cast<unsigned>(RegLo);
2679   RegWidth = 32 * ((RegHi - RegLo) + 1);
2680   return true;
2681 }
2682 
2683 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2684                                           unsigned &RegNum, unsigned &RegWidth,
2685                                           SmallVectorImpl<AsmToken> &Tokens) {
2686   assert(isToken(AsmToken::Identifier));
2687   unsigned Reg = getSpecialRegForName(getTokenStr());
2688   if (Reg) {
2689     RegNum = 0;
2690     RegWidth = 32;
2691     RegKind = IS_SPECIAL;
2692     Tokens.push_back(getToken());
2693     lex(); // skip register name
2694   }
2695   return Reg;
2696 }
2697 
2698 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2699                                           unsigned &RegNum, unsigned &RegWidth,
2700                                           SmallVectorImpl<AsmToken> &Tokens) {
2701   assert(isToken(AsmToken::Identifier));
2702   StringRef RegName = getTokenStr();
2703   auto Loc = getLoc();
2704 
2705   const RegInfo *RI = getRegularRegInfo(RegName);
2706   if (!RI) {
2707     Error(Loc, "invalid register name");
2708     return AMDGPU::NoRegister;
2709   }
2710 
2711   Tokens.push_back(getToken());
2712   lex(); // skip register name
2713 
2714   RegKind = RI->Kind;
2715   StringRef RegSuffix = RegName.substr(RI->Name.size());
2716   if (!RegSuffix.empty()) {
2717     // Single 32-bit register: vXX.
2718     if (!getRegNum(RegSuffix, RegNum)) {
2719       Error(Loc, "invalid register index");
2720       return AMDGPU::NoRegister;
2721     }
2722     RegWidth = 32;
2723   } else {
2724     // Range of registers: v[XX:YY]. ":YY" is optional.
2725     if (!ParseRegRange(RegNum, RegWidth))
2726       return AMDGPU::NoRegister;
2727   }
2728 
2729   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2730 }
2731 
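// Parse a bracketed list of consecutive 32-bit registers of the same kind,
// e.g. "[s0,s1,s2,s3]", and fold it into a single wide register.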
2732 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2733                                        unsigned &RegWidth,
2734                                        SmallVectorImpl<AsmToken> &Tokens) {
2735   unsigned Reg = AMDGPU::NoRegister;
2736   auto ListLoc = getLoc();
2737 
2738   if (!skipToken(AsmToken::LBrac,
2739                  "expected a register or a list of registers")) {
2740     return AMDGPU::NoRegister;
2741   }
2742 
2743   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2744 
2745   auto Loc = getLoc();
2746   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2747     return AMDGPU::NoRegister;
2748   if (RegWidth != 32) {
2749     Error(Loc, "expected a single 32-bit register");
2750     return AMDGPU::NoRegister;
2751   }
2752 
2753   for (; trySkipToken(AsmToken::Comma); ) {
2754     RegisterKind NextRegKind;
2755     unsigned NextReg, NextRegNum, NextRegWidth;
2756     Loc = getLoc();
2757 
2758     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2759                              NextRegNum, NextRegWidth,
2760                              Tokens)) {
2761       return AMDGPU::NoRegister;
2762     }
2763     if (NextRegWidth != 32) {
2764       Error(Loc, "expected a single 32-bit register");
2765       return AMDGPU::NoRegister;
2766     }
2767     if (NextRegKind != RegKind) {
2768       Error(Loc, "registers in a list must be of the same kind");
2769       return AMDGPU::NoRegister;
2770     }
2771     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2772       return AMDGPU::NoRegister;
2773   }
2774 
2775   if (!skipToken(AsmToken::RBrac,
2776                  "expected a comma or a closing square bracket")) {
2777     return AMDGPU::NoRegister;
2778   }
2779 
2780   if (isRegularReg(RegKind))
2781     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2782 
2783   return Reg;
2784 }
2785 
2786 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2787                                           unsigned &RegNum, unsigned &RegWidth,
2788                                           SmallVectorImpl<AsmToken> &Tokens) {
2789   auto Loc = getLoc();
2790   Reg = AMDGPU::NoRegister;
2791 
2792   if (isToken(AsmToken::Identifier)) {
2793     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2794     if (Reg == AMDGPU::NoRegister)
2795       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2796   } else {
2797     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2798   }
2799 
2800   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2801   if (Reg == AMDGPU::NoRegister) {
2802     assert(Parser.hasPendingError());
2803     return false;
2804   }
2805 
2806   if (!subtargetHasRegister(*TRI, Reg)) {
2807     if (Reg == AMDGPU::SGPR_NULL) {
2808       Error(Loc, "'null' operand is not supported on this GPU");
2809     } else {
2810       Error(Loc, "register not available on this GPU");
2811     }
2812     return false;
2813   }
2814 
2815   return true;
2816 }
2817 
2818 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2819                                           unsigned &RegNum, unsigned &RegWidth,
2820                                           bool RestoreOnFailure /*=false*/) {
2821   Reg = AMDGPU::NoRegister;
2822 
2823   SmallVector<AsmToken, 1> Tokens;
2824   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2825     if (RestoreOnFailure) {
2826       while (!Tokens.empty()) {
2827         getLexer().UnLex(Tokens.pop_back_val());
2828       }
2829     }
2830     return true;
2831   }
2832   return false;
2833 }
2834 
2835 Optional<StringRef>
2836 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2837   switch (RegKind) {
2838   case IS_VGPR:
2839     return StringRef(".amdgcn.next_free_vgpr");
2840   case IS_SGPR:
2841     return StringRef(".amdgcn.next_free_sgpr");
2842   default:
2843     return None;
2844   }
2845 }
2846 
2847 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2848   auto SymbolName = getGprCountSymbolName(RegKind);
2849   assert(SymbolName && "initializing invalid register kind");
2850   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2851   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2852 }
2853 
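// Raise the .amdgcn.next_free_{v,s}gpr symbol so that it stays one past the
// highest register index used so far.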
2854 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2855                                             unsigned DwordRegIndex,
2856                                             unsigned RegWidth) {
2857   // Symbols are only defined for GCN targets
2858   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2859     return true;
2860 
2861   auto SymbolName = getGprCountSymbolName(RegKind);
2862   if (!SymbolName)
2863     return true;
2864   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2865 
2866   int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
2867   int64_t OldCount;
2868 
2869   if (!Sym->isVariable())
2870     return !Error(getLoc(),
2871                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2872   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2873     return !Error(
2874         getLoc(),
2875         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2876 
2877   if (OldCount <= NewMax)
2878     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2879 
2880   return true;
2881 }
2882 
2883 std::unique_ptr<AMDGPUOperand>
2884 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2885   const auto &Tok = getToken();
2886   SMLoc StartLoc = Tok.getLoc();
2887   SMLoc EndLoc = Tok.getEndLoc();
2888   RegisterKind RegKind;
2889   unsigned Reg, RegNum, RegWidth;
2890 
2891   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2892     return nullptr;
2893   }
2894   if (isHsaAbiVersion3AndAbove(&getSTI())) {
2895     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2896       return nullptr;
2897   } else
2898     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2899   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2900 }
2901 
2902 OperandMatchResultTy
2903 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2904   // TODO: add syntactic sugar for 1/(2*PI)
2905 
2906   if (isRegister())
2907     return MatchOperand_NoMatch;
2908   assert(!isModifier());
2909 
2910   const auto& Tok = getToken();
2911   const auto& NextTok = peekToken();
2912   bool IsReal = Tok.is(AsmToken::Real);
2913   SMLoc S = getLoc();
2914   bool Negate = false;
2915 
2916   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2917     lex();
2918     IsReal = true;
2919     Negate = true;
2920   }
2921 
2922   if (IsReal) {
2923     // Floating-point expressions are not supported.
2924     // Only floating-point literals with an optional
2925     // sign are allowed.
2926 
2927     StringRef Num = getTokenStr();
2928     lex();
2929 
2930     APFloat RealVal(APFloat::IEEEdouble());
2931     auto roundMode = APFloat::rmNearestTiesToEven;
2932     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2933       return MatchOperand_ParseFail;
2934     }
2935     if (Negate)
2936       RealVal.changeSign();
2937 
2938     Operands.push_back(
2939       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2940                                AMDGPUOperand::ImmTyNone, true));
2941 
2942     return MatchOperand_Success;
2943 
2944   } else {
2945     int64_t IntVal;
2946     const MCExpr *Expr;
2947     SMLoc S = getLoc();
2948 
2949     if (HasSP3AbsModifier) {
2950       // This is a workaround for handling expressions
2951       // used as arguments of the SP3 'abs' modifier, for example:
2952       //     |1.0|
2953       //     |-1|
2954       //     |1+x|
2955       // This syntax is not compatible with the syntax of standard
2956       // MC expressions (due to the trailing '|').
2957       SMLoc EndLoc;
2958       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2959         return MatchOperand_ParseFail;
2960     } else {
2961       if (Parser.parseExpression(Expr))
2962         return MatchOperand_ParseFail;
2963     }
2964 
2965     if (Expr->evaluateAsAbsolute(IntVal)) {
2966       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2967     } else {
2968       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2969     }
2970 
2971     return MatchOperand_Success;
2972   }
2973 
2974   return MatchOperand_NoMatch;
2975 }
2976 
2977 OperandMatchResultTy
2978 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2979   if (!isRegister())
2980     return MatchOperand_NoMatch;
2981 
2982   if (auto R = parseRegister()) {
2983     assert(R->isReg());
2984     Operands.push_back(std::move(R));
2985     return MatchOperand_Success;
2986   }
2987   return MatchOperand_ParseFail;
2988 }
2989 
2990 OperandMatchResultTy
2991 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2992   auto res = parseReg(Operands);
2993   if (res != MatchOperand_NoMatch) {
2994     return res;
2995   } else if (isModifier()) {
2996     return MatchOperand_NoMatch;
2997   } else {
2998     return parseImm(Operands, HasSP3AbsMod);
2999   }
3000 }
3001 
3002 bool
3003 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3004   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3005     const auto &str = Token.getString();
3006     return str == "abs" || str == "neg" || str == "sext";
3007   }
3008   return false;
3009 }
3010 
3011 bool
3012 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3013   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3014 }
3015 
3016 bool
3017 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3018   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3019 }
3020 
3021 bool
3022 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3023   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3024 }
3025 
3026 // Check if this is an operand modifier or an opcode modifier
3027 // which may look like an expression but is not. We should
3028 // avoid parsing these modifiers as expressions. Currently
3029 // recognized sequences are:
3030 //   |...|
3031 //   abs(...)
3032 //   neg(...)
3033 //   sext(...)
3034 //   -reg
3035 //   -|...|
3036 //   -abs(...)
3037 //   name:...
3038 // Note that simple opcode modifiers like 'gds' may be parsed as
3039 // expressions; this is a special case. See getExpressionAsToken.
3040 //
3041 bool
3042 AMDGPUAsmParser::isModifier() {
3043 
3044   AsmToken Tok = getToken();
3045   AsmToken NextToken[2];
3046   peekTokens(NextToken);
3047 
3048   return isOperandModifier(Tok, NextToken[0]) ||
3049          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3050          isOpcodeModifierWithVal(Tok, NextToken[0]);
3051 }
3052 
3053 // Check if the current token is an SP3 'neg' modifier.
3054 // Currently this modifier is allowed in the following context:
3055 //
3056 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3057 // 2. Before an 'abs' modifier: -abs(...)
3058 // 3. Before an SP3 'abs' modifier: -|...|
3059 //
3060 // In all other cases "-" is handled as a part
3061 // of an expression that follows the sign.
3062 //
3063 // Note: When "-" is followed by an integer literal,
3064 // this is interpreted as integer negation rather
3065 // than a floating-point NEG modifier applied to the literal.
3066 // Besides being counter-intuitive, such use of the floating-point
3067 // NEG modifier would result in different meanings
3068 // of integer literals used with VOP1/2/C and VOP3,
3069 // for example:
3070 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3071 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3072 // Negative fp literals with a preceding "-" are
3073 // handled likewise for uniformity.
3074 //
3075 bool
3076 AMDGPUAsmParser::parseSP3NegModifier() {
3077 
3078   AsmToken NextToken[2];
3079   peekTokens(NextToken);
3080 
3081   if (isToken(AsmToken::Minus) &&
3082       (isRegister(NextToken[0], NextToken[1]) ||
3083        NextToken[0].is(AsmToken::Pipe) ||
3084        isId(NextToken[0], "abs"))) {
3085     lex();
3086     return true;
3087   }
3088 
3089   return false;
3090 }
3091 
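// Parse a register or immediate preceded by optional floating-point input
// modifiers, in either named form (neg(...), abs(...)) or SP3 form (-..., |...|).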
3092 OperandMatchResultTy
3093 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3094                                               bool AllowImm) {
3095   bool Neg, SP3Neg;
3096   bool Abs, SP3Abs;
3097   SMLoc Loc;
3098 
3099   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3100   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
3101     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3102     return MatchOperand_ParseFail;
3103   }
3104 
3105   SP3Neg = parseSP3NegModifier();
3106 
3107   Loc = getLoc();
3108   Neg = trySkipId("neg");
3109   if (Neg && SP3Neg) {
3110     Error(Loc, "expected register or immediate");
3111     return MatchOperand_ParseFail;
3112   }
3113   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3114     return MatchOperand_ParseFail;
3115 
3116   Abs = trySkipId("abs");
3117   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3118     return MatchOperand_ParseFail;
3119 
3120   Loc = getLoc();
3121   SP3Abs = trySkipToken(AsmToken::Pipe);
3122   if (Abs && SP3Abs) {
3123     Error(Loc, "expected register or immediate");
3124     return MatchOperand_ParseFail;
3125   }
3126 
3127   OperandMatchResultTy Res;
3128   if (AllowImm) {
3129     Res = parseRegOrImm(Operands, SP3Abs);
3130   } else {
3131     Res = parseReg(Operands);
3132   }
3133   if (Res != MatchOperand_Success) {
3134     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
3135   }
3136 
3137   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3138     return MatchOperand_ParseFail;
3139   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3140     return MatchOperand_ParseFail;
3141   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3142     return MatchOperand_ParseFail;
3143 
3144   AMDGPUOperand::Modifiers Mods;
3145   Mods.Abs = Abs || SP3Abs;
3146   Mods.Neg = Neg || SP3Neg;
3147 
3148   if (Mods.hasFPModifiers()) {
3149     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3150     if (Op.isExpr()) {
3151       Error(Op.getStartLoc(), "expected an absolute expression");
3152       return MatchOperand_ParseFail;
3153     }
3154     Op.setModifiers(Mods);
3155   }
3156   return MatchOperand_Success;
3157 }
3158 
3159 OperandMatchResultTy
3160 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3161                                                bool AllowImm) {
3162   bool Sext = trySkipId("sext");
3163   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3164     return MatchOperand_ParseFail;
3165 
3166   OperandMatchResultTy Res;
3167   if (AllowImm) {
3168     Res = parseRegOrImm(Operands);
3169   } else {
3170     Res = parseReg(Operands);
3171   }
3172   if (Res != MatchOperand_Success) {
3173     return Sext? MatchOperand_ParseFail : Res;
3174   }
3175 
3176   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3177     return MatchOperand_ParseFail;
3178 
3179   AMDGPUOperand::Modifiers Mods;
3180   Mods.Sext = Sext;
3181 
3182   if (Mods.hasIntModifiers()) {
3183     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3184     if (Op.isExpr()) {
3185       Error(Op.getStartLoc(), "expected an absolute expression");
3186       return MatchOperand_ParseFail;
3187     }
3188     Op.setModifiers(Mods);
3189   }
3190 
3191   return MatchOperand_Success;
3192 }
3193 
3194 OperandMatchResultTy
3195 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3196   return parseRegOrImmWithFPInputMods(Operands, false);
3197 }
3198 
3199 OperandMatchResultTy
3200 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3201   return parseRegOrImmWithIntInputMods(Operands, false);
3202 }
3203 
3204 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3205   auto Loc = getLoc();
3206   if (trySkipId("off")) {
3207     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3208                                                 AMDGPUOperand::ImmTyOff, false));
3209     return MatchOperand_Success;
3210   }
3211 
3212   if (!isRegister())
3213     return MatchOperand_NoMatch;
3214 
3215   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3216   if (Reg) {
3217     Operands.push_back(std::move(Reg));
3218     return MatchOperand_Success;
3219   }
3220 
3221   return MatchOperand_ParseFail;
3223 }
3224 
3225 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3226   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3227 
3228   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3229       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3230       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3231       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3232     return Match_InvalidOperand;
3233 
3234   if ((TSFlags & SIInstrFlags::VOP3) &&
3235       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3236       getForcedEncodingSize() != 64)
3237     return Match_PreferE32;
3238 
3239   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3240       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3241     // v_mac_f32/16 allow only dst_sel == DWORD;
3242     auto OpNum =
3243         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3244     const auto &Op = Inst.getOperand(OpNum);
3245     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3246       return Match_InvalidOperand;
3247     }
3248   }
3249 
3250   return Match_Success;
3251 }
3252 
3253 static ArrayRef<unsigned> getAllVariants() {
3254   static const unsigned Variants[] = {
3255     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3256     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
3257     AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
3258   };
3259 
3260   return makeArrayRef(Variants);
3261 }
3262 
3263 // What asm variants we should check
3264 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3265   if (isForcedDPP() && isForcedVOP3()) {
3266     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3267     return makeArrayRef(Variants);
3268   }
3269   if (getForcedEncodingSize() == 32) {
3270     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3271     return makeArrayRef(Variants);
3272   }
3273 
3274   if (isForcedVOP3()) {
3275     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3276     return makeArrayRef(Variants);
3277   }
3278 
3279   if (isForcedSDWA()) {
3280     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3281                                         AMDGPUAsmVariants::SDWA9};
3282     return makeArrayRef(Variants);
3283   }
3284 
3285   if (isForcedDPP()) {
3286     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3287     return makeArrayRef(Variants);
3288   }
3289 
3290   return getAllVariants();
3291 }
3292 
3293 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3294   if (isForcedDPP() && isForcedVOP3())
3295     return "e64_dpp";
3296 
3297   if (getForcedEncodingSize() == 32)
3298     return "e32";
3299 
3300   if (isForcedVOP3())
3301     return "e64";
3302 
3303   if (isForcedSDWA())
3304     return "sdwa";
3305 
3306   if (isForcedDPP())
3307     return "dpp";
3308 
3309   return "";
3310 }
3311 
3312 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3313   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3314   const unsigned Num = Desc.getNumImplicitUses();
3315   for (unsigned i = 0; i < Num; ++i) {
3316     unsigned Reg = Desc.ImplicitUses[i];
3317     switch (Reg) {
3318     case AMDGPU::FLAT_SCR:
3319     case AMDGPU::VCC:
3320     case AMDGPU::VCC_LO:
3321     case AMDGPU::VCC_HI:
3322     case AMDGPU::M0:
3323       return Reg;
3324     default:
3325       break;
3326     }
3327   }
3328   return AMDGPU::NoRegister;
3329 }
3330 
3331 // NB: This code is correct only when used to check constant
3332 // bus limitations because GFX7 supports no f16 inline constants.
3333 // Note that there are no cases when a GFX7 opcode violates
3334 // constant bus limitations due to the use of an f16 constant.
3335 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3336                                        unsigned OpIdx) const {
3337   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3338 
3339   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3340     return false;
3341   }
3342 
3343   const MCOperand &MO = Inst.getOperand(OpIdx);
3344 
3345   int64_t Val = MO.getImm();
3346   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3347 
3348   switch (OpSize) { // expected operand size
3349   case 8:
3350     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3351   case 4:
3352     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3353   case 2: {
3354     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3355     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3356         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3357         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3358       return AMDGPU::isInlinableIntLiteral(Val);
3359 
3360     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3361         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3362         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3363       return AMDGPU::isInlinableIntLiteralV216(Val);
3364 
3365     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3366         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3367         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3368       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3369 
3370     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3371   }
3372   default:
3373     llvm_unreachable("invalid operand size");
3374   }
3375 }
3376 
3377 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3378   if (!isGFX10Plus())
3379     return 1;
3380 
3381   switch (Opcode) {
3382   // 64-bit shift instructions can use only one scalar value input
3383   case AMDGPU::V_LSHLREV_B64_e64:
3384   case AMDGPU::V_LSHLREV_B64_gfx10:
3385   case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3386   case AMDGPU::V_LSHRREV_B64_e64:
3387   case AMDGPU::V_LSHRREV_B64_gfx10:
3388   case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3389   case AMDGPU::V_ASHRREV_I64_e64:
3390   case AMDGPU::V_ASHRREV_I64_gfx10:
3391   case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3392   case AMDGPU::V_LSHL_B64_e64:
3393   case AMDGPU::V_LSHR_B64_e64:
3394   case AMDGPU::V_ASHR_I64_e64:
3395     return 1;
3396   default:
3397     return 2;
3398   }
3399 }
3400 
3401 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3402   const MCOperand &MO = Inst.getOperand(OpIdx);
3403   if (MO.isImm()) {
3404     return !isInlineConstant(Inst, OpIdx);
3405   } else if (MO.isReg()) {
3406     auto Reg = MO.getReg();
3407     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3408     auto PReg = mc2PseudoReg(Reg);
3409     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3410   } else {
3411     return true;
3412   }
3413 }
3414 
3415 bool
3416 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3417                                                 const OperandVector &Operands) {
3418   const unsigned Opcode = Inst.getOpcode();
3419   const MCInstrDesc &Desc = MII.get(Opcode);
3420   unsigned LastSGPR = AMDGPU::NoRegister;
3421   unsigned ConstantBusUseCount = 0;
3422   unsigned NumLiterals = 0;
3423   unsigned LiteralSize;
3424 
3425   if (Desc.TSFlags &
3426       (SIInstrFlags::VOPC |
3427        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3428        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3429        SIInstrFlags::SDWA)) {
3430     // Check special imm operands (used by madmk, etc)
3431     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3432       ++NumLiterals;
3433       LiteralSize = 4;
3434     }
3435 
3436     SmallDenseSet<unsigned> SGPRsUsed;
3437     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3438     if (SGPRUsed != AMDGPU::NoRegister) {
3439       SGPRsUsed.insert(SGPRUsed);
3440       ++ConstantBusUseCount;
3441     }
3442 
3443     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3444     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3445     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3446 
3447     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3448 
3449     for (int OpIdx : OpIndices) {
3450       if (OpIdx == -1) break;
3451 
3452       const MCOperand &MO = Inst.getOperand(OpIdx);
3453       if (usesConstantBus(Inst, OpIdx)) {
3454         if (MO.isReg()) {
3455           LastSGPR = mc2PseudoReg(MO.getReg());
3456           // Pairs of registers with a partial intersection like these
3457           //   s0, s[0:1]
3458           //   flat_scratch_lo, flat_scratch
3459           //   flat_scratch_lo, flat_scratch_hi
3460           // are theoretically valid but they are disabled anyway.
3461           // Note that this code mimics SIInstrInfo::verifyInstruction
3462           if (SGPRsUsed.insert(LastSGPR).second) {
3463             ++ConstantBusUseCount;
3464           }
3465         } else { // Expression or a literal
3466 
3467           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3468             continue; // special operand like VINTERP attr_chan
3469 
3470           // An instruction may use only one literal.
3471           // This has been validated on the previous step.
3472           // See validateVOPLiteral.
3473           // This literal may be used as more than one operand.
3474           // If all these operands are of the same size,
3475           // this literal counts as one scalar value.
3476           // Otherwise it counts as 2 scalar values.
3477           // See "GFX10 Shader Programming", section 3.6.2.3.
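          // For example (illustrative): a literal reused by a 32-bit and a
          // 64-bit operand of the same instruction has mismatching sizes and
          // counts as 2 values, while reuse by equally sized operands counts
          // as 1.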
3478 
3479           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3480           if (Size < 4) Size = 4;
3481 
3482           if (NumLiterals == 0) {
3483             NumLiterals = 1;
3484             LiteralSize = Size;
3485           } else if (LiteralSize != Size) {
3486             NumLiterals = 2;
3487           }
3488         }
3489       }
3490     }
3491   }
3492   ConstantBusUseCount += NumLiterals;
3493 
3494   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3495     return true;
3496 
3497   SMLoc LitLoc = getLitLoc(Operands);
3498   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3499   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3500   Error(Loc, "invalid operand (violates constant bus restrictions)");
3501   return false;
3502 }
3503 
3504 bool
3505 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3506                                                  const OperandVector &Operands) {
3507   const unsigned Opcode = Inst.getOpcode();
3508   const MCInstrDesc &Desc = MII.get(Opcode);
3509 
3510   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3511   if (DstIdx == -1 ||
3512       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3513     return true;
3514   }
3515 
3516   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3517 
3518   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3519   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3520   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3521 
3522   assert(DstIdx != -1);
3523   const MCOperand &Dst = Inst.getOperand(DstIdx);
3524   assert(Dst.isReg());
3525 
3526   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3527 
3528   for (int SrcIdx : SrcIndices) {
3529     if (SrcIdx == -1) break;
3530     const MCOperand &Src = Inst.getOperand(SrcIdx);
3531     if (Src.isReg()) {
3532       if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) {
3533         const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3534         Error(getRegLoc(SrcReg, Operands),
3535           "destination must be different than all sources");
3536         return false;
3537       }
3538     }
3539   }
3540 
3541   return true;
3542 }
3543 
3544 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3545 
3546   const unsigned Opc = Inst.getOpcode();
3547   const MCInstrDesc &Desc = MII.get(Opc);
3548 
3549   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3550     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3551     assert(ClampIdx != -1);
3552     return Inst.getOperand(ClampIdx).getImm() == 0;
3553   }
3554 
3555   return true;
3556 }
3557 
3558 Optional<StringRef> AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3559 
3560   const unsigned Opc = Inst.getOpcode();
3561   const MCInstrDesc &Desc = MII.get(Opc);
3562 
3563   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3564     return None;
3565 
3566   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3567   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3568   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3569 
3570   assert(VDataIdx != -1);
3571 
3572   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3573     return None;
3574 
3575   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3576   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3577   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3578   if (DMask == 0)
3579     DMask = 1;
3580 
3581   bool isPackedD16 = false;
3582   unsigned DataSize =
3583     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3584   if (hasPackedD16()) {
3585     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3586     isPackedD16 = D16Idx >= 0;
3587     if (isPackedD16 && Inst.getOperand(D16Idx).getImm())
3588       DataSize = (DataSize + 1) / 2;
3589   }
3590 
3591   if ((VDataSize / 4) == DataSize + TFESize)
3592     return None;
3593 
3594   return StringRef(isPackedD16
3595                        ? "image data size does not match dmask, d16 and tfe"
3596                        : "image data size does not match dmask and tfe");
3597 }
3598 
3599 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3600   const unsigned Opc = Inst.getOpcode();
3601   const MCInstrDesc &Desc = MII.get(Opc);
3602 
3603   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3604     return true;
3605 
3606   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3607 
3608   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3609       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3610   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3611   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3612   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3613   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3614 
3615   assert(VAddr0Idx != -1);
3616   assert(SrsrcIdx != -1);
3617   assert(SrsrcIdx > VAddr0Idx);
3618 
3619   if (DimIdx == -1)
3620     return true; // intersect_ray
3621 
3622   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3623   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3624   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3625   unsigned ActualAddrSize =
3626       IsNSA ? SrsrcIdx - VAddr0Idx
3627             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3628   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3629 
3630   unsigned ExpectedAddrSize =
3631       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3632 
3633   if (!IsNSA) {
3634     if (ExpectedAddrSize > 8)
3635       ExpectedAddrSize = 16;
3636 
3637     // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3638     // This provides backward compatibility for assembly created
3639     // before 160b/192b/224b types were directly supported.
3640     if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3641       return true;
3642   }
3643 
3644   return ActualAddrSize == ExpectedAddrSize;
3645 }
3646 
3647 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3648 
3649   const unsigned Opc = Inst.getOpcode();
3650   const MCInstrDesc &Desc = MII.get(Opc);
3651 
3652   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3653     return true;
3654   if (!Desc.mayLoad() || !Desc.mayStore())
3655     return true; // Not atomic
3656 
3657   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3658   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3659 
3660   // This is an incomplete check because image_atomic_cmpswap
3661   // may only use 0x3 and 0xf while other atomic operations
3662   // may use 0x1 and 0x3. However, these limitations are
3663   // verified when we check that dmask matches dst size.
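  // For example (illustrative): dmask:0x1 or dmask:0x3 on an image atomic
  // passes this check, while dmask:0x5 is rejected here.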
3664   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3665 }
3666 
3667 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3668 
3669   const unsigned Opc = Inst.getOpcode();
3670   const MCInstrDesc &Desc = MII.get(Opc);
3671 
3672   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3673     return true;
3674 
3675   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3676   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3677 
3678   // GATHER4 instructions use dmask in a different fashion compared to
3679   // other MIMG instructions. The only useful DMASK values are
3680   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3681   // (red,red,red,red) etc.) The ISA document doesn't mention
3682   // this.
3683   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3684 }
3685 
3686 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3687   const unsigned Opc = Inst.getOpcode();
3688   const MCInstrDesc &Desc = MII.get(Opc);
3689 
3690   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3691     return true;
3692 
3693   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3694   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3695       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3696 
3697   if (!BaseOpcode->MSAA)
3698     return true;
3699 
3700   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3701   assert(DimIdx != -1);
3702 
3703   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3704   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3705 
3706   return DimInfo->MSAA;
3707 }
3708 
3709 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3710 {
3711   switch (Opcode) {
3712   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3713   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3714   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3715     return true;
3716   default:
3717     return false;
3718   }
3719 }
3720 
3721 // movrels* opcodes should only allow VGPRs as src0.
3722 // This is specified in .td description for vop1/vop3,
3723 // but sdwa is handled differently. See isSDWAOperand.
3724 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3725                                       const OperandVector &Operands) {
3726 
3727   const unsigned Opc = Inst.getOpcode();
3728   const MCInstrDesc &Desc = MII.get(Opc);
3729 
3730   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3731     return true;
3732 
3733   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3734   assert(Src0Idx != -1);
3735 
3736   SMLoc ErrLoc;
3737   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3738   if (Src0.isReg()) {
3739     auto Reg = mc2PseudoReg(Src0.getReg());
3740     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3741     if (!isSGPR(Reg, TRI))
3742       return true;
3743     ErrLoc = getRegLoc(Reg, Operands);
3744   } else {
3745     ErrLoc = getConstLoc(Operands);
3746   }
3747 
3748   Error(ErrLoc, "source operand must be a VGPR");
3749   return false;
3750 }
3751 
3752 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3753                                           const OperandVector &Operands) {
3754 
3755   const unsigned Opc = Inst.getOpcode();
3756 
3757   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3758     return true;
3759 
3760   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3761   assert(Src0Idx != -1);
3762 
3763   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3764   if (!Src0.isReg())
3765     return true;
3766 
3767   auto Reg = mc2PseudoReg(Src0.getReg());
3768   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3769   if (!isGFX90A() && isSGPR(Reg, TRI)) {
3770     Error(getRegLoc(Reg, Operands),
3771           "source operand must be either a VGPR or an inline constant");
3772     return false;
3773   }
3774 
3775   return true;
3776 }
3777 
3778 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3779                                    const OperandVector &Operands) {
3780   const unsigned Opc = Inst.getOpcode();
3781   const MCInstrDesc &Desc = MII.get(Opc);
3782 
3783   if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3784     return true;
3785 
3786   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3787   if (Src2Idx == -1)
3788     return true;
3789 
3790   const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3791   if (!Src2.isReg())
3792     return true;
3793 
3794   MCRegister Src2Reg = Src2.getReg();
3795   MCRegister DstReg = Inst.getOperand(0).getReg();
3796   if (Src2Reg == DstReg)
3797     return true;
3798 
3799   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3800   if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
3801     return true;
3802 
3803   if (TRI->regsOverlap(Src2Reg, DstReg)) {
3804     Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3805           "source 2 operand must not partially overlap with dst");
3806     return false;
3807   }
3808 
3809   return true;
3810 }
3811 
3812 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3813   switch (Inst.getOpcode()) {
3814   default:
3815     return true;
3816   case V_DIV_SCALE_F32_gfx6_gfx7:
3817   case V_DIV_SCALE_F32_vi:
3818   case V_DIV_SCALE_F32_gfx10:
3819   case V_DIV_SCALE_F64_gfx6_gfx7:
3820   case V_DIV_SCALE_F64_vi:
3821   case V_DIV_SCALE_F64_gfx10:
3822     break;
3823   }
3824 
3825   // TODO: Check that src0 = src1 or src2.
3826 
3827   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3828                     AMDGPU::OpName::src1_modifiers,
3829                     AMDGPU::OpName::src2_modifiers}) {
3830     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3831             .getImm() &
3832         SISrcMods::ABS) {
3833       return false;
3834     }
3835   }
3836 
3837   return true;
3838 }
3839 
3840 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3841 
3842   const unsigned Opc = Inst.getOpcode();
3843   const MCInstrDesc &Desc = MII.get(Opc);
3844 
3845   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3846     return true;
3847 
3848   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3849   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3850     if (isCI() || isSI())
3851       return false;
3852   }
3853 
3854   return true;
3855 }
3856 
3857 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3858   const unsigned Opc = Inst.getOpcode();
3859   const MCInstrDesc &Desc = MII.get(Opc);
3860 
3861   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3862     return true;
3863 
3864   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3865   if (DimIdx < 0)
3866     return true;
3867 
3868   long Imm = Inst.getOperand(DimIdx).getImm();
3869   if (Imm < 0 || Imm >= 8)
3870     return false;
3871 
3872   return true;
3873 }
3874 
3875 static bool IsRevOpcode(const unsigned Opcode)
3876 {
3877   switch (Opcode) {
3878   case AMDGPU::V_SUBREV_F32_e32:
3879   case AMDGPU::V_SUBREV_F32_e64:
3880   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3881   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3882   case AMDGPU::V_SUBREV_F32_e32_vi:
3883   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3884   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3885   case AMDGPU::V_SUBREV_F32_e64_vi:
3886 
3887   case AMDGPU::V_SUBREV_CO_U32_e32:
3888   case AMDGPU::V_SUBREV_CO_U32_e64:
3889   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3890   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3891 
3892   case AMDGPU::V_SUBBREV_U32_e32:
3893   case AMDGPU::V_SUBBREV_U32_e64:
3894   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3895   case AMDGPU::V_SUBBREV_U32_e32_vi:
3896   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3897   case AMDGPU::V_SUBBREV_U32_e64_vi:
3898 
3899   case AMDGPU::V_SUBREV_U32_e32:
3900   case AMDGPU::V_SUBREV_U32_e64:
3901   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3902   case AMDGPU::V_SUBREV_U32_e32_vi:
3903   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3904   case AMDGPU::V_SUBREV_U32_e64_vi:
3905 
3906   case AMDGPU::V_SUBREV_F16_e32:
3907   case AMDGPU::V_SUBREV_F16_e64:
3908   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3909   case AMDGPU::V_SUBREV_F16_e32_vi:
3910   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3911   case AMDGPU::V_SUBREV_F16_e64_vi:
3912 
3913   case AMDGPU::V_SUBREV_U16_e32:
3914   case AMDGPU::V_SUBREV_U16_e64:
3915   case AMDGPU::V_SUBREV_U16_e32_vi:
3916   case AMDGPU::V_SUBREV_U16_e64_vi:
3917 
3918   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3919   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3920   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3921 
3922   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3923   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3924 
3925   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3926   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3927 
3928   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3929   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3930 
3931   case AMDGPU::V_LSHRREV_B32_e32:
3932   case AMDGPU::V_LSHRREV_B32_e64:
3933   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3934   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3935   case AMDGPU::V_LSHRREV_B32_e32_vi:
3936   case AMDGPU::V_LSHRREV_B32_e64_vi:
3937   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3938   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3939 
3940   case AMDGPU::V_ASHRREV_I32_e32:
3941   case AMDGPU::V_ASHRREV_I32_e64:
3942   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3943   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3944   case AMDGPU::V_ASHRREV_I32_e32_vi:
3945   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3946   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3947   case AMDGPU::V_ASHRREV_I32_e64_vi:
3948 
3949   case AMDGPU::V_LSHLREV_B32_e32:
3950   case AMDGPU::V_LSHLREV_B32_e64:
3951   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3952   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3953   case AMDGPU::V_LSHLREV_B32_e32_vi:
3954   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3955   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3956   case AMDGPU::V_LSHLREV_B32_e64_vi:
3957 
3958   case AMDGPU::V_LSHLREV_B16_e32:
3959   case AMDGPU::V_LSHLREV_B16_e64:
3960   case AMDGPU::V_LSHLREV_B16_e32_vi:
3961   case AMDGPU::V_LSHLREV_B16_e64_vi:
3962   case AMDGPU::V_LSHLREV_B16_gfx10:
3963 
3964   case AMDGPU::V_LSHRREV_B16_e32:
3965   case AMDGPU::V_LSHRREV_B16_e64:
3966   case AMDGPU::V_LSHRREV_B16_e32_vi:
3967   case AMDGPU::V_LSHRREV_B16_e64_vi:
3968   case AMDGPU::V_LSHRREV_B16_gfx10:
3969 
3970   case AMDGPU::V_ASHRREV_I16_e32:
3971   case AMDGPU::V_ASHRREV_I16_e64:
3972   case AMDGPU::V_ASHRREV_I16_e32_vi:
3973   case AMDGPU::V_ASHRREV_I16_e64_vi:
3974   case AMDGPU::V_ASHRREV_I16_gfx10:
3975 
3976   case AMDGPU::V_LSHLREV_B64_e64:
3977   case AMDGPU::V_LSHLREV_B64_gfx10:
3978   case AMDGPU::V_LSHLREV_B64_vi:
3979 
3980   case AMDGPU::V_LSHRREV_B64_e64:
3981   case AMDGPU::V_LSHRREV_B64_gfx10:
3982   case AMDGPU::V_LSHRREV_B64_vi:
3983 
3984   case AMDGPU::V_ASHRREV_I64_e64:
3985   case AMDGPU::V_ASHRREV_I64_gfx10:
3986   case AMDGPU::V_ASHRREV_I64_vi:
3987 
3988   case AMDGPU::V_PK_LSHLREV_B16:
3989   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3990   case AMDGPU::V_PK_LSHLREV_B16_vi:
3991 
3992   case AMDGPU::V_PK_LSHRREV_B16:
3993   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3994   case AMDGPU::V_PK_LSHRREV_B16_vi:
3995   case AMDGPU::V_PK_ASHRREV_I16:
3996   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3997   case AMDGPU::V_PK_ASHRREV_I16_vi:
3998     return true;
3999   default:
4000     return false;
4001   }
4002 }
4003 
4004 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4005 
4006   using namespace SIInstrFlags;
4007   const unsigned Opcode = Inst.getOpcode();
4008   const MCInstrDesc &Desc = MII.get(Opcode);
4009 
4010   // lds_direct register is defined so that it can be used
4011   // with 9-bit operands only. Ignore encodings which do not accept these.
4012   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4013   if ((Desc.TSFlags & Enc) == 0)
4014     return None;
4015 
4016   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4017     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4018     if (SrcIdx == -1)
4019       break;
4020     const auto &Src = Inst.getOperand(SrcIdx);
4021     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4022 
4023       if (isGFX90A() || isGFX11Plus())
4024         return StringRef("lds_direct is not supported on this GPU");
4025 
4026       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4027         return StringRef("lds_direct cannot be used with this instruction");
4028 
4029       if (SrcName != OpName::src0)
4030         return StringRef("lds_direct may be used as src0 only");
4031     }
4032   }
4033 
4034   return None;
4035 }
4036 
4037 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4038   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4039     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4040     if (Op.isFlatOffset())
4041       return Op.getStartLoc();
4042   }
4043   return getLoc();
4044 }
4045 
4046 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4047                                          const OperandVector &Operands) {
4048   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4049   if ((TSFlags & SIInstrFlags::FLAT) == 0)
4050     return true;
4051 
4052   auto Opcode = Inst.getOpcode();
4053   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4054   assert(OpNum != -1);
4055 
4056   const auto &Op = Inst.getOperand(OpNum);
4057   if (!hasFlatOffsets() && Op.getImm() != 0) {
4058     Error(getFlatOffsetLoc(Operands),
4059           "flat offset modifier is not supported on this GPU");
4060     return false;
4061   }
4062 
4063   // For FLAT segment the offset must be positive;
4064   // MSB is ignored and forced to zero.
4065   if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
4066     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
4067     if (!isIntN(OffsetSize, Op.getImm())) {
4068       Error(getFlatOffsetLoc(Operands),
4069             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4070       return false;
4071     }
4072   } else {
4073     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
4074     if (!isUIntN(OffsetSize, Op.getImm())) {
4075       Error(getFlatOffsetLoc(Operands),
4076             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4077       return false;
4078     }
4079   }
4080 
4081   return true;
4082 }
4083 
4084 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4085   // Start with second operand because SMEM Offset cannot be dst or src0.
4086   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4087     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4088     if (Op.isSMEMOffset())
4089       return Op.getStartLoc();
4090   }
4091   return getLoc();
4092 }
4093 
4094 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4095                                          const OperandVector &Operands) {
4096   if (isCI() || isSI())
4097     return true;
4098 
4099   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4100   if ((TSFlags & SIInstrFlags::SMRD) == 0)
4101     return true;
4102 
4103   auto Opcode = Inst.getOpcode();
4104   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4105   if (OpNum == -1)
4106     return true;
4107 
4108   const auto &Op = Inst.getOperand(OpNum);
4109   if (!Op.isImm())
4110     return true;
4111 
4112   uint64_t Offset = Op.getImm();
4113   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4114   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4115       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4116     return true;
4117 
4118   Error(getSMEMOffsetLoc(Operands),
4119         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
4120                                "expected a 21-bit signed offset");
4121 
4122   return false;
4123 }
4124 
4125 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4126   unsigned Opcode = Inst.getOpcode();
4127   const MCInstrDesc &Desc = MII.get(Opcode);
4128   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4129     return true;
4130 
4131   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4132   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4133 
4134   const int OpIndices[] = { Src0Idx, Src1Idx };
4135 
4136   unsigned NumExprs = 0;
4137   unsigned NumLiterals = 0;
4138   uint32_t LiteralValue;
4139 
4140   for (int OpIdx : OpIndices) {
4141     if (OpIdx == -1) break;
4142 
4143     const MCOperand &MO = Inst.getOperand(OpIdx);
4144     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4145     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4146       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4147         uint32_t Value = static_cast<uint32_t>(MO.getImm());
4148         if (NumLiterals == 0 || LiteralValue != Value) {
4149           LiteralValue = Value;
4150           ++NumLiterals;
4151         }
4152       } else if (MO.isExpr()) {
4153         ++NumExprs;
4154       }
4155     }
4156   }
4157 
4158   return NumLiterals + NumExprs <= 1;
4159 }
4160 
4161 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4162   const unsigned Opc = Inst.getOpcode();
4163   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
4164       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
4165     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4166     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4167 
4168     if (OpSel & ~3)
4169       return false;
4170   }
4171 
4172   if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) {
4173     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4174     if (OpSelIdx != -1) {
4175       if (Inst.getOperand(OpSelIdx).getImm() != 0)
4176         return false;
4177     }
4178     int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4179     if (OpSelHiIdx != -1) {
4180       if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4181         return false;
4182     }
4183   }
4184 
4185   return true;
4186 }
4187 
4188 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4189                                   const OperandVector &Operands) {
4190   const unsigned Opc = Inst.getOpcode();
4191   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4192   if (DppCtrlIdx < 0)
4193     return true;
4194   unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4195 
4196   if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
4197     // DPP64 is supported for row_newbcast only.
4198     int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4199     if (Src0Idx >= 0 &&
4200         getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
4201       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4202       Error(S, "64 bit dpp only supports row_newbcast");
4203       return false;
4204     }
4205   }
4206 
4207   return true;
4208 }
4209 
4210 // Check if VCC register matches wavefront size
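// For example (illustrative): with wave32 enabled only vcc_lo passes this
// check; plain vcc is accepted only in wave64 mode.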
4211 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4212   auto FB = getFeatureBits();
4213   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4214     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4215 }
4216 
4217 // Only one unique literal can be used. A VOP3 literal is only allowed on GFX10+.
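// For example (illustrative): a VOP3 instruction that reuses the same 32-bit
// literal value in two source operands is counted as a single literal below,
// while two distinct literal values trigger an error.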
4218 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4219                                          const OperandVector &Operands) {
4220   unsigned Opcode = Inst.getOpcode();
4221   const MCInstrDesc &Desc = MII.get(Opcode);
4222   const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
4223   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4224       ImmIdx == -1)
4225     return true;
4226 
4227   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4228   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4229   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
4230 
4231   const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};
4232 
4233   unsigned NumExprs = 0;
4234   unsigned NumLiterals = 0;
4235   uint32_t LiteralValue;
4236 
4237   for (int OpIdx : OpIndices) {
4238     if (OpIdx == -1)
4239       continue;
4240 
4241     const MCOperand &MO = Inst.getOperand(OpIdx);
4242     if (!MO.isImm() && !MO.isExpr())
4243       continue;
4244     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4245       continue;
4246 
4247     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4248         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4249       Error(getConstLoc(Operands),
4250             "inline constants are not allowed for this operand");
4251       return false;
4252     }
4253 
4254     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4255       uint32_t Value = static_cast<uint32_t>(MO.getImm());
4256       if (NumLiterals == 0 || LiteralValue != Value) {
4257         LiteralValue = Value;
4258         ++NumLiterals;
4259       }
4260     } else if (MO.isExpr()) {
4261       ++NumExprs;
4262     }
4263   }
4264   NumLiterals += NumExprs;
4265 
4266   if (!NumLiterals)
4267     return true;
4268 
4269   if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4270     Error(getLitLoc(Operands), "literal operands are not supported");
4271     return false;
4272   }
4273 
4274   if (NumLiterals > 1) {
4275     Error(getLitLoc(Operands), "only one literal operand is allowed");
4276     return false;
4277   }
4278 
4279   return true;
4280 }
4281 
4282 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4283 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4284                          const MCRegisterInfo *MRI) {
4285   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4286   if (OpIdx < 0)
4287     return -1;
4288 
4289   const MCOperand &Op = Inst.getOperand(OpIdx);
4290   if (!Op.isReg())
4291     return -1;
4292 
4293   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4294   auto Reg = Sub ? Sub : Op.getReg();
4295   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4296   return AGPR32.contains(Reg) ? 1 : 0;
4297 }
4298 
4299 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4300   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4301   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4302                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4303                   SIInstrFlags::DS)) == 0)
4304     return true;
4305 
4306   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4307                                                       : AMDGPU::OpName::vdata;
4308 
4309   const MCRegisterInfo *MRI = getMRI();
4310   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4311   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4312 
4313   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4314     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4315     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4316       return false;
4317   }
4318 
4319   auto FB = getFeatureBits();
4320   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4321     if (DataAreg < 0 || DstAreg < 0)
4322       return true;
4323     return DstAreg == DataAreg;
4324   }
4325 
4326   return DstAreg < 1 && DataAreg < 1;
4327 }
4328 
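// On gfx90a, VGPR and AGPR tuples must start at an even-numbered register.
// For example (illustrative): v[0:1] passes this check while v[1:2] does not.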
4329 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4330   auto FB = getFeatureBits();
4331   if (!FB[AMDGPU::FeatureGFX90AInsts])
4332     return true;
4333 
4334   const MCRegisterInfo *MRI = getMRI();
4335   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4336   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4337   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4338     const MCOperand &Op = Inst.getOperand(I);
4339     if (!Op.isReg())
4340       continue;
4341 
4342     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4343     if (!Sub)
4344       continue;
4345 
4346     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4347       return false;
4348     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4349       return false;
4350   }
4351 
4352   return true;
4353 }
4354 
4355 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4356   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4357     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4358     if (Op.isBLGP())
4359       return Op.getStartLoc();
4360   }
4361   return SMLoc();
4362 }
4363 
4364 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4365                                    const OperandVector &Operands) {
4366   unsigned Opc = Inst.getOpcode();
4367   int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4368   if (BlgpIdx == -1)
4369     return true;
4370   SMLoc BLGPLoc = getBLGPLoc(Operands);
4371   if (!BLGPLoc.isValid())
4372     return true;
4373   bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:");
4374   auto FB = getFeatureBits();
4375   bool UsesNeg = false;
4376   if (FB[AMDGPU::FeatureGFX940Insts]) {
4377     switch (Opc) {
4378     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4379     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4380     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4381     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4382       UsesNeg = true;
4383     }
4384   }
4385 
4386   if (IsNeg == UsesNeg)
4387     return true;
4388 
4389   Error(BLGPLoc,
4390         UsesNeg ? "invalid modifier: blgp is not supported"
4391                 : "invalid modifier: neg is not supported");
4392 
4393   return false;
4394 }
4395 
4396 // gfx90a has an undocumented limitation:
4397 // DS_GWS opcodes must use even aligned registers.
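// For example (illustrative): a data0 operand in v0 or v2 passes the check
// below, while v1 is rejected.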
4398 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4399                                   const OperandVector &Operands) {
4400   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4401     return true;
4402 
4403   int Opc = Inst.getOpcode();
4404   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4405       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4406     return true;
4407 
4408   const MCRegisterInfo *MRI = getMRI();
4409   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4410   int Data0Pos =
4411       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4412   assert(Data0Pos != -1);
4413   auto Reg = Inst.getOperand(Data0Pos).getReg();
4414   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4415   if (RegIdx & 1) {
4416     SMLoc RegLoc = getRegLoc(Reg, Operands);
4417     Error(RegLoc, "vgpr must be even aligned");
4418     return false;
4419   }
4420 
4421   return true;
4422 }
4423 
4424 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4425                                             const OperandVector &Operands,
4426                                             const SMLoc &IDLoc) {
4427   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4428                                            AMDGPU::OpName::cpol);
4429   if (CPolPos == -1)
4430     return true;
4431 
4432   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4433 
4434   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4435   if (TSFlags & SIInstrFlags::SMRD) {
4436     if (CPol && (isSI() || isCI())) {
4437       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4438       Error(S, "cache policy is not supported for SMRD instructions");
4439       return false;
4440     }
4441     if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4442       Error(IDLoc, "invalid cache policy for SMEM instruction");
4443       return false;
4444     }
4445   }
4446 
4447   if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4448     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4449     StringRef CStr(S.getPointer());
4450     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4451     Error(S, "scc is not supported on this GPU");
4452     return false;
4453   }
4454 
4455   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4456     return true;
4457 
4458   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4459     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4460       Error(IDLoc, isGFX940() ? "instruction must use sc0"
4461                               : "instruction must use glc");
4462       return false;
4463     }
4464   } else {
4465     if (CPol & CPol::GLC) {
4466       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4467       StringRef CStr(S.getPointer());
4468       S = SMLoc::getFromPointer(
4469           &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4470       Error(S, isGFX940() ? "instruction must not use sc0"
4471                           : "instruction must not use glc");
4472       return false;
4473     }
4474   }
4475 
4476   return true;
4477 }
4478 
4479 bool AMDGPUAsmParser::validateFlatLdsDMA(const MCInst &Inst,
4480                                          const OperandVector &Operands,
4481                                          const SMLoc &IDLoc) {
4482   if (isGFX940())
4483     return true;
4484 
4485   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4486   if ((TSFlags & (SIInstrFlags::VALU | SIInstrFlags::FLAT)) !=
4487       (SIInstrFlags::VALU | SIInstrFlags::FLAT))
4488     return true;
4489   // This is FLAT LDS DMA.
4490 
4491   SMLoc S = getImmLoc(AMDGPUOperand::ImmTyLDS, Operands);
4492   StringRef CStr(S.getPointer());
4493   if (!CStr.startswith("lds")) {
4494     // This is an incorrectly selected LDS DMA version of a FLAT load opcode.
4495     // The LDS version should have the 'lds' modifier, but it follows optional
4496     // operands, so its absence is ignored by the matcher.
4497     Error(IDLoc, "invalid operands for instruction");
4498     return false;
4499   }
4500 
4501   return true;
4502 }
4503 
4504 bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
4505   if (!isGFX11Plus())
4506     return true;
4507   for (auto &Operand : Operands) {
4508     if (!Operand->isReg())
4509       continue;
4510     unsigned Reg = Operand->getReg();
4511     if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
4512       Error(getRegLoc(Reg, Operands),
4513             "execz and vccz are not supported on this GPU");
4514       return false;
4515     }
4516   }
4517   return true;
4518 }
4519 
4520 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4521                                           const SMLoc &IDLoc,
4522                                           const OperandVector &Operands) {
4523   if (auto ErrMsg = validateLdsDirect(Inst)) {
4524     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4525     return false;
4526   }
4527   if (!validateSOPLiteral(Inst)) {
4528     Error(getLitLoc(Operands),
4529       "only one literal operand is allowed");
4530     return false;
4531   }
4532   if (!validateVOPLiteral(Inst, Operands)) {
4533     return false;
4534   }
4535   if (!validateConstantBusLimitations(Inst, Operands)) {
4536     return false;
4537   }
4538   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4539     return false;
4540   }
4541   if (!validateIntClampSupported(Inst)) {
4542     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4543       "integer clamping is not supported on this GPU");
4544     return false;
4545   }
4546   if (!validateOpSel(Inst)) {
4547     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4548       "invalid op_sel operand");
4549     return false;
4550   }
4551   if (!validateDPP(Inst, Operands)) {
4552     return false;
4553   }
4554   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4555   if (!validateMIMGD16(Inst)) {
4556     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4557       "d16 modifier is not supported on this GPU");
4558     return false;
4559   }
4560   if (!validateMIMGDim(Inst)) {
4561     Error(IDLoc, "dim modifier is required on this GPU");
4562     return false;
4563   }
4564   if (!validateMIMGMSAA(Inst)) {
4565     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4566           "invalid dim; must be MSAA type");
4567     return false;
4568   }
4569   if (auto ErrMsg = validateMIMGDataSize(Inst)) {
4570     Error(IDLoc, *ErrMsg);
4571     return false;
4572   }
4573   if (!validateMIMGAddrSize(Inst)) {
4574     Error(IDLoc,
4575       "image address size does not match dim and a16");
4576     return false;
4577   }
4578   if (!validateMIMGAtomicDMask(Inst)) {
4579     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4580       "invalid atomic image dmask");
4581     return false;
4582   }
4583   if (!validateMIMGGatherDMask(Inst)) {
4584     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4585       "invalid image_gather dmask: only one bit must be set");
4586     return false;
4587   }
4588   if (!validateMovrels(Inst, Operands)) {
4589     return false;
4590   }
4591   if (!validateFlatOffset(Inst, Operands)) {
4592     return false;
4593   }
4594   if (!validateSMEMOffset(Inst, Operands)) {
4595     return false;
4596   }
4597   if (!validateMAIAccWrite(Inst, Operands)) {
4598     return false;
4599   }
4600   if (!validateMFMA(Inst, Operands)) {
4601     return false;
4602   }
4603   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4604     return false;
4605   }
4606 
4607   if (!validateAGPRLdSt(Inst)) {
4608     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4609     ? "invalid register class: data and dst should be all VGPR or AGPR"
4610     : "invalid register class: agpr loads and stores not supported on this GPU"
4611     );
4612     return false;
4613   }
4614   if (!validateVGPRAlign(Inst)) {
4615     Error(IDLoc,
4616       "invalid register class: vgpr tuples must be 64 bit aligned");
4617     return false;
4618   }
4619   if (!validateGWS(Inst, Operands)) {
4620     return false;
4621   }
4622 
4623   if (!validateBLGP(Inst, Operands)) {
4624     return false;
4625   }
4626 
4627   if (!validateDivScale(Inst)) {
4628     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4629     return false;
4630   }
4631   if (!validateExeczVcczOperands(Operands)) {
4632     return false;
4633   }
4634 
4635   if (!validateFlatLdsDMA(Inst, Operands, IDLoc)) {
4636     return false;
4637   }
4638 
4639   return true;
4640 }
4641 
4642 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4643                                             const FeatureBitset &FBS,
4644                                             unsigned VariantID = 0);
4645 
4646 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4647                                 const FeatureBitset &AvailableFeatures,
4648                                 unsigned VariantID);
4649 
4650 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4651                                        const FeatureBitset &FBS) {
4652   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4653 }
4654 
4655 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4656                                        const FeatureBitset &FBS,
4657                                        ArrayRef<unsigned> Variants) {
4658   for (auto Variant : Variants) {
4659     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4660       return true;
4661   }
4662 
4663   return false;
4664 }
4665 
4666 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4667                                                   const SMLoc &IDLoc) {
4668   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4669 
4670   // Check if requested instruction variant is supported.
4671   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4672     return false;
4673 
4674   // This instruction is not supported.
4675   // Clear any other pending errors because they are no longer relevant.
4676   getParser().clearPendingErrors();
4677 
4678   // Requested instruction variant is not supported.
4679   // Check if any other variants are supported.
4680   StringRef VariantName = getMatchedVariantName();
4681   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4682     return Error(IDLoc,
4683                  Twine(VariantName,
4684                        " variant of this instruction is not supported"));
4685   }
4686 
4687   // Finally check if this instruction is supported on any other GPU.
4688   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4689     return Error(IDLoc, "instruction not supported on this GPU");
4690   }
4691 
4692   // Instruction not supported on any GPU. Probably a typo.
4693   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4694   return Error(IDLoc, "invalid instruction" + Suggestion);
4695 }
4696 
4697 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4698                                               OperandVector &Operands,
4699                                               MCStreamer &Out,
4700                                               uint64_t &ErrorInfo,
4701                                               bool MatchingInlineAsm) {
4702   MCInst Inst;
4703   unsigned Result = Match_Success;
4704   for (auto Variant : getMatchedVariants()) {
4705     uint64_t EI;
4706     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4707                                   Variant);
4708     // We order match statuses from least to most specific, and keep the most
4709     // specific status seen so far as the result:
4710     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4711     if ((R == Match_Success) ||
4712         (R == Match_PreferE32) ||
4713         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4714         (R == Match_InvalidOperand && Result != Match_MissingFeature
4715                                    && Result != Match_PreferE32) ||
4716         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4717                                    && Result != Match_MissingFeature
4718                                    && Result != Match_PreferE32)) {
4719       Result = R;
4720       ErrorInfo = EI;
4721     }
4722     if (R == Match_Success)
4723       break;
4724   }
4725 
4726   if (Result == Match_Success) {
4727     if (!validateInstruction(Inst, IDLoc, Operands)) {
4728       return true;
4729     }
4730     Inst.setLoc(IDLoc);
4731     Out.emitInstruction(Inst, getSTI());
4732     return false;
4733   }
4734 
4735   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4736   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4737     return true;
4738   }
4739 
4740   switch (Result) {
4741   default: break;
4742   case Match_MissingFeature:
4743     // It has been verified that the specified instruction
4744     // mnemonic is valid. A match was found but it requires
4745     // features which are not supported on this GPU.
4746     return Error(IDLoc, "operands are not valid for this GPU or mode");
4747 
4748   case Match_InvalidOperand: {
4749     SMLoc ErrorLoc = IDLoc;
4750     if (ErrorInfo != ~0ULL) {
4751       if (ErrorInfo >= Operands.size()) {
4752         return Error(IDLoc, "too few operands for instruction");
4753       }
4754       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4755       if (ErrorLoc == SMLoc())
4756         ErrorLoc = IDLoc;
4757     }
4758     return Error(ErrorLoc, "invalid operand for instruction");
4759   }
4760 
4761   case Match_PreferE32:
4762     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4763                         "should be encoded as e32");
4764   case Match_MnemonicFail:
4765     llvm_unreachable("Invalid instructions should have been handled already");
4766   }
4767   llvm_unreachable("Implement any new match types added!");
4768 }
4769 
4770 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4771   int64_t Tmp = -1;
4772   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4773     return true;
4774   }
4775   if (getParser().parseAbsoluteExpression(Tmp)) {
4776     return true;
4777   }
4778   Ret = static_cast<uint32_t>(Tmp);
4779   return false;
4780 }
4781 
4782 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4783                                                uint32_t &Minor) {
4784   if (ParseAsAbsoluteExpression(Major))
4785     return TokError("invalid major version");
4786 
4787   if (!trySkipToken(AsmToken::Comma))
4788     return TokError("minor version number required, comma expected");
4789 
4790   if (ParseAsAbsoluteExpression(Minor))
4791     return TokError("invalid minor version");
4792 
4793   return false;
4794 }
4795 
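// Parses the .amdgcn_target directive, which takes the target ID as a quoted
// string and checks it against the target ID the streamer was configured with.
// Illustrative usage (the target ID shown is only an example):
//
//   .amdgcn_target "amdgcn-amd-amdhsa--gfx90a"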
4796 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4797   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4798     return TokError("directive only supported for amdgcn architecture");
4799 
4800   std::string TargetIDDirective;
4801   SMLoc TargetStart = getTok().getLoc();
4802   if (getParser().parseEscapedString(TargetIDDirective))
4803     return true;
4804 
4805   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4806   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4807     return getParser().Error(TargetRange.Start,
4808         (Twine(".amdgcn_target directive's target id ") +
4809          Twine(TargetIDDirective) +
4810          Twine(" does not match the specified target id ") +
4811          Twine(getTargetStreamer().getTargetID()->toString())).str());
4812 
4813   return false;
4814 }
4815 
4816 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4817   return Error(Range.Start, "value out of range", Range);
4818 }
4819 
4820 bool AMDGPUAsmParser::calculateGPRBlocks(
4821     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4822     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4823     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4824     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4825   // TODO(scott.linder): These calculations are duplicated from
4826   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4827   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4828 
4829   unsigned NumVGPRs = NextFreeVGPR;
4830   unsigned NumSGPRs = NextFreeSGPR;
4831 
4832   if (Version.Major >= 10)
4833     NumSGPRs = 0;
4834   else {
4835     unsigned MaxAddressableNumSGPRs =
4836         IsaInfo::getAddressableNumSGPRs(&getSTI());
4837 
4838     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4839         NumSGPRs > MaxAddressableNumSGPRs)
4840       return OutOfRangeError(SGPRRange);
4841 
4842     NumSGPRs +=
4843         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4844 
4845     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4846         NumSGPRs > MaxAddressableNumSGPRs)
4847       return OutOfRangeError(SGPRRange);
4848 
4849     if (Features.test(FeatureSGPRInitBug))
4850       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4851   }
4852 
4853   VGPRBlocks =
4854       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4855   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4856 
4857   return false;
4858 }
4859 
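// Parses a .amdhsa_kernel block. A minimal, illustrative example (the kernel
// name and register counts are arbitrary):
//
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel
//
// .amdhsa_next_free_vgpr and .amdhsa_next_free_sgpr are mandatory; other
// .amdhsa_* directives are optional and default to the values in the default
// kernel descriptor built below.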
4860 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4861   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4862     return TokError("directive only supported for amdgcn architecture");
4863 
4864   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4865     return TokError("directive only supported for amdhsa OS");
4866 
4867   StringRef KernelName;
4868   if (getParser().parseIdentifier(KernelName))
4869     return true;
4870 
4871   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4872 
4873   StringSet<> Seen;
4874 
4875   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4876 
4877   SMRange VGPRRange;
4878   uint64_t NextFreeVGPR = 0;
4879   uint64_t AccumOffset = 0;
4880   uint64_t SharedVGPRCount = 0;
4881   SMRange SGPRRange;
4882   uint64_t NextFreeSGPR = 0;
4883 
4884   // Count the number of user SGPRs implied by the enabled user SGPR directives.
4885   unsigned ImpliedUserSGPRCount = 0;
4886 
4887   // Track if the asm explicitly contains the directive for the user SGPR
4888   // count.
4889   Optional<unsigned> ExplicitUserSGPRCount;
4890   bool ReserveVCC = true;
4891   bool ReserveFlatScr = true;
4892   Optional<bool> EnableWavefrontSize32;
4893 
4894   while (true) {
4895     while (trySkipToken(AsmToken::EndOfStatement));
4896 
4897     StringRef ID;
4898     SMRange IDRange = getTok().getLocRange();
4899     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4900       return true;
4901 
4902     if (ID == ".end_amdhsa_kernel")
4903       break;
4904 
4905     if (!Seen.insert(ID).second)
4906       return TokError(".amdhsa_ directives cannot be repeated");
4907 
4908     SMLoc ValStart = getLoc();
4909     int64_t IVal;
4910     if (getParser().parseAbsoluteExpression(IVal))
4911       return true;
4912     SMLoc ValEnd = getLoc();
4913     SMRange ValRange = SMRange(ValStart, ValEnd);
4914 
4915     if (IVal < 0)
4916       return OutOfRangeError(ValRange);
4917 
4918     uint64_t Val = IVal;
4919 
4920 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4921   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4922     return OutOfRangeError(RANGE);                                             \
4923   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
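    // For example (illustrative), PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
    // COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, Val, ValRange) rejects values that do
    // not fit in the ENABLE_IEEE_MODE field and otherwise packs Val into
    // KD.compute_pgm_rsrc1 via AMDHSA_BITS_SET.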
4924 
4925     if (ID == ".amdhsa_group_segment_fixed_size") {
4926       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4927         return OutOfRangeError(ValRange);
4928       KD.group_segment_fixed_size = Val;
4929     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4930       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4931         return OutOfRangeError(ValRange);
4932       KD.private_segment_fixed_size = Val;
4933     } else if (ID == ".amdhsa_kernarg_size") {
4934       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4935         return OutOfRangeError(ValRange);
4936       KD.kernarg_size = Val;
4937     } else if (ID == ".amdhsa_user_sgpr_count") {
4938       ExplicitUserSGPRCount = Val;
4939     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4940       if (hasArchitectedFlatScratch())
4941         return Error(IDRange.Start,
4942                      "directive is not supported with architected flat scratch",
4943                      IDRange);
4944       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4945                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4946                        Val, ValRange);
4947       if (Val)
4948         ImpliedUserSGPRCount += 4;
4949     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4950       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4951                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4952                        ValRange);
4953       if (Val)
4954         ImpliedUserSGPRCount += 2;
4955     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4956       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4957                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4958                        ValRange);
4959       if (Val)
4960         ImpliedUserSGPRCount += 2;
4961     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4962       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4963                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4964                        Val, ValRange);
4965       if (Val)
4966         ImpliedUserSGPRCount += 2;
4967     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4968       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4969                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4970                        ValRange);
4971       if (Val)
4972         ImpliedUserSGPRCount += 2;
4973     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4974       if (hasArchitectedFlatScratch())
4975         return Error(IDRange.Start,
4976                      "directive is not supported with architected flat scratch",
4977                      IDRange);
4978       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4979                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4980                        ValRange);
4981       if (Val)
4982         ImpliedUserSGPRCount += 2;
4983     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4984       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4985                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4986                        Val, ValRange);
4987       if (Val)
4988         ImpliedUserSGPRCount += 1;
4989     } else if (ID == ".amdhsa_wavefront_size32") {
4990       if (IVersion.Major < 10)
4991         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4992       EnableWavefrontSize32 = Val;
4993       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4994                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4995                        Val, ValRange);
4996     } else if (ID == ".amdhsa_uses_dynamic_stack") {
4997       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4998                        KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange);
4999     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5000       if (hasArchitectedFlatScratch())
5001         return Error(IDRange.Start,
5002                      "directive is not supported with architected flat scratch",
5003                      IDRange);
5004       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5005                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5006     } else if (ID == ".amdhsa_enable_private_segment") {
5007       if (!hasArchitectedFlatScratch())
5008         return Error(
5009             IDRange.Start,
5010             "directive is not supported without architected flat scratch",
5011             IDRange);
5012       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5013                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5014     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
5015       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5016                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
5017                        ValRange);
5018     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
5019       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5020                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
5021                        ValRange);
5022     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
5023       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5024                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
5025                        ValRange);
5026     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
5027       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5028                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
5029                        ValRange);
5030     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
5031       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5032                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
5033                        ValRange);
5034     } else if (ID == ".amdhsa_next_free_vgpr") {
5035       VGPRRange = ValRange;
5036       NextFreeVGPR = Val;
5037     } else if (ID == ".amdhsa_next_free_sgpr") {
5038       SGPRRange = ValRange;
5039       NextFreeSGPR = Val;
5040     } else if (ID == ".amdhsa_accum_offset") {
5041       if (!isGFX90A())
5042         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5043       AccumOffset = Val;
5044     } else if (ID == ".amdhsa_reserve_vcc") {
5045       if (!isUInt<1>(Val))
5046         return OutOfRangeError(ValRange);
5047       ReserveVCC = Val;
5048     } else if (ID == ".amdhsa_reserve_flat_scratch") {
5049       if (IVersion.Major < 7)
5050         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
5051       if (hasArchitectedFlatScratch())
5052         return Error(IDRange.Start,
5053                      "directive is not supported with architected flat scratch",
5054                      IDRange);
5055       if (!isUInt<1>(Val))
5056         return OutOfRangeError(ValRange);
5057       ReserveFlatScr = Val;
5058     } else if (ID == ".amdhsa_reserve_xnack_mask") {
5059       if (IVersion.Major < 8)
5060         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5061       if (!isUInt<1>(Val))
5062         return OutOfRangeError(ValRange);
5063       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5064         return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
5065                                  IDRange);
5066     } else if (ID == ".amdhsa_float_round_mode_32") {
5067       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5068                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
5069     } else if (ID == ".amdhsa_float_round_mode_16_64") {
5070       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5071                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
5072     } else if (ID == ".amdhsa_float_denorm_mode_32") {
5073       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5074                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
5075     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5076       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5077                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
5078                        ValRange);
5079     } else if (ID == ".amdhsa_dx10_clamp") {
5080       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5081                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
5082     } else if (ID == ".amdhsa_ieee_mode") {
5083       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
5084                        Val, ValRange);
5085     } else if (ID == ".amdhsa_fp16_overflow") {
5086       if (IVersion.Major < 9)
5087         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5088       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
5089                        ValRange);
5090     } else if (ID == ".amdhsa_tg_split") {
5091       if (!isGFX90A())
5092         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5093       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
5094                        ValRange);
5095     } else if (ID == ".amdhsa_workgroup_processor_mode") {
5096       if (IVersion.Major < 10)
5097         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5098       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
5099                        ValRange);
5100     } else if (ID == ".amdhsa_memory_ordered") {
5101       if (IVersion.Major < 10)
5102         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5103       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
5104                        ValRange);
5105     } else if (ID == ".amdhsa_forward_progress") {
5106       if (IVersion.Major < 10)
5107         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5108       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
5109                        ValRange);
5110     } else if (ID == ".amdhsa_shared_vgpr_count") {
5111       if (IVersion.Major < 10)
5112         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5113       SharedVGPRCount = Val;
5114       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5115                        COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT, Val,
5116                        ValRange);
5117     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5118       PARSE_BITS_ENTRY(
5119           KD.compute_pgm_rsrc2,
5120           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
5121           ValRange);
5122     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5123       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5124                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5125                        Val, ValRange);
5126     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5127       PARSE_BITS_ENTRY(
5128           KD.compute_pgm_rsrc2,
5129           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
5130           ValRange);
5131     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5132       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5133                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5134                        Val, ValRange);
5135     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5136       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5137                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5138                        Val, ValRange);
5139     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5140       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5141                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5142                        Val, ValRange);
5143     } else if (ID == ".amdhsa_exception_int_div_zero") {
5144       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5145                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5146                        Val, ValRange);
5147     } else {
5148       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5149     }
5150 
5151 #undef PARSE_BITS_ENTRY
5152   }
5153 
5154   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
5155     return TokError(".amdhsa_next_free_vgpr directive is required");
5156 
5157   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
5158     return TokError(".amdhsa_next_free_sgpr directive is required");
5159 
5160   unsigned VGPRBlocks;
5161   unsigned SGPRBlocks;
5162   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5163                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5164                          EnableWavefrontSize32, NextFreeVGPR,
5165                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5166                          SGPRBlocks))
5167     return true;
5168 
5169   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5170           VGPRBlocks))
5171     return OutOfRangeError(VGPRRange);
5172   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5173                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
5174 
5175   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5176           SGPRBlocks))
5177     return OutOfRangeError(SGPRRange);
5178   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5179                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
5180                   SGPRBlocks);
5181 
5182   if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5183     return TokError(".amdhsa_user_sgpr_count smaller than implied by "
5184                     "enabled user SGPRs");
5185 
5186   unsigned UserSGPRCount =
5187       ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5188 
5189   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5190     return TokError("too many user SGPRs enabled");
5191   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
5192                   UserSGPRCount);
5193 
5194   if (isGFX90A()) {
5195     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
5196       return TokError(".amdhsa_accum_offset directive is required");
5197     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5198       return TokError("accum_offset should be in range [4..256] in "
5199                       "increments of 4");
5200     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5201       return TokError("accum_offset exceeds total VGPR allocation");
5202     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5203                     (AccumOffset / 4 - 1));
5204   }
5205 
5206   if (IVersion.Major == 10) {
5207     // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
5208     if (SharedVGPRCount && EnableWavefrontSize32) {
5209       return TokError("shared_vgpr_count directive not valid on "
5210                       "wavefront size 32");
5211     }
5212     if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5213       return TokError("shared_vgpr_count*2 + "
5214                       "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5215                       "exceed 63");
5216     }
5217   }
5218 
5219   getTargetStreamer().EmitAmdhsaKernelDescriptor(
5220       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
5221       ReserveFlatScr);
5222   return false;
5223 }
5224 
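// Parses the legacy .hsa_code_object_version directive, e.g. (illustrative):
//
//   .hsa_code_object_version 2,1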
5225 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
5226   uint32_t Major;
5227   uint32_t Minor;
5228 
5229   if (ParseDirectiveMajorMinor(Major, Minor))
5230     return true;
5231 
5232   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
5233   return false;
5234 }
5235 
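// Parses the legacy .hsa_code_object_isa directive. With no arguments the ISA
// version of the targeted GPU is emitted; otherwise all five fields must be
// given, e.g. (illustrative):
//
//   .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"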
5236 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
5237   uint32_t Major;
5238   uint32_t Minor;
5239   uint32_t Stepping;
5240   StringRef VendorName;
5241   StringRef ArchName;
5242 
5243   // If this directive has no arguments, then use the ISA version for the
5244   // targeted GPU.
5245   if (isToken(AsmToken::EndOfStatement)) {
5246     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5247     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
5248                                                         ISA.Stepping,
5249                                                         "AMD", "AMDGPU");
5250     return false;
5251   }
5252 
5253   if (ParseDirectiveMajorMinor(Major, Minor))
5254     return true;
5255 
5256   if (!trySkipToken(AsmToken::Comma))
5257     return TokError("stepping version number required, comma expected");
5258 
5259   if (ParseAsAbsoluteExpression(Stepping))
5260     return TokError("invalid stepping version");
5261 
5262   if (!trySkipToken(AsmToken::Comma))
5263     return TokError("vendor name required, comma expected");
5264 
5265   if (!parseString(VendorName, "invalid vendor name"))
5266     return true;
5267 
5268   if (!trySkipToken(AsmToken::Comma))
5269     return TokError("arch name required, comma expected");
5270 
5271   if (!parseString(ArchName, "invalid arch name"))
5272     return true;
5273 
5274   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
5275                                                       VendorName, ArchName);
5276   return false;
5277 }
5278 
5279 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5280                                                amd_kernel_code_t &Header) {
5281   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5282   // assembly for backwards compatibility.
5283   if (ID == "max_scratch_backing_memory_byte_size") {
5284     Parser.eatToEndOfStatement();
5285     return false;
5286   }
5287 
5288   SmallString<40> ErrStr;
5289   raw_svector_ostream Err(ErrStr);
5290   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5291     return TokError(Err.str());
5292   }
5293   Lex();
5294 
5295   if (ID == "enable_wavefront_size32") {
5296     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5297       if (!isGFX10Plus())
5298         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5299       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5300         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5301     } else {
5302       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5303         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5304     }
5305   }
5306 
5307   if (ID == "wavefront_size") {
5308     if (Header.wavefront_size == 5) {
5309       if (!isGFX10Plus())
5310         return TokError("wavefront_size=5 is only allowed on GFX10+");
5311       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5312         return TokError("wavefront_size=5 requires +WavefrontSize32");
5313     } else if (Header.wavefront_size == 6) {
5314       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5315         return TokError("wavefront_size=6 requires +WavefrontSize64");
5316     }
5317   }
5318 
5319   if (ID == "enable_wgp_mode") {
5320     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5321         !isGFX10Plus())
5322       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5323   }
5324 
5325   if (ID == "enable_mem_ordered") {
5326     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5327         !isGFX10Plus())
5328       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5329   }
5330 
5331   if (ID == "enable_fwd_progress") {
5332     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5333         !isGFX10Plus())
5334       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5335   }
5336 
5337   return false;
5338 }
5339 
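// Parses a legacy .amd_kernel_code_t block of "field = value" assignments,
// e.g. (illustrative; the fields and values shown are arbitrary):
//
//   .amd_kernel_code_t
//     wavefront_size = 6
//     enable_wgp_mode = 0
//   .end_amd_kernel_code_t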
5340 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5341   amd_kernel_code_t Header;
5342   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5343 
5344   while (true) {
5345     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
5346     // will set the current token to EndOfStatement.
5347     while(trySkipToken(AsmToken::EndOfStatement));
5348 
5349     StringRef ID;
5350     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5351       return true;
5352 
5353     if (ID == ".end_amd_kernel_code_t")
5354       break;
5355 
5356     if (ParseAMDKernelCodeTValue(ID, Header))
5357       return true;
5358   }
5359 
5360   getTargetStreamer().EmitAMDKernelCodeT(Header);
5361 
5362   return false;
5363 }
5364 
5365 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5366   StringRef KernelName;
5367   if (!parseId(KernelName, "expected symbol name"))
5368     return true;
5369 
5370   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5371                                            ELF::STT_AMDGPU_HSA_KERNEL);
5372 
5373   KernelScope.initialize(getContext());
5374   return false;
5375 }
5376 
5377 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5378   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5379     return Error(getLoc(),
5380                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5381                  "architectures");
5382   }
5383 
5384   auto TargetIDDirective = getLexer().getTok().getStringContents();
5385   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5386     return Error(getParser().getTok().getLoc(), "target id must match options");
5387 
5388   getTargetStreamer().EmitISAVersion();
5389   Lex();
5390 
5391   return false;
5392 }
5393 
5394 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5395   const char *AssemblerDirectiveBegin;
5396   const char *AssemblerDirectiveEnd;
5397   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5398       isHsaAbiVersion3AndAbove(&getSTI())
5399           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5400                             HSAMD::V3::AssemblerDirectiveEnd)
5401           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5402                             HSAMD::AssemblerDirectiveEnd);
5403 
5404   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5405     return Error(getLoc(),
5406                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5407                  "not available on non-amdhsa OSes")).str());
5408   }
5409 
5410   std::string HSAMetadataString;
5411   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5412                           HSAMetadataString))
5413     return true;
5414 
5415   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5416     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5417       return Error(getLoc(), "invalid HSA metadata");
5418   } else {
5419     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5420       return Error(getLoc(), "invalid HSA metadata");
5421   }
5422 
5423   return false;
5424 }
5425 
5426 /// Common code to parse out a block of text (typically YAML) between start and
5427 /// end directives.
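/// For instance, ParseDirectiveHSAMetadata above uses this to collect the
/// metadata text between the begin/end HSA metadata directives, and the PAL
/// metadata parser below does the same for its begin/end pair.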
5428 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5429                                           const char *AssemblerDirectiveEnd,
5430                                           std::string &CollectString) {
5431 
5432   raw_string_ostream CollectStream(CollectString);
5433 
5434   getLexer().setSkipSpace(false);
5435 
5436   bool FoundEnd = false;
5437   while (!isToken(AsmToken::Eof)) {
5438     while (isToken(AsmToken::Space)) {
5439       CollectStream << getTokenStr();
5440       Lex();
5441     }
5442 
5443     if (trySkipId(AssemblerDirectiveEnd)) {
5444       FoundEnd = true;
5445       break;
5446     }
5447 
5448     CollectStream << Parser.parseStringToEndOfStatement()
5449                   << getContext().getAsmInfo()->getSeparatorString();
5450 
5451     Parser.eatToEndOfStatement();
5452   }
5453 
5454   getLexer().setSkipSpace(true);
5455 
5456   if (isToken(AsmToken::Eof) && !FoundEnd) {
5457     return TokError(Twine("expected directive ") +
5458                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5459   }
5460 
5461   CollectStream.flush();
5462   return false;
5463 }
5464 
5465 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5466 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5467   std::string String;
5468   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5469                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5470     return true;
5471 
5472   auto PALMetadata = getTargetStreamer().getPALMetadata();
5473   if (!PALMetadata->setFromString(String))
5474     return Error(getLoc(), "invalid PAL metadata");
5475   return false;
5476 }
5477 
5478 /// Parse the assembler directive for old linear-format PAL metadata.
5479 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5480   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5481     return Error(getLoc(),
5482                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5483                  "not available on non-amdpal OSes")).str());
5484   }
5485 
5486   auto PALMetadata = getTargetStreamer().getPALMetadata();
5487   PALMetadata->setLegacy();
5488   for (;;) {
5489     uint32_t Key, Value;
5490     if (ParseAsAbsoluteExpression(Key)) {
5491       return TokError(Twine("invalid value in ") +
5492                       Twine(PALMD::AssemblerDirective));
5493     }
5494     if (!trySkipToken(AsmToken::Comma)) {
5495       return TokError(Twine("expected an even number of values in ") +
5496                       Twine(PALMD::AssemblerDirective));
5497     }
5498     if (ParseAsAbsoluteExpression(Value)) {
5499       return TokError(Twine("invalid value in ") +
5500                       Twine(PALMD::AssemblerDirective));
5501     }
5502     PALMetadata->setRegister(Key, Value);
5503     if (!trySkipToken(AsmToken::Comma))
5504       break;
5505   }
5506   return false;
5507 }
5508 
5509 /// ParseDirectiveAMDGPULDS
5510 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
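///  e.g. .amdgpu_lds lds_sym, 4096, 16   (illustrative; the symbol and values
///  are arbitrary, and the alignment defaults to 4 when omitted)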
5511 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5512   if (getParser().checkForValidSection())
5513     return true;
5514 
5515   StringRef Name;
5516   SMLoc NameLoc = getLoc();
5517   if (getParser().parseIdentifier(Name))
5518     return TokError("expected identifier in directive");
5519 
5520   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5521   if (parseToken(AsmToken::Comma, "expected ','"))
5522     return true;
5523 
5524   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5525 
5526   int64_t Size;
5527   SMLoc SizeLoc = getLoc();
5528   if (getParser().parseAbsoluteExpression(Size))
5529     return true;
5530   if (Size < 0)
5531     return Error(SizeLoc, "size must be non-negative");
5532   if (Size > LocalMemorySize)
5533     return Error(SizeLoc, "size is too large");
5534 
5535   int64_t Alignment = 4;
5536   if (trySkipToken(AsmToken::Comma)) {
5537     SMLoc AlignLoc = getLoc();
5538     if (getParser().parseAbsoluteExpression(Alignment))
5539       return true;
5540     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5541       return Error(AlignLoc, "alignment must be a power of two");
5542 
5543     // Alignment larger than the size of LDS is possible in theory, as long
5544     // as the linker manages to place the symbol at address 0, but we do want
5545     // to make sure the alignment fits nicely into a 32-bit integer.
5546     if (Alignment >= 1u << 31)
5547       return Error(AlignLoc, "alignment is too large");
5548   }
5549 
5550   if (parseEOL())
5551     return true;
5552 
5553   Symbol->redefineIfPossible();
5554   if (!Symbol->isUndefined())
5555     return Error(NameLoc, "invalid symbol redefinition");
5556 
5557   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5558   return false;
5559 }
5560 
5561 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5562   StringRef IDVal = DirectiveID.getString();
5563 
5564   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5565     if (IDVal == ".amdhsa_kernel")
5566       return ParseDirectiveAMDHSAKernel();
5567 
5568     // TODO: Restructure/combine with PAL metadata directive.
5569     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5570       return ParseDirectiveHSAMetadata();
5571   } else {
5572     if (IDVal == ".hsa_code_object_version")
5573       return ParseDirectiveHSACodeObjectVersion();
5574 
5575     if (IDVal == ".hsa_code_object_isa")
5576       return ParseDirectiveHSACodeObjectISA();
5577 
5578     if (IDVal == ".amd_kernel_code_t")
5579       return ParseDirectiveAMDKernelCodeT();
5580 
5581     if (IDVal == ".amdgpu_hsa_kernel")
5582       return ParseDirectiveAMDGPUHsaKernel();
5583 
5584     if (IDVal == ".amd_amdgpu_isa")
5585       return ParseDirectiveISAVersion();
5586 
5587     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5588       return ParseDirectiveHSAMetadata();
5589   }
5590 
5591   if (IDVal == ".amdgcn_target")
5592     return ParseDirectiveAMDGCNTarget();
5593 
5594   if (IDVal == ".amdgpu_lds")
5595     return ParseDirectiveAMDGPULDS();
5596 
5597   if (IDVal == PALMD::AssemblerDirectiveBegin)
5598     return ParseDirectivePALMetadataBegin();
5599 
5600   if (IDVal == PALMD::AssemblerDirective)
5601     return ParseDirectivePALMetadata();
5602 
5603   return true;
5604 }
5605 
5606 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5607                                            unsigned RegNo) {
5608 
5609   if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
5610     return isGFX9Plus();
5611 
5612   // GFX10+ has 2 more SGPRs, 104 and 105.
5613   if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
5614     return hasSGPR104_SGPR105();
5615 
5616   switch (RegNo) {
5617   case AMDGPU::SRC_SHARED_BASE:
5618   case AMDGPU::SRC_SHARED_LIMIT:
5619   case AMDGPU::SRC_PRIVATE_BASE:
5620   case AMDGPU::SRC_PRIVATE_LIMIT:
5621     return isGFX9Plus();
5622   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5623     return isGFX9Plus() && !isGFX11Plus();
5624   case AMDGPU::TBA:
5625   case AMDGPU::TBA_LO:
5626   case AMDGPU::TBA_HI:
5627   case AMDGPU::TMA:
5628   case AMDGPU::TMA_LO:
5629   case AMDGPU::TMA_HI:
5630     return !isGFX9Plus();
5631   case AMDGPU::XNACK_MASK:
5632   case AMDGPU::XNACK_MASK_LO:
5633   case AMDGPU::XNACK_MASK_HI:
5634     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5635   case AMDGPU::SGPR_NULL:
5636     return isGFX10Plus();
5637   default:
5638     break;
5639   }
5640 
5641   if (isCI())
5642     return true;
5643 
5644   if (isSI() || isGFX10Plus()) {
5645     // No flat_scr on SI.
5646     // On GFX10Plus flat scratch is not a valid register operand and can only be
5647     // accessed with s_setreg/s_getreg.
5648     switch (RegNo) {
5649     case AMDGPU::FLAT_SCR:
5650     case AMDGPU::FLAT_SCR_LO:
5651     case AMDGPU::FLAT_SCR_HI:
5652       return false;
5653     default:
5654       return true;
5655     }
5656   }
5657 
5658   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5659   // SI/CI have.
5660   if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
5661     return hasSGPR102_SGPR103();
5662 
5663   return true;
5664 }
5665 
5666 OperandMatchResultTy
5667 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5668                               OperandMode Mode) {
5669   OperandMatchResultTy ResTy = parseVOPD(Operands);
5670   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5671       isToken(AsmToken::EndOfStatement))
5672     return ResTy;
5673 
5674   // Try to parse with a custom parser
5675   ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5676 
5677   // If we successfully parsed the operand or if there was an error parsing,
5678   // we are done.
5679   //
5680   // If we are parsing after we reach EndOfStatement then this means we
5681   // are appending default values to the Operands list.  This is only done
5682   // by custom parsers, so we shouldn't continue on to the generic parsing.
5683   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5684       isToken(AsmToken::EndOfStatement))
5685     return ResTy;
5686 
5687   SMLoc RBraceLoc;
5688   SMLoc LBraceLoc = getLoc();
5689   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5690     unsigned Prefix = Operands.size();
5691 
5692     for (;;) {
5693       auto Loc = getLoc();
5694       ResTy = parseReg(Operands);
5695       if (ResTy == MatchOperand_NoMatch)
5696         Error(Loc, "expected a register");
5697       if (ResTy != MatchOperand_Success)
5698         return MatchOperand_ParseFail;
5699 
5700       RBraceLoc = getLoc();
5701       if (trySkipToken(AsmToken::RBrac))
5702         break;
5703 
5704       if (!skipToken(AsmToken::Comma,
5705                      "expected a comma or a closing square bracket")) {
5706         return MatchOperand_ParseFail;
5707       }
5708     }
5709 
5710     if (Operands.size() - Prefix > 1) {
5711       Operands.insert(Operands.begin() + Prefix,
5712                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5713       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5714     }
5715 
5716     return MatchOperand_Success;
5717   }
5718 
5719   return parseRegOrImm(Operands);
5720 }
5721 
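// Strips a forced-encoding suffix from the mnemonic and records it, so that,
// for example, "v_add_f32_e64" is matched as "v_add_f32" with a forced 64-bit
// encoding (the mnemonic shown is illustrative).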
5722 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5723   // Clear any forced encodings from the previous instruction.
5724   setForcedEncodingSize(0);
5725   setForcedDPP(false);
5726   setForcedSDWA(false);
5727 
5728   if (Name.endswith("_e64_dpp")) {
5729     setForcedDPP(true);
5730     setForcedEncodingSize(64);
5731     return Name.substr(0, Name.size() - 8);
5732   } else if (Name.endswith("_e64")) {
5733     setForcedEncodingSize(64);
5734     return Name.substr(0, Name.size() - 4);
5735   } else if (Name.endswith("_e32")) {
5736     setForcedEncodingSize(32);
5737     return Name.substr(0, Name.size() - 4);
5738   } else if (Name.endswith("_dpp")) {
5739     setForcedDPP(true);
5740     return Name.substr(0, Name.size() - 4);
5741   } else if (Name.endswith("_sdwa")) {
5742     setForcedSDWA(true);
5743     return Name.substr(0, Name.size() - 5);
5744   }
5745   return Name;
5746 }
5747 
5748 static void applyMnemonicAliases(StringRef &Mnemonic,
5749                                  const FeatureBitset &Features,
5750                                  unsigned VariantID);
5751 
5752 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5753                                        StringRef Name,
5754                                        SMLoc NameLoc, OperandVector &Operands) {
5755   // Add the instruction mnemonic
5756   Name = parseMnemonicSuffix(Name);
5757 
5758   // If the target architecture uses MnemonicAlias, call it here to parse
5759   // operands correctly.
5760   applyMnemonicAliases(Name, getAvailableFeatures(), 0);
5761 
5762   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5763 
5764   bool IsMIMG = Name.startswith("image_");
5765 
5766   while (!trySkipToken(AsmToken::EndOfStatement)) {
5767     OperandMode Mode = OperandMode_Default;
5768     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5769       Mode = OperandMode_NSA;
5770     CPolSeen = 0;
5771     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5772 
5773     if (Res != MatchOperand_Success) {
5774       checkUnsupportedInstruction(Name, NameLoc);
5775       if (!Parser.hasPendingError()) {
5776         // FIXME: use real operand location rather than the current location.
5777         StringRef Msg =
5778           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5779                                             "not a valid operand.";
5780         Error(getLoc(), Msg);
5781       }
5782       while (!trySkipToken(AsmToken::EndOfStatement)) {
5783         lex();
5784       }
5785       return true;
5786     }
5787 
5788     // Eat the comma or space if there is one.
5789     trySkipToken(AsmToken::Comma);
5790   }
5791 
5792   return false;
5793 }
5794 
5795 //===----------------------------------------------------------------------===//
5796 // Utility functions
5797 //===----------------------------------------------------------------------===//
5798 
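// Parses an integer operand of the form "<Prefix>:<expr>", e.g. "dfmt:1"
// (see tryParseFmt below for one such use).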
5799 OperandMatchResultTy
5800 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5801 
5802   if (!trySkipId(Prefix, AsmToken::Colon))
5803     return MatchOperand_NoMatch;
5804 
5805   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5806 }
5807 
5808 OperandMatchResultTy
5809 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5810                                     AMDGPUOperand::ImmTy ImmTy,
5811                                     bool (*ConvertResult)(int64_t&)) {
5812   SMLoc S = getLoc();
5813   int64_t Value = 0;
5814 
5815   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5816   if (Res != MatchOperand_Success)
5817     return Res;
5818 
5819   if (ConvertResult && !ConvertResult(Value)) {
5820     Error(S, "invalid " + StringRef(Prefix) + " value.");
5821   }
5822 
5823   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5824   return MatchOperand_Success;
5825 }
5826 
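// Parses a bracketed list of 0/1 values with a prefix, e.g. "op_sel:[0,1,0]"
// (the prefix is supplied by the caller; the example is illustrative). Each
// element sets one bit of the resulting immediate operand.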
5827 OperandMatchResultTy
5828 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5829                                              OperandVector &Operands,
5830                                              AMDGPUOperand::ImmTy ImmTy,
5831                                              bool (*ConvertResult)(int64_t&)) {
5832   SMLoc S = getLoc();
5833   if (!trySkipId(Prefix, AsmToken::Colon))
5834     return MatchOperand_NoMatch;
5835 
5836   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5837     return MatchOperand_ParseFail;
5838 
5839   unsigned Val = 0;
5840   const unsigned MaxSize = 4;
5841 
5842   // FIXME: How to verify the number of elements matches the number of src
5843   // operands?
5844   for (int I = 0; ; ++I) {
5845     int64_t Op;
5846     SMLoc Loc = getLoc();
5847     if (!parseExpr(Op))
5848       return MatchOperand_ParseFail;
5849 
5850     if (Op != 0 && Op != 1) {
5851       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5852       return MatchOperand_ParseFail;
5853     }
5854 
5855     Val |= (Op << I);
5856 
5857     if (trySkipToken(AsmToken::RBrac))
5858       break;
5859 
5860     if (I + 1 == MaxSize) {
5861       Error(getLoc(), "expected a closing square bracket");
5862       return MatchOperand_ParseFail;
5863     }
5864 
5865     if (!skipToken(AsmToken::Comma, "expected a comma"))
5866       return MatchOperand_ParseFail;
5867   }
5868 
5869   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5870   return MatchOperand_Success;
5871 }
5872 
5873 OperandMatchResultTy
5874 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5875                                AMDGPUOperand::ImmTy ImmTy) {
5876   int64_t Bit;
5877   SMLoc S = getLoc();
5878 
5879   if (trySkipId(Name)) {
5880     Bit = 1;
5881   } else if (trySkipId("no", Name)) {
5882     Bit = 0;
5883   } else {
5884     return MatchOperand_NoMatch;
5885   }
5886 
5887   if (Name == "r128" && !hasMIMG_R128()) {
5888     Error(S, "r128 modifier is not supported on this GPU");
5889     return MatchOperand_ParseFail;
5890   }
5891   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5892     Error(S, "a16 modifier is not supported on this GPU");
5893     return MatchOperand_ParseFail;
5894   }
5895 
5896   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5897     ImmTy = AMDGPUOperand::ImmTyR128A16;
5898 
5899   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5900   return MatchOperand_Success;
5901 }
5902 
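// Parses cache policy modifiers such as "glc", "slc", "dlc", "scc" and their
// "no"-prefixed forms (or "sc0"/"nt"/"sc1" for non-"s_" mnemonics on gfx940),
// folding them into a single CPol immediate operand.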
5903 OperandMatchResultTy
5904 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5905   unsigned CPolOn = 0;
5906   unsigned CPolOff = 0;
5907   SMLoc S = getLoc();
5908 
5909   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5910   if (isGFX940() && !Mnemo.startswith("s_")) {
5911     if (trySkipId("sc0"))
5912       CPolOn = AMDGPU::CPol::SC0;
5913     else if (trySkipId("nosc0"))
5914       CPolOff = AMDGPU::CPol::SC0;
5915     else if (trySkipId("nt"))
5916       CPolOn = AMDGPU::CPol::NT;
5917     else if (trySkipId("nont"))
5918       CPolOff = AMDGPU::CPol::NT;
5919     else if (trySkipId("sc1"))
5920       CPolOn = AMDGPU::CPol::SC1;
5921     else if (trySkipId("nosc1"))
5922       CPolOff = AMDGPU::CPol::SC1;
5923     else
5924       return MatchOperand_NoMatch;
5925   }
5926   else if (trySkipId("glc"))
5927     CPolOn = AMDGPU::CPol::GLC;
5928   else if (trySkipId("noglc"))
5929     CPolOff = AMDGPU::CPol::GLC;
5930   else if (trySkipId("slc"))
5931     CPolOn = AMDGPU::CPol::SLC;
5932   else if (trySkipId("noslc"))
5933     CPolOff = AMDGPU::CPol::SLC;
5934   else if (trySkipId("dlc"))
5935     CPolOn = AMDGPU::CPol::DLC;
5936   else if (trySkipId("nodlc"))
5937     CPolOff = AMDGPU::CPol::DLC;
5938   else if (trySkipId("scc"))
5939     CPolOn = AMDGPU::CPol::SCC;
5940   else if (trySkipId("noscc"))
5941     CPolOff = AMDGPU::CPol::SCC;
5942   else
5943     return MatchOperand_NoMatch;
5944 
5945   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5946     Error(S, "dlc modifier is not supported on this GPU");
5947     return MatchOperand_ParseFail;
5948   }
5949 
5950   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5951     Error(S, "scc modifier is not supported on this GPU");
5952     return MatchOperand_ParseFail;
5953   }
5954 
5955   if (CPolSeen & (CPolOn | CPolOff)) {
5956     Error(S, "duplicate cache policy modifier");
5957     return MatchOperand_ParseFail;
5958   }
5959 
5960   CPolSeen |= (CPolOn | CPolOff);
5961 
5962   for (unsigned I = 1; I != Operands.size(); ++I) {
5963     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5964     if (Op.isCPol()) {
5965       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5966       return MatchOperand_Success;
5967     }
5968   }
5969 
5970   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5971                                               AMDGPUOperand::ImmTyCPol));
5972 
5973   return MatchOperand_Success;
5974 }
5975 
5976 static void addOptionalImmOperand(
5977   MCInst& Inst, const OperandVector& Operands,
5978   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5979   AMDGPUOperand::ImmTy ImmT,
5980   int64_t Default = 0) {
5981   auto i = OptionalIdx.find(ImmT);
5982   if (i != OptionalIdx.end()) {
5983     unsigned Idx = i->second;
5984     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5985   } else {
5986     Inst.addOperand(MCOperand::createImm(Default));
5987   }
5988 }
5989 
5990 OperandMatchResultTy
5991 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5992                                        StringRef &Value,
5993                                        SMLoc &StringLoc) {
5994   if (!trySkipId(Prefix, AsmToken::Colon))
5995     return MatchOperand_NoMatch;
5996 
5997   StringLoc = getLoc();
5998   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5999                                                   : MatchOperand_ParseFail;
6000 }
6001 
6002 //===----------------------------------------------------------------------===//
6003 // MTBUF format
6004 //===----------------------------------------------------------------------===//
6005 
6006 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
6007                                   int64_t MaxVal,
6008                                   int64_t &Fmt) {
6009   int64_t Val;
6010   SMLoc Loc = getLoc();
6011 
6012   auto Res = parseIntWithPrefix(Pref, Val);
6013   if (Res == MatchOperand_ParseFail)
6014     return false;
6015   if (Res == MatchOperand_NoMatch)
6016     return true;
6017 
6018   if (Val < 0 || Val > MaxVal) {
6019     Error(Loc, Twine("out of range ", StringRef(Pref)));
6020     return false;
6021   }
6022 
6023   Fmt = Val;
6024   return true;
6025 }
6026 
6027 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
6028 // values to live in a joint format operand in the MCInst encoding.
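// e.g. "dfmt:1, nfmt:2", "nfmt:7 dfmt:14", or either field on its own
// (values shown are illustrative).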
6029 OperandMatchResultTy
6030 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
6031   using namespace llvm::AMDGPU::MTBUFFormat;
6032 
6033   int64_t Dfmt = DFMT_UNDEF;
6034   int64_t Nfmt = NFMT_UNDEF;
6035 
6036   // dfmt and nfmt can appear in either order, and each is optional.
6037   for (int I = 0; I < 2; ++I) {
6038     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
6039       return MatchOperand_ParseFail;
6040 
6041     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
6042       return MatchOperand_ParseFail;
6043     }
6044     // Skip optional comma between dfmt/nfmt
6045     // but guard against 2 commas following each other.
6046     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
6047         !peekToken().is(AsmToken::Comma)) {
6048       trySkipToken(AsmToken::Comma);
6049     }
6050   }
6051 
6052   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
6053     return MatchOperand_NoMatch;
6054 
6055   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6056   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6057 
6058   Format = encodeDfmtNfmt(Dfmt, Nfmt);
6059   return MatchOperand_Success;
6060 }
6061 
6062 OperandMatchResultTy
6063 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
6064   using namespace llvm::AMDGPU::MTBUFFormat;
6065 
6066   int64_t Fmt = UFMT_UNDEF;
6067 
6068   if (!tryParseFmt("format", UFMT_MAX, Fmt))
6069     return MatchOperand_ParseFail;
6070 
6071   if (Fmt == UFMT_UNDEF)
6072     return MatchOperand_NoMatch;
6073 
6074   Format = Fmt;
6075   return MatchOperand_Success;
6076 }
6077 
6078 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6079                                     int64_t &Nfmt,
6080                                     StringRef FormatStr,
6081                                     SMLoc Loc) {
6082   using namespace llvm::AMDGPU::MTBUFFormat;
6083   int64_t Format;
6084 
6085   Format = getDfmt(FormatStr);
6086   if (Format != DFMT_UNDEF) {
6087     Dfmt = Format;
6088     return true;
6089   }
6090 
6091   Format = getNfmt(FormatStr, getSTI());
6092   if (Format != NFMT_UNDEF) {
6093     Nfmt = Format;
6094     return true;
6095   }
6096 
6097   Error(Loc, "unsupported format");
6098   return false;
6099 }
6100 
6101 OperandMatchResultTy
6102 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
6103                                           SMLoc FormatLoc,
6104                                           int64_t &Format) {
6105   using namespace llvm::AMDGPU::MTBUFFormat;
6106 
6107   int64_t Dfmt = DFMT_UNDEF;
6108   int64_t Nfmt = NFMT_UNDEF;
6109   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
6110     return MatchOperand_ParseFail;
6111 
6112   if (trySkipToken(AsmToken::Comma)) {
6113     StringRef Str;
6114     SMLoc Loc = getLoc();
6115     if (!parseId(Str, "expected a format string") ||
6116         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
6117       return MatchOperand_ParseFail;
6118     }
6119     if (Dfmt == DFMT_UNDEF) {
6120       Error(Loc, "duplicate numeric format");
6121       return MatchOperand_ParseFail;
6122     } else if (Nfmt == NFMT_UNDEF) {
6123       Error(Loc, "duplicate data format");
6124       return MatchOperand_ParseFail;
6125     }
6126   }
6127 
6128   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6129   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6130 
6131   if (isGFX10Plus()) {
6132     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
6133     if (Ufmt == UFMT_UNDEF) {
6134       Error(FormatLoc, "unsupported format");
6135       return MatchOperand_ParseFail;
6136     }
6137     Format = Ufmt;
6138   } else {
6139     Format = encodeDfmtNfmt(Dfmt, Nfmt);
6140   }
6141 
6142   return MatchOperand_Success;
6143 }
6144 
6145 OperandMatchResultTy
6146 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6147                                             SMLoc Loc,
6148                                             int64_t &Format) {
6149   using namespace llvm::AMDGPU::MTBUFFormat;
6150 
6151   auto Id = getUnifiedFormat(FormatStr, getSTI());
6152   if (Id == UFMT_UNDEF)
6153     return MatchOperand_NoMatch;
6154 
6155   if (!isGFX10Plus()) {
6156     Error(Loc, "unified format is not supported on this GPU");
6157     return MatchOperand_ParseFail;
6158   }
6159 
6160   Format = Id;
6161   return MatchOperand_Success;
6162 }
6163 
6164 OperandMatchResultTy
6165 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6166   using namespace llvm::AMDGPU::MTBUFFormat;
6167   SMLoc Loc = getLoc();
6168 
6169   if (!parseExpr(Format))
6170     return MatchOperand_ParseFail;
6171   if (!isValidFormatEncoding(Format, getSTI())) {
6172     Error(Loc, "out of range format");
6173     return MatchOperand_ParseFail;
6174   }
6175 
6176   return MatchOperand_Success;
6177 }
6178 
6179 OperandMatchResultTy
6180 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6181   using namespace llvm::AMDGPU::MTBUFFormat;
6182 
6183   if (!trySkipId("format", AsmToken::Colon))
6184     return MatchOperand_NoMatch;
6185 
6186   if (trySkipToken(AsmToken::LBrac)) {
6187     StringRef FormatStr;
6188     SMLoc Loc = getLoc();
6189     if (!parseId(FormatStr, "expected a format string"))
6190       return MatchOperand_ParseFail;
6191 
6192     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6193     if (Res == MatchOperand_NoMatch)
6194       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6195     if (Res != MatchOperand_Success)
6196       return Res;
6197 
6198     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6199       return MatchOperand_ParseFail;
6200 
6201     return MatchOperand_Success;
6202   }
6203 
6204   return parseNumericFormat(Format);
6205 }
6206 
6207 OperandMatchResultTy
6208 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
6209   using namespace llvm::AMDGPU::MTBUFFormat;
6210 
6211   int64_t Format = getDefaultFormatEncoding(getSTI());
6212   OperandMatchResultTy Res;
6213   SMLoc Loc = getLoc();
6214 
6215   // Parse legacy format syntax.
6216   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
6217   if (Res == MatchOperand_ParseFail)
6218     return Res;
6219 
6220   bool FormatFound = (Res == MatchOperand_Success);
6221 
6222   Operands.push_back(
6223     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6224 
6225   if (FormatFound)
6226     trySkipToken(AsmToken::Comma);
6227 
6228   if (isToken(AsmToken::EndOfStatement)) {
6229     // We are expecting an soffset operand,
6230     // but let the matcher handle the error.
6231     return MatchOperand_Success;
6232   }
6233 
6234   // Parse soffset.
6235   Res = parseRegOrImm(Operands);
6236   if (Res != MatchOperand_Success)
6237     return Res;
6238 
6239   trySkipToken(AsmToken::Comma);
6240 
6241   if (!FormatFound) {
6242     Res = parseSymbolicOrNumericFormat(Format);
6243     if (Res == MatchOperand_ParseFail)
6244       return Res;
6245     if (Res == MatchOperand_Success) {
6246       auto Size = Operands.size();
6247       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
6248       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6249       Op.setImm(Format);
6250     }
6251     return MatchOperand_Success;
6252   }
6253 
6254   if (isId("format") && peekToken().is(AsmToken::Colon)) {
6255     Error(getLoc(), "duplicate format");
6256     return MatchOperand_ParseFail;
6257   }
6258   return MatchOperand_Success;
6259 }
6260 
6261 //===----------------------------------------------------------------------===//
6262 // ds
6263 //===----------------------------------------------------------------------===//
6264 
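     // Convert parsed operands for DS instructions that use split offsets
     // (offset0/offset1), e.g. ds_write2_b32: register operands are added
     // first, optional immediates are filled in by name, and m0 is appended.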
6265 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
6266                                     const OperandVector &Operands) {
6267   OptionalImmIndexMap OptionalIdx;
6268 
6269   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6270     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6271 
6272     // Add the register arguments
6273     if (Op.isReg()) {
6274       Op.addRegOperands(Inst, 1);
6275       continue;
6276     }
6277 
6278     // Handle optional arguments
6279     OptionalIdx[Op.getImmTy()] = i;
6280   }
6281 
6282   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
6283   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
6284   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6285 
6286   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6287 }
6288 
6289 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
6290                                 bool IsGdsHardcoded) {
6291   OptionalImmIndexMap OptionalIdx;
6292   AMDGPUOperand::ImmTy OffsetType = AMDGPUOperand::ImmTyOffset;
6293 
6294   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6295     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6296 
6297     // Add the register arguments
6298     if (Op.isReg()) {
6299       Op.addRegOperands(Inst, 1);
6300       continue;
6301     }
6302 
6303     if (Op.isToken() && Op.getToken() == "gds") {
6304       IsGdsHardcoded = true;
6305       continue;
6306     }
6307 
6308     // Handle optional arguments
6309     OptionalIdx[Op.getImmTy()] = i;
6310 
6311     if (Op.getImmTy() == AMDGPUOperand::ImmTySwizzle)
6312       OffsetType = AMDGPUOperand::ImmTySwizzle;
6313   }
6314 
6315   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
6316 
6317   if (!IsGdsHardcoded) {
6318     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6319   }
6320   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6321 }
6322 
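     // Convert parsed operands for the EXP (export) instruction. The 'en'
     // mask is computed from which of the four sources are present ('off'
     // sources stay disabled); with 'compr' each packed source enables a
     // pair of bits.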
6323 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
6324   OptionalImmIndexMap OptionalIdx;
6325 
6326   unsigned OperandIdx[4];
6327   unsigned EnMask = 0;
6328   int SrcIdx = 0;
6329 
6330   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6331     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6332 
6333     // Add the register arguments
6334     if (Op.isReg()) {
6335       assert(SrcIdx < 4);
6336       OperandIdx[SrcIdx] = Inst.size();
6337       Op.addRegOperands(Inst, 1);
6338       ++SrcIdx;
6339       continue;
6340     }
6341 
6342     if (Op.isOff()) {
6343       assert(SrcIdx < 4);
6344       OperandIdx[SrcIdx] = Inst.size();
6345       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
6346       ++SrcIdx;
6347       continue;
6348     }
6349 
6350     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
6351       Op.addImmOperands(Inst, 1);
6352       continue;
6353     }
6354 
6355     if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
6356       continue;
6357 
6358     // Handle optional arguments
6359     OptionalIdx[Op.getImmTy()] = i;
6360   }
6361 
6362   assert(SrcIdx == 4);
6363 
6364   bool Compr = false;
6365   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
6366     Compr = true;
6367     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
6368     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
6369     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
6370   }
6371 
6372   for (int i = 0; i < SrcIdx; ++i) {
6373     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
6374       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
6375     }
6376   }
6377 
6378   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
6379   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
6380 
6381   Inst.addOperand(MCOperand::createImm(EnMask));
6382 }
6383 
6384 //===----------------------------------------------------------------------===//
6385 // s_waitcnt
6386 //===----------------------------------------------------------------------===//
6387 
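     // Encode a single counter value into the combined waitcnt mask. The
     // result is verified by decoding it back; if the value does not fit,
     // either saturate the field (for "_sat" counters) or report failure.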
6388 static bool
6389 encodeCnt(
6390   const AMDGPU::IsaVersion ISA,
6391   int64_t &IntVal,
6392   int64_t CntVal,
6393   bool Saturate,
6394   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6395   unsigned (*decode)(const IsaVersion &Version, unsigned))
6396 {
6397   bool Failed = false;
6398 
6399   IntVal = encode(ISA, IntVal, CntVal);
6400   if (CntVal != decode(ISA, IntVal)) {
6401     if (Saturate) {
6402       IntVal = encode(ISA, IntVal, -1);
6403     } else {
6404       Failed = true;
6405     }
6406   }
6407   return Failed;
6408 }
6409 
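     // Parse one "<name>(<value>)" term of an s_waitcnt operand, e.g.
     // "vmcnt(0)". A "_sat" suffix on the counter name clamps values that
     // do not fit instead of reporting an error.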
6410 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6411 
6412   SMLoc CntLoc = getLoc();
6413   StringRef CntName = getTokenStr();
6414 
6415   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6416       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6417     return false;
6418 
6419   int64_t CntVal;
6420   SMLoc ValLoc = getLoc();
6421   if (!parseExpr(CntVal))
6422     return false;
6423 
6424   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6425 
6426   bool Failed = true;
6427   bool Sat = CntName.endswith("_sat");
6428 
6429   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6430     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6431   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6432     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6433   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6434     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6435   } else {
6436     Error(CntLoc, "invalid counter name " + CntName);
6437     return false;
6438   }
6439 
6440   if (Failed) {
6441     Error(ValLoc, "too large value for " + CntName);
6442     return false;
6443   }
6444 
6445   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6446     return false;
6447 
6448   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6449     if (isToken(AsmToken::EndOfStatement)) {
6450       Error(getLoc(), "expected a counter name");
6451       return false;
6452     }
6453   }
6454 
6455   return true;
6456 }
6457 
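     // An s_waitcnt operand is either a list of named counters, e.g.
     // "vmcnt(0) expcnt(0) lgkmcnt(0)", or a single absolute expression
     // giving the raw encoding.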
6458 OperandMatchResultTy
6459 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
6460   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6461   int64_t Waitcnt = getWaitcntBitMask(ISA);
6462   SMLoc S = getLoc();
6463 
6464   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6465     while (!isToken(AsmToken::EndOfStatement)) {
6466       if (!parseCnt(Waitcnt))
6467         return MatchOperand_ParseFail;
6468     }
6469   } else {
6470     if (!parseExpr(Waitcnt))
6471       return MatchOperand_ParseFail;
6472   }
6473 
6474   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6475   return MatchOperand_Success;
6476 }
6477 
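     // Parse one "<field>(<value>)" term of an s_delay_alu operand, e.g.
     // "instid0(VALU_DEP_1)". Field values are packed into the delay
     // encoding at fixed positions: instid0 at bit 0, instskip at bit 4,
     // instid1 at bit 7.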
6478 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
6479   SMLoc FieldLoc = getLoc();
6480   StringRef FieldName = getTokenStr();
6481   if (!skipToken(AsmToken::Identifier, "expected a field name") ||
6482       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6483     return false;
6484 
6485   SMLoc ValueLoc = getLoc();
6486   StringRef ValueName = getTokenStr();
6487   if (!skipToken(AsmToken::Identifier, "expected a value name") ||
6488       !skipToken(AsmToken::RParen, "expected a right parenthesis"))
6489     return false;
6490 
6491   unsigned Shift;
6492   if (FieldName == "instid0") {
6493     Shift = 0;
6494   } else if (FieldName == "instskip") {
6495     Shift = 4;
6496   } else if (FieldName == "instid1") {
6497     Shift = 7;
6498   } else {
6499     Error(FieldLoc, "invalid field name " + FieldName);
6500     return false;
6501   }
6502 
6503   int Value;
6504   if (Shift == 4) {
6505     // Parse values for instskip.
6506     Value = StringSwitch<int>(ValueName)
6507                 .Case("SAME", 0)
6508                 .Case("NEXT", 1)
6509                 .Case("SKIP_1", 2)
6510                 .Case("SKIP_2", 3)
6511                 .Case("SKIP_3", 4)
6512                 .Case("SKIP_4", 5)
6513                 .Default(-1);
6514   } else {
6515     // Parse values for instid0 and instid1.
6516     Value = StringSwitch<int>(ValueName)
6517                 .Case("NO_DEP", 0)
6518                 .Case("VALU_DEP_1", 1)
6519                 .Case("VALU_DEP_2", 2)
6520                 .Case("VALU_DEP_3", 3)
6521                 .Case("VALU_DEP_4", 4)
6522                 .Case("TRANS32_DEP_1", 5)
6523                 .Case("TRANS32_DEP_2", 6)
6524                 .Case("TRANS32_DEP_3", 7)
6525                 .Case("FMA_ACCUM_CYCLE_1", 8)
6526                 .Case("SALU_CYCLE_1", 9)
6527                 .Case("SALU_CYCLE_2", 10)
6528                 .Case("SALU_CYCLE_3", 11)
6529                 .Default(-1);
6530   }
6531   if (Value < 0) {
6532     Error(ValueLoc, "invalid value name " + ValueName);
6533     return false;
6534   }
6535 
6536   Delay |= Value << Shift;
6537   return true;
6538 }
6539 
6540 OperandMatchResultTy
6541 AMDGPUAsmParser::parseSDelayAluOps(OperandVector &Operands) {
6542   int64_t Delay = 0;
6543   SMLoc S = getLoc();
6544 
6545   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6546     do {
6547       if (!parseDelay(Delay))
6548         return MatchOperand_ParseFail;
6549     } while (trySkipToken(AsmToken::Pipe));
6550   } else {
6551     if (!parseExpr(Delay))
6552       return MatchOperand_ParseFail;
6553   }
6554 
6555   Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
6556   return MatchOperand_Success;
6557 }
6558 
6559 bool
6560 AMDGPUOperand::isSWaitCnt() const {
6561   return isImm();
6562 }
6563 
6564 bool AMDGPUOperand::isSDelayAlu() const { return isImm(); }
6565 
6566 //===----------------------------------------------------------------------===//
6567 // DepCtr
6568 //===----------------------------------------------------------------------===//
6569 
6570 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
6571                                   StringRef DepCtrName) {
6572   switch (ErrorId) {
6573   case OPR_ID_UNKNOWN:
6574     Error(Loc, Twine("invalid counter name ", DepCtrName));
6575     return;
6576   case OPR_ID_UNSUPPORTED:
6577     Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
6578     return;
6579   case OPR_ID_DUPLICATE:
6580     Error(Loc, Twine("duplicate counter name ", DepCtrName));
6581     return;
6582   case OPR_VAL_INVALID:
6583     Error(Loc, Twine("invalid value for ", DepCtrName));
6584     return;
6585   default:
6586     llvm_unreachable("unexpected error id");
6587   }
6588 }
6589 
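     // Parse one "<name>(<value>)" term of an s_waitcnt_depctr operand and
     // merge its encoding into DepCtr. UsedOprMask records which fields
     // have already been specified so duplicates can be diagnosed.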
6590 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
6591 
6592   using namespace llvm::AMDGPU::DepCtr;
6593 
6594   SMLoc DepCtrLoc = getLoc();
6595   StringRef DepCtrName = getTokenStr();
6596 
6597   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6598       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6599     return false;
6600 
6601   int64_t ExprVal;
6602   if (!parseExpr(ExprVal))
6603     return false;
6604 
6605   unsigned PrevOprMask = UsedOprMask;
6606   int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
6607 
6608   if (CntVal < 0) {
6609     depCtrError(DepCtrLoc, CntVal, DepCtrName);
6610     return false;
6611   }
6612 
6613   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6614     return false;
6615 
6616   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6617     if (isToken(AsmToken::EndOfStatement)) {
6618       Error(getLoc(), "expected a counter name");
6619       return false;
6620     }
6621   }
6622 
6623   unsigned CntValMask = PrevOprMask ^ UsedOprMask;
6624   DepCtr = (DepCtr & ~CntValMask) | CntVal;
6625   return true;
6626 }
6627 
6628 OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) {
6629   using namespace llvm::AMDGPU::DepCtr;
6630 
6631   int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
6632   SMLoc Loc = getLoc();
6633 
6634   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6635     unsigned UsedOprMask = 0;
6636     while (!isToken(AsmToken::EndOfStatement)) {
6637       if (!parseDepCtr(DepCtr, UsedOprMask))
6638         return MatchOperand_ParseFail;
6639     }
6640   } else {
6641     if (!parseExpr(DepCtr))
6642       return MatchOperand_ParseFail;
6643   }
6644 
6645   Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
6646   return MatchOperand_Success;
6647 }
6648 
6649 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
6650 
6651 //===----------------------------------------------------------------------===//
6652 // hwreg
6653 //===----------------------------------------------------------------------===//
6654 
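     // Parse the body of a "hwreg(<id>[, <offset>, <width>])" operand,
     // e.g. "hwreg(HW_REG_TRAPSTS, 0, 32)". Offset and width are optional
     // and keep their defaults when omitted.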
6655 bool
6656 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6657                                 OperandInfoTy &Offset,
6658                                 OperandInfoTy &Width) {
6659   using namespace llvm::AMDGPU::Hwreg;
6660 
6661   // The register may be specified by name or using a numeric code
6662   HwReg.Loc = getLoc();
6663   if (isToken(AsmToken::Identifier) &&
6664       (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6665     HwReg.IsSymbolic = true;
6666     lex(); // skip register name
6667   } else if (!parseExpr(HwReg.Id, "a register name")) {
6668     return false;
6669   }
6670 
6671   if (trySkipToken(AsmToken::RParen))
6672     return true;
6673 
6674   // parse optional params
6675   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6676     return false;
6677 
6678   Offset.Loc = getLoc();
6679   if (!parseExpr(Offset.Id))
6680     return false;
6681 
6682   if (!skipToken(AsmToken::Comma, "expected a comma"))
6683     return false;
6684 
6685   Width.Loc = getLoc();
6686   return parseExpr(Width.Id) &&
6687          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6688 }
6689 
6690 bool
6691 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6692                                const OperandInfoTy &Offset,
6693                                const OperandInfoTy &Width) {
6694 
6695   using namespace llvm::AMDGPU::Hwreg;
6696 
6697   if (HwReg.IsSymbolic) {
6698     if (HwReg.Id == OPR_ID_UNSUPPORTED) {
6699       Error(HwReg.Loc,
6700             "specified hardware register is not supported on this GPU");
6701       return false;
6702     }
6703   } else {
6704     if (!isValidHwreg(HwReg.Id)) {
6705       Error(HwReg.Loc,
6706             "invalid code of hardware register: only 6-bit values are legal");
6707       return false;
6708     }
6709   }
6710   if (!isValidHwregOffset(Offset.Id)) {
6711     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6712     return false;
6713   }
6714   if (!isValidHwregWidth(Width.Id)) {
6715     Error(Width.Loc,
6716           "invalid bitfield width: only values from 1 to 32 are legal");
6717     return false;
6718   }
6719   return true;
6720 }
6721 
6722 OperandMatchResultTy
6723 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6724   using namespace llvm::AMDGPU::Hwreg;
6725 
6726   int64_t ImmVal = 0;
6727   SMLoc Loc = getLoc();
6728 
6729   if (trySkipId("hwreg", AsmToken::LParen)) {
6730     OperandInfoTy HwReg(OPR_ID_UNKNOWN);
6731     OperandInfoTy Offset(OFFSET_DEFAULT_);
6732     OperandInfoTy Width(WIDTH_DEFAULT_);
6733     if (parseHwregBody(HwReg, Offset, Width) &&
6734         validateHwreg(HwReg, Offset, Width)) {
6735       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6736     } else {
6737       return MatchOperand_ParseFail;
6738     }
6739   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6740     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6741       Error(Loc, "invalid immediate: only 16-bit values are legal");
6742       return MatchOperand_ParseFail;
6743     }
6744   } else {
6745     return MatchOperand_ParseFail;
6746   }
6747 
6748   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6749   return MatchOperand_Success;
6750 }
6751 
6752 bool AMDGPUOperand::isHwreg() const {
6753   return isImmTy(ImmTyHwreg);
6754 }
6755 
6756 //===----------------------------------------------------------------------===//
6757 // sendmsg
6758 //===----------------------------------------------------------------------===//
6759 
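     // Parse the body of a "sendmsg(<msg>[, <op>[, <stream>]])" operand,
     // e.g. "sendmsg(MSG_GS, GS_OP_EMIT, 0)". The message and operation
     // may be given by name or as expressions; the stream id is an
     // expression.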
6760 bool
6761 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6762                                   OperandInfoTy &Op,
6763                                   OperandInfoTy &Stream) {
6764   using namespace llvm::AMDGPU::SendMsg;
6765 
6766   Msg.Loc = getLoc();
6767   if (isToken(AsmToken::Identifier) &&
6768       (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6769     Msg.IsSymbolic = true;
6770     lex(); // skip message name
6771   } else if (!parseExpr(Msg.Id, "a message name")) {
6772     return false;
6773   }
6774 
6775   if (trySkipToken(AsmToken::Comma)) {
6776     Op.IsDefined = true;
6777     Op.Loc = getLoc();
6778     if (isToken(AsmToken::Identifier) &&
6779         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6780       lex(); // skip operation name
6781     } else if (!parseExpr(Op.Id, "an operation name")) {
6782       return false;
6783     }
6784 
6785     if (trySkipToken(AsmToken::Comma)) {
6786       Stream.IsDefined = true;
6787       Stream.Loc = getLoc();
6788       if (!parseExpr(Stream.Id))
6789         return false;
6790     }
6791   }
6792 
6793   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6794 }
6795 
6796 bool
6797 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6798                                  const OperandInfoTy &Op,
6799                                  const OperandInfoTy &Stream) {
6800   using namespace llvm::AMDGPU::SendMsg;
6801 
6802   // Validation strictness depends on whether the message is specified
6803   // in symbolic or in numeric form. In the latter case, only the
6804   // possibility of encoding the value is checked.
6805   bool Strict = Msg.IsSymbolic;
6806 
6807   if (Strict) {
6808     if (Msg.Id == OPR_ID_UNSUPPORTED) {
6809       Error(Msg.Loc, "specified message id is not supported on this GPU");
6810       return false;
6811     }
6812   } else {
6813     if (!isValidMsgId(Msg.Id, getSTI())) {
6814       Error(Msg.Loc, "invalid message id");
6815       return false;
6816     }
6817   }
6818   if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) {
6819     if (Op.IsDefined) {
6820       Error(Op.Loc, "message does not support operations");
6821     } else {
6822       Error(Msg.Loc, "missing message operation");
6823     }
6824     return false;
6825   }
6826   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6827     Error(Op.Loc, "invalid operation id");
6828     return false;
6829   }
6830   if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) &&
6831       Stream.IsDefined) {
6832     Error(Stream.Loc, "message operation does not support streams");
6833     return false;
6834   }
6835   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6836     Error(Stream.Loc, "invalid message stream id");
6837     return false;
6838   }
6839   return true;
6840 }
6841 
6842 OperandMatchResultTy
6843 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6844   using namespace llvm::AMDGPU::SendMsg;
6845 
6846   int64_t ImmVal = 0;
6847   SMLoc Loc = getLoc();
6848 
6849   if (trySkipId("sendmsg", AsmToken::LParen)) {
6850     OperandInfoTy Msg(OPR_ID_UNKNOWN);
6851     OperandInfoTy Op(OP_NONE_);
6852     OperandInfoTy Stream(STREAM_ID_NONE_);
6853     if (parseSendMsgBody(Msg, Op, Stream) &&
6854         validateSendMsg(Msg, Op, Stream)) {
6855       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6856     } else {
6857       return MatchOperand_ParseFail;
6858     }
6859   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6860     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6861       Error(Loc, "invalid immediate: only 16-bit values are legal");
6862       return MatchOperand_ParseFail;
6863     }
6864   } else {
6865     return MatchOperand_ParseFail;
6866   }
6867 
6868   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6869   return MatchOperand_Success;
6870 }
6871 
6872 bool AMDGPUOperand::isSendMsg() const {
6873   return isImmTy(ImmTySendMsg);
6874 }
6875 
6876 //===----------------------------------------------------------------------===//
6877 // v_interp
6878 //===----------------------------------------------------------------------===//
6879 
6880 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6881   StringRef Str;
6882   SMLoc S = getLoc();
6883 
6884   if (!parseId(Str))
6885     return MatchOperand_NoMatch;
6886 
6887   int Slot = StringSwitch<int>(Str)
6888     .Case("p10", 0)
6889     .Case("p20", 1)
6890     .Case("p0", 2)
6891     .Default(-1);
6892 
6893   if (Slot == -1) {
6894     Error(S, "invalid interpolation slot");
6895     return MatchOperand_ParseFail;
6896   }
6897 
6898   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6899                                               AMDGPUOperand::ImmTyInterpSlot));
6900   return MatchOperand_Success;
6901 }
6902 
6903 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6904   StringRef Str;
6905   SMLoc S = getLoc();
6906 
6907   if (!parseId(Str))
6908     return MatchOperand_NoMatch;
6909 
6910   if (!Str.startswith("attr")) {
6911     Error(S, "invalid interpolation attribute");
6912     return MatchOperand_ParseFail;
6913   }
6914 
6915   StringRef Chan = Str.take_back(2);
6916   int AttrChan = StringSwitch<int>(Chan)
6917     .Case(".x", 0)
6918     .Case(".y", 1)
6919     .Case(".z", 2)
6920     .Case(".w", 3)
6921     .Default(-1);
6922   if (AttrChan == -1) {
6923     Error(S, "invalid or missing interpolation attribute channel");
6924     return MatchOperand_ParseFail;
6925   }
6926 
6927   Str = Str.drop_back(2).drop_front(4);
6928 
6929   uint8_t Attr;
6930   if (Str.getAsInteger(10, Attr)) {
6931     Error(S, "invalid or missing interpolation attribute number");
6932     return MatchOperand_ParseFail;
6933   }
6934 
6935   if (Attr > 63) {
6936     Error(S, "out of bounds interpolation attribute number");
6937     return MatchOperand_ParseFail;
6938   }
6939 
6940   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6941 
6942   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6943                                               AMDGPUOperand::ImmTyInterpAttr));
6944   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6945                                               AMDGPUOperand::ImmTyAttrChan));
6946   return MatchOperand_Success;
6947 }
6948 
6949 //===----------------------------------------------------------------------===//
6950 // exp
6951 //===----------------------------------------------------------------------===//
6952 
6953 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6954   using namespace llvm::AMDGPU::Exp;
6955 
6956   StringRef Str;
6957   SMLoc S = getLoc();
6958 
6959   if (!parseId(Str))
6960     return MatchOperand_NoMatch;
6961 
6962   unsigned Id = getTgtId(Str);
6963   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6964     Error(S, (Id == ET_INVALID) ?
6965                 "invalid exp target" :
6966                 "exp target is not supported on this GPU");
6967     return MatchOperand_ParseFail;
6968   }
6969 
6970   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6971                                               AMDGPUOperand::ImmTyExpTgt));
6972   return MatchOperand_Success;
6973 }
6974 
6975 //===----------------------------------------------------------------------===//
6976 // parser helpers
6977 //===----------------------------------------------------------------------===//
6978 
6979 bool
6980 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6981   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6982 }
6983 
6984 bool
6985 AMDGPUAsmParser::isId(const StringRef Id) const {
6986   return isId(getToken(), Id);
6987 }
6988 
6989 bool
6990 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6991   return getTokenKind() == Kind;
6992 }
6993 
6994 bool
6995 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6996   if (isId(Id)) {
6997     lex();
6998     return true;
6999   }
7000   return false;
7001 }
7002 
7003 bool
7004 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
7005   if (isToken(AsmToken::Identifier)) {
7006     StringRef Tok = getTokenStr();
7007     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
7008       lex();
7009       return true;
7010     }
7011   }
7012   return false;
7013 }
7014 
7015 bool
7016 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
7017   if (isId(Id) && peekToken().is(Kind)) {
7018     lex();
7019     lex();
7020     return true;
7021   }
7022   return false;
7023 }
7024 
7025 bool
7026 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
7027   if (isToken(Kind)) {
7028     lex();
7029     return true;
7030   }
7031   return false;
7032 }
7033 
7034 bool
7035 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
7036                            const StringRef ErrMsg) {
7037   if (!trySkipToken(Kind)) {
7038     Error(getLoc(), ErrMsg);
7039     return false;
7040   }
7041   return true;
7042 }
7043 
7044 bool
7045 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
7046   SMLoc S = getLoc();
7047 
7048   const MCExpr *Expr;
7049   if (Parser.parseExpression(Expr))
7050     return false;
7051 
7052   if (Expr->evaluateAsAbsolute(Imm))
7053     return true;
7054 
7055   if (Expected.empty()) {
7056     Error(S, "expected absolute expression");
7057   } else {
7058     Error(S, Twine("expected ", Expected) +
7059              Twine(" or an absolute expression"));
7060   }
7061   return false;
7062 }
7063 
7064 bool
7065 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
7066   SMLoc S = getLoc();
7067 
7068   const MCExpr *Expr;
7069   if (Parser.parseExpression(Expr))
7070     return false;
7071 
7072   int64_t IntVal;
7073   if (Expr->evaluateAsAbsolute(IntVal)) {
7074     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
7075   } else {
7076     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
7077   }
7078   return true;
7079 }
7080 
7081 bool
7082 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
7083   if (isToken(AsmToken::String)) {
7084     Val = getToken().getStringContents();
7085     lex();
7086     return true;
7087   } else {
7088     Error(getLoc(), ErrMsg);
7089     return false;
7090   }
7091 }
7092 
7093 bool
7094 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
7095   if (isToken(AsmToken::Identifier)) {
7096     Val = getTokenStr();
7097     lex();
7098     return true;
7099   } else {
7100     if (!ErrMsg.empty())
7101       Error(getLoc(), ErrMsg);
7102     return false;
7103   }
7104 }
7105 
7106 AsmToken
7107 AMDGPUAsmParser::getToken() const {
7108   return Parser.getTok();
7109 }
7110 
7111 AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
7112   return isToken(AsmToken::EndOfStatement)
7113              ? getToken()
7114              : getLexer().peekTok(ShouldSkipSpace);
7115 }
7116 
7117 void
7118 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
7119   auto TokCount = getLexer().peekTokens(Tokens);
7120 
7121   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
7122     Tokens[Idx] = AsmToken(AsmToken::Error, "");
7123 }
7124 
7125 AsmToken::TokenKind
7126 AMDGPUAsmParser::getTokenKind() const {
7127   return getLexer().getKind();
7128 }
7129 
7130 SMLoc
7131 AMDGPUAsmParser::getLoc() const {
7132   return getToken().getLoc();
7133 }
7134 
7135 StringRef
7136 AMDGPUAsmParser::getTokenStr() const {
7137   return getToken().getString();
7138 }
7139 
7140 void
7141 AMDGPUAsmParser::lex() {
7142   Parser.Lex();
7143 }
7144 
7145 SMLoc
7146 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
7147                                const OperandVector &Operands) const {
7148   for (unsigned i = Operands.size() - 1; i > 0; --i) {
7149     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7150     if (Test(Op))
7151       return Op.getStartLoc();
7152   }
7153   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
7154 }
7155 
7156 SMLoc
7157 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
7158                            const OperandVector &Operands) const {
7159   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
7160   return getOperandLoc(Test, Operands);
7161 }
7162 
7163 SMLoc
7164 AMDGPUAsmParser::getRegLoc(unsigned Reg,
7165                            const OperandVector &Operands) const {
7166   auto Test = [=](const AMDGPUOperand& Op) {
7167     return Op.isRegKind() && Op.getReg() == Reg;
7168   };
7169   return getOperandLoc(Test, Operands);
7170 }
7171 
7172 SMLoc
7173 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
7174   auto Test = [](const AMDGPUOperand& Op) {
7175     return Op.IsImmKindLiteral() || Op.isExpr();
7176   };
7177   return getOperandLoc(Test, Operands);
7178 }
7179 
7180 SMLoc
7181 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
7182   auto Test = [](const AMDGPUOperand& Op) {
7183     return Op.isImmKindConst();
7184   };
7185   return getOperandLoc(Test, Operands);
7186 }
7187 
7188 //===----------------------------------------------------------------------===//
7189 // swizzle
7190 //===----------------------------------------------------------------------===//
7191 
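     // Pack the and/or/xor masks of a BITMASK_PERM swizzle into the
     // ds_swizzle offset encoding.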
7192 LLVM_READNONE
7193 static unsigned
7194 encodeBitmaskPerm(const unsigned AndMask,
7195                   const unsigned OrMask,
7196                   const unsigned XorMask) {
7197   using namespace llvm::AMDGPU::Swizzle;
7198 
7199   return BITMASK_PERM_ENC |
7200          (AndMask << BITMASK_AND_SHIFT) |
7201          (OrMask  << BITMASK_OR_SHIFT)  |
7202          (XorMask << BITMASK_XOR_SHIFT);
7203 }
7204 
7205 bool
7206 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
7207                                      const unsigned MinVal,
7208                                      const unsigned MaxVal,
7209                                      const StringRef ErrMsg,
7210                                      SMLoc &Loc) {
7211   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7212     return false;
7213   }
7214   Loc = getLoc();
7215   if (!parseExpr(Op)) {
7216     return false;
7217   }
7218   if (Op < MinVal || Op > MaxVal) {
7219     Error(Loc, ErrMsg);
7220     return false;
7221   }
7222 
7223   return true;
7224 }
7225 
7226 bool
7227 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
7228                                       const unsigned MinVal,
7229                                       const unsigned MaxVal,
7230                                       const StringRef ErrMsg) {
7231   SMLoc Loc;
7232   for (unsigned i = 0; i < OpNum; ++i) {
7233     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
7234       return false;
7235   }
7236 
7237   return true;
7238 }
7239 
7240 bool
7241 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7242   using namespace llvm::AMDGPU::Swizzle;
7243 
7244   int64_t Lane[LANE_NUM];
7245   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7246                            "expected a 2-bit lane id")) {
7247     Imm = QUAD_PERM_ENC;
7248     for (unsigned I = 0; I < LANE_NUM; ++I) {
7249       Imm |= Lane[I] << (LANE_SHIFT * I);
7250     }
7251     return true;
7252   }
7253   return false;
7254 }
7255 
7256 bool
7257 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
7258   using namespace llvm::AMDGPU::Swizzle;
7259 
7260   SMLoc Loc;
7261   int64_t GroupSize;
7262   int64_t LaneIdx;
7263 
7264   if (!parseSwizzleOperand(GroupSize,
7265                            2, 32,
7266                            "group size must be in the interval [2,32]",
7267                            Loc)) {
7268     return false;
7269   }
7270   if (!isPowerOf2_64(GroupSize)) {
7271     Error(Loc, "group size must be a power of two");
7272     return false;
7273   }
7274   if (parseSwizzleOperand(LaneIdx,
7275                           0, GroupSize - 1,
7276                           "lane id must be in the interval [0,group size - 1]",
7277                           Loc)) {
7278     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
7279     return true;
7280   }
7281   return false;
7282 }
7283 
7284 bool
7285 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
7286   using namespace llvm::AMDGPU::Swizzle;
7287 
7288   SMLoc Loc;
7289   int64_t GroupSize;
7290 
7291   if (!parseSwizzleOperand(GroupSize,
7292                            2, 32,
7293                            "group size must be in the interval [2,32]",
7294                            Loc)) {
7295     return false;
7296   }
7297   if (!isPowerOf2_64(GroupSize)) {
7298     Error(Loc, "group size must be a power of two");
7299     return false;
7300   }
7301 
7302   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
7303   return true;
7304 }
7305 
7306 bool
7307 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
7308   using namespace llvm::AMDGPU::Swizzle;
7309 
7310   SMLoc Loc;
7311   int64_t GroupSize;
7312 
7313   if (!parseSwizzleOperand(GroupSize,
7314                            1, 16,
7315                            "group size must be in the interval [1,16]",
7316                            Loc)) {
7317     return false;
7318   }
7319   if (!isPowerOf2_64(GroupSize)) {
7320     Error(Loc, "group size must be a power of two");
7321     return false;
7322   }
7323 
7324   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
7325   return true;
7326 }
7327 
7328 bool
7329 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
7330   using namespace llvm::AMDGPU::Swizzle;
7331 
7332   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7333     return false;
7334   }
7335 
7336   StringRef Ctl;
7337   SMLoc StrLoc = getLoc();
7338   if (!parseString(Ctl)) {
7339     return false;
7340   }
7341   if (Ctl.size() != BITMASK_WIDTH) {
7342     Error(StrLoc, "expected a 5-character mask");
7343     return false;
7344   }
7345 
7346   unsigned AndMask = 0;
7347   unsigned OrMask = 0;
7348   unsigned XorMask = 0;
7349 
7350   for (size_t i = 0; i < Ctl.size(); ++i) {
7351     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
7352     switch(Ctl[i]) {
7353     default:
7354       Error(StrLoc, "invalid mask");
7355       return false;
7356     case '0':
7357       break;
7358     case '1':
7359       OrMask |= Mask;
7360       break;
7361     case 'p':
7362       AndMask |= Mask;
7363       break;
7364     case 'i':
7365       AndMask |= Mask;
7366       XorMask |= Mask;
7367       break;
7368     }
7369   }
7370 
7371   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
7372   return true;
7373 }
7374 
7375 bool
7376 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
7377 
7378   SMLoc OffsetLoc = getLoc();
7379 
7380   if (!parseExpr(Imm, "a swizzle macro")) {
7381     return false;
7382   }
7383   if (!isUInt<16>(Imm)) {
7384     Error(OffsetLoc, "expected a 16-bit offset");
7385     return false;
7386   }
7387   return true;
7388 }
7389 
7390 bool
7391 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
7392   using namespace llvm::AMDGPU::Swizzle;
7393 
7394   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
7395 
7396     SMLoc ModeLoc = getLoc();
7397     bool Ok = false;
7398 
7399     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
7400       Ok = parseSwizzleQuadPerm(Imm);
7401     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
7402       Ok = parseSwizzleBitmaskPerm(Imm);
7403     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
7404       Ok = parseSwizzleBroadcast(Imm);
7405     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
7406       Ok = parseSwizzleSwap(Imm);
7407     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
7408       Ok = parseSwizzleReverse(Imm);
7409     } else {
7410       Error(ModeLoc, "expected a swizzle mode");
7411     }
7412 
7413     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
7414   }
7415 
7416   return false;
7417 }
7418 
7419 OperandMatchResultTy
7420 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
7421   SMLoc S = getLoc();
7422   int64_t Imm = 0;
7423 
7424   if (trySkipId("offset")) {
7425 
7426     bool Ok = false;
7427     if (skipToken(AsmToken::Colon, "expected a colon")) {
7428       if (trySkipId("swizzle")) {
7429         Ok = parseSwizzleMacro(Imm);
7430       } else {
7431         Ok = parseSwizzleOffset(Imm);
7432       }
7433     }
7434 
7435     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
7436 
7437     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
7438   } else {
7439     // Swizzle "offset" operand is optional.
7440     // If it is omitted, try parsing other optional operands.
7441     return parseOptionalOpr(Operands);
7442   }
7443 }
7444 
7445 bool
7446 AMDGPUOperand::isSwizzle() const {
7447   return isImmTy(ImmTySwizzle);
7448 }
7449 
7450 //===----------------------------------------------------------------------===//
7451 // VGPR Index Mode
7452 //===----------------------------------------------------------------------===//
7453 
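     // Parse the body of a "gpr_idx(...)" operand: a comma-separated list
     // of VGPR index modes (SRC0, SRC1, SRC2, DST), e.g. "gpr_idx(SRC0,DST)".
     // An empty list yields OFF; UNDEF is returned on error.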
7454 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
7455 
7456   using namespace llvm::AMDGPU::VGPRIndexMode;
7457 
7458   if (trySkipToken(AsmToken::RParen)) {
7459     return OFF;
7460   }
7461 
7462   int64_t Imm = 0;
7463 
7464   while (true) {
7465     unsigned Mode = 0;
7466     SMLoc S = getLoc();
7467 
7468     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
7469       if (trySkipId(IdSymbolic[ModeId])) {
7470         Mode = 1 << ModeId;
7471         break;
7472       }
7473     }
7474 
7475     if (Mode == 0) {
7476       Error(S, (Imm == 0)?
7477                "expected a VGPR index mode or a closing parenthesis" :
7478                "expected a VGPR index mode");
7479       return UNDEF;
7480     }
7481 
7482     if (Imm & Mode) {
7483       Error(S, "duplicate VGPR index mode");
7484       return UNDEF;
7485     }
7486     Imm |= Mode;
7487 
7488     if (trySkipToken(AsmToken::RParen))
7489       break;
7490     if (!skipToken(AsmToken::Comma,
7491                    "expected a comma or a closing parenthesis"))
7492       return UNDEF;
7493   }
7494 
7495   return Imm;
7496 }
7497 
7498 OperandMatchResultTy
7499 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
7500 
7501   using namespace llvm::AMDGPU::VGPRIndexMode;
7502 
7503   int64_t Imm = 0;
7504   SMLoc S = getLoc();
7505 
7506   if (trySkipId("gpr_idx", AsmToken::LParen)) {
7507     Imm = parseGPRIdxMacro();
7508     if (Imm == UNDEF)
7509       return MatchOperand_ParseFail;
7510   } else {
7511     if (getParser().parseAbsoluteExpression(Imm))
7512       return MatchOperand_ParseFail;
7513     if (Imm < 0 || !isUInt<4>(Imm)) {
7514       Error(S, "invalid immediate: only 4-bit values are legal");
7515       return MatchOperand_ParseFail;
7516     }
7517   }
7518 
7519   Operands.push_back(
7520       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
7521   return MatchOperand_Success;
7522 }
7523 
7524 bool AMDGPUOperand::isGPRIdxMode() const {
7525   return isImmTy(ImmTyGprIdxMode);
7526 }
7527 
7528 //===----------------------------------------------------------------------===//
7529 // sopp branch targets
7530 //===----------------------------------------------------------------------===//
7531 
7532 OperandMatchResultTy
7533 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
7534 
7535   // Make sure we are not parsing something
7536   // that looks like a label or an expression but is not.
7537   // This will improve error messages.
7538   if (isRegister() || isModifier())
7539     return MatchOperand_NoMatch;
7540 
7541   if (!parseExpr(Operands))
7542     return MatchOperand_ParseFail;
7543 
7544   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
7545   assert(Opr.isImm() || Opr.isExpr());
7546   SMLoc Loc = Opr.getStartLoc();
7547 
7548   // Currently we do not support arbitrary expressions as branch targets.
7549   // Only labels and absolute expressions are accepted.
7550   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
7551     Error(Loc, "expected an absolute expression or a label");
7552   } else if (Opr.isImm() && !Opr.isS16Imm()) {
7553     Error(Loc, "expected a 16-bit signed jump offset");
7554   }
7555 
7556   return MatchOperand_Success;
7557 }
7558 
7559 //===----------------------------------------------------------------------===//
7560 // Boolean holding registers
7561 //===----------------------------------------------------------------------===//
7562 
7563 OperandMatchResultTy
7564 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
7565   return parseReg(Operands);
7566 }
7567 
7568 //===----------------------------------------------------------------------===//
7569 // mubuf
7570 //===----------------------------------------------------------------------===//
7571 
7572 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
7573   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
7574 }
7575 
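     // Convert parsed MUBUF operands. For atomics, the GLC cache policy
     // bit selects the "return" form: without it the no-return opcode is
     // chosen, and for the return form the destination register is added
     // again as a tied data source.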
7576 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
7577                                    const OperandVector &Operands,
7578                                    bool IsAtomic,
7579                                    bool IsLds) {
7580   OptionalImmIndexMap OptionalIdx;
7581   unsigned FirstOperandIdx = 1;
7582   bool IsAtomicReturn = false;
7583 
7584   if (IsAtomic) {
7585     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7586       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7587       if (!Op.isCPol())
7588         continue;
7589       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7590       break;
7591     }
7592 
7593     if (!IsAtomicReturn) {
7594       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7595       if (NewOpc != -1)
7596         Inst.setOpcode(NewOpc);
7597     }
7598 
7599     IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
7600                      SIInstrFlags::IsAtomicRet;
7601   }
7602 
7603   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7604     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7605 
7606     // Add the register arguments
7607     if (Op.isReg()) {
7608       Op.addRegOperands(Inst, 1);
7609       // Insert a tied src for the atomic return dst.
7610       // This cannot be postponed, as subsequent calls to
7611       // addImmOperands rely on the correct number of MC operands.
7612       if (IsAtomicReturn && i == FirstOperandIdx)
7613         Op.addRegOperands(Inst, 1);
7614       continue;
7615     }
7616 
7617     // Handle the case where soffset is an immediate
7618     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7619       Op.addImmOperands(Inst, 1);
7620       continue;
7621     }
7622 
7623     // Handle tokens like 'offen' which are sometimes hard-coded into the
7624     // asm string.  There are no MCInst operands for these.
7625     if (Op.isToken()) {
7626       continue;
7627     }
7628     assert(Op.isImm());
7629 
7630     // Handle optional arguments
7631     OptionalIdx[Op.getImmTy()] = i;
7632   }
7633 
7634   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7635   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7636 
7637   if (!IsLds) { // tfe is not legal with lds opcodes
7638     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7639   }
7640   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7641 }
7642 
7643 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7644   OptionalImmIndexMap OptionalIdx;
7645 
7646   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7647     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7648 
7649     // Add the register arguments
7650     if (Op.isReg()) {
7651       Op.addRegOperands(Inst, 1);
7652       continue;
7653     }
7654 
7655     // Handle the case where soffset is an immediate
7656     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7657       Op.addImmOperands(Inst, 1);
7658       continue;
7659     }
7660 
7661     // Handle tokens like 'offen' which are sometimes hard-coded into the
7662     // asm string.  There are no MCInst operands for these.
7663     if (Op.isToken()) {
7664       continue;
7665     }
7666     assert(Op.isImm());
7667 
7668     // Handle optional arguments
7669     OptionalIdx[Op.getImmTy()] = i;
7670   }
7671 
7672   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7673                         AMDGPUOperand::ImmTyOffset);
7674   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7675   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7676   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7677   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7678 }
7679 
7680 //===----------------------------------------------------------------------===//
7681 // mimg
7682 //===----------------------------------------------------------------------===//
7683 
7684 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7685                               bool IsAtomic) {
7686   unsigned I = 1;
7687   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7688   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7689     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7690   }
7691 
7692   if (IsAtomic) {
7693     // Add src, same as dst
7694     assert(Desc.getNumDefs() == 1);
7695     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7696   }
7697 
7698   OptionalImmIndexMap OptionalIdx;
7699 
7700   for (unsigned E = Operands.size(); I != E; ++I) {
7701     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7702 
7703     // Add the register arguments
7704     if (Op.isReg()) {
7705       Op.addRegOperands(Inst, 1);
7706     } else if (Op.isImmModifier()) {
7707       OptionalIdx[Op.getImmTy()] = I;
7708     } else if (!Op.isToken()) {
7709       llvm_unreachable("unexpected operand type");
7710     }
7711   }
7712 
7713   bool IsGFX10Plus = isGFX10Plus();
7714 
7715   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7716   if (IsGFX10Plus)
7717     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7718   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7719   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7720   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7721   if (IsGFX10Plus)
7722     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7723   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7724     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7725   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7726   if (!IsGFX10Plus)
7727     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7728   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7729 }
7730 
7731 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7732   cvtMIMG(Inst, Operands, true);
7733 }
7734 
7735 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7736   OptionalImmIndexMap OptionalIdx;
7737   bool IsAtomicReturn = false;
7738 
7739   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7740     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7741     if (!Op.isCPol())
7742       continue;
7743     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7744     break;
7745   }
7746 
7747   if (!IsAtomicReturn) {
7748     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7749     if (NewOpc != -1)
7750       Inst.setOpcode(NewOpc);
7751   }
7752 
7753   IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
7754                    SIInstrFlags::IsAtomicRet;
7755 
7756   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7757     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7758 
7759     // Add the register arguments
7760     if (Op.isReg()) {
7761       Op.addRegOperands(Inst, 1);
7762       if (IsAtomicReturn && i == 1)
7763         Op.addRegOperands(Inst, 1);
7764       continue;
7765     }
7766 
7767     // Handle the case where soffset is an immediate
7768     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7769       Op.addImmOperands(Inst, 1);
7770       continue;
7771     }
7772 
7773     // Handle tokens like 'offen' which are sometimes hard-coded into the
7774     // asm string.  There are no MCInst operands for these.
7775     if (Op.isToken()) {
7776       continue;
7777     }
7778     assert(Op.isImm());
7779 
7780     // Handle optional arguments
7781     OptionalIdx[Op.getImmTy()] = i;
7782   }
7783 
7784   if ((int)Inst.getNumOperands() <=
7785       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7786     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7787   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7788 }
7789 
7790 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7791                                       const OperandVector &Operands) {
7792   for (unsigned I = 1; I < Operands.size(); ++I) {
7793     auto &Operand = (AMDGPUOperand &)*Operands[I];
7794     if (Operand.isReg())
7795       Operand.addRegOperands(Inst, 1);
7796   }
7797 
7798   Inst.addOperand(MCOperand::createImm(1)); // a16
7799 }
7800 
7801 //===----------------------------------------------------------------------===//
7802 // smrd
7803 //===----------------------------------------------------------------------===//
7804 
7805 bool AMDGPUOperand::isSMRDOffset8() const {
7806   return isImm() && isUInt<8>(getImm());
7807 }
7808 
7809 bool AMDGPUOperand::isSMEMOffset() const {
7810   return isImmTy(ImmTyNone) ||
7811          isImmTy(ImmTyOffset); // Offset range is checked later by validator.
7812 }
7813 
7814 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7815   // 32-bit literals are only supported on CI, and we only want to use them
7816   // when the offset does not fit in 8 bits.
7817   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7818 }
7819 
7820 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7821   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7822 }
7823 
7824 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7825   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7826 }
7827 
7828 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7829   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7830 }
7831 
7832 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7833   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7834 }
7835 
7836 //===----------------------------------------------------------------------===//
7837 // vop3
7838 //===----------------------------------------------------------------------===//
7839 
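// Map the omod "mul:N" syntax onto its encoded value: only N = 1, 2 and 4 are
// accepted, and the shift below encodes them as 0, 1 and 2 respectively.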
7840 static bool ConvertOmodMul(int64_t &Mul) {
7841   if (Mul != 1 && Mul != 2 && Mul != 4)
7842     return false;
7843 
7844   Mul >>= 1;
7845   return true;
7846 }
7847 
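// Map the omod "div:N" syntax onto its encoded value: div:1 is encoded as 0
// and div:2 as 3; any other divisor is rejected.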
7848 static bool ConvertOmodDiv(int64_t &Div) {
7849   if (Div == 1) {
7850     Div = 0;
7851     return true;
7852   }
7853 
7854   if (Div == 2) {
7855     Div = 3;
7856     return true;
7857   }
7858 
7859   return false;
7860 }
7861 
7862 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7863 // This is intentional and ensures compatibility with sp3.
7864 // See bug 35397 for details.
7865 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7866   if (BoundCtrl == 0 || BoundCtrl == 1) {
7867     BoundCtrl = 1;
7868     return true;
7869   }
7870   return false;
7871 }
7872 
7873 // Note: the order in this table matches the order of operands in AsmString.
7874 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7875   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7876   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7877   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7878   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7879   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7880   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7881   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7882   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7883   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7884   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7885   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7886   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7887   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7888   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7889   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7890   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7891   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7892   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7893   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7894   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7895   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7896   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7897   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7898   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7899   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7900   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7901   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7902   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7903   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7904   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7905   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7906   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7907   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7908   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7909   {"dpp8",     AMDGPUOperand::ImmTyDPP8, false, nullptr},
7910   {"dpp_ctrl", AMDGPUOperand::ImmTyDppCtrl, false, nullptr},
7911   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7912   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7913   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7914   {"fi",   AMDGPUOperand::ImmTyDppFi, false, nullptr},
7915   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7916   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7917   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr},
7918   {"wait_vdst", AMDGPUOperand::ImmTyWaitVDST, false, nullptr},
7919   {"wait_exp", AMDGPUOperand::ImmTyWaitEXP, false, nullptr}
7920 };
7921 
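// Runs once before any statement is parsed: initialize the target ID from the
// subtarget's feature string and, for HSA ABI version 3 and above, emit the
// AMDGCN target directive. R600 and configurations without a target streamer
// are skipped.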
7922 void AMDGPUAsmParser::onBeginOfFile() {
7923   if (!getParser().getStreamer().getTargetStreamer() ||
7924       getSTI().getTargetTriple().getArch() == Triple::r600)
7925     return;
7926 
7927   if (!getTargetStreamer().getTargetID())
7928     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7929 
7930   if (isHsaAbiVersion3AndAbove(&getSTI()))
7931     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7932 }
7933 
7934 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7935 
7936   OperandMatchResultTy res = parseOptionalOpr(Operands);
7937 
  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // The current design assumes that all operands after the first optional
  // operand are also optional. However, the implementation of some
  // instructions violates this rule (see e.g. flat/global atomics, which
  // have hardcoded 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure the autogenerated parser of custom operands never hits
  // hardcoded mandatory operands.
7948 
7949   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7950     if (res != MatchOperand_Success ||
7951         isToken(AsmToken::EndOfStatement))
7952       break;
7953 
7954     trySkipToken(AsmToken::Comma);
7955     res = parseOptionalOpr(Operands);
7956   }
7957 
7958   return res;
7959 }
7960 
7961 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7962   OperandMatchResultTy res;
7963   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7964     // try to parse any optional operand here
7965     if (Op.IsBit) {
7966       res = parseNamedBit(Op.Name, Operands, Op.Type);
7967     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7968       res = parseOModOperand(Operands);
7969     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7970                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7971                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7972       res = parseSDWASel(Operands, Op.Name, Op.Type);
7973     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7974       res = parseSDWADstUnused(Operands);
7975     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7976                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7977                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7978                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7979       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7980                                         Op.ConvertResult);
7981     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7982       res = parseDim(Operands);
7983     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7984       res = parseCPol(Operands);
7985     } else if (Op.Type == AMDGPUOperand::ImmTyDPP8) {
7986       res = parseDPP8(Operands);
7987     } else if (Op.Type == AMDGPUOperand::ImmTyDppCtrl) {
7988       res = parseDPPCtrl(Operands);
7989     } else {
7990       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7991       if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) {
7992         res = parseOperandArrayWithPrefix("neg", Operands,
7993                                           AMDGPUOperand::ImmTyBLGP,
7994                                           nullptr);
7995       }
7996     }
7997     if (res != MatchOperand_NoMatch) {
7998       return res;
7999     }
8000   }
8001   return MatchOperand_NoMatch;
8002 }
8003 
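// Parse an output modifier written either as "mul:N" or as "div:N".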
8004 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
8005   StringRef Name = getTokenStr();
8006   if (Name == "mul") {
8007     return parseIntWithPrefix("mul", Operands,
8008                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
8009   }
8010 
8011   if (Name == "div") {
8012     return parseIntWithPrefix("div", Operands,
8013                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
8014   }
8015 
8016   return MatchOperand_NoMatch;
8017 }
8018 
8019 // Determines which bit DST_OP_SEL occupies in the op_sel operand according to
8020 // the number of src operands present, then copies that bit into src0_modifiers.
8021 void cvtVOP3DstOpSelOnly(MCInst &Inst) {
8022   int Opc = Inst.getOpcode();
8023   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8024   if (OpSelIdx == -1)
8025     return;
8026 
8027   int SrcNum;
8028   const int Ops[] = { AMDGPU::OpName::src0,
8029                       AMDGPU::OpName::src1,
8030                       AMDGPU::OpName::src2 };
8031   for (SrcNum = 0;
8032        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
8033        ++SrcNum);
8034   assert(SrcNum > 0);
8035 
8036   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8037 
8038   if ((OpSel & (1 << SrcNum)) != 0) {
8039     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
8040     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8041     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
8042   }
8043 }
8044 
8045 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
8046                                    const OperandVector &Operands) {
8047   cvtVOP3P(Inst, Operands);
8048   cvtVOP3DstOpSelOnly(Inst);
8049 }
8050 
8051 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
8052                                    OptionalImmIndexMap &OptionalIdx) {
8053   cvtVOP3P(Inst, Operands, OptionalIdx);
8054   cvtVOP3DstOpSelOnly(Inst);
8055 }
8056 
8057 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
      // 1. This operand is an input modifiers operand
  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not the last operand
      && Desc.NumOperands > (OpNum + 1)
      // 3. The next operand has a register class
      && Desc.OpInfo[OpNum + 1].RegClass != -1
      // 4. The next operand is not tied to any other operand
8065       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
8066 }
8067 
8068 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
8069 {
8070   OptionalImmIndexMap OptionalIdx;
8071   unsigned Opc = Inst.getOpcode();
8072 
8073   unsigned I = 1;
8074   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8075   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8076     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8077   }
8078 
8079   for (unsigned E = Operands.size(); I != E; ++I) {
8080     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8081     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8082       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8083     } else if (Op.isInterpSlot() ||
8084                Op.isInterpAttr() ||
8085                Op.isAttrChan()) {
8086       Inst.addOperand(MCOperand::createImm(Op.getImm()));
8087     } else if (Op.isImmModifier()) {
8088       OptionalIdx[Op.getImmTy()] = I;
8089     } else {
8090       llvm_unreachable("unhandled operand type");
8091     }
8092   }
8093 
8094   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
8095     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
8096   }
8097 
8098   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
8099     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8100   }
8101 
8102   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
8103     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8104   }
8105 }
8106 
8107 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
8108 {
8109   OptionalImmIndexMap OptionalIdx;
8110   unsigned Opc = Inst.getOpcode();
8111 
8112   unsigned I = 1;
8113   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8114   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8115     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8116   }
8117 
8118   for (unsigned E = Operands.size(); I != E; ++I) {
8119     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8120     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8121       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8122     } else if (Op.isImmModifier()) {
8123       OptionalIdx[Op.getImmTy()] = I;
8124     } else {
8125       llvm_unreachable("unhandled operand type");
8126     }
8127   }
8128 
8129   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8130 
8131   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8132   if (OpSelIdx != -1)
8133     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8134 
8135   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
8136 
8137   if (OpSelIdx == -1)
8138     return;
8139 
8140   const int Ops[] = { AMDGPU::OpName::src0,
8141                       AMDGPU::OpName::src1,
8142                       AMDGPU::OpName::src2 };
8143   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8144                          AMDGPU::OpName::src1_modifiers,
8145                          AMDGPU::OpName::src2_modifiers };
8146 
8147   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8148 
8149   for (int J = 0; J < 3; ++J) {
8150     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8151     if (OpIdx == -1)
8152       break;
8153 
8154     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8155     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8156 
8157     if ((OpSel & (1 << J)) != 0)
8158       ModVal |= SISrcMods::OP_SEL_0;
8159     if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
8160         (OpSel & (1 << 3)) != 0)
8161       ModVal |= SISrcMods::DST_OP_SEL;
8162 
8163     Inst.getOperand(ModIdx).setImm(ModVal);
8164   }
8165 }
8166 
8167 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
8168                               OptionalImmIndexMap &OptionalIdx) {
8169   unsigned Opc = Inst.getOpcode();
8170 
8171   unsigned I = 1;
8172   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8173   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8174     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8175   }
8176 
8177   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
8178     // This instruction has src modifiers
8179     for (unsigned E = Operands.size(); I != E; ++I) {
8180       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8181       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8182         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8183       } else if (Op.isImmModifier()) {
8184         OptionalIdx[Op.getImmTy()] = I;
8185       } else if (Op.isRegOrImm()) {
8186         Op.addRegOrImmOperands(Inst, 1);
8187       } else {
8188         llvm_unreachable("unhandled operand type");
8189       }
8190     }
8191   } else {
8192     // No src modifiers
8193     for (unsigned E = Operands.size(); I != E; ++I) {
8194       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8195       if (Op.isMod()) {
8196         OptionalIdx[Op.getImmTy()] = I;
8197       } else {
8198         Op.addRegOrImmOperands(Inst, 1);
8199       }
8200     }
8201   }
8202 
8203   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
8204     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8205   }
8206 
8207   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
8208     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8209   }
8210 
  // Special case for v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // these have a src2 register operand that is tied to the dst operand.
  // We don't allow modifiers for this operand in the assembler, so
  // src2_modifiers should be 0.
8215   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
8216       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
8217       Opc == AMDGPU::V_MAC_F32_e64_vi ||
8218       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
8219       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
8220       Opc == AMDGPU::V_MAC_F16_e64_vi ||
8221       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
8222       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
8223       Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
8224       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
8225       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
8226       Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
8227       Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
8228       Opc == AMDGPU::V_FMAC_F16_e64_gfx11) {
8229     auto it = Inst.begin();
8230     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
8231     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
8232     ++it;
8233     // Copy the operand to ensure it's not invalidated when Inst grows.
8234     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
8235   }
8236 }
8237 
8238 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
8239   OptionalImmIndexMap OptionalIdx;
8240   cvtVOP3(Inst, Operands, OptionalIdx);
8241 }
8242 
8243 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
8244                                OptionalImmIndexMap &OptIdx) {
8245   const int Opc = Inst.getOpcode();
8246   const MCInstrDesc &Desc = MII.get(Opc);
8247 
8248   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
8249 
8250   if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
8251       Opc == AMDGPU::V_CVT_SR_FP8_F32_vi) {
8252     Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
8253     Inst.addOperand(Inst.getOperand(0));
8254   }
8255 
8256   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
8257     assert(!IsPacked);
8258     Inst.addOperand(Inst.getOperand(0));
8259   }
8260 
8261   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
8262   // instruction, and then figure out where to actually put the modifiers
8263 
8264   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8265   if (OpSelIdx != -1) {
8266     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
8267   }
8268 
8269   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
8270   if (OpSelHiIdx != -1) {
8271     int DefaultVal = IsPacked ? -1 : 0;
8272     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
8273                           DefaultVal);
8274   }
8275 
8276   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
8277   if (NegLoIdx != -1) {
8278     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
8279     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
8280   }
8281 
8282   const int Ops[] = { AMDGPU::OpName::src0,
8283                       AMDGPU::OpName::src1,
8284                       AMDGPU::OpName::src2 };
8285   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8286                          AMDGPU::OpName::src1_modifiers,
8287                          AMDGPU::OpName::src2_modifiers };
8288 
8289   unsigned OpSel = 0;
8290   unsigned OpSelHi = 0;
8291   unsigned NegLo = 0;
8292   unsigned NegHi = 0;
8293 
8294   if (OpSelIdx != -1)
8295     OpSel = Inst.getOperand(OpSelIdx).getImm();
8296 
8297   if (OpSelHiIdx != -1)
8298     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
8299 
8300   if (NegLoIdx != -1) {
8301     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
8302     NegLo = Inst.getOperand(NegLoIdx).getImm();
8303     NegHi = Inst.getOperand(NegHiIdx).getImm();
8304   }
8305 
8306   for (int J = 0; J < 3; ++J) {
8307     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8308     if (OpIdx == -1)
8309       break;
8310 
8311     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8312 
8313     if (ModIdx == -1)
8314       continue;
8315 
8316     uint32_t ModVal = 0;
8317 
8318     if ((OpSel & (1 << J)) != 0)
8319       ModVal |= SISrcMods::OP_SEL_0;
8320 
8321     if ((OpSelHi & (1 << J)) != 0)
8322       ModVal |= SISrcMods::OP_SEL_1;
8323 
8324     if ((NegLo & (1 << J)) != 0)
8325       ModVal |= SISrcMods::NEG;
8326 
8327     if ((NegHi & (1 << J)) != 0)
8328       ModVal |= SISrcMods::NEG_HI;
8329 
8330     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
8331   }
8332 }
8333 
8334 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8335   OptionalImmIndexMap OptIdx;
8336   cvtVOP3(Inst, Operands, OptIdx);
8337   cvtVOP3P(Inst, Operands, OptIdx);
8338 }
8339 
8340 //===----------------------------------------------------------------------===//
8341 // VOPD
8342 //===----------------------------------------------------------------------===//
8343 
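// Parse the "::" separator of a VOPD instruction. The two colons are recorded
// as a single "::" token operand, and the identifier that follows (the OpY
// mnemonic) is parsed as an expression operand.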
8344 OperandMatchResultTy AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
8345   if (!hasVOPD(getSTI()))
8346     return MatchOperand_NoMatch;
8347 
8348   if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
8349     SMLoc S = getLoc();
8350     lex();
8351     lex();
8352     Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
8353     const MCExpr *Expr;
8354     if (isToken(AsmToken::Identifier) && !Parser.parseExpression(Expr)) {
8355       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
8356       return MatchOperand_Success;
8357     }
8358     Error(S, "invalid VOPD :: usage");
8359     return MatchOperand_ParseFail;
8360   }
8361   return MatchOperand_NoMatch;
8362 }
8363 
8364 // Create VOPD MCInst operands using parsed assembler operands.
8365 // Parsed VOPD operands are ordered as follows:
8366 //   OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::'
8367 //   OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm]
8368 // If both OpX and OpY have an imm, the first imm has a different name:
8369 //   OpXMnemo dstX src0X [vsrc1X|immDeferred vsrc1X|vsrc1X immDeferred] '::'
8370 //   OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm]
8371 // MCInst operands have the following order:
8372 //   dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
8373 void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
8374   auto addOp = [&](uint16_t i) { // NOLINT:function pointer
8375     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8376     if (Op.isReg()) {
8377       Op.addRegOperands(Inst, 1);
8378       return;
8379     }
8380     if (Op.isImm()) {
8381       Op.addImmOperands(Inst, 1);
8382       return;
8383     }
8384     // Handle tokens like 'offen' which are sometimes hard-coded into the
8385     // asm string.  There are no MCInst operands for these.
8386     if (Op.isToken()) {
8387       return;
8388     }
8389     llvm_unreachable("Unhandled operand type in cvtVOPD");
8390   };
8391 
8392   // Indices into MCInst.Operands
8393   const auto FmamkOpXImmMCIndex = 3; // dstX, dstY, src0X, imm, ...
8394   const auto FmaakOpXImmMCIndex = 4; // dstX, dstY, src0X, src1X, imm, ...
8395   const auto MinOpYImmMCIndex = 4;   // dstX, dstY, src0X, src0Y, imm, ...
8396 
8397   unsigned Opc = Inst.getOpcode();
8398   bool HasVsrc1X =
8399       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vsrc1X) != -1;
8400   bool HasImmX =
8401       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::immDeferred) != -1 ||
8402       (HasVsrc1X && (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) ==
8403                          FmamkOpXImmMCIndex ||
8404                      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) ==
8405                          FmaakOpXImmMCIndex));
8406 
8407   bool HasVsrc1Y =
8408       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vsrc1Y) != -1;
8409   bool HasImmY =
8410       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::immDeferred) != -1 ||
8411       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) >=
8412           MinOpYImmMCIndex + HasVsrc1X;
8413 
8414   // Indices of parsed operands relative to dst
8415   const auto DstIdx = 0;
8416   const auto Src0Idx = 1;
8417   const auto Vsrc1OrImmIdx = 2;
8418 
8419   const auto OpXOperandsSize = 2 + HasImmX + HasVsrc1X;
8420   const auto BridgeTokensSize = 2; // Special VOPD tokens ('::' and OpYMnemo)
8421 
8422   // Offsets into parsed operands
8423   const auto OpXFirstOperandOffset = 1;
8424   const auto OpYFirstOperandOffset =
8425       OpXFirstOperandOffset + OpXOperandsSize + BridgeTokensSize;
8426 
8427   // Order of addOp calls determines MC operand order
8428   addOp(OpXFirstOperandOffset + DstIdx); // vdstX
8429   addOp(OpYFirstOperandOffset + DstIdx); // vdstY
8430 
8431   addOp(OpXFirstOperandOffset + Src0Idx); // src0X
8432   if (HasImmX) {
8433     // immX then vsrc1X for fmamk, vsrc1X then immX for fmaak
8434     addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx);
8435     addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx + 1);
8436   } else {
8437     if (HasVsrc1X) // all except v_mov
8438       addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx); // vsrc1X
8439   }
8440 
8441   addOp(OpYFirstOperandOffset + Src0Idx); // src0Y
8442   if (HasImmY) {
8443     // immY then vsrc1Y for fmamk, vsrc1Y then immY for fmaak
8444     addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx);
8445     addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx + 1);
8446   } else {
8447     if (HasVsrc1Y) // all except v_mov
8448       addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx); // vsrc1Y
8449   }
8450 }
8451 
8452 //===----------------------------------------------------------------------===//
8453 // dpp
8454 //===----------------------------------------------------------------------===//
8455 
8456 bool AMDGPUOperand::isDPP8() const {
8457   return isImmTy(ImmTyDPP8);
8458 }
8459 
8460 bool AMDGPUOperand::isDPPCtrl() const {
8461   using namespace AMDGPU::DPP;
8462 
8463   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8464   if (result) {
8465     int64_t Imm = getImm();
8466     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8467            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
8468            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8469            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
8470            (Imm == DppCtrl::WAVE_SHL1) ||
8471            (Imm == DppCtrl::WAVE_ROL1) ||
8472            (Imm == DppCtrl::WAVE_SHR1) ||
8473            (Imm == DppCtrl::WAVE_ROR1) ||
8474            (Imm == DppCtrl::ROW_MIRROR) ||
8475            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
8476            (Imm == DppCtrl::BCAST15) ||
8477            (Imm == DppCtrl::BCAST31) ||
8478            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8479            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
8480   }
8481   return false;
8482 }
8483 
8484 //===----------------------------------------------------------------------===//
8485 // mAI
8486 //===----------------------------------------------------------------------===//
8487 
8488 bool AMDGPUOperand::isBLGP() const {
8489   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8490 }
8491 
8492 bool AMDGPUOperand::isCBSZ() const {
8493   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
8494 }
8495 
8496 bool AMDGPUOperand::isABID() const {
8497   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
8498 }
8499 
8500 bool AMDGPUOperand::isS16Imm() const {
8501   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8502 }
8503 
8504 bool AMDGPUOperand::isU16Imm() const {
8505   return isImm() && isUInt<16>(getImm());
8506 }
8507 
8508 //===----------------------------------------------------------------------===//
8509 // dim
8510 //===----------------------------------------------------------------------===//
8511 
8512 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
8513   // We want to allow "dim:1D" etc.,
8514   // but the initial 1 is tokenized as an integer.
8515   std::string Token;
8516   if (isToken(AsmToken::Integer)) {
8517     SMLoc Loc = getToken().getEndLoc();
8518     Token = std::string(getTokenStr());
8519     lex();
8520     if (getLoc() != Loc)
8521       return false;
8522   }
8523 
8524   StringRef Suffix;
8525   if (!parseId(Suffix))
8526     return false;
8527   Token += Suffix;
8528 
8529   StringRef DimId = Token;
8530   if (DimId.startswith("SQ_RSRC_IMG_"))
8531     DimId = DimId.drop_front(12);
8532 
8533   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
8534   if (!DimInfo)
8535     return false;
8536 
8537   Encoding = DimInfo->Encoding;
8538   return true;
8539 }
8540 
8541 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
8542   if (!isGFX10Plus())
8543     return MatchOperand_NoMatch;
8544 
8545   SMLoc S = getLoc();
8546 
8547   if (!trySkipId("dim", AsmToken::Colon))
8548     return MatchOperand_NoMatch;
8549 
8550   unsigned Encoding;
8551   SMLoc Loc = getLoc();
8552   if (!parseDimId(Encoding)) {
8553     Error(Loc, "invalid dim value");
8554     return MatchOperand_ParseFail;
8555   }
8556 
8557   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
8558                                               AMDGPUOperand::ImmTyDim));
8559   return MatchOperand_Success;
8560 }
8561 
8562 //===----------------------------------------------------------------------===//
8563 // dpp
8564 //===----------------------------------------------------------------------===//
8565 
8566 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
8567   SMLoc S = getLoc();
8568 
8569   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
8570     return MatchOperand_NoMatch;
8571 
8572   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
8573 
8574   int64_t Sels[8];
8575 
8576   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8577     return MatchOperand_ParseFail;
8578 
8579   for (size_t i = 0; i < 8; ++i) {
8580     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8581       return MatchOperand_ParseFail;
8582 
8583     SMLoc Loc = getLoc();
8584     if (getParser().parseAbsoluteExpression(Sels[i]))
8585       return MatchOperand_ParseFail;
8586     if (0 > Sels[i] || 7 < Sels[i]) {
8587       Error(Loc, "expected a 3-bit value");
8588       return MatchOperand_ParseFail;
8589     }
8590   }
8591 
8592   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8593     return MatchOperand_ParseFail;
8594 
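  // Pack the eight 3-bit selectors into a single immediate, with selector i
  // occupying bits [3*i+2 : 3*i].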
8595   unsigned DPP8 = 0;
8596   for (size_t i = 0; i < 8; ++i)
8597     DPP8 |= (Sels[i] << (i * 3));
8598 
8599   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
8600   return MatchOperand_Success;
8601 }
8602 
8603 bool
8604 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
8605                                     const OperandVector &Operands) {
8606   if (Ctrl == "row_newbcast")
8607     return isGFX90A();
8608 
8609   if (Ctrl == "row_share" ||
8610       Ctrl == "row_xmask")
8611     return isGFX10Plus();
8612 
8613   if (Ctrl == "wave_shl" ||
8614       Ctrl == "wave_shr" ||
8615       Ctrl == "wave_rol" ||
8616       Ctrl == "wave_ror" ||
8617       Ctrl == "row_bcast")
8618     return isVI() || isGFX9();
8619 
8620   return Ctrl == "row_mirror" ||
8621          Ctrl == "row_half_mirror" ||
8622          Ctrl == "quad_perm" ||
8623          Ctrl == "row_shl" ||
8624          Ctrl == "row_shr" ||
8625          Ctrl == "row_ror";
8626 }
8627 
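// Parse the bracketed list that follows "quad_perm:". Each of the four
// selectors is a 2-bit value; selector i is packed into bits [2*i+1 : 2*i]
// of the returned control value.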
8628 int64_t
8629 AMDGPUAsmParser::parseDPPCtrlPerm() {
8630   // quad_perm:[%d,%d,%d,%d]
8631 
8632   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8633     return -1;
8634 
8635   int64_t Val = 0;
8636   for (int i = 0; i < 4; ++i) {
8637     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8638       return -1;
8639 
8640     int64_t Temp;
8641     SMLoc Loc = getLoc();
8642     if (getParser().parseAbsoluteExpression(Temp))
8643       return -1;
8644     if (Temp < 0 || Temp > 3) {
8645       Error(Loc, "expected a 2-bit value");
8646       return -1;
8647     }
8648 
8649     Val += (Temp << i * 2);
8650   }
8651 
8652   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8653     return -1;
8654 
8655   return Val;
8656 }
8657 
8658 int64_t
8659 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
8660   using namespace AMDGPU::DPP;
8661 
8662   // sel:%d
8663 
8664   int64_t Val;
8665   SMLoc Loc = getLoc();
8666 
8667   if (getParser().parseAbsoluteExpression(Val))
8668     return -1;
8669 
8670   struct DppCtrlCheck {
8671     int64_t Ctrl;
8672     int Lo;
8673     int Hi;
8674   };
8675 
8676   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
8677     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
8678     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
8679     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
8680     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
8681     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
8682     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
8683     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
8684     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
8685     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
8686     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
8687     .Default({-1, 0, 0});
8688 
8689   bool Valid;
8690   if (Check.Ctrl == -1) {
8691     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
8692     Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
8693   } else {
8694     Valid = Check.Lo <= Val && Val <= Check.Hi;
8695     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
8696   }
8697 
8698   if (!Valid) {
8699     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
8700     return -1;
8701   }
8702 
8703   return Val;
8704 }
8705 
8706 OperandMatchResultTy
8707 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
8708   using namespace AMDGPU::DPP;
8709 
8710   if (!isToken(AsmToken::Identifier) ||
8711       !isSupportedDPPCtrl(getTokenStr(), Operands))
8712     return MatchOperand_NoMatch;
8713 
8714   SMLoc S = getLoc();
8715   int64_t Val = -1;
8716   StringRef Ctrl;
8717 
8718   parseId(Ctrl);
8719 
8720   if (Ctrl == "row_mirror") {
8721     Val = DppCtrl::ROW_MIRROR;
8722   } else if (Ctrl == "row_half_mirror") {
8723     Val = DppCtrl::ROW_HALF_MIRROR;
8724   } else {
8725     if (skipToken(AsmToken::Colon, "expected a colon")) {
8726       if (Ctrl == "quad_perm") {
8727         Val = parseDPPCtrlPerm();
8728       } else {
8729         Val = parseDPPCtrlSel(Ctrl);
8730       }
8731     }
8732   }
8733 
8734   if (Val == -1)
8735     return MatchOperand_ParseFail;
8736 
8737   Operands.push_back(
8738     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
8739   return MatchOperand_Success;
8740 }
8741 
8742 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
8743   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
8744 }
8745 
8746 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
8747   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
8748 }
8749 
8750 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
8751   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
8752 }
8753 
8754 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
8755   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
8756 }
8757 
8758 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
8759   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
8760 }
8761 
8762 void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8763   OptionalImmIndexMap OptionalIdx;
8764   unsigned Opc = Inst.getOpcode();
8765   bool HasModifiers = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
8766   unsigned I = 1;
8767   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8768   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8769     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8770   }
8771 
8772   int Fi = 0;
8773   for (unsigned E = Operands.size(); I != E; ++I) {
8774     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8775                                             MCOI::TIED_TO);
8776     if (TiedTo != -1) {
8777       assert((unsigned)TiedTo < Inst.getNumOperands());
8778       // handle tied old or src2 for MAC instructions
8779       Inst.addOperand(Inst.getOperand(TiedTo));
8780     }
8781     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8782     // Add the register arguments
8783     if (IsDPP8 && Op.isFI()) {
8784       Fi = Op.getImm();
8785     } else if (HasModifiers &&
8786                isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8787       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8788     } else if (Op.isReg()) {
8789       Op.addRegOperands(Inst, 1);
8790     } else if (Op.isImm() &&
8791                Desc.OpInfo[Inst.getNumOperands()].RegClass != -1) {
8792       assert(!HasModifiers && "Case should be unreachable with modifiers");
8793       assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
8794       Op.addImmOperands(Inst, 1);
8795     } else if (Op.isImm()) {
8796       OptionalIdx[Op.getImmTy()] = I;
8797     } else {
8798       llvm_unreachable("unhandled operand type");
8799     }
8800   }
8801   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
8802     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8803   }
8804   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
8805     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8806   }
8807   if (Desc.TSFlags & SIInstrFlags::VOP3P)
8808     cvtVOP3P(Inst, Operands, OptionalIdx);
8809   else if (Desc.TSFlags & SIInstrFlags::VOP3)
8810     cvtVOP3OpSel(Inst, Operands, OptionalIdx);
8811   else if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) {
8812     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8813   }
8814 
8815   if (IsDPP8) {
8816     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
8817     using namespace llvm::AMDGPU::DPP;
8818     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8819   } else {
8820     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
8821     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8822     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8823     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8824     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8825       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8826     }
8827   }
8828 }
8829 
8830 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8831   OptionalImmIndexMap OptionalIdx;
8832 
8833   unsigned Opc = Inst.getOpcode();
8834   bool HasModifiers =
8835       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
8836   unsigned I = 1;
8837   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8838   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8839     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8840   }
8841 
8842   int Fi = 0;
8843   for (unsigned E = Operands.size(); I != E; ++I) {
8844     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8845                                             MCOI::TIED_TO);
8846     if (TiedTo != -1) {
8847       assert((unsigned)TiedTo < Inst.getNumOperands());
8848       // handle tied old or src2 for MAC instructions
8849       Inst.addOperand(Inst.getOperand(TiedTo));
8850     }
8851     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8852     // Add the register arguments
8853     if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) DPP variants use the "vcc" token.
      // Skip it.
8856       continue;
8857     }
8858 
8859     if (IsDPP8) {
8860       if (Op.isDPP8()) {
8861         Op.addImmOperands(Inst, 1);
8862       } else if (HasModifiers &&
8863                  isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8864         Op.addRegWithFPInputModsOperands(Inst, 2);
8865       } else if (Op.isFI()) {
8866         Fi = Op.getImm();
8867       } else if (Op.isReg()) {
8868         Op.addRegOperands(Inst, 1);
8869       } else {
8870         llvm_unreachable("Invalid operand type");
8871       }
8872     } else {
8873       if (HasModifiers &&
8874           isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8875         Op.addRegWithFPInputModsOperands(Inst, 2);
8876       } else if (Op.isReg()) {
8877         Op.addRegOperands(Inst, 1);
8878       } else if (Op.isDPPCtrl()) {
8879         Op.addImmOperands(Inst, 1);
8880       } else if (Op.isImm()) {
8881         // Handle optional arguments
8882         OptionalIdx[Op.getImmTy()] = I;
8883       } else {
8884         llvm_unreachable("Invalid operand type");
8885       }
8886     }
8887   }
8888 
8889   if (IsDPP8) {
8890     using namespace llvm::AMDGPU::DPP;
8891     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8892   } else {
8893     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8894     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8895     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8896     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8897       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8898     }
8899   }
8900 }
8901 
8902 //===----------------------------------------------------------------------===//
8903 // sdwa
8904 //===----------------------------------------------------------------------===//
8905 
8906 OperandMatchResultTy
8907 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
8908                               AMDGPUOperand::ImmTy Type) {
8909   using namespace llvm::AMDGPU::SDWA;
8910 
8911   SMLoc S = getLoc();
8912   StringRef Value;
8913   OperandMatchResultTy res;
8914 
8915   SMLoc StringLoc;
8916   res = parseStringWithPrefix(Prefix, Value, StringLoc);
8917   if (res != MatchOperand_Success) {
8918     return res;
8919   }
8920 
8921   int64_t Int;
8922   Int = StringSwitch<int64_t>(Value)
8923         .Case("BYTE_0", SdwaSel::BYTE_0)
8924         .Case("BYTE_1", SdwaSel::BYTE_1)
8925         .Case("BYTE_2", SdwaSel::BYTE_2)
8926         .Case("BYTE_3", SdwaSel::BYTE_3)
8927         .Case("WORD_0", SdwaSel::WORD_0)
8928         .Case("WORD_1", SdwaSel::WORD_1)
8929         .Case("DWORD", SdwaSel::DWORD)
8930         .Default(0xffffffff);
8931 
8932   if (Int == 0xffffffff) {
8933     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8934     return MatchOperand_ParseFail;
8935   }
8936 
8937   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8938   return MatchOperand_Success;
8939 }
8940 
8941 OperandMatchResultTy
8942 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8943   using namespace llvm::AMDGPU::SDWA;
8944 
8945   SMLoc S = getLoc();
8946   StringRef Value;
8947   OperandMatchResultTy res;
8948 
8949   SMLoc StringLoc;
8950   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8951   if (res != MatchOperand_Success) {
8952     return res;
8953   }
8954 
8955   int64_t Int;
8956   Int = StringSwitch<int64_t>(Value)
8957         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8958         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8959         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8960         .Default(0xffffffff);
8961 
8962   if (Int == 0xffffffff) {
8963     Error(StringLoc, "invalid dst_unused value");
8964     return MatchOperand_ParseFail;
8965   }
8966 
8967   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8968   return MatchOperand_Success;
8969 }
8970 
8971 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8972   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8973 }
8974 
8975 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8976   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8977 }
8978 
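// VOP2b (carry-out) SDWA variants: skip both the vcc destination and the vcc
// source operand when building the MCInst.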
8979 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8980   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8981 }
8982 
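// VOP2e SDWA variants: keep the vcc destination but skip the vcc source
// operand.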
8983 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8984   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8985 }
8986 
8987 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8988   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8989 }
8990 
8991 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8992                               uint64_t BasicInstType,
8993                               bool SkipDstVcc,
8994                               bool SkipSrcVcc) {
8995   using namespace llvm::AMDGPU::SDWA;
8996 
8997   OptionalImmIndexMap OptionalIdx;
8998   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8999   bool SkippedVcc = false;
9000 
9001   unsigned I = 1;
9002   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9003   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9004     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9005   }
9006 
9007   for (unsigned E = Operands.size(); I != E; ++I) {
9008     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9009     if (SkipVcc && !SkippedVcc && Op.isReg() &&
9010         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) SDWA variants use the "vcc" token as
      // dst. Skip it if it is the 2nd operand (e.g. v_add_i32_sdwa v1, vcc,
      // v2, v3) or the 4th operand (v_addc_u32_sdwa v1, vcc, v2, v3, vcc).
      // Skip VCC only if we didn't skip it on the previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
9016       if (BasicInstType == SIInstrFlags::VOP2 &&
9017           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
9018            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
9019         SkippedVcc = true;
9020         continue;
9021       } else if (BasicInstType == SIInstrFlags::VOPC &&
9022                  Inst.getNumOperands() == 0) {
9023         SkippedVcc = true;
9024         continue;
9025       }
9026     }
9027     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9028       Op.addRegOrImmWithInputModsOperands(Inst, 2);
9029     } else if (Op.isImm()) {
9030       // Handle optional arguments
9031       OptionalIdx[Op.getImmTy()] = I;
9032     } else {
9033       llvm_unreachable("Invalid operand type");
9034     }
9035     SkippedVcc = false;
9036   }
9037 
9038   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
9039       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
9040       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments
9042     switch (BasicInstType) {
9043     case SIInstrFlags::VOP1:
9044       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
9045                                      AMDGPU::OpName::clamp) != -1) {
9046         addOptionalImmOperand(Inst, Operands, OptionalIdx,
9047                               AMDGPUOperand::ImmTyClampSI, 0);
9048       }
9049       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
9050                                      AMDGPU::OpName::omod) != -1) {
9051         addOptionalImmOperand(Inst, Operands, OptionalIdx,
9052                               AMDGPUOperand::ImmTyOModSI, 0);
9053       }
9054       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
9055                                      AMDGPU::OpName::dst_sel) != -1) {
9056         addOptionalImmOperand(Inst, Operands, OptionalIdx,
9057                               AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
9058       }
9059       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
9060                                      AMDGPU::OpName::dst_unused) != -1) {
9061         addOptionalImmOperand(Inst, Operands, OptionalIdx,
9062                               AMDGPUOperand::ImmTySdwaDstUnused,
9063                               DstUnused::UNUSED_PRESERVE);
9064       }
9065       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
9066       break;
9067 
9068     case SIInstrFlags::VOP2:
9069       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
9070       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
9071         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
9072       }
9073       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
9074       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
9075       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
9076       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
9077       break;
9078 
9079     case SIInstrFlags::VOPC:
9080       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
9081         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
9082       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
9083       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
9084       break;
9085 
9086     default:
9087       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
9088     }
9089   }
9090 
  // Special case for v_mac_{f16, f32}:
  // these have a src2 register operand that is tied to the dst operand.
9093   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
9094       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
9095     auto it = Inst.begin();
9096     std::advance(
9097       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
9098     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
9099   }
9100 }
9101 
9102 //===----------------------------------------------------------------------===//
9103 // mAI
9104 //===----------------------------------------------------------------------===//
9105 
9106 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
9107   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
9108 }
9109 
9110 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
9111   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
9112 }
9113 
9114 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
9115   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
9116 }
9117 
9118 /// Force static initialization.
9119 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
9120   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
9121   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
9122 }
9123 
9124 #define GET_REGISTER_MATCHER
9125 #define GET_MATCHER_IMPLEMENTATION
9126 #define GET_MNEMONIC_SPELL_CHECKER
9127 #define GET_MNEMONIC_CHECKER
9128 #include "AMDGPUGenAsmMatcher.inc"
9129 
// This function must be defined after the auto-generated include so that the
// MatchClassKind enum is defined.
9132 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
9133                                                      unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks whether we were given an immediate operand
  // when the corresponding token was expected.
9138   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
9139   switch (Kind) {
9140   case MCK_addr64:
9141     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
9142   case MCK_gds:
9143     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
9144   case MCK_lds:
9145     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
9146   case MCK_idxen:
9147     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
9148   case MCK_offen:
9149     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
9150   case MCK_SSrcB32:
9151     // When operands have expression values, they will return true for isToken,
9152     // because it is not possible to distinguish between a token and an
9153     // expression at parse time. MatchInstructionImpl() will always try to
9154     // match an operand as a token, when isToken returns true, and when the
9155     // name of the expression is not a valid token, the match will fail,
9156     // so we need to handle it here.
9157     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
9158   case MCK_SSrcF32:
9159     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
9160   case MCK_SoppBrTarget:
9161     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
9162   case MCK_VReg32OrOff:
9163     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
9164   case MCK_InterpSlot:
9165     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
9166   case MCK_Attr:
9167     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
9168   case MCK_AttrChan:
9169     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
9170   case MCK_ImmSMEMOffset:
9171     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
9172   case MCK_SReg_64:
9173   case MCK_SReg_64_XEXEC:
9174     // Null is defined as a 32-bit register but
9175     // it should also be enabled with 64-bit operands.
9176     // The following code enables it for SReg_64 operands
9177     // used as source and destination. Remaining source
9178     // operands are handled in isInlinableImm.
9179     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
9180   default:
9181     return Match_InvalidOperand;
9182   }
9183 }
9184 
9185 //===----------------------------------------------------------------------===//
9186 // endpgm
9187 //===----------------------------------------------------------------------===//
9188 
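// s_endpgm optionally takes an immediate operand; when no expression follows,
// the operand defaults to 0, and any explicit value must fit in 16 bits.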
9189 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
9190   SMLoc S = getLoc();
9191   int64_t Imm = 0;
9192 
9193   if (!parseExpr(Imm)) {
9194     // The operand is optional, if not present default to 0
9195     Imm = 0;
9196   }
9197 
9198   if (!isUInt<16>(Imm)) {
9199     Error(S, "expected a 16-bit value");
9200     return MatchOperand_ParseFail;
9201   }
9202 
9203   Operands.push_back(
9204       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
9205   return MatchOperand_Success;
9206 }
9207 
9208 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
9209 
9210 //===----------------------------------------------------------------------===//
9211 // LDSDIR
9212 //===----------------------------------------------------------------------===//
9213 
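// The wait_vdst operand is a 4-bit count; the default operand below supplies
// 0 when it is omitted in the source.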
9214 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitVDST() const {
9215   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitVDST);
9216 }
9217 
9218 bool AMDGPUOperand::isWaitVDST() const {
9219   return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
9220 }
9221 
9222 //===----------------------------------------------------------------------===//
9223 // VINTERP
9224 //===----------------------------------------------------------------------===//
9225 
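// The wait_exp operand is a 3-bit count; the default operand below supplies
// 0 when it is omitted in the source.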
9226 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitEXP() const {
9227   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitEXP);
9228 }
9229 
9230 bool AMDGPUOperand::isWaitEXP() const {
9231   return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());
9232 }
9233